From 7cb7f45c7feef43c8f71f5cfedfc0b19be2142f7 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Mon, 27 Jul 2009 18:42:38 -0400 Subject: Revert "ACPICA: Remove obsolete acpi_os_validate_address interface" This reverts commit f9ca058430333c9a24c5ca926aa445125f88df18. which caused a regression: http://bugzilla.kernel.org/show_bug.cgi?id=13620 Signed-off-by: Lin Ming Signed-off-by: Len Brown --- include/acpi/acpiosxf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h index 3e798593b17b..ab0b85cf21f3 100644 --- a/include/acpi/acpiosxf.h +++ b/include/acpi/acpiosxf.h @@ -242,6 +242,10 @@ acpi_os_derive_pci_id(acpi_handle rhandle, acpi_status acpi_os_validate_interface(char *interface); acpi_status acpi_osi_invalidate(char* interface); +acpi_status +acpi_os_validate_address(u8 space_id, acpi_physical_address address, + acpi_size length, char *name); + u64 acpi_os_get_timer(void); acpi_status acpi_os_signal(u32 function, void *info); -- cgit v1.2.3 From 27fed4175acf81ddd91d9a4ee2fd298981f60295 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 27 Jul 2009 18:39:45 -0700 Subject: ip: fix logic of reverse path filter sysctl Even though reverse path filter was changed from simple boolean to trinary control, the loose mode only works if both all and device are configured because of this logic error. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index acef2a770b6b..ad27c7da8798 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -82,7 +82,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) -#define IN_DEV_RPFILTER(in_dev) IN_DEV_ANDCONF((in_dev), RP_FILTER) +#define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_BOOTP_RELAY(in_dev) IN_DEV_ANDCONF((in_dev), BOOTP_RELAY) -- cgit v1.2.3 From 7c958e32649e0c35801762878fb0b6da8c55a515 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 31 Jul 2009 11:49:11 -0400 Subject: block: Add a wrapper for setting minimum request size without a queue Introduce blk_limits_io_min() and make blk_queue_io_min() call it. Signed-off-by: Mike Snitzer Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/blk-settings.c | 31 ++++++++++++++++++++++++------- include/linux/blkdev.h | 1 + 2 files changed, 25 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 8e86e2d2b147..1f7197434166 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -383,6 +383,29 @@ void blk_queue_alignment_offset(struct request_queue *q, unsigned int offset) } EXPORT_SYMBOL(blk_queue_alignment_offset); +/** + * blk_limits_io_min - set minimum request size for a device + * @limits: the queue limits + * @min: smallest I/O size in bytes + * + * Description: + * Some devices have an internal block size bigger than the reported + * hardware sector size. This function can be used to signal the + * smallest I/O the device can perform without incurring a performance + * penalty. + */ +void blk_limits_io_min(struct queue_limits *limits, unsigned int min) +{ + limits->io_min = min; + + if (limits->io_min < limits->logical_block_size) + limits->io_min = limits->logical_block_size; + + if (limits->io_min < limits->physical_block_size) + limits->io_min = limits->physical_block_size; +} +EXPORT_SYMBOL(blk_limits_io_min); + /** * blk_queue_io_min - set minimum request size for the queue * @q: the request queue for the device @@ -396,13 +419,7 @@ EXPORT_SYMBOL(blk_queue_alignment_offset); */ void blk_queue_io_min(struct request_queue *q, unsigned int min) { - q->limits.io_min = min; - - if (q->limits.io_min < q->limits.logical_block_size) - q->limits.io_min = q->limits.logical_block_size; - - if (q->limits.io_min < q->limits.physical_block_size) - q->limits.io_min = q->limits.physical_block_size; + blk_limits_io_min(&q->limits, min); } EXPORT_SYMBOL(blk_queue_io_min); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e7cb5dbf6c26..69103e053c92 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -913,6 +913,7 @@ extern void blk_queue_logical_block_size(struct request_queue *, unsigned short) extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); +extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); -- cgit v1.2.3 From 9f498cc5be7e013d8d6e4c616980ed0ffc8680d2 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2009 14:46:33 +0200 Subject: perf_counter: Full task tracing In order to be able to distinguish between no samples due to inactivity and no samples due to task ended, Arjan asked for PERF_EVENT_EXIT events. This is useful to the boot delay instrumentation (bootchart) app. This patch changes the PERF_EVENT_FORK to be emitted on every clone, and adds PERF_EVENT_EXIT to be emitted on task exit, after the task's counters have been closed. This task tracing is controlled through: attr.comm || attr.mmap and through the new attr.task field. Suggested-by: Arjan van de Ven Cc: Paul Mackerras Cc: Anton Blanchard Signed-off-by: Peter Zijlstra [ cleaned up perf_counter.h a bit ] Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 13 ++++++- kernel/fork.c | 4 +- kernel/perf_counter.c | 87 +++++++++++++++++++++++++++++--------------- 3 files changed, 71 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index bd15d7a5f5ce..e604e6ef72dd 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -181,8 +181,9 @@ struct perf_counter_attr { freq : 1, /* use freq, not period */ inherit_stat : 1, /* per task counts */ enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ - __reserved_1 : 51; + __reserved_1 : 50; __u32 wakeup_events; /* wakeup every n events */ __u32 __reserved_2; @@ -308,6 +309,15 @@ enum perf_event_type { */ PERF_EVENT_COMM = 3, + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * }; + */ + PERF_EVENT_EXIT = 4, + /* * struct { * struct perf_event_header header; @@ -323,6 +333,7 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u32 pid, ppid; + * u32 tid, ptid; * }; */ PERF_EVENT_FORK = 7, diff --git a/kernel/fork.c b/kernel/fork.c index 29b532e718f7..466531eb92cc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1269,6 +1269,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, write_unlock_irq(&tasklist_lock); proc_fork_connector(p); cgroup_post_fork(p); + perf_counter_fork(p); return p; bad_fork_free_pid: @@ -1410,9 +1411,6 @@ long do_fork(unsigned long clone_flags, init_completion(&vfork); } - if (!(clone_flags & CLONE_THREAD)) - perf_counter_fork(p); - audit_finish_fork(p); tracehook_report_clone(regs, clone_flags, nr, p); diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 48471d75ae01..199ed4771315 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -42,6 +42,7 @@ static int perf_overcommit __read_mostly = 1; static atomic_t nr_counters __read_mostly; static atomic_t nr_mmap_counters __read_mostly; static atomic_t nr_comm_counters __read_mostly; +static atomic_t nr_task_counters __read_mostly; /* * perf counter paranoia level: @@ -1654,6 +1655,8 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_mmap_counters); if (counter->attr.comm) atomic_dec(&nr_comm_counters); + if (counter->attr.task) + atomic_dec(&nr_task_counters); } if (counter->destroy) @@ -2831,10 +2834,12 @@ perf_counter_read_event(struct perf_counter *counter, } /* - * fork tracking + * task tracking -- fork/exit + * + * enabled by: attr.comm | attr.mmap | attr.task */ -struct perf_fork_event { +struct perf_task_event { struct task_struct *task; struct { @@ -2842,37 +2847,42 @@ struct perf_fork_event { u32 pid; u32 ppid; + u32 tid; + u32 ptid; } event; }; -static void perf_counter_fork_output(struct perf_counter *counter, - struct perf_fork_event *fork_event) +static void perf_counter_task_output(struct perf_counter *counter, + struct perf_task_event *task_event) { struct perf_output_handle handle; - int size = fork_event->event.header.size; - struct task_struct *task = fork_event->task; + int size = task_event->event.header.size; + struct task_struct *task = task_event->task; int ret = perf_output_begin(&handle, counter, size, 0, 0); if (ret) return; - fork_event->event.pid = perf_counter_pid(counter, task); - fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); + task_event->event.pid = perf_counter_pid(counter, task); + task_event->event.ppid = perf_counter_pid(counter, task->real_parent); - perf_output_put(&handle, fork_event->event); + task_event->event.tid = perf_counter_tid(counter, task); + task_event->event.ptid = perf_counter_tid(counter, task->real_parent); + + perf_output_put(&handle, task_event->event); perf_output_end(&handle); } -static int perf_counter_fork_match(struct perf_counter *counter) +static int perf_counter_task_match(struct perf_counter *counter) { - if (counter->attr.comm || counter->attr.mmap) + if (counter->attr.comm || counter->attr.mmap || counter->attr.task) return 1; return 0; } -static void perf_counter_fork_ctx(struct perf_counter_context *ctx, - struct perf_fork_event *fork_event) +static void perf_counter_task_ctx(struct perf_counter_context *ctx, + struct perf_task_event *task_event) { struct perf_counter *counter; @@ -2881,19 +2891,19 @@ static void perf_counter_fork_ctx(struct perf_counter_context *ctx, rcu_read_lock(); list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_fork_match(counter)) - perf_counter_fork_output(counter, fork_event); + if (perf_counter_task_match(counter)) + perf_counter_task_output(counter, task_event); } rcu_read_unlock(); } -static void perf_counter_fork_event(struct perf_fork_event *fork_event) +static void perf_counter_task_event(struct perf_task_event *task_event) { struct perf_cpu_context *cpuctx; struct perf_counter_context *ctx; cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_fork_ctx(&cpuctx->ctx, fork_event); + perf_counter_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); rcu_read_lock(); @@ -2903,32 +2913,40 @@ static void perf_counter_fork_event(struct perf_fork_event *fork_event) */ ctx = rcu_dereference(current->perf_counter_ctxp); if (ctx) - perf_counter_fork_ctx(ctx, fork_event); + perf_counter_task_ctx(ctx, task_event); rcu_read_unlock(); } -void perf_counter_fork(struct task_struct *task) +static void perf_counter_task(struct task_struct *task, int new) { - struct perf_fork_event fork_event; + struct perf_task_event task_event; if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters)) + !atomic_read(&nr_mmap_counters) && + !atomic_read(&nr_task_counters)) return; - fork_event = (struct perf_fork_event){ + task_event = (struct perf_task_event){ .task = task, .event = { .header = { - .type = PERF_EVENT_FORK, + .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, .misc = 0, - .size = sizeof(fork_event.event), + .size = sizeof(task_event.event), }, /* .pid */ /* .ppid */ + /* .tid */ + /* .ptid */ }, }; - perf_counter_fork_event(&fork_event); + perf_counter_task_event(&task_event); +} + +void perf_counter_fork(struct task_struct *task) +{ + perf_counter_task(task, 1); } /* @@ -3887,6 +3905,8 @@ done: atomic_inc(&nr_mmap_counters); if (counter->attr.comm) atomic_inc(&nr_comm_counters); + if (counter->attr.task) + atomic_inc(&nr_task_counters); } return counter; @@ -4248,8 +4268,10 @@ void perf_counter_exit_task(struct task_struct *child) struct perf_counter_context *child_ctx; unsigned long flags; - if (likely(!child->perf_counter_ctxp)) + if (likely(!child->perf_counter_ctxp)) { + perf_counter_task(child, 0); return; + } local_irq_save(flags); /* @@ -4267,15 +4289,22 @@ void perf_counter_exit_task(struct task_struct *child) * incremented the context's refcount before we do put_ctx below. */ spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all * the counters from it. */ unclone_ctx(child_ctx); - spin_unlock(&child_ctx->lock); - local_irq_restore(flags); + spin_unlock_irqrestore(&child_ctx->lock, flags); + + /* + * Report the task dead after unscheduling the counters so that we + * won't get any samples after PERF_EVENT_EXIT. We can however still + * get a few PERF_EVENT_READ events. + */ + perf_counter_task(child, 0); + + child->perf_counter_ctxp = NULL; /* * We can recurse on the same lock type through: -- cgit v1.2.3 From 7699ad35ed06044c4fc1be162553880f98658616 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 15 Jun 2009 01:10:18 -0400 Subject: mtd: let include/linux/mtd/partitions.h stand on its own When declaring static MTD partitions in board specific code, only including should suffice without gcc nagging us with: In file included from arch/arm/mach-kirkwood/sheevaplug-setup.c:14: include/linux/mtd/partitions.h:50: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:50: warning: its scope is only this definition or declaration, which is probably not what you want include/linux/mtd/partitions.h:51: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:61: warning: 'struct mtd_info' declared inside parameter list include/linux/mtd/partitions.h:67: warning: 'struct mtd_info' declared inside parameter list Signed-off-by: Nicolas Pitre Signed-off-by: Artem Bityutskiy Signed-off-by: David Woodhouse --- include/linux/mtd/partitions.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index af6dcb992bc3..b70313d33ff8 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -47,6 +47,8 @@ struct mtd_partition { #define MTDPART_SIZ_FULL (0) +struct mtd_info; + int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int); int del_mtd_partitions(struct mtd_info *); -- cgit v1.2.3 From 6afc4fdb3e94ba60cd566cb878b60c6c01979277 Mon Sep 17 00:00:00 2001 From: Saeed Bishara Date: Tue, 28 Jul 2009 04:56:43 -0700 Subject: mtd: fix the conversion from dev to mtd_info The patch fixes a bug when converting dev to mtd_info by using the drvdata of the dev, the previous code used container_of(dev, struct mtd_info, dev), but won't work for the mtdXro devices as they created without being contained inside mtd_info structure. Signed-off-by: Saeed Bishara Signed-off-by: David Woodhouse --- drivers/mtd/mtdcore.c | 7 ++++--- include/linux/mtd/mtd.h | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index fac54a3fa3f1..00ebf7af7467 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -65,8 +65,8 @@ static void mtd_release(struct device *dev) static int mtd_cls_suspend(struct device *dev, pm_message_t state) { struct mtd_info *mtd = dev_to_mtd(dev); - - if (mtd->suspend) + + if (mtd && mtd->suspend) return mtd->suspend(mtd); else return 0; @@ -76,7 +76,7 @@ static int mtd_cls_resume(struct device *dev) { struct mtd_info *mtd = dev_to_mtd(dev); - if (mtd->resume) + if (mtd && mtd->resume) mtd->resume(mtd); return 0; } @@ -298,6 +298,7 @@ int add_mtd_device(struct mtd_info *mtd) mtd->dev.class = &mtd_class; mtd->dev.devt = MTD_DEVT(i); dev_set_name(&mtd->dev, "mtd%d", i); + dev_set_drvdata(&mtd->dev, mtd); if (device_register(&mtd->dev) != 0) { mtd_table[i] = NULL; break; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 5675b63a0631..0f32a9b6ff55 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -251,7 +251,7 @@ struct mtd_info { static inline struct mtd_info *dev_to_mtd(struct device *dev) { - return dev ? container_of(dev, struct mtd_info, dev) : NULL; + return dev ? dev_get_drvdata(dev) : NULL; } static inline uint32_t mtd_div_by_eb(uint64_t sz, struct mtd_info *mtd) -- cgit v1.2.3 From af0d3b103bcfa877343ee338de12002cd50c9ee5 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Mon, 3 Aug 2009 04:26:16 +0000 Subject: bluetooth: rfcomm_init bug fix rfcomm tty may be used before rfcomm_tty_driver initilized, The problem is that now socket layer init before tty layer, if userspace program do socket callback right here then oops will happen. reporting in: http://marc.info/?l=linux-bluetooth&m=124404919324542&w=2 make 3 changes: 1. remove #ifdef in rfcomm/core.c, make it blank function when rfcomm tty not selected in rfcomm.h 2. tune the rfcomm_init error patch to ensure tty driver initilized before rfcomm socket usage. 3. remove __exit for rfcomm_cleanup_sockets because above change need call it in a __init function. Reported-by: Oliver Hartkopp Tested-by: Oliver Hartkopp Signed-off-by: Dave Young Signed-off-by: David S. Miller --- include/net/bluetooth/rfcomm.h | 12 +++++++++++- net/bluetooth/rfcomm/core.c | 27 +++++++++++++++++++-------- net/bluetooth/rfcomm/sock.c | 2 +- 3 files changed, 31 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 80072611d26a..c274993234e3 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -355,7 +355,17 @@ struct rfcomm_dev_list_req { }; int rfcomm_dev_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); + +#ifdef CONFIG_BT_RFCOMM_TTY int rfcomm_init_ttys(void); void rfcomm_cleanup_ttys(void); - +#else +static inline int rfcomm_init_ttys(void) +{ + return 0; +} +static inline void rfcomm_cleanup_ttys(void) +{ +} +#endif #endif /* __RFCOMM_H */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index e50566ebf9f9..94b3388c188b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -2080,28 +2080,41 @@ static CLASS_ATTR(rfcomm_dlc, S_IRUGO, rfcomm_dlc_sysfs_show, NULL); /* ---- Initialization ---- */ static int __init rfcomm_init(void) { + int ret; + l2cap_load(); hci_register_cb(&rfcomm_cb); rfcomm_thread = kthread_run(rfcomm_run, NULL, "krfcommd"); if (IS_ERR(rfcomm_thread)) { - hci_unregister_cb(&rfcomm_cb); - return PTR_ERR(rfcomm_thread); + ret = PTR_ERR(rfcomm_thread); + goto out_thread; } if (class_create_file(bt_class, &class_attr_rfcomm_dlc) < 0) BT_ERR("Failed to create RFCOMM info file"); - rfcomm_init_sockets(); + ret = rfcomm_init_ttys(); + if (ret) + goto out_tty; -#ifdef CONFIG_BT_RFCOMM_TTY - rfcomm_init_ttys(); -#endif + ret = rfcomm_init_sockets(); + if (ret) + goto out_sock; BT_INFO("RFCOMM ver %s", VERSION); return 0; + +out_sock: + rfcomm_cleanup_ttys(); +out_tty: + kthread_stop(rfcomm_thread); +out_thread: + hci_unregister_cb(&rfcomm_cb); + + return ret; } static void __exit rfcomm_exit(void) @@ -2112,9 +2125,7 @@ static void __exit rfcomm_exit(void) kthread_stop(rfcomm_thread); -#ifdef CONFIG_BT_RFCOMM_TTY rfcomm_cleanup_ttys(); -#endif rfcomm_cleanup_sockets(); } diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7f482784e9f7..0b85e8116859 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -1132,7 +1132,7 @@ error: return err; } -void __exit rfcomm_cleanup_sockets(void) +void rfcomm_cleanup_sockets(void) { class_remove_file(bt_class, &class_attr_rfcomm); -- cgit v1.2.3 From 371842448c05b42d11a4be1c8e4e81d62ecc7534 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 30 Jul 2009 17:43:48 -0700 Subject: cfg80211: fix regression on beacon world roaming feature A regression was added through patch a4ed90d6: "cfg80211: respect API on orig_flags on channel for beacon hint" We did indeed respect _orig flags but the intention was not clearly stated in the commit log. This patch fixes firmware issues picked up by iwlwifi when we lift passive scan of beaconing restrictions on channels its EEPROM has been configured to always enable. By doing so though we also disallowed beacon hints on devices registering their wiphy with custom world regulatory domains enabled, this happens to be currently ath5k, ath9k and ar9170. The passive scan and beacon restrictions on those devices would never be lifted even if we did find a beacon and the hardware did support such enhancements when world roaming. Since Johannes indicates iwlwifi firmware cannot be changed to allow beacon hinting we set up a flag now to specifically allow drivers to disable beacon hints for devices which cannot use them. We enable the flag on iwlwifi to disable beacon hints and by default enable it for all other drivers. It should be noted beacon hints lift passive scan flags and beacon restrictions when we receive a beacon from an AP on any 5 GHz non-DFS channels, and channels 12-14 on the 2.4 GHz band. We don't bother with channels 1-11 as those channels are allowed world wide. This should fix world roaming for ath5k, ath9k and ar9170, thereby improving scan time when we receive the first beacon from any AP, and also enabling beaconing operation (AP/IBSS/Mesh) on cards which would otherwise not be allowed to do so. Drivers not using custom regulatory stuff (wiphy_apply_custom_regulatory()) were not affected by this as the orig_flags for the channels would have been cleared upon wiphy registration. I tested this with a world roaming ath5k card. Cc: Jouni Malinen Signed-off-by: Luis R. Rodriguez Reviewed-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwlwifi/iwl-core.c | 3 +++ drivers/net/wireless/iwlwifi/iwl3945-base.c | 3 +++ include/net/cfg80211.h | 5 +++++ net/wireless/reg.c | 9 +++++---- net/wireless/reg.h | 3 ++- 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-core.c b/drivers/net/wireless/iwlwifi/iwl-core.c index 6ab07165ea28..18b135f510e5 100644 --- a/drivers/net/wireless/iwlwifi/iwl-core.c +++ b/drivers/net/wireless/iwlwifi/iwl-core.c @@ -1332,6 +1332,9 @@ int iwl_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index 2f50ab60bfdf..523843369ca2 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -3968,6 +3968,9 @@ static int iwl3945_setup_mac(struct iwl_priv *priv) hw->wiphy->custom_regulatory = true; + /* Firmware does not support this */ + hw->wiphy->disable_beacon_hints = true; + hw->wiphy->max_scan_ssids = PROBE_OPTION_MAX_3945; /* we create the 802.11 header and a zero-length SSID element */ hw->wiphy->max_scan_ie_len = IWL_MAX_PROBE_REQUEST - 24 - 2; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 1a21895b732b..d1892d66701a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -979,6 +979,10 @@ struct cfg80211_ops { * channels at a later time. This can be used for devices which do not * have calibration information gauranteed for frequencies or settings * outside of its regulatory domain. + * @disable_beacon_hints: enable this if your driver needs to ensure that + * passive scan flags and beaconing flags may not be lifted by cfg80211 + * due to regulatory beacon hints. For more information on beacon + * hints read the documenation for regulatory_hint_found_beacon() * @reg_notifier: the driver's regulatory notification callback * @regd: the driver's regulatory domain, if one was requested via * the regulatory_hint() API. This can be used by the driver @@ -1004,6 +1008,7 @@ struct wiphy { bool custom_regulatory; bool strict_regulatory; + bool disable_beacon_hints; enum cfg80211_signal_type signal_type; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5e14371cda70..75a406d33619 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1089,17 +1089,18 @@ static void handle_reg_beacon(struct wiphy *wiphy, chan->beacon_found = true; + if (wiphy->disable_beacon_hints) + return; + chan_before.center_freq = chan->center_freq; chan_before.flags = chan->flags; - if ((chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) && - !(chan->orig_flags & IEEE80211_CHAN_PASSIVE_SCAN)) { + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; channel_changed = true; } - if ((chan->flags & IEEE80211_CHAN_NO_IBSS) && - !(chan->orig_flags & IEEE80211_CHAN_NO_IBSS)) { + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { chan->flags &= ~IEEE80211_CHAN_NO_IBSS; channel_changed = true; } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e37829a49dc4..4e167a8e11be 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -30,7 +30,8 @@ int set_regdom(const struct ieee80211_regdomain *rd); * non-radar 5 GHz channels. * * Drivers do not need to call this, cfg80211 will do it for after a scan - * on a newly found BSS. + * on a newly found BSS. If you cannot make use of this feature you can + * set the wiphy->disable_beacon_hints to true. */ int regulatory_hint_found_beacon(struct wiphy *wiphy, struct ieee80211_channel *beacon_chan, -- cgit v1.2.3 From 7320700df1864b601cef5adbddce8654a0e3f78b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Mon, 3 Aug 2009 17:01:53 -0400 Subject: drm/radeon: add some new r7xx pci ids Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 7174818c2c13..9d4c00491547 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -257,9 +257,12 @@ {0x1002, 0x940F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R600|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ {0x1002, 0x94B5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x94B9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV740|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9440, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9441, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9442, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV770|RADEON_NEW_MEMMAP}, \ @@ -288,6 +291,7 @@ {0x1002, 0x948F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9490, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9491, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9495, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9498, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x949C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ {0x1002, 0x949E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV730|RADEON_NEW_MEMMAP}, \ @@ -325,6 +329,7 @@ {0x1002, 0x9552, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9553, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9555, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x9557, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV710|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9580, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9581, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x9583, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV630|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 18eac1cc100fa2afd5f39085aae6b694e417734b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 3 Aug 2009 10:58:29 -0700 Subject: tty-ldisc: make refcount be atomic_t 'users' count This is pure preparation of changing the ldisc reference counting to be a true refcount that defines the lifetime of the ldisc. But this is a purely syntactic change for now to make the next steps easier. This patch should make no semantic changes at all. But I wanted to make the ldisc refcount be an atomic (I will be touching it without locks soon enough), and I wanted to rename it so that there isn't quite as much confusion between 'ldo->refcount' (ldisk operations refcount) and 'ld->refcount' (ldisc refcount itself) in the same file. So it's now an atomic 'ld->users' count. It still starts at zero, despite having a reference from 'tty->ldisc', but that will change once we turn it into a _real_ refcount. Signed-off-by: Linus Torvalds Tested-by: OGAWA Hirofumi Tested-by: Sergey Senozhatsky Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/char/tty_ldisc.c | 18 ++++++++---------- include/linux/tty_ldisc.h | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/char/tty_ldisc.c b/drivers/char/tty_ldisc.c index acd76b767d4c..fd175e60aad5 100644 --- a/drivers/char/tty_ldisc.c +++ b/drivers/char/tty_ldisc.c @@ -142,7 +142,7 @@ static struct tty_ldisc *tty_ldisc_try_get(int disc) /* lock it */ ldops->refcount++; ld->ops = ldops; - ld->refcount = 0; + atomic_set(&ld->users, 0); err = 0; } } @@ -206,7 +206,7 @@ static void tty_ldisc_put(struct tty_ldisc *ld) ldo->refcount--; module_put(ldo->owner); spin_unlock_irqrestore(&tty_ldisc_lock, flags); - WARN_ON(ld->refcount); + WARN_ON(atomic_read(&ld->users)); kfree(ld); } @@ -297,7 +297,7 @@ static int tty_ldisc_try(struct tty_struct *tty) spin_lock_irqsave(&tty_ldisc_lock, flags); ld = tty->ldisc; if (test_bit(TTY_LDISC, &tty->flags)) { - ld->refcount++; + atomic_inc(&ld->users); ret = 1; } spin_unlock_irqrestore(&tty_ldisc_lock, flags); @@ -324,7 +324,7 @@ struct tty_ldisc *tty_ldisc_ref_wait(struct tty_struct *tty) { /* wait_event is a macro */ wait_event(tty_ldisc_wait, tty_ldisc_try(tty)); - WARN_ON(tty->ldisc->refcount == 0); + WARN_ON(atomic_read(&tty->ldisc->users) == 0); return tty->ldisc; } EXPORT_SYMBOL_GPL(tty_ldisc_ref_wait); @@ -365,11 +365,9 @@ void tty_ldisc_deref(struct tty_ldisc *ld) BUG_ON(ld == NULL); spin_lock_irqsave(&tty_ldisc_lock, flags); - if (ld->refcount == 0) + if (atomic_read(&ld->users) == 0) printk(KERN_ERR "tty_ldisc_deref: no references.\n"); - else - ld->refcount--; - if (ld->refcount == 0) + else if (atomic_dec_and_test(&ld->users)) wake_up(&tty_ldisc_wait); spin_unlock_irqrestore(&tty_ldisc_lock, flags); } @@ -536,10 +534,10 @@ static int tty_ldisc_wait_idle(struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tty_ldisc_lock, flags); - while (tty->ldisc->refcount) { + while (atomic_read(&tty->ldisc->users)) { spin_unlock_irqrestore(&tty_ldisc_lock, flags); if (wait_event_timeout(tty_ldisc_wait, - tty->ldisc->refcount == 0, 5 * HZ) == 0) + atomic_read(&tty->ldisc->users) == 0, 5 * HZ) == 0) return -EBUSY; spin_lock_irqsave(&tty_ldisc_lock, flags); } diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index 40f38d896777..0c4ee9b88f85 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -144,7 +144,7 @@ struct tty_ldisc_ops { struct tty_ldisc { struct tty_ldisc_ops *ops; - int refcount; + atomic_t users; }; #define TTY_LDISC_MAGIC 0x5403 -- cgit v1.2.3 From 6502fbfaf81b09b3f474bb7b3796257e9450273e Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 4 Aug 2009 11:24:24 -0400 Subject: drm/radeon: Add support for RS880 chips These are new AMD IGP chips Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r600_cp.c | 22 +++++++++++++++------- drivers/gpu/drm/radeon/radeon_drv.h | 1 + include/drm/drm_pciids.h | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 146f3570af8e..20f17908b036 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -384,8 +384,9 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) DRM_INFO("Loading RV670 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RV670_pfp_microcode[i]); - } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { - DRM_INFO("Loading RS780 CP Microcode\n"); + } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { + DRM_INFO("Loading RS780/RS880 CP Microcode\n"); for (i = 0; i < PM4_UCODE_SIZE; i++) { RADEON_WRITE(R600_CP_ME_RAM_DATA, RS780_cp_microcode[i][0]); @@ -396,7 +397,7 @@ static void r600_cp_load_microcode(drm_radeon_private_t *dev_priv) } RADEON_WRITE(R600_CP_PFP_UCODE_ADDR, 0); - DRM_INFO("Loading RS780 PFP Microcode\n"); + DRM_INFO("Loading RS780/RS880 PFP Microcode\n"); for (i = 0; i < PFP_UCODE_SIZE; i++) RADEON_WRITE(R600_CP_PFP_UCODE_DATA, RS780_pfp_microcode[i]); } @@ -783,6 +784,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: dev_priv->r600_max_pipes = 1; dev_priv->r600_max_tile_pipes = 1; @@ -917,7 +919,8 @@ static void r600_gfx_init(struct drm_device *dev, ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV630) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_DB_DEBUG, R600_PREZ_MUST_WAIT_FOR_POSTZ_DONE); else RADEON_WRITE(R600_DB_DEBUG, 0); @@ -935,7 +938,8 @@ static void r600_gfx_init(struct drm_device *dev, sq_ms_fifo_sizes = RADEON_READ(R600_SQ_MS_FIFO_SIZES); if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { sq_ms_fifo_sizes = (R600_CACHE_FIFO_SIZE(0xa) | R600_FETCH_FIFO_HIWATER(0xa) | R600_DONE_FIFO_HIWATER(0xe0) | @@ -978,7 +982,8 @@ static void r600_gfx_init(struct drm_device *dev, R600_NUM_ES_STACK_ENTRIES(0)); } else if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) { + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) { /* no vertex cache */ sq_config &= ~R600_VC_ENABLE; @@ -1035,7 +1040,8 @@ static void r600_gfx_init(struct drm_device *dev, if (((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV610) || ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV620) || - ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780)) + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS780) || + ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RS880)) RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_TC_ONLY)); else RADEON_WRITE(R600_VGT_CACHE_INVALIDATION, R600_CACHE_INVALIDATION(R600_VC_AND_TC)); @@ -1078,6 +1084,7 @@ static void r600_gfx_init(struct drm_device *dev, break; case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: gs_prim_buffer_depth = 32; break; @@ -1123,6 +1130,7 @@ static void r600_gfx_init(struct drm_device *dev, switch (dev_priv->flags & RADEON_FAMILY_MASK) { case CHIP_RV610: case CHIP_RS780: + case CHIP_RS880: case CHIP_RV620: tc_cntl = R600_TC_L2_SIZE(8); break; diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 127d0456f628..3933f8216a34 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -143,6 +143,7 @@ enum radeon_family { CHIP_RV635, CHIP_RV670, CHIP_RS780, + CHIP_RS880, CHIP_RV770, CHIP_RV730, CHIP_RV710, diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 9d4c00491547..853508499d20 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -370,6 +370,11 @@ {0x1002, 0x9614, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9615, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0x1002, 0x9616, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS780|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9710, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9711, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9712, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9713, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ + {0x1002, 0x9714, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS880|RADEON_NEW_MEMMAP|RADEON_IS_IGP}, \ {0, 0, 0} #define r128_PCI_IDS \ -- cgit v1.2.3 From 5116d8f6b977970ebefc1932c0f313163a6ec91f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 26 Jul 2009 17:10:01 +0300 Subject: KVM: fix ack not being delivered when msi present kvm_notify_acked_irq does not check irq type, so that it sometimes interprets msi vector as irq. As a result, ack notifiers are not called, which typially hangs the guest. The fix is to track and check irq type. Signed-off-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 1 + virt/kvm/irq_comm.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 16713dc672e4..3060bdc35ffe 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -110,6 +110,7 @@ struct kvm_memory_slot { struct kvm_kernel_irq_routing_entry { u32 gsi; + u32 type; int (*set)(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int level); union { diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index a8bd466d00cc..ddc17f0e2f35 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) unsigned gsi = pin; list_for_each_entry(e, &kvm->irq_routing, link) - if (e->irqchip.irqchip == irqchip && + if (e->type == KVM_IRQ_ROUTING_IRQCHIP && + e->irqchip.irqchip == irqchip && e->irqchip.pin == pin) { gsi = e->gsi; break; @@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e, int delta; e->gsi = ue->gsi; + e->type = ue->type; switch (ue->type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; -- cgit v1.2.3 From af6af30c0fcd77e621638e53ef8b176bca8bd3b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 5 Aug 2009 20:41:04 +0200 Subject: ftrace: Fix perf-tracepoint OOPS Not all tracepoints are created equal, in specific the ftrace tracepoints are created with TRACE_EVENT_FORMAT() which does not generate the needed bits to tie them into perf counters. For those events, don't create the 'id' file and fail ->profile_enable when their ID is specified through other means. Reported-by: Chris Mason Signed-off-by: Peter Zijlstra Cc: Steven Rostedt LKML-Reference: <1249497664.5890.4.camel@laptop> [ v2: fix build error in the !CONFIG_EVENT_PROFILE case ] Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 8 +++----- kernel/trace/trace_event_profile.c | 2 +- kernel/trace/trace_events.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 5c093ffc655b..d7cd193c2277 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -119,11 +119,9 @@ struct ftrace_event_call { void *filter; void *mod; -#ifdef CONFIG_EVENT_PROFILE - atomic_t profile_count; - int (*profile_enable)(struct ftrace_event_call *); - void (*profile_disable)(struct ftrace_event_call *); -#endif + atomic_t profile_count; + int (*profile_enable)(struct ftrace_event_call *); + void (*profile_disable)(struct ftrace_event_call *); }; #define MAX_FILTER_PRED 32 diff --git a/kernel/trace/trace_event_profile.c b/kernel/trace/trace_event_profile.c index 5b5895afecfe..11ba5bb4ed0a 100644 --- a/kernel/trace/trace_event_profile.c +++ b/kernel/trace/trace_event_profile.c @@ -14,7 +14,7 @@ int ftrace_profile_enable(int event_id) mutex_lock(&event_mutex); list_for_each_entry(event, &ftrace_events, list) { - if (event->id == event_id) { + if (event->id == event_id && event->profile_enable) { ret = event->profile_enable(event); break; } diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 23d2972b22d6..e75276a49cf5 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -940,7 +940,7 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events, entry = trace_create_file("enable", 0644, call->dir, call, enable); - if (call->id) + if (call->id && call->profile_enable) entry = trace_create_file("id", 0444, call->dir, call, id); -- cgit v1.2.3 From d82f1c35348cebe2fb2d4a4d31ce0ab0769e3d93 Mon Sep 17 00:00:00 2001 From: Eric Miao Date: Wed, 5 Aug 2009 01:24:41 -0700 Subject: Input: matrix_keypad - make matrix keymap size dynamic Remove assumption on the shift and size of rows/columns form matrix_keypad driver. Signed-off-by: Eric Miao Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/matrix_keypad.c | 18 +++++++++--------- include/linux/input/matrix_keypad.h | 13 +++++++------ 2 files changed, 16 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/matrix_keypad.c b/drivers/input/keyboard/matrix_keypad.c index e9b2e7cb05be..541b981ff075 100644 --- a/drivers/input/keyboard/matrix_keypad.c +++ b/drivers/input/keyboard/matrix_keypad.c @@ -27,6 +27,7 @@ struct matrix_keypad { const struct matrix_keypad_platform_data *pdata; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; uint32_t last_key_state[MATRIX_MAX_COLS]; struct delayed_work work; @@ -136,7 +137,7 @@ static void matrix_keypad_scan(struct work_struct *work) if ((bits_changed & (1 << row)) == 0) continue; - code = (row << 4) + col; + code = MATRIX_SCAN_CODE(row, col, keypad->row_shift); input_event(input_dev, EV_MSC, MSC_SCAN, code); input_report_key(input_dev, keypad->keycodes[code], @@ -317,6 +318,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) struct matrix_keypad *keypad; struct input_dev *input_dev; unsigned short *keycodes; + unsigned int row_shift; int i; int err; @@ -332,14 +334,11 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) return -EINVAL; } - if (!keymap_data->max_keymap_size) { - dev_err(&pdev->dev, "invalid keymap data supplied\n"); - return -EINVAL; - } + row_shift = get_count_order(pdata->num_col_gpios); keypad = kzalloc(sizeof(struct matrix_keypad), GFP_KERNEL); - keycodes = kzalloc(keymap_data->max_keymap_size * - sizeof(keypad->keycodes), + keycodes = kzalloc((pdata->num_row_gpios << row_shift) * + sizeof(*keycodes), GFP_KERNEL); input_dev = input_allocate_device(); if (!keypad || !keycodes || !input_dev) { @@ -350,6 +349,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) keypad->input_dev = input_dev; keypad->pdata = pdata; keypad->keycodes = keycodes; + keypad->row_shift = row_shift; keypad->stopped = true; INIT_DELAYED_WORK(&keypad->work, matrix_keypad_scan); spin_lock_init(&keypad->lock); @@ -363,7 +363,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) input_dev->keycode = keycodes; input_dev->keycodesize = sizeof(*keycodes); - input_dev->keycodemax = keymap_data->max_keymap_size; + input_dev->keycodemax = pdata->num_row_gpios << keypad->row_shift; for (i = 0; i < keymap_data->keymap_size; i++) { unsigned int key = keymap_data->keymap[i]; @@ -371,7 +371,7 @@ static int __devinit matrix_keypad_probe(struct platform_device *pdev) unsigned int col = KEY_COL(key); unsigned short code = KEY_VAL(key); - keycodes[(row << 4) + col] = code; + keycodes[MATRIX_SCAN_CODE(row, col, row_shift)] = code; __set_bit(code, input_dev->keybit); } __clear_bit(KEY_RESERVED, input_dev->keybit); diff --git a/include/linux/input/matrix_keypad.h b/include/linux/input/matrix_keypad.h index 7964516c6954..15d5903af2dd 100644 --- a/include/linux/input/matrix_keypad.h +++ b/include/linux/input/matrix_keypad.h @@ -15,12 +15,13 @@ #define KEY_COL(k) (((k) >> 16) & 0xff) #define KEY_VAL(k) ((k) & 0xffff) +#define MATRIX_SCAN_CODE(row, col, row_shift) (((row) << (row_shift)) + (col)) + /** * struct matrix_keymap_data - keymap for matrix keyboards * @keymap: pointer to array of uint32 values encoded with KEY() macro * representing keymap * @keymap_size: number of entries (initialized) in this keymap - * @max_keymap_size: maximum size of keymap supported by the device * * This structure is supposed to be used by platform code to supply * keymaps to drivers that implement matrix-like keypads/keyboards. @@ -28,14 +29,13 @@ struct matrix_keymap_data { const uint32_t *keymap; unsigned int keymap_size; - unsigned int max_keymap_size; }; /** * struct matrix_keypad_platform_data - platform-dependent keypad data * @keymap_data: pointer to &matrix_keymap_data - * @row_gpios: array of gpio numbers reporesenting rows - * @col_gpios: array of gpio numbers reporesenting colums + * @row_gpios: pointer to array of gpio numbers representing rows + * @col_gpios: pointer to array of gpio numbers reporesenting colums * @num_row_gpios: actual number of row gpios used by device * @num_col_gpios: actual number of col gpios used by device * @col_scan_delay_us: delay, measured in microseconds, that is @@ -48,8 +48,9 @@ struct matrix_keymap_data { struct matrix_keypad_platform_data { const struct matrix_keymap_data *keymap_data; - unsigned int row_gpios[MATRIX_MAX_ROWS]; - unsigned int col_gpios[MATRIX_MAX_COLS]; + const unsigned int *row_gpios; + const unsigned int *col_gpios; + unsigned int num_row_gpios; unsigned int num_col_gpios; -- cgit v1.2.3 From 54e346215e4fe2ca8c94c54e546cc61902060510 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Aug 2009 14:38:25 -0300 Subject: vfs: fix inode_init_always calling convention Currently inode_init_always calls into ->destroy_inode if the additional initialization fails. That's not only counter-intuitive because inode_init_always did not allocate the inode structure, but in case of XFS it's actively harmful as ->destroy_inode might delete the inode from a radix-tree that has never been added. This in turn might end up deleting the inode for the same inum that has been instanciated by another process and cause lots of cause subtile problems. Also in the case of re-initializing a reclaimable inode in XFS it would free an inode we still want to keep alive. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/inode.c | 30 +++++++++++++++++------------- fs/xfs/xfs_iget.c | 17 +++++------------ include/linux/fs.h | 2 +- 3 files changed, 23 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index 901bad1e5f12..af2c05235cc8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -120,12 +120,11 @@ static void wake_up_inode(struct inode *inode) * These are initializations that need to be done on every inode * allocation as the fields are not initialised by slab allocation. */ -struct inode *inode_init_always(struct super_block *sb, struct inode *inode) +int inode_init_always(struct super_block *sb, struct inode *inode) { static const struct address_space_operations empty_aops; static struct inode_operations empty_iops; static const struct file_operations empty_fops; - struct address_space *const mapping = &inode->i_data; inode->i_sb = sb; @@ -152,7 +151,7 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->dirtied_when = 0; if (security_inode_alloc(inode)) - goto out_free_inode; + goto out; /* allocate and initialize an i_integrity */ if (ima_inode_alloc(inode)) @@ -198,16 +197,12 @@ struct inode *inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif - return inode; + return 0; out_free_security: security_inode_free(inode); -out_free_inode: - if (inode->i_sb->s_op->destroy_inode) - inode->i_sb->s_op->destroy_inode(inode); - else - kmem_cache_free(inode_cachep, (inode)); - return NULL; +out: + return -ENOMEM; } EXPORT_SYMBOL(inode_init_always); @@ -220,9 +215,18 @@ static struct inode *alloc_inode(struct super_block *sb) else inode = kmem_cache_alloc(inode_cachep, GFP_KERNEL); - if (inode) - return inode_init_always(sb, inode); - return NULL; + if (!inode) + return NULL; + + if (unlikely(inode_init_always(sb, inode))) { + if (inode->i_sb->s_op->destroy_inode) + inode->i_sb->s_op->destroy_inode(inode); + else + kmem_cache_free(inode_cachep, inode); + return NULL; + } + + return inode; } void destroy_inode(struct inode *inode) diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index 5fcec6f020a7..719c85b155f4 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -64,6 +64,10 @@ xfs_inode_alloc( ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP); if (!ip) return NULL; + if (inode_init_always(mp->m_super, VFS_I(ip))) { + kmem_zone_free(xfs_inode_zone, ip); + return NULL; + } ASSERT(atomic_read(&ip->i_iocount) == 0); ASSERT(atomic_read(&ip->i_pincount) == 0); @@ -105,17 +109,6 @@ xfs_inode_alloc( #ifdef XFS_DIR2_TRACE ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif - /* - * Now initialise the VFS inode. We do this after the xfs_inode - * initialisation as internal failures will result in ->destroy_inode - * being called and that will pass down through the reclaim path and - * free the XFS inode. This path requires the XFS inode to already be - * initialised. Hence if this call fails, the xfs_inode has already - * been freed and we should not reference it at all in the error - * handling. - */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) - return NULL; /* prevent anyone from using this yet */ VFS_I(ip)->i_state = I_NEW|I_LOCK; @@ -167,7 +160,7 @@ xfs_iget_cache_hit( * errors cleanly, then tag it so it can be set up correctly * later. */ - if (!inode_init_always(mp->m_super, VFS_I(ip))) { + if (inode_init_always(mp->m_super, VFS_I(ip))) { error = ENOMEM; goto out_error; } diff --git a/include/linux/fs.h b/include/linux/fs.h index a36ffa5a77a4..0c3b5e58a986 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2137,7 +2137,7 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); -extern struct inode * inode_init_always(struct super_block *, struct inode *); +extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); -- cgit v1.2.3 From 2e00c97e2c1d2ffc9e26252ca26b237678b0b772 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Aug 2009 14:38:29 -0300 Subject: vfs: add __destroy_inode When we want to tear down an inode that lost the add to the cache race in XFS we must not call into ->destroy_inode because that would delete the inode that won the race from the inode cache radix tree. This patch provides the __destroy_inode helper needed to fix this, the actual fix will be in th next patch. As XFS was the only reason destroy_inode was exported we shift the export to the new __destroy_inode. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Sandeen --- fs/inode.c | 10 +++++++--- include/linux/fs.h | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index af2c05235cc8..ae7b67e48661 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -229,7 +229,7 @@ static struct inode *alloc_inode(struct super_block *sb) return inode; } -void destroy_inode(struct inode *inode) +void __destroy_inode(struct inode *inode) { BUG_ON(inode_has_buffers(inode)); ima_inode_free(inode); @@ -241,13 +241,17 @@ void destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif +} +EXPORT_SYMBOL(__destroy_inode); + +void destroy_inode(struct inode *inode) +{ + __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); else kmem_cache_free(inode_cachep, (inode)); } -EXPORT_SYMBOL(destroy_inode); - /* * These are initializations that only need to be done diff --git a/include/linux/fs.h b/include/linux/fs.h index 0c3b5e58a986..67888a9e0655 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2164,6 +2164,7 @@ extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); +extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); extern int should_remove_suid(struct dentry *); extern int file_remove_suid(struct file *); -- cgit v1.2.3 From 4bfc44958e499af9a73f62201543b3a1f617cfeb Mon Sep 17 00:00:00 2001 From: KAMEZAWA Hiroyuki Date: Thu, 6 Aug 2009 15:07:33 -0700 Subject: mm: make set_mempolicy(MPOL_INTERLEAV) N_HIGH_MEMORY aware At first, init_task's mems_allowed is initialized as this. init_task->mems_allowed == node_state[N_POSSIBLE] And cpuset's top_cpuset mask is initialized as this top_cpuset->mems_allowed = node_state[N_HIGH_MEMORY] Before 2.6.29: policy's mems_allowed is initialized as this. 1. update tasks->mems_allowed by its cpuset->mems_allowed. 2. policy->mems_allowed = nodes_and(tasks->mems_allowed, user's mask) Updating task's mems_allowed in reference to top_cpuset's one. cpuset's mems_allowed is aware of N_HIGH_MEMORY, always. In 2.6.30: After commit 58568d2a8215cb6f55caf2332017d7bdff954e1c ("cpuset,mm: update tasks' mems_allowed in time"), policy's mems_allowed is initialized as this. 1. policy->mems_allowd = nodes_and(task->mems_allowed, user's mask) Here, if task is in top_cpuset, task->mems_allowed is not updated from init's one. Assume user excutes command as #numactrl --interleave=all ,.... policy->mems_allowd = nodes_and(N_POSSIBLE, ALL_SET_MASK) Then, policy's mems_allowd can includes a possible node, which has no pgdat. MPOL's INTERLEAVE just scans nodemask of task->mems_allowd and access this directly. NODE_DATA(nid)->zonelist even if NODE_DATA(nid)==NULL Then, what's we need is making policy->mems_allowed be aware of N_HIGH_MEMORY. This patch does that. But to do so, extra nodemask will be on statck. Because I know cpumask has a new interface of CPUMASK_ALLOC(), I added it to node. This patch stands on old behavior. But I feel this fix itself is just a Band-Aid. But to do fundametal fix, we have to take care of memory hotplug and it takes time. (task->mems_allowd should be N_HIGH_MEMORY, I think.) mpol_set_nodemask() should be aware of N_HIGH_MEMORY and policy's nodemask should be includes only online nodes. In old behavior, this is guaranteed by frequent reference to cpuset's code. Now, most of them are removed and mempolicy has to check it by itself. To do check, a few nodemask_t will be used for calculating nodemask. But, size of nodemask_t can be big and it's not good to allocate them on stack. Now, cpumask_t has CPUMASK_ALLOC/FREE an easy code for get scratch area. NODEMASK_ALLOC/FREE shoudl be there. [akpm@linux-foundation.org: cleanups & tweaks] Tested-by: KOSAKI Motohiro Signed-off-by: KAMEZAWA Hiroyuki Cc: Miao Xie Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Christoph Lameter Cc: Paul Menage Cc: Nick Piggin Cc: Yasunori Goto Cc: Pekka Enberg Cc: David Rientjes Cc: Lee Schermerhorn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nodemask.h | 28 ++++++++++++++++ mm/mempolicy.c | 84 +++++++++++++++++++++++++++++++++--------------- 2 files changed, 86 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 829b94b156f2..b359c4a9ec9e 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -82,6 +82,12 @@ * to generate slightly worse code. So use a simple one-line #define * for node_isset(), instead of wrapping an inline inside a macro, the * way we do the other calls. + * + * NODEMASK_SCRATCH + * When doing above logical AND, OR, XOR, Remap operations the callers tend to + * need temporary nodemask_t's on the stack. But if NODES_SHIFT is large, + * nodemask_t's consume too much stack space. NODEMASK_SCRATCH is a helper + * for such situations. See below and CPUMASK_ALLOC also. */ #include @@ -473,4 +479,26 @@ static inline int num_node_state(enum node_states state) #define for_each_node(node) for_each_node_state(node, N_POSSIBLE) #define for_each_online_node(node) for_each_node_state(node, N_ONLINE) +/* + * For nodemask scrach area.(See CPUMASK_ALLOC() in cpumask.h) + */ + +#if NODES_SHIFT > 8 /* nodemask_t > 64 bytes */ +#define NODEMASK_ALLOC(x, m) struct x *m = kmalloc(sizeof(*m), GFP_KERNEL) +#define NODEMASK_FREE(m) kfree(m) +#else +#define NODEMASK_ALLOC(x, m) struct x _m, *m = &_m +#define NODEMASK_FREE(m) +#endif + +/* A example struture for using NODEMASK_ALLOC, used in mempolicy. */ +struct nodemask_scratch { + nodemask_t mask1; + nodemask_t mask2; +}; + +#define NODEMASK_SCRATCH(x) NODEMASK_ALLOC(nodemask_scratch, x) +#define NODEMASK_SCRATCH_FREE(x) NODEMASK_FREE(x) + + #endif /* __LINUX_NODEMASK_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e08e2c4da63a..7dd9d9f80694 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -191,25 +191,27 @@ static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) * Must be called holding task's alloc_lock to protect task's mems_allowed * and mempolicy. May also be called holding the mmap_semaphore for write. */ -static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) +static int mpol_set_nodemask(struct mempolicy *pol, + const nodemask_t *nodes, struct nodemask_scratch *nsc) { - nodemask_t cpuset_context_nmask; int ret; /* if mode is MPOL_DEFAULT, pol is NULL. This is right. */ if (pol == NULL) return 0; + /* Check N_HIGH_MEMORY */ + nodes_and(nsc->mask1, + cpuset_current_mems_allowed, node_states[N_HIGH_MEMORY]); VM_BUG_ON(!nodes); if (pol->mode == MPOL_PREFERRED && nodes_empty(*nodes)) nodes = NULL; /* explicit local allocation */ else { if (pol->flags & MPOL_F_RELATIVE_NODES) - mpol_relative_nodemask(&cpuset_context_nmask, nodes, - &cpuset_current_mems_allowed); + mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); else - nodes_and(cpuset_context_nmask, *nodes, - cpuset_current_mems_allowed); + nodes_and(nsc->mask2, *nodes, nsc->mask1); + if (mpol_store_user_nodemask(pol)) pol->w.user_nodemask = *nodes; else @@ -217,8 +219,10 @@ static int mpol_set_nodemask(struct mempolicy *pol, const nodemask_t *nodes) cpuset_current_mems_allowed; } - ret = mpol_ops[pol->mode].create(pol, - nodes ? &cpuset_context_nmask : NULL); + if (nodes) + ret = mpol_ops[pol->mode].create(pol, &nsc->mask2); + else + ret = mpol_ops[pol->mode].create(pol, NULL); return ret; } @@ -620,12 +624,17 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, { struct mempolicy *new, *old; struct mm_struct *mm = current->mm; + NODEMASK_SCRATCH(scratch); int ret; - new = mpol_new(mode, flags, nodes); - if (IS_ERR(new)) - return PTR_ERR(new); + if (!scratch) + return -ENOMEM; + new = mpol_new(mode, flags, nodes); + if (IS_ERR(new)) { + ret = PTR_ERR(new); + goto out; + } /* * prevent changing our mempolicy while show_numa_maps() * is using it. @@ -635,13 +644,13 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, if (mm) down_write(&mm->mmap_sem); task_lock(current); - ret = mpol_set_nodemask(new, nodes); + ret = mpol_set_nodemask(new, nodes, scratch); if (ret) { task_unlock(current); if (mm) up_write(&mm->mmap_sem); mpol_put(new); - return ret; + goto out; } old = current->mempolicy; current->mempolicy = new; @@ -654,7 +663,10 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, up_write(&mm->mmap_sem); mpol_put(old); - return 0; + ret = 0; +out: + NODEMASK_SCRATCH_FREE(scratch); + return ret; } /* @@ -1014,12 +1026,20 @@ static long do_mbind(unsigned long start, unsigned long len, if (err) return err; } - down_write(&mm->mmap_sem); - task_lock(current); - err = mpol_set_nodemask(new, nmask); - task_unlock(current); + { + NODEMASK_SCRATCH(scratch); + if (scratch) { + down_write(&mm->mmap_sem); + task_lock(current); + err = mpol_set_nodemask(new, nmask, scratch); + task_unlock(current); + if (err) + up_write(&mm->mmap_sem); + } else + err = -ENOMEM; + NODEMASK_SCRATCH_FREE(scratch); + } if (err) { - up_write(&mm->mmap_sem); mpol_put(new); return err; } @@ -1891,6 +1911,7 @@ restart: * Install non-NULL @mpol in inode's shared policy rb-tree. * On entry, the current task has a reference on a non-NULL @mpol. * This must be released on exit. + * This is called at get_inode() calls and we can use GFP_KERNEL. */ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { @@ -1902,19 +1923,24 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) if (mpol) { struct vm_area_struct pvma; struct mempolicy *new; + NODEMASK_SCRATCH(scratch); + if (!scratch) + return; /* contextualize the tmpfs mount point mempolicy */ new = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); if (IS_ERR(new)) { mpol_put(mpol); /* drop our ref on sb mpol */ + NODEMASK_SCRATCH_FREE(scratch); return; /* no valid nodemask intersection */ } task_lock(current); - ret = mpol_set_nodemask(new, &mpol->w.user_nodemask); + ret = mpol_set_nodemask(new, &mpol->w.user_nodemask, scratch); task_unlock(current); mpol_put(mpol); /* drop our ref on sb mpol */ if (ret) { + NODEMASK_SCRATCH_FREE(scratch); mpol_put(new); return; } @@ -1924,6 +1950,7 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) pvma.vm_end = TASK_SIZE; /* policy covers entire file */ mpol_set_shared_policy(sp, &pvma, new); /* adds ref */ mpol_put(new); /* drop initial ref */ + NODEMASK_SCRATCH_FREE(scratch); } } @@ -2140,13 +2167,18 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) err = 1; else { int ret; - - task_lock(current); - ret = mpol_set_nodemask(new, &nodes); - task_unlock(current); - if (ret) + NODEMASK_SCRATCH(scratch); + if (scratch) { + task_lock(current); + ret = mpol_set_nodemask(new, &nodes, scratch); + task_unlock(current); + } else + ret = -ENOMEM; + NODEMASK_SCRATCH_FREE(scratch); + if (ret) { err = 1; - else if (no_context) { + mpol_put(new); + } else if (no_context) { /* save for contextualization */ new->w.user_nodemask = nodes; } -- cgit v1.2.3 From daeb6b6fbe27049f465c48a7d0ee5555c3b84064 Mon Sep 17 00:00:00 2001 From: Phillip Lougher Date: Thu, 6 Aug 2009 15:09:30 -0700 Subject: bzip2/lzma/gzip: fix comments describing decompressor API Fix and improve comments in decompress/generic.h that describe the decompressor API. Also remove an unused definition, and rename INBUF_LEN in lib/decompress_inflate.c to conform to bzip2/lzma naming. Signed-off-by: Phillip Lougher Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/generic.h | 32 +++++++++++++++++++------------- lib/decompress_inflate.c | 8 ++++---- 2 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/decompress/generic.h b/include/linux/decompress/generic.h index 6dfb856327bb..0c7111a55a1a 100644 --- a/include/linux/decompress/generic.h +++ b/include/linux/decompress/generic.h @@ -1,31 +1,37 @@ #ifndef DECOMPRESS_GENERIC_H #define DECOMPRESS_GENERIC_H -/* Minimal chunksize to be read. - *Bzip2 prefers at least 4096 - *Lzma prefers 0x10000 */ -#define COMPR_IOBUF_SIZE 4096 - typedef int (*decompress_fn) (unsigned char *inbuf, int len, int(*fill)(void*, unsigned int), - int(*writebb)(void*, unsigned int), - unsigned char *output, + int(*flush)(void*, unsigned int), + unsigned char *outbuf, int *posp, void(*error)(char *x)); /* inbuf - input buffer *len - len of pre-read data in inbuf - *fill - function to fill inbuf if empty - *writebb - function to write out outbug + *fill - function to fill inbuf when empty + *flush - function to write out outbuf + *outbuf - output buffer *posp - if non-null, input position (number of bytes read) will be * returned here * - *If len != 0, the inbuf is initialized (with as much data), and fill - *should not be called - *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE - *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE + *If len != 0, inbuf should contain all the necessary input data, and fill + *should be NULL + *If len = 0, inbuf can be NULL, in which case the decompressor will allocate + *the input buffer. If inbuf != NULL it must be at least XXX_IOBUF_SIZE bytes. + *fill will be called (repeatedly...) to read data, at most XXX_IOBUF_SIZE + *bytes should be read per call. Replace XXX with the appropriate decompressor + *name, i.e. LZMA_IOBUF_SIZE. + * + *If flush = NULL, outbuf must be large enough to buffer all the expected + *output. If flush != NULL, the output buffer will be allocated by the + *decompressor (outbuf = NULL), and the flush function will be called to + *flush the output buffer at the appropriate time (decompressor and stream + *dependent). */ + /* Utility routine to detect the decompression method */ decompress_fn decompress_method(const unsigned char *inbuf, int len, const char **name); diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index e36b296fc9f8..bfe605ac534f 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -25,7 +25,7 @@ #include #include -#define INBUF_LEN (16*1024) +#define GZIP_IOBUF_SIZE (16*1024) /* Included from initramfs et al code */ STATIC int INIT gunzip(unsigned char *buf, int len, @@ -55,7 +55,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, if (buf) zbuf = buf; else { - zbuf = malloc(INBUF_LEN); + zbuf = malloc(GZIP_IOBUF_SIZE); len = 0; } if (!zbuf) { @@ -77,7 +77,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, } if (len == 0) - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); /* verify the gzip header */ if (len < 10 || @@ -113,7 +113,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, while (rc == Z_OK) { if (strm->avail_in == 0) { /* TODO: handle case where both pos and fill are set */ - len = fill(zbuf, INBUF_LEN); + len = fill(zbuf, GZIP_IOBUF_SIZE); if (len < 0) { rc = -1; error("read error"); -- cgit v1.2.3 From 3a6593050fbd8bbcaed3a44d01c31d907315c86c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 21 Jul 2009 17:34:57 +0200 Subject: perf_counter, ftrace: Fix perf_counter integration Adds possible second part to the assign argument of TP_EVENT(). TP_perf_assign( __perf_count(foo); __perf_addr(bar); ) Which, when specified make the swcounter increment with @foo instead of the usual 1, and report @bar for PERF_SAMPLE_ADDR (data address associated with the event) when this triggers a counter overflow. Signed-off-by: Peter Zijlstra Acked-by: Steven Rostedt Cc: Frederic Weisbecker Cc: Jason Baron Cc: Paul Mackerras Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 110 ++++++++++++++++++++++++++++++++++++++----------- kernel/perf_counter.c | 6 +-- 2 files changed, 88 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 1867553c61e5..fec71f8dbc48 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -144,6 +144,9 @@ #undef TP_fast_assign #define TP_fast_assign(args...) args +#undef TP_perf_assign +#define TP_perf_assign(args...) + #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ static int \ @@ -345,6 +348,88 @@ static inline int ftrace_get_offsets_##call( \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#ifdef CONFIG_EVENT_PROFILE + +/* + * Generate the functions needed for tracepoint perf_counter support. + * + * static void ftrace_profile_(proto) + * { + * extern void perf_tpcounter_event(int, u64, u64); + * u64 __addr = 0, __count = 1; + * + * <-- here we expand the TP_perf_assign() macro + * + * perf_tpcounter_event(event_.id, __addr, __count); + * } + * + * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) + * { + * int ret = 0; + * + * if (!atomic_inc_return(&event_call->profile_count)) + * ret = register_trace_(ftrace_profile_); + * + * return ret; + * } + * + * static void ftrace_profile_disable_(struct ftrace_event_call *event_call) + * { + * if (atomic_add_negative(-1, &event->call->profile_count)) + * unregister_trace_(ftrace_profile_); + * } + * + */ + +#undef TP_fast_assign +#define TP_fast_assign(args...) + +#undef TP_perf_assign +#define TP_perf_assign(args...) args + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ + \ +static void ftrace_profile_##call(proto) \ +{ \ + extern void perf_tpcounter_event(int, u64, u64); \ + u64 __addr = 0, __count = 1; \ + { assign; } \ + perf_tpcounter_event(event_##call.id, __addr, __count); \ +} \ + \ +static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +{ \ + int ret = 0; \ + \ + if (!atomic_inc_return(&event_call->profile_count)) \ + ret = register_trace_##call(ftrace_profile_##call); \ + \ + return ret; \ +} \ + \ +static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +{ \ + if (atomic_add_negative(-1, &event_call->profile_count)) \ + unregister_trace_##call(ftrace_profile_##call); \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) + +#undef TP_fast_assign +#define TP_fast_assign(args...) args + +#undef TP_perf_assign +#define TP_perf_assign(args...) + +#endif + /* * Stage 4 of the trace events. * @@ -447,28 +532,6 @@ static inline int ftrace_get_offsets_##call( \ #define TP_FMT(fmt, args...) fmt "\n", ##args #ifdef CONFIG_EVENT_PROFILE -#define _TRACE_PROFILE(call, proto, args) \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int); \ - perf_tpcounter_event(event_##call.id); \ -} \ - \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ -{ \ - int ret = 0; \ - \ - if (!atomic_inc_return(&event_call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ -} \ - \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ -{ \ - if (atomic_add_negative(-1, &event_call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ -} #define _TRACE_PROFILE_INIT(call) \ .profile_count = ATOMIC_INIT(-1), \ @@ -476,7 +539,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ .profile_disable = ftrace_profile_disable_##call, #else -#define _TRACE_PROFILE(call, proto, args) #define _TRACE_PROFILE_INIT(call) #endif @@ -502,7 +564,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -586,6 +647,5 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef _TRACE_PROFILE #undef _TRACE_PROFILE_INIT diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 673c1aaf7332..52eb4b68d34f 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -3703,17 +3703,17 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id) +void perf_tpcounter_event(int event_id, u64 addr, u64 count) { struct perf_sample_data data = { .regs = get_irq_regs(), - .addr = 0, + .addr = addr, }; if (!data.regs) data.regs = task_pt_regs(current); - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); + do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data); } EXPORT_SYMBOL_GPL(perf_tpcounter_event); -- cgit v1.2.3 From f413cdb80ce00ec1a4d0ab949b5d96c81cae7f75 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 7 Aug 2009 01:25:54 +0200 Subject: perf_counter: Fix/complete ftrace event records sampling This patch implements the kernel side support for ftrace event record sampling. A new counter sampling attribute is added: PERF_SAMPLE_TP_RECORD which requests ftrace events record sampling. In this case if a PERF_TYPE_TRACEPOINT counter is active and a tracepoint fires, we emit the tracepoint binary record to the perfcounter event buffer, as a sample. Result, after setting PERF_SAMPLE_TP_RECORD attribute from perf record: perf record -f -F 1 -a -e workqueue:workqueue_execution perf report -D 0x21e18 [0x48]: event: 9 . . ... raw event: size 72 bytes . 0000: 09 00 00 00 01 00 48 00 d0 c7 00 81 ff ff ff ff ......H........ . 0010: 0a 00 00 00 0a 00 00 00 21 00 00 00 00 00 00 00 ........!...... . 0020: 2b 00 01 02 0a 00 00 00 0a 00 00 00 65 76 65 6e +...........eve . 0030: 74 73 2f 31 00 00 00 00 00 00 00 00 0a 00 00 00 ts/1........... . 0040: e0 b1 31 81 ff ff ff ff ....... . 0x21e18 [0x48]: PERF_EVENT_SAMPLE (IP, 1): 10: 0xffffffff8100c7d0 period: 33 The raw ftrace binary record starts at offset 0020. Translation: struct trace_entry { type = 0x2b = 43; flags = 1; preempt_count = 2; pid = 0xa = 10; tgid = 0xa = 10; } thread_comm = "events/1" thread_pid = 0xa = 10; func = 0xffffffff8131b1e0 = flush_to_ldisc() What will come next? - Userspace support ('perf trace'), 'flight data recorder' mode for perf trace, etc. - The unconditional copy from the profiling callback brings some costs however if someone wants no such sampling to occur, and needs to be fixed in the future. For that we need to have an instant access to the perf counter attribute. This is a matter of a flag to add in the struct ftrace_event. - Take care of the events recursivity! Don't ever try to record a lock event for example, it seems some locking is used in the profiling fast path and lead to a tracing recursivity. That will be fixed using raw spinlock or recursivity protection. - [...] - Profit! :-) Signed-off-by: Frederic Weisbecker Cc: Li Zefan Cc: Tom Zanussi Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Steven Rostedt Cc: Paul Mackerras Cc: Pekka Enberg Cc: Gabriel Munteanu Cc: Lai Jiangshan Signed-off-by: Ingo Molnar --- include/linux/ftrace_event.h | 4 +- include/linux/perf_counter.h | 9 ++- include/trace/ftrace.h | 130 ++++++++++++++++++++++++++++++++----------- kernel/perf_counter.c | 18 +++++- kernel/trace/trace.c | 1 + kernel/trace/trace.h | 4 -- tools/perf/builtin-record.c | 1 + 7 files changed, 126 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index d7cd193c2277..a81170de7f6b 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -89,7 +89,9 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; - +void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); struct ring_buffer_event * trace_current_buffer_lock_reserve(int type, unsigned long len, unsigned long flags, int pc); diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index e604e6ef72dd..a67dd5c5b6d3 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -121,8 +121,9 @@ enum perf_counter_sample_format { PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_TP_RECORD = 1U << 10, - PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ + PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; /* @@ -413,6 +414,11 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; +struct perf_tracepoint_record { + int size; + char *record; +}; + struct task_struct; /** @@ -681,6 +687,7 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; + void *private; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index fec71f8dbc48..7fb16d90e7b1 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -353,15 +353,7 @@ static inline int ftrace_get_offsets_##call( \ /* * Generate the functions needed for tracepoint perf_counter support. * - * static void ftrace_profile_(proto) - * { - * extern void perf_tpcounter_event(int, u64, u64); - * u64 __addr = 0, __count = 1; - * - * <-- here we expand the TP_perf_assign() macro - * - * perf_tpcounter_event(event_.id, __addr, __count); - * } + * NOTE: The insertion profile callback (ftrace_profile_) is defined later * * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) * { @@ -381,28 +373,10 @@ static inline int ftrace_get_offsets_##call( \ * */ -#undef TP_fast_assign -#define TP_fast_assign(args...) - -#undef TP_perf_assign -#define TP_perf_assign(args...) args - -#undef __perf_addr -#define __perf_addr(a) __addr = (a) - -#undef __perf_count -#define __perf_count(c) __count = (c) - #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ \ -static void ftrace_profile_##call(proto) \ -{ \ - extern void perf_tpcounter_event(int, u64, u64); \ - u64 __addr = 0, __count = 1; \ - { assign; } \ - perf_tpcounter_event(event_##call.id, __addr, __count); \ -} \ +static void ftrace_profile_##call(proto); \ \ static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ { \ @@ -422,12 +396,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TP_perf_assign -#define TP_perf_assign(args...) - #endif /* @@ -647,5 +615,99 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +/* + * Define the insertion callback to profile events + * + * The job is very similar to ftrace_raw_event_ except that we don't + * insert in the ring buffer but in a perf counter. + * + * static void ftrace_profile_(proto) + * { + * struct ftrace_data_offsets_ __maybe_unused __data_offsets; + * struct ftrace_event_call *event_call = &event_; + * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * struct ftrace_raw_##call *entry; + * u64 __addr = 0, __count = 1; + * unsigned long irq_flags; + * int __entry_size; + * int __data_size; + * int pc; + * + * local_save_flags(irq_flags); + * pc = preempt_count(); + * + * __data_size = ftrace_get_offsets_(&__data_offsets, args); + * __entry_size = __data_size + sizeof(*entry); + * + * do { + * char raw_data[__entry_size]; <- allocate our sample in the stack + * struct trace_entry *ent; + * + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; + * + * <- do some jobs with dynamic arrays + * + * <- affect our values + * + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter + * } while (0); + * + * } + */ + +#ifdef CONFIG_EVENT_PROFILE + +#undef __perf_addr +#define __perf_addr(a) __addr = (a) + +#undef __perf_count +#define __perf_count(c) __count = (c) + +#undef TRACE_EVENT +#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ +static void ftrace_profile_##call(proto) \ +{ \ + struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ + struct ftrace_event_call *event_call = &event_##call; \ + extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + struct ftrace_raw_##call *entry; \ + u64 __addr = 0, __count = 1; \ + unsigned long irq_flags; \ + int __entry_size; \ + int __data_size; \ + int pc; \ + \ + local_save_flags(irq_flags); \ + pc = preempt_count(); \ + \ + __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ + __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ + \ + do { \ + char raw_data[__entry_size]; \ + struct trace_entry *ent; \ + \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + __entry_size); \ + } while (0); \ + \ +} + +#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) +#endif /* CONFIG_EVENT_PROFILE */ + #undef _TRACE_PROFILE_INIT diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 52eb4b68d34f..868102172aa4 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,6 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; + struct perf_tracepoint_record *tp; int callchain_size = 0; u64 time; struct { @@ -2714,6 +2715,11 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } + if (sample_type & PERF_SAMPLE_TP_RECORD) { + tp = data->private; + header.size += tp->size; + } + ret = perf_output_begin(&handle, counter, header.size, nmi, 1); if (ret) return; @@ -2777,6 +2783,9 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } + if (sample_type & PERF_SAMPLE_TP_RECORD) + perf_output_copy(&handle, tp->record, tp->size); + perf_output_end(&handle); } @@ -3703,11 +3712,18 @@ static const struct pmu perf_ops_task_clock = { }; #ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id, u64 addr, u64 count) +void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, + int entry_size) { + struct perf_tracepoint_record tp = { + .size = entry_size, + .record = record, + }; + struct perf_sample_data data = { .regs = get_irq_regs(), .addr = addr, + .private = &tp, }; if (!data.regs) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8930e39b9d8c..c22b40f8f576 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); } +EXPORT_SYMBOL_GPL(tracing_generic_entry_update); struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, int type, diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 3548ae5cc780..8b9f4f6e9559 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts); -void tracing_generic_entry_update(struct trace_entry *entry, - unsigned long flags, - int pc); - void default_wait_pipe(struct trace_iterator *iter); void poll_wait_pipe(struct trace_iterator *iter); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6da09928130f..90c98082af10 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -412,6 +412,7 @@ static void create_counter(int counter, int cpu, pid_t pid) if (call_graph) attr->sample_type |= PERF_SAMPLE_CALLCHAIN; + attr->mmap = track; attr->comm = track; attr->inherit = (cpu < 0) && inherit; -- cgit v1.2.3 From 3a43ce68ae1758fa6a839386025ef45acb6baa22 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 8 Aug 2009 04:26:37 +0200 Subject: perf_counter: Fix tracepoint sampling to be part of generic sampling Based on Peter's comments, make tracepoint sampling generic just like all the other sampling bits are. This is a rename with no code changes: - PERF_SAMPLE_TP_RECORD to PERF_SAMPLE_RAW - struct perf_tracepoint_record to perf_raw_record We want the system in place that transport tracepoints raw samples events into the perf ring buffer to be generalized and usable by any type of counter. Reported-by; Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1249698400-5441-4-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 10 +++++----- kernel/perf_counter.c | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a67dd5c5b6d3..2aabe43c1d04 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -121,7 +121,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_CPU = 1U << 7, PERF_SAMPLE_PERIOD = 1U << 8, PERF_SAMPLE_STREAM_ID = 1U << 9, - PERF_SAMPLE_TP_RECORD = 1U << 10, + PERF_SAMPLE_RAW = 1U << 10, PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */ }; @@ -414,9 +414,9 @@ struct perf_callchain_entry { __u64 ip[PERF_MAX_STACK_DEPTH]; }; -struct perf_tracepoint_record { - int size; - char *record; +struct perf_raw_record { + u32 size; + void *data; }; struct task_struct; @@ -687,7 +687,7 @@ struct perf_sample_data { struct pt_regs *regs; u64 addr; u64 period; - void *private; + struct perf_raw_record *raw; }; extern int perf_counter_overflow(struct perf_counter *counter, int nmi, diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 117622cb73a3..002310540417 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,7 +2646,7 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; - struct perf_tracepoint_record *tp = NULL; + struct perf_raw_record *raw = NULL; int callchain_size = 0; u64 time; struct { @@ -2715,10 +2715,10 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, header.size += sizeof(u64); } - if (sample_type & PERF_SAMPLE_TP_RECORD) { - tp = data->private; - if (tp) - header.size += tp->size; + if (sample_type & PERF_SAMPLE_RAW) { + raw = data->raw; + if (raw) + header.size += raw->size; } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2784,8 +2784,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if ((sample_type & PERF_SAMPLE_TP_RECORD) && tp) - perf_output_copy(&handle, tp->record, tp->size); + if ((sample_type & PERF_SAMPLE_RAW) && raw) + perf_output_copy(&handle, raw->data, raw->size); perf_output_end(&handle); } @@ -3740,15 +3740,15 @@ static const struct pmu perf_ops_task_clock = { void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record, int entry_size) { - struct perf_tracepoint_record tp = { + struct perf_raw_record raw = { .size = entry_size, - .record = record, + .data = record, }; struct perf_sample_data data = { .regs = get_irq_regs(), .addr = addr, - .private = &tp, + .raw = &raw, }; if (!data.regs) -- cgit v1.2.3 From a044560c3a1f0ad75ce685c1ed7604820b9ed319 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Aug 2009 11:16:52 +0200 Subject: perf_counter: Correct PERF_SAMPLE_RAW output PERF_SAMPLE_* output switches should unconditionally output the correct format, as they are the only way to unambiguously parse the PERF_EVENT_SAMPLE data. Signed-off-by: Peter Zijlstra Acked-by: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <1249896447.17467.74.camel@twins> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ include/trace/ftrace.h | 3 ++- kernel/perf_counter.c | 30 ++++++++++++++++++++++++------ 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2aabe43c1d04..a9d823a93fe8 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -369,6 +369,8 @@ enum perf_event_type { * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW * }; */ PERF_EVENT_SAMPLE = 9, diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7fb16d90e7b1..7167b9b97da2 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -685,7 +685,8 @@ static void ftrace_profile_##call(proto) \ pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ - __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ + __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ + sizeof(u64)); \ \ do { \ char raw_data[__entry_size]; \ diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 546e62d62941..5229d1666fa5 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -2646,7 +2646,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, u64 counter; } group_entry; struct perf_callchain_entry *callchain = NULL; - struct perf_raw_record *raw = NULL; int callchain_size = 0; u64 time; struct { @@ -2716,9 +2715,15 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } if (sample_type & PERF_SAMPLE_RAW) { - raw = data->raw; - if (raw) - header.size += raw->size; + int size = sizeof(u32); + + if (data->raw) + size += data->raw->size; + else + size += sizeof(u32); + + WARN_ON_ONCE(size & (sizeof(u64)-1)); + header.size += size; } ret = perf_output_begin(&handle, counter, header.size, nmi, 1); @@ -2784,8 +2789,21 @@ static void perf_counter_output(struct perf_counter *counter, int nmi, } } - if ((sample_type & PERF_SAMPLE_RAW) && raw) - perf_output_copy(&handle, raw->data, raw->size); + if (sample_type & PERF_SAMPLE_RAW) { + if (data->raw) { + perf_output_put(&handle, data->raw->size); + perf_output_copy(&handle, data->raw->data, data->raw->size); + } else { + struct { + u32 size; + u32 data; + } raw = { + .size = sizeof(u32), + .data = 0, + }; + perf_output_put(&handle, raw); + } + } perf_output_end(&handle); } -- cgit v1.2.3 From 2fc391112fb6f3424435a3aa2fda887497b5f807 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 10 Aug 2009 12:33:05 +0100 Subject: locking, sched: Give waitqueue spinlocks their own lockdep classes Give waitqueue spinlocks their own lockdep classes when they are initialised from init_waitqueue_head(). This means that struct wait_queue::func functions can operate other waitqueues. This is used by CacheFiles to catch the page from a backing fs being unlocked and to wake up another thread to take a copy of it. Signed-off-by: Peter Zijlstra Signed-off-by: David Howells Tested-by: Takashi Iwai Cc: linux-cachefs@redhat.com Cc: torvalds@osdl.org Cc: akpm@linux-foundation.org LKML-Reference: <20090810113305.17284.81508.stgit@warthog.procyon.org.uk> Signed-off-by: Ingo Molnar --- include/linux/wait.h | 9 ++++++++- kernel/wait.c | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 6788e1a4d4ca..cf3c2f5dba51 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -77,7 +77,14 @@ struct task_struct; #define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ { .flags = word, .bit_nr = bit, } -extern void init_waitqueue_head(wait_queue_head_t *q); +extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *); + +#define init_waitqueue_head(q) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_waitqueue_head((q), &__key); \ + } while (0) #ifdef CONFIG_LOCKDEP # define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ diff --git a/kernel/wait.c b/kernel/wait.c index ea7c3b4275cf..c4bd3d825f35 100644 --- a/kernel/wait.c +++ b/kernel/wait.c @@ -10,13 +10,14 @@ #include #include -void init_waitqueue_head(wait_queue_head_t *q) +void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key) { spin_lock_init(&q->lock); + lockdep_set_class(&q->lock, key); INIT_LIST_HEAD(&q->task_list); } -EXPORT_SYMBOL(init_waitqueue_head); +EXPORT_SYMBOL(__init_waitqueue_head); void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) { -- cgit v1.2.3 From 304703aba31a87903b8c0db8f5e6890cac2d596d Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 10 Aug 2009 16:11:32 +0200 Subject: perf_counter: Subtract the buffer size field from the event record size We compute the perf raw sample size by aligning the raw ftrace event size plus the buffer size field itself. We do that instead of aligning only the perf raw sample size, so that we might economize some in some cases. But this buffer size field is not stored in the perf raw sample, we must then substract its size from the buffer once we computed the alignment unless we may get a useless u32 field in the buffer. Signed-off-by: Frederic Weisbecker Acked-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith Cc: Paul Mackerras LKML-Reference: <20090810141129.GA5124@nowhere> Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 7167b9b97da2..a05524fa245d 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -637,7 +637,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * pc = preempt_count(); * * __data_size = ftrace_get_offsets_(&__data_offsets, args); - * __entry_size = __data_size + sizeof(*entry); + * + * // Below we want to get the aligned size by taking into account + * // the u32 field that will later store the buffer size + * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32), + * sizeof(u64)); + * __entry_size -= sizeof(u32); * * do { * char raw_data[__entry_size]; <- allocate our sample in the stack @@ -687,6 +692,7 @@ static void ftrace_profile_##call(proto) \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\ sizeof(u64)); \ + __entry_size -= sizeof(u32); \ \ do { \ char raw_data[__entry_size]; \ -- cgit v1.2.3 From 1853db0e02ae4088f102b0d8e59e83dc98f93f03 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 10 Aug 2009 16:38:36 +0200 Subject: perf_counter: Zero dead bytes from ftrace raw samples size alignment After aligning the ftrace raw samples, there are dead bytes storing random data from the stack. We don't want to leak these to userspace, then zero these out. Before: 0x2de88 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 01 00 50 00 d0 c7 00 81 ff ff ff ff ......P........ . 0010: 68 01 00 00 68 01 00 00 2c 00 00 00 00 00 00 00 h...h...,...... . 0020: 2c 00 00 00 2b 00 01 02 68 01 00 00 68 01 00 00 ,...+...h...h.. . 0030: 6b 6f 6e 64 65 6d 61 6e 64 2f 30 00 00 00 00 00 kondemand/0.... . 0040: 68 01 00 00 40 7f 46 81 ff ff ff ff 00 10 1b 7f h...@.F........ ^ ^ ^ ^ Leak After: 0x2d318 [0x50]: event: 9 . . ... raw event: size 80 bytes . 0000: 09 00 00 00 01 00 50 00 d0 c7 00 81 ff ff ff ff ......P........ . 0010: 68 01 00 00 68 01 00 00 68 14 00 00 00 00 00 00 h...h...h...... . 0020: 2c 00 00 00 2b 00 01 02 68 01 00 00 68 01 00 00 ,...+...h...h.. . 0030: 6b 6f 6e 64 65 6d 61 6e 64 2f 30 00 00 00 00 00 kondemand/0.... . 0040: 68 01 00 00 a0 80 46 81 ff ff ff ff 00 00 00 00 h.....F........ ^ ^ ^ ^ Fixed Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith LKML-Reference: <1249915116-5210-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Mike Galbraith --- include/trace/ftrace.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a05524fa245d..f64fbaae781a 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -648,6 +648,9 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * char raw_data[__entry_size]; <- allocate our sample in the stack * struct trace_entry *ent; * + * zero dead bytes from alignment to avoid stack leak to userspace: + * + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; * entry = (struct ftrace_raw_ *)raw_data; * ent = &entry->ent; * tracing_generic_entry_update(ent, irq_flags, pc); @@ -698,6 +701,7 @@ static void ftrace_profile_##call(proto) \ char raw_data[__entry_size]; \ struct trace_entry *ent; \ \ + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ ent = &entry->ent; \ tracing_generic_entry_update(ent, irq_flags, pc); \ -- cgit v1.2.3 From 1ae88b2e446261c038f2c0c3150ffae142b227a2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Aug 2009 09:12:30 -0400 Subject: NFS: Fix an O_DIRECT Oops... We can't call nfs_readdata_release()/nfs_writedata_release() without first initialising and referencing args.context. Doing so inside nfs_direct_read_schedule_segment()/nfs_direct_write_schedule_segment() causes an Oops. We should rather be calling nfs_readdata_free()/nfs_writedata_free() in those cases. Looking at the O_DIRECT code, the "struct nfs_direct_req" is already referencing the nfs_open_context for us. Since the readdata and writedata structures carry a reference to that, we can simplify things by getting rid of the extra nfs_open_context references, so that we can replace all instances of nfs_readdata_release()/nfs_writedata_release(). Reported-by: Catalin Marinas Signed-off-by: Trond Myklebust Tested-by: Catalin Marinas Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 20 ++++++++++---------- fs/nfs/read.c | 6 ++---- fs/nfs/write.c | 6 ++---- include/linux/nfs_fs.h | 5 ++--- 4 files changed, 16 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 489fc01a3204..e4e089a8f294 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata) if (put_dreq(dreq)) nfs_direct_complete(dreq); - nfs_readdata_release(calldata); + nfs_readdata_free(data); } static const struct rpc_call_ops nfs_read_direct_ops = { @@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->npages, 1, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_readdata_release(data); + nfs_readdata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_readdata_release(data); + nfs_readdata_free(data); break; } bytes -= pgbase; @@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; @@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq) struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); list_del(&data->pages); nfs_direct_release_pages(data->pagevec, data->npages); - nfs_writedata_release(data); + nfs_writedata_free(data); } } @@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata) dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); nfs_direct_write_complete(dreq, data->inode); - nfs_commitdata_release(calldata); + nfs_commit_free(data); } static const struct rpc_call_ops nfs_commit_direct_ops = { @@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) data->args.fh = NFS_FH(data->inode); data->args.offset = 0; data->args.count = 0; - data->args.context = get_nfs_open_context(dreq->ctx); + data->args.context = dreq->ctx; data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->npages, 0, 0, data->pagevec, NULL); up_read(¤t->mm->mmap_sem); if (result < 0) { - nfs_writedata_release(data); + nfs_writedata_free(data); break; } if ((unsigned)result < data->npages) { bytes = result * PAGE_SIZE; if (bytes <= pgbase) { nfs_direct_release_pages(data->pagevec, result); - nfs_writedata_release(data); + nfs_writedata_free(data); break; } bytes -= pgbase; @@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq, data->inode = inode; data->cred = msg.rpc_cred; data->args.fh = NFS_FH(inode); - data->args.context = get_nfs_open_context(ctx); + data->args.context = ctx; data->args.offset = pos; data->args.pgbase = pgbase; data->args.pages = data->pagevec; diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 73ea5e8d66ce..12c9e66d3f1d 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) return p; } -static void nfs_readdata_free(struct nfs_read_data *p) +void nfs_readdata_free(struct nfs_read_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_rdata_mempool); } -void nfs_readdata_release(void *data) +static void nfs_readdata_release(struct nfs_read_data *rdata) { - struct nfs_read_data *rdata = data; - put_nfs_open_context(rdata->args.context); nfs_readdata_free(rdata); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 0a0a2ff767c3..a34fae21fe10 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) return p; } -static void nfs_writedata_free(struct nfs_write_data *p) +void nfs_writedata_free(struct nfs_write_data *p) { if (p && (p->pagevec != &p->page_array[0])) kfree(p->pagevec); mempool_free(p, nfs_wdata_mempool); } -void nfs_writedata_release(void *data) +static void nfs_writedata_release(struct nfs_write_data *wdata) { - struct nfs_write_data *wdata = data; - put_nfs_open_context(wdata->args.context); nfs_writedata_free(wdata); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fdffb413b192..f6b90240dd41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *); extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); -extern void nfs_writedata_release(void *); /* * Try to write back everything synchronously (but check the @@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_write_data *wdata); -extern void nfs_commitdata_release(void *wdata); #else static inline int nfs_commit_inode(struct inode *inode, int how) @@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode) * Allocate nfs_write_data structures */ extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); +extern void nfs_writedata_free(struct nfs_write_data *); /* * linux/fs/nfs/read.c @@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); -extern void nfs_readdata_release(void *data); extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); @@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, * Allocate nfs_read_data structures */ extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); +extern void nfs_readdata_free(struct nfs_read_data *); /* * linux/fs/nfs3proc.c -- cgit v1.2.3 From 28402971d869e26271b25301011f667d3a5640c3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 13 Aug 2009 10:13:22 +0200 Subject: perf_counter: Provide hw_perf_counter_setup_online() APIs Provide weak aliases for hw_perf_counter_setup_online(). This is used by the BTS patches (for v2.6.32), but it interacts with fixes so propagate this upstream. (it has no effect as of yet) Also export perf_counter_output() to architecture code. Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 2 ++ kernel/perf_counter.c | 10 +++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index a9d823a93fe8..2b36afe6ae0f 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -694,6 +694,8 @@ struct perf_sample_data { extern int perf_counter_overflow(struct perf_counter *counter, int nmi, struct perf_sample_data *data); +extern void perf_counter_output(struct perf_counter *counter, int nmi, + struct perf_sample_data *data); /* * Return 1 for a software counter, 0 for a hardware counter diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index b0b20a07f394..e26d2fcfa320 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); } void __weak hw_perf_enable(void) { barrier(); } void __weak hw_perf_counter_setup(int cpu) { barrier(); } +void __weak hw_perf_counter_setup_online(int cpu) { barrier(); } int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, @@ -2630,7 +2631,7 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } -static void perf_counter_output(struct perf_counter *counter, int nmi, +void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { int ret; @@ -4592,6 +4593,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) perf_counter_init_cpu(cpu); break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + hw_perf_counter_setup_online(cpu); + break; + case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: perf_counter_exit_cpu(cpu); @@ -4616,6 +4622,8 @@ void __init perf_counter_init(void) { perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, (void *)(long)smp_processor_id()); + perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, + (void *)(long)smp_processor_id()); register_cpu_notifier(&perf_cpu_nb); } -- cgit v1.2.3 From 3dab77fb1bf89664bb1c9544607159dcab6f7d57 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 13 Aug 2009 11:47:53 +0200 Subject: perf: Rework/fix the whole read vs group stuff Replace PERF_SAMPLE_GROUP with PERF_SAMPLE_READ and introduce PERF_FORMAT_GROUP to deal with group reads in a more generic way. This allows you to get group reads out of read() as well. Signed-off-by: Peter Zijlstra Cc: Corey J Ashford Cc: Paul Mackerras Cc: stephane eranian LKML-Reference: <20090813103655.117411814@chello.nl> Signed-off-by: Ingo Molnar --- include/linux/perf_counter.h | 47 ++++++-- kernel/perf_counter.c | 274 +++++++++++++++++++++++++++++++------------ 2 files changed, 238 insertions(+), 83 deletions(-) (limited to 'include') diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h index 2b36afe6ae0f..b53f7006cc4e 100644 --- a/include/linux/perf_counter.h +++ b/include/linux/perf_counter.h @@ -115,7 +115,7 @@ enum perf_counter_sample_format { PERF_SAMPLE_TID = 1U << 1, PERF_SAMPLE_TIME = 1U << 2, PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, + PERF_SAMPLE_READ = 1U << 4, PERF_SAMPLE_CALLCHAIN = 1U << 5, PERF_SAMPLE_ID = 1U << 6, PERF_SAMPLE_CPU = 1U << 7, @@ -127,16 +127,32 @@ enum perf_counter_sample_format { }; /* - * Bits that can be set in attr.read_format to request that - * reads on the counter should return the indicated quantities, - * in increasing order of bit value, after the counter value. + * The format of the data returned by read() on a perf counter fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_ENABLED + * { u64 time_running; } && PERF_FORMAT_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; */ enum perf_counter_read_format { PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, - PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ }; #define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ @@ -343,10 +359,8 @@ enum perf_event_type { * struct { * struct perf_event_header header; * u32 pid, tid; - * u64 value; - * { u64 time_enabled; } && PERF_FORMAT_ENABLED - * { u64 time_running; } && PERF_FORMAT_RUNNING - * { u64 parent_id; } && PERF_FORMAT_ID + * + * struct read_format values; * }; */ PERF_EVENT_READ = 8, @@ -364,11 +378,22 @@ enum perf_event_type { * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 period; } && PERF_SAMPLE_PERIOD * - * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP + * { struct read_format values; } && PERF_SAMPLE_READ * * { u64 nr, * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * * { u32 size; * char data[size];}&& PERF_SAMPLE_RAW * }; diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c index 3dd4339589a0..b8c6b97a20a3 100644 --- a/kernel/perf_counter.c +++ b/kernel/perf_counter.c @@ -1692,7 +1692,32 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } -static u64 perf_counter_read_tree(struct perf_counter *counter) +static int perf_counter_read_size(struct perf_counter *counter) +{ + int entry = sizeof(u64); /* value */ + int size = 0; + int nr = 1; + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + size += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_ID) + entry += sizeof(u64); + + if (counter->attr.read_format & PERF_FORMAT_GROUP) { + nr += counter->group_leader->nr_siblings; + size += sizeof(u64); + } + + size += entry * nr; + + return size; +} + +static u64 perf_counter_read_value(struct perf_counter *counter) { struct perf_counter *child; u64 total = 0; @@ -1704,14 +1729,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter) return total; } +static int perf_counter_read_entry(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} + +static int perf_counter_read_group(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + struct perf_counter *leader = counter->group_leader, *sub; + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; + + values[n++] = 1 + leader->nr_siblings; + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } + + size = n * sizeof(u64); + + if (copy_to_user(buf, values, size)) + return -EFAULT; + + err = perf_counter_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; + + size += err; + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + err = perf_counter_read_entry(counter, read_format, + buf + size); + if (err < 0) + return err; + + size += err; + } + + return size; +} + +static int perf_counter_read_one(struct perf_counter *counter, + u64 read_format, char __user *buf) +{ + u64 values[4]; + int n = 0; + + values[n++] = perf_counter_read_value(counter); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + if (copy_to_user(buf, values, n * sizeof(u64))) + return -EFAULT; + + return n * sizeof(u64); +} + /* * Read the performance counter - simple non blocking version for now */ static ssize_t perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) { - u64 values[4]; - int n; + u64 read_format = counter->attr.read_format; + int ret; /* * Return end-of-file for a read on a counter that is in @@ -1721,28 +1828,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) if (counter->state == PERF_COUNTER_STATE_ERROR) return 0; + if (count < perf_counter_read_size(counter)) + return -ENOSPC; + WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->child_mutex); - values[0] = perf_counter_read_tree(counter); - n = 1; - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = primary_counter_id(counter); + if (read_format & PERF_FORMAT_GROUP) + ret = perf_counter_read_group(counter, read_format, buf); + else + ret = perf_counter_read_one(counter, read_format, buf); mutex_unlock(&counter->child_mutex); - if (count < n * sizeof(u64)) - return -EINVAL; - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; + return ret; } static ssize_t @@ -2631,6 +2728,79 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) return task_pid_nr_ns(p, counter->ns); } +static void perf_output_read_one(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + u64 read_format = counter->attr.read_format; + u64 values[4]; + int n = 0; + + values[n++] = atomic64_read(&counter->count); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = counter->total_time_enabled + + atomic64_read(&counter->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = counter->total_time_running + + atomic64_read(&counter->child_total_time_running); + } + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(counter); + + perf_output_copy(handle, values, n * sizeof(u64)); +} + +/* + * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult. + */ +static void perf_output_read_group(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + struct perf_counter *leader = counter->group_leader, *sub; + u64 read_format = counter->attr.read_format; + u64 values[5]; + int n = 0; + + values[n++] = 1 + leader->nr_siblings; + + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + values[n++] = leader->total_time_enabled; + + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + values[n++] = leader->total_time_running; + + if (leader != counter) + leader->pmu->read(leader); + + values[n++] = atomic64_read(&leader->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(leader); + + perf_output_copy(handle, values, n * sizeof(u64)); + + list_for_each_entry(sub, &leader->sibling_list, list_entry) { + n = 0; + + if (sub != counter) + sub->pmu->read(sub); + + values[n++] = atomic64_read(&sub->count); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_counter_id(sub); + + perf_output_copy(handle, values, n * sizeof(u64)); + } +} + +static void perf_output_read(struct perf_output_handle *handle, + struct perf_counter *counter) +{ + if (counter->attr.read_format & PERF_FORMAT_GROUP) + perf_output_read_group(handle, counter); + else + perf_output_read_one(handle, counter); +} + void perf_counter_output(struct perf_counter *counter, int nmi, struct perf_sample_data *data) { @@ -2642,10 +2812,6 @@ void perf_counter_output(struct perf_counter *counter, int nmi, struct { u32 pid, tid; } tid_entry; - struct { - u64 id; - u64 counter; - } group_entry; struct perf_callchain_entry *callchain = NULL; int callchain_size = 0; u64 time; @@ -2700,10 +2866,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) header.size += sizeof(u64); - if (sample_type & PERF_SAMPLE_GROUP) { - header.size += sizeof(u64) + - counter->nr_siblings * sizeof(group_entry); - } + if (sample_type & PERF_SAMPLE_READ) + header.size += perf_counter_read_size(counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { callchain = perf_callchain(data->regs); @@ -2760,26 +2924,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi, if (sample_type & PERF_SAMPLE_PERIOD) perf_output_put(&handle, data->period); - /* - * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. - */ - if (sample_type & PERF_SAMPLE_GROUP) { - struct perf_counter *leader, *sub; - u64 nr = counter->nr_siblings; - - perf_output_put(&handle, nr); - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->pmu->read(sub); - - group_entry.id = primary_counter_id(sub); - group_entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, group_entry); - } - } + if (sample_type & PERF_SAMPLE_READ) + perf_output_read(&handle, counter); if (sample_type & PERF_SAMPLE_CALLCHAIN) { if (callchain) @@ -2818,8 +2964,6 @@ struct perf_read_event { u32 pid; u32 tid; - u64 value; - u64 format[3]; }; static void @@ -2831,34 +2975,20 @@ perf_counter_read_event(struct perf_counter *counter, .header = { .type = PERF_EVENT_READ, .misc = 0, - .size = sizeof(event) - sizeof(event.format), + .size = sizeof(event) + perf_counter_read_size(counter), }, .pid = perf_counter_pid(counter, task), .tid = perf_counter_tid(counter, task), - .value = atomic64_read(&counter->count), }; - int ret, i = 0; - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_enabled; - } - - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { - event.header.size += sizeof(u64); - event.format[i++] = counter->total_time_running; - } - - if (counter->attr.read_format & PERF_FORMAT_ID) { - event.header.size += sizeof(u64); - event.format[i++] = primary_counter_id(counter); - } + int ret; ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); if (ret) return; - perf_output_copy(&handle, &event, event.header.size); + perf_output_put(&handle, event); + perf_output_read(&handle, counter); + perf_output_end(&handle); } @@ -3921,9 +4051,9 @@ perf_counter_alloc(struct perf_counter_attr *attr, atomic64_set(&hwc->period_left, hwc->sample_period); /* - * we currently do not support PERF_SAMPLE_GROUP on inherited counters + * we currently do not support PERF_FORMAT_GROUP on inherited counters */ - if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) + if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP)) goto done; switch (attr->type) { -- cgit v1.2.3 From cefb87efc9aa0288849149484870d5ab989fbd59 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sun, 16 Aug 2009 21:05:45 +1000 Subject: drm/radeon/kms: implement bo busy check + current domain This implements the busy ioctl along with a current domain check. returns 0 or -EBUSY puts the current domain no matter what the answer. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_gem.c | 22 +++++++++++++++++++++- drivers/gpu/drm/radeon/radeon_object.c | 19 +++++++++++++++++++ include/drm/radeon_drm.h | 2 +- 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 87170a56e37b..79ad98264e33 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -242,6 +242,7 @@ int radeon_object_pin(struct radeon_object *robj, uint32_t domain, uint64_t *gpu_addr); void radeon_object_unpin(struct radeon_object *robj); int radeon_object_wait(struct radeon_object *robj); +int radeon_object_busy_domain(struct radeon_object *robj, uint32_t *cur_placement); int radeon_object_evict_vram(struct radeon_device *rdev); int radeon_object_mmap(struct radeon_object *robj, uint64_t *offset); void radeon_object_force_delete(struct radeon_device *rdev); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index cded5180c752..d4ceff13bbb1 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -262,7 +262,27 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - /* FIXME: implement */ + struct drm_radeon_gem_busy *args = data; + struct drm_gem_object *gobj; + struct radeon_object *robj; + int r; + uint32_t cur_placement; + + gobj = drm_gem_object_lookup(dev, filp, args->handle); + if (gobj == NULL) { + return -EINVAL; + } + robj = gobj->driver_private; + r = radeon_object_busy_domain(robj, &cur_placement); + if (cur_placement == TTM_PL_VRAM) + args->domain = RADEON_GEM_DOMAIN_VRAM; + if (cur_placement == TTM_PL_FLAG_TT) + args->domain = RADEON_GEM_DOMAIN_GTT; + if (cur_placement == TTM_PL_FLAG_SYSTEM) + args->domain = RADEON_GEM_DOMAIN_CPU; + mutex_lock(&dev->struct_mutex); + drm_gem_object_unreference(gobj); + mutex_unlock(&dev->struct_mutex); return 0; } diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index e98cae3bf4a6..b85fb83d7ae8 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -316,6 +316,25 @@ int radeon_object_wait(struct radeon_object *robj) return r; } +int radeon_object_busy_domain(struct radeon_object *robj, uint32_t *cur_placement) +{ + int r = 0; + + r = radeon_object_reserve(robj, true); + if (unlikely(r != 0)) { + DRM_ERROR("radeon: failed to reserve object for waiting.\n"); + return r; + } + spin_lock(&robj->tobj.lock); + *cur_placement = robj->tobj.mem.mem_type; + if (robj->tobj.sync_obj) { + r = ttm_bo_wait(&robj->tobj, true, true, true); + } + spin_unlock(&robj->tobj.lock); + radeon_object_unreserve(robj); + return r; +} + int radeon_object_evict_vram(struct radeon_device *rdev) { if (rdev->flags & RADEON_IS_IGP) { diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index af4b4826997e..f81c3232accd 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -838,7 +838,7 @@ struct drm_radeon_gem_wait_idle { struct drm_radeon_gem_busy { uint32_t handle; - uint32_t busy; + uint32_t domain; }; struct drm_radeon_gem_pread { -- cgit v1.2.3 From 9c0d90103c7e0eb6e638e5b649e9f6d8d9c1b4b3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:53:58 -0400 Subject: Capabilities: move cap_file_mmap to commoncap.c Currently we duplicate the mmap_min_addr test in cap_file_mmap and in security_file_mmap if !CONFIG_SECURITY. This patch moves cap_file_mmap into commoncap.c and then calls that function directly from security_file_mmap ifndef CONFIG_SECURITY like all of the other capability checks are done. Signed-off-by: Eric Paris Acked-by: Serge Hallyn Signed-off-by: James Morris --- include/linux/security.h | 7 ++++--- security/capability.c | 9 --------- security/commoncap.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 5eff459b3833..ac4bc3760b46 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -66,6 +66,9 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -2197,9 +2200,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; + return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/capability.c b/security/capability.c index 21b6cead6a8e..88f752e8152c 100644 --- a/security/capability.c +++ b/security/capability.c @@ -330,15 +330,6 @@ static int cap_file_ioctl(struct file *file, unsigned int command, return 0; } -static int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) -{ - if ((addr < mmap_min_addr) && !capable(CAP_SYS_RAWIO)) - return -EACCES; - return 0; -} - static int cap_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { diff --git a/security/commoncap.c b/security/commoncap.c index 48b7e0228fa3..6bcf6e81e547 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -984,3 +984,33 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) cap_sys_admin = 1; return __vm_enough_memory(mm, pages, cap_sys_admin); } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. + */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + int ret = 0; + + if (addr < mmap_min_addr) { + ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, + SECURITY_CAP_AUDIT); + /* set PF_SUPERPRIV if it turns out we allow the low mmap */ + if (ret == 0) + current->flags |= PF_SUPERPRIV; + } + return ret; +} -- cgit v1.2.3 From 788084aba2ab7348257597496befcbccabdc98a3 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 31 Jul 2009 12:54:11 -0400 Subject: Security/SELinux: seperate lsm specific mmap_min_addr Currently SELinux enforcement of controls on the ability to map low memory is determined by the mmap_min_addr tunable. This patch causes SELinux to ignore the tunable and instead use a seperate Kconfig option specific to how much space the LSM should protect. The tunable will now only control the need for CAP_SYS_RAWIO and SELinux permissions will always protect the amount of low memory designated by CONFIG_LSM_MMAP_MIN_ADDR. This allows users who need to disable the mmap_min_addr controls (usual reason being they run WINE as a non-root user) to do so and still have SELinux controls preventing confined domains (like a web server) from being able to map some area of low memory. Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/mm.h | 15 --------------- include/linux/security.h | 17 +++++++++++++++++ kernel/sysctl.c | 7 ++++--- mm/Kconfig | 6 +++--- mm/mmap.c | 3 --- mm/nommu.c | 3 --- security/Kconfig | 16 ++++++++++++++++ security/Makefile | 2 +- security/commoncap.c | 2 +- security/min_addr.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ security/selinux/hooks.c | 2 +- 11 files changed, 92 insertions(+), 30 deletions(-) create mode 100644 security/min_addr.c (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ba3a7cb1eaa0..9a72cc78e6b8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -34,8 +34,6 @@ extern int sysctl_legacy_va_layout; #define sysctl_legacy_va_layout 0 #endif -extern unsigned long mmap_min_addr; - #include #include #include @@ -574,19 +572,6 @@ static inline void set_page_links(struct page *page, enum zone_type zone, set_page_section(page, pfn_to_section_nr(pfn)); } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - /* * Some inline functions in vmstat.h depend on page_zone() */ diff --git a/include/linux/security.h b/include/linux/security.h index ac4bc3760b46..dc3472c1f781 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -28,6 +28,7 @@ #include #include #include +#include /* PAGE_ALIGN */ #include #include #include @@ -95,6 +96,7 @@ extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb); extern int cap_netlink_recv(struct sk_buff *skb, int cap); extern unsigned long mmap_min_addr; +extern unsigned long dac_mmap_min_addr; /* * Values used in the task_security_ops calls */ @@ -147,6 +149,21 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} + +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 98e02328c67d..58be76017fd0 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -1306,10 +1307,10 @@ static struct ctl_table vm_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "mmap_min_addr", - .data = &mmap_min_addr, - .maxlen = sizeof(unsigned long), + .data = &dac_mmap_min_addr, + .maxlen = sizeof(unsigned long), .mode = 0644, - .proc_handler = &proc_doulongvec_minmax, + .proc_handler = &mmap_min_addr_handler, }, #ifdef CONFIG_NUMA { diff --git a/mm/Kconfig b/mm/Kconfig index c948d4ca8bde..fe5f674d7a7d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -225,9 +225,9 @@ config DEFAULT_MMAP_MIN_ADDR For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. - Programs which use vm86 functionality would either need additional - permissions from either the LSM or the capabilities module or have - this protection disabled. + Programs which use vm86 functionality or have some need to map + this low address space will need CAP_SYS_RAWIO or disable this + protection by setting the value to 0. This value can be changed after boot using the /proc/sys/vm/mmap_min_addr tunable. diff --git a/mm/mmap.c b/mm/mmap.c index 34579b23ebd5..8101de490c73 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -88,9 +88,6 @@ int sysctl_overcommit_ratio = 50; /* default is 50% */ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; struct percpu_counter vm_committed_as; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - /* * Check that a process has enough memory to allocate a new virtual * mapping. 0 means there is enough memory for the allocation to diff --git a/mm/nommu.c b/mm/nommu.c index 53cab10fece4..28754c40be98 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,9 +69,6 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = CONFIG_NOMMU_INITIAL_TRIM_EXCESS; int heap_stack_gap = 0; -/* amount of vm to protect from userspace access */ -unsigned long mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; - atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); diff --git a/security/Kconfig b/security/Kconfig index d23c839038f0..9c60c346a91d 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -113,6 +113,22 @@ config SECURITY_ROOTPLUG If you are unsure how to answer this question, answer N. +config LSM_MMAP_MIN_ADDR + int "Low address space for LSM to from user allocation" + depends on SECURITY && SECURITY_SELINUX + default 65535 + help + This is the portion of low virtual memory which should be protected + from userspace allocation. Keeping a user from writing to low pages + can help reduce the impact of kernel NULL pointer bugs. + + For most ia64, ppc64 and x86 users with lots of address space + a value of 65536 is reasonable and should cause no problems. + On arm and other archs it should not be higher than 32768. + Programs which use vm86 functionality or have some need to map + this low address space will need the permission specific to the + systems running LSM. + source security/selinux/Kconfig source security/smack/Kconfig source security/tomoyo/Kconfig diff --git a/security/Makefile b/security/Makefile index c67557cdaa85..b56e7f9ecbc2 100644 --- a/security/Makefile +++ b/security/Makefile @@ -8,7 +8,7 @@ subdir-$(CONFIG_SECURITY_SMACK) += smack subdir-$(CONFIG_SECURITY_TOMOYO) += tomoyo # always enable default capabilities -obj-y += commoncap.o +obj-y += commoncap.o min_addr.o # Object file lists obj-$(CONFIG_SECURITY) += security.o capability.o diff --git a/security/commoncap.c b/security/commoncap.c index 6bcf6e81e547..e3097c0a1311 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -1005,7 +1005,7 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, { int ret = 0; - if (addr < mmap_min_addr) { + if (addr < dac_mmap_min_addr) { ret = cap_capable(current, current_cred(), CAP_SYS_RAWIO, SECURITY_CAP_AUDIT); /* set PF_SUPERPRIV if it turns out we allow the low mmap */ diff --git a/security/min_addr.c b/security/min_addr.c new file mode 100644 index 000000000000..14cc7b3b8d03 --- /dev/null +++ b/security/min_addr.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include + +/* amount of vm to protect from userspace access by both DAC and the LSM*/ +unsigned long mmap_min_addr; +/* amount of vm to protect from userspace using CAP_SYS_RAWIO (DAC) */ +unsigned long dac_mmap_min_addr = CONFIG_DEFAULT_MMAP_MIN_ADDR; +/* amount of vm to protect from userspace using the LSM = CONFIG_LSM_MMAP_MIN_ADDR */ + +/* + * Update mmap_min_addr = max(dac_mmap_min_addr, CONFIG_LSM_MMAP_MIN_ADDR) + */ +static void update_mmap_min_addr(void) +{ +#ifdef CONFIG_LSM_MMAP_MIN_ADDR + if (dac_mmap_min_addr > CONFIG_LSM_MMAP_MIN_ADDR) + mmap_min_addr = dac_mmap_min_addr; + else + mmap_min_addr = CONFIG_LSM_MMAP_MIN_ADDR; +#else + mmap_min_addr = dac_mmap_min_addr; +#endif +} + +/* + * sysctl handler which just sets dac_mmap_min_addr = the new value and then + * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly + */ +int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int ret; + + ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); + + update_mmap_min_addr(); + + return ret; +} + +int __init init_mmap_min_addr(void) +{ + update_mmap_min_addr(); + + return 0; +} +pure_initcall(init_mmap_min_addr); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index e6d1432b0800..8d8b69c5664e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3036,7 +3036,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, * at bad behaviour/exploit that we always want to get the AVC, even * if DAC would have also denied the operation. */ - if (addr < mmap_min_addr) { + if (addr < CONFIG_LSM_MMAP_MIN_ADDR) { rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, MEMPROTECT__MMAP_ZERO, NULL); if (rc) -- cgit v1.2.3 From 1d9959734a1949ea4f2427bd2d8b21ede6b2441c Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Fri, 7 Aug 2009 14:53:57 -0400 Subject: security: define round_hint_to_min in !CONFIG_SECURITY Fix the header files to define round_hint_to_min() and to define mmap_min_addr_handler() in the !CONFIG_SECURITY case. Built and tested with !CONFIG_SECURITY Signed-off-by: Eric Paris Signed-off-by: James Morris --- include/linux/security.h | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index dc3472c1f781..1f16eea2017b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -121,6 +121,21 @@ struct request_sock; #define LSM_UNSAFE_PTRACE 2 #define LSM_UNSAFE_PTRACE_CAP 4 +/* + * If a hint addr is less than mmap_min_addr change hint to be as + * low as possible but still greater than mmap_min_addr + */ +static inline unsigned long round_hint_to_min(unsigned long hint) +{ + hint &= PAGE_MASK; + if (((void *)hint != NULL) && + (hint < mmap_min_addr)) + return PAGE_ALIGN(mmap_min_addr); + return hint; +} +extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); + #ifdef CONFIG_SECURITY struct security_mnt_opts { @@ -149,21 +164,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) opts->num_mnt_opts = 0; } -/* - * If a hint addr is less than mmap_min_addr change hint to be as - * low as possible but still greater than mmap_min_addr - */ -static inline unsigned long round_hint_to_min(unsigned long hint) -{ - hint &= PAGE_MASK; - if (((void *)hint != NULL) && - (hint < mmap_min_addr)) - return PAGE_ALIGN(mmap_min_addr); - return hint; -} - -extern int mmap_min_addr_handler(struct ctl_table *table, int write, struct file *filp, - void __user *buffer, size_t *lenp, loff_t *ppos); /** * struct security_operations - main security structure * -- cgit v1.2.3 From c1a8f1f1c8e01eab5862c8db39b49ace814e6c66 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 16 Aug 2009 09:36:49 +0000 Subject: net: restore gnet_stats_basic to previous definition In 5e140dfc1fe87eae27846f193086724806b33c7d "net: reorder struct Qdisc for better SMP performance" the definition of struct gnet_stats_basic changed incompatibly, as copies of this struct are shipped to userland via netlink. Restoring old behavior is not welcome, for performance reason. Fix is to use a private structure for kernel, and teach gnet_stats_copy_basic() to convert from kernel to user land, using legacy structure (struct gnet_stats_basic) Based on a report and initial patch from Michael Spang. Reported-by: Michael Spang Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/gen_stats.h | 5 +++++ include/net/act_api.h | 2 +- include/net/gen_stats.h | 10 +++++----- include/net/netfilter/xt_rateest.h | 2 +- include/net/sch_generic.h | 2 +- net/core/gen_estimator.c | 12 ++++++------ net/core/gen_stats.c | 11 ++++++++--- net/netfilter/xt_RATEEST.c | 2 +- net/sched/sch_atm.c | 2 +- net/sched/sch_cbq.c | 2 +- net/sched/sch_drr.c | 2 +- net/sched/sch_hfsc.c | 2 +- net/sched/sch_htb.c | 2 +- 13 files changed, 33 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h index 0ffa41df0ee8..710e901085d0 100644 --- a/include/linux/gen_stats.h +++ b/include/linux/gen_stats.h @@ -19,6 +19,11 @@ enum { * @packets: number of seen packets */ struct gnet_stats_basic +{ + __u64 bytes; + __u32 packets; +}; +struct gnet_stats_basic_packed { __u64 bytes; __u32 packets; diff --git a/include/net/act_api.h b/include/net/act_api.h index 565eed8fe496..c05fd717c588 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -16,7 +16,7 @@ struct tcf_common { u32 tcfc_capab; int tcfc_action; struct tcf_t tcfc_tm; - struct gnet_stats_basic tcfc_bstats; + struct gnet_stats_basic_packed tcfc_bstats; struct gnet_stats_queue tcfc_qstats; struct gnet_stats_rate_est tcfc_rate_est; spinlock_t tcfc_lock; diff --git a/include/net/gen_stats.h b/include/net/gen_stats.h index d136b5240ef2..c1488553e349 100644 --- a/include/net/gen_stats.h +++ b/include/net/gen_stats.h @@ -28,7 +28,7 @@ extern int gnet_stats_start_copy_compat(struct sk_buff *skb, int type, spinlock_t *lock, struct gnet_dump *d); extern int gnet_stats_copy_basic(struct gnet_dump *d, - struct gnet_stats_basic *b); + struct gnet_stats_basic_packed *b); extern int gnet_stats_copy_rate_est(struct gnet_dump *d, struct gnet_stats_rate_est *r); extern int gnet_stats_copy_queue(struct gnet_dump *d, @@ -37,14 +37,14 @@ extern int gnet_stats_copy_app(struct gnet_dump *d, void *st, int len); extern int gnet_stats_finish_copy(struct gnet_dump *d); -extern int gen_new_estimator(struct gnet_stats_basic *bstats, +extern int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern void gen_kill_estimator(struct gnet_stats_basic *bstats, +extern void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est); -extern int gen_replace_estimator(struct gnet_stats_basic *bstats, +extern int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt); -extern bool gen_estimator_active(const struct gnet_stats_basic *bstats, +extern bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est); #endif diff --git a/include/net/netfilter/xt_rateest.h b/include/net/netfilter/xt_rateest.h index 65d594dffbff..ddbf37e19616 100644 --- a/include/net/netfilter/xt_rateest.h +++ b/include/net/netfilter/xt_rateest.h @@ -8,7 +8,7 @@ struct xt_rateest { spinlock_t lock; struct gnet_estimator params; struct gnet_stats_rate_est rstats; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; }; extern struct xt_rateest *xt_rateest_lookup(const char *name); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 964ffa0d8815..5482e9582f55 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -72,7 +72,7 @@ struct Qdisc */ unsigned long state; struct sk_buff_head q; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; }; diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 78e5bfc454ae..493775f4f2f1 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -81,7 +81,7 @@ struct gen_estimator { struct list_head list; - struct gnet_stats_basic *bstats; + struct gnet_stats_basic_packed *bstats; struct gnet_stats_rate_est *rate_est; spinlock_t *stats_lock; int ewma_log; @@ -165,7 +165,7 @@ static void gen_add_node(struct gen_estimator *est) } static -struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, +struct gen_estimator *gen_find_node(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { struct rb_node *p = est_root.rb_node; @@ -202,7 +202,7 @@ struct gen_estimator *gen_find_node(const struct gnet_stats_basic *bstats, * * NOTE: Called under rtnl_mutex */ -int gen_new_estimator(struct gnet_stats_basic *bstats, +int gen_new_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) @@ -262,7 +262,7 @@ static void __gen_kill_estimator(struct rcu_head *head) * * NOTE: Called under rtnl_mutex */ -void gen_kill_estimator(struct gnet_stats_basic *bstats, +void gen_kill_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est) { struct gen_estimator *e; @@ -292,7 +292,7 @@ EXPORT_SYMBOL(gen_kill_estimator); * * Returns 0 on success or a negative error code. */ -int gen_replace_estimator(struct gnet_stats_basic *bstats, +int gen_replace_estimator(struct gnet_stats_basic_packed *bstats, struct gnet_stats_rate_est *rate_est, spinlock_t *stats_lock, struct nlattr *opt) { @@ -308,7 +308,7 @@ EXPORT_SYMBOL(gen_replace_estimator); * * Returns true if estimator is active, and false if not. */ -bool gen_estimator_active(const struct gnet_stats_basic *bstats, +bool gen_estimator_active(const struct gnet_stats_basic_packed *bstats, const struct gnet_stats_rate_est *rate_est) { ASSERT_RTNL(); diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index c3d0ffeac243..8569310268ab 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -106,16 +106,21 @@ gnet_stats_start_copy(struct sk_buff *skb, int type, spinlock_t *lock, * if the room in the socket buffer was not sufficient. */ int -gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic *b) +gnet_stats_copy_basic(struct gnet_dump *d, struct gnet_stats_basic_packed *b) { if (d->compat_tc_stats) { d->tc_stats.bytes = b->bytes; d->tc_stats.packets = b->packets; } - if (d->tail) - return gnet_stats_copy(d, TCA_STATS_BASIC, b, sizeof(*b)); + if (d->tail) { + struct gnet_stats_basic sb; + memset(&sb, 0, sizeof(sb)); + sb.bytes = b->bytes; + sb.packets = b->packets; + return gnet_stats_copy(d, TCA_STATS_BASIC, &sb, sizeof(sb)); + } return 0; } diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 43f5676b1af4..d80b8192e0d4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -74,7 +74,7 @@ static unsigned int xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { const struct xt_rateest_target_info *info = par->targinfo; - struct gnet_stats_basic *stats = &info->est->bstats; + struct gnet_stats_basic_packed *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); stats->bytes += skb->len; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 2a8b83af7c47..ab82f145f689 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -49,7 +49,7 @@ struct atm_flow_data { struct socket *sock; /* for closing */ u32 classid; /* x:y type ID */ int ref; /* reference count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 23a167670fd5..d5798e17a832 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -128,7 +128,7 @@ struct cbq_class long avgidle; long deficit; /* Saved deficit for WRR */ psched_time_t penalized; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_cbq_xstats xstats; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 7597fe146866..12b2fb04b29b 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -22,7 +22,7 @@ struct drr_class { unsigned int refcnt; unsigned int filter_cnt; - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct list_head alist; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 362c2811b2df..dad0144423da 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -116,7 +116,7 @@ struct hfsc_class struct Qdisc_class_common cl_common; unsigned int refcnt; /* usage count */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; unsigned int level; /* class level in hierarchy */ diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 88cd02626621..ec4d46399d59 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -74,7 +74,7 @@ enum htb_cmode { struct htb_class { struct Qdisc_class_common common; /* general class parameters */ - struct gnet_stats_basic bstats; + struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; struct tc_htb_xstats xstats; /* our special stats */ -- cgit v1.2.3 From 0753ba01e126020bf0f8150934903b48935b697d Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 18 Aug 2009 14:11:10 -0700 Subject: mm: revert "oom: move oom_adj value" The commit 2ff05b2b (oom: move oom_adj value) moveed the oom_adj value to the mm_struct. It was a very good first step for sanitize OOM. However Paul Menage reported the commit makes regression to his job scheduler. Current OOM logic can kill OOM_DISABLED process. Why? His program has the code of similar to the following. ... set_oom_adj(OOM_DISABLE); /* The job scheduler never killed by oom */ ... if (vfork() == 0) { set_oom_adj(0); /* Invoked child can be killed */ execve("foo-bar-cmd"); } .... vfork() parent and child are shared the same mm_struct. then above set_oom_adj(0) doesn't only change oom_adj for vfork() child, it's also change oom_adj for vfork() parent. Then, vfork() parent (job scheduler) lost OOM immune and it was killed. Actually, fork-setting-exec idiom is very frequently used in userland program. We must not break this assumption. Then, this patch revert commit 2ff05b2b and related commit. Reverted commit list --------------------- - commit 2ff05b2b4e (oom: move oom_adj value from task_struct to mm_struct) - commit 4d8b9135c3 (oom: avoid unnecessary mm locking and scanning for OOM_DISABLE) - commit 8123681022 (oom: only oom kill exiting tasks with attached memory) - commit 933b787b57 (mm: copy over oom_adj value at fork time) Signed-off-by: KOSAKI Motohiro Cc: Paul Menage Cc: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: Rik van Riel Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Nick Piggin Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 15 +++------ fs/proc/base.c | 19 ++--------- include/linux/mm_types.h | 2 -- include/linux/sched.h | 1 + kernel/fork.c | 1 - mm/oom_kill.c | 64 +++++++++++++++++++++++--------------- 6 files changed, 48 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index fad18f9456e4..ffead13f9443 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -1167,13 +1167,11 @@ CHAPTER 3: PER-PROCESS PARAMETERS 3.1 /proc//oom_adj - Adjust the oom-killer score ------------------------------------------------------ -This file can be used to adjust the score used to select which processes should -be killed in an out-of-memory situation. The oom_adj value is a characteristic -of the task's mm, so all threads that share an mm with pid will have the same -oom_adj value. A high value will increase the likelihood of this process being -killed by the oom-killer. Valid values are in the range -16 to +15 as -explained below and a special value of -17, which disables oom-killing -altogether for threads sharing pid's mm. +This file can be used to adjust the score used to select which processes +should be killed in an out-of-memory situation. Giving it a high score will +increase the likelihood of this process being killed by the oom-killer. Valid +values are in the range -16 to +15, plus the special value -17, which disables +oom-killing altogether for this process. The process to be killed in an out-of-memory situation is selected among all others based on its badness score. This value equals the original memory size of the process @@ -1187,9 +1185,6 @@ the parent's score if they do not share the same memory. Thus forking servers are the prime candidates to be killed. Having only one 'hungry' child will make parent less preferable than the child. -/proc//oom_adj cannot be changed for kthreads since they are immune from -oom-killing already. - /proc//oom_score shows process' current badness score. The following heuristics are then applied: diff --git a/fs/proc/base.c b/fs/proc/base.c index 175db258942f..6f742f6658a9 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1003,12 +1003,7 @@ static ssize_t oom_adjust_read(struct file *file, char __user *buf, if (!task) return -ESRCH; - task_lock(task); - if (task->mm) - oom_adjust = task->mm->oom_adj; - else - oom_adjust = OOM_DISABLE; - task_unlock(task); + oom_adjust = task->oomkilladj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", oom_adjust); @@ -1037,19 +1032,11 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, task = get_proc_task(file->f_path.dentry->d_inode); if (!task) return -ESRCH; - task_lock(task); - if (!task->mm) { - task_unlock(task); - put_task_struct(task); - return -EINVAL; - } - if (oom_adjust < task->mm->oom_adj && !capable(CAP_SYS_RESOURCE)) { - task_unlock(task); + if (oom_adjust < task->oomkilladj && !capable(CAP_SYS_RESOURCE)) { put_task_struct(task); return -EACCES; } - task->mm->oom_adj = oom_adjust; - task_unlock(task); + task->oomkilladj = oom_adjust; put_task_struct(task); if (end - buffer == 0) return -EIO; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 7acc8439d9b3..0042090a4d70 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -240,8 +240,6 @@ struct mm_struct { unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ - s8 oom_adj; /* OOM kill score adjustment (bit shift) */ - cpumask_t cpu_vm_mask; /* Architecture-specific MM context */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 3ab08e4bb6b8..0f1ea4a66957 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1198,6 +1198,7 @@ struct task_struct { * a short time */ unsigned char fpu_counter; + s8 oomkilladj; /* OOM kill score adjustment (bit shift). */ #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif diff --git a/kernel/fork.c b/kernel/fork.c index 021e1138556e..144326b7af50 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -426,7 +426,6 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); mm->flags = (current->mm) ? current->mm->flags : default_dump_filter; - mm->oom_adj = (current->mm) ? current->mm->oom_adj : 0; mm->core_state = NULL; mm->nr_ptes = 0; set_mm_counter(mm, file_rss, 0); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 175a67a78a99..a7b2460e922b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -58,7 +58,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) unsigned long points, cpu_time, run_time; struct mm_struct *mm; struct task_struct *child; - int oom_adj; task_lock(p); mm = p->mm; @@ -66,11 +65,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) task_unlock(p); return 0; } - oom_adj = mm->oom_adj; - if (oom_adj == OOM_DISABLE) { - task_unlock(p); - return 0; - } /* * The memory size of the process is the basis for the badness. @@ -154,15 +148,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) points /= 8; /* - * Adjust the score by oom_adj. + * Adjust the score by oomkilladj. */ - if (oom_adj) { - if (oom_adj > 0) { + if (p->oomkilladj) { + if (p->oomkilladj > 0) { if (!points) points = 1; - points <<= oom_adj; + points <<= p->oomkilladj; } else - points >>= -(oom_adj); + points >>= -(p->oomkilladj); } #ifdef DEBUG @@ -257,8 +251,11 @@ static struct task_struct *select_bad_process(unsigned long *ppoints, *ppoints = ULONG_MAX; } + if (p->oomkilladj == OOM_DISABLE) + continue; + points = badness(p, uptime.tv_sec); - if (points > *ppoints) { + if (points > *ppoints || !chosen) { chosen = p; *ppoints = points; } @@ -307,7 +304,8 @@ static void dump_tasks(const struct mem_cgroup *mem) } printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm, - get_mm_rss(mm), (int)task_cpu(p), mm->oom_adj, p->comm); + get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj, + p->comm); task_unlock(p); } while_each_thread(g, p); } @@ -325,8 +323,11 @@ static void __oom_kill_task(struct task_struct *p, int verbose) return; } - if (!p->mm) + if (!p->mm) { + WARN_ON(1); + printk(KERN_WARNING "tried to kill an mm-less task!\n"); return; + } if (verbose) printk(KERN_ERR "Killed process %d (%s)\n", @@ -348,13 +349,28 @@ static int oom_kill_task(struct task_struct *p) struct mm_struct *mm; struct task_struct *g, *q; - task_lock(p); mm = p->mm; - if (!mm || mm->oom_adj == OOM_DISABLE) { - task_unlock(p); + + /* WARNING: mm may not be dereferenced since we did not obtain its + * value from get_task_mm(p). This is OK since all we need to do is + * compare mm to q->mm below. + * + * Furthermore, even if mm contains a non-NULL value, p->mm may + * change to NULL at any time since we do not hold task_lock(p). + * However, this is of no concern to us. + */ + + if (mm == NULL) return 1; - } - task_unlock(p); + + /* + * Don't kill the process if any threads are set to OOM_DISABLE + */ + do_each_thread(g, q) { + if (q->mm == mm && q->oomkilladj == OOM_DISABLE) + return 1; + } while_each_thread(g, q); + __oom_kill_task(p, 1); /* @@ -377,11 +393,10 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, struct task_struct *c; if (printk_ratelimit()) { - task_lock(current); printk(KERN_WARNING "%s invoked oom-killer: " - "gfp_mask=0x%x, order=%d, oom_adj=%d\n", - current->comm, gfp_mask, order, - current->mm ? current->mm->oom_adj : OOM_DISABLE); + "gfp_mask=0x%x, order=%d, oomkilladj=%d\n", + current->comm, gfp_mask, order, current->oomkilladj); + task_lock(current); cpuset_print_task_mems_allowed(current); task_unlock(current); dump_stack(); @@ -394,9 +409,8 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, /* * If the task is already exiting, don't alarm the sysadmin or kill * its children or threads, just set TIF_MEMDIE so it can die quickly - * if its mm is still attached. */ - if (p->mm && (p->flags & PF_EXITING)) { + if (p->flags & PF_EXITING) { __oom_kill_task(p, 0); return 0; } -- cgit v1.2.3 From e3b2415e281a97ade36d88404094a90cfea838c0 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 21 Aug 2009 09:47:45 +1000 Subject: drm/radeon/kms: implement the bo busy ioctl properly. The previous patch assumes the ioctl already existed, when it actually didn't. It also didn't return the correct error code. Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_gem.c | 2 +- drivers/gpu/drm/radeon/radeon_kms.c | 1 + include/drm/radeon_drm.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index d4ceff13bbb1..14c199802920 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -283,7 +283,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, mutex_lock(&dev->struct_mutex); drm_gem_object_unreference(gobj); mutex_unlock(&dev->struct_mutex); - return 0; + return r; } int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index d2764bf6b2a2..11ed672543b1 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -318,5 +318,6 @@ struct drm_ioctl_desc radeon_ioctls_kms[] = { DRM_IOCTL_DEF(DRM_RADEON_INFO, radeon_info_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_GEM_SET_TILING, radeon_gem_set_tiling_ioctl, DRM_AUTH), DRM_IOCTL_DEF(DRM_RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH), + DRM_IOCTL_DEF(DRM_RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH), }; int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms); diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index f81c3232accd..b43925586b29 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -508,6 +508,7 @@ typedef struct { #define DRM_RADEON_INFO 0x27 #define DRM_RADEON_GEM_SET_TILING 0x28 #define DRM_RADEON_GEM_GET_TILING 0x29 +#define DRM_RADEON_GEM_BUSY 0x2a #define DRM_IOCTL_RADEON_CP_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, drm_radeon_init_t) #define DRM_IOCTL_RADEON_CP_START DRM_IO( DRM_COMMAND_BASE + DRM_RADEON_CP_START) @@ -548,6 +549,7 @@ typedef struct { #define DRM_IOCTL_RADEON_INFO DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_INFO, struct drm_radeon_info) #define DRM_IOCTL_RADEON_SET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_SET_TILING, struct drm_radeon_gem_set_tiling) #define DRM_IOCTL_RADEON_GET_TILING DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_GET_TILING, struct drm_radeon_gem_get_tiling) +#define DRM_IOCTL_RADEON_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_BUSY, struct drm_radeon_gem_busy) typedef struct drm_radeon_init { enum { -- cgit v1.2.3 From 1700f5fde88f9a251037bc86bde538ee32c59905 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 20 Aug 2009 22:05:53 -0700 Subject: Input: ucb1400_ts - enable ADC Filter This patch enables ADC filtering on UCB1400 codec by default. The benefit from this change is mostly on some Colibri boards where the ADCSYNC pin of the UCB1400 codec isn't connected causing the touchscreen to jitter very badly. This change has no visible effect on boards where the ADCSYNC pin is connected. Signed-off-by: Marek Vasut Tested-by: Palo Revak Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/ucb1400_ts.c | 9 +++++++++ include/linux/ucb1400.h | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/drivers/input/touchscreen/ucb1400_ts.c b/drivers/input/touchscreen/ucb1400_ts.c index 6954f5500108..3b345f9cf0c7 100644 --- a/drivers/input/touchscreen/ucb1400_ts.c +++ b/drivers/input/touchscreen/ucb1400_ts.c @@ -345,6 +345,7 @@ static int ucb1400_ts_detect_irq(struct ucb1400_ts *ucb) static int ucb1400_ts_probe(struct platform_device *dev) { int error, x_res, y_res; + u16 fcsr; struct ucb1400_ts *ucb = dev->dev.platform_data; ucb->ts_idev = input_allocate_device(); @@ -382,6 +383,14 @@ static int ucb1400_ts_probe(struct platform_device *dev) ucb->ts_idev->evbit[0] = BIT_MASK(EV_ABS) | BIT_MASK(EV_KEY); ucb->ts_idev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); + /* + * Enable ADC filter to prevent horrible jitter on Colibri. + * This also further reduces jitter on boards where ADCSYNC + * pin is connected. + */ + fcsr = ucb1400_reg_read(ucb->ac97, UCB_FCSR); + ucb1400_reg_write(ucb->ac97, UCB_FCSR, fcsr | UCB_FCSR_AVE); + ucb1400_adc_enable(ucb->ac97); x_res = ucb1400_ts_read_xres(ucb); y_res = ucb1400_ts_read_yres(ucb); diff --git a/include/linux/ucb1400.h b/include/linux/ucb1400.h index ed889f4168f3..ae779bb8cc0f 100644 --- a/include/linux/ucb1400.h +++ b/include/linux/ucb1400.h @@ -73,6 +73,10 @@ #define UCB_ADC_DATA 0x68 #define UCB_ADC_DAT_VALID (1 << 15) + +#define UCB_FCSR 0x6c +#define UCB_FCSR_AVE (1 << 12) + #define UCB_ADC_DAT_MASK 0x3ff #define UCB_ID 0x7e -- cgit v1.2.3 From f779b3e513478218cbaaaa0a506d7801cab6fd14 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 19 Aug 2009 19:11:39 -0400 Subject: drm/radeon: add GET_PARAM/INFO support for Z pipes Needed for occlusion queries on rv530 chips. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/r300.c | 4 +++- drivers/gpu/drm/radeon/r420.c | 13 ++++++++++++- drivers/gpu/drm/radeon/r520.c | 1 - drivers/gpu/drm/radeon/radeon.h | 1 + drivers/gpu/drm/radeon/radeon_cp.c | 9 +++++++++ drivers/gpu/drm/radeon/radeon_drv.h | 5 ++++- drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ drivers/gpu/drm/radeon/radeon_reg.h | 2 ++ drivers/gpu/drm/radeon/radeon_state.c | 3 +++ include/drm/radeon_drm.h | 2 ++ 10 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index c47579dcafa1..053f4ec397f7 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -448,6 +448,7 @@ void r300_gpu_init(struct radeon_device *rdev) /* rv350,rv370,rv380 */ rdev->num_gb_pipes = 1; } + rdev->num_z_pipes = 1; gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16); switch (rdev->num_gb_pipes) { case 2: @@ -486,7 +487,8 @@ void r300_gpu_init(struct radeon_device *rdev) printk(KERN_WARNING "Failed to wait MC idle while " "programming pipes. Bad things might happen.\n"); } - DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes); + DRM_INFO("radeon: %d quad pipes, %d Z pipes initialized.\n", + rdev->num_gb_pipes, rdev->num_z_pipes); } int r300_ga_reset(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r420.c b/drivers/gpu/drm/radeon/r420.c index dea497a979f2..97426a6f370f 100644 --- a/drivers/gpu/drm/radeon/r420.c +++ b/drivers/gpu/drm/radeon/r420.c @@ -165,7 +165,18 @@ void r420_pipes_init(struct radeon_device *rdev) printk(KERN_WARNING "Failed to wait GUI idle while " "programming pipes. Bad things might happen.\n"); } - DRM_INFO("radeon: %d pipes initialized.\n", rdev->num_gb_pipes); + + if (rdev->family == CHIP_RV530) { + tmp = RREG32(RV530_GB_PIPE_SELECT2); + if ((tmp & 3) == 3) + rdev->num_z_pipes = 2; + else + rdev->num_z_pipes = 1; + } else + rdev->num_z_pipes = 1; + + DRM_INFO("radeon: %d quad pipes, %d z pipes initialized.\n", + rdev->num_gb_pipes, rdev->num_z_pipes); } void r420_gpu_init(struct radeon_device *rdev) diff --git a/drivers/gpu/drm/radeon/r520.c b/drivers/gpu/drm/radeon/r520.c index 09fb0b6ec7dd..ebd6b0f7bdff 100644 --- a/drivers/gpu/drm/radeon/r520.c +++ b/drivers/gpu/drm/radeon/r520.c @@ -177,7 +177,6 @@ void r520_gpu_init(struct radeon_device *rdev) */ /* workaround for RV530 */ if (rdev->family == CHIP_RV530) { - WREG32(0x4124, 1); WREG32(0x4128, 0xFF); } r420_pipes_init(rdev); diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 79ad98264e33..b519fb2fecbb 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -655,6 +655,7 @@ struct radeon_device { int usec_timeout; enum radeon_pll_errata pll_errata; int num_gb_pipes; + int num_z_pipes; int disp_priority; /* BIOS */ uint8_t *bios; diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c index d8356827ef17..7a52c461145c 100644 --- a/drivers/gpu/drm/radeon/radeon_cp.c +++ b/drivers/gpu/drm/radeon/radeon_cp.c @@ -406,6 +406,15 @@ static void radeon_init_pipes(drm_radeon_private_t *dev_priv) { uint32_t gb_tile_config, gb_pipe_sel = 0; + if ((dev_priv->flags & RADEON_FAMILY_MASK) == CHIP_RV530) { + uint32_t z_pipe_sel = RADEON_READ(RV530_GB_PIPE_SELECT2); + if ((z_pipe_sel & 3) == 3) + dev_priv->num_z_pipes = 2; + else + dev_priv->num_z_pipes = 1; + } else + dev_priv->num_z_pipes = 1; + /* RS4xx/RS6xx/R4xx/R5xx */ if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R420) { gb_pipe_sel = RADEON_READ(R400_GB_PIPE_SELECT); diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h index 3933f8216a34..6fa32dac4e97 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.h +++ b/drivers/gpu/drm/radeon/radeon_drv.h @@ -100,9 +100,10 @@ * 1.28- Add support for VBL on CRTC2 * 1.29- R500 3D cmd buffer support * 1.30- Add support for occlusion queries + * 1.31- Add support for num Z pipes from GET_PARAM */ #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 30 +#define DRIVER_MINOR 31 #define DRIVER_PATCHLEVEL 0 /* @@ -329,6 +330,7 @@ typedef struct drm_radeon_private { resource_size_t fb_aper_offset; int num_gb_pipes; + int num_z_pipes; int track_flush; drm_local_map_t *mmio; @@ -689,6 +691,7 @@ extern void r600_page_table_cleanup(struct drm_device *dev, struct drm_ati_pciga /* pipe config regs */ #define R400_GB_PIPE_SELECT 0x402c +#define RV530_GB_PIPE_SELECT2 0x4124 #define R500_DYN_SCLK_PWMEM_PIPE 0x000d /* PLL */ #define R300_GB_TILE_CONFIG 0x4018 # define R300_ENABLE_TILING (1 << 0) diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 11ed672543b1..dce09ada32bc 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -95,6 +95,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) case RADEON_INFO_NUM_GB_PIPES: value = rdev->num_gb_pipes; break; + case RADEON_INFO_NUM_Z_PIPES: + value = rdev->num_z_pipes; + break; default: DRM_DEBUG("Invalid request %d\n", info->request); return -EINVAL; diff --git a/drivers/gpu/drm/radeon/radeon_reg.h b/drivers/gpu/drm/radeon/radeon_reg.h index 5834497b366d..4df43f62c678 100644 --- a/drivers/gpu/drm/radeon/radeon_reg.h +++ b/drivers/gpu/drm/radeon/radeon_reg.h @@ -3574,4 +3574,6 @@ #define RADEON_SCRATCH_REG4 0x15f0 #define RADEON_SCRATCH_REG5 0x15f4 +#define RV530_GB_PIPE_SELECT2 0x4124 + #endif diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c index 46645f3e0328..2882f40d5ec5 100644 --- a/drivers/gpu/drm/radeon/radeon_state.c +++ b/drivers/gpu/drm/radeon/radeon_state.c @@ -3081,6 +3081,9 @@ static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_fil case RADEON_PARAM_NUM_GB_PIPES: value = dev_priv->num_gb_pipes; break; + case RADEON_PARAM_NUM_Z_PIPES: + value = dev_priv->num_z_pipes; + break; default: DRM_DEBUG("Invalid parameter %d\n", param->param); return -EINVAL; diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index b43925586b29..2ba61e18fc8b 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -709,6 +709,7 @@ typedef struct drm_radeon_indirect { #define RADEON_PARAM_FB_LOCATION 14 /* FB location */ #define RADEON_PARAM_NUM_GB_PIPES 15 /* num GB pipes */ #define RADEON_PARAM_DEVICE_ID 16 +#define RADEON_PARAM_NUM_Z_PIPES 17 /* num Z pipes */ typedef struct drm_radeon_getparam { int param; @@ -897,6 +898,7 @@ struct drm_radeon_cs { #define RADEON_INFO_DEVICE_ID 0x00 #define RADEON_INFO_NUM_GB_PIPES 0x01 +#define RADEON_INFO_NUM_Z_PIPES 0x02 struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From f4b0373b26567cafd421d91101852ed7a34e9e94 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 21 Aug 2009 09:26:15 -0700 Subject: Make bitmask 'and' operators return a result code When 'and'ing two bitmasks (where 'andnot' is a variation on it), some cases want to know whether the result is the empty set or not. In particular, the TLB IPI sending code wants to do cpumask operations and determine if there are any CPU's left in the final set. So this just makes the bitmask (and cpumask) functions return a boolean for whether the result has any bits set. Cc: stable@kernel.org (2.6.30, needed by TLB shootdown fix) Signed-off-by: Linus Torvalds --- include/linux/bitmap.h | 18 ++++++++---------- include/linux/cpumask.h | 20 ++++++++++---------- lib/bitmap.c | 12 ++++++++---- 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 2878811c6134..756d78b8c1c5 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -94,13 +94,13 @@ extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, int shift, int bits); extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, int shift, int bits); -extern void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); -extern void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); extern int __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, int bits); @@ -171,13 +171,12 @@ static inline void bitmap_copy(unsigned long *dst, const unsigned long *src, } } -static inline void bitmap_and(unsigned long *dst, const unsigned long *src1, +static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { if (small_const_nbits(nbits)) - *dst = *src1 & *src2; - else - __bitmap_and(dst, src1, src2, nbits); + return (*dst = *src1 & *src2) != 0; + return __bitmap_and(dst, src1, src2, nbits); } static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, @@ -198,13 +197,12 @@ static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1, __bitmap_xor(dst, src1, src2, nbits); } -static inline void bitmap_andnot(unsigned long *dst, const unsigned long *src1, +static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1, const unsigned long *src2, int nbits) { if (small_const_nbits(nbits)) - *dst = *src1 & ~(*src2); - else - __bitmap_andnot(dst, src1, src2, nbits); + return (*dst = *src1 & ~(*src2)) != 0; + return __bitmap_andnot(dst, src1, src2, nbits); } static inline void bitmap_complement(unsigned long *dst, const unsigned long *src, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c5ac87ca7bc6..796df12091b7 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -43,10 +43,10 @@ * int cpu_isset(cpu, mask) true iff bit 'cpu' set in mask * int cpu_test_and_set(cpu, mask) test and set bit 'cpu' in mask * - * void cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] + * int cpus_and(dst, src1, src2) dst = src1 & src2 [intersection] * void cpus_or(dst, src1, src2) dst = src1 | src2 [union] * void cpus_xor(dst, src1, src2) dst = src1 ^ src2 - * void cpus_andnot(dst, src1, src2) dst = src1 & ~src2 + * int cpus_andnot(dst, src1, src2) dst = src1 & ~src2 * void cpus_complement(dst, src) dst = ~src * * int cpus_equal(mask1, mask2) Does mask1 == mask2? @@ -179,10 +179,10 @@ static inline int __cpu_test_and_set(int cpu, cpumask_t *addr) } #define cpus_and(dst, src1, src2) __cpus_and(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, +static inline int __cpus_and(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) { - bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); + return bitmap_and(dstp->bits, src1p->bits, src2p->bits, nbits); } #define cpus_or(dst, src1, src2) __cpus_or(&(dst), &(src1), &(src2), NR_CPUS) @@ -201,10 +201,10 @@ static inline void __cpus_xor(cpumask_t *dstp, const cpumask_t *src1p, #define cpus_andnot(dst, src1, src2) \ __cpus_andnot(&(dst), &(src1), &(src2), NR_CPUS) -static inline void __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, +static inline int __cpus_andnot(cpumask_t *dstp, const cpumask_t *src1p, const cpumask_t *src2p, int nbits) { - bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); + return bitmap_andnot(dstp->bits, src1p->bits, src2p->bits, nbits); } #define cpus_complement(dst, src) __cpus_complement(&(dst), &(src), NR_CPUS) @@ -738,11 +738,11 @@ static inline void cpumask_clear(struct cpumask *dstp) * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_and(struct cpumask *dstp, +static inline int cpumask_and(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { - bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), + return bitmap_and(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } @@ -779,11 +779,11 @@ static inline void cpumask_xor(struct cpumask *dstp, * @src1p: the first input * @src2p: the second input */ -static inline void cpumask_andnot(struct cpumask *dstp, +static inline int cpumask_andnot(struct cpumask *dstp, const struct cpumask *src1p, const struct cpumask *src2p) { - bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), + return bitmap_andnot(cpumask_bits(dstp), cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits); } diff --git a/lib/bitmap.c b/lib/bitmap.c index 35a1f7ff4149..702565821c99 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -179,14 +179,16 @@ void __bitmap_shift_left(unsigned long *dst, } EXPORT_SYMBOL(__bitmap_shift_left); -void __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & bitmap2[k]; + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_and); @@ -212,14 +214,16 @@ void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, } EXPORT_SYMBOL(__bitmap_xor); -void __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, +int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, const unsigned long *bitmap2, int bits) { int k; int nr = BITS_TO_LONGS(bits); + unsigned long result = 0; for (k = 0; k < nr; k++) - dst[k] = bitmap1[k] & ~bitmap2[k]; + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + return result != 0; } EXPORT_SYMBOL(__bitmap_andnot); -- cgit v1.2.3 From ee5f9757ea17759e1ce5503bdae2b07e48e32af9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 21 Aug 2009 16:33:34 -0700 Subject: pkt_sched: Convert qdisc_watchdog to tasklet_hrtimer None of this stuff should execute in hw IRQ context, therefore use a tasklet_hrtimer so that it runs in softirq context. Signed-off-by: David S. Miller Acked-by: Thomas Gleixner --- include/net/pkt_sched.h | 4 ++-- net/sched/sch_api.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 82a3191375f5..7eafb8d54470 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -61,8 +61,8 @@ psched_tdiff_bounded(psched_time_t tv1, psched_time_t tv2, psched_time_t bound) } struct qdisc_watchdog { - struct hrtimer timer; - struct Qdisc *qdisc; + struct tasklet_hrtimer timer; + struct Qdisc *qdisc; }; extern void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..e1c2bf7e9ba4 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -468,8 +468,8 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc) { - hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); - wd->timer.function = qdisc_watchdog; + tasklet_hrtimer_init(&wd->timer, qdisc_watchdog, + CLOCK_MONOTONIC, HRTIMER_MODE_ABS); wd->qdisc = qdisc; } EXPORT_SYMBOL(qdisc_watchdog_init); @@ -485,13 +485,13 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) wd->qdisc->flags |= TCQ_F_THROTTLED; time = ktime_set(0, 0); time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); - hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); + tasklet_hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); } EXPORT_SYMBOL(qdisc_watchdog_schedule); void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) { - hrtimer_cancel(&wd->timer); + tasklet_hrtimer_cancel(&wd->timer); wd->qdisc->flags &= ~TCQ_F_THROTTLED; } EXPORT_SYMBOL(qdisc_watchdog_cancel); -- cgit v1.2.3 From 6777d773a463ac045d333b989d4e44660f8d92ad Mon Sep 17 00:00:00 2001 From: Mimi Zohar Date: Fri, 21 Aug 2009 14:32:48 -0400 Subject: kernel_read: redefine offset type vfs_read() offset is defined as loff_t, but kernel_read() offset is only defined as unsigned long. Redefine kernel_read() offset as loff_t. Cc: stable@kernel.org Signed-off-by: Mimi Zohar Signed-off-by: James Morris --- fs/exec.c | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/fs/exec.c b/fs/exec.c index 4a8849e45b21..fb4f3cdda78c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -678,8 +678,8 @@ exit: } EXPORT_SYMBOL(open_exec); -int kernel_read(struct file *file, unsigned long offset, - char *addr, unsigned long count) +int kernel_read(struct file *file, loff_t offset, + char *addr, unsigned long count) { mm_segment_t old_fs; loff_t pos = offset; diff --git a/include/linux/fs.h b/include/linux/fs.h index 67888a9e0655..73e9b643e455 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2123,7 +2123,7 @@ extern struct file *do_filp_open(int dfd, const char *pathname, int open_flag, int mode, int acc_mode); extern int may_open(struct path *, int, int); -extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ -- cgit v1.2.3 From 353d5c30c666580347515da609dd74a2b8e9b828 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 24 Aug 2009 16:30:28 +0100 Subject: mm: fix hugetlb bug due to user_shm_unlock call 2.6.30's commit 8a0bdec194c21c8fdef840989d0d7b742bb5d4bc removed user_shm_lock() calls in hugetlb_file_setup() but left the user_shm_unlock call in shm_destroy(). In detail: Assume that can_do_hugetlb_shm() returns true and hence user_shm_lock() is not called in hugetlb_file_setup(). However, user_shm_unlock() is called in any case in shm_destroy() and in the following atomic_dec_and_lock(&up->__count) in free_uid() is executed and if up->__count gets zero, also cleanup_user_struct() is scheduled. Note that sched_destroy_user() is empty if CONFIG_USER_SCHED is not set. However, the ref counter up->__count gets unexpectedly non-positive and the corresponding structs are freed even though there are live references to them, resulting in a kernel oops after a lots of shmget(SHM_HUGETLB)/shmctl(IPC_RMID) cycles and CONFIG_USER_SCHED set. Hugh changed Stefan's suggested patch: can_do_hugetlb_shm() at the time of shm_destroy() may give a different answer from at the time of hugetlb_file_setup(). And fixed newseg()'s no_id error path, which has missed user_shm_unlock() ever since it came in 2.6.9. Reported-by: Stefan Huber Signed-off-by: Hugh Dickins Tested-by: Stefan Huber Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 20 ++++++++++++-------- include/linux/hugetlb.h | 6 ++++-- ipc/shm.c | 8 +++++--- 3 files changed, 21 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 941c8425c10b..cb88dac8ccaa 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -935,26 +935,28 @@ static int can_do_hugetlb_shm(void) return capable(CAP_IPC_LOCK) || in_group_p(sysctl_hugetlb_shm_group); } -struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag) +struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag, + struct user_struct **user) { int error = -ENOMEM; - int unlock_shm = 0; struct file *file; struct inode *inode; struct dentry *dentry, *root; struct qstr quick_string; - struct user_struct *user = current_user(); + *user = NULL; if (!hugetlbfs_vfsmount) return ERR_PTR(-ENOENT); if (!can_do_hugetlb_shm()) { - if (user_shm_lock(size, user)) { - unlock_shm = 1; + *user = current_user(); + if (user_shm_lock(size, *user)) { WARN_ONCE(1, "Using mlock ulimits for SHM_HUGETLB deprecated\n"); - } else + } else { + *user = NULL; return ERR_PTR(-EPERM); + } } root = hugetlbfs_vfsmount->mnt_root; @@ -996,8 +998,10 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - if (unlock_shm) - user_shm_unlock(size, user); + if (*user) { + user_shm_unlock(size, *user); + *user = NULL; + } return ERR_PTR(error); } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2723513a5651..5cbc620bdfe0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -10,6 +10,7 @@ #include struct ctl_table; +struct user_struct; int PageHuge(struct page *page); @@ -146,7 +147,8 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, size_t, int); +struct file *hugetlb_file_setup(const char *name, size_t size, int acct, + struct user_struct **user); int hugetlb_get_quota(struct address_space *mapping, long delta); void hugetlb_put_quota(struct address_space *mapping, long delta); @@ -168,7 +170,7 @@ static inline void set_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 #define set_file_hugepages(file) BUG() -#define hugetlb_file_setup(name,size,acctflag) ERR_PTR(-ENOSYS) +#define hugetlb_file_setup(name,size,acct,user) ERR_PTR(-ENOSYS) #endif /* !CONFIG_HUGETLBFS */ diff --git a/ipc/shm.c b/ipc/shm.c index 15dd238e5338..1bc4701ef4f0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -174,7 +174,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) shm_unlock(shp); if (!is_file_hugepages(shp->shm_file)) shmem_lock(shp->shm_file, 0, shp->mlock_user); - else + else if (shp->mlock_user) user_shm_unlock(shp->shm_file->f_path.dentry->d_inode->i_size, shp->mlock_user); fput (shp->shm_file); @@ -369,8 +369,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) /* hugetlb_file_setup applies strict accounting */ if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; - file = hugetlb_file_setup(name, size, acctflag); - shp->mlock_user = current_user(); + file = hugetlb_file_setup(name, size, acctflag, + &shp->mlock_user); } else { /* * Do not allow no accounting for OVERCOMMIT_NEVER, even @@ -410,6 +410,8 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) return error; no_id: + if (shp->mlock_user) /* shmflg & SHM_HUGETLB case */ + user_shm_unlock(size, shp->mlock_user); fput(file); no_file: security_shm_free(shp); -- cgit v1.2.3 From 8e7ee27095aee87b5db1b0061e2ceea5878a1bbd Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:21 -0700 Subject: flex_array: declare parts member to have incomplete type The `parts' member of struct flex_array should evaluate to an incomplete type so that sizeof() cannot be used and C99 does not require the zero-length specification. Signed-off-by: David Rientjes Acked-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 23c1ec79a31b..603160db7c98 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -21,7 +21,7 @@ struct flex_array { struct { int element_size; int total_nr_elements; - struct flex_array_part *parts[0]; + struct flex_array_part *parts[]; }; /* * This little trick makes sure that -- cgit v1.2.3 From b62e408c05228f40e69bb38a48db8961cac6cd23 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Wed, 26 Aug 2009 14:29:22 -0700 Subject: flex_array: convert element_nr formals to unsigned It's problematic to allow signed element_nr's or total's to be passed as part of the flex array API. flex_array_alloc() allows total_nr_elements to be set to a negative quantity, which is obviously erroneous. flex_array_get() and flex_array_put() allows negative array indices in dereferencing an array part, which could address memory mapped before struct flex_array. The fix is to convert all existing element_nr formals to be qualified as unsigned. Existing checks to compare it to total_nr_elements or the max array size based on element_size need not be changed. Signed-off-by: David Rientjes Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/flex_array.h | 10 ++++++---- lib/flex_array.c | 24 +++++++++++++----------- 2 files changed, 19 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 603160db7c98..45ff18491514 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -36,12 +36,14 @@ struct flex_array { .total_nr_elements = (total), \ } } } -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags); -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags); +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags); +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); -int flex_array_put(struct flex_array *fa, int element_nr, void *src, +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); -void *flex_array_get(struct flex_array *fa, int element_nr); +void *flex_array_get(struct flex_array *fa, unsigned int element_nr); #endif /* _FLEX_ARRAY_H */ diff --git a/lib/flex_array.c b/lib/flex_array.c index cf4e372cae90..7baed2fc3bc8 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -99,7 +99,8 @@ static inline int elements_fit_in_base(struct flex_array *fa) * capacity in the base structure. Also note that no effort is made * to efficiently pack objects across page boundaries. */ -struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) +struct flex_array *flex_array_alloc(int element_size, unsigned int total, + gfp_t flags) { struct flex_array *ret; int max_size = nr_base_part_ptrs() * __elements_per_part(element_size); @@ -115,7 +116,8 @@ struct flex_array *flex_array_alloc(int element_size, int total, gfp_t flags) return ret; } -static int fa_element_to_part_nr(struct flex_array *fa, int element_nr) +static int fa_element_to_part_nr(struct flex_array *fa, + unsigned int element_nr) { return element_nr / __elements_per_part(fa->element_size); } @@ -143,14 +145,12 @@ void flex_array_free(struct flex_array *fa) kfree(fa); } -static int fa_index_inside_part(struct flex_array *fa, int element_nr) +static unsigned int index_inside_part(struct flex_array *fa, + unsigned int element_nr) { - return element_nr % __elements_per_part(fa->element_size); -} + unsigned int part_offset; -static int index_inside_part(struct flex_array *fa, int element_nr) -{ - int part_offset = fa_index_inside_part(fa, element_nr); + part_offset = element_nr % __elements_per_part(fa->element_size); return part_offset * fa->element_size; } @@ -185,7 +185,8 @@ __fa_get_part(struct flex_array *fa, int part_nr, gfp_t flags) * * Locking must be provided by the caller. */ -int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags) +int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, + gfp_t flags) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; @@ -217,7 +218,8 @@ int flex_array_put(struct flex_array *fa, int element_nr, void *src, gfp_t flags * * Locking must be provided by the caller. */ -int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) +int flex_array_prealloc(struct flex_array *fa, unsigned int start, + unsigned int end, gfp_t flags) { int start_part; int end_part; @@ -248,7 +250,7 @@ int flex_array_prealloc(struct flex_array *fa, int start, int end, gfp_t flags) * * Locking must be provided by the caller. */ -void *flex_array_get(struct flex_array *fa, int element_nr) +void *flex_array_get(struct flex_array *fa, unsigned int element_nr) { int part_nr = fa_element_to_part_nr(fa, element_nr); struct flex_array_part *part; -- cgit v1.2.3 From 2a908002c7b1b666616103e9df2419b38d7c6f1f Mon Sep 17 00:00:00 2001 From: Frans Pop Date: Wed, 26 Aug 2009 14:29:29 -0700 Subject: ACPI processor: force throttling state when BIOS returns incorrect value If the BIOS reports an invalid throttling state (which seems to be fairly common after system boot), a reset is done to state T0. Because of a check in acpi_processor_get_throttling_ptc(), the reset never actually gets executed, which results in the error reoccurring on every access of for example /proc/acpi/processor/CPU0/throttling. Add a 'force' option to acpi_processor_set_throttling() to ensure the reset really takes effect. Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13389 This patch, together with the next one, fixes a regression introduced in 2.6.30, listed on the regression list. They have been available for 2.5 months now in bugzilla, but have not been picked up, despite various reminders and without any reason given. Google shows that numerous people are hitting this issue. The issue is in itself relatively minor, but the bug in the code is clear. The patches have been in all my kernels and today testing has shown that throttling works correctly with the patches applied when the system overheats (http://bugzilla.kernel.org/show_bug.cgi?id=13918#c14). Signed-off-by: Frans Pop Acked-by: Zhang Rui Cc: Len Brown Cc: "Rafael J. Wysocki" Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/acpi/processor_thermal.c | 6 +++--- drivers/acpi/processor_throttling.c | 26 ++++++++++++++------------ include/acpi/processor.h | 5 +++-- 3 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_thermal.c b/drivers/acpi/processor_thermal.c index 39838c666032..31adda1099e0 100644 --- a/drivers/acpi/processor_thermal.c +++ b/drivers/acpi/processor_thermal.c @@ -66,7 +66,7 @@ static int acpi_processor_apply_limit(struct acpi_processor *pr) if (pr->limit.thermal.tx > tx) tx = pr->limit.thermal.tx; - result = acpi_processor_set_throttling(pr, tx); + result = acpi_processor_set_throttling(pr, tx, false); if (result) goto end; } @@ -421,12 +421,12 @@ processor_set_cur_state(struct thermal_cooling_device *cdev, if (state <= max_pstate) { if (pr->flags.throttling && pr->throttling.state) - result = acpi_processor_set_throttling(pr, 0); + result = acpi_processor_set_throttling(pr, 0, false); cpufreq_set_cur_state(pr->id, state); } else { cpufreq_set_cur_state(pr->id, max_pstate); result = acpi_processor_set_throttling(pr, - state - max_pstate); + state - max_pstate, false); } return result; } diff --git a/drivers/acpi/processor_throttling.c b/drivers/acpi/processor_throttling.c index 227543789ba9..841be4ee2109 100644 --- a/drivers/acpi/processor_throttling.c +++ b/drivers/acpi/processor_throttling.c @@ -62,7 +62,8 @@ struct throttling_tstate { #define THROTTLING_POSTCHANGE (2) static int acpi_processor_get_throttling(struct acpi_processor *pr); -int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force); static int acpi_processor_update_tsd_coord(void) { @@ -361,7 +362,7 @@ int acpi_processor_tstate_has_changed(struct acpi_processor *pr) */ target_state = throttling_limit; } - return acpi_processor_set_throttling(pr, target_state); + return acpi_processor_set_throttling(pr, target_state, false); } /* @@ -842,7 +843,7 @@ static int acpi_processor_get_throttling_ptc(struct acpi_processor *pr) ACPI_WARNING((AE_INFO, "Invalid throttling state, reset")); state = 0; - ret = acpi_processor_set_throttling(pr, state); + ret = acpi_processor_set_throttling(pr, state, true); if (ret) return ret; } @@ -915,7 +916,7 @@ static int acpi_processor_get_fadt_info(struct acpi_processor *pr) } static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, - int state) + int state, bool force) { u32 value = 0; u32 duty_mask = 0; @@ -930,7 +931,7 @@ static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, if (!pr->flags.throttling) return -ENODEV; - if (state == pr->throttling.state) + if (!force && (state == pr->throttling.state)) return 0; if (state < pr->throttling_platform_limit) @@ -988,7 +989,7 @@ static int acpi_processor_set_throttling_fadt(struct acpi_processor *pr, } static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, - int state) + int state, bool force) { int ret; acpi_integer value; @@ -1002,7 +1003,7 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, if (!pr->flags.throttling) return -ENODEV; - if (state == pr->throttling.state) + if (!force && (state == pr->throttling.state)) return 0; if (state < pr->throttling_platform_limit) @@ -1018,7 +1019,8 @@ static int acpi_processor_set_throttling_ptc(struct acpi_processor *pr, return 0; } -int acpi_processor_set_throttling(struct acpi_processor *pr, int state) +int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force) { cpumask_var_t saved_mask; int ret = 0; @@ -1070,7 +1072,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) /* FIXME: use work_on_cpu() */ set_cpus_allowed_ptr(current, cpumask_of(pr->id)); ret = p_throttling->acpi_processor_set_throttling(pr, - t_state.target_state); + t_state.target_state, force); } else { /* * When the T-state coordination is SW_ALL or HW_ALL, @@ -1103,7 +1105,7 @@ int acpi_processor_set_throttling(struct acpi_processor *pr, int state) set_cpus_allowed_ptr(current, cpumask_of(i)); ret = match_pr->throttling. acpi_processor_set_throttling( - match_pr, t_state.target_state); + match_pr, t_state.target_state, force); } } /* @@ -1201,7 +1203,7 @@ int acpi_processor_get_throttling_info(struct acpi_processor *pr) ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Disabling throttling (was T%d)\n", pr->throttling.state)); - result = acpi_processor_set_throttling(pr, 0); + result = acpi_processor_set_throttling(pr, 0, false); if (result) goto end; } @@ -1307,7 +1309,7 @@ static ssize_t acpi_processor_write_throttling(struct file *file, if (strcmp(tmpbuf, charp) != 0) return -EINVAL; - result = acpi_processor_set_throttling(pr, state_val); + result = acpi_processor_set_throttling(pr, state_val, false); if (result) return result; diff --git a/include/acpi/processor.h b/include/acpi/processor.h index baf1e0a9a7ee..740ac3ad8fd0 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -174,7 +174,7 @@ struct acpi_processor_throttling { cpumask_var_t shared_cpu_map; int (*acpi_processor_get_throttling) (struct acpi_processor * pr); int (*acpi_processor_set_throttling) (struct acpi_processor * pr, - int state); + int state, bool force); u32 address; u8 duty_offset; @@ -321,7 +321,8 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr) /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); int acpi_processor_get_throttling_info(struct acpi_processor *pr); -extern int acpi_processor_set_throttling(struct acpi_processor *pr, int state); +extern int acpi_processor_set_throttling(struct acpi_processor *pr, + int state, bool force); extern const struct file_operations acpi_processor_throttling_fops; extern void acpi_processor_throttling_init(void); /* in processor_idle.c */ -- cgit v1.2.3 From 1a37f184fa7824982a5f434c06981ec46a66cef7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Mon, 31 Aug 2009 13:48:16 +1000 Subject: lmb: Also remove __init from lmb_end_of_RAM() declaration in lmb.h My previous patch (commit 4f8ee2c9cc: "lmb: Remove __init from lmb_end_of_DRAM()") removed __init in lmb.c but missed the fact that it was also marked as such in the .h Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Linus Torvalds --- include/linux/lmb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/lmb.h b/include/linux/lmb.h index c46c89505dac..2442e3f3d033 100644 --- a/include/linux/lmb.h +++ b/include/linux/lmb.h @@ -51,7 +51,7 @@ extern u64 __init lmb_alloc_base(u64 size, extern u64 __init __lmb_alloc_base(u64 size, u64 align, u64 max_addr); extern u64 __init lmb_phys_mem_size(void); -extern u64 __init lmb_end_of_DRAM(void); +extern u64 lmb_end_of_DRAM(void); extern void __init lmb_enforce_memory_limit(u64 memory_limit); extern int __init lmb_is_reserved(u64 addr); extern int lmb_find(struct lmb_property *res); -- cgit v1.2.3