From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001 From: Anatol Pomozov Date: Sun, 22 Apr 2012 18:45:24 -0700 Subject: fuse: add FALLOCATE operation fallocate filesystem operation preallocates media space for the given file. If fallocate returns success then any subsequent write to the given range never fails with 'not enough space' error. Signed-off-by: Anatol Pomozov Signed-off-by: Miklos Szeredi --- include/linux/fuse.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fuse.h b/include/linux/fuse.h index 8f2ab8fef929..9303348965fb 100644 --- a/include/linux/fuse.h +++ b/include/linux/fuse.h @@ -54,6 +54,9 @@ * 7.18 * - add FUSE_IOCTL_DIR flag * - add FUSE_NOTIFY_DELETE + * + * 7.19 + * - add FUSE_FALLOCATE */ #ifndef _LINUX_FUSE_H @@ -85,7 +88,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 18 +#define FUSE_KERNEL_MINOR_VERSION 19 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -278,6 +281,7 @@ enum fuse_opcode { FUSE_POLL = 40, FUSE_NOTIFY_REPLY = 41, FUSE_BATCH_FORGET = 42, + FUSE_FALLOCATE = 43, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; -- cgit v1.2.3 From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:06:54 -0600 Subject: mm: frontswap: add frontswap header file Frontswap is the alter ego of cleancache, the "yang" to cleancache's "yin"... and more precisely frontswap is the provider of anonymous pages to transcendent memory to nicely complement cleancache's providing of clean pagecache pages to transcendent memory. For optimal use of transcendent memory, both are necessary... because a kernel under memory pressure first reclaims clean pagecache pages and, when under more memory pressure, starts swapping anonymous pages. Frontswap and cleancache (which was merged at 3.0) are the "frontends" and the only necessary changes to the core kernel for transcendent memory; all other supporting code -- the "backends" -- is implemented as drivers. See the LWN.net article "Transcendent memory in a nutshell" for a detailed overview of frontswap and related kernel parts: https://lwn.net/Articles/454795/ Frontswap code was first posted publicly in January 2009 and on LKML in May 2009, and has remained functionally stable for nearly three years now. It is barely invasive, touching only the swap subsystem and adds less than 100 lines of code to existing swap subsystem code files. It has improved syntactically substantially between V1 and this posting of V14, thanks to the review of a few kernel developers, and has adapted easily to at least one major swap subsystem change. As of 3.4, there are three in-tree users of frontswap patiently waiting for this patchset and for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at 2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver merged at 3.4). In addition, a RFC has been posted for a KVM backend. The frontswap patchset has been in linux-next since next-110603. Earlier versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel and SuSE SLES. This patch, 1of4, provides the header file for the core code for frontswap that interfaces between the hooks in the swap subsystem and a frontswap backend via frontswap_ops. --- New file added: include/linux/frontswap.h [v14: add support for writethrough, per suggestion by aarcange@redhat.com] [v14: rebase to 3.4-rc2] [v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in] [v10: no change] [v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1] [v8: rebase to 3.0-rc4] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd] [v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls] [v6: rebase to 3.1-rc1] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Andrew Morton Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel [v15: int/bool on some functions] Signed-off-by: Konrad Wilk --- include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/linux/frontswap.h (limited to 'include/linux') diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h new file mode 100644 index 000000000000..68ff7af5c5fb --- /dev/null +++ b/include/linux/frontswap.h @@ -0,0 +1,127 @@ +#ifndef _LINUX_FRONTSWAP_H +#define _LINUX_FRONTSWAP_H + +#include +#include +#include + +struct frontswap_ops { + void (*init)(unsigned); + int (*put_page)(unsigned, pgoff_t, struct page *); + int (*get_page)(unsigned, pgoff_t, struct page *); + void (*invalidate_page)(unsigned, pgoff_t); + void (*invalidate_area)(unsigned); +}; + +extern bool frontswap_enabled; +extern struct frontswap_ops + frontswap_register_ops(struct frontswap_ops *ops); +extern void frontswap_shrink(unsigned long); +extern unsigned long frontswap_curr_pages(void); +extern void frontswap_writethrough(bool); + +extern void __frontswap_init(unsigned type); +extern int __frontswap_put_page(struct page *page); +extern int __frontswap_get_page(struct page *page); +extern void __frontswap_invalidate_page(unsigned, pgoff_t); +extern void __frontswap_invalidate_area(unsigned); + +#ifdef CONFIG_FRONTSWAP + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + bool ret = false; + + if (frontswap_enabled && sis->frontswap_map) + ret = test_bit(offset, sis->frontswap_map); + return ret; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + set_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ + if (frontswap_enabled && sis->frontswap_map) + clear_bit(offset, sis->frontswap_map); +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ + p->frontswap_map = map; +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return p->frontswap_map; +} +#else +/* all inline routines become no-ops and all externs are ignored */ + +#define frontswap_enabled (0) + +static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset) +{ + return false; +} + +static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset) +{ +} + +static inline void frontswap_map_set(struct swap_info_struct *p, + unsigned long *map) +{ +} + +static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) +{ + return NULL; +} +#endif + +static inline int frontswap_put_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_put_page(page); + return ret; +} + +static inline int frontswap_get_page(struct page *page) +{ + int ret = -1; + + if (frontswap_enabled) + ret = __frontswap_get_page(page); + return ret; +} + +static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset) +{ + if (frontswap_enabled) + __frontswap_invalidate_page(type, offset); +} + +static inline void frontswap_invalidate_area(unsigned type) +{ + if (frontswap_enabled) + __frontswap_invalidate_area(type); +} + +static inline void frontswap_init(unsigned type) +{ + if (frontswap_enabled) + __frontswap_init(type); +} + +#endif /* _LINUX_FRONTSWAP_H */ -- cgit v1.2.3 From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001 From: Dan Magenheimer Date: Mon, 9 Apr 2012 17:08:06 -0600 Subject: mm: frontswap: core swap subsystem hooks and headers This patch, 2of4, contains the changes to the core swap subsystem. This includes: (1) makes available core swap data structures (swap_lock, swap_list and swap_info) that are needed by frontswap.c but we don't need to expose them to the dozens of files that include swap.h so we create a new swapfile.h just to extern-ify these and modify their declarations to non-static (2) adds frontswap-related elements to swap_info_struct. Frontswap_map points to vzalloc'ed one-bit-per-swap-page metadata that indicates whether the swap page is in frontswap or in the device and frontswap_pages counts how many pages are in frontswap. (3) adds hooks in the swap subsystem and extends try_to_unuse so that frontswap_shrink can do a "partial swapoff". Note that a failed frontswap_map allocation is safe... failure is noted by lack of "FS" in the subsequent printk. --- [v14: rebase to 3.4-rc2] [v10: no change] [v9: akpm@linux-foundation.org: mark some statics __read_mostly] [v9: akpm@linux-foundation.org: add clarifying comments] [v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse] [v9: error27@gmail.com: remove superfluous check for NULL] [v8: rebase to 3.0-rc4] [v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races] [v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters] [v7: rebase to 3.0-rc3] [v7: JBeulich@novell.com: add new swap struct elements only if config'd] [v6: rebase to 3.0-rc1] [v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails] [v6: konrad.wilk@oracl.com: various checks and code clarifications/comments] [v5: no change from v4] [v4: rebase to 2.6.39] Signed-off-by: Dan Magenheimer Reviewed-by: Kamezawa Hiroyuki Acked-by: Jan Beulich Acked-by: Seth Jennings Cc: Jeremy Fitzhardinge Cc: Hugh Dickins Cc: Johannes Weiner Cc: Nitin Gupta Cc: Matthew Wilcox Cc: Chris Mason Cc: Rik Riel Cc: Andrew Morton [v11: Rebased, fixed mm/swapfile.c context change] Signed-off-by: Konrad Rzeszutek Wilk --- include/linux/swap.h | 4 ++++ include/linux/swapfile.h | 13 ++++++++++++ mm/page_io.c | 12 +++++++++++ mm/swapfile.c | 54 ++++++++++++++++++++++++++++++++++++------------ 4 files changed, 70 insertions(+), 13 deletions(-) create mode 100644 include/linux/swapfile.h (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index b1fd5c7925fe..50a55e2d58ec 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -197,6 +197,10 @@ struct swap_info_struct { struct block_device *bdev; /* swap device or bdev of swap file */ struct file *swap_file; /* seldom referenced */ unsigned int old_block_size; /* seldom referenced */ +#ifdef CONFIG_FRONTSWAP + unsigned long *frontswap_map; /* frontswap in-use, one bit per page */ + atomic_t frontswap_pages; /* frontswap pages in-use counter */ +#endif }; struct swap_list_t { diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h new file mode 100644 index 000000000000..e282624e8c10 --- /dev/null +++ b/include/linux/swapfile.h @@ -0,0 +1,13 @@ +#ifndef _LINUX_SWAPFILE_H +#define _LINUX_SWAPFILE_H + +/* + * these were static in swapfile.c but frontswap.c needs them and we don't + * want to expose them to the dozens of source files that include swap.h + */ +extern spinlock_t swap_lock; +extern struct swap_list_t swap_list; +extern struct swap_info_struct *swap_info[]; +extern int try_to_unuse(unsigned int, bool, unsigned long); + +#endif /* _LINUX_SWAPFILE_H */ diff --git a/mm/page_io.c b/mm/page_io.c index dc76b4d0611e..651a91259317 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static struct bio *get_swap_bio(gfp_t gfp_flags, @@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } + if (frontswap_put_page(page) == 0) { + set_page_writeback(page); + unlock_page(page); + end_page_writeback(page); + goto out; + } bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); if (bio == NULL) { set_page_dirty(page); @@ -122,6 +129,11 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); + if (frontswap_get_page(page) == 0) { + SetPageUptodate(page); + unlock_page(page); + goto out; + } bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read); if (bio == NULL) { unlock_page(page); diff --git a/mm/swapfile.c b/mm/swapfile.c index fafc26d1b1dc..9c7be87175c5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t, static void free_swap_count_continuations(struct swap_info_struct *); static sector_t map_swap_entry(swp_entry_t, struct block_device**); -static DEFINE_SPINLOCK(swap_lock); +DEFINE_SPINLOCK(swap_lock); static unsigned int nr_swapfiles; long nr_swap_pages; long total_swap_pages; @@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; static const char Unused_offset[] = "Unused swap offset entry "; -static struct swap_list_t swap_list = {-1, -1}; +struct swap_list_t swap_list = {-1, -1}; -static struct swap_info_struct *swap_info[MAX_SWAPFILES]; +struct swap_info_struct *swap_info[MAX_SWAPFILES]; static DEFINE_MUTEX(swapon_mutex); @@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p, swap_list.next = p->type; nr_swap_pages++; p->inuse_pages--; + frontswap_invalidate_page(p->type, offset); if ((p->flags & SWP_BLKDEV) && disk->fops->swap_slot_free_notify) disk->fops->swap_slot_free_notify(p->bdev, offset); @@ -1016,11 +1019,12 @@ static int unuse_mm(struct mm_struct *mm, } /* - * Scan swap_map from current position to next entry still in use. + * Scan swap_map (or frontswap_map if frontswap parameter is true) + * from current position to next entry still in use. * Recycle to start on reaching the end, returning 0 when empty. */ static unsigned int find_next_to_unuse(struct swap_info_struct *si, - unsigned int prev) + unsigned int prev, bool frontswap) { unsigned int max = si->max; unsigned int i = prev; @@ -1046,6 +1050,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, prev = 0; i = 1; } + if (frontswap) { + if (frontswap_test(si, i)) + break; + else + continue; + } count = si->swap_map[i]; if (count && swap_count(count) != SWAP_MAP_BAD) break; @@ -1057,8 +1067,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, * We completely avoid races by reading each swap page in advance, * and then search for the process using it. All the necessary * page table adjustments can then be made atomically. + * + * if the boolean frontswap is true, only unuse pages_to_unuse pages; + * pages_to_unuse==0 means all pages; ignored if frontswap is false */ -static int try_to_unuse(unsigned int type) +int try_to_unuse(unsigned int type, bool frontswap, + unsigned long pages_to_unuse) { struct swap_info_struct *si = swap_info[type]; struct mm_struct *start_mm; @@ -1091,7 +1105,7 @@ static int try_to_unuse(unsigned int type) * one pass through swap_map is enough, but not necessarily: * there are races when an instance of an entry might be missed. */ - while ((i = find_next_to_unuse(si, i)) != 0) { + while ((i = find_next_to_unuse(si, i, frontswap)) != 0) { if (signal_pending(current)) { retval = -EINTR; break; @@ -1258,6 +1272,10 @@ static int try_to_unuse(unsigned int type) * interactive performance. */ cond_resched(); + if (frontswap && pages_to_unuse > 0) { + if (!--pages_to_unuse) + break; + } } mmput(start_mm); @@ -1517,7 +1535,8 @@ bad_bmap: } static void enable_swap_info(struct swap_info_struct *p, int prio, - unsigned char *swap_map) + unsigned char *swap_map, + unsigned long *frontswap_map) { int i, prev; @@ -1527,6 +1546,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, else p->prio = --least_priority; p->swap_map = swap_map; + frontswap_map_set(p, frontswap_map); p->flags |= SWP_WRITEOK; nr_swap_pages += p->pages; total_swap_pages += p->pages; @@ -1543,6 +1563,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio, swap_list.head = swap_list.next = p->type; else swap_info[prev]->next = p->type; + frontswap_init(p->type); spin_unlock(&swap_lock); } @@ -1616,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) spin_unlock(&swap_lock); oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX); - err = try_to_unuse(type); + err = try_to_unuse(type, false, 0); /* force all pages to be unused */ compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj); if (err) { @@ -1627,7 +1648,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) * sys_swapoff for this swap_info_struct at this point. */ /* re-insert swap space back into swap_list */ - enable_swap_info(p, p->prio, p->swap_map); + enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p)); goto out_dput; } @@ -1653,9 +1674,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; + frontswap_invalidate_area(type); spin_unlock(&swap_lock); mutex_unlock(&swapon_mutex); vfree(swap_map); + vfree(frontswap_map_get(p)); /* Destroy swap account informatin */ swap_cgroup_swapoff(type); @@ -2019,6 +2042,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) sector_t span; unsigned long maxpages; unsigned char *swap_map = NULL; + unsigned long *frontswap_map = NULL; struct page *page = NULL; struct inode *inode = NULL; @@ -2102,6 +2126,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = nr_extents; goto bad_swap; } + /* frontswap enabled? set up bit-per-page map for frontswap */ + if (frontswap_enabled) + frontswap_map = vzalloc(maxpages / sizeof(long)); if (p->bdev) { if (blk_queue_nonrot(bdev_get_queue(p->bdev))) { @@ -2117,14 +2144,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (swap_flags & SWAP_FLAG_PREFER) prio = (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT; - enable_swap_info(p, prio, swap_map); + enable_swap_info(p, prio, swap_map, frontswap_map); printk(KERN_INFO "Adding %uk swap on %s. " - "Priority:%d extents:%d across:%lluk %s%s\n", + "Priority:%d extents:%d across:%lluk %s%s%s\n", p->pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10), (p->flags & SWP_SOLIDSTATE) ? "SS" : "", - (p->flags & SWP_DISCARDABLE) ? "D" : ""); + (p->flags & SWP_DISCARDABLE) ? "D" : "", + (frontswap_map) ? "FS" : ""); mutex_unlock(&swapon_mutex); atomic_inc(&proc_poll_event); -- cgit v1.2.3 From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Tue, 15 May 2012 11:32:15 -0400 Subject: frontswap: s/put_page/store/g s/get_page/load Sounds so much more natural. Suggested-by: Andrea Arcangeli Signed-off-by: Konrad Rzeszutek Wilk --- Documentation/vm/frontswap.txt | 50 +++++++++++++++---------------- drivers/staging/ramster/zcache-main.c | 8 ++--- drivers/staging/zcache/zcache-main.c | 10 +++---- drivers/xen/tmem.c | 8 ++--- include/linux/frontswap.h | 16 +++++----- mm/frontswap.c | 56 +++++++++++++++++------------------ mm/page_io.c | 4 +-- 7 files changed, 76 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt index a9f731af0fac..37067cf455f4 100644 --- a/Documentation/vm/frontswap.txt +++ b/Documentation/vm/frontswap.txt @@ -21,21 +21,21 @@ frontswap_ops funcs appropriately and the functions it provides must conform to certain policies as follows: An "init" prepares the device to receive frontswap pages associated -with the specified swap device number (aka "type"). A "put_page" will +with the specified swap device number (aka "type"). A "store" will copy the page to transcendent memory and associate it with the type and -offset associated with the page. A "get_page" will copy the page, if found, +offset associated with the page. A "load" will copy the page, if found, from transcendent memory into kernel memory, but will NOT remove the page from from transcendent memory. An "invalidate_page" will remove the page from transcendent memory and an "invalidate_area" will remove ALL pages associated with the swap type (e.g., like swapoff) and notify the "device" -to refuse further puts with that swap type. +to refuse further stores with that swap type. -Once a page is successfully put, a matching get on the page will normally +Once a page is successfully stored, a matching load on the page will normally succeed. So when the kernel finds itself in a situation where it needs -to swap out a page, it first attempts to use frontswap. If the put returns +to swap out a page, it first attempts to use frontswap. If the store returns success, the data has been successfully saved to transcendent memory and a disk write and, if the data is later read back, a disk read are avoided. -If a put returns failure, transcendent memory has rejected the data, and the +If a store returns failure, transcendent memory has rejected the data, and the page can be written to swap as usual. If a backend chooses, frontswap can be configured as a "writethrough @@ -44,18 +44,18 @@ in swap device writes is lost (and also a non-trivial performance advantage) in order to allow the backend to arbitrarily "reclaim" space used to store frontswap pages to more completely manage its memory usage. -Note that if a page is put and the page already exists in transcendent memory -(a "duplicate" put), either the put succeeds and the data is overwritten, -or the put fails AND the page is invalidated. This ensures stale data may +Note that if a page is stored and the page already exists in transcendent memory +(a "duplicate" store), either the store succeeds and the data is overwritten, +or the store fails AND the page is invalidated. This ensures stale data may never be obtained from frontswap. If properly configured, monitoring of frontswap is done via debugfs in the /sys/kernel/debug/frontswap directory. The effectiveness of frontswap can be measured (across all swap devices) with: -failed_puts - how many put attempts have failed -gets - how many gets were attempted (all should succeed) -succ_puts - how many put attempts have succeeded +failed_stores - how many store attempts have failed +loads - how many loads were attempted (all should succeed) +succ_stores - how many store attempts have succeeded invalidates - how many invalidates were attempted A backend implementation may provide additional metrics. @@ -125,7 +125,7 @@ nothingness and the only overhead is a few extra bytes per swapon'ed swap device. If CONFIG_FRONTSWAP is enabled but no frontswap "backend" registers, there is one extra global variable compared to zero for every swap page read or written. If CONFIG_FRONTSWAP is enabled -AND a frontswap backend registers AND the backend fails every "put" +AND a frontswap backend registers AND the backend fails every "store" request (i.e. provides no memory despite claiming it might), CPU overhead is still negligible -- and since every frontswap fail precedes a swap page write-to-disk, the system is highly likely @@ -159,13 +159,13 @@ entirely dynamic and random. Whenever a swap-device is swapon'd frontswap_init() is called, passing the swap device number (aka "type") as a parameter. -This notifies frontswap to expect attempts to "put" swap pages +This notifies frontswap to expect attempts to "store" swap pages associated with that number. Whenever the swap subsystem is readying a page to write to a swap -device (c.f swap_writepage()), frontswap_put_page is called. Frontswap +device (c.f swap_writepage()), frontswap_store is called. Frontswap consults with the frontswap backend and if the backend says it does NOT -have room, frontswap_put_page returns -1 and the kernel swaps the page +have room, frontswap_store returns -1 and the kernel swaps the page to the swap device as normal. Note that the response from the frontswap backend is unpredictable to the kernel; it may choose to never accept a page, it could accept every ninth page, or it might accept every @@ -177,7 +177,7 @@ corresponding to the page offset on the swap device to which it would otherwise have written the data. When the swap subsystem needs to swap-in a page (swap_readpage()), -it first calls frontswap_get_page() which checks the frontswap_map to +it first calls frontswap_load() which checks the frontswap_map to see if the page was earlier accepted by the frontswap backend. If it was, the page of data is filled from the frontswap backend and the swap-in is complete. If not, the normal swap-in code is @@ -185,7 +185,7 @@ executed to obtain the page of data from the real swap device. So every time the frontswap backend accepts a page, a swap device read and (potentially) a swap device write are replaced by a "frontswap backend -put" and (possibly) a "frontswap backend get", which are presumably much +store" and (possibly) a "frontswap backend loads", which are presumably much faster. 4) Can't frontswap be configured as a "special" swap device that is @@ -215,8 +215,8 @@ that are inappropriate for a RAM-oriented device including delaying the write of some pages for a significant amount of time. Synchrony is required to ensure the dynamicity of the backend and to avoid thorny race conditions that would unnecessarily and greatly complicate frontswap -and/or the block I/O subsystem. That said, only the initial "put" -and "get" operations need be synchronous. A separate asynchronous thread +and/or the block I/O subsystem. That said, only the initial "store" +and "load" operations need be synchronous. A separate asynchronous thread is free to manipulate the pages stored by frontswap. For example, the "remotification" thread in RAMster uses standard asynchronous kernel sockets to move compressed frontswap pages to a remote machine. @@ -229,7 +229,7 @@ choose to accept pages only until host-swapping might be imminent, then force guests to do their own swapping. There is a downside to the transcendent memory specifications for -frontswap: Since any "put" might fail, there must always be a real +frontswap: Since any "store" might fail, there must always be a real slot on a real swap device to swap the page. Thus frontswap must be implemented as a "shadow" to every swapon'd device with the potential capability of holding every page that the swap device might have held @@ -240,16 +240,16 @@ installation, frontswap is useless. Swapless portable devices can still use frontswap but a backend for such devices must configure some kind of "ghost" swap device and ensure that it is never used. -5) Why this weird definition about "duplicate puts"? If a page - has been previously successfully put, can't it always be +5) Why this weird definition about "duplicate stores"? If a page + has been previously successfully stored, can't it always be successfully overwritten? Nearly always it can, but no, sometimes it cannot. Consider an example where data is compressed and the original 4K page has been compressed to 1K. Now an attempt is made to overwrite the page with data that is non-compressible and so would take the entire 4K. But the backend -has no more space. In this case, the put must be rejected. Whenever -frontswap rejects a put that would overwrite, it also must invalidate +has no more space. In this case, the store must be rejected. Whenever +frontswap rejects a store that would overwrite, it also must invalidate the old data and ensure that it is no longer accessible. Since the swap subsystem then writes the new data to the read swap device, this is the correct course of action to ensure coherency. diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c index 68b2e053a0e6..2627b3de0d21 100644 --- a/drivers/staging/ramster/zcache-main.c +++ b/drivers/staging/ramster/zcache-main.c @@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c index 2734dacacbaf..784c796b9848 100644 --- a/drivers/staging/zcache/zcache-main.c +++ b/drivers/staging/zcache/zcache-main.c @@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1; * Swizzling increases objects per swaptype, increasing tmem concurrency * for heavy swaploads. Later, larger nr_cpus -> larger SWIZ_BITS * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from - * frontswap_get_page(), but has side-effects. Hence using 8. + * frontswap_load(), but has side-effects. Hence using 8. */ #define SWIZ_BITS 8 #define SWIZ_MASK ((1 << SWIZ_BITS) - 1) @@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) return oid; } -static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset, /* returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int zcache_frontswap_get_page(unsigned type, pgoff_t offset, +static int zcache_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored) } static struct frontswap_ops zcache_frontswap_ops = { - .put_page = zcache_frontswap_put_page, - .get_page = zcache_frontswap_get_page, + .store = zcache_frontswap_store, + .load = zcache_frontswap_load, .invalidate_page = zcache_frontswap_flush_page, .invalidate_area = zcache_frontswap_flush_area, .init = zcache_frontswap_init diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c index dcb79521e6c8..89f264c67420 100644 --- a/drivers/xen/tmem.c +++ b/drivers/xen/tmem.c @@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind) } /* returns 0 if the page was successfully put into frontswap, -1 if not */ -static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_store(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset, * returns 0 if the page was successfully gotten from frontswap, -1 if * was not present (should never happen!) */ -static int tmem_frontswap_get_page(unsigned type, pgoff_t offset, +static int tmem_frontswap_load(unsigned type, pgoff_t offset, struct page *page) { u64 ind64 = (u64)offset; @@ -362,8 +362,8 @@ static int __init no_frontswap(char *s) __setup("nofrontswap", no_frontswap); static struct frontswap_ops __initdata tmem_frontswap_ops = { - .put_page = tmem_frontswap_put_page, - .get_page = tmem_frontswap_get_page, + .store = tmem_frontswap_store, + .load = tmem_frontswap_load, .invalidate_page = tmem_frontswap_flush_page, .invalidate_area = tmem_frontswap_flush_area, .init = tmem_frontswap_init diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h index 68ff7af5c5fb..0e4e2eec5c1d 100644 --- a/include/linux/frontswap.h +++ b/include/linux/frontswap.h @@ -7,8 +7,8 @@ struct frontswap_ops { void (*init)(unsigned); - int (*put_page)(unsigned, pgoff_t, struct page *); - int (*get_page)(unsigned, pgoff_t, struct page *); + int (*store)(unsigned, pgoff_t, struct page *); + int (*load)(unsigned, pgoff_t, struct page *); void (*invalidate_page)(unsigned, pgoff_t); void (*invalidate_area)(unsigned); }; @@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void); extern void frontswap_writethrough(bool); extern void __frontswap_init(unsigned type); -extern int __frontswap_put_page(struct page *page); -extern int __frontswap_get_page(struct page *page); +extern int __frontswap_store(struct page *page); +extern int __frontswap_load(struct page *page); extern void __frontswap_invalidate_page(unsigned, pgoff_t); extern void __frontswap_invalidate_area(unsigned); @@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p) } #endif -static inline int frontswap_put_page(struct page *page) +static inline int frontswap_store(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_put_page(page); + ret = __frontswap_store(page); return ret; } -static inline int frontswap_get_page(struct page *page) +static inline int frontswap_load(struct page *page) { int ret = -1; if (frontswap_enabled) - ret = __frontswap_get_page(page); + ret = __frontswap_load(page); return ret; } diff --git a/mm/frontswap.c b/mm/frontswap.c index 8c0a5f8683f0..e25025574a02 100644 --- a/mm/frontswap.c +++ b/mm/frontswap.c @@ -39,7 +39,7 @@ bool frontswap_enabled __read_mostly; EXPORT_SYMBOL(frontswap_enabled); /* - * If enabled, frontswap_put will return failure even on success. As + * If enabled, frontswap_store will return failure even on success. As * a result, the swap subsystem will always write the page to swap, in * effect converting frontswap into a writethrough cache. In this mode, * there is no direct reduction in swap writes, but a frontswap backend @@ -54,27 +54,27 @@ static bool frontswap_writethrough_enabled __read_mostly; * properly configured). These are for information only so are not protected * against increment races. */ -static u64 frontswap_gets; -static u64 frontswap_succ_puts; -static u64 frontswap_failed_puts; +static u64 frontswap_loads; +static u64 frontswap_succ_stores; +static u64 frontswap_failed_stores; static u64 frontswap_invalidates; -static inline void inc_frontswap_gets(void) { - frontswap_gets++; +static inline void inc_frontswap_loads(void) { + frontswap_loads++; } -static inline void inc_frontswap_succ_puts(void) { - frontswap_succ_puts++; +static inline void inc_frontswap_succ_stores(void) { + frontswap_succ_stores++; } -static inline void inc_frontswap_failed_puts(void) { - frontswap_failed_puts++; +static inline void inc_frontswap_failed_stores(void) { + frontswap_failed_stores++; } static inline void inc_frontswap_invalidates(void) { frontswap_invalidates++; } #else -static inline void inc_frontswap_gets(void) { } -static inline void inc_frontswap_succ_puts(void) { } -static inline void inc_frontswap_failed_puts(void) { } +static inline void inc_frontswap_loads(void) { } +static inline void inc_frontswap_succ_stores(void) { } +static inline void inc_frontswap_failed_stores(void) { } static inline void inc_frontswap_invalidates(void) { } #endif /* @@ -116,13 +116,13 @@ void __frontswap_init(unsigned type) EXPORT_SYMBOL(__frontswap_init); /* - * "Put" data from a page to frontswap and associate it with the page's + * "Store" data from a page to frontswap and associate it with the page's * swaptype and offset. Page must be locked and in the swap cache. * If frontswap already contains a page with matching swaptype and * offset, the frontswap implmentation may either overwrite the data and * return success or invalidate the page from frontswap and return failure. */ -int __frontswap_put_page(struct page *page) +int __frontswap_store(struct page *page) { int ret = -1, dup = 0; swp_entry_t entry = { .val = page_private(page), }; @@ -134,10 +134,10 @@ int __frontswap_put_page(struct page *page) BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) dup = 1; - ret = (*frontswap_ops.put_page)(type, offset, page); + ret = (*frontswap_ops.store)(type, offset, page); if (ret == 0) { frontswap_set(sis, offset); - inc_frontswap_succ_puts(); + inc_frontswap_succ_stores(); if (!dup) atomic_inc(&sis->frontswap_pages); } else if (dup) { @@ -147,22 +147,22 @@ int __frontswap_put_page(struct page *page) */ frontswap_clear(sis, offset); atomic_dec(&sis->frontswap_pages); - inc_frontswap_failed_puts(); + inc_frontswap_failed_stores(); } else - inc_frontswap_failed_puts(); + inc_frontswap_failed_stores(); if (frontswap_writethrough_enabled) /* report failure so swap also writes to swap device */ ret = -1; return ret; } -EXPORT_SYMBOL(__frontswap_put_page); +EXPORT_SYMBOL(__frontswap_store); /* * "Get" data from frontswap associated with swaptype and offset that were * specified when the data was put to frontswap and use it to fill the * specified page with data. Page must be locked and in the swap cache. */ -int __frontswap_get_page(struct page *page) +int __frontswap_load(struct page *page) { int ret = -1; swp_entry_t entry = { .val = page_private(page), }; @@ -173,12 +173,12 @@ int __frontswap_get_page(struct page *page) BUG_ON(!PageLocked(page)); BUG_ON(sis == NULL); if (frontswap_test(sis, offset)) - ret = (*frontswap_ops.get_page)(type, offset, page); + ret = (*frontswap_ops.load)(type, offset, page); if (ret == 0) - inc_frontswap_gets(); + inc_frontswap_loads(); return ret; } -EXPORT_SYMBOL(__frontswap_get_page); +EXPORT_SYMBOL(__frontswap_load); /* * Invalidate any data from frontswap associated with the specified swaptype @@ -301,10 +301,10 @@ static int __init init_frontswap(void) struct dentry *root = debugfs_create_dir("frontswap", NULL); if (root == NULL) return -ENXIO; - debugfs_create_u64("gets", S_IRUGO, root, &frontswap_gets); - debugfs_create_u64("succ_puts", S_IRUGO, root, &frontswap_succ_puts); - debugfs_create_u64("failed_puts", S_IRUGO, root, - &frontswap_failed_puts); + debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads); + debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores); + debugfs_create_u64("failed_stores", S_IRUGO, root, + &frontswap_failed_stores); debugfs_create_u64("invalidates", S_IRUGO, root, &frontswap_invalidates); #endif diff --git a/mm/page_io.c b/mm/page_io.c index 651a91259317..34f02923744c 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -99,7 +99,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) unlock_page(page); goto out; } - if (frontswap_put_page(page) == 0) { + if (frontswap_store(page) == 0) { set_page_writeback(page); unlock_page(page); end_page_writeback(page); @@ -129,7 +129,7 @@ int swap_readpage(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageUptodate(page)); - if (frontswap_get_page(page) == 0) { + if (frontswap_load(page) == 0) { SetPageUptodate(page); unlock_page(page); goto out; -- cgit v1.2.3 From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Wed, 9 May 2012 23:39:34 +0900 Subject: clockevents: Make clockevents_config() a global symbol Make clockevents_config() into a global symbol to allow it to be used by compiled-in clockevent drivers. This is needed by drivers that want to update the timer frequency after registration time. Signed-off-by: Magnus Damm Tested-by: Simon Horman Cc: arnd@arndb.de Cc: johnstul@us.ibm.com Cc: rjw@sisk.pl Cc: lethal@linux-sh.org Cc: gregkh@linuxfoundation.org Cc: olof@lixom.net Cc: Magnus Damm Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520 Signed-off-by: Thomas Gleixner --- include/linux/clockchips.h | 1 + kernel/time/clockevents.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 81e803e90aa4..acba894374a1 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *evt); extern void clockevents_register_device(struct clock_event_device *dev); +extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 9cd928f7a7c6..7e1ce012a851 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev) } EXPORT_SYMBOL_GPL(clockevents_register_device); -static void clockevents_config(struct clock_event_device *dev, - u32 freq) +void clockevents_config(struct clock_event_device *dev, u32 freq) { u64 sec; -- cgit v1.2.3 From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 May 2012 17:15:29 +0200 Subject: sched/nohz: Fix rq->cpu_load calculations some more Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[] calculations") since while that fixed the busy case it regressed the mostly idle case. Add a callback from the nohz exit to also age the rq->cpu_load[] array. This closes the hole where either there was no nohz load balance pass during the nohz, or there was a 'significant' amount of idle time between the last nohz balance and the nohz exit. So we'll update unconditionally from the tick to not insert any accidental 0 load periods while busy, and we try and catch up from nohz idle balance and nohz exit. Both these are still prone to missing a jiffy, but that has always been the case. Signed-off-by: Peter Zijlstra Cc: pjt@google.com Cc: Venkatesh Pallipadi Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/sched/core.c | 53 +++++++++++++++++++++++++++++++++++++++--------- kernel/time/tick-sched.c | 1 + 3 files changed, 45 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f45c0b280b5d..d61e5977e517 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void); extern void calc_global_load(unsigned long ticks); +extern void update_cpu_load_nohz(void); extern unsigned long get_parent_ip(unsigned long addr); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 39eb6011bc38..75844a8f9aeb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load, sched_avg_update(this_rq); } +#ifdef CONFIG_NO_HZ +/* + * There is no sane way to deal with nohz on smp when using jiffies because the + * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading + * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}. + * + * Therefore we cannot use the delta approach from the regular tick since that + * would seriously skew the load calculation. However we'll make do for those + * updates happening while idle (nohz_idle_balance) or coming out of idle + * (tick_nohz_idle_exit). + * + * This means we might still be one tick off for nohz periods. + */ + /* * Called from nohz_idle_balance() to update the load ratings before doing the * idle balance. */ void update_idle_cpu_load(struct rq *this_rq) { - unsigned long curr_jiffies = jiffies; + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); unsigned long load = this_rq->load.weight; unsigned long pending_updates; /* - * Bloody broken means of dealing with nohz, but better than nothing.. - * jiffies is updated by one cpu, another cpu can drift wrt the jiffy - * update and see 0 difference the one time and 2 the next, even though - * we ticked at roughtly the same rate. - * - * Hence we only use this from nohz_idle_balance() and skip this - * nonsense when called from the scheduler_tick() since that's - * guaranteed a stable rate. + * bail if there's load or we're actually up-to-date. */ if (load || curr_jiffies == this_rq->last_load_update_tick) return; @@ -2546,13 +2553,39 @@ void update_idle_cpu_load(struct rq *this_rq) __update_cpu_load(this_rq, load, pending_updates); } +/* + * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed. + */ +void update_cpu_load_nohz(void) +{ + struct rq *this_rq = this_rq(); + unsigned long curr_jiffies = ACCESS_ONCE(jiffies); + unsigned long pending_updates; + + if (curr_jiffies == this_rq->last_load_update_tick) + return; + + raw_spin_lock(&this_rq->lock); + pending_updates = curr_jiffies - this_rq->last_load_update_tick; + if (pending_updates) { + this_rq->last_load_update_tick = curr_jiffies; + /* + * We were idle, this means load 0, the current load might be + * !0 due to remote wakeups and the sort. + */ + __update_cpu_load(this_rq, 0, pending_updates); + } + raw_spin_unlock(&this_rq->lock); +} +#endif /* CONFIG_NO_HZ */ + /* * Called from scheduler_tick() */ static void update_cpu_load_active(struct rq *this_rq) { /* - * See the mess in update_idle_cpu_load(). + * See the mess around update_idle_cpu_load() / update_cpu_load_nohz(). */ this_rq->last_load_update_tick = jiffies; __update_cpu_load(this_rq, this_rq->load.weight, 1); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff561..0c927cd85345 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void) /* Update jiffies first */ select_nohz_load_balancer(0); tick_do_update_jiffies64(now); + update_cpu_load_nohz(); #ifndef CONFIG_VIRT_CPU_ACCOUNTING /* -- cgit v1.2.3 From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Apr 2012 12:11:21 +0200 Subject: sched: Move nr_cpus_allowed out of 'struct sched_rt_entity' Since nr_cpus_allowed is used outside of sched/rt.c and wants to be used outside of there more, move it to a more natural site. Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org Signed-off-by: Ingo Molnar --- arch/blackfin/kernel/process.c | 2 +- include/linux/init_task.h | 2 +- include/linux/sched.h | 2 +- kernel/sched/core.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 36 +++++++++++++++++++++--------------- 6 files changed, 26 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 2e3994b20169..62bcea7dcc6d 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs) unsigned long newsp; #ifdef __ARCH_SYNC_CORE_DCACHE - if (current->rt.nr_cpus_allowed == num_possible_cpus()) + if (current->nr_cpus_allowed == num_possible_cpus()) set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id())); #endif diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e4baff5f7ff4..9e65eff6af3b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -149,6 +149,7 @@ extern struct cred init_cred; .normal_prio = MAX_PRIO-20, \ .policy = SCHED_NORMAL, \ .cpus_allowed = CPU_MASK_ALL, \ + .nr_cpus_allowed= NR_CPUS, \ .mm = NULL, \ .active_mm = &init_mm, \ .se = { \ @@ -157,7 +158,6 @@ extern struct cred init_cred; .rt = { \ .run_list = LIST_HEAD_INIT(tsk.rt.run_list), \ .time_slice = RR_TIMESLICE, \ - .nr_cpus_allowed = NR_CPUS, \ }, \ .tasks = LIST_HEAD_INIT(tsk.tasks), \ INIT_PUSHABLE_TASKS(tsk) \ diff --git a/include/linux/sched.h b/include/linux/sched.h index d61e5977e517..0f50e78f7f44 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1188,7 +1188,6 @@ struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned int time_slice; - int nr_cpus_allowed; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED @@ -1253,6 +1252,7 @@ struct task_struct { #endif unsigned int policy; + int nr_cpus_allowed; cpumask_t cpus_allowed; #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a69374fb427..70cc36a6073f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5015,7 +5015,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask) p->sched_class->set_cpus_allowed(p, new_mask); cpumask_copy(&p->cpus_allowed, new_mask); - p->rt.nr_cpus_allowed = cpumask_weight(new_mask); + p->nr_cpus_allowed = cpumask_weight(new_mask); } /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 2b449a762074..b2a2d236f27b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags) int want_sd = 1; int sync = wake_flags & WF_SYNC; - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) return prev_cpu; if (sd_flag & SD_BALANCE_WAKE) { diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index c5565c3c515f..295da737b6fe 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq) static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total++; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory++; update_rt_migration(rt_rq); @@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) { + struct task_struct *p; + if (!rt_entity_is_task(rt_se)) return; + p = rt_task_of(rt_se); rt_rq = &rq_of_rt_rq(rt_rq)->rt; rt_rq->rt_nr_total--; - if (rt_se->nr_cpus_allowed > 1) + if (p->nr_cpus_allowed > 1) rt_rq->rt_nr_migratory--; update_rt_migration(rt_rq); @@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags) enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD); - if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) + if (!task_current(rq, p) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); inc_nr_running(rq); @@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) cpu = task_cpu(p); - if (p->rt.nr_cpus_allowed == 1) + if (p->nr_cpus_allowed == 1) goto out; /* For anything but wake ups, just return the task_cpu */ @@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags) * will have to sort it out. */ if (curr && unlikely(rt_task(curr)) && - (curr->rt.nr_cpus_allowed < 2 || + (curr->nr_cpus_allowed < 2 || curr->prio <= p->prio) && - (p->rt.nr_cpus_allowed > 1)) { + (p->nr_cpus_allowed > 1)) { int target = find_lowest_rq(p); if (target != -1) @@ -1276,10 +1282,10 @@ out: static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) { - if (rq->curr->rt.nr_cpus_allowed == 1) + if (rq->curr->nr_cpus_allowed == 1) return; - if (p->rt.nr_cpus_allowed != 1 + if (p->nr_cpus_allowed != 1 && cpupri_find(&rq->rd->cpupri, p, NULL)) return; @@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) * The previous task needs to be made eligible for pushing * if it is still active */ - if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1) + if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1) enqueue_pushable_task(rq, p); } @@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) { if (!task_running(rq, p) && (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) && - (p->rt.nr_cpus_allowed > 1)) + (p->nr_cpus_allowed > 1)) return 1; return 0; } @@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task) if (unlikely(!lowest_mask)) return -1; - if (task->rt.nr_cpus_allowed == 1) + if (task->nr_cpus_allowed == 1) return -1; /* No other targets possible */ if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) @@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq) BUG_ON(rq->cpu != task_cpu(p)); BUG_ON(task_current(rq, p)); - BUG_ON(p->rt.nr_cpus_allowed <= 1); + BUG_ON(p->nr_cpus_allowed <= 1); BUG_ON(!p->on_rq); BUG_ON(!rt_task(p)); @@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p) if (!task_running(rq, p) && !test_tsk_need_resched(rq->curr) && has_pushable_tasks(rq) && - p->rt.nr_cpus_allowed > 1 && + p->nr_cpus_allowed > 1 && rt_task(rq->curr) && - (rq->curr->rt.nr_cpus_allowed < 2 || + (rq->curr->nr_cpus_allowed < 2 || rq->curr->prio <= p->prio)) push_rt_tasks(rq); } @@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p, * Only update if the process changes its state from whether it * can migrate or not. */ - if ((p->rt.nr_cpus_allowed > 1) == (weight > 1)) + if ((p->nr_cpus_allowed > 1) == (weight > 1)) return; rq = task_rq(p); -- cgit v1.2.3 From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 31 May 2012 14:43:27 +0900 Subject: perf tools: Check if callchain is corrupted We faced segmentation fault on perf top -G at very high sampling rate due to a corrupted callchain. While the root cause was not revealed (I failed to figure it out), this patch tries to protect us from the segfault on such cases. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Sunjin Yang Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 4 ++-- tools/perf/util/session.c | 14 +++++++++++++- 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f32578634d9d..1817d4015e5f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,6 +555,8 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; +#define PERF_MAX_STACK_DEPTH 255 + enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, PERF_CONTEXT_KERNEL = (__u64)-128, @@ -609,8 +611,6 @@ struct perf_guest_info_callbacks { #include #include -#define PERF_MAX_STACK_DEPTH 255 - struct perf_callchain_entry { __u64 nr; __u64 ip[PERF_MAX_STACK_DEPTH]; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 3b6f8e460a31..04d1e33f4592 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -300,6 +300,11 @@ int machine__resolve_callchain(struct machine *self, callchain_cursor_reset(&callchain_cursor); + if (chain->nr > PERF_MAX_STACK_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + for (i = 0; i < chain->nr; i++) { u64 ip; struct addr_location al; @@ -318,7 +323,14 @@ int machine__resolve_callchain(struct machine *self, case PERF_CONTEXT_USER: cpumode = PERF_RECORD_MISC_USER; break; default: - break; + pr_debug("invalid callchain context: " + "%"PRId64"\n", (s64) ip); + /* + * It seems the callchain is corrupted. + * Discard all. + */ + callchain_cursor_reset(&callchain_cursor); + return 0; } continue; } -- cgit v1.2.3 From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Fri, 18 May 2012 09:29:34 -0600 Subject: i2c: Add generic I2C multiplexer using pinctrl API This is useful for SoCs whose I2C module's signals can be routed to different sets of pins at run-time, using the pinctrl API. +-----+ +-----+ | dev | | dev | +------------------------+ +-----+ +-----+ | SoC | | | | /----|------+--------+ | +---+ +------+ | child bus A, on first set of pins | |I2C|---|Pinmux| | | +---+ +------+ | child bus B, on second set of pins | \----|------+--------+--------+ | | | | | +------------------------+ +-----+ +-----+ +-----+ | dev | | dev | | dev | +-----+ +-----+ +-----+ Signed-off-by: Stephen Warren Acked-by: Linus Walleij Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-mux-pinctrl.txt | 93 +++++++ drivers/i2c/muxes/Kconfig | 12 + drivers/i2c/muxes/Makefile | 1 + drivers/i2c/muxes/i2c-mux-pinctrl.c | 279 +++++++++++++++++++++ include/linux/i2c-mux-pinctrl.h | 41 +++ 5 files changed, 426 insertions(+) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt create mode 100644 drivers/i2c/muxes/i2c-mux-pinctrl.c create mode 100644 include/linux/i2c-mux-pinctrl.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt new file mode 100644 index 000000000000..ae8af1694e95 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt @@ -0,0 +1,93 @@ +Pinctrl-based I2C Bus Mux + +This binding describes an I2C bus multiplexer that uses pin multiplexing to +route the I2C signals, and represents the pin multiplexing configuration +using the pinctrl device tree bindings. + + +-----+ +-----+ + | dev | | dev | + +------------------------+ +-----+ +-----+ + | SoC | | | + | /----|------+--------+ + | +---+ +------+ | child bus A, on first set of pins + | |I2C|---|Pinmux| | + | +---+ +------+ | child bus B, on second set of pins + | \----|------+--------+--------+ + | | | | | + +------------------------+ +-----+ +-----+ +-----+ + | dev | | dev | | dev | + +-----+ +-----+ +-----+ + +Required properties: +- compatible: i2c-mux-pinctrl +- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side + port is connected to. + +Also required are: + +* Standard pinctrl properties that specify the pin mux state for each child + bus. See ../pinctrl/pinctrl-bindings.txt. + +* Standard I2C mux properties. See mux.txt in this directory. + +* I2C child bus nodes. See mux.txt in this directory. + +For each named state defined in the pinctrl-names property, an I2C child bus +will be created. I2C child bus numbers are assigned based on the index into +the pinctrl-names property. + +The only exception is that no bus will be created for a state named "idle". If +such a state is defined, it must be the last entry in pinctrl-names. For +example: + + pinctrl-names = "ddc", "pta", "idle" -> ddc = bus 0, pta = bus 1 + pinctrl-names = "ddc", "idle", "pta" -> Invalid ("idle" not last) + pinctrl-names = "idle", "ddc", "pta" -> Invalid ("idle" not last) + +Whenever an access is made to a device on a child bus, the relevant pinctrl +state will be programmed into hardware. + +If an idle state is defined, whenever an access is not being made to a device +on a child bus, the idle pinctrl state will be programmed into hardware. + +If an idle state is not defined, the most recently used pinctrl state will be +left programmed into hardware whenever no access is being made of a device on +a child bus. + +Example: + + i2cmux { + compatible = "i2c-mux-pinctrl"; + #address-cells = <1>; + #size-cells = <0>; + + i2c-parent = <&i2c1>; + + pinctrl-names = "ddc", "pta", "idle"; + pinctrl-0 = <&state_i2cmux_ddc>; + pinctrl-1 = <&state_i2cmux_pta>; + pinctrl-2 = <&state_i2cmux_idle>; + + i2c@0 { + reg = <0>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + + i2c@1 { + reg = <1>; + #address-cells = <1>; + #size-cells = <0>; + + eeprom { + compatible = "eeprom"; + reg = <0x50>; + }; + }; + }; + diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig index beb2491db274..a0edd9854218 100644 --- a/drivers/i2c/muxes/Kconfig +++ b/drivers/i2c/muxes/Kconfig @@ -37,4 +37,16 @@ config I2C_MUX_PCA954x This driver can also be built as a module. If so, the module will be called i2c-mux-pca954x. +config I2C_MUX_PINCTRL + tristate "pinctrl-based I2C multiplexer" + depends on PINCTRL + help + If you say yes to this option, support will be included for an I2C + multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing. + This is useful for SoCs whose I2C module's signals can be routed to + different sets of pins at run-time. + + This driver can also be built as a module. If so, the module will be + called pinctrl-i2cmux. + endmenu diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile index 5826249b29ca..76da8692afff 100644 --- a/drivers/i2c/muxes/Makefile +++ b/drivers/i2c/muxes/Makefile @@ -4,5 +4,6 @@ obj-$(CONFIG_I2C_MUX_GPIO) += i2c-mux-gpio.o obj-$(CONFIG_I2C_MUX_PCA9541) += i2c-mux-pca9541.o obj-$(CONFIG_I2C_MUX_PCA954x) += i2c-mux-pca954x.o +obj-$(CONFIG_I2C_MUX_PINCTRL) += i2c-mux-pinctrl.o ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c new file mode 100644 index 000000000000..46a669763476 --- /dev/null +++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c @@ -0,0 +1,279 @@ +/* + * I2C multiplexer using pinctrl API + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct i2c_mux_pinctrl { + struct device *dev; + struct i2c_mux_pinctrl_platform_data *pdata; + struct pinctrl *pinctrl; + struct pinctrl_state **states; + struct pinctrl_state *state_idle; + struct i2c_adapter *parent; + struct i2c_adapter **busses; +}; + +static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->states[chan]); +} + +static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data, + u32 chan) +{ + struct i2c_mux_pinctrl *mux = data; + + return pinctrl_select_state(mux->pinctrl, mux->state_idle); +} + +#ifdef CONFIG_OF +static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + struct device_node *np = pdev->dev.of_node; + int num_names, i, ret; + struct device_node *adapter_np; + struct i2c_adapter *adapter; + + if (!np) + return 0; + + mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL); + if (!mux->pdata) { + dev_err(mux->dev, + "Cannot allocate i2c_mux_pinctrl_platform_data\n"); + return -ENOMEM; + } + + num_names = of_property_count_strings(np, "pinctrl-names"); + if (num_names < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + num_names); + return num_names; + } + + mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev, + sizeof(*mux->pdata->pinctrl_states) * num_names, + GFP_KERNEL); + if (!mux->pdata->pinctrl_states) { + dev_err(mux->dev, "Cannot allocate pinctrl_states\n"); + return -ENOMEM; + } + + for (i = 0; i < num_names; i++) { + ret = of_property_read_string_index(np, "pinctrl-names", i, + &mux->pdata->pinctrl_states[mux->pdata->bus_count]); + if (ret < 0) { + dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n", + ret); + return ret; + } + if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count], + "idle")) { + if (i != num_names - 1) { + dev_err(mux->dev, "idle state must be last\n"); + return -EINVAL; + } + mux->pdata->pinctrl_state_idle = "idle"; + } else { + mux->pdata->bus_count++; + } + } + + adapter_np = of_parse_phandle(np, "i2c-parent", 0); + if (!adapter_np) { + dev_err(mux->dev, "Cannot parse i2c-parent\n"); + return -ENODEV; + } + adapter = of_find_i2c_adapter_by_node(adapter_np); + if (!adapter) { + dev_err(mux->dev, "Cannot find parent bus\n"); + return -ENODEV; + } + mux->pdata->parent_bus_num = i2c_adapter_id(adapter); + put_device(&adapter->dev); + + return 0; +} +#else +static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux, + struct platform_device *pdev) +{ + return 0; +} +#endif + +static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux; + int (*deselect)(struct i2c_adapter *, void *, u32); + int i, ret; + + mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL); + if (!mux) { + dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n"); + ret = -ENOMEM; + goto err; + } + platform_set_drvdata(pdev, mux); + + mux->dev = &pdev->dev; + + mux->pdata = pdev->dev.platform_data; + if (!mux->pdata) { + ret = i2c_mux_pinctrl_parse_dt(mux, pdev); + if (ret < 0) + goto err; + } + if (!mux->pdata) { + dev_err(&pdev->dev, "Missing platform data\n"); + ret = -ENODEV; + goto err; + } + + mux->states = devm_kzalloc(&pdev->dev, + sizeof(*mux->states) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate states\n"); + ret = -ENOMEM; + goto err; + } + + mux->busses = devm_kzalloc(&pdev->dev, + sizeof(mux->busses) * mux->pdata->bus_count, + GFP_KERNEL); + if (!mux->states) { + dev_err(&pdev->dev, "Cannot allocate busses\n"); + ret = -ENOMEM; + goto err; + } + + mux->pinctrl = devm_pinctrl_get(&pdev->dev); + if (IS_ERR(mux->pinctrl)) { + ret = PTR_ERR(mux->pinctrl); + dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret); + goto err; + } + for (i = 0; i < mux->pdata->bus_count; i++) { + mux->states[i] = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_states[i]); + if (IS_ERR(mux->states[i])) { + ret = PTR_ERR(mux->states[i]); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_states[i], ret); + goto err; + } + } + if (mux->pdata->pinctrl_state_idle) { + mux->state_idle = pinctrl_lookup_state(mux->pinctrl, + mux->pdata->pinctrl_state_idle); + if (IS_ERR(mux->state_idle)) { + ret = PTR_ERR(mux->state_idle); + dev_err(&pdev->dev, + "Cannot look up pinctrl state %s: %d\n", + mux->pdata->pinctrl_state_idle, ret); + goto err; + } + + deselect = i2c_mux_pinctrl_deselect; + } else { + deselect = NULL; + } + + mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num); + if (!mux->parent) { + dev_err(&pdev->dev, "Parent adapter (%d) not found\n", + mux->pdata->parent_bus_num); + ret = -ENODEV; + goto err; + } + + for (i = 0; i < mux->pdata->bus_count; i++) { + u32 bus = mux->pdata->base_bus_num ? + (mux->pdata->base_bus_num + i) : 0; + + mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev, + mux, bus, i, + i2c_mux_pinctrl_select, + deselect); + if (!mux->busses[i]) { + ret = -ENODEV; + dev_err(&pdev->dev, "Failed to add adapter %d\n", i); + goto err_del_adapter; + } + } + + return 0; + +err_del_adapter: + for (; i > 0; i--) + i2c_del_mux_adapter(mux->busses[i - 1]); + i2c_put_adapter(mux->parent); +err: + return ret; +} + +static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev) +{ + struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev); + int i; + + for (i = 0; i < mux->pdata->bus_count; i++) + i2c_del_mux_adapter(mux->busses[i]); + + i2c_put_adapter(mux->parent); + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = { + { .compatible = "i2c-mux-pinctrl", }, + {}, +}; +MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match); +#endif + +static struct platform_driver i2c_mux_pinctrl_driver = { + .driver = { + .name = "i2c-mux-pinctrl", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match), + }, + .probe = i2c_mux_pinctrl_probe, + .remove = __devexit_p(i2c_mux_pinctrl_remove), +}; +module_platform_driver(i2c_mux_pinctrl_driver); + +MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver"); +MODULE_AUTHOR("Stephen Warren "); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:i2c-mux-pinctrl"); diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h new file mode 100644 index 000000000000..a65c86429e84 --- /dev/null +++ b/include/linux/i2c-mux-pinctrl.h @@ -0,0 +1,41 @@ +/* + * i2c-mux-pinctrl platform data + * + * Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef _LINUX_I2C_MUX_PINCTRL_H +#define _LINUX_I2C_MUX_PINCTRL_H + +/** + * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl + * @parent_bus_num: Parent I2C bus number + * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic. + * @bus_count: Number of child busses. Also the number of elements in + * @pinctrl_states + * @pinctrl_states: The names of the pinctrl state to select for each child bus + * @pinctrl_state_idle: The pinctrl state to select when no child bus is being + * accessed. If NULL, the most recently used pinctrl state will be left + * selected. + */ +struct i2c_mux_pinctrl_platform_data { + int parent_bus_num; + int base_bus_num; + int bus_count; + const char **pinctrl_states; + const char *pinctrl_state_idle; +}; + +#endif -- cgit v1.2.3 From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 09:16:47 -0400 Subject: NFSv4: Fix an Oops in the open recovery code The open recovery code does not need to request a new value for the mdsthreshold, and so does not allocate a struct nfs4_threshold. The problem is that encode_getfattr_open() will still request an mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold. This patch fixes encode_getfattr_open so that it doesn't request an mdsthreshold when the caller isn't asking for one. It also fixes decode_attr_mdsthreshold so that it errors if the server returns an mdsthreshold that we didn't ask for (instead of Oopsing). Signed-off-by: Trond Myklebust Cc: Andy Adamson --- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 22 +++++++++++++++++++++- fs/nfs/nfs4xdr.c | 12 ++++++++---- include/linux/nfs_xdr.h | 1 + 4 files changed, 31 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index c6827f93ab57..cc5900ac61b5 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp) extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; -extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_fattr_bitmap[3]; extern const u32 nfs4_statfs_bitmap[2]; extern const u32 nfs4_pathconf_bitmap[2]; extern const u32 nfs4_fsinfo_bitmap[3]; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d48dbefa0e71..ad1515521a6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -116,7 +116,7 @@ static int nfs4_map_errors(int err) /* * This is our standard bitmap for GETATTR requests. */ -const u32 nfs4_fattr_bitmap[2] = { +const u32 nfs4_fattr_bitmap[3] = { FATTR4_WORD0_TYPE | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE @@ -133,6 +133,24 @@ const u32 nfs4_fattr_bitmap[2] = { | FATTR4_WORD1_TIME_MODIFY }; +static const u32 nfs4_pnfs_open_bitmap[3] = { + FATTR4_WORD0_TYPE + | FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEID, + FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY, + FATTR4_WORD2_MDSTHRESHOLD +}; + const u32 nfs4_statfs_bitmap[2] = { FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE @@ -844,6 +862,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; + p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0]; p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; if (attrs != NULL && attrs->ia_valid != 0) { __be32 verf[2]; @@ -1820,6 +1839,7 @@ static int _nfs4_do_open(struct inode *dir, opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); if (!opendata->f_attr.mdsthreshold) goto err_opendata_put; + opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; } if (dentry->d_inode != NULL) opendata->state = nfs4_get_open_state(dentry->d_inode, sp); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ee4a74db95d0..9ca1428da9d9 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c } static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask, + const u32 *open_bitmap, struct compound_hdr *hdr) { encode_getattr_three(xdr, - bitmask[0] & nfs4_fattr_bitmap[0], - bitmask[1] & nfs4_fattr_bitmap[1], - bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD, + bitmask[0] & open_bitmap[0], + bitmask[1] & open_bitmap[1], + bitmask[2] & open_bitmap[2], hdr); } @@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_putfh(xdr, args->fh, &hdr); encode_open(xdr, args, &hdr); encode_getfh(xdr, &hdr); - encode_getfattr_open(xdr, args->bitmask, &hdr); + encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); encode_nops(&hdr); } @@ -4360,6 +4361,9 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr, if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U))) return -EIO; if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) { + /* Did the server return an unrequested attribute? */ + if (unlikely(res == NULL)) + return -EREMOTEIO; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d1a7bf51c326..7519baef025b 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -348,6 +348,7 @@ struct nfs_openargs { const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + const u32 * open_bitmap; __u32 claim; struct nfs4_sequence_args seq_args; }; -- cgit v1.2.3 From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 5 Jun 2012 21:36:33 +0400 Subject: radix-tree: fix contiguous iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes bug in macro radix_tree_for_each_contig(). If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following radix_tree_next_chunk() switches iterating into next chunk. As result iterating becomes non-contiguous and breaks vfs "splice" and all its users. Signed-off-by: Konstantin Khlebnikov Reported-and-bisected-by: Hans de Bruin Reported-and-bisected-by: Ondrej Zary Reported-bisected-and-tested-by: Toralf Förster Link: https://lkml.org/lkml/2012/6/5/64 Cc: stable # 3.4.x Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 5 ++++- lib/radix-tree.c | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0d04cd69ab9b..ffc444c38b0a 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags) iter->index++; if (likely(*slot)) return slot; - if (flags & RADIX_TREE_ITER_CONTIG) + if (flags & RADIX_TREE_ITER_CONTIG) { + /* forbid switching to the next chunk */ + iter->next_index = 0; break; + } } } return NULL; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d7c878cc006c..e7964296fd50 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, * during iterating; it can be zero only at the beginning. * And we cannot overflow iter->next_index in a single step, * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG. + * + * This condition also used by radix_tree_next_slot() to stop + * contiguous iterating, and forbid swithing to the next chunk. */ index = iter->next_index; if (!index && iter->index) -- cgit v1.2.3 From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Jun 2012 18:32:03 -0400 Subject: NFS: Fix a commit bug The new commit code fails to copy the verifier into the wb_verf field of _all_ the nfs_page structures; it only copies it into the first entry. The consequence is that most requests end up failing to match in nfs_commit_release. Fix is to copy the verifier into the req->wb_verf field in nfs_write_completion. Signed-off-by: Trond Myklebust Cc: Fred Isaman --- fs/nfs/direct.c | 4 ++-- fs/nfs/write.c | 7 ++++--- include/linux/nfs_xdr.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 23d170bc44f4..b5385a7efd56 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -710,12 +710,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES) bit = NFS_IOHDR_NEED_RESCHED; else if (dreq->flags == 0) { - memcpy(&dreq->verf, &req->wb_verf, + memcpy(&dreq->verf, hdr->verf, sizeof(dreq->verf)); bit = NFS_IOHDR_NEED_COMMIT; dreq->flags = NFS_ODIRECT_DO_COMMIT; } else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { - if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) { + if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { dreq->flags = NFS_ODIRECT_RESCHED_WRITES; bit = NFS_IOHDR_NEED_RESCHED; } else diff --git a/fs/nfs/write.c b/fs/nfs/write.c index e6fe3d69d14c..4d6861c0dc14 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void) INIT_LIST_HEAD(&hdr->rpc_list); spin_lock_init(&hdr->lock); atomic_set(&hdr->refcnt, 0); + hdr->verf = &p->verf; } return p; } @@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr) goto next; } if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) { + memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf)); nfs_mark_request_commit(req, hdr->lseg, &cinfo); goto next; } @@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata) struct nfs_write_data *data = calldata; struct nfs_pgio_header *hdr = data->header; int status = data->task.tk_status; - struct nfs_page *req = hdr->req; if ((status >= 0) && nfs_write_need_commit(data)) { spin_lock(&hdr->lock); if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) ; /* Do nothing */ else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) - memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); - else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) + memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf)); + else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf))) set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags); spin_unlock(&hdr->lock); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 7519baef025b..8aadd90b808a 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1237,6 +1237,7 @@ struct nfs_pgio_header { struct list_head rpc_list; atomic_t refcnt; struct nfs_page *req; + struct nfs_writeverf *verf; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; @@ -1274,6 +1275,7 @@ struct nfs_write_data { struct nfs_write_header { struct nfs_pgio_header header; struct nfs_write_data rpc_data; + struct nfs_writeverf verf; }; struct nfs_mds_commit_info { -- cgit v1.2.3 From 2a0fe914a38745f5b03534c4e4f4056cbd6978b8 Mon Sep 17 00:00:00 2001 From: Yong Ding Date: Tue, 15 May 2012 13:09:43 +0800 Subject: mmc: sdio: fix setting card data bus width as 4-bit SDIO_CCCR_IF[1:0] in SDIO card is used for card data bus width setting as below: 00b: 1-bit bus 01b: Reserved 10b: 4-bit bus 11b: 8-bit bus (only for embedded SDIO) And sdio_enable_wide is for setting data bus width as 4-bit. But currently, it first reads the register, second OR' 1b with SDIO_CCCR_IF[1], and then writes it back. As we can see, this is based on such assumption that the SDIO_CCCR_IF[0] is always 0. Apparently, this is not right. Signed-off-by: Yong Ding Acked-by: Philip Rakity Signed-off-by: Chris Ball --- drivers/mmc/core/sdio.c | 6 ++++++ include/linux/mmc/sdio.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index 13d0e95380ab..41c5fd8848f4 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -218,6 +218,12 @@ static int sdio_enable_wide(struct mmc_card *card) if (ret) return ret; + if ((ctrl & SDIO_BUS_WIDTH_MASK) == SDIO_BUS_WIDTH_RESERVED) + pr_warning("%s: SDIO_CCCR_IF is invalid: 0x%02x\n", + mmc_hostname(card->host), ctrl); + + /* set as 4-bit bus width */ + ctrl &= ~SDIO_BUS_WIDTH_MASK; ctrl |= SDIO_BUS_WIDTH_4BIT; ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL); diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index c9fe66c58f8f..17446d3c3602 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -98,7 +98,9 @@ #define SDIO_CCCR_IF 0x07 /* bus interface controls */ +#define SDIO_BUS_WIDTH_MASK 0x03 /* data bus width setting */ #define SDIO_BUS_WIDTH_1BIT 0x00 +#define SDIO_BUS_WIDTH_RESERVED 0x01 #define SDIO_BUS_WIDTH_4BIT 0x02 #define SDIO_BUS_ECSI 0x20 /* Enable continuous SPI interrupt */ #define SDIO_BUS_SCSI 0x40 /* Support continuous SPI interrupt */ -- cgit v1.2.3 From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 31 May 2012 14:47:33 +0200 Subject: sched: Fix domain iteration Weird topologies can lead to asymmetric domain setups. This needs further consideration since these setups are typically non-minimal too. For now, make it work by adding an extra mask selecting which CPUs are allowed to iterate up. The topology that triggered it is the one from David Rientjes: 10 20 20 30 20 10 20 20 20 20 10 20 30 20 20 10 resulting in boxes that wouldn't even boot. Reported-by: David Rientjes Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++++++ kernel/sched/core.c | 64 ++++++++++++++++++++++++++++++++++++++++++++------- kernel/sched/fair.c | 5 ++-- kernel/sched/sched.h | 2 ++ 4 files changed, 72 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..ac321d753470 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -876,6 +876,8 @@ struct sched_group_power { * Number of busy cpus in this group. */ atomic_t nr_busy_cpus; + + unsigned long cpumask[0]; /* iteration mask */ }; struct sched_group { @@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg) return to_cpumask(sg->cpumask); } +/* + * cpumask masking which cpus in the group are allowed to iterate up the domain + * tree. + */ +static inline struct cpumask *sched_group_mask(struct sched_group *sg) +{ + return to_cpumask(sg->sgp->cpumask); +} + /** * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. * @group: The group whose first cpu is to be returned. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6546083af3e0..781acb91a50a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5994,6 +5994,44 @@ struct sched_domain_topology_level { struct sd_data data; }; +/* + * Build an iteration mask that can exclude certain CPUs from the upwards + * domain traversal. + * + * Asymmetric node setups can result in situations where the domain tree is of + * unequal depth, make sure to skip domains that already cover the entire + * range. + * + * In that case build_sched_domains() will have terminated the iteration early + * and our sibling sd spans will be empty. Domains should always include the + * cpu they're built on, so check that. + * + */ +static void build_group_mask(struct sched_domain *sd, struct sched_group *sg) +{ + const struct cpumask *span = sched_domain_span(sd); + struct sd_data *sdd = sd->private; + struct sched_domain *sibling; + int i; + + for_each_cpu(i, span) { + sibling = *per_cpu_ptr(sdd->sd, i); + if (!cpumask_test_cpu(i, sched_domain_span(sibling))) + continue; + + cpumask_set_cpu(i, sched_group_mask(sg)); + } +} + +/* + * Return the canonical balance cpu for this group, this is the first cpu + * of this group that's also in the iteration mask. + */ +int group_balance_cpu(struct sched_group *sg) +{ + return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg)); +} + static int build_overlap_sched_groups(struct sched_domain *sd, int cpu) { @@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) if (cpumask_test_cpu(i, covered)) continue; + child = *per_cpu_ptr(sdd->sd, i); + + /* See the comment near build_group_mask(). */ + if (!cpumask_test_cpu(i, sched_domain_span(child))) + continue; + sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(), GFP_KERNEL, cpu_to_node(cpu)); @@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) goto fail; sg_span = sched_group_cpus(sg); - - child = *per_cpu_ptr(sdd->sd, i); if (child->child) { child = child->child; cpumask_copy(sg_span, sched_domain_span(child)); @@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu) cpumask_or(covered, covered, sg_span); sg->sgp = *per_cpu_ptr(sdd->sgp, i); - atomic_inc(&sg->sgp->ref); + if (atomic_inc_return(&sg->sgp->ref) == 1) + build_group_mask(sd, sg); + + /* + * Make sure the first group of this domain contains the + * canonical balance cpu. Otherwise the sched_domain iteration + * breaks. See update_sg_lb_stats(). + */ if ((!groups && cpumask_test_cpu(cpu, sg_span)) || - cpumask_first(sg_span) == cpu) { - WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span)); + group_balance_cpu(sg) == cpu) groups = sg; - } if (!first) first = sg; @@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu) cpumask_clear(sched_group_cpus(sg)); sg->sgp->power = 0; + cpumask_setall(sched_group_mask(sg)); for_each_cpu(j, span) { if (get_group(j, sdd, NULL) != group) @@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd) sg = sg->next; } while (sg != sd->groups); - if (cpu != group_first_cpu(sg)) + if (cpu != group_balance_cpu(sg)) return; update_group_power(sd, cpu); @@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map) *per_cpu_ptr(sdd->sg, j) = sg; - sgp = kzalloc_node(sizeof(struct sched_group_power), + sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(), GFP_KERNEL, cpu_to_node(j)); if (!sgp) return -ENOMEM; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b2a2d236f27b..54cbaa4e7b37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env, int i; if (local_group) - balance_cpu = group_first_cpu(group); + balance_cpu = group_balance_cpu(group); /* Tally up the load of all CPUs in the group */ max_cpu_load = 0; @@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env, /* Bias balancing toward cpus of our domain */ if (local_group) { - if (idle_cpu(i) && !first_idle_cpu) { + if (idle_cpu(i) && !first_idle_cpu && + cpumask_test_cpu(i, sched_group_mask(group))) { first_idle_cpu = 1; balance_cpu = i; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ba9dccfd24ce..6d52cea7f33d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag) DECLARE_PER_CPU(struct sched_domain *, sd_llc); DECLARE_PER_CPU(int, sd_llc_id); +extern int group_balance_cpu(struct sched_group *sg); + #endif /* CONFIG_SMP */ #include "stats.h" -- cgit v1.2.3 From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001 From: Arun Sharma Date: Fri, 20 Apr 2012 15:41:34 -0700 Subject: perf: Limit callchains to 127 Stack depth of 255 seems excessive, given that copy_from_user_nmi() could be slow. Signed-off-by: Arun Sharma Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 1817d4015e5f..45db49f64bb4 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -555,7 +555,7 @@ enum perf_event_type { PERF_RECORD_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 255 +#define PERF_MAX_STACK_DEPTH 127 enum perf_callchain_context { PERF_CONTEXT_HV = (__u64)-32, -- cgit v1.2.3 From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 May 2012 16:41:44 -0700 Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick() calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the next wakeup time based on the timer wheels. Only later, when actually entering the idle loop, rcu_prepare_for_idle() will be invoked. In some cases, rcu_prepare_for_idle() will post timers to wake the CPU back up. But all for naught: The next wakeup time for the CPU has already been computed, and posting a timer afterwards does not force that wakeup time to be recomputed. This means that rcu_prepare_for_idle()'s have no effect. This is not a problem on a busy system because something else will wake up the CPU soon enough. However, on lightly loaded systems, the CPU might stay asleep for a considerable length of time. If that CPU has a callback that the rest of the system is waiting on, the system might run very slowly or (in theory) even hang. This commit avoids this problem by having rcu_needs_cpu() give tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU to wake back up, which tick_nohz_stop_sched_tick() takes into account when programming the CPU's wakeup time. An alternative approach is for rcu_prepare_for_idle() to use hrtimers instead of normal timers, but timers are much more efficient than are hrtimers for frequently and repeatedly posting and cancelling a given timer, which is exactly what RCU_FAST_NO_HZ does. Reported-by: Pascal Chapperon Reported-by: Heiko Carstens Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Heiko Carstens Tested-by: Pascal Chapperon --- include/linux/rcutiny.h | 6 +++-- include/linux/rcutree.h | 2 +- kernel/rcutree_plugin.h | 66 +++++++++++++++++++++++++++++++----------------- kernel/time/tick-sched.c | 7 ++++- 4 files changed, 54 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index adb5e5a38cae..854dc4c5c271 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return 0; } @@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu) int rcu_preempt_needs_cpu(void); -static inline int rcu_needs_cpu(int cpu) +static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_preempt_needs_cpu(); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 3c6083cde4fc..952b79339304 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -32,7 +32,7 @@ extern void rcu_init(void); extern void rcu_note_context_switch(int cpu); -extern int rcu_needs_cpu(int cpu); +extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies); extern void rcu_cpu_stall_reset(void); /* diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 6bd9637d5d83..5271a020887e 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs * any flavor of RCU. */ -int rcu_needs_cpu(int cpu) +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { + *delta_jiffies = ULONG_MAX; return rcu_cpu_has_callbacks(cpu); } @@ -1962,28 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void) #define RCU_IDLE_GP_DELAY 6 /* Roughly one grace period. */ #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ -/* - * Allow the CPU to enter dyntick-idle mode if either: (1) There are no - * callbacks on this CPU, (2) this CPU has not yet attempted to enter - * dyntick-idle mode, or (3) this CPU is in the process of attempting to - * enter dyntick-idle mode. Otherwise, if we have recently tried and failed - * to enter dyntick-idle mode, we refuse to try to enter it. After all, - * it is better to incur scheduling-clock interrupts than to spin - * continuously for the same time duration! - */ -int rcu_needs_cpu(int cpu) -{ - struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); - - /* Flag a new idle sojourn to the idle-entry state machine. */ - rdtp->idle_first_pass = 1; - /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(cpu)) - return 0; - /* Otherwise, RCU needs the CPU only if it recently tried and failed. */ - return rdtp->dyntick_holdoff == jiffies; -} - /* * Does the specified flavor of RCU have non-lazy callbacks pending on * the specified CPU? Both RCU flavor and CPU are specified by the @@ -2026,6 +2005,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu) rcu_preempt_cpu_has_nonlazy_callbacks(cpu); } +/* + * Allow the CPU to enter dyntick-idle mode if either: (1) There are no + * callbacks on this CPU, (2) this CPU has not yet attempted to enter + * dyntick-idle mode, or (3) this CPU is in the process of attempting to + * enter dyntick-idle mode. Otherwise, if we have recently tried and failed + * to enter dyntick-idle mode, we refuse to try to enter it. After all, + * it is better to incur scheduling-clock interrupts than to spin + * continuously for the same time duration! + * + * The delta_jiffies argument is used to store the time when RCU is + * going to need the CPU again if it still has callbacks. The reason + * for this is that rcu_prepare_for_idle() might need to post a timer, + * but if so, it will do so after tick_nohz_stop_sched_tick() has set + * the wakeup time for this CPU. This means that RCU's timer can be + * delayed until the wakeup time, which defeats the purpose of posting + * a timer. + */ +int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) +{ + struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); + + /* Flag a new idle sojourn to the idle-entry state machine. */ + rdtp->idle_first_pass = 1; + /* If no callbacks, RCU doesn't need the CPU. */ + if (!rcu_cpu_has_callbacks(cpu)) { + *delta_jiffies = ULONG_MAX; + return 0; + } + if (rdtp->dyntick_holdoff == jiffies) { + /* RCU recently tried and failed, so don't try again. */ + *delta_jiffies = 1; + return 1; + } + /* Set up for the possibility that RCU will post a timer. */ + if (rcu_cpu_has_nonlazy_callbacks(cpu)) + *delta_jiffies = RCU_IDLE_GP_DELAY; + else + *delta_jiffies = RCU_IDLE_LAZY_GP_DELAY; + return 0; +} + /* * Handler for smp_call_function_single(). The only point of this * handler is to wake the CPU up, so the handler does only tracing. diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 6a3a5b9ff561..52f5ebbd443b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us); static void tick_nohz_stop_sched_tick(struct tick_sched *ts) { unsigned long seq, last_jiffies, next_jiffies, delta_jiffies; + unsigned long rcu_delta_jiffies; ktime_t last_update, expires, now; struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; @@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) time_delta = timekeeping_max_deferment(); } while (read_seqretry(&xtime_lock, seq)); - if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) || + if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) || arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; @@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); delta_jiffies = next_jiffies - last_jiffies; + if (rcu_delta_jiffies < delta_jiffies) { + next_jiffies = last_jiffies + rcu_delta_jiffies; + delta_jiffies = rcu_delta_jiffies; + } } /* * Do not stop the tick, if we are only one off -- cgit v1.2.3 From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Thu, 17 May 2012 22:35:46 +0000 Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing This patch addresses two issues: a) Fix usage of u32 and __be32 that causes endianess warnings via sparse. b) Ensure consistent hashing in a cluster that is composed of big and little endian systems. Thus, we obtain the same hash mark in an heterogeneous cluster. Reported-by: Dan Carpenter Signed-off-by: Hans Schillstrom Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/xt_HMARK.h | 5 +++ net/netfilter/xt_HMARK.c | 72 ++++++++++++++++++++++---------------- 2 files changed, 46 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h index abb1650940d2..826fc5807577 100644 --- a/include/linux/netfilter/xt_HMARK.h +++ b/include/linux/netfilter/xt_HMARK.h @@ -27,7 +27,12 @@ union hmark_ports { __u16 src; __u16 dst; } p16; + struct { + __be16 src; + __be16 dst; + } b16; __u32 v32; + __be32 b32; }; struct xt_hmark_info { diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 0a96a43108ed..1686ca1b53a1 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK"); MODULE_ALIAS("ip6t_HMARK"); struct hmark_tuple { - u32 src; - u32 dst; + __be32 src; + __be32 dst; union hmark_ports uports; - uint8_t proto; + u8 proto; }; -static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) +static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask) { return (addr32[0] & mask[0]) ^ (addr32[1] & mask[1]) ^ @@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask) (addr32[3] & mask[3]); } -static inline u32 -hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) +static inline __be32 +hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask) { switch (l3num) { case AF_INET: @@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask) return 0; } +static inline void hmark_swap_ports(union hmark_ports *uports, + const struct xt_hmark_info *info) +{ + union hmark_ports hp; + u16 src, dst; + + hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32; + src = ntohs(hp.b16.src); + dst = ntohs(hp.b16.dst); + + if (dst > src) + uports->v32 = (dst << 16) | src; + else + uports->v32 = (src << 16) | dst; +} + static int hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, const struct xt_hmark_info *info) @@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all, - info->src_mask.all); - t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all, - info->dst_mask.all); + t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6, + info->src_mask.ip6); + t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6, + info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; t->proto = nf_ct_protonum(ct); if (t->proto != IPPROTO_ICMP) { - t->uports.p16.src = otuple->src.u.all; - t->uports.p16.dst = rtuple->src.u.all; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + t->uports.b16.src = otuple->src.u.all; + t->uports.b16.dst = rtuple->src.u.all; + hmark_swap_ports(&t->uports, info); } return 0; @@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t, #endif } +/* This hash function is endian independent, to ensure consistent hashing if + * the cluster is composed of big and little endian systems. */ static inline u32 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info) { u32 hash; + u32 src = ntohl(t->src); + u32 dst = ntohl(t->dst); - if (t->dst < t->src) - swap(t->src, t->dst); + if (dst < src) + swap(src, dst); - hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd); + hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd); hash = hash ^ (t->proto & info->proto_mask); return (((u64)hash * info->hmodulus) >> 32) + info->hoffset; @@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff, if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0) return; - t->uports.v32 = (t->uports.v32 & info->port_mask.v32) | - info->port_set.v32; - - if (t->uports.p16.dst < t->uports.p16.src) - swap(t->uports.p16.dst, t->uports.p16.src); + hmark_swap_ports(&t->uports, info); } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) @@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t, return -1; } noicmp: - t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all); - t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all); + t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6); + t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6); if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; @@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t, } } - t->src = (__force u32) ip->saddr; - t->dst = (__force u32) ip->daddr; - - t->src &= info->src_mask.ip; - t->dst &= info->dst_mask.ip; + t->src = ip->saddr & info->src_mask.ip; + t->dst = ip->daddr & info->dst_mask.ip; if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3)) return 0; -- cgit v1.2.3 From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 7 Jun 2012 14:21:11 -0700 Subject: c/r: prctl: update prctl_set_mm_exe_file() after mm->num_exe_file_vmas removal A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new mm_struct::exe_file"). After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until final mmput(), it never becomes NULL while task is alive. We can check for other mapped files in mm instead of checking mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in order to forbid second changing of mm->exe_file. Signed-off-by: Konstantin Khlebnikov Reviewed-by: Cyrill Gorcunov Cc: Oleg Nesterov Cc: Matt Helsley Cc: Kees Cook Cc: KOSAKI Motohiro Cc: Tejun Heo Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + kernel/sys.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6029d8c54476..c688d4cc2e40 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm); /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ +#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) diff --git a/kernel/sys.c b/kernel/sys.c index 9ff89cb9657a..54f20fdee93c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1796,17 +1796,11 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma, static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { + struct vm_area_struct *vma; struct file *exe_file; struct dentry *dentry; int err; - /* - * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's - * remain. So perform a quick test first. - */ - if (mm->num_exe_file_vmas) - return -EBUSY; - exe_file = fget(fd); if (!exe_file) return -EBADF; @@ -1827,17 +1821,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) if (err) goto exit; + down_write(&mm->mmap_sem); + + /* + * Forbid mm->exe_file change if there are mapped other files. + */ + err = -EBUSY; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (vma->vm_file && !path_equal(&vma->vm_file->f_path, + &exe_file->f_path)) + goto exit_unlock; + } + /* * The symlink can be changed only once, just to disallow arbitrary * transitions malicious software might bring in. This means one * could make a snapshot over all processes running and monitor * /proc/pid/exe changes to notice unusual activity if needed. */ - down_write(&mm->mmap_sem); - if (likely(!mm->exe_file)) - set_mm_exe_file(mm, exe_file); - else - err = -EBUSY; + err = -EPERM; + if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags)) + goto exit_unlock; + + set_mm_exe_file(mm, exe_file); +exit_unlock: up_write(&mm->mmap_sem); exit: -- cgit v1.2.3 From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 7 Jun 2012 14:21:12 -0700 Subject: c/r: prctl: add ability to get clear_tid_address Zero is written at clear_tid_address when the process exits. This functionality is used by pthread_join(). We already have sys_set_tid_address() to change this address for the current task but there is no way to obtain it from user space. Without the ability to find this address and dump it we can't restore pthread'ed apps which call pthread_join() once they have been restored. This patch introduces the PR_GET_TID_ADDRESS prctl option which allows the current process to obtain own clear_tid_address. This feature is available iif CONFIG_CHECKPOINT_RESTORE is set. [akpm@linux-foundation.org: fix prctl numbering] Signed-off-by: Andrew Vagin Signed-off-by: Cyrill Gorcunov Cc: Pedro Alves Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Tejun Heo Acked-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/prctl.h | 10 ++++++---- kernel/sys.c | 13 +++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 711e0a30aacc..3988012255dc 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -127,8 +127,8 @@ #define PR_SET_PTRACER 0x59616d61 # define PR_SET_PTRACER_ANY ((unsigned long)-1) -#define PR_SET_CHILD_SUBREAPER 36 -#define PR_GET_CHILD_SUBREAPER 37 +#define PR_SET_CHILD_SUBREAPER 36 +#define PR_GET_CHILD_SUBREAPER 37 /* * If no_new_privs is set, then operations that grant new privileges (i.e. @@ -142,7 +142,9 @@ * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. */ -#define PR_SET_NO_NEW_PRIVS 38 -#define PR_GET_NO_NEW_PRIVS 39 +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 + +#define PR_GET_TID_ADDRESS 40 #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 19a2c7139960..0ec1942ba7ea 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1988,12 +1988,22 @@ out: up_read(&mm->mmap_sem); return error; } + +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return put_user(me->clear_child_tid, tid_addr); +} + #else /* CONFIG_CHECKPOINT_RESTORE */ static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { return -EINVAL; } +static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr) +{ + return -EINVAL; +} #endif SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, @@ -2131,6 +2141,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, else return -EINVAL; break; + case PR_GET_TID_ADDRESS: + error = prctl_get_tid_address(me, (int __user **)arg2); + break; default: return -EINVAL; } -- cgit v1.2.3 From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 8 Jun 2012 14:58:13 +0930 Subject: module_param: stop double-calling parameters. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 026cee0086fe1df4cf74691cf273062cc769617d "params: _initcall-like kernel parameters" set old-style module parameters to level 0. And we call those level 0 calls where we used to, early in start_kernel(). We also loop through the initcall levels and call the levelled module_params before the corresponding initcall. Unfortunately level 0 is early_init(), so we call the standard module_param calls twice. (Turns out most things don't care, but at least ubi.mtd does). Change the level to -1 for standard module_param calls. Reported-by: Benoît Thébaudeau Signed-off-by: Rusty Russell Cc: stable@kernel.org --- include/linux/moduleparam.h | 10 +++++----- init/main.c | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1b14d25162cb..d6a58065c09c 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -128,7 +128,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) /** * _param_cb - general callback for a module/cmdline parameter @@ -192,7 +192,7 @@ struct kparam_array { (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, 0) + (perm) + sizeof(__check_old_set_param(set))*0, -1) /* We don't get oldget: it's often a new-style param_get_uint, etc. */ static inline int @@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, ¶m_ops_##type, &var, perm, 0) + __module_param_call("", name, ¶m_ops_##type, &var, perm, -1) #endif /* !MODULE */ /** @@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_ops_string, \ - .str = &__param_string_##name, perm, 0); \ + .str = &__param_string_##name, perm, -1); \ __MODULE_PARM_TYPE(name, "string") /** @@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ ¶m_array_ops, \ .arr = &__param_arr_##name, \ - perm, 0); \ + perm, -1); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/init/main.c b/init/main.c index 1ca6b32c4828..37e12098eac1 100644 --- a/init/main.c +++ b/init/main.c @@ -508,7 +508,7 @@ asmlinkage void __init start_kernel(void) parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, - 0, 0, &unknown_bootoption); + -1, -1, &unknown_bootoption); jump_label_init(); -- cgit v1.2.3 From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 7 Jun 2012 12:15:15 +0200 Subject: vga_switcheroo: Add a helper function to get the client state Add vga_switcheroo_get_client_state() to get the current state of the client. This is necessary to determine the proper initial state of audio clients in HD-audio driver. Acked-by: Dave Airlie Signed-off-by: Takashi Iwai --- drivers/gpu/vga/vga_switcheroo.c | 13 +++++++++++++ include/linux/vga_switcheroo.h | 7 +++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c index 38f9534ac513..eb4f64f0a565 100644 --- a/drivers/gpu/vga/vga_switcheroo.c +++ b/drivers/gpu/vga/vga_switcheroo.c @@ -190,6 +190,19 @@ find_active_client(struct list_head *head) return NULL; } +int vga_switcheroo_get_client_state(struct pci_dev *pdev) +{ + struct vga_switcheroo_client *client; + + client = find_client_from_pci(&vgasr_priv.clients, pdev); + if (!client) + return VGA_SWITCHEROO_NOT_FOUND; + if (!vgasr_priv.active) + return VGA_SWITCHEROO_INIT; + return client->pwr_state; +} +EXPORT_SYMBOL(vga_switcheroo_get_client_state); + void vga_switcheroo_unregister_client(struct pci_dev *pdev) { struct vga_switcheroo_client *client; diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b455c7c212eb..b176342ca031 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -12,6 +12,9 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, + /* below are referred only from vga_switcheroo_get_client_state() */ + VGA_SWITCHEROO_INIT, + VGA_SWITCHEROO_NOT_FOUND, }; enum vga_switcheroo_client_id { @@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); +int vga_switcheroo_get_client_state(struct pci_dev *dev); + #else static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} @@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } + #endif -- cgit v1.2.3 From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 8 Jun 2012 12:49:17 +0200 Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n. Signed-off-by: Takashi Iwai --- include/linux/vga_switcheroo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index b176342ca031..60da41fe9dc2 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id, bool active) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; } +static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } #endif -- cgit v1.2.3 From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001 From: Paul Pluzhnikov Date: Sat, 9 Jun 2012 07:53:03 -0700 Subject: net: Make linux/tcp.h C++ friendly (trivial) I originally sent this patch to , but Jiri Kosina did not feel that this is fully appropriate for the trivial tree. Using linux/tcp.h from C++ results in: cat t.cc #include int main() { } g++ -c t.cc In file included from t.cc:1: /usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression /usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression ... Attached trivial patch fixes this problem. Tested: - the t.cc above compiles with g++ and - the following program generates the same output before/after the patch: #include #include int main () { #define P(a) printf("%s: %08x\n", #a, (int)a) P(TCP_FLAG_CWR); P(TCP_FLAG_ECE); P(TCP_FLAG_URG); P(TCP_FLAG_ACK); P(TCP_FLAG_PSH); P(TCP_FLAG_RST); P(TCP_FLAG_SYN); P(TCP_FLAG_FIN); P(TCP_RESERVED_BITS); P(TCP_DATA_OFFSET); #undef P return 0; } Signed-off-by: Paul Pluzhnikov Signed-off-by: David S. Miller --- include/linux/tcp.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4c5b63283377..5f359dbfcdce 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -69,16 +69,16 @@ union tcp_word_hdr { #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) enum { - TCP_FLAG_CWR = __cpu_to_be32(0x00800000), - TCP_FLAG_ECE = __cpu_to_be32(0x00400000), - TCP_FLAG_URG = __cpu_to_be32(0x00200000), - TCP_FLAG_ACK = __cpu_to_be32(0x00100000), - TCP_FLAG_PSH = __cpu_to_be32(0x00080000), - TCP_FLAG_RST = __cpu_to_be32(0x00040000), - TCP_FLAG_SYN = __cpu_to_be32(0x00020000), - TCP_FLAG_FIN = __cpu_to_be32(0x00010000), - TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000), - TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000) + TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000), + TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000), + TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000), + TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000), + TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000), + TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000), + TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000), + TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000), + TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000), + TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000) }; /* -- cgit v1.2.3 From 601722157b3f6be73623644eeae6f14940f0bd8f Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:03 +0800 Subject: ARM: MMP: add pxa910-ssp into ssp_id_table add pxa910-ssp into ssp_id_table, and fix pxa-ssp compiling issue under mach-mmp architect. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- arch/arm/plat-pxa/ssp.c | 1 + include/linux/pxa2xx_ssp.h | 1 + include/linux/spi/pxa2xx_spi.h | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c index 58b79809d20c..584c9bf8ed2d 100644 --- a/arch/arm/plat-pxa/ssp.c +++ b/arch/arm/plat-pxa/ssp.c @@ -193,6 +193,7 @@ static const struct platform_device_id ssp_id_table[] = { { "pxa25x-nssp", PXA25x_NSSP }, { "pxa27x-ssp", PXA27x_SSP }, { "pxa168-ssp", PXA168_SSP }, + { "pxa910-ssp", PXA910_SSP }, { }, }; diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 44835fb39793..f9fe15ec2b51 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -161,6 +161,7 @@ enum pxa_ssp_type { PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, PXA168_SSP, + PXA910_SSP, CE4100_SSP, }; diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index d3e1075f7b60..c73d1445c77e 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -43,7 +43,7 @@ struct pxa2xx_spi_chip { void (*cs_control)(u32 command); }; -#ifdef CONFIG_ARCH_PXA +#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP) #include #include -- cgit v1.2.3 From 972a55b62d592cfcd6d73577df8a52f1251ea9a7 Mon Sep 17 00:00:00 2001 From: Qiao Zhou Date: Mon, 4 Jun 2012 10:41:04 +0800 Subject: ASoC: fix pxa-ssp compiling issue under mach-mmp pxa-ssp.c uses API like cpu_is_pxa3xx(), cpu_is_pxa2xx(), which is defined under arch-pxa architecture, and drivers under mach-mmp can't find it. so just use ssp->type to replace that API. Signed-off-by: Qiao Zhou Acked-by: Haojian Zhuang Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + sound/soc/pxa/pxa-ssp.c | 38 +++++++++++--------------------------- 2 files changed, 12 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index f9fe15ec2b51..f36632061c66 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -160,6 +160,7 @@ enum pxa_ssp_type { PXA25x_SSP, /* pxa 210, 250, 255, 26x */ PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */ PXA27x_SSP, + PXA3xx_SSP, PXA168_SSP, PXA910_SSP, CE4100_SSP, diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c index 1c2aa7fab3fd..4da5fc55c7ee 100644 --- a/sound/soc/pxa/pxa-ssp.c +++ b/sound/soc/pxa/pxa-ssp.c @@ -33,7 +33,6 @@ #include #include -#include #include "../../arm/pxa2xx-pcm.h" #include "pxa-ssp.h" @@ -194,7 +193,7 @@ static void pxa_ssp_set_scr(struct ssp_device *ssp, u32 div) { u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0); - if (cpu_is_pxa25x() && ssp->type == PXA25x_SSP) { + if (ssp->type == PXA25x_SSP) { sscr0 &= ~0x0000ff00; sscr0 |= ((div - 2)/2) << 8; /* 2..512 */ } else { @@ -212,7 +211,7 @@ static u32 pxa_ssp_get_scr(struct ssp_device *ssp) u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0); u32 div; - if (cpu_is_pxa25x() && ssp->type == PXA25x_SSP) + if (ssp->type == PXA25x_SSP) div = ((sscr0 >> 8) & 0xff) * 2 + 2; else div = ((sscr0 >> 8) & 0xfff) + 1; @@ -242,7 +241,7 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai, break; case PXA_SSP_CLK_PLL: /* Internal PLL is fixed */ - if (cpu_is_pxa25x()) + if (ssp->type == PXA25x_SSP) priv->sysclk = 1843200; else priv->sysclk = 13000000; @@ -266,11 +265,11 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai, /* The SSP clock must be disabled when changing SSP clock mode * on PXA2xx. On PXA3xx it must be enabled when doing so. */ - if (!cpu_is_pxa3xx()) + if (ssp->type != PXA3xx_SSP) clk_disable(ssp->clk); val = pxa_ssp_read_reg(ssp, SSCR0) | sscr0; pxa_ssp_write_reg(ssp, SSCR0, val); - if (!cpu_is_pxa3xx()) + if (ssp->type != PXA3xx_SSP) clk_enable(ssp->clk); return 0; @@ -294,24 +293,20 @@ static int pxa_ssp_set_dai_clkdiv(struct snd_soc_dai *cpu_dai, case PXA_SSP_AUDIO_DIV_SCDB: val = pxa_ssp_read_reg(ssp, SSACD); val &= ~SSACD_SCDB; -#if defined(CONFIG_PXA3xx) - if (cpu_is_pxa3xx()) + if (ssp->type == PXA3xx_SSP) val &= ~SSACD_SCDX8; -#endif switch (div) { case PXA_SSP_CLK_SCDB_1: val |= SSACD_SCDB; break; case PXA_SSP_CLK_SCDB_4: break; -#if defined(CONFIG_PXA3xx) case PXA_SSP_CLK_SCDB_8: - if (cpu_is_pxa3xx()) + if (ssp->type == PXA3xx_SSP) val |= SSACD_SCDX8; else return -EINVAL; break; -#endif default: return -EINVAL; } @@ -337,10 +332,8 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id, struct ssp_device *ssp = priv->ssp; u32 ssacd = pxa_ssp_read_reg(ssp, SSACD) & ~0x70; -#if defined(CONFIG_PXA3xx) - if (cpu_is_pxa3xx()) + if (ssp->type == PXA3xx_SSP) pxa_ssp_write_reg(ssp, SSACDD, 0); -#endif switch (freq_out) { case 5622000: @@ -365,11 +358,10 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id, break; default: -#ifdef CONFIG_PXA3xx /* PXA3xx has a clock ditherer which can be used to generate * a wider range of frequencies - calculate a value for it. */ - if (cpu_is_pxa3xx()) { + if (ssp->type == PXA3xx_SSP) { u32 val; u64 tmp = 19968; tmp *= 1000000; @@ -386,7 +378,6 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id, val, freq_out); break; } -#endif return -EINVAL; } @@ -590,10 +581,8 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, /* bit size */ switch (params_format(params)) { case SNDRV_PCM_FORMAT_S16_LE: -#ifdef CONFIG_PXA3xx - if (cpu_is_pxa3xx()) + if (ssp->type == PXA3xx_SSP) sscr0 |= SSCR0_FPCKE; -#endif sscr0 |= SSCR0_DataSize(16); break; case SNDRV_PCM_FORMAT_S24_LE: @@ -618,9 +607,7 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, * trying and failing a lot; some of the registers * needed for that mode are only available on PXA3xx. */ - -#ifdef CONFIG_PXA3xx - if (!cpu_is_pxa3xx()) + if (ssp->type != PXA3xx_SSP) return -EINVAL; sspsp |= SSPSP_SFRMWDTH(width * 2); @@ -628,9 +615,6 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream, sspsp |= SSPSP_EDMYSTOP(3); sspsp |= SSPSP_DMYSTOP(3); sspsp |= SSPSP_DMYSTRT(1); -#else - return -EINVAL; -#endif } else { /* The frame width is the width the LRCLK is * asserted for; the delay is expressed in -- cgit v1.2.3 From 1761a110a9c36ad9ba26b104516677ad8cb38a08 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Thu, 7 Jun 2012 11:20:31 -0300 Subject: [media] Fix regression in ioctl numbering Yuck. The VIDIOC_(TRY_)DECODER_CMD ioctls already had ioctl numbers 96 and 97, and after merging the timings API I forgot to continue numbering from 98. So now we have two ioctls with number 96 and two with 97. With the new table-driver ioctl handling in v4l2-ioctl.c it is essential that each ioctl has its own unique number, so let's fix this quickly for 3.5. Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/linux/videodev2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 370d11106c11..2039c5d3292e 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -2640,9 +2640,9 @@ struct v4l2_create_buffers { /* Experimental, these three ioctls may change over the next couple of kernel versions. */ -#define VIDIOC_ENUM_DV_TIMINGS _IOWR('V', 96, struct v4l2_enum_dv_timings) -#define VIDIOC_QUERY_DV_TIMINGS _IOR('V', 97, struct v4l2_dv_timings) -#define VIDIOC_DV_TIMINGS_CAP _IOWR('V', 98, struct v4l2_dv_timings_cap) +#define VIDIOC_ENUM_DV_TIMINGS _IOWR('V', 98, struct v4l2_enum_dv_timings) +#define VIDIOC_QUERY_DV_TIMINGS _IOR('V', 99, struct v4l2_dv_timings) +#define VIDIOC_DV_TIMINGS_CAP _IOWR('V', 100, struct v4l2_dv_timings_cap) /* Reminder: when adding new ioctls please add support for them to drivers/media/video/v4l2-compat-ioctl32.c as well! */ -- cgit v1.2.3 From 2177905ca7419c49910d47e38e44790affd918cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 11 Jun 2012 23:56:09 -0700 Subject: Input: fix input.h kernel-doc warning Fix kernel-doc warning in input.h: Warning(include/linux/input.h:140): No description found for parameter 'len' Signed-off-by: Randy Dunlap Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index a81671453575..2740d080ec6b 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -116,6 +116,7 @@ struct input_keymap_entry { /** * EVIOCGMTSLOTS(len) - get MT slot values + * @len: size of the data buffer in bytes * * The ioctl buffer argument should be binary equivalent to * -- cgit v1.2.3 From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 13 Jun 2012 11:20:19 -0400 Subject: USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch (as1558) fixes a problem affecting several ASUS computers: The machine crashes or corrupts memory when going into suspend if the ehci-hcd driver is bound to any controllers. Users have been forced to unbind or unload ehci-hcd before putting their systems to sleep. After extensive testing, it was determined that the machines don't like going into suspend when any EHCI controllers are in the PCI D3 power state. Presumably this is a firmware bug, but there's nothing we can do about it except to avoid putting the controllers in D3 during system sleep. The patch adds a new flag to indicate whether the problem is present, and avoids changing the controller's power state if the flag is set. Runtime suspend is unaffected; this matters only for system suspend. However as a side effect, the controller will not respond to remote wakeup requests while the system is asleep. Hence USB wakeup is not functional -- but of course, this is already true in the current state of affairs. A similar patch has already been applied as commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers). The patch supersedes that one and reverts it. There are two differences: The old patch added the flag at the USB level; this patch adds it at the PCI level. The old patch applied to all chipsets with the same vendor, subsystem vendor, and product IDs; this patch makes an exception for a known-good system (based on DMI information). Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Andrey Rahmatullin Tested-by: Steven Rostedt Cc: stable Reviewed-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 5 +++++ drivers/pci/quirks.c | 26 ++++++++++++++++++++++++++ drivers/usb/core/hcd-pci.c | 9 --------- drivers/usb/host/ehci-pci.c | 8 -------- include/linux/pci.h | 2 ++ include/linux/usb/hcd.h | 2 -- 6 files changed, 33 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 447e83472c01..77cb54a65cde 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1744,6 +1744,11 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; + /* Some devices mustn't be in D3 during system sleep */ + if (target_state == PCI_D3hot && + (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) + return 0; + pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 2a7521677541..194b243a2817 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2929,6 +2929,32 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); +/* + * The Intel 6 Series/C200 Series chipset's EHCI controllers on many + * ASUS motherboards will cause memory corruption or a system crash + * if they are in D3 while the system is put into S3 sleep. + */ +static void __devinit asus_ehci_no_d3(struct pci_dev *dev) +{ + const char *sys_info; + static const char good_Asus_board[] = "P8Z68-V"; + + if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) + return; + if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) + return; + sys_info = dmi_get_system_info(DMI_BOARD_NAME); + if (sys_info && memcmp(sys_info, good_Asus_board, + sizeof(good_Asus_board) - 1) == 0) + return; + + dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); + dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; + device_set_wakeup_capable(&dev->dev, false); +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); + static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c index 57ed9e400c06..622b4a48e732 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -493,15 +493,6 @@ static int hcd_pci_suspend_noirq(struct device *dev) pci_save_state(pci_dev); - /* - * Some systems crash if an EHCI controller is in D3 during - * a sleep transition. We have to leave such controllers in D0. - */ - if (hcd->broken_pci_sleep) { - dev_dbg(dev, "Staying in PCI D0\n"); - return retval; - } - /* If the root hub is dead rather than suspended, disallow remote * wakeup. usb_hc_died() should ensure that both hosts are marked as * dying, so we only need to check the primary roothub. diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c index bc94d7bf072d..123481793a47 100644 --- a/drivers/usb/host/ehci-pci.c +++ b/drivers/usb/host/ehci-pci.c @@ -144,14 +144,6 @@ static int ehci_pci_setup(struct usb_hcd *hcd) hcd->has_tt = 1; tdi_reset(ehci); } - if (pdev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK) { - /* EHCI #1 or #2 on 6 Series/C200 Series chipset */ - if (pdev->device == 0x1c26 || pdev->device == 0x1c2d) { - ehci_info(ehci, "broken D3 during system sleep on ASUS\n"); - hcd->broken_pci_sleep = 1; - device_set_wakeup_capable(&pdev->dev, false); - } - } break; case PCI_VENDOR_ID_TDI: if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) { diff --git a/include/linux/pci.h b/include/linux/pci.h index d8c379dba6ad..fefb4e19bf6a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,6 +176,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, + /* Device causes system crash if in D3 during S3 sleep */ + PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 7f855d50cdf5..49b3ac29726a 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -126,8 +126,6 @@ struct usb_hcd { unsigned wireless:1; /* Wireless USB HCD */ unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ - unsigned broken_pci_sleep:1; /* Don't put the - controller in PCI-D3 for system sleep */ unsigned int irq; /* irq allocated */ void __iomem *regs; /* device memory/io */ -- cgit v1.2.3 From 201e4aca5aa179e6c69a4dcd36a3562e56b8d670 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:49 -0700 Subject: pstore/ram: Should update old dmesg buffer before reading Without the update, we'll only see the new dmesg buffer after the reboot, but previously we could see it right away. Making an oops visible in pstore filesystem before reboot is a somewhat dubious feature, but removing it wasn't an intentional change, so let's restore it. For this we have to make persistent_ram_save_old() safe for calling multiple times, and also extern it. Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram.c | 2 ++ fs/pstore/ram_core.c | 15 ++++++++------- include/linux/pstore_ram.h | 1 + 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 9123cce28c1e..16ff7332eae0 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -106,6 +106,8 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_sec = 0; time->tv_nsec = 0; + /* Update old/shadowed buffer. */ + persistent_ram_save_old(prz); size = persistent_ram_old_size(prz); *buf = kmalloc(size, GFP_KERNEL); if (*buf == NULL) diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index 31f8d184f3a0..235513c46aaf 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -250,23 +250,24 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz, persistent_ram_update_ecc(prz, start, count); } -static void __init -persistent_ram_save_old(struct persistent_ram_zone *prz) +void persistent_ram_save_old(struct persistent_ram_zone *prz) { struct persistent_ram_buffer *buffer = prz->buffer; size_t size = buffer_size(prz); size_t start = buffer_start(prz); - char *dest; - persistent_ram_ecc_old(prz); + if (!size) + return; - dest = kmalloc(size, GFP_KERNEL); - if (dest == NULL) { + if (!prz->old_log) { + persistent_ram_ecc_old(prz); + prz->old_log = kmalloc(size, GFP_KERNEL); + } + if (!prz->old_log) { pr_err("persistent_ram: failed to allocate buffer\n"); return; } - prz->old_log = dest; prz->old_log_size = size; memcpy(prz->old_log, &buffer->data[start], size - start); memcpy(prz->old_log + size - start, &buffer->data[0], start); diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 7ed7fd4dba49..4491e8ff36e6 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -75,6 +75,7 @@ struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, int persistent_ram_write(struct persistent_ram_zone *prz, const void *s, unsigned int count); +void persistent_ram_save_old(struct persistent_ram_zone *prz); size_t persistent_ram_old_size(struct persistent_ram_zone *prz); void *persistent_ram_old(struct persistent_ram_zone *prz); void persistent_ram_free_old(struct persistent_ram_zone *prz); -- cgit v1.2.3 From fce397930475f7efc712a1345dc0dad269a10544 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Sat, 26 May 2012 06:07:51 -0700 Subject: pstore/ram_core: Factor persistent_ram_zap() out of post_init() A handy function that we will use outside of ram_core soon. But so far just factor it out and start using it in post_init(). Signed-off-by: Anton Vorontsov Acked-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- fs/pstore/ram_core.c | 11 ++++++++--- include/linux/pstore_ram.h | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index f6650d12c0c1..c5fbdbbf81ac 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c @@ -320,6 +320,13 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz) prz->old_log_size = 0; } +void persistent_ram_zap(struct persistent_ram_zone *prz) +{ + atomic_set(&prz->buffer->start, 0); + atomic_set(&prz->buffer->size, 0); + persistent_ram_update_header_ecc(prz); +} + static void *persistent_ram_vmap(phys_addr_t start, size_t size) { struct page **pages; @@ -414,8 +421,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool } prz->buffer->sig = PERSISTENT_RAM_SIG; - atomic_set(&prz->buffer->start, 0); - atomic_set(&prz->buffer->size, 0); + persistent_ram_zap(prz); return 0; } @@ -450,7 +456,6 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, goto err; persistent_ram_post_init(prz, ecc); - persistent_ram_update_header_ecc(prz); return prz; err: diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h index 4491e8ff36e6..3b823d49a85a 100644 --- a/include/linux/pstore_ram.h +++ b/include/linux/pstore_ram.h @@ -69,6 +69,7 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start, size_t size, bool ecc); void persistent_ram_free(struct persistent_ram_zone *prz); +void persistent_ram_zap(struct persistent_ram_zone *prz); struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev, bool ecc); -- cgit v1.2.3 From e2ae715d66bf4becfb85eb84b7150e23cf27df30 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Fri, 15 Jun 2012 14:07:51 +0200 Subject: kmsg - kmsg_dump() use iterator to receive log buffer content Provide an iterator to receive the log buffer content, and convert all kmsg_dump() users to it. The structured data in the kmsg buffer now contains binary data, which should no longer be copied verbatim to the kmsg_dump() users. The iterator should provide reliable access to the buffer data, and also supports proper log line-aware chunking of data while iterating. Signed-off-by: Kay Sievers Tested-by: Tony Luck Reported-by: Anton Vorontsov Tested-by: Anton Vorontsov Signed-off-by: Greg Kroah-Hartman --- arch/powerpc/platforms/pseries/nvram.c | 61 +------- arch/x86/platform/mrst/early_printk_mrst.c | 13 +- drivers/mtd/mtdoops.c | 22 +-- fs/pstore/platform.c | 34 ++--- include/linux/kmsg_dump.h | 45 +++++- kernel/printk.c | 220 +++++++++++++++++++++++++---- 6 files changed, 258 insertions(+), 137 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 36f957f31842..8733a86ad52e 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -68,9 +68,7 @@ static const char *pseries_nvram_os_partitions[] = { }; static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, - const char *old_msgs, unsigned long old_len, - const char *new_msgs, unsigned long new_len); + enum kmsg_dump_reason reason); static struct kmsg_dumper nvram_kmsg_dumper = { .dump = oops_to_nvram @@ -503,28 +501,6 @@ int __init pSeries_nvram_init(void) return 0; } -/* - * Try to capture the last capture_len bytes of the printk buffer. Return - * the amount actually captured. - */ -static size_t capture_last_msgs(const char *old_msgs, size_t old_len, - const char *new_msgs, size_t new_len, - char *captured, size_t capture_len) -{ - if (new_len >= capture_len) { - memcpy(captured, new_msgs + (new_len - capture_len), - capture_len); - return capture_len; - } else { - /* Grab the end of old_msgs. */ - size_t old_tail_len = min(old_len, capture_len - new_len); - memcpy(captured, old_msgs + (old_len - old_tail_len), - old_tail_len); - memcpy(captured + old_tail_len, new_msgs, new_len); - return old_tail_len + new_len; - } -} - /* * Are we using the ibm,rtas-log for oops/panic reports? And if so, * would logging this oops/panic overwrite an RTAS event that rtas_errd @@ -541,27 +517,6 @@ static int clobbering_unread_rtas_event(void) NVRAM_RTAS_READ_TIMEOUT); } -/* Squeeze out each line's severity prefix. */ -static size_t elide_severities(char *buf, size_t len) -{ - char *in, *out, *buf_end = buf + len; - /* Assume a at the very beginning marks the start of a line. */ - int newline = 1; - - in = out = buf; - while (in < buf_end) { - if (newline && in+3 <= buf_end && - *in == '<' && isdigit(in[1]) && in[2] == '>') { - in += 3; - newline = 0; - } else { - newline = (*in == '\n'); - *out++ = *in++; - } - } - return out - buf; -} - /* Derived from logfs_compress() */ static int nvram_compress(const void *in, void *out, size_t inlen, size_t outlen) @@ -619,9 +574,7 @@ static int zip_oops(size_t text_len) * partition. If that's too much, go back and capture uncompressed text. */ static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, - const char *old_msgs, unsigned long old_len, - const char *new_msgs, unsigned long new_len) + enum kmsg_dump_reason reason) { static unsigned int oops_count = 0; static bool panicking = false; @@ -660,14 +613,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, return; if (big_oops_buf) { - text_len = capture_last_msgs(old_msgs, old_len, - new_msgs, new_len, big_oops_buf, big_oops_buf_sz); - text_len = elide_severities(big_oops_buf, text_len); + kmsg_dump_get_buffer(dumper, false, + big_oops_buf, big_oops_buf_sz, &text_len); rc = zip_oops(text_len); } if (rc != 0) { - text_len = capture_last_msgs(old_msgs, old_len, - new_msgs, new_len, oops_data, oops_data_sz); + kmsg_dump_rewind(dumper); + kmsg_dump_get_buffer(dumper, true, + oops_data, oops_data_sz, &text_len); err_type = ERR_TYPE_KERNEL_PANIC; *oops_len = (u16) text_len; } diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c index 3c6e328483c7..028454f0c3a5 100644 --- a/arch/x86/platform/mrst/early_printk_mrst.c +++ b/arch/x86/platform/mrst/early_printk_mrst.c @@ -110,19 +110,16 @@ static struct kmsg_dumper dw_dumper; static int dumper_registered; static void dw_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2) + enum kmsg_dump_reason reason) { - int i; + static char line[1024]; + size_t len; /* When run to this, we'd better re-init the HW */ mrst_early_console_init(); - for (i = 0; i < l1; i++) - early_mrst_console.write(&early_mrst_console, s1 + i, 1); - for (i = 0; i < l2; i++) - early_mrst_console.write(&early_mrst_console, s2 + i, 1); + while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len)) + early_mrst_console.write(&early_mrst_console, line, len); } /* Set the ratio rate to 115200, 8n1, IRQ disabled */ diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index ae36d7e1e913..551e316e4454 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -304,32 +304,17 @@ static void find_next_position(struct mtdoops_context *cxt) } static void mtdoops_do_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, const char *s1, unsigned long l1, - const char *s2, unsigned long l2) + enum kmsg_dump_reason reason) { struct mtdoops_context *cxt = container_of(dumper, struct mtdoops_context, dump); - unsigned long s1_start, s2_start; - unsigned long l1_cpy, l2_cpy; - char *dst; - - if (reason != KMSG_DUMP_OOPS && - reason != KMSG_DUMP_PANIC) - return; /* Only dump oopses if dump_oops is set */ if (reason == KMSG_DUMP_OOPS && !dump_oops) return; - dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */ - l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE); - l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy); - - s2_start = l2 - l2_cpy; - s1_start = l1 - l1_cpy; - - memcpy(dst, s1 + s1_start, l1_cpy); - memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); + kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE, + record_size - MTDOOPS_HEADER_SIZE, NULL); /* Panics must be written immediately */ if (reason != KMSG_DUMP_OOPS) @@ -375,6 +360,7 @@ static void mtdoops_notify_add(struct mtd_info *mtd) return; } + cxt->dump.max_reason = KMSG_DUMP_OOPS; cxt->dump.dump = mtdoops_do_dump; err = kmsg_dump_register(&cxt->dump); if (err) { diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 82c585f715e3..03ce7a9b81cc 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -94,20 +94,15 @@ static const char *get_reason_str(enum kmsg_dump_reason reason) * as we can from the end of the buffer. */ static void pstore_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2) + enum kmsg_dump_reason reason) { - unsigned long s1_start, s2_start; - unsigned long l1_cpy, l2_cpy; - unsigned long size, total = 0; - char *dst; + unsigned long total = 0; const char *why; u64 id; - int hsize, ret; unsigned int part = 1; unsigned long flags = 0; int is_locked = 0; + int ret; why = get_reason_str(reason); @@ -119,30 +114,25 @@ static void pstore_dump(struct kmsg_dumper *dumper, spin_lock_irqsave(&psinfo->buf_lock, flags); oopscount++; while (total < kmsg_bytes) { + char *dst; + unsigned long size; + int hsize; + size_t len; + dst = psinfo->buf; hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part); size = psinfo->bufsize - hsize; dst += hsize; - l2_cpy = min(l2, size); - l1_cpy = min(l1, size - l2_cpy); - - if (l1_cpy + l2_cpy == 0) + if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len)) break; - s2_start = l2 - l2_cpy; - s1_start = l1 - l1_cpy; - - memcpy(dst, s1 + s1_start, l1_cpy); - memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy); - ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, - hsize + l1_cpy + l2_cpy, psinfo); + hsize + len, psinfo); if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; - l1 -= l1_cpy; - l2 -= l2_cpy; - total += l1_cpy + l2_cpy; + + total += hsize + len; part++; } if (in_nmi()) { diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 35f7237ec972..af4eb5a39d9a 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -21,6 +21,7 @@ * is passed to the kernel. */ enum kmsg_dump_reason { + KMSG_DUMP_UNDEF, KMSG_DUMP_PANIC, KMSG_DUMP_OOPS, KMSG_DUMP_EMERG, @@ -31,23 +32,37 @@ enum kmsg_dump_reason { /** * struct kmsg_dumper - kernel crash message dumper structure - * @dump: The callback which gets called on crashes. The buffer is passed - * as two sections, where s1 (length l1) contains the older - * messages and s2 (length l2) contains the newer. * @list: Entry in the dumper list (private) + * @dump: Call into dumping code which will retrieve the data with + * through the record iterator + * @max_reason: filter for highest reason number that should be dumped * @registered: Flag that specifies if this is already registered */ struct kmsg_dumper { - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason, - const char *s1, unsigned long l1, - const char *s2, unsigned long l2); struct list_head list; - int registered; + void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + enum kmsg_dump_reason max_reason; + bool active; + bool registered; + + /* private state of the kmsg iterator */ + u32 cur_idx; + u32 next_idx; + u64 cur_seq; + u64 next_seq; }; #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len); + +void kmsg_dump_rewind(struct kmsg_dumper *dumper); + int kmsg_dump_register(struct kmsg_dumper *dumper); int kmsg_dump_unregister(struct kmsg_dumper *dumper); @@ -56,6 +71,22 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) +{ + return false; +} + +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) +{ + return false; +} + +void kmsg_dump_rewind(struct kmsg_dumper *dumper) +{ +} + static inline int kmsg_dump_register(struct kmsg_dumper *dumper) { return -EINVAL; diff --git a/kernel/printk.c b/kernel/printk.c index f205c25c37e2..ceb4a2f775a1 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -909,7 +909,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) /* * Find first record that fits, including all following records, * into the user-provided buffer for this dump. - */ + */ seq = clear_seq; idx = clear_idx; while (seq < log_next_seq) { @@ -919,6 +919,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear) idx = log_next(idx); seq++; } + + /* move first record forward until length fits into the buffer */ seq = clear_seq; idx = clear_idx; while (len > size && seq < log_next_seq) { @@ -929,7 +931,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear) seq++; } - /* last message in this dump */ + /* last message fitting into this dump */ next_seq = log_next_seq; len = 0; @@ -2300,48 +2302,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR); * kmsg_dump - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping * - * Iterate through each of the dump devices and call the oops/panic - * callbacks with the log buffer. + * Call each of the registered dumper's dump() callback, which can + * retrieve the kmsg records with kmsg_dump_get_line() or + * kmsg_dump_get_buffer(). */ void kmsg_dump(enum kmsg_dump_reason reason) { - u64 idx; struct kmsg_dumper *dumper; - const char *s1, *s2; - unsigned long l1, l2; unsigned long flags; if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump) return; - /* Theoretically, the log could move on after we do this, but - there's not a lot we can do about that. The new messages - will overwrite the start of what we dump. */ + rcu_read_lock(); + list_for_each_entry_rcu(dumper, &dump_list, list) { + if (dumper->max_reason && reason > dumper->max_reason) + continue; + + /* initialize iterator with data about the stored records */ + dumper->active = true; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + dumper->cur_seq = clear_seq; + dumper->cur_idx = clear_idx; + dumper->next_seq = log_next_seq; + dumper->next_idx = log_next_idx; + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + /* invoke dumper which will iterate over records */ + dumper->dump(dumper, reason); + + /* reset iterator */ + dumper->active = false; + } + rcu_read_unlock(); +} + +/** + * kmsg_dump_get_line - retrieve one kmsg log line + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. + */ +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) +{ + unsigned long flags; + struct log *msg; + size_t l = 0; + bool ret = false; + + if (!dumper->active) + goto out; raw_spin_lock_irqsave(&logbuf_lock, flags); - if (syslog_seq < log_first_seq) - idx = syslog_idx; - else - idx = log_first_idx; + if (dumper->cur_seq < log_first_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = log_first_seq; + dumper->cur_idx = log_first_idx; + } - if (idx > log_next_idx) { - s1 = log_buf; - l1 = log_next_idx; + /* last entry */ + if (dumper->cur_seq >= log_next_seq) { + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + goto out; + } - s2 = log_buf + idx; - l2 = log_buf_len - idx; - } else { - s1 = ""; - l1 = 0; + msg = log_from_idx(dumper->cur_idx); + l = msg_print_text(msg, syslog, + line, size); + + dumper->cur_idx = log_next(dumper->cur_idx); + dumper->cur_seq++; + ret = true; + raw_spin_unlock_irqrestore(&logbuf_lock, flags); +out: + if (len) + *len = l; + return ret; +} +EXPORT_SYMBOL_GPL(kmsg_dump_get_line); + +/** + * kmsg_dump_get_buffer - copy kmsg log lines + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the end of the kmsg buffer and fill the provided buffer + * with as many of the the *youngest* kmsg records that fit into it. + * If the buffer is large enough, all available kmsg records will be + * copied with a single call. + * + * Consecutive calls will fill the buffer with the next block of + * available older records, not including the earlier retrieved ones. + * + * A return value of FALSE indicates that there are no more records to + * read. + */ +bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) +{ + unsigned long flags; + u64 seq; + u32 idx; + u64 next_seq; + u32 next_idx; + size_t l = 0; + bool ret = false; + + if (!dumper->active) + goto out; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + if (dumper->cur_seq < log_first_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = log_first_seq; + dumper->cur_idx = log_first_idx; + } + + /* last entry */ + if (dumper->cur_seq >= dumper->next_seq) { + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + goto out; + } + + /* calculate length of entire buffer */ + seq = dumper->cur_seq; + idx = dumper->cur_idx; + while (seq < dumper->next_seq) { + struct log *msg = log_from_idx(idx); + + l += msg_print_text(msg, true, NULL, 0); + idx = log_next(idx); + seq++; + } + + /* move first record forward until length fits into the buffer */ + seq = dumper->cur_seq; + idx = dumper->cur_idx; + while (l > size && seq < dumper->next_seq) { + struct log *msg = log_from_idx(idx); - s2 = log_buf + idx; - l2 = log_next_idx - idx; + l -= msg_print_text(msg, true, NULL, 0); + idx = log_next(idx); + seq++; } + + /* last message in next interation */ + next_seq = seq; + next_idx = idx; + + l = 0; + while (seq < dumper->next_seq) { + struct log *msg = log_from_idx(idx); + + l += msg_print_text(msg, syslog, + buf + l, size - l); + + idx = log_next(idx); + seq++; + } + + dumper->next_seq = next_seq; + dumper->next_idx = next_idx; + ret = true; raw_spin_unlock_irqrestore(&logbuf_lock, flags); +out: + if (len) + *len = l; + return ret; +} +EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); - rcu_read_lock(); - list_for_each_entry_rcu(dumper, &dump_list, list) - dumper->dump(dumper, reason, s1, l1, s2, l2); - rcu_read_unlock(); +/** + * kmsg_dump_rewind - reset the interator + * @dumper: registered kmsg dumper + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. + */ +void kmsg_dump_rewind(struct kmsg_dumper *dumper) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + dumper->cur_seq = clear_seq; + dumper->cur_idx = clear_idx; + dumper->next_seq = log_next_seq; + dumper->next_idx = log_next_idx; + raw_spin_unlock_irqrestore(&logbuf_lock, flags); } +EXPORT_SYMBOL_GPL(kmsg_dump_rewind); #endif -- cgit v1.2.3 From 62b1a8ab9b3660bb820d8dfe23148ed6cda38574 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Jun 2012 06:42:44 +0000 Subject: net: remove skb_orphan_try() Orphaning skb in dev_hard_start_xmit() makes bonding behavior unfriendly for applications sending big UDP bursts : Once packets pass the bonding device and come to real device, they might hit a full qdisc and be dropped. Without orphaning, the sender is automatically throttled because sk->sk_wmemalloc reaches sk->sk_sndbuf (assuming sk_sndbuf is not too big) We could try to defer the orphaning adding another test in dev_hard_start_xmit(), but all this seems of little gain, now that BQL tends to make packets more likely to be parked in Qdisc queues instead of NIC TX ring, in cases where performance matters. Reverts commits : fc6055a5ba31 net: Introduce skb_orphan_try() 87fd308cfc6b net: skb_tx_hash() fix relative to skb_orphan_try() and removes SKBTX_DRV_NEEDS_SK_REF flag Reported-and-bisected-by: Jean-Michel Hautbois Signed-off-by: Eric Dumazet Tested-by: Oliver Hartkopp Acked-by: Oliver Hartkopp Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 ++----- net/can/raw.c | 3 --- net/core/dev.c | 23 +---------------------- net/iucv/af_iucv.c | 1 - 4 files changed, 3 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b534a1be540a..642cb7355df3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -225,14 +225,11 @@ enum { /* device driver is going to provide hardware time stamp */ SKBTX_IN_PROGRESS = 1 << 2, - /* ensure the originating sk reference is available on driver level */ - SKBTX_DRV_NEEDS_SK_REF = 1 << 3, - /* device driver supports TX zero-copy buffers */ - SKBTX_DEV_ZEROCOPY = 1 << 4, + SKBTX_DEV_ZEROCOPY = 1 << 3, /* generate wifi status information (where possible) */ - SKBTX_WIFI_STATUS = 1 << 5, + SKBTX_WIFI_STATUS = 1 << 4, }; /* diff --git a/net/can/raw.c b/net/can/raw.c index cde1b4a20f75..46cca3a91d19 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -681,9 +681,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, if (err < 0) goto free_skb; - /* to be able to check the received tx sock reference in raw_rcv() */ - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; - skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index cd0981977f5c..6df214041a5e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2089,25 +2089,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) return 0; } -/* - * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested or the sk-reference - * is needed on driver level for other reasons, e.g. see net/can/raw.c - */ -static inline void skb_orphan_try(struct sk_buff *skb) -{ - struct sock *sk = skb->sk; - - if (sk && !skb_shinfo(skb)->tx_flags) { - /* skb_tx_hash() wont be able to get sk. - * We copy sk_hash into skb->rxhash - */ - if (!skb->rxhash) - skb->rxhash = sk->sk_hash; - skb_orphan(skb); - } -} - static bool can_checksum_protocol(netdev_features_t features, __be16 protocol) { return ((features & NETIF_F_GEN_CSUM) || @@ -2193,8 +2174,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); - skb_orphan_try(skb); - features = netif_skb_features(skb); if (vlan_tx_tag_present(skb) && @@ -2304,7 +2283,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb, if (skb->sk && skb->sk->sk_hash) hash = skb->sk->sk_hash; else - hash = (__force u16) skb->protocol ^ skb->rxhash; + hash = (__force u16) skb->protocol; hash = jhash_1word(hash, hashrnd); return (u16) (((u64) hash * qcount) >> 32) + qoffset; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 07d7d55a1b93..cd6f7a991d80 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -372,7 +372,6 @@ static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock, skb_trim(skb, skb->dev->mtu); } skb->protocol = ETH_P_AF_IUCV; - skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; -- cgit v1.2.3 From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 15 Jun 2012 17:55:50 -0700 Subject: swap: fix shmem swapping when more than 8 areas Minchan Kim reports that when a system has many swap areas, and tmpfs swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read back the page cannot locate it, and the read fails with -ENOMEM. Whoops. Yes, I blindly followed read_swap_header()'s pte_to_swp_entry( swp_entry_to_pte()) technique for determining maximum usable swap offset, without stopping to realize that that actually depends upon the pte swap encoding shifting swap offset to the higher bits and truncating it there. Whereas our radix_tree swap encoding leaves offset in the lower bits: it's swap "type" (that is, index of swap area) that was truncated. Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header(). This does not reduce the usable size of a swap area any further, it leaves it as claimed when making the original commit: no change from 3.0 on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB per swapfile on i386 with PAE. It's not a change I would have risked five years ago, but with x86_64 supported for ten years, I believe it's appropriate now. Hmm, and what if some architecture implements its swap pte with offset encoded below type? That would equally break the maximum usable swap offset check. Happily, they all follow the same tradition of encoding offset above type, but I'll prepare a check on that for next. Reported-and-Reviewed-and-Tested-by: Minchan Kim Signed-off-by: Hugh Dickins Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4] Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 8 +++++--- mm/swapfile.c | 12 ++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 792d16d9cbc7..47ead515c811 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -9,13 +9,15 @@ * get good packing density in that tree, so the index should be dense in * the low-order bits. * - * We arrange the `type' and `offset' fields so that `type' is at the five + * We arrange the `type' and `offset' fields so that `type' is at the seven * high-order bits of the swp_entry_t and `offset' is right-aligned in the - * remaining bits. + * remaining bits. Although `type' itself needs only five bits, we allow for + * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry(). * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. */ -#define SWP_TYPE_SHIFT(e) (sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT) +#define SWP_TYPE_SHIFT(e) ((sizeof(e.val) * 8) - \ + (MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT)) #define SWP_OFFSET_MASK(e) ((1UL << SWP_TYPE_SHIFT(e)) - 1) /* diff --git a/mm/swapfile.c b/mm/swapfile.c index de5bc51c4a66..71373d03fcee 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p, /* * Find out how many pages are allowed for a single swap - * device. There are three limiting factors: 1) the number + * device. There are two limiting factors: 1) the number * of bits for the swap offset in the swp_entry_t type, and * 2) the number of bits in the swap pte as defined by the - * the different architectures, and 3) the number of free bits - * in an exceptional radix_tree entry. In order to find the + * different architectures. In order to find the * largest possible bit mask, a swap entry with swap type 0 * and swap offset ~0UL is created, encoded to a swap pte, * decoded to a swp_entry_t again, and finally the swap * offset is extracted. This will mask all the bits from * the initial ~0UL mask that can't be encoded in either * the swp_entry_t or the architecture definition of a - * swap pte. Then the same is done for a radix_tree entry. + * swap pte. */ maxpages = swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))); - maxpages = swp_offset(radix_to_swp_entry( - swp_to_radix_entry(swp_entry(0, maxpages)))) + 1; - + swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; if (maxpages > swap_header->info.last_page) { maxpages = swap_header->info.last_page + 1; /* p->max is an unsigned int: don't overflow it */ -- cgit v1.2.3 From f8fee8f5acb5c3f82e02f2ae139a6f1e7b4eb583 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jun 2012 12:46:17 -0700 Subject: vga_switcheroo.h: fix pci_dev warning Fix warnings on some architectures/configs (not on x86): include/linux/vga_switcheroo.h:28:30: warning: 'struct pci_dev' declared inside parameter list [enabled by default] include/linux/vga_switcheroo.h:28:30: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] Signed-off-by: Randy Dunlap Cc: Takashi Iwai Reported-by: Geert Uytterhoeven Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 60da41fe9dc2..d844b7790ea6 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -9,6 +9,8 @@ #include +struct pci_dev; + enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, -- cgit v1.2.3 From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 14 Jun 2012 13:08:38 -0400 Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS Currently there is a 'chicken and egg' issue when the DS is also the mounted MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client is not called to dereference the MDS usage of a deviceid which holds a reference to the DS nfs_client. The result is the umount program returns, but the nfs_client is not freed, and the cl_session hearbeat continues. The MDS (and all other nfs mounts) lose their last nfs_client reference in nfs_free_server when the last nfs_server (fsid) is umounted. The file layout DS lose their last nfs_client reference in destroy_ds when the last deviceid referencing the data server is put and destroy_ds is called. This is triggered by a call to nfs4_deviceid_purge_client which removes references to a pNFS deviceid used by an MDS mount. The fix is to track how many pnfs enabled filesystems are mounted from this server, and then to purge the device id cache once that count reaches zero. Reported-by: Jorge Mora Reported-by: Andy Adamson Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/pnfs.c | 5 +++++ include/linux/nfs_fs_sb.h | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 17ba6b995659..f005b5bebdc7 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -207,7 +207,6 @@ error_0: static void nfs4_shutdown_session(struct nfs_client *clp) { if (nfs4_has_session(clp)) { - nfs4_deviceid_purge_client(clp); nfs4_destroy_session(clp->cl_session); nfs4_destroy_clientid(clp); } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8323aa7b543..bdf7e52943c8 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -80,6 +80,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss) if (nfss->pnfs_curr_ld) { if (nfss->pnfs_curr_ld->clear_layoutdriver) nfss->pnfs_curr_ld->clear_layoutdriver(nfss); + /* Decrement the MDS count. Purge the deviceid cache if zero */ + if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count)) + nfs4_deviceid_purge_client(nfss->nfs_client); module_put(nfss->pnfs_curr_ld->owner); } nfss->pnfs_curr_ld = NULL; @@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh, module_put(ld_type->owner); goto out_no_driver; } + /* Bump the MDS count */ + atomic_inc(&server->nfs_client->cl_mds_count); dprintk("%s: pNFS module for %u set\n", __func__, id); return; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index fbb78fb09bd2..f58325a1d8fb 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -25,6 +25,7 @@ struct nfs41_impl_id; */ struct nfs_client { atomic_t cl_count; + atomic_t cl_mds_count; int cl_cons_state; /* current construction state (-ve: init error) */ #define NFS_CS_READY 0 /* ready to be used */ #define NFS_CS_INITING 1 /* busy initialising */ -- cgit v1.2.3 From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Thu, 14 Jun 2012 10:54:28 -0400 Subject: ftrace: Make all inline tags also include notrace Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when calling lockdep") prevented lockdep calls from the int3 breakpoint handler from reseting the stack if a function that was called was in the process of being converted for tracing and had a breakpoint on it. The idea is, before calling the lockdep code, do a load_idt() to the special IDT that kept the breakpoint stack from reseting. This worked well as a quick fix for this kernel release, until a certain config caused a lockup in the function tracer start up tests. Investigating it, I found that the load_idt that was used to prevent the int3 from changing stacks was itself being traced! Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and all 'inline' tags were set to always inline, there were still cases that it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it would add a pointer to the native_load_idt() which made that function to be traced. Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions") only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled. PARAVIRT_GUEST shows that this was not enough and we need to also mark always_inline with notrace as well. Reported-by: Fengguang Wu Tested-by: Fengguang Wu Signed-off-by: Steven Rostedt --- include/linux/compiler-gcc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e5834aa24b9e..6a6d7aefe12d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -47,9 +47,9 @@ */ #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) -# define inline inline __attribute__((always_inline)) -# define __inline__ __inline__ __attribute__((always_inline)) -# define __inline __inline __attribute__((always_inline)) +# define inline inline __attribute__((always_inline)) notrace +# define __inline__ __inline__ __attribute__((always_inline)) notrace +# define __inline __inline __attribute__((always_inline)) notrace #else /* A lot of inline functions can cause havoc with function tracing */ # define inline inline notrace -- cgit v1.2.3 From 246f6f2ff2c5cf46ded6d06f11f63e38bad880d1 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 19 Jun 2012 00:32:57 +0200 Subject: kmsg - kmsg_dump() fix CONFIG_PRINTK=n compilation Signed-off-by: Kay Sievers Cc: Stephen Rothwell Reported-by: Randy Dunlap Reported-by: Fengguang Wu Signed-off-by: Greg Kroah-Hartman --- include/linux/kmsg_dump.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index af4eb5a39d9a..d6bd50110ec2 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -71,19 +71,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - const char *line, size_t size, size_t *len) +static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + const char *line, size_t size, size_t *len) { return false; } -bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, - char *buf, size_t size, size_t *len) +static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, + char *buf, size_t size, size_t *len) { return false; } -void kmsg_dump_rewind(struct kmsg_dumper *dumper) +static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } -- cgit v1.2.3 From abca7c4965845924f65d40e0aa1092bdd895e314 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 20 Jun 2012 12:52:56 -0700 Subject: mm: fix slab->page _count corruption when using slub On arches that do not support this_cpu_cmpxchg_double() slab_lock is used to do atomic cmpxchg() on double word which contains page->_count. The page count can be changed from get_page() or put_page() without taking slab_lock. That corrupts page counter. Fix it by moving page->_count out of cmpxchg_double data. So that slub does no change it while updating slub meta-data in struct page. [akpm@linux-foundation.org: use standard comment layout, tweak comment text] Reported-by: Amey Bhide Signed-off-by: Pravin B Shelar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index dad95bdd06d7..704a626d94a0 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -57,8 +57,18 @@ struct page { }; union { +#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \ + defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE) /* Used for cmpxchg_double in slub */ unsigned long counters; +#else + /* + * Keep _count separate from slub cmpxchg_double data. + * As the rest of the double word is protected by + * slab_lock but _count is not. + */ + unsigned counters; +#endif struct { -- cgit v1.2.3 From 10d8935f46e5028847b179757ecbf9238b13d129 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 20 Jun 2012 12:53:02 -0700 Subject: Viresh has moved viresh.kumar@st.com email-id doesn't exist anymore as I have left the company. Replace ST's id with viresh.linux@gmail.com. It also updates .mailmap file to fix address for 'git shortlog' Signed-off-by: Viresh Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 1 + Documentation/arm/SPEAr/overview.txt | 2 +- MAINTAINERS | 18 +++++++++--------- arch/arm/boot/dts/spear1310-evb.dts | 2 +- arch/arm/boot/dts/spear1310.dtsi | 2 +- arch/arm/boot/dts/spear1340-evb.dts | 2 +- arch/arm/boot/dts/spear1340.dtsi | 2 +- arch/arm/boot/dts/spear13xx.dtsi | 2 +- arch/arm/boot/dts/spear300-evb.dts | 2 +- arch/arm/boot/dts/spear300.dtsi | 2 +- arch/arm/boot/dts/spear310-evb.dts | 2 +- arch/arm/boot/dts/spear310.dtsi | 2 +- arch/arm/boot/dts/spear320-evb.dts | 2 +- arch/arm/boot/dts/spear320.dtsi | 2 +- arch/arm/boot/dts/spear3xx.dtsi | 2 +- arch/arm/include/asm/hardware/sp810.h | 2 +- arch/arm/mach-spear13xx/include/mach/debug-macro.S | 2 +- arch/arm/mach-spear13xx/include/mach/dma.h | 2 +- arch/arm/mach-spear13xx/include/mach/generic.h | 2 +- arch/arm/mach-spear13xx/include/mach/gpio.h | 2 +- arch/arm/mach-spear13xx/include/mach/irqs.h | 2 +- arch/arm/mach-spear13xx/include/mach/spear.h | 2 +- arch/arm/mach-spear13xx/include/mach/timex.h | 2 +- arch/arm/mach-spear13xx/include/mach/uncompress.h | 2 +- arch/arm/mach-spear13xx/spear1310.c | 2 +- arch/arm/mach-spear13xx/spear1340.c | 2 +- arch/arm/mach-spear13xx/spear13xx.c | 2 +- arch/arm/mach-spear3xx/include/mach/debug-macro.S | 2 +- arch/arm/mach-spear3xx/include/mach/generic.h | 2 +- arch/arm/mach-spear3xx/include/mach/gpio.h | 2 +- arch/arm/mach-spear3xx/include/mach/irqs.h | 2 +- arch/arm/mach-spear3xx/include/mach/misc_regs.h | 2 +- arch/arm/mach-spear3xx/include/mach/spear.h | 2 +- arch/arm/mach-spear3xx/include/mach/timex.h | 2 +- arch/arm/mach-spear3xx/include/mach/uncompress.h | 2 +- arch/arm/mach-spear3xx/spear300.c | 2 +- arch/arm/mach-spear3xx/spear310.c | 2 +- arch/arm/mach-spear3xx/spear320.c | 2 +- arch/arm/mach-spear3xx/spear3xx.c | 2 +- arch/arm/mach-spear6xx/include/mach/gpio.h | 2 +- arch/arm/mach-spear6xx/include/mach/misc_regs.h | 2 +- arch/arm/plat-spear/include/plat/debug-macro.S | 2 +- arch/arm/plat-spear/include/plat/pl080.h | 2 +- arch/arm/plat-spear/include/plat/shirq.h | 2 +- arch/arm/plat-spear/include/plat/timex.h | 2 +- arch/arm/plat-spear/include/plat/uncompress.h | 2 +- arch/arm/plat-spear/pl080.c | 2 +- arch/arm/plat-spear/restart.c | 2 +- arch/arm/plat-spear/shirq.c | 2 +- drivers/ata/pata_arasan_cf.c | 4 ++-- drivers/clk/spear/clk-aux-synth.c | 2 +- drivers/clk/spear/clk-frac-synth.c | 2 +- drivers/clk/spear/clk-gpt-synth.c | 2 +- drivers/clk/spear/clk-vco-pll.c | 2 +- drivers/clk/spear/clk.c | 2 +- drivers/clk/spear/clk.h | 2 +- drivers/clk/spear/spear1310_clock.c | 2 +- drivers/clk/spear/spear1340_clock.c | 2 +- drivers/clk/spear/spear3xx_clock.c | 2 +- drivers/clk/spear/spear6xx_clock.c | 2 +- drivers/dma/dw_dmac.c | 2 +- drivers/mfd/stmpe-i2c.c | 2 +- drivers/mfd/stmpe-spi.c | 4 ++-- drivers/mmc/host/sdhci-spear.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear.c | 2 +- drivers/pinctrl/spear/pinctrl-spear.h | 2 +- drivers/pinctrl/spear/pinctrl-spear1310.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear1340.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear300.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear310.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear320.c | 4 ++-- drivers/pinctrl/spear/pinctrl-spear3xx.c | 2 +- drivers/pinctrl/spear/pinctrl-spear3xx.h | 2 +- drivers/watchdog/sp805_wdt.c | 4 ++-- include/linux/mmc/sdhci-spear.h | 2 +- include/linux/pata_arasan_cf_data.h | 2 +- 76 files changed, 93 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/.mailmap b/.mailmap index 2909c33bc54e..658003aa9446 100644 --- a/.mailmap +++ b/.mailmap @@ -111,6 +111,7 @@ Uwe Kleine-König Uwe Kleine-König Uwe Kleine-König Valdis Kletnieks +Viresh Kumar Takashi YOSHII Yusuke Goda Gustavo Padovan diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt index 57aae7765c74..65610bf52ebf 100644 --- a/Documentation/arm/SPEAr/overview.txt +++ b/Documentation/arm/SPEAr/overview.txt @@ -60,4 +60,4 @@ Introduction Document Author --------------- - Viresh Kumar , (c) 2010-2012 ST Microelectronics + Viresh Kumar , (c) 2010-2012 ST Microelectronics diff --git a/MAINTAINERS b/MAINTAINERS index 3e30a3afe2a4..a81b298d9c75 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -579,7 +579,7 @@ F: drivers/net/appletalk/ F: net/appletalk/ ARASAN COMPACT FLASH PATA CONTROLLER -M: Viresh Kumar +M: Viresh Kumar L: linux-ide@vger.kernel.org S: Maintained F: include/linux/pata_arasan_cf_data.h @@ -5296,7 +5296,7 @@ S: Maintained F: drivers/pinctrl/ PIN CONTROLLER - ST SPEAR -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear @@ -5873,7 +5873,7 @@ S: Maintained F: drivers/tty/serial SYNOPSYS DESIGNWARE DMAC DRIVER -M: Viresh Kumar +M: Viresh Kumar S: Maintained F: include/linux/dw_dmac.h F: drivers/dma/dw_dmac_regs.h @@ -6021,7 +6021,7 @@ S: Maintained F: drivers/mmc/host/sdhci-s3c.c SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-mmc@vger.kernel.org S: Maintained @@ -6377,7 +6377,7 @@ S: Maintained F: include/linux/compiler.h SPEAR PLATFORM SUPPORT -M: Viresh Kumar +M: Viresh Kumar M: Shiraz Hashim L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -6386,7 +6386,7 @@ S: Maintained F: arch/arm/plat-spear/ SPEAR13XX MACHINE SUPPORT -M: Viresh Kumar +M: Viresh Kumar M: Shiraz Hashim L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -6395,7 +6395,7 @@ S: Maintained F: arch/arm/mach-spear13xx/ SPEAR3XX MACHINE SUPPORT -M: Viresh Kumar +M: Viresh Kumar M: Shiraz Hashim L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@ -6406,7 +6406,7 @@ F: arch/arm/mach-spear3xx/ SPEAR6XX MACHINE SUPPORT M: Rajeev Kumar M: Shiraz Hashim -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear @@ -6414,7 +6414,7 @@ S: Maintained F: arch/arm/mach-spear6xx/ SPEAR CLOCK FRAMEWORK SUPPORT -M: Viresh Kumar +M: Viresh Kumar L: spear-devel@list.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts index 8314e4171884..dd4358bc26e2 100644 --- a/arch/arm/boot/dts/spear1310-evb.dts +++ b/arch/arm/boot/dts/spear1310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1310 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi index 9e61da404d57..419ea7413d23 100644 --- a/arch/arm/boot/dts/spear1310.dtsi +++ b/arch/arm/boot/dts/spear1310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1310 SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts index 0d8472e5ab9f..c9a54e06fb68 100644 --- a/arch/arm/boot/dts/spear1340-evb.dts +++ b/arch/arm/boot/dts/spear1340-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr1340 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi index a26fc47a55e8..d71fe2a68f09 100644 --- a/arch/arm/boot/dts/spear1340.dtsi +++ b/arch/arm/boot/dts/spear1340.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr1340 SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi index 1f8e1e1481df..10dcec7e7321 100644 --- a/arch/arm/boot/dts/spear13xx.dtsi +++ b/arch/arm/boot/dts/spear13xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr13xx SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts index fc82b1a26458..d71b8d581e3d 100644 --- a/arch/arm/boot/dts/spear300-evb.dts +++ b/arch/arm/boot/dts/spear300-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi index 01c5e358fdb2..ed3627c116cc 100644 --- a/arch/arm/boot/dts/spear300.dtsi +++ b/arch/arm/boot/dts/spear300.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr300 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts index dc5e2d445a93..b00544e0cd5d 100644 --- a/arch/arm/boot/dts/spear310-evb.dts +++ b/arch/arm/boot/dts/spear310-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi index e47081c494d9..62fc4fb3e5f9 100644 --- a/arch/arm/boot/dts/spear310.dtsi +++ b/arch/arm/boot/dts/spear310.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr310 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts index 6308fa3bec1e..c13fd1f3b09f 100644 --- a/arch/arm/boot/dts/spear320-evb.dts +++ b/arch/arm/boot/dts/spear320-evb.dts @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 Evaluation Baord * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi index 5372ca399b1f..1f49d69595a0 100644 --- a/arch/arm/boot/dts/spear320.dtsi +++ b/arch/arm/boot/dts/spear320.dtsi @@ -1,7 +1,7 @@ /* * DTS file for SPEAr320 SoC * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi index 91072553963f..3a8bb5736928 100644 --- a/arch/arm/boot/dts/spear3xx.dtsi +++ b/arch/arm/boot/dts/spear3xx.dtsi @@ -1,7 +1,7 @@ /* * DTS file for all SPEAr3xx SoCs * - * Copyright 2012 Viresh Kumar + * Copyright 2012 Viresh Kumar * * The code contained herein is licensed under the GNU General Public * License. You may obtain a copy of the GNU General Public License diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h index e0d1c0cfa548..6b9b077d86b3 100644 --- a/arch/arm/include/asm/hardware/sp810.h +++ b/arch/arm/include/asm/hardware/sp810.h @@ -4,7 +4,7 @@ * ARM PrimeXsys System Controller SP810 header file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/debug-macro.S b/arch/arm/mach-spear13xx/include/mach/debug-macro.S index ea1564609bd4..9e3ae6bfe50d 100644 --- a/arch/arm/mach-spear13xx/include/mach/debug-macro.S +++ b/arch/arm/mach-spear13xx/include/mach/debug-macro.S @@ -4,7 +4,7 @@ * Debugging macro include header spear13xx machine family * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/dma.h b/arch/arm/mach-spear13xx/include/mach/dma.h index 383ab04dc6c9..d50bdb605925 100644 --- a/arch/arm/mach-spear13xx/include/mach/dma.h +++ b/arch/arm/mach-spear13xx/include/mach/dma.h @@ -4,7 +4,7 @@ * DMA information for SPEAr13xx machine family * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/generic.h b/arch/arm/mach-spear13xx/include/mach/generic.h index 6d8c45b9f298..dac57fd0cdfd 100644 --- a/arch/arm/mach-spear13xx/include/mach/generic.h +++ b/arch/arm/mach-spear13xx/include/mach/generic.h @@ -4,7 +4,7 @@ * spear13xx machine family generic header file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/gpio.h b/arch/arm/mach-spear13xx/include/mach/gpio.h index cd6f4f86a56b..85f176311f63 100644 --- a/arch/arm/mach-spear13xx/include/mach/gpio.h +++ b/arch/arm/mach-spear13xx/include/mach/gpio.h @@ -4,7 +4,7 @@ * GPIO macros for SPEAr13xx machine family * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/irqs.h b/arch/arm/mach-spear13xx/include/mach/irqs.h index f542a24aa5f2..271a62b4cd31 100644 --- a/arch/arm/mach-spear13xx/include/mach/irqs.h +++ b/arch/arm/mach-spear13xx/include/mach/irqs.h @@ -4,7 +4,7 @@ * IRQ helper macros for spear13xx machine family * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/spear.h b/arch/arm/mach-spear13xx/include/mach/spear.h index 30c57ef72686..65f27def239b 100644 --- a/arch/arm/mach-spear13xx/include/mach/spear.h +++ b/arch/arm/mach-spear13xx/include/mach/spear.h @@ -4,7 +4,7 @@ * spear13xx Machine family specific definition * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/timex.h b/arch/arm/mach-spear13xx/include/mach/timex.h index 31af3e8d976e..3a58b8284a6a 100644 --- a/arch/arm/mach-spear13xx/include/mach/timex.h +++ b/arch/arm/mach-spear13xx/include/mach/timex.h @@ -4,7 +4,7 @@ * SPEAr3XX machine family specific timex definitions * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/include/mach/uncompress.h b/arch/arm/mach-spear13xx/include/mach/uncompress.h index c7840896ae6e..70fe72f05dea 100644 --- a/arch/arm/mach-spear13xx/include/mach/uncompress.h +++ b/arch/arm/mach-spear13xx/include/mach/uncompress.h @@ -4,7 +4,7 @@ * Serial port stubs for kernel decompress status messages * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/spear1310.c b/arch/arm/mach-spear13xx/spear1310.c index fefd15b2f380..732d29bc7330 100644 --- a/arch/arm/mach-spear13xx/spear1310.c +++ b/arch/arm/mach-spear13xx/spear1310.c @@ -4,7 +4,7 @@ * SPEAr1310 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/spear1340.c b/arch/arm/mach-spear13xx/spear1340.c index ee38cbc56869..81e4ed76ad06 100644 --- a/arch/arm/mach-spear13xx/spear1340.c +++ b/arch/arm/mach-spear13xx/spear1340.c @@ -4,7 +4,7 @@ * SPEAr1340 machine source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c index 50b349ae863d..cf936b106e27 100644 --- a/arch/arm/mach-spear13xx/spear13xx.c +++ b/arch/arm/mach-spear13xx/spear13xx.c @@ -4,7 +4,7 @@ * SPEAr13XX machines common source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/debug-macro.S b/arch/arm/mach-spear3xx/include/mach/debug-macro.S index 590519f10d6e..0a6381fad5d9 100644 --- a/arch/arm/mach-spear3xx/include/mach/debug-macro.S +++ b/arch/arm/mach-spear3xx/include/mach/debug-macro.S @@ -4,7 +4,7 @@ * Debugging macro include header spear3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/generic.h b/arch/arm/mach-spear3xx/include/mach/generic.h index 4a95b9453c2a..ce19113ca791 100644 --- a/arch/arm/mach-spear3xx/include/mach/generic.h +++ b/arch/arm/mach-spear3xx/include/mach/generic.h @@ -4,7 +4,7 @@ * SPEAr3XX machine family generic header file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/gpio.h b/arch/arm/mach-spear3xx/include/mach/gpio.h index 451b2081bfc9..2ac74c6db7f1 100644 --- a/arch/arm/mach-spear3xx/include/mach/gpio.h +++ b/arch/arm/mach-spear3xx/include/mach/gpio.h @@ -4,7 +4,7 @@ * GPIO macros for SPEAr3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/irqs.h b/arch/arm/mach-spear3xx/include/mach/irqs.h index 51bd62a0254c..803de76f5f36 100644 --- a/arch/arm/mach-spear3xx/include/mach/irqs.h +++ b/arch/arm/mach-spear3xx/include/mach/irqs.h @@ -4,7 +4,7 @@ * IRQ helper macros for SPEAr3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/misc_regs.h b/arch/arm/mach-spear3xx/include/mach/misc_regs.h index 18e2ac576f25..6309bf68d6f8 100644 --- a/arch/arm/mach-spear3xx/include/mach/misc_regs.h +++ b/arch/arm/mach-spear3xx/include/mach/misc_regs.h @@ -4,7 +4,7 @@ * Miscellaneous registers definitions for SPEAr3xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/spear.h b/arch/arm/mach-spear3xx/include/mach/spear.h index 51eb953148a9..8cca95193d4d 100644 --- a/arch/arm/mach-spear3xx/include/mach/spear.h +++ b/arch/arm/mach-spear3xx/include/mach/spear.h @@ -4,7 +4,7 @@ * SPEAr3xx Machine family specific definition * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/timex.h b/arch/arm/mach-spear3xx/include/mach/timex.h index a38cc9de876f..9f5d08bd0c44 100644 --- a/arch/arm/mach-spear3xx/include/mach/timex.h +++ b/arch/arm/mach-spear3xx/include/mach/timex.h @@ -4,7 +4,7 @@ * SPEAr3XX machine family specific timex definitions * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/include/mach/uncompress.h b/arch/arm/mach-spear3xx/include/mach/uncompress.h index 53ba8bbc0dfa..b909b011f7c8 100644 --- a/arch/arm/mach-spear3xx/include/mach/uncompress.h +++ b/arch/arm/mach-spear3xx/include/mach/uncompress.h @@ -4,7 +4,7 @@ * Serial port stubs for kernel decompress status messages * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/spear300.c b/arch/arm/mach-spear3xx/spear300.c index f74a05bdb829..0f882ecb7d81 100644 --- a/arch/arm/mach-spear3xx/spear300.c +++ b/arch/arm/mach-spear3xx/spear300.c @@ -4,7 +4,7 @@ * SPEAr300 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/spear310.c b/arch/arm/mach-spear3xx/spear310.c index 84dfb0900747..bbcf4571d361 100644 --- a/arch/arm/mach-spear3xx/spear310.c +++ b/arch/arm/mach-spear3xx/spear310.c @@ -4,7 +4,7 @@ * SPEAr310 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/spear320.c b/arch/arm/mach-spear3xx/spear320.c index a88fa841d29d..88d483bcd66a 100644 --- a/arch/arm/mach-spear3xx/spear320.c +++ b/arch/arm/mach-spear3xx/spear320.c @@ -4,7 +4,7 @@ * SPEAr320 machine source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c index f22419ed74a8..0f41bd1c47c3 100644 --- a/arch/arm/mach-spear3xx/spear3xx.c +++ b/arch/arm/mach-spear3xx/spear3xx.c @@ -4,7 +4,7 @@ * SPEAr3XX machines common source file * * Copyright (C) 2009-2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear6xx/include/mach/gpio.h b/arch/arm/mach-spear6xx/include/mach/gpio.h index 3a789dbb69f7..d42cefc0356d 100644 --- a/arch/arm/mach-spear6xx/include/mach/gpio.h +++ b/arch/arm/mach-spear6xx/include/mach/gpio.h @@ -4,7 +4,7 @@ * GPIO macros for SPEAr6xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/mach-spear6xx/include/mach/misc_regs.h b/arch/arm/mach-spear6xx/include/mach/misc_regs.h index 179e45774b3a..c34acc201d34 100644 --- a/arch/arm/mach-spear6xx/include/mach/misc_regs.h +++ b/arch/arm/mach-spear6xx/include/mach/misc_regs.h @@ -4,7 +4,7 @@ * Miscellaneous registers definitions for SPEAr6xx machine family * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/include/plat/debug-macro.S b/arch/arm/plat-spear/include/plat/debug-macro.S index ab3de721c5db..75b05ad0fbad 100644 --- a/arch/arm/plat-spear/include/plat/debug-macro.S +++ b/arch/arm/plat-spear/include/plat/debug-macro.S @@ -4,7 +4,7 @@ * Debugging macro include header for spear platform * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/include/plat/pl080.h b/arch/arm/plat-spear/include/plat/pl080.h index e14a3e4932f9..2bc6b54460a8 100644 --- a/arch/arm/plat-spear/include/plat/pl080.h +++ b/arch/arm/plat-spear/include/plat/pl080.h @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/include/plat/shirq.h b/arch/arm/plat-spear/include/plat/shirq.h index 03ed8b585dcf..88a7fbd24793 100644 --- a/arch/arm/plat-spear/include/plat/shirq.h +++ b/arch/arm/plat-spear/include/plat/shirq.h @@ -4,7 +4,7 @@ * SPEAr platform shared irq layer header file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/include/plat/timex.h b/arch/arm/plat-spear/include/plat/timex.h index 914d09dd50fd..ef95e5b780bd 100644 --- a/arch/arm/plat-spear/include/plat/timex.h +++ b/arch/arm/plat-spear/include/plat/timex.h @@ -4,7 +4,7 @@ * SPEAr platform specific timex definitions * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/include/plat/uncompress.h b/arch/arm/plat-spear/include/plat/uncompress.h index 6dd455bafdfd..2ce6cb17a98b 100644 --- a/arch/arm/plat-spear/include/plat/uncompress.h +++ b/arch/arm/plat-spear/include/plat/uncompress.h @@ -4,7 +4,7 @@ * Serial port stubs for kernel decompress status messages * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/pl080.c b/arch/arm/plat-spear/pl080.c index a56a067717c1..12cf27f935f9 100644 --- a/arch/arm/plat-spear/pl080.c +++ b/arch/arm/plat-spear/pl080.c @@ -4,7 +4,7 @@ * DMAC pl080 definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/restart.c b/arch/arm/plat-spear/restart.c index ea0a61302b7e..4f990115b1bd 100644 --- a/arch/arm/plat-spear/restart.c +++ b/arch/arm/plat-spear/restart.c @@ -4,7 +4,7 @@ * SPEAr platform specific restart functions * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/arch/arm/plat-spear/shirq.c b/arch/arm/plat-spear/shirq.c index 961fb7261243..853e891e1184 100644 --- a/arch/arm/plat-spear/shirq.c +++ b/arch/arm/plat-spear/shirq.c @@ -4,7 +4,7 @@ * SPEAr platform shared irq layer source file * * Copyright (C) 2009 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 3239517f4d90..ac6a5beb28f3 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller source file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -959,7 +959,7 @@ static struct platform_driver arasan_cf_driver = { module_platform_driver(arasan_cf_driver); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("Arasan ATA Compact Flash driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/clk/spear/clk-aux-synth.c b/drivers/clk/spear/clk-aux-synth.c index af34074e702b..6756e7c3bc07 100644 --- a/drivers/clk/spear/clk-aux-synth.c +++ b/drivers/clk/spear/clk-aux-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-frac-synth.c b/drivers/clk/spear/clk-frac-synth.c index 4dbdb3fe18e0..958aa3ad1d60 100644 --- a/drivers/clk/spear/clk-frac-synth.c +++ b/drivers/clk/spear/clk-frac-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-gpt-synth.c b/drivers/clk/spear/clk-gpt-synth.c index b471c9762a97..1afc18c4effc 100644 --- a/drivers/clk/spear/clk-gpt-synth.c +++ b/drivers/clk/spear/clk-gpt-synth.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk-vco-pll.c b/drivers/clk/spear/clk-vco-pll.c index dcd4bdf4b0d9..5f1b6badeb15 100644 --- a/drivers/clk/spear/clk-vco-pll.c +++ b/drivers/clk/spear/clk-vco-pll.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk.c b/drivers/clk/spear/clk.c index 376d4e5ff326..7cd63788d546 100644 --- a/drivers/clk/spear/clk.c +++ b/drivers/clk/spear/clk.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/clk.h b/drivers/clk/spear/clk.h index 3321c46a071c..931737677dfa 100644 --- a/drivers/clk/spear/clk.h +++ b/drivers/clk/spear/clk.h @@ -2,7 +2,7 @@ * Clock framework definitions for SPEAr platform * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c index 42b68df9aeef..8f05652d53e6 100644 --- a/drivers/clk/spear/spear1310_clock.c +++ b/drivers/clk/spear/spear1310_clock.c @@ -4,7 +4,7 @@ * SPEAr1310 machine clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c index f130919d5bf8..e3ea72162236 100644 --- a/drivers/clk/spear/spear1340_clock.c +++ b/drivers/clk/spear/spear1340_clock.c @@ -4,7 +4,7 @@ * SPEAr1340 machine clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c index 440bb3e4c971..01dd6daff2a1 100644 --- a/drivers/clk/spear/spear3xx_clock.c +++ b/drivers/clk/spear/spear3xx_clock.c @@ -2,7 +2,7 @@ * SPEAr3xx machines clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c index f9a20b382304..554d64b062a1 100644 --- a/drivers/clk/spear/spear6xx_clock.c +++ b/drivers/clk/spear/spear6xx_clock.c @@ -2,7 +2,7 @@ * SPEAr6xx machines clock framework source file * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index e23dc82d43ac..721296157577 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1626,4 +1626,4 @@ module_exit(dw_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); MODULE_AUTHOR("Haavard Skinnemoen (Atmel)"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c index 373f423b1181..947a06a1845f 100644 --- a/drivers/mfd/stmpe-i2c.c +++ b/drivers/mfd/stmpe-i2c.c @@ -6,7 +6,7 @@ * * License Terms: GNU General Public License, version 2 * Author: Rabin Vincent for ST-Ericsson - * Author: Viresh Kumar for ST Microelectronics + * Author: Viresh Kumar for ST Microelectronics */ #include diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c index afd459013ecb..9edfe864cc05 100644 --- a/drivers/mfd/stmpe-spi.c +++ b/drivers/mfd/stmpe-spi.c @@ -4,7 +4,7 @@ * Copyright (C) ST Microelectronics SA 2011 * * License Terms: GNU General Public License, version 2 - * Author: Viresh Kumar for ST Microelectronics + * Author: Viresh Kumar for ST Microelectronics */ #include @@ -146,4 +146,4 @@ module_exit(stmpe_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("STMPE MFD SPI Interface Driver"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c index 1fe32dfa7cd4..423da8194cd8 100644 --- a/drivers/mmc/host/sdhci-spear.c +++ b/drivers/mmc/host/sdhci-spear.c @@ -4,7 +4,7 @@ * Support of SDHCI platform devices for spear soc family * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * Inspired by sdhci-pltfm.c * @@ -289,5 +289,5 @@ static struct platform_driver sdhci_driver = { module_platform_driver(sdhci_driver); MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver"); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_LICENSE("GPL v2"); diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index 5ae50aadf885..b3f6b2873fdd 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * Inspired from: * - U300 Pinctl drivers diff --git a/drivers/pinctrl/spear/pinctrl-spear.h b/drivers/pinctrl/spear/pinctrl-spear.h index 9155783bb47f..d950eb78d939 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.h +++ b/drivers/pinctrl/spear/pinctrl-spear.h @@ -2,7 +2,7 @@ * Driver header file for the ST Microelectronics SPEAr pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/pinctrl/spear/pinctrl-spear1310.c b/drivers/pinctrl/spear/pinctrl-spear1310.c index fff168be7f00..d6cca8c81b92 100644 --- a/drivers/pinctrl/spear/pinctrl-spear1310.c +++ b/drivers/pinctrl/spear/pinctrl-spear1310.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr1310 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -2192,7 +2192,7 @@ static void __exit spear1310_pinctrl_exit(void) } module_exit(spear1310_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr1310 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear1310_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear1340.c b/drivers/pinctrl/spear/pinctrl-spear1340.c index a8ab2a6f51bf..a0eb057e55bd 100644 --- a/drivers/pinctrl/spear/pinctrl-spear1340.c +++ b/drivers/pinctrl/spear/pinctrl-spear1340.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr1340 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -1983,7 +1983,7 @@ static void __exit spear1340_pinctrl_exit(void) } module_exit(spear1340_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr1340 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear1340_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear300.c b/drivers/pinctrl/spear/pinctrl-spear300.c index 9c82a35e4e78..4dfc2849b172 100644 --- a/drivers/pinctrl/spear/pinctrl-spear300.c +++ b/drivers/pinctrl/spear/pinctrl-spear300.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr300 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -702,7 +702,7 @@ static void __exit spear300_pinctrl_exit(void) } module_exit(spear300_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr300 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear300_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear310.c b/drivers/pinctrl/spear/pinctrl-spear310.c index 1a9707605125..96883693fb7e 100644 --- a/drivers/pinctrl/spear/pinctrl-spear310.c +++ b/drivers/pinctrl/spear/pinctrl-spear310.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr310 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -425,7 +425,7 @@ static void __exit spear310_pinctrl_exit(void) } module_exit(spear310_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr310 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, SPEAr310_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear320.c b/drivers/pinctrl/spear/pinctrl-spear320.c index de726e6c283a..020b1e0bdb3e 100644 --- a/drivers/pinctrl/spear/pinctrl-spear320.c +++ b/drivers/pinctrl/spear/pinctrl-spear320.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr320 pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any @@ -3462,7 +3462,7 @@ static void __exit spear320_pinctrl_exit(void) } module_exit(spear320_pinctrl_exit); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ST Microelectronics SPEAr320 pinctrl driver"); MODULE_LICENSE("GPL v2"); MODULE_DEVICE_TABLE(of, spear320_pinctrl_of_match); diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.c b/drivers/pinctrl/spear/pinctrl-spear3xx.c index 91c883bc46a6..0242378f7cb8 100644 --- a/drivers/pinctrl/spear/pinctrl-spear3xx.c +++ b/drivers/pinctrl/spear/pinctrl-spear3xx.c @@ -2,7 +2,7 @@ * Driver for the ST Microelectronics SPEAr3xx pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.h b/drivers/pinctrl/spear/pinctrl-spear3xx.h index 5d5fdd8df7b8..31f44347f17c 100644 --- a/drivers/pinctrl/spear/pinctrl-spear3xx.h +++ b/drivers/pinctrl/spear/pinctrl-spear3xx.h @@ -2,7 +2,7 @@ * Header file for the ST Microelectronics SPEAr3xx pinmux * * Copyright (C) 2012 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index afcd13676542..e4841c36798b 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -4,7 +4,7 @@ * Watchdog driver for ARM SP805 watchdog module * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2 or later. This program is licensed "as is" without any @@ -331,6 +331,6 @@ static struct amba_driver sp805_wdt_driver = { module_amba_driver(sp805_wdt_driver); -MODULE_AUTHOR("Viresh Kumar "); +MODULE_AUTHOR("Viresh Kumar "); MODULE_DESCRIPTION("ARM SP805 Watchdog Driver"); MODULE_LICENSE("GPL"); diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h index 5cdc96da9dd5..e78c0e236e9d 100644 --- a/include/linux/mmc/sdhci-spear.h +++ b/include/linux/mmc/sdhci-spear.h @@ -4,7 +4,7 @@ * SDHCI declarations specific to ST SPEAr platform * * Copyright (C) 2010 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h index a6ee9aa898bb..a7b4fc386e63 100644 --- a/include/linux/pata_arasan_cf_data.h +++ b/include/linux/pata_arasan_cf_data.h @@ -4,7 +4,7 @@ * Arasan Compact Flash host controller platform data header file * * Copyright (C) 2011 ST Microelectronics - * Viresh Kumar + * Viresh Kumar * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any -- cgit v1.2.3 From d3decf3a0c1d28c80c76be170373f4c7a7217f09 Mon Sep 17 00:00:00 2001 From: Ozan Çağlayan Date: Thu, 14 Jun 2012 15:02:35 +0300 Subject: vga_switcheroo: Add include guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Guard vga_switcheroo.h against multiple inclusion. Signed-off-by: Ozan Çağlayan Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index d844b7790ea6..ddb419cf4530 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -7,6 +7,9 @@ * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs */ +#ifndef _LINUX_VGA_SWITCHEROO_H_ +#define _LINUX_VGA_SWITCHEROO_H_ + #include struct pci_dev; @@ -73,3 +76,4 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return #endif +#endif /* _LINUX_VGA_SWITCHEROO_H_ */ -- cgit v1.2.3 From 87fac288083db40b5d5ab845393be268357c8827 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Jun 2012 15:30:46 -0700 Subject: linux/irq.h: fix kernel-doc warning Fix kernel-doc warning. This struct member was removed in commit 875682648b89 ("irq: Remove irq_chip->release()") so remove its associated kernel-doc entry also. Warning(include/linux/irq.h:338): Excess struct/union/enum/typedef member 'release' description in 'irq_chip' Signed-off-by: Randy Dunlap Cc: Richard Weinberger Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 61f5cec031e0..a5261e3d2e3c 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -301,8 +301,6 @@ static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) * @irq_pm_shutdown: function called from core code on shutdown once per chip * @irq_print_chip: optional to print special chip info in show_interrupts * @flags: chip specific flags - * - * @release: release function solely used by UML */ struct irq_chip { const char *name; -- cgit v1.2.3 From cba6d0d64ee53772b285d0c0c288deefbeaf7775 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 2 Jul 2012 07:08:42 -0700 Subject: Revert "rcu: Move PREEMPT_RCU preemption to switch_to() invocation" This reverts commit 616c310e83b872024271c915c1b9ab505b9efad9. (Move PREEMPT_RCU preemption to switch_to() invocation). Testing by Sasha Levin showed that this can result in deadlock due to invoking the scheduler when one of the runqueue locks is held. Because this commit was simply a performance optimization, revert it. Reported-by: Sasha Levin Signed-off-by: Paul E. McKenney Tested-by: Sasha Levin --- arch/um/drivers/mconsole_kern.c | 1 - include/linux/rcupdate.h | 1 - include/linux/rcutiny.h | 6 ++++++ include/linux/sched.h | 10 ---------- kernel/rcutree.c | 1 + kernel/rcutree.h | 1 + kernel/rcutree_plugin.h | 14 +++++++++++--- kernel/sched/core.c | 1 - 8 files changed, 19 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 88e466b159dc..43b39d61b538 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -705,7 +705,6 @@ static void stack_proc(void *arg) struct task_struct *from = current, *to = arg; to->thread.saved_task = from; - rcu_switch_from(from); switch_to(from, to, from); } diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 26d1a47591f1..9cac722b169c 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -184,7 +184,6 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ extern void rcu_sched_qs(int cpu); extern void rcu_bh_qs(int cpu); -extern void rcu_preempt_note_context_switch(void); extern void rcu_check_callbacks(int cpu, int user); struct notifier_block; extern void rcu_idle_enter(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 854dc4c5c271..4e56a9c69a35 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -87,6 +87,10 @@ static inline void kfree_call_rcu(struct rcu_head *head, #ifdef CONFIG_TINY_RCU +static inline void rcu_preempt_note_context_switch(void) +{ +} + static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) { *delta_jiffies = ULONG_MAX; @@ -95,6 +99,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) #else /* #ifdef CONFIG_TINY_RCU */ +void rcu_preempt_note_context_switch(void); int rcu_preempt_needs_cpu(void); static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) @@ -108,6 +113,7 @@ static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) static inline void rcu_note_context_switch(int cpu) { rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(); } /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f07..06a4c5f4f55c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1871,22 +1871,12 @@ static inline void rcu_copy_process(struct task_struct *p) INIT_LIST_HEAD(&p->rcu_node_entry); } -static inline void rcu_switch_from(struct task_struct *prev) -{ - if (prev->rcu_read_lock_nesting != 0) - rcu_preempt_note_context_switch(); -} - #else static inline void rcu_copy_process(struct task_struct *p) { } -static inline void rcu_switch_from(struct task_struct *prev) -{ -} - #endif #ifdef CONFIG_SMP diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 38ecdda3f55f..4b97bba7396e 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -201,6 +201,7 @@ void rcu_note_context_switch(int cpu) { trace_rcu_utilization("Start context switch"); rcu_sched_qs(cpu); + rcu_preempt_note_context_switch(cpu); trace_rcu_utilization("End context switch"); } EXPORT_SYMBOL_GPL(rcu_note_context_switch); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index ea056495783e..19b61ac1079f 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -444,6 +444,7 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); long rcu_batches_completed(void); +static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 5271a020887e..3e4899459f3d 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -153,7 +153,7 @@ static void rcu_preempt_qs(int cpu) * * Caller must disable preemption. */ -void rcu_preempt_note_context_switch(void) +static void rcu_preempt_note_context_switch(int cpu) { struct task_struct *t = current; unsigned long flags; @@ -164,7 +164,7 @@ void rcu_preempt_note_context_switch(void) (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { /* Possibly blocking in an RCU read-side critical section. */ - rdp = __this_cpu_ptr(rcu_preempt_state.rda); + rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; @@ -228,7 +228,7 @@ void rcu_preempt_note_context_switch(void) * means that we continue to block the current grace period. */ local_irq_save(flags); - rcu_preempt_qs(smp_processor_id()); + rcu_preempt_qs(cpu); local_irq_restore(flags); } @@ -1001,6 +1001,14 @@ void rcu_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); +/* + * Because preemptible RCU does not exist, we never have to check for + * CPUs being in quiescent states. + */ +static void rcu_preempt_note_context_switch(int cpu) +{ +} + /* * Because preemptible RCU does not exist, there are never any preempted * RCU readers. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d5594a4268d4..eaead2df6aa8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2081,7 +2081,6 @@ context_switch(struct rq *rq, struct task_struct *prev, #endif /* Here we just switch the register state and the stack. */ - rcu_switch_from(prev); switch_to(prev, next, prev); barrier(); -- cgit v1.2.3 From d4db2935e4fffeba42540b0dc9d85e3036701221 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 29 Jun 2012 09:56:08 -0600 Subject: KVM: Pass kvm_irqfd to functions Prune this down to just the struct kvm_irqfd so we can avoid changing function definition for every flag or field we use. Signed-off-by: Alex Williamson Acked-by: Cornelia Huck Signed-off-by: Marcelo Tosatti --- include/linux/kvm_host.h | 4 ++-- virt/kvm/eventfd.c | 20 ++++++++++---------- virt/kvm/kvm_main.c | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index c4464356b35b..96c158a37d3e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -815,7 +815,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {} #ifdef CONFIG_HAVE_KVM_EVENTFD void kvm_eventfd_init(struct kvm *kvm); -int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags); +int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args); void kvm_irqfd_release(struct kvm *kvm); void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *); int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); @@ -824,7 +824,7 @@ int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args); static inline void kvm_eventfd_init(struct kvm *kvm) {} -static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { return -EINVAL; } diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f59c1e8de7a2..c307c24c147a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -198,7 +198,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, } static int -kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) +kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { struct kvm_irq_routing_table *irq_rt; struct _irqfd *irqfd, *tmp; @@ -212,12 +212,12 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi) return -ENOMEM; irqfd->kvm = kvm; - irqfd->gsi = gsi; + irqfd->gsi = args->gsi; INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); - file = eventfd_fget(fd); + file = eventfd_fget(args->fd); if (IS_ERR(file)) { ret = PTR_ERR(file); goto fail; @@ -298,19 +298,19 @@ kvm_eventfd_init(struct kvm *kvm) * shutdown any irqfd's that match fd+gsi */ static int -kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) +kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { struct _irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; - eventfd = eventfd_ctx_fdget(fd); + eventfd = eventfd_ctx_fdget(args->fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd); spin_lock_irq(&kvm->irqfds.lock); list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { - if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) { + if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* * This rcu_assign_pointer is needed for when * another thread calls kvm_irq_routing_update before @@ -338,12 +338,12 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi) } int -kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags) +kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) { - if (flags & KVM_IRQFD_FLAG_DEASSIGN) - return kvm_irqfd_deassign(kvm, fd, gsi); + if (args->flags & KVM_IRQFD_FLAG_DEASSIGN) + return kvm_irqfd_deassign(kvm, args); - return kvm_irqfd_assign(kvm, fd, gsi); + return kvm_irqfd_assign(kvm, args); } /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7e140683ff14..e98a5cac55c4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2047,7 +2047,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&data, argp, sizeof data)) goto out; - r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags); + r = kvm_irqfd(kvm, &data); break; } case KVM_IOEVENTFD: { -- cgit v1.2.3 From 5a081caa0414b9bbb82c17ffab9d6fe66edbb72f Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Wed, 6 Jun 2012 10:09:25 +0300 Subject: rpmsg: avoid premature deallocation of endpoints When an inbound message arrives, the rpmsg core looks up its associated endpoint and invokes the registered callback. If a message arrives while its endpoint is being removed (because the rpmsg driver was removed, or a recovery of a remote processor has kicked in) we must ensure atomicity, i.e.: - Either the ept is removed before it is found or - The ept is found but will not be freed until the callback returns This is achieved by maintaining a per-ept reference count, which, when drops to zero, will trigger deallocation of the ept. With this in hand, it is now forbidden to directly deallocate epts once they have been added to the endpoints idr. Cc: stable Reported-by: Fernando Guzman Lugo Signed-off-by: Ohad Ben-Cohen --- drivers/rpmsg/virtio_rpmsg_bus.c | 36 ++++++++++++++++++++++++++++++++++-- include/linux/rpmsg.h | 3 +++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 75506ec2840e..9623327ba509 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -188,6 +188,26 @@ static int rpmsg_uevent(struct device *dev, struct kobj_uevent_env *env) rpdev->id.name); } +/** + * __ept_release() - deallocate an rpmsg endpoint + * @kref: the ept's reference count + * + * This function deallocates an ept, and is invoked when its @kref refcount + * drops to zero. + * + * Never invoke this function directly! + */ +static void __ept_release(struct kref *kref) +{ + struct rpmsg_endpoint *ept = container_of(kref, struct rpmsg_endpoint, + refcount); + /* + * At this point no one holds a reference to ept anymore, + * so we can directly free it + */ + kfree(ept); +} + /* for more info, see below documentation of rpmsg_create_ept() */ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, struct rpmsg_channel *rpdev, rpmsg_rx_cb_t cb, @@ -206,6 +226,8 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, return NULL; } + kref_init(&ept->refcount); + ept->rpdev = rpdev; ept->cb = cb; ept->priv = priv; @@ -238,7 +260,7 @@ rem_idr: idr_remove(&vrp->endpoints, request); free_ept: mutex_unlock(&vrp->endpoints_lock); - kfree(ept); + kref_put(&ept->refcount, __ept_release); return NULL; } @@ -306,7 +328,7 @@ __rpmsg_destroy_ept(struct virtproc_info *vrp, struct rpmsg_endpoint *ept) idr_remove(&vrp->endpoints, ept->addr); mutex_unlock(&vrp->endpoints_lock); - kfree(ept); + kref_put(&ept->refcount, __ept_release); } /** @@ -790,7 +812,13 @@ static void rpmsg_recv_done(struct virtqueue *rvq) /* use the dst addr to fetch the callback of the appropriate user */ mutex_lock(&vrp->endpoints_lock); + ept = idr_find(&vrp->endpoints, msg->dst); + + /* let's make sure no one deallocates ept while we use it */ + if (ept) + kref_get(&ept->refcount); + mutex_unlock(&vrp->endpoints_lock); if (ept && ept->cb) @@ -798,6 +826,10 @@ static void rpmsg_recv_done(struct virtqueue *rvq) else dev_warn(dev, "msg received with no recepient\n"); + /* farewell, ept, we don't need you anymore */ + if (ept) + kref_put(&ept->refcount, __ept_release); + /* publish the real size of the buffer */ sg_init_one(&sg, msg, RPMSG_BUF_SIZE); diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index a8e50e44203c..195f373590b8 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -38,6 +38,7 @@ #include #include #include +#include /* The feature bitmap for virtio rpmsg */ #define VIRTIO_RPMSG_F_NS 0 /* RP supports name service notifications */ @@ -120,6 +121,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); /** * struct rpmsg_endpoint - binds a local rpmsg address to its user * @rpdev: rpmsg channel device + * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler * @addr: local rpmsg address * @priv: private data for the driver's use @@ -140,6 +142,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); */ struct rpmsg_endpoint { struct rpmsg_channel *rpdev; + struct kref refcount; rpmsg_rx_cb_t cb; u32 addr; void *priv; -- cgit v1.2.3 From 15fd943af50dbc5f7f4de33835795c72595f7bf4 Mon Sep 17 00:00:00 2001 From: Ohad Ben-Cohen Date: Thu, 7 Jun 2012 15:39:35 +0300 Subject: rpmsg: make sure inflight messages don't invoke just-removed callbacks When inbound messages arrive, rpmsg core looks up their associated endpoint (by destination address) and then invokes their callback. We've made sure that endpoints will never be de-allocated after they were found by rpmsg core, but we also need to protect against the (rare) scenario where the rpmsg driver was just removed, and its callback function isn't available anymore. This is achieved by introducing a callback mutex, which must be taken before the callback is invoked, and, obviously, before it is removed. Cc: stable Reported-by: Fernando Guzman Lugo Signed-off-by: Ohad Ben-Cohen --- drivers/rpmsg/virtio_rpmsg_bus.c | 25 +++++++++++++++++++------ include/linux/rpmsg.h | 3 +++ 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/rpmsg/virtio_rpmsg_bus.c b/drivers/rpmsg/virtio_rpmsg_bus.c index 9623327ba509..39d3aa41adda 100644 --- a/drivers/rpmsg/virtio_rpmsg_bus.c +++ b/drivers/rpmsg/virtio_rpmsg_bus.c @@ -227,6 +227,7 @@ static struct rpmsg_endpoint *__rpmsg_create_ept(struct virtproc_info *vrp, } kref_init(&ept->refcount); + mutex_init(&ept->cb_lock); ept->rpdev = rpdev; ept->cb = cb; @@ -324,10 +325,16 @@ EXPORT_SYMBOL(rpmsg_create_ept); static void __rpmsg_destroy_ept(struct virtproc_info *vrp, struct rpmsg_endpoint *ept) { + /* make sure new inbound messages can't find this ept anymore */ mutex_lock(&vrp->endpoints_lock); idr_remove(&vrp->endpoints, ept->addr); mutex_unlock(&vrp->endpoints_lock); + /* make sure in-flight inbound messages won't invoke cb anymore */ + mutex_lock(&ept->cb_lock); + ept->cb = NULL; + mutex_unlock(&ept->cb_lock); + kref_put(&ept->refcount, __ept_release); } @@ -821,14 +828,20 @@ static void rpmsg_recv_done(struct virtqueue *rvq) mutex_unlock(&vrp->endpoints_lock); - if (ept && ept->cb) - ept->cb(ept->rpdev, msg->data, msg->len, ept->priv, msg->src); - else - dev_warn(dev, "msg received with no recepient\n"); + if (ept) { + /* make sure ept->cb doesn't go away while we use it */ + mutex_lock(&ept->cb_lock); - /* farewell, ept, we don't need you anymore */ - if (ept) + if (ept->cb) + ept->cb(ept->rpdev, msg->data, msg->len, ept->priv, + msg->src); + + mutex_unlock(&ept->cb_lock); + + /* farewell, ept, we don't need you anymore */ kref_put(&ept->refcount, __ept_release); + } else + dev_warn(dev, "msg received with no recepient\n"); /* publish the real size of the buffer */ sg_init_one(&sg, msg, RPMSG_BUF_SIZE); diff --git a/include/linux/rpmsg.h b/include/linux/rpmsg.h index 195f373590b8..82a673905edb 100644 --- a/include/linux/rpmsg.h +++ b/include/linux/rpmsg.h @@ -39,6 +39,7 @@ #include #include #include +#include /* The feature bitmap for virtio rpmsg */ #define VIRTIO_RPMSG_F_NS 0 /* RP supports name service notifications */ @@ -123,6 +124,7 @@ typedef void (*rpmsg_rx_cb_t)(struct rpmsg_channel *, void *, int, void *, u32); * @rpdev: rpmsg channel device * @refcount: when this drops to zero, the ept is deallocated * @cb: rx callback handler + * @cb_lock: must be taken before accessing/changing @cb * @addr: local rpmsg address * @priv: private data for the driver's use * @@ -144,6 +146,7 @@ struct rpmsg_endpoint { struct rpmsg_channel *rpdev; struct kref refcount; rpmsg_rx_cb_t cb; + struct mutex cb_lock; u32 addr; void *priv; }; -- cgit v1.2.3 From 2dfd06036ba7ae8e7be2daf5a2fff1dac42390bf Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 27 Jun 2012 17:09:54 +0800 Subject: aio: make kiocb->private NUll in init_sync_kiocb() Ocfs2 uses kiocb.*private as a flag of unsigned long size. In commit a11f7e6 ocfs2: serialize unaligned aio, the unaligned io flag is involved in it to serialize the unaligned aio. As *private is not initialized in init_sync_kiocb() of do_sync_write(), this unaligned io flag may be unexpectly set in an aligned dio. And this will cause OCFS2_I(inode)->ip_unaligned_aio decreased to -1 in ocfs2_dio_end_io(), thus the following unaligned dio will hang forever at ocfs2_aiodio_wait() in ocfs2_file_aio_write(). Signed-off-by: Junxiao Bi Cc: stable@vger.kernel.org Acked-by: Jeff Moyer Signed-off-by: Joel Becker --- include/linux/aio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/aio.h b/include/linux/aio.h index 2314ad8b3c9c..b1a520ec8b59 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -140,6 +140,7 @@ struct kiocb { (x)->ki_dtor = NULL; \ (x)->ki_obj.tsk = tsk; \ (x)->ki_user_data = 0; \ + (x)->private = NULL; \ } while (0) #define AIO_RING_MAGIC 0xa10a10a1 -- cgit v1.2.3 From f567fde24640cf6f2d6416196bfc8b3fefc8e433 Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Wed, 20 Jun 2012 14:14:05 +0530 Subject: gpio: fix bits conflict for gpio flags The bit 2 and 3 in GPIO flag are allocated for the flag OPEN_DRAIN/OPEN_SOURCE. These bits are reused for the flag EXPORT/EXPORT_CHANGEABLE and so creating conflict. Fix this conflict by assigning bit 4 and 5 for the flag EXPORT/EXPORT_CHANGEABLE. Signed-off-by: Laxman Dewangan Signed-off-by: Linus Walleij --- include/linux/gpio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index f07fc2d08159..2e31e8b3a190 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -22,8 +22,8 @@ /* Gpio pin is open source */ #define GPIOF_OPEN_SOURCE (1 << 3) -#define GPIOF_EXPORT (1 << 2) -#define GPIOF_EXPORT_CHANGEABLE (1 << 3) +#define GPIOF_EXPORT (1 << 4) +#define GPIOF_EXPORT_CHANGEABLE (1 << 5) #define GPIOF_EXPORT_DIR_FIXED (GPIOF_EXPORT) #define GPIOF_EXPORT_DIR_CHANGEABLE (GPIOF_EXPORT | GPIOF_EXPORT_CHANGEABLE) -- cgit v1.2.3 From 5167e8d5417bf5c322a703d2927daec727ea40dd Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jun 2012 15:52:09 +0200 Subject: sched/nohz: Rewrite and fix load-avg computation -- again Thanks to Charles Wang for spotting the defects in the current code: - If we go idle during the sample window -- after sampling, we get a negative bias because we can negate our own sample. - If we wake up during the sample window we get a positive bias because we push the sample to a known active period. So rewrite the entire nohz load-avg muck once again, now adding copious documentation to the code. Reported-and-tested-by: Doug Smythies Reported-and-tested-by: Charles Wang Signed-off-by: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1340373782.18025.74.camel@twins [ minor edits ] Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++ kernel/sched/core.c | 275 ++++++++++++++++++++++++++++++++++------------- kernel/sched/idle_task.c | 1 - kernel/sched/sched.h | 2 - kernel/time/tick-sched.c | 2 + 5 files changed, 213 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f33f07..20cb7497c59c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1909,6 +1909,14 @@ static inline int set_cpus_allowed_ptr(struct task_struct *p, } #endif +#ifdef CONFIG_NO_HZ +void calc_load_enter_idle(void); +void calc_load_exit_idle(void); +#else +static inline void calc_load_enter_idle(void) { } +static inline void calc_load_exit_idle(void) { } +#endif /* CONFIG_NO_HZ */ + #ifndef CONFIG_CPUMASK_OFFSTACK static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index d5594a4268d4..bb840405335d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2161,11 +2161,73 @@ unsigned long this_cpu_load(void) } +/* + * Global load-average calculations + * + * We take a distributed and async approach to calculating the global load-avg + * in order to minimize overhead. + * + * The global load average is an exponentially decaying average of nr_running + + * nr_uninterruptible. + * + * Once every LOAD_FREQ: + * + * nr_active = 0; + * for_each_possible_cpu(cpu) + * nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible; + * + * avenrun[n] = avenrun[0] * exp_n + nr_active * (1 - exp_n) + * + * Due to a number of reasons the above turns in the mess below: + * + * - for_each_possible_cpu() is prohibitively expensive on machines with + * serious number of cpus, therefore we need to take a distributed approach + * to calculating nr_active. + * + * \Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0 + * = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) } + * + * So assuming nr_active := 0 when we start out -- true per definition, we + * can simply take per-cpu deltas and fold those into a global accumulate + * to obtain the same result. See calc_load_fold_active(). + * + * Furthermore, in order to avoid synchronizing all per-cpu delta folding + * across the machine, we assume 10 ticks is sufficient time for every + * cpu to have completed this task. + * + * This places an upper-bound on the IRQ-off latency of the machine. Then + * again, being late doesn't loose the delta, just wrecks the sample. + * + * - cpu_rq()->nr_uninterruptible isn't accurately tracked per-cpu because + * this would add another cross-cpu cacheline miss and atomic operation + * to the wakeup path. Instead we increment on whatever cpu the task ran + * when it went into uninterruptible state and decrement on whatever cpu + * did the wakeup. This means that only the sum of nr_uninterruptible over + * all cpus yields the correct result. + * + * This covers the NO_HZ=n code, for extra head-aches, see the comment below. + */ + /* Variables and functions for calc_load */ static atomic_long_t calc_load_tasks; static unsigned long calc_load_update; unsigned long avenrun[3]; -EXPORT_SYMBOL(avenrun); +EXPORT_SYMBOL(avenrun); /* should be removed */ + +/** + * get_avenrun - get the load average array + * @loads: pointer to dest load array + * @offset: offset to add + * @shift: shift count to shift the result left + * + * These values are estimates at best, so no need for locking. + */ +void get_avenrun(unsigned long *loads, unsigned long offset, int shift) +{ + loads[0] = (avenrun[0] + offset) << shift; + loads[1] = (avenrun[1] + offset) << shift; + loads[2] = (avenrun[2] + offset) << shift; +} static long calc_load_fold_active(struct rq *this_rq) { @@ -2182,6 +2244,9 @@ static long calc_load_fold_active(struct rq *this_rq) return delta; } +/* + * a1 = a0 * e + a * (1 - e) + */ static unsigned long calc_load(unsigned long load, unsigned long exp, unsigned long active) { @@ -2193,30 +2258,118 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active) #ifdef CONFIG_NO_HZ /* - * For NO_HZ we delay the active fold to the next LOAD_FREQ update. + * Handle NO_HZ for the global load-average. + * + * Since the above described distributed algorithm to compute the global + * load-average relies on per-cpu sampling from the tick, it is affected by + * NO_HZ. + * + * The basic idea is to fold the nr_active delta into a global idle-delta upon + * entering NO_HZ state such that we can include this as an 'extra' cpu delta + * when we read the global state. + * + * Obviously reality has to ruin such a delightfully simple scheme: + * + * - When we go NO_HZ idle during the window, we can negate our sample + * contribution, causing under-accounting. + * + * We avoid this by keeping two idle-delta counters and flipping them + * when the window starts, thus separating old and new NO_HZ load. + * + * The only trick is the slight shift in index flip for read vs write. + * + * 0s 5s 10s 15s + * +10 +10 +10 +10 + * |-|-----------|-|-----------|-|-----------|-| + * r:0 0 1 1 0 0 1 1 0 + * w:0 1 1 0 0 1 1 0 0 + * + * This ensures we'll fold the old idle contribution in this window while + * accumlating the new one. + * + * - When we wake up from NO_HZ idle during the window, we push up our + * contribution, since we effectively move our sample point to a known + * busy state. + * + * This is solved by pushing the window forward, and thus skipping the + * sample, for this cpu (effectively using the idle-delta for this cpu which + * was in effect at the time the window opened). This also solves the issue + * of having to deal with a cpu having been in NOHZ idle for multiple + * LOAD_FREQ intervals. * * When making the ILB scale, we should try to pull this in as well. */ -static atomic_long_t calc_load_tasks_idle; +static atomic_long_t calc_load_idle[2]; +static int calc_load_idx; -void calc_load_account_idle(struct rq *this_rq) +static inline int calc_load_write_idx(void) { + int idx = calc_load_idx; + + /* + * See calc_global_nohz(), if we observe the new index, we also + * need to observe the new update time. + */ + smp_rmb(); + + /* + * If the folding window started, make sure we start writing in the + * next idle-delta. + */ + if (!time_before(jiffies, calc_load_update)) + idx++; + + return idx & 1; +} + +static inline int calc_load_read_idx(void) +{ + return calc_load_idx & 1; +} + +void calc_load_enter_idle(void) +{ + struct rq *this_rq = this_rq(); long delta; + /* + * We're going into NOHZ mode, if there's any pending delta, fold it + * into the pending idle delta. + */ delta = calc_load_fold_active(this_rq); - if (delta) - atomic_long_add(delta, &calc_load_tasks_idle); + if (delta) { + int idx = calc_load_write_idx(); + atomic_long_add(delta, &calc_load_idle[idx]); + } } -static long calc_load_fold_idle(void) +void calc_load_exit_idle(void) { - long delta = 0; + struct rq *this_rq = this_rq(); + + /* + * If we're still before the sample window, we're done. + */ + if (time_before(jiffies, this_rq->calc_load_update)) + return; /* - * Its got a race, we don't care... + * We woke inside or after the sample window, this means we're already + * accounted through the nohz accounting, so skip the entire deal and + * sync up for the next window. */ - if (atomic_long_read(&calc_load_tasks_idle)) - delta = atomic_long_xchg(&calc_load_tasks_idle, 0); + this_rq->calc_load_update = calc_load_update; + if (time_before(jiffies, this_rq->calc_load_update + 10)) + this_rq->calc_load_update += LOAD_FREQ; +} + +static long calc_load_fold_idle(void) +{ + int idx = calc_load_read_idx(); + long delta = 0; + + if (atomic_long_read(&calc_load_idle[idx])) + delta = atomic_long_xchg(&calc_load_idle[idx], 0); return delta; } @@ -2302,66 +2455,39 @@ static void calc_global_nohz(void) { long delta, active, n; - /* - * If we crossed a calc_load_update boundary, make sure to fold - * any pending idle changes, the respective CPUs might have - * missed the tick driven calc_load_account_active() update - * due to NO_HZ. - */ - delta = calc_load_fold_idle(); - if (delta) - atomic_long_add(delta, &calc_load_tasks); - - /* - * It could be the one fold was all it took, we done! - */ - if (time_before(jiffies, calc_load_update + 10)) - return; - - /* - * Catch-up, fold however many we are behind still - */ - delta = jiffies - calc_load_update - 10; - n = 1 + (delta / LOAD_FREQ); + if (!time_before(jiffies, calc_load_update + 10)) { + /* + * Catch-up, fold however many we are behind still + */ + delta = jiffies - calc_load_update - 10; + n = 1 + (delta / LOAD_FREQ); - active = atomic_long_read(&calc_load_tasks); - active = active > 0 ? active * FIXED_1 : 0; + active = atomic_long_read(&calc_load_tasks); + active = active > 0 ? active * FIXED_1 : 0; - avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); - avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); - avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); + avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n); + avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n); + avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n); - calc_load_update += n * LOAD_FREQ; -} -#else -void calc_load_account_idle(struct rq *this_rq) -{ -} + calc_load_update += n * LOAD_FREQ; + } -static inline long calc_load_fold_idle(void) -{ - return 0; + /* + * Flip the idle index... + * + * Make sure we first write the new time then flip the index, so that + * calc_load_write_idx() will see the new time when it reads the new + * index, this avoids a double flip messing things up. + */ + smp_wmb(); + calc_load_idx++; } +#else /* !CONFIG_NO_HZ */ -static void calc_global_nohz(void) -{ -} -#endif +static inline long calc_load_fold_idle(void) { return 0; } +static inline void calc_global_nohz(void) { } -/** - * get_avenrun - get the load average array - * @loads: pointer to dest load array - * @offset: offset to add - * @shift: shift count to shift the result left - * - * These values are estimates at best, so no need for locking. - */ -void get_avenrun(unsigned long *loads, unsigned long offset, int shift) -{ - loads[0] = (avenrun[0] + offset) << shift; - loads[1] = (avenrun[1] + offset) << shift; - loads[2] = (avenrun[2] + offset) << shift; -} +#endif /* CONFIG_NO_HZ */ /* * calc_load - update the avenrun load estimates 10 ticks after the @@ -2369,11 +2495,18 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift) */ void calc_global_load(unsigned long ticks) { - long active; + long active, delta; if (time_before(jiffies, calc_load_update + 10)) return; + /* + * Fold the 'old' idle-delta to include all NO_HZ cpus. + */ + delta = calc_load_fold_idle(); + if (delta) + atomic_long_add(delta, &calc_load_tasks); + active = atomic_long_read(&calc_load_tasks); active = active > 0 ? active * FIXED_1 : 0; @@ -2384,12 +2517,7 @@ void calc_global_load(unsigned long ticks) calc_load_update += LOAD_FREQ; /* - * Account one period with whatever state we found before - * folding in the nohz state and ageing the entire idle period. - * - * This avoids loosing a sample when we go idle between - * calc_load_account_active() (10 ticks ago) and now and thus - * under-accounting. + * In case we idled for multiple LOAD_FREQ intervals, catch up in bulk. */ calc_global_nohz(); } @@ -2406,13 +2534,16 @@ static void calc_load_account_active(struct rq *this_rq) return; delta = calc_load_fold_active(this_rq); - delta += calc_load_fold_idle(); if (delta) atomic_long_add(delta, &calc_load_tasks); this_rq->calc_load_update += LOAD_FREQ; } +/* + * End of global load-average stuff + */ + /* * The exact cpuload at various idx values, calculated at every tick would be * load = (2^idx - 1) / 2^idx * load + 1 / 2^idx * cur_load diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index b44d604b35d1..b6baf370cae9 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -25,7 +25,6 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl static struct task_struct *pick_next_task_idle(struct rq *rq) { schedstat_inc(rq, sched_goidle); - calc_load_account_idle(rq); return rq->idle; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6d52cea7f33d..55844f24435a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -942,8 +942,6 @@ static inline u64 sched_avg_period(void) return (u64)sysctl_sched_time_avg * NSEC_PER_MSEC / 2; } -void calc_load_account_idle(struct rq *this_rq); - #ifdef CONFIG_SCHED_HRTICK /* diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 869997833928..4a08472c3ca7 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -406,6 +406,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts) */ if (!ts->tick_stopped) { select_nohz_load_balancer(1); + calc_load_enter_idle(); ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -597,6 +598,7 @@ void tick_nohz_idle_exit(void) account_idle_ticks(ticks); #endif + calc_load_exit_idle(); touch_softlockup_watchdog(); /* * Cancel the scheduled timer and restore the tick -- cgit v1.2.3 From c540521bba5d2f24bd2c0417157bfaf8b85e2eee Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 5 Jul 2012 11:23:24 -0700 Subject: security: Minor improvements to no_new_privs documentation The documentation didn't actually mention how to enable no_new_privs. This also adds a note about possible interactions between no_new_privs and LSMs (i.e. why teaching systemd to set no_new_privs is not necessarily a good idea), and it references the new docs from include/linux/prctl.h. Suggested-by: Rob Landley Signed-off-by: Andy Lutomirski Acked-by: Kees Cook Signed-off-by: James Morris --- Documentation/prctl/no_new_privs.txt | 7 +++++++ include/linux/prctl.h | 2 ++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/Documentation/prctl/no_new_privs.txt b/Documentation/prctl/no_new_privs.txt index cb705ec69abe..f7be84fba910 100644 --- a/Documentation/prctl/no_new_privs.txt +++ b/Documentation/prctl/no_new_privs.txt @@ -25,6 +25,13 @@ bits will no longer change the uid or gid; file capabilities will not add to the permitted set, and LSMs will not relax constraints after execve. +To set no_new_privs, use prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0). + +Be careful, though: LSMs might also not tighten constraints on exec +in no_new_privs mode. (This means that setting up a general-purpose +service launcher to set no_new_privs before execing daemons may +interfere with LSM-based sandboxing.) + Note that no_new_privs does not prevent privilege changes that do not involve execve. An appropriately privileged task can still call setuid(2) and receive SCM_RIGHTS datagrams. diff --git a/include/linux/prctl.h b/include/linux/prctl.h index 3988012255dc..289760f424aa 100644 --- a/include/linux/prctl.h +++ b/include/linux/prctl.h @@ -141,6 +141,8 @@ * Changing LSM security domain is considered a new privilege. So, for example, * asking selinux for a specific new context (e.g. with runcon) will result * in execve returning -EPERM. + * + * See Documentation/prctl/no_new_privs.txt for more details. */ #define PR_SET_NO_NEW_PRIVS 38 #define PR_GET_NO_NEW_PRIVS 39 -- cgit v1.2.3 From dbf0e4c7257f8d684ec1a3c919853464293de66e Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Mon, 9 Jul 2012 11:09:21 -0400 Subject: PCI: EHCI: fix crash during suspend on ASUS computers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quite a few ASUS computers experience a nasty problem, related to the EHCI controllers, when going into system suspend. It was observed that the problem didn't occur if the controllers were not put into the D3 power state before starting the suspend, and commit 151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during suspend on ASUS computers) was created to do this. It turned out this approach messed up other computers that didn't have the problem -- it prevented USB wakeup from working. Consequently commit c2fb8a3fa25513de8fedb38509b1f15a5bbee47b (USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2) was merged; it reverted the earlier commit and added a whitelist of known good board names. Now we know the actual cause of the problem. Thanks to AceLan Kao for tracking it down. According to him, an engineer at ASUS explained that some of their BIOSes contain a bug that was added in an attempt to work around a problem in early versions of Windows. When the computer goes into S3 suspend, the BIOS tries to verify that the EHCI controllers were first quiesced by the OS. Nothing's wrong with this, but the BIOS does it by checking that the PCI COMMAND registers contain 0 without checking the controllers' power state. If the register isn't 0, the BIOS assumes the controller needs to be quiesced and tries to do so. This involves making various MMIO accesses to the controller, which don't work very well if the controller is already in D3. The end result is a system hang or memory corruption. Since the value in the PCI COMMAND register doesn't matter once the controller has been suspended, and since the value will be restored anyway when the controller is resumed, we can work around the BIOS bug simply by setting the register to 0 during system suspend. This patch (as1590) does so and also reverts the second commit mentioned above, which is now unnecessary. In theory we could do this for every PCI device. However to avoid introducing new problems, the patch restricts itself to EHCI host controllers. Finally the affected systems can suspend with USB wakeup working properly. Reference: https://bugzilla.kernel.org/show_bug.cgi?id=37632 Reference: https://bugzilla.kernel.org/show_bug.cgi?id=42728 Based-on-patch-by: AceLan Kao Signed-off-by: Alan Stern Tested-by: Dâniel Fraga Tested-by: Javier Marcet Tested-by: Andrey Rahmatullin Tested-by: Oleksij Rempel Tested-by: Pavel Pisa Cc: stable Acked-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 12 ++++++++++++ drivers/pci/pci.c | 5 ----- drivers/pci/quirks.c | 26 -------------------------- include/linux/pci.h | 2 -- 4 files changed, 12 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index bf0cee629b60..099f46cd8e87 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -748,6 +748,18 @@ static int pci_pm_suspend_noirq(struct device *dev) pci_pm_set_unknown_state(pci_dev); + /* + * Some BIOSes from ASUS have a bug: If a USB EHCI host controller's + * PCI COMMAND register isn't 0, the BIOS assumes that the controller + * hasn't been quiesced and tries to turn it off. If the controller + * is already in D3, this can hang or cause memory corruption. + * + * Since the value of the COMMAND register doesn't matter once the + * device has been suspended, we can safely set it to 0 here. + */ + if (pci_dev->class == PCI_CLASS_SERIAL_USB_EHCI) + pci_write_config_word(pci_dev, PCI_COMMAND, 0); + return 0; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 77cb54a65cde..447e83472c01 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1744,11 +1744,6 @@ int pci_prepare_to_sleep(struct pci_dev *dev) if (target_state == PCI_POWER_ERROR) return -EIO; - /* Some devices mustn't be in D3 during system sleep */ - if (target_state == PCI_D3hot && - (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)) - return 0; - pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev)); error = pci_set_power_state(dev, target_state); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 194b243a2817..2a7521677541 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2929,32 +2929,6 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq); -/* - * The Intel 6 Series/C200 Series chipset's EHCI controllers on many - * ASUS motherboards will cause memory corruption or a system crash - * if they are in D3 while the system is put into S3 sleep. - */ -static void __devinit asus_ehci_no_d3(struct pci_dev *dev) -{ - const char *sys_info; - static const char good_Asus_board[] = "P8Z68-V"; - - if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP) - return; - if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK) - return; - sys_info = dmi_get_system_info(DMI_BOARD_NAME); - if (sys_info && memcmp(sys_info, good_Asus_board, - sizeof(good_Asus_board) - 1) == 0) - return; - - dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n"); - dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP; - device_set_wakeup_capable(&dev->dev, false); -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3); - static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f, struct pci_fixup *end) { diff --git a/include/linux/pci.h b/include/linux/pci.h index fefb4e19bf6a..d8c379dba6ad 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -176,8 +176,6 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2, /* Provide indication device is assigned by a Virtual Machine Manager */ PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4, - /* Device causes system crash if in D3 during S3 sleep */ - PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8, }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From f55a6faa384304c89cfef162768e88374d3312cb Mon Sep 17 00:00:00 2001 From: John Stultz Date: Tue, 10 Jul 2012 18:43:19 -0400 Subject: hrtimer: Provide clock_was_set_delayed() clock_was_set() cannot be called from hard interrupt context because it calls on_each_cpu(). For fixing the widely reported leap seconds issue it is necessary to call it from hard interrupt context, i.e. the timer tick code, which does the timekeeping updates. Provide a new function which denotes it in the hrtimer cpu base structure of the cpu on which it is called and raise the hrtimer softirq. We then execute the clock_was_set() notificiation from softirq context in run_hrtimer_softirq(). The hrtimer softirq is rarely used, so polling the flag there is not a performance issue. [ tglx: Made it depend on CONFIG_HIGH_RES_TIMERS. We really should get rid of all this ifdeffery ASAP ] Signed-off-by: John Stultz Reported-by: Jan Engelhardt Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1341960205-56738-2-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 9 ++++++++- kernel/hrtimer.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index fd0dc30c9f15..c9ec9400ee5b 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -165,6 +165,7 @@ enum hrtimer_base_type { * @lock: lock protecting the base and associated clock bases * and timers * @active_bases: Bitfield to mark bases with active timers + * @clock_was_set: Indicates that clock was set from irq context. * @expires_next: absolute time of the next event which was scheduled * via clock_set_next_event() * @hres_active: State of high resolution mode @@ -177,7 +178,8 @@ enum hrtimer_base_type { */ struct hrtimer_cpu_base { raw_spinlock_t lock; - unsigned long active_bases; + unsigned int active_bases; + unsigned int clock_was_set; #ifdef CONFIG_HIGH_RES_TIMERS ktime_t expires_next; int hres_active; @@ -286,6 +288,8 @@ extern void hrtimer_peek_ahead_timers(void); # define MONOTONIC_RES_NSEC HIGH_RES_NSEC # define KTIME_MONOTONIC_RES KTIME_HIGH_RES +extern void clock_was_set_delayed(void); + #else # define MONOTONIC_RES_NSEC LOW_RES_NSEC @@ -306,6 +310,9 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; } + +static inline void clock_was_set_delayed(void) { } + #endif extern void clock_was_set(void); diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index ae34bf51682b..3c24fb2c25c8 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -717,6 +717,19 @@ static int hrtimer_switch_to_hres(void) return 1; } +/* + * Called from timekeeping code to reprogramm the hrtimer interrupt + * device. If called from the timer interrupt context we defer it to + * softirq context. + */ +void clock_was_set_delayed(void) +{ + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + cpu_base->clock_was_set = 1; + __raise_softirq_irqoff(HRTIMER_SOFTIRQ); +} + #else static inline int hrtimer_hres_active(void) { return 0; } @@ -1395,6 +1408,13 @@ void hrtimer_peek_ahead_timers(void) static void run_hrtimer_softirq(struct softirq_action *h) { + struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); + + if (cpu_base->clock_was_set) { + cpu_base->clock_was_set = 0; + clock_was_set(); + } + hrtimer_peek_ahead_timers(); } -- cgit v1.2.3 From f6c06abfb3972ad4914cef57d8348fcb2932bc3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 10 Jul 2012 18:43:24 -0400 Subject: timekeeping: Provide hrtimer update function To finally fix the infamous leap second issue and other race windows caused by functions which change the offsets between the various time bases (CLOCK_MONOTONIC, CLOCK_REALTIME and CLOCK_BOOTTIME) we need a function which atomically gets the current monotonic time and updates the offsets of CLOCK_REALTIME and CLOCK_BOOTTIME with minimalistic overhead. The previous patch which provides ktime_t offsets allows us to make this function almost as cheap as ktime_get() which is going to be replaced in hrtimer_interrupt(). Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Acked-by: Peter Zijlstra Acked-by: Prarit Bhargava Cc: stable@vger.kernel.org Signed-off-by: John Stultz Link: http://lkml.kernel.org/r/1341960205-56738-7-git-send-email-johnstul@us.ibm.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 1 + kernel/time/timekeeping.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index c9ec9400ee5b..cc07d2777bbe 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -327,6 +327,7 @@ extern ktime_t ktime_get(void); extern ktime_t ktime_get_real(void); extern ktime_t ktime_get_boottime(void); extern ktime_t ktime_get_monotonic_offset(void); +extern ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot); DECLARE_PER_CPU(struct tick_device, tick_cpu_device); diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 1c038dac71a2..269b1fe5f2ae 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1271,6 +1271,40 @@ void get_xtime_and_monotonic_and_sleep_offset(struct timespec *xtim, } while (read_seqretry(&timekeeper.lock, seq)); } +#ifdef CONFIG_HIGH_RES_TIMERS +/** + * ktime_get_update_offsets - hrtimer helper + * @offs_real: pointer to storage for monotonic -> realtime offset + * @offs_boot: pointer to storage for monotonic -> boottime offset + * + * Returns current monotonic time and updates the offsets + * Called from hrtimer_interupt() or retrigger_next_event() + */ +ktime_t ktime_get_update_offsets(ktime_t *offs_real, ktime_t *offs_boot) +{ + ktime_t now; + unsigned int seq; + u64 secs, nsecs; + + do { + seq = read_seqbegin(&timekeeper.lock); + + secs = timekeeper.xtime.tv_sec; + nsecs = timekeeper.xtime.tv_nsec; + nsecs += timekeeping_get_ns(); + /* If arch requires, add in gettimeoffset() */ + nsecs += arch_gettimeoffset(); + + *offs_real = timekeeper.offs_real; + *offs_boot = timekeeper.offs_boot; + } while (read_seqretry(&timekeeper.lock, seq)); + + now = ktime_add_ns(ktime_set(secs, 0), nsecs); + now = ktime_sub(now, *offs_real); + return now; +} +#endif + /** * ktime_get_monotonic_offset() - get wall_to_monotonic in ktime_t format */ -- cgit v1.2.3 From d8adde17e5f858427504725218c56aef90e90fc7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 11 Jul 2012 14:01:52 -0700 Subject: memory hotplug: fix invalid memory access caused by stale kswapd pointer kswapd_stop() is called to destroy the kswapd work thread when all memory of a NUMA node has been offlined. But kswapd_stop() only terminates the work thread without resetting NODE_DATA(nid)->kswapd to NULL. The stale pointer will prevent kswapd_run() from creating a new work thread when adding memory to the memory-less NUMA node again. Eventually the stale pointer may cause invalid memory access. An example stack dump as below. It's reproduced with 2.6.32, but latest kernel has the same issue. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] exit_creds+0x12/0x78 PGD 0 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/system/memory/memory391/state CPU 11 Modules linked in: cpufreq_conservative cpufreq_userspace cpufreq_powersave acpi_cpufreq microcode fuse loop dm_mod tpm_tis rtc_cmos i2c_i801 rtc_core tpm serio_raw pcspkr sg tpm_bios igb i2c_core iTCO_wdt rtc_lib mptctl iTCO_vendor_support button dca bnx2 usbhid hid uhci_hcd ehci_hcd usbcore sd_mod crc_t10dif edd ext3 mbcache jbd fan ide_pci_generic ide_core ata_generic ata_piix libata thermal processor thermal_sys hwmon mptsas mptscsih mptbase scsi_transport_sas scsi_mod Pid: 7949, comm: sh Not tainted 2.6.32.12-qiuxishi-5-default #92 Tecal RH2285 RIP: 0010:exit_creds+0x12/0x78 RSP: 0018:ffff8806044f1d78 EFLAGS: 00010202 RAX: 0000000000000000 RBX: ffff880604f22140 RCX: 0000000000019502 RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000 RBP: ffff880604f22150 R08: 0000000000000000 R09: ffffffff81a4dc10 R10: 00000000000032a0 R11: ffff880006202500 R12: 0000000000000000 R13: 0000000000c40000 R14: 0000000000008000 R15: 0000000000000001 FS: 00007fbc03d066f0(0000) GS:ffff8800282e0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 000000060f029000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process sh (pid: 7949, threadinfo ffff8806044f0000, task ffff880603d7c600) Stack: ffff880604f22140 ffffffff8103aac5 ffff880604f22140 ffffffff8104d21e ffff880006202500 0000000000008000 0000000000c38000 ffffffff810bd5b1 0000000000000000 ffff880603d7c600 00000000ffffdd29 0000000000000003 Call Trace: __put_task_struct+0x5d/0x97 kthread_stop+0x50/0x58 offline_pages+0x324/0x3da memory_block_change_state+0x179/0x1db store_mem_state+0x9e/0xbb sysfs_write_file+0xd0/0x107 vfs_write+0xad/0x169 sys_write+0x45/0x6e system_call_fastpath+0x16/0x1b Code: ff 4d 00 0f 94 c0 84 c0 74 08 48 89 ef e8 1f fd ff ff 5b 5d 31 c0 41 5c c3 53 48 8b 87 20 06 00 00 48 89 fb 48 8b bf 18 06 00 00 <8b> 00 48 c7 83 18 06 00 00 00 00 00 00 f0 ff 0f 0f 94 c0 84 c0 RIP exit_creds+0x12/0x78 RSP CR2: 0000000000000000 [akpm@linux-foundation.org: add pglist_data.kswapd locking comments] Signed-off-by: Xishi Qiu Signed-off-by: Jiang Liu Acked-by: KAMEZAWA Hiroyuki Acked-by: KOSAKI Motohiro Acked-by: Mel Gorman Acked-by: David Rientjes Reviewed-by: Minchan Kim Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- mm/vmscan.c | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2427706f78b4..68c569fcbb66 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -694,7 +694,7 @@ typedef struct pglist_data { range, including holes */ int node_id; wait_queue_head_t kswapd_wait; - struct task_struct *kswapd; + struct task_struct *kswapd; /* Protected by lock_memory_hotplug() */ int kswapd_max_order; enum zone_type classzone_idx; } pg_data_t; diff --git a/mm/vmscan.c b/mm/vmscan.c index eeb3bc9d1d36..661576324c7f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2955,14 +2955,17 @@ int kswapd_run(int nid) } /* - * Called by memory hotplug when all memory in a node is offlined. + * Called by memory hotplug when all memory in a node is offlined. Caller must + * hold lock_memory_hotplug(). */ void kswapd_stop(int nid) { struct task_struct *kswapd = NODE_DATA(nid)->kswapd; - if (kswapd) + if (kswapd) { kthread_stop(kswapd); + NODE_DATA(nid)->kswapd = NULL; + } } static int __init kswapd_init(void) -- cgit v1.2.3 From 99ab7b19440a72ebdf225f99b20f8ef40decee86 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:53 -0700 Subject: mm: sparse: fix usemap allocation above node descriptor section After commit f5bf18fa22f8 ("bootmem/sparsemem: remove limit constraint in alloc_bootmem_section"), usemap allocations may easily be placed outside the optimal section that holds the node descriptor, even if there is space available in that section. This results in unnecessary hotplug dependencies that need to have the node unplugged before the section holding the usemap. The reason is that the bootmem allocator doesn't guarantee a linear search starting from the passed allocation goal but may start out at a much higher address absent an upper limit. Fix this by trying the allocation with the limit at the section end, then retry without if that fails. This keeps the fix from f5bf18fa22f8 of not panicking if the allocation does not fit in the section, but still makes sure to try to stay within the section at first. Signed-off-by: Yinghai Lu Signed-off-by: Johannes Weiner Cc: [3.3.x, 3.4.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 5 +++++ mm/bootmem.c | 2 +- mm/nobootmem.c | 2 +- mm/sparse.c | 18 +++++++++++++----- 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index 324fe08ea3b1..6d6795d46a75 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -91,6 +91,11 @@ extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal); +void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit); extern void *__alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal); diff --git a/mm/bootmem.c b/mm/bootmem.c index ec4fcb7a56c8..73096630cb35 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, +void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { diff --git a/mm/nobootmem.c b/mm/nobootmem.c index d23415c001bc..0900b3910cda 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -274,7 +274,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align, return ___alloc_bootmem(size, align, goal, limit); } -static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, +void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, diff --git a/mm/sparse.c b/mm/sparse.c index e861397016a9..c7bb952400c8 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -275,8 +275,9 @@ static unsigned long * __init sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, unsigned long size) { - pg_data_t *host_pgdat; - unsigned long goal; + unsigned long goal, limit; + unsigned long *p; + int nid; /* * A page may contain usemaps for other sections preventing the * page being freed and making a section unremovable while @@ -288,9 +289,16 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, * this problem. */ goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT); - host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT)); - return __alloc_bootmem_node_nopanic(host_pgdat, size, - SMP_CACHE_BYTES, goal); + limit = goal + (1UL << PA_SECTION_SHIFT); + nid = early_pfn_to_nid(goal >> PAGE_SHIFT); +again: + p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, + SMP_CACHE_BYTES, goal, limit); + if (!p && limit) { + limit = 0; + goto again; + } + return p; } static void __init check_usemap_section_nr(int nid, unsigned long *usemap) -- cgit v1.2.3 From 29f6738609e40227dabcc63bfb3b84b3726a75bd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 Jul 2012 14:02:56 -0700 Subject: memblock: free allocated memblock_reserved_regions later memblock_free_reserved_regions() calls memblock_free(), but memblock_free() would double reserved.regions too, so we could free the old range for reserved.regions. Also tj said there is another bug which could be related to this. | I don't think we're saving any noticeable | amount by doing this "free - give it to page allocator - reserve | again" dancing. We should just allocate regions aligned to page | boundaries and free them later when memblock is no longer in use. in that case, when DEBUG_PAGEALLOC, will get panic: memblock_free: [0x0000102febc080-0x0000102febf080] memblock_free_reserved_regions+0x37/0x39 BUG: unable to handle kernel paging request at ffff88102febd948 IP: [] __next_free_mem_range+0x9b/0x155 PGD 4826063 PUD cf67a067 PMD cf7fa067 PTE 800000102febd160 Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC CPU 0 Pid: 0, comm: swapper Not tainted 3.5.0-rc2-next-20120614-sasha #447 RIP: 0010:[] [] __next_free_mem_range+0x9b/0x155 See the discussion at https://lkml.org/lkml/2012/6/13/469 So try to allocate with PAGE_SIZE alignment and free it later. Reported-by: Sasha Levin Acked-by: Tejun Heo Cc: Benjamin Herrenschmidt Signed-off-by: Yinghai Lu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 +--- mm/memblock.c | 51 ++++++++++++++++++++++-------------------------- mm/nobootmem.c | 38 ++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index a6bb10235148..19dc455b4f3d 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -50,9 +50,7 @@ phys_addr_t memblock_find_in_range_node(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align, int nid); phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); -int memblock_free_reserved_regions(void); -int memblock_reserve_reserved_regions(void); - +phys_addr_t get_allocated_memblock_reserved_regions_info(phys_addr_t *addr); void memblock_allow_resize(void); int memblock_add_node(phys_addr_t base, phys_addr_t size, int nid); int memblock_add(phys_addr_t base, phys_addr_t size); diff --git a/mm/memblock.c b/mm/memblock.c index d4382095f8bd..5cc6731b00cc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -143,30 +143,6 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, MAX_NUMNODES); } -/* - * Free memblock.reserved.regions - */ -int __init_memblock memblock_free_reserved_regions(void) -{ - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; - - return memblock_free(__pa(memblock.reserved.regions), - sizeof(struct memblock_region) * memblock.reserved.max); -} - -/* - * Reserve memblock.reserved.regions - */ -int __init_memblock memblock_reserve_reserved_regions(void) -{ - if (memblock.reserved.regions == memblock_reserved_init_regions) - return 0; - - return memblock_reserve(__pa(memblock.reserved.regions), - sizeof(struct memblock_region) * memblock.reserved.max); -} - static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r) { type->total_size -= type->regions[r].size; @@ -184,6 +160,18 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u } } +phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info( + phys_addr_t *addr) +{ + if (memblock.reserved.regions == memblock_reserved_init_regions) + return 0; + + *addr = __pa(memblock.reserved.regions); + + return PAGE_ALIGN(sizeof(struct memblock_region) * + memblock.reserved.max); +} + /** * memblock_double_array - double the size of the memblock regions array * @type: memblock type of the regions array being doubled @@ -204,6 +192,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, phys_addr_t new_area_size) { struct memblock_region *new_array, *old_array; + phys_addr_t old_alloc_size, new_alloc_size; phys_addr_t old_size, new_size, addr; int use_slab = slab_is_available(); int *in_slab; @@ -217,6 +206,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, /* Calculate new doubled size */ old_size = type->max * sizeof(struct memblock_region); new_size = old_size << 1; + /* + * We need to allocated new one align to PAGE_SIZE, + * so we can free them completely later. + */ + old_alloc_size = PAGE_ALIGN(old_size); + new_alloc_size = PAGE_ALIGN(new_size); /* Retrieve the slab flag */ if (type == &memblock.memory) @@ -245,11 +240,11 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, addr = memblock_find_in_range(new_area_start + new_area_size, memblock.current_limit, - new_size, sizeof(phys_addr_t)); + new_alloc_size, PAGE_SIZE); if (!addr && new_area_size) addr = memblock_find_in_range(0, min(new_area_start, memblock.current_limit), - new_size, sizeof(phys_addr_t)); + new_alloc_size, PAGE_SIZE); new_array = addr ? __va(addr) : 0; } @@ -279,13 +274,13 @@ static int __init_memblock memblock_double_array(struct memblock_type *type, kfree(old_array); else if (old_array != memblock_memory_init_regions && old_array != memblock_reserved_init_regions) - memblock_free(__pa(old_array), old_size); + memblock_free(__pa(old_array), old_alloc_size); /* Reserve the new array if that comes from the memblock. * Otherwise, we needn't do it */ if (!use_slab) - BUG_ON(memblock_reserve(addr, new_size)); + BUG_ON(memblock_reserve(addr, new_alloc_size)); /* Update slab flag */ *in_slab = use_slab; diff --git a/mm/nobootmem.c b/mm/nobootmem.c index 0900b3910cda..405573010f99 100644 --- a/mm/nobootmem.c +++ b/mm/nobootmem.c @@ -105,27 +105,35 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end) __free_pages_bootmem(pfn_to_page(i), 0); } +static unsigned long __init __free_memory_core(phys_addr_t start, + phys_addr_t end) +{ + unsigned long start_pfn = PFN_UP(start); + unsigned long end_pfn = min_t(unsigned long, + PFN_DOWN(end), max_low_pfn); + + if (start_pfn > end_pfn) + return 0; + + __free_pages_memory(start_pfn, end_pfn); + + return end_pfn - start_pfn; +} + unsigned long __init free_low_memory_core_early(int nodeid) { unsigned long count = 0; - phys_addr_t start, end; + phys_addr_t start, end, size; u64 i; - /* free reserved array temporarily so that it's treated as free area */ - memblock_free_reserved_regions(); - - for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { - unsigned long start_pfn = PFN_UP(start); - unsigned long end_pfn = min_t(unsigned long, - PFN_DOWN(end), max_low_pfn); - if (start_pfn < end_pfn) { - __free_pages_memory(start_pfn, end_pfn); - count += end_pfn - start_pfn; - } - } + for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) + count += __free_memory_core(start, end); + + /* free range that is used for reserved array if we allocate it */ + size = get_allocated_memblock_reserved_regions_info(&start); + if (size) + count += __free_memory_core(start, start + size); - /* put region array back? */ - memblock_reserve_reserved_regions(); return count; } -- cgit v1.2.3 From d9914cf66181b8aa0929775f5c6f675c6ebc3eb5 Mon Sep 17 00:00:00 2001 From: Michael Kerrisk Date: Tue, 17 Jul 2012 21:37:27 +0200 Subject: PM: Rename CAP_EPOLLWAKEUP to CAP_BLOCK_SUSPEND As discussed in http://thread.gmane.org/gmane.linux.kernel/1249726/focus=1288990, the capability introduced in 4d7e30d98939a0340022ccd49325a3d70f7e0238 to govern EPOLLWAKEUP seems misnamed: this capability is about governing the ability to suspend the system, not using a particular API flag (EPOLLWAKEUP). We should make the name of the capability more general to encourage reuse in related cases. (Whether or not this capability should also be used to govern the use of /sys/power/wake_lock is a question that needs to be separately resolved.) This patch renames the capability to CAP_BLOCK_SUSPEND. In order to ensure that the old capability name doesn't make it out into the wild, could you please apply and push up the tree to ensure that it is incorporated for the 3.5 release. Signed-off-by: Michael Kerrisk Acked-by: Serge Hallyn Signed-off-by: Rafael J. Wysocki --- fs/eventpoll.c | 2 +- include/linux/capability.h | 6 +++--- include/linux/eventpoll.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 74598f67efeb..1c8b55670804 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1710,7 +1710,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, goto error_tgt_fput; /* Check if EPOLLWAKEUP is allowed */ - if ((epds.events & EPOLLWAKEUP) && !capable(CAP_EPOLLWAKEUP)) + if ((epds.events & EPOLLWAKEUP) && !capable(CAP_BLOCK_SUSPEND)) epds.events &= ~EPOLLWAKEUP; /* diff --git a/include/linux/capability.h b/include/linux/capability.h index 68d56effc328..d10b7ed595b1 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -360,11 +360,11 @@ struct cpu_vfs_cap_data { #define CAP_WAKE_ALARM 35 -/* Allow preventing system suspends while epoll events are pending */ +/* Allow preventing system suspends */ -#define CAP_EPOLLWAKEUP 36 +#define CAP_BLOCK_SUSPEND 36 -#define CAP_LAST_CAP CAP_EPOLLWAKEUP +#define CAP_LAST_CAP CAP_BLOCK_SUSPEND #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP) diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 6f8be328770a..f4bb378ccf6a 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -34,7 +34,7 @@ * re-allowed until epoll_wait is called again after consuming the wakeup * event(s). * - * Requires CAP_EPOLLWAKEUP + * Requires CAP_BLOCK_SUSPEND */ #define EPOLLWAKEUP (1 << 29) -- cgit v1.2.3 From 5bdca4e0768d3e0f4efa43d9a2cc8210aeb91ab9 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 10 Jul 2012 11:53:34 -0700 Subject: libceph: fix messenger retry In ancient times, the messenger could both initiate and accept connections. An artifact if that was data structures to store/process an incoming ceph_msg_connect request and send an outgoing ceph_msg_connect_reply. Sadly, the negotiation code was referencing those structures and ignoring important information (like the peer's connect_seq) from the correct ones. Among other things, this fixes tight reconnect loops where the server sends RETRY_SESSION and we (the client) retries with the same connect_seq as last time. This bug pretty easily triggered by injecting socket failures on the MDS and running some fs workload like workunits/direct_io/test_sync_io. Signed-off-by: Sage Weil --- include/linux/ceph/messenger.h | 12 ++---------- net/ceph/messenger.c | 12 ++++++------ 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 2521a95fa6d9..44c87e731e9d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -163,16 +163,8 @@ struct ceph_connection { /* connection negotiation temps */ char in_banner[CEPH_BANNER_MAX_LEN]; - union { - struct { /* outgoing connection */ - struct ceph_msg_connect out_connect; - struct ceph_msg_connect_reply in_reply; - }; - struct { /* incoming */ - struct ceph_msg_connect in_connect; - struct ceph_msg_connect_reply out_reply; - }; - }; + struct ceph_msg_connect out_connect; + struct ceph_msg_connect_reply in_reply; struct ceph_entity_addr actual_peer_addr; /* message out temps */ diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b332c3d76059..10255e81be79 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1423,7 +1423,7 @@ static int process_connect(struct ceph_connection *con) * dropped messages. */ dout("process_connect got RESET peer seq %u\n", - le32_to_cpu(con->in_connect.connect_seq)); + le32_to_cpu(con->in_reply.connect_seq)); pr_err("%s%lld %s connection reset\n", ENTITY_NAME(con->peer_name), ceph_pr_addr(&con->peer_addr.in_addr)); @@ -1450,10 +1450,10 @@ static int process_connect(struct ceph_connection *con) * If we sent a smaller connect_seq than the peer has, try * again with a larger value. */ - dout("process_connect got RETRY my seq = %u, peer_seq = %u\n", + dout("process_connect got RETRY_SESSION my seq %u, peer %u\n", le32_to_cpu(con->out_connect.connect_seq), - le32_to_cpu(con->in_connect.connect_seq)); - con->connect_seq = le32_to_cpu(con->in_connect.connect_seq); + le32_to_cpu(con->in_reply.connect_seq)); + con->connect_seq = le32_to_cpu(con->in_reply.connect_seq); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) @@ -1468,9 +1468,9 @@ static int process_connect(struct ceph_connection *con) */ dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n", con->peer_global_seq, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); get_global_seq(con->msgr, - le32_to_cpu(con->in_connect.global_seq)); + le32_to_cpu(con->in_reply.global_seq)); ceph_con_out_kvec_reset(con); ret = prepare_write_connect(con); if (ret < 0) -- cgit v1.2.3 From eea03c20ae38a55405c0865ed9adfccc400e4c8e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 18 Jul 2012 18:15:46 -0700 Subject: Make wait_for_device_probe() also do scsi_complete_async_scans() Commit a7a20d103994 ("sd: limit the scope of the async probe domain") make the SCSI device probing run device discovery in it's own async domain. However, as a result, the partition detection was no longer synchronized by async_synchronize_full() (which, despite the name, only synchronizes the global async space, not all of them). Which in turn meant that "wait_for_device_probe()" would not wait for the SCSI partitions to be parsed. And "wait_for_device_probe()" was what the boot time init code relied on for mounting the root filesystem. Now, most people never noticed this, because not only is it timing-dependent, but modern distributions all use initrd. So the root filesystem isn't actually on a disk at all. And then before they actually mount the final disk filesystem, they will have loaded the scsi-wait-scan module, which not only does the expected wait_for_device_probe(), but also does scsi_complete_async_scans(). [ Side note: scsi_complete_async_scans() had also been partially broken, but that was fixed in commit 43a8d39d0137 ("fix async probe regression"), so that same commit a7a20d103994 had actually broken setups even if you used scsi-wait-scan explicitly ] Solve this problem by just moving the scsi_complete_async_scans() call into wait_for_device_probe(). Everybody who wants to wait for device probing to finish really wants the SCSI probing to complete, so there's no reason not to do this. So now "wait_for_device_probe()" really does what the name implies, and properly waits for device probing to finish. This also removes the now unnecessary extra calls to scsi_complete_async_scans(). Reported-and-tested-by: Artem S. Tashkinov Cc: Dan Williams Cc: Alan Stern Cc: James Bottomley Cc: Borislav Petkov Cc: linux-scsi Signed-off-by: Linus Torvalds --- drivers/base/dd.c | 2 ++ drivers/scsi/scsi_wait_scan.c | 5 ----- include/linux/device.h | 2 -- kernel/power/hibernate.c | 8 -------- kernel/power/user.c | 2 -- 5 files changed, 2 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index dcb8a6e48692..4b01ab3d2c24 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "base.h" #include "power/power.h" @@ -332,6 +333,7 @@ void wait_for_device_probe(void) /* wait for the known devices to complete their probing */ wait_event(probe_waitqueue, atomic_read(&probe_count) == 0); async_synchronize_full(); + scsi_complete_async_scans(); } EXPORT_SYMBOL_GPL(wait_for_device_probe); diff --git a/drivers/scsi/scsi_wait_scan.c b/drivers/scsi/scsi_wait_scan.c index ae7814874618..072734538876 100644 --- a/drivers/scsi/scsi_wait_scan.c +++ b/drivers/scsi/scsi_wait_scan.c @@ -22,11 +22,6 @@ static int __init wait_scan_init(void) * and might not yet have reached the scsi async scanning */ wait_for_device_probe(); - /* - * and then we wait for the actual asynchronous scsi scan - * to finish. - */ - scsi_complete_async_scans(); return 0; } diff --git a/include/linux/device.h b/include/linux/device.h index 161d96241b1b..6de94151ff6f 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -865,8 +865,6 @@ extern int (*platform_notify_remove)(struct device *dev); extern struct device *get_device(struct device *dev); extern void put_device(struct device *dev); -extern void wait_for_device_probe(void); - #ifdef CONFIG_DEVTMPFS extern int devtmpfs_create_node(struct device *dev); extern int devtmpfs_delete_node(struct device *dev); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 8b53db38a279..238025f5472e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "power.h" @@ -748,13 +747,6 @@ static int software_resume(void) async_synchronize_full(); } - /* - * We can't depend on SCSI devices being available after loading - * one of their modules until scsi_complete_async_scans() is - * called and the resume device usually is a SCSI one. - */ - scsi_complete_async_scans(); - swsusp_resume_device = name_to_dev_t(resume_file); if (!swsusp_resume_device) { error = -ENODEV; diff --git a/kernel/power/user.c b/kernel/power/user.c index 91b0fd021a95..4ed81e74f86f 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -24,7 +24,6 @@ #include #include #include -#include #include @@ -84,7 +83,6 @@ static int snapshot_open(struct inode *inode, struct file *filp) * appear. */ wait_for_device_probe(); - scsi_complete_async_scans(); data->swap = -1; data->mode = O_WRONLY; -- cgit v1.2.3 From 533827c921c34310f63e859e1d6d0feec439657d Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Fri, 20 Jul 2012 17:28:07 -0700 Subject: printk: Implement some unlocked kmsg_dump functions If used from KDB, the locked variants are prone to deadlocks (suppose we got to the debugger w/ the logbuf lock held). So, we have to implement a few routines that grab no logbuf lock. Yet we don't need these functions in modules, so we don't export them. Signed-off-by: Anton Vorontsov Signed-off-by: Linus Torvalds --- include/linux/kmsg_dump.h | 16 +++++++++++ kernel/printk.c | 68 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 71 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index d6bd50110ec2..2e7a1e032c71 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -55,12 +55,17 @@ struct kmsg_dumper { #ifdef CONFIG_PRINTK void kmsg_dump(enum kmsg_dump_reason reason); +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len); + bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, char *line, size_t size, size_t *len); bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, char *buf, size_t size, size_t *len); +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper); + void kmsg_dump_rewind(struct kmsg_dumper *dumper); int kmsg_dump_register(struct kmsg_dumper *dumper); @@ -71,6 +76,13 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason) { } +static inline bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, + bool syslog, const char *line, + size_t size, size_t *len) +{ + return false; +} + static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, const char *line, size_t size, size_t *len) { @@ -83,6 +95,10 @@ static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, return false; } +static inline void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ +} + static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper) { } diff --git a/kernel/printk.c b/kernel/printk.c index c8129678dfbf..ac4bc9e79465 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -2510,7 +2510,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) } /** - * kmsg_dump_get_line - retrieve one kmsg log line + * kmsg_dump_get_line_nolock - retrieve one kmsg log line (unlocked version) * @dumper: registered kmsg dumper * @syslog: include the "<4>" prefixes * @line: buffer to copy the line to @@ -2525,11 +2525,12 @@ void kmsg_dump(enum kmsg_dump_reason reason) * * A return value of FALSE indicates that there are no more records to * read. + * + * The function is similar to kmsg_dump_get_line(), but grabs no locks. */ -bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, - char *line, size_t size, size_t *len) +bool kmsg_dump_get_line_nolock(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) { - unsigned long flags; struct log *msg; size_t l = 0; bool ret = false; @@ -2537,7 +2538,6 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, if (!dumper->active) goto out; - raw_spin_lock_irqsave(&logbuf_lock, flags); if (dumper->cur_seq < log_first_seq) { /* messages are gone, move to first available one */ dumper->cur_seq = log_first_seq; @@ -2545,10 +2545,8 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, } /* last entry */ - if (dumper->cur_seq >= log_next_seq) { - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + if (dumper->cur_seq >= log_next_seq) goto out; - } msg = log_from_idx(dumper->cur_idx); l = msg_print_text(msg, 0, syslog, line, size); @@ -2556,12 +2554,41 @@ bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, dumper->cur_idx = log_next(dumper->cur_idx); dumper->cur_seq++; ret = true; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); out: if (len) *len = l; return ret; } + +/** + * kmsg_dump_get_line - retrieve one kmsg log line + * @dumper: registered kmsg dumper + * @syslog: include the "<4>" prefixes + * @line: buffer to copy the line to + * @size: maximum size of the buffer + * @len: length of line placed into buffer + * + * Start at the beginning of the kmsg buffer, with the oldest kmsg + * record, and copy one record into the provided buffer. + * + * Consecutive calls will return the next available record moving + * towards the end of the buffer with the youngest messages. + * + * A return value of FALSE indicates that there are no more records to + * read. + */ +bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog, + char *line, size_t size, size_t *len) +{ + unsigned long flags; + bool ret; + + raw_spin_lock_irqsave(&logbuf_lock, flags); + ret = kmsg_dump_get_line_nolock(dumper, syslog, line, size, len); + raw_spin_unlock_irqrestore(&logbuf_lock, flags); + + return ret; +} EXPORT_SYMBOL_GPL(kmsg_dump_get_line); /** @@ -2663,6 +2690,24 @@ out: } EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer); +/** + * kmsg_dump_rewind_nolock - reset the interator (unlocked version) + * @dumper: registered kmsg dumper + * + * Reset the dumper's iterator so that kmsg_dump_get_line() and + * kmsg_dump_get_buffer() can be called again and used multiple + * times within the same dumper.dump() callback. + * + * The function is similar to kmsg_dump_rewind(), but grabs no locks. + */ +void kmsg_dump_rewind_nolock(struct kmsg_dumper *dumper) +{ + dumper->cur_seq = clear_seq; + dumper->cur_idx = clear_idx; + dumper->next_seq = log_next_seq; + dumper->next_idx = log_next_idx; +} + /** * kmsg_dump_rewind - reset the interator * @dumper: registered kmsg dumper @@ -2676,10 +2721,7 @@ void kmsg_dump_rewind(struct kmsg_dumper *dumper) unsigned long flags; raw_spin_lock_irqsave(&logbuf_lock, flags); - dumper->cur_seq = clear_seq; - dumper->cur_idx = clear_idx; - dumper->next_seq = log_next_seq; - dumper->next_idx = log_next_idx; + kmsg_dump_rewind_nolock(dumper); raw_spin_unlock_irqrestore(&logbuf_lock, flags); } EXPORT_SYMBOL_GPL(kmsg_dump_rewind); -- cgit v1.2.3 From bff9d1865640dcb4d9711dcc50714e9a8b859453 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 21 Jul 2012 20:24:52 +0200 Subject: Remove SYSTEM_SUSPEND_DISK system state The SYSTEM_SUSPEND_DISK system state is never used, so drop it. Signed-off-by: Rafael J. Wysocki Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e07f5e0c5df4..604382143bcf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -377,7 +377,6 @@ extern enum system_states { SYSTEM_HALT, SYSTEM_POWER_OFF, SYSTEM_RESTART, - SYSTEM_SUSPEND_DISK, } system_state; #define TAINT_PROPRIETARY_MODULE 0 -- cgit v1.2.3