From 2b37c35e6552b0d04d5db9728bc7af22d53f731a Mon Sep 17 00:00:00 2001 From: Becky Bruce Date: Mon, 25 Jul 2011 17:11:49 -0700 Subject: fs/hugetlbfs/inode.c: fix pgoff alignment checking on 32-bit This: vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT) is incorrect on 32-bit. It causes us to & the pgoff with something that looks like this (for a 4m hugepage): 0xfff003ff. The mask should be flipped and *then* shifted, to give you 0x0000_03fff. Signed-off-by: Becky Bruce Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7aafeb8fa300..537a2093c0e1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -94,7 +94,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); -- cgit v1.2.3 From be8f684d73d8d916847e996bf69cef14352872c6 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 25 Jul 2011 17:12:18 -0700 Subject: oom: make deprecated use of oom_adj more verbose /proc/pid/oom_adj is deprecated and scheduled for removal in August 2012 according to Documentation/feature-removal-schedule.txt. This patch makes the warning more verbose by making it appear as a more serious problem (the presence of a stack trace and being multiline should attract more attention) so that applications still using the old interface can get fixed. Very popular users of the old interface have been converted since the oom killer rewrite has been introduced. udevd switched to the /proc/pid/oom_score_adj interface for v162, kde switched in 4.6.1, and opensshd switched in 5.7p1. At the start of 2012, this should be changed into a WARN() to emit all such incidents and then finally remove the tunable in August 2012 as scheduled. Signed-off-by: David Rientjes Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 91fb655a5cbf..c9e3f650f23c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1118,10 +1118,9 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " - "please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), - task_pid_nr(task), task_pid_nr(task)); + WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); task->signal->oom_adj = oom_adjust; /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum -- cgit v1.2.3 From 708e3508c2a2204cc276dcdb543009a441bfe91b Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Mon, 25 Jul 2011 17:12:32 -0700 Subject: tmpfs: clone shmem_file_splice_read() Copy __generic_file_splice_read() and generic_file_splice_read() from fs/splice.c to shmem_file_splice_read() in mm/shmem.c. Make page_cache_pipe_buf_ops and spd_release_page() accessible to it. Signed-off-by: Hugh Dickins Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/splice.c | 4 +- include/linux/splice.h | 2 + mm/shmem.c | 218 ++++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 221 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index aa866d309695..fa2defa8afcf 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -132,7 +132,7 @@ error: return err; } -static const struct pipe_buf_operations page_cache_pipe_buf_ops = { +const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -264,7 +264,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, return ret; } -static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { page_cache_release(spd->pages[i]); } diff --git a/include/linux/splice.h b/include/linux/splice.h index 997c3b4c212b..26e5b613deda 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -88,5 +88,7 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); extern void splice_shrink_spd(struct pipe_inode_info *, struct splice_pipe_desc *); +extern void spd_release_page(struct splice_pipe_desc *, unsigned int); +extern const struct pipe_buf_operations page_cache_pipe_buf_ops; #endif diff --git a/mm/shmem.c b/mm/shmem.c index c1db11cf220d..d176e488f04d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -51,6 +51,7 @@ static struct vfsmount *shm_mnt; #include #include #include +#include #include #include #include @@ -1844,6 +1845,221 @@ static ssize_t shmem_file_aio_read(struct kiocb *iocb, return retval; } +static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct address_space *mapping = in->f_mapping; + unsigned int loff, nr_pages, req_pages; + struct page *pages[PIPE_DEF_BUFFERS]; + struct partial_page partial[PIPE_DEF_BUFFERS]; + struct page *page; + pgoff_t index, end_index; + loff_t isize, left; + int error, page_nr; + struct splice_pipe_desc spd = { + .pages = pages, + .partial = partial, + .flags = flags, + .ops = &page_cache_pipe_buf_ops, + .spd_release = spd_release_page, + }; + + isize = i_size_read(in->f_mapping->host); + if (unlikely(*ppos >= isize)) + return 0; + + left = isize - *ppos; + if (unlikely(left < len)) + len = left; + + if (splice_grow_spd(pipe, &spd)) + return -ENOMEM; + + index = *ppos >> PAGE_CACHE_SHIFT; + loff = *ppos & ~PAGE_CACHE_MASK; + req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + nr_pages = min(req_pages, pipe->buffers); + + /* + * Lookup the (hopefully) full range of pages we need. + */ + spd.nr_pages = find_get_pages_contig(mapping, index, + nr_pages, spd.pages); + index += spd.nr_pages; + + /* + * If find_get_pages_contig() returned fewer pages than we needed, + * readahead/allocate the rest and fill in the holes. + */ + if (spd.nr_pages < nr_pages) + page_cache_sync_readahead(mapping, &in->f_ra, in, + index, req_pages - spd.nr_pages); + + error = 0; + while (spd.nr_pages < nr_pages) { + /* + * Page could be there, find_get_pages_contig() breaks on + * the first hole. + */ + page = find_get_page(mapping, index); + if (!page) { + /* + * page didn't exist, allocate one. + */ + page = page_cache_alloc_cold(mapping); + if (!page) + break; + + error = add_to_page_cache_lru(page, mapping, index, + GFP_KERNEL); + if (unlikely(error)) { + page_cache_release(page); + if (error == -EEXIST) + continue; + break; + } + /* + * add_to_page_cache() locks the page, unlock it + * to avoid convoluting the logic below even more. + */ + unlock_page(page); + } + + spd.pages[spd.nr_pages++] = page; + index++; + } + + /* + * Now loop over the map and see if we need to start IO on any + * pages, fill in the partial map, etc. + */ + index = *ppos >> PAGE_CACHE_SHIFT; + nr_pages = spd.nr_pages; + spd.nr_pages = 0; + for (page_nr = 0; page_nr < nr_pages; page_nr++) { + unsigned int this_len; + + if (!len) + break; + + /* + * this_len is the max we'll use from this page + */ + this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff); + page = spd.pages[page_nr]; + + if (PageReadahead(page)) + page_cache_async_readahead(mapping, &in->f_ra, in, + page, index, req_pages - page_nr); + + /* + * If the page isn't uptodate, we may need to start io on it + */ + if (!PageUptodate(page)) { + lock_page(page); + + /* + * Page was truncated, or invalidated by the + * filesystem. Redo the find/create, but this time the + * page is kept locked, so there's no chance of another + * race with truncate/invalidate. + */ + if (!page->mapping) { + unlock_page(page); + page = find_or_create_page(mapping, index, + mapping_gfp_mask(mapping)); + + if (!page) { + error = -ENOMEM; + break; + } + page_cache_release(spd.pages[page_nr]); + spd.pages[page_nr] = page; + } + /* + * page was already under io and is now done, great + */ + if (PageUptodate(page)) { + unlock_page(page); + goto fill_it; + } + + /* + * need to read in the page + */ + error = mapping->a_ops->readpage(in, page); + if (unlikely(error)) { + /* + * We really should re-lookup the page here, + * but it complicates things a lot. Instead + * lets just do what we already stored, and + * we'll get it the next time we are called. + */ + if (error == AOP_TRUNCATED_PAGE) + error = 0; + + break; + } + } +fill_it: + /* + * i_size must be checked after PageUptodate. + */ + isize = i_size_read(mapping->host); + end_index = (isize - 1) >> PAGE_CACHE_SHIFT; + if (unlikely(!isize || index > end_index)) + break; + + /* + * if this is the last page, see if we need to shrink + * the length and stop + */ + if (end_index == index) { + unsigned int plen; + + /* + * max good bytes in this page + */ + plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1; + if (plen <= loff) + break; + + /* + * force quit after adding this page + */ + this_len = min(this_len, plen - loff); + len = this_len; + } + + spd.partial[page_nr].offset = loff; + spd.partial[page_nr].len = this_len; + len -= this_len; + loff = 0; + spd.nr_pages++; + index++; + } + + /* + * Release any pages at the end, if we quit early. 'page_nr' is how far + * we got, 'nr_pages' is how many pages are in the map. + */ + while (page_nr < nr_pages) + page_cache_release(spd.pages[page_nr++]); + in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; + + if (spd.nr_pages) + error = splice_to_pipe(pipe, &spd); + + splice_shrink_spd(pipe, &spd); + + if (error > 0) { + *ppos += error; + file_accessed(in); + } + return error; +} + static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) { struct shmem_sb_info *sbinfo = SHMEM_SB(dentry->d_sb); @@ -2699,7 +2915,7 @@ static const struct file_operations shmem_file_operations = { .aio_read = shmem_file_aio_read, .aio_write = generic_file_aio_write, .fsync = noop_fsync, - .splice_read = generic_file_splice_read, + .splice_read = shmem_file_splice_read, .splice_write = generic_file_splice_write, #endif }; -- cgit v1.2.3 From 0c2fd1bfb155947a821fdaeb3c46aa1cfa20ad64 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 25 Jul 2011 17:13:37 -0700 Subject: reiserfs: use proper little-endian bitops Using __test_and_{set,clear}_bit_le() with ignoring its return value can be replaced with __{set,clear}_bit_le(). This introduces reiserfs_{set,clear}_le_bit for __{set,clear}_bit_le and does the above change with them. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 2 +- fs/reiserfs/resize.c | 6 +++--- include/linux/reiserfs_fs.h | 2 ++ 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 483442e66ed6..567385aa7813 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -214,7 +214,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, } /* otherwise we clear all bit were set ... */ while (--i >= *beg) - reiserfs_test_and_clear_le_bit + reiserfs_clear_le_bit (i, bh->b_data); reiserfs_restore_prepared_buffer(s, bh); *beg = org; diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index b3a94d20f0fc..b6b9b1fe33b0 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -136,7 +136,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) return -EIO; } memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); set_buffer_uptodate(bh); @@ -172,7 +172,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, bh->b_data); + reiserfs_clear_le_bit(i, bh->b_data); info->free_count += s->s_blocksize * 8 - block_r; journal_mark_dirty(&th, s, bh); @@ -190,7 +190,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, bh->b_data); + reiserfs_set_le_bit(i, bh->b_data); journal_mark_dirty(&th, s, bh); brelse(bh); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index eca75df00fed..96d465f8d3e6 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2332,7 +2332,9 @@ __u32 keyed_hash(const signed char *msg, int len); __u32 yura_hash(const signed char *msg, int len); __u32 r5_hash(const signed char *msg, int len); +#define reiserfs_set_le_bit __set_bit_le #define reiserfs_test_and_set_le_bit __test_and_set_bit_le +#define reiserfs_clear_le_bit __clear_bit_le #define reiserfs_test_and_clear_le_bit __test_and_clear_bit_le #define reiserfs_test_le_bit test_bit_le #define reiserfs_find_next_zero_le_bit find_next_zero_bit_le -- cgit v1.2.3 From 9d6bf5aa177ee7ffdcee2a590ef8a1bf9e8ade87 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 25 Jul 2011 17:13:38 -0700 Subject: reiserfs: use hweight_long() Use hweight_long() to count free bits in the bitmap. Signed-off-by: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/reiserfs/bitmap.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 567385aa7813..d1aca1df4f92 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1222,15 +1222,11 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb, info->free_count = 0; while (--cur >= (unsigned long *)bh->b_data) { - int i; - /* 0 and ~0 are special, we can optimize for them */ if (*cur == 0) info->free_count += BITS_PER_LONG; else if (*cur != ~0L) /* A mix, investigate */ - for (i = BITS_PER_LONG - 1; i >= 0; i--) - if (!reiserfs_test_le_bit(i, cur)) - info->free_count++; + info->free_count += BITS_PER_LONG - hweight_long(*cur); } } -- cgit v1.2.3