From 6072d13c429373c5d63b69dadbbef40a9b035552 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 1 Dec 2010 13:35:19 -0500 Subject: Call the filesystem back whenever a page is removed from the page cache NFS needs to be able to release objects that are stored in the page cache once the page itself is no longer visible from the page cache. This patch adds a callback to the address space operations that allows filesystems to perform page cleanups once the page has been removed from the page cache. Original patch by: Linus Torvalds [trondmy: cover the cases of invalidate_inode_pages2() and truncate_inode_pages()] Signed-off-by: Trond Myklebust --- Documentation/filesystems/Locking | 7 ++++++- Documentation/filesystems/vfs.txt | 7 +++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index a91f30890011..b6426f15b4ae 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -173,12 +173,13 @@ prototypes: sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); int (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); int (*launder_page) (struct page *); locking rules: - All except set_page_dirty may block + All except set_page_dirty and freepage may block BKL PageLocked(page) i_mutex writepage: no yes, unlocks (see below) @@ -193,6 +194,7 @@ perform_write: no n/a yes bmap: no invalidatepage: no yes releasepage: no yes +freepage: no yes direct_IO: no launder_page: no yes @@ -288,6 +290,9 @@ buffers from the page in preparation for freeing it. It returns zero to indicate that the buffers are (or may be) freeable. If ->releasepage is zero, the kernel assumes that the fs has no private interest in the buffers. 
+ ->freepage() is called when the kernel is done dropping the page +from the page cache. + ->launder_page() may be called prior to releasing a page if it is still found to be dirty. It returns zero if the page was successfully cleaned, or an error value if not. Note that in order to prevent the page diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ed7e5efc06d8..3b14a557eca6 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -534,6 +534,7 @@ struct address_space_operations { sector_t (*bmap)(struct address_space *, sector_t); int (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, int); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); struct page* (*get_xip_page)(struct address_space *, sector_t, @@ -679,6 +680,12 @@ struct address_space_operations { need to ensure this. Possibly it can clear the PageUptodate bit if it cannot free private data yet. + freepage: freepage is called once the page is no longer visible in + the page cache in order to allow the cleanup of any private + data. Since it may be called by the memory reclaimer, it + should not assume that the original address_space mapping still + exists, and it should not block. + direct_IO: called by the generic read/write routines to perform direct_IO - that is IO requests which bypass the page cache and transfer data directly between the storage and the -- cgit v1.2.3 From f08f5a0add20834d3f3d876dfe08005a5df656db Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 16 Dec 2010 17:11:58 +0100 Subject: PM / Runtime: Fix pm_runtime_suspended() There are some situations (e.g. in __pm_generic_call()), where pm_runtime_suspended() is used to decide whether or not to execute a device's (system) ->suspend() callback. 
The callback is not executed if pm_runtime_suspended() returns true, but it does so for devices that don't even support runtime PM, because the power.disable_depth device field is ignored by it. This leads to problems (i.e. devices are not suspended when they should be), so rework pm_runtime_suspended() so that it returns false if the device's power.disable_depth field is different from zero. Signed-off-by: Rafael J. Wysocki Cc: stable@kernel.org --- Documentation/power/runtime_pm.txt | 4 ++-- include/linux/pm_runtime.h | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt index 489e9bacd165..41cc7b30d7dd 100644 --- a/Documentation/power/runtime_pm.txt +++ b/Documentation/power/runtime_pm.txt @@ -379,8 +379,8 @@ drivers/base/power/runtime.c and include/linux/pm_runtime.h: zero) bool pm_runtime_suspended(struct device *dev); - - return true if the device's runtime PM status is 'suspended', or false - otherwise + - return true if the device's runtime PM status is 'suspended' and its + 'power.disable_depth' field is equal to zero, or false otherwise void pm_runtime_allow(struct device *dev); - set the power.runtime_auto flag for the device and decrease its usage diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 3ec2358f8692..d19f1cca7f74 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -77,7 +77,8 @@ static inline void device_set_run_wake(struct device *dev, bool enable) static inline bool pm_runtime_suspended(struct device *dev) { - return dev->power.runtime_status == RPM_SUSPENDED; + return dev->power.runtime_status == RPM_SUSPENDED + && !dev->power.disable_depth; } static inline void pm_runtime_mark_last_busy(struct device *dev) -- cgit v1.2.3 From c0f5ac5426f7fd82b23dd5c6a1e633b290294a08 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 16 Dec 2010 10:38:41 -0700 Subject: Revert "resources:
support allocating space within a region from the top down" This reverts commit e7f8567db9a7f6b3151b0b275e245c1cef0d9c70. Acked-by: H. Peter Anvin Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 5 -- include/linux/ioport.h | 1 - kernel/resource.c | 98 ++----------------------------------- 3 files changed, 4 insertions(+), 100 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index cdd2a6e8a3b7..8b61c9360999 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2175,11 +2175,6 @@ and is between 256 and 4096 characters. It is defined in the file reset_devices [KNL] Force drivers to reset the underlying device during initialization. - resource_alloc_from_bottom - Allocate new resources from the beginning of available - space, not the end. If you need to use this, please - report a bug. - resume= [SWSUSP] Specify the partition device for software suspend diff --git a/include/linux/ioport.h b/include/linux/ioport.h index d377ea815d45..b22790268b64 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -112,7 +112,6 @@ struct resource_list { /* PC/ISA/whatever - the normal PC address spaces: IO and memory */ extern struct resource ioport_resource; extern struct resource iomem_resource; -extern int resource_alloc_from_bottom; extern struct resource *request_resource_conflict(struct resource *root, struct resource *new); extern int request_resource(struct resource *root, struct resource *new); diff --git a/kernel/resource.c b/kernel/resource.c index 9fad33efd0db..560659f7baef 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -40,23 +40,6 @@ EXPORT_SYMBOL(iomem_resource); static DEFINE_RWLOCK(resource_lock); -/* - * By default, we allocate free space bottom-up. The architecture can request - * top-down by clearing this flag. 
The user can override the architecture's - * choice with the "resource_alloc_from_bottom" kernel boot option, but that - * should only be a debugging tool. - */ -int resource_alloc_from_bottom = 1; - -static __init int setup_alloc_from_bottom(char *s) -{ - printk(KERN_INFO - "resource: allocating from bottom-up; please report a bug\n"); - resource_alloc_from_bottom = 1; - return 0; -} -early_param("resource_alloc_from_bottom", setup_alloc_from_bottom); - static void *r_next(struct seq_file *m, void *v, loff_t *pos) { struct resource *p = v; @@ -396,75 +379,8 @@ static bool resource_contains(struct resource *res1, struct resource *res2) return res1->start <= res2->start && res1->end >= res2->end; } -/* - * Find the resource before "child" in the sibling list of "root" children. - */ -static struct resource *find_sibling_prev(struct resource *root, struct resource *child) -{ - struct resource *this; - - for (this = root->child; this; this = this->sibling) - if (this->sibling == child) - return this; - - return NULL; -} - -/* - * Find empty slot in the resource tree given range and alignment. - * This version allocates from the end of the root resource first. 
- */ -static int find_resource_from_top(struct resource *root, struct resource *new, - resource_size_t size, resource_size_t min, - resource_size_t max, resource_size_t align, - resource_size_t (*alignf)(void *, - const struct resource *, - resource_size_t, - resource_size_t), - void *alignf_data) -{ - struct resource *this; - struct resource tmp, avail, alloc; - - tmp.start = root->end; - tmp.end = root->end; - - this = find_sibling_prev(root, NULL); - for (;;) { - if (this) { - if (this->end < root->end) - tmp.start = this->end + 1; - } else - tmp.start = root->start; - - resource_clip(&tmp, min, max); - - /* Check for overflow after ALIGN() */ - avail = *new; - avail.start = ALIGN(tmp.start, align); - avail.end = tmp.end; - if (avail.start >= tmp.start) { - alloc.start = alignf(alignf_data, &avail, size, align); - alloc.end = alloc.start + size - 1; - if (resource_contains(&avail, &alloc)) { - new->start = alloc.start; - new->end = alloc.end; - return 0; - } - } - - if (!this || this->start == root->start) - break; - - tmp.end = this->start - 1; - this = find_sibling_prev(root, this); - } - return -EBUSY; -} - /* * Find empty slot in the resource tree given range and alignment. - * This version allocates from the beginning of the root resource first. */ static int find_resource(struct resource *root, struct resource *new, resource_size_t size, resource_size_t min, @@ -480,15 +396,14 @@ static int find_resource(struct resource *root, struct resource *new, tmp.start = root->start; /* - * Skip past an allocated resource that starts at 0, since the - * assignment of this->start - 1 to tmp->end below would cause an - * underflow. + * Skip past an allocated resource that starts at 0, since the assignment + * of this->start - 1 to tmp->end below would cause an underflow. 
*/ if (this && this->start == 0) { tmp.start = this->end + 1; this = this->sibling; } - for (;;) { + for(;;) { if (this) tmp.end = this->start - 1; else @@ -509,10 +424,8 @@ static int find_resource(struct resource *root, struct resource *new, return 0; } } - if (!this) break; - tmp.start = this->end + 1; this = this->sibling; } @@ -545,10 +458,7 @@ int allocate_resource(struct resource *root, struct resource *new, alignf = simple_align_resource; write_lock(&resource_lock); - if (resource_alloc_from_bottom) - err = find_resource(root, new, size, min, max, align, alignf, alignf_data); - else - err = find_resource_from_top(root, new, size, min, max, align, alignf, alignf_data); + err = find_resource(root, new, size, min, max, align, alignf, alignf_data); if (err >= 0 && __request_resource(root, new)) err = -EBUSY; write_unlock(&resource_lock); -- cgit v1.2.3