From 7063fbf2261194f72ee75afca67b3b38b554b5fa Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Thu, 15 Dec 2005 14:29:43 -0800 Subject: [PATCH] configfs: User-driven configuration filesystem Configfs, a file system for userspace-driven kernel object configuration. The OCFS2 stack makes extensive use of this for propagation of cluster configuration information into kernel. Signed-off-by: Joel Becker --- include/linux/configfs.h | 205 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 include/linux/configfs.h (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h new file mode 100644 index 000000000000..acffb8c9073a --- /dev/null +++ b/include/linux/configfs.h @@ -0,0 +1,205 @@ +/* -*- mode: c; c-basic-offset: 8; -*- + * vim: noexpandtab sw=8 ts=8 sts=0: + * + * configfs.h - definitions for the device driver filesystem + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + * + * Based on sysfs: + * sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel + * + * Based on kobject.h: + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * + * configfs Copyright (C) 2005 Oracle. All rights reserved. + * + * Please read Documentation/filesystems/configfs.txt before using the + * configfs interface, ESPECIALLY the parts about reference counts and + * item destructors. + */ + +#ifndef _CONFIGFS_H_ +#define _CONFIGFS_H_ + +#ifdef __KERNEL__ + +#include +#include +#include + +#include +#include + +#define CONFIGFS_ITEM_NAME_LEN 20 + +struct module; + +struct configfs_item_operations; +struct configfs_group_operations; +struct configfs_attribute; +struct configfs_subsystem; + +struct config_item { + char *ci_name; + char ci_namebuf[CONFIGFS_ITEM_NAME_LEN]; + struct kref ci_kref; + struct list_head ci_entry; + struct config_item *ci_parent; + struct config_group *ci_group; + struct config_item_type *ci_type; + struct dentry *ci_dentry; +}; + +extern int config_item_set_name(struct config_item *, const char *, ...); + +static inline char *config_item_name(struct config_item * item) +{ + return item->ci_name; +} + +extern void config_item_init(struct config_item *); +extern void config_item_init_type_name(struct config_item *item, + const char *name, + struct config_item_type *type); +extern void config_item_cleanup(struct config_item *); + +extern struct config_item * config_item_get(struct config_item *); +extern void config_item_put(struct config_item *); + +struct config_item_type { + struct module *ct_owner; + struct configfs_item_operations *ct_item_ops; + struct configfs_group_operations *ct_group_ops; + struct configfs_attribute **ct_attrs; +}; + + +/** + * group - a group of config_items of a specific type, belonging + * to a specific subsystem. + */ + +struct config_group { + struct config_item cg_item; + struct list_head cg_children; + struct configfs_subsystem *cg_subsys; + struct config_group **default_groups; +}; + + +extern void config_group_init(struct config_group *group); +extern void config_group_init_type_name(struct config_group *group, + const char *name, + struct config_item_type *type); + + +static inline struct config_group *to_config_group(struct config_item *item) +{ + return item ? container_of(item,struct config_group,cg_item) : NULL; +} + +static inline struct config_group *config_group_get(struct config_group *group) +{ + return group ? to_config_group(config_item_get(&group->cg_item)) : NULL; +} + +static inline void config_group_put(struct config_group *group) +{ + config_item_put(&group->cg_item); +} + +extern struct config_item *config_group_find_obj(struct config_group *, const char *); + + +struct configfs_attribute { + char *ca_name; + struct module *ca_owner; + mode_t ca_mode; +}; + + +/* + * If allow_link() exists, the item can symlink(2) out to other + * items. If the item is a group, it may support mkdir(2). + * Groups supply one of make_group() and make_item(). If the + * group supports make_group(), one can create group children. If it + * supports make_item(), one can create config_item children. If it has + * default_groups on group->default_groups, it has automatically created + * group children. default_groups may coexist alongsize make_group() or + * make_item(), but if the group wishes to have only default_groups + * children (disallowing mkdir(2)), it need not provide either function. + * If the group has commit(), it supports pending and commited (active) + * items. + */ +struct configfs_item_operations { + void (*release)(struct config_item *); + ssize_t (*show_attribute)(struct config_item *, struct configfs_attribute *,char *); + ssize_t (*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t); + int (*allow_link)(struct config_item *src, struct config_item *target); + int (*drop_link)(struct config_item *src, struct config_item *target); +}; + +struct configfs_group_operations { + struct config_item *(*make_item)(struct config_group *group, const char *name); + struct config_group *(*make_group)(struct config_group *group, const char *name); + int (*commit_item)(struct config_item *item); + void (*drop_item)(struct config_group *group, struct config_item *item); +}; + + + +/** + * Use these macros to make defining attributes easier. See include/linux/device.h + * for examples.. + */ + +#if 0 +#define __ATTR(_name,_mode,_show,_store) { \ + .attr = {.ca_name = __stringify(_name), .ca_mode = _mode, .ca_owner = THIS_MODULE }, \ + .show = _show, \ + .store = _store, \ +} + +#define __ATTR_RO(_name) { \ + .attr = { .ca_name = __stringify(_name), .ca_mode = 0444, .ca_owner = THIS_MODULE }, \ + .show = _name##_show, \ +} + +#define __ATTR_NULL { .attr = { .name = NULL } } + +#define attr_name(_attr) (_attr).attr.name +#endif + + +struct configfs_subsystem { + struct config_group su_group; + struct semaphore su_sem; +}; + +static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group) +{ + return group ? + container_of(group, struct configfs_subsystem, su_group) : + NULL; +} + +int configfs_register_subsystem(struct configfs_subsystem *subsys); +void configfs_unregister_subsystem(struct configfs_subsystem *subsys); + +#endif /* __KERNEL__ */ + +#endif /* _CONFIGFS_H_ */ -- cgit v1.2.3 From 994fc28c7b1e697ac56befe4aecabf23f0689f46 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 15 Dec 2005 14:28:17 -0800 Subject: [PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE readpage(), prepare_write(), and commit_write() callers are updated to understand the special return code AOP_TRUNCATED_PAGE in the style of writepage() and WRITEPAGE_ACTIVATE. AOP_TRUNCATED_PAGE tells the caller that the callee has unlocked the page and that the operation should be tried again with a new page. OCFS2 uses this to detect and work around a lock inversion in its aop methods. There should be no change in behaviour for methods that don't return AOP_TRUNCATED_PAGE. WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are made enums so that kerneldoc can be used to document their semantics. Signed-off-by: Zach Brown --- drivers/block/loop.c | 23 +++++++++++---- drivers/block/rd.c | 4 +-- fs/mpage.c | 2 +- include/linux/fs.h | 31 ++++++++++++++++++++ include/linux/writeback.h | 6 ---- mm/filemap.c | 73 ++++++++++++++++++++++++++++++++--------------- mm/readahead.c | 15 ++++++---- mm/shmem.c | 2 +- mm/vmscan.c | 2 +- 9 files changed, 113 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 96c664af8d06..a452b13620a2 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, struct address_space_operations *aops = mapping->a_ops; pgoff_t index; unsigned offset, bv_offs; - int len, ret = 0; + int len, ret; down(&mapping->host->i_sem); index = pos >> PAGE_CACHE_SHIFT; @@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, page = grab_cache_page(mapping, index); if (unlikely(!page)) goto fail; - if (unlikely(aops->prepare_write(file, page, offset, - offset + size))) + ret = aops->prepare_write(file, page, offset, + offset + size); + if (unlikely(ret)) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } goto unlock; + } transfer_result = lo_do_transfer(lo, WRITE, page, offset, bvec->bv_page, bv_offs, size, IV); if (unlikely(transfer_result)) { @@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, kunmap_atomic(kaddr, KM_USER0); } flush_dcache_page(page); - if (unlikely(aops->commit_write(file, page, offset, - offset + size))) + ret = aops->commit_write(file, page, offset, + offset + size); + if (unlikely(ret)) { + if (ret == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } goto unlock; + } if (unlikely(transfer_result)) goto unlock; bv_offs += size; @@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec, unlock_page(page); page_cache_release(page); } + ret = 0; out: up(&mapping->host->i_sem); return ret; diff --git a/drivers/block/rd.c b/drivers/block/rd.c index 68c60a5bcdab..ffd6abd6d5a0 100644 --- a/drivers/block/rd.c +++ b/drivers/block/rd.c @@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page, /* * ->writepage to the the blockdev's mapping has to redirty the page so that the - * VM doesn't go and steal it. We return WRITEPAGE_ACTIVATE so that the VM + * VM doesn't go and steal it. We return AOP_WRITEPAGE_ACTIVATE so that the VM * won't try to (pointlessly) write the page again for a while. * * Really, these pages should not be on the LRU at all. @@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc) make_page_uptodate(page); SetPageDirty(page); if (wbc->for_reclaim) - return WRITEPAGE_ACTIVATE; + return AOP_WRITEPAGE_ACTIVATE; unlock_page(page); return 0; } diff --git a/fs/mpage.c b/fs/mpage.c index c5adcdddf3cc..f1d2d02bd4c8 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -721,7 +721,7 @@ retry: &last_block_in_bio, &ret, wbc, page->mapping->a_ops->writepage); } - if (unlikely(ret == WRITEPAGE_ACTIVATE)) + if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE)) unlock_page(page); if (ret || (--(wbc->nr_to_write) <= 0)) done = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index cc35b6ac778d..ed9a41a71e8b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -302,6 +302,37 @@ struct iattr { */ #include +/** + * enum positive_aop_returns - aop return codes with specific semantics + * + * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has + * completed, that the page is still locked, and + * should be considered active. The VM uses this hint + * to return the page to the active list -- it won't + * be a candidate for writeback again in the near + * future. Other callers must be careful to unlock + * the page if they get this return. Returned by + * writepage(); + * + * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has + * unlocked it and the page might have been truncated. + * The caller should back up to acquiring a new page and + * trying again. The aop will be taking reasonable + * precautions not to livelock. If the caller held a page + * reference, it should drop it before retrying. Returned + * by readpage(), prepare_write(), and commit_write(). + * + * address_space_operation functions return these large constants to indicate + * special semantics to the caller. These are much larger than the bytes in a + * page to allow for functions that return the number of bytes operated on in a + * given page. + */ + +enum positive_aop_returns { + AOP_WRITEPAGE_ACTIVATE = 0x80000, + AOP_TRUNCATED_PAGE = 0x80001, +}; + /* * oh the beauties of C type declarations. */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 343d883d69c5..64a36ba43b2f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -59,12 +59,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ }; -/* - * ->writepage() return values (make these much larger than a pagesize, in - * case some fs is returning number-of-bytes-written from writepage) - */ -#define WRITEPAGE_ACTIVATE 0x80000 /* IO was not started: activate page */ - /* * fs/fs-writeback.c */ diff --git a/mm/filemap.c b/mm/filemap.c index 33a28bfde158..6e1d08a2b8b9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -831,8 +831,13 @@ readpage: /* Start the actual read. The read will unlock the page. */ error = mapping->a_ops->readpage(filp, page); - if (unlikely(error)) + if (unlikely(error)) { + if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto find_page; + } goto readpage_error; + } if (!PageUptodate(page)) { lock_page(page); @@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset) { struct address_space *mapping = file->f_mapping; struct page *page; - int error; + int ret; - page = page_cache_alloc_cold(mapping); - if (!page) - return -ENOMEM; + do { + page = page_cache_alloc_cold(mapping); + if (!page) + return -ENOMEM; + + ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); + if (ret == 0) + ret = mapping->a_ops->readpage(file, page); + else if (ret == -EEXIST) + ret = 0; /* losing race to add is OK */ - error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL); - if (!error) { - error = mapping->a_ops->readpage(file, page); page_cache_release(page); - return error; - } - /* - * We arrive here in the unlikely event that someone - * raced with us and added our page to the cache first - * or we are out of memory for radix-tree nodes. - */ - page_cache_release(page); - return error == -EEXIST ? 0 : error; + } while (ret == AOP_TRUNCATED_PAGE); + + return ret; } #define MMAP_LOTSAMISS (100) @@ -1331,10 +1334,14 @@ page_not_uptodate: goto success; } - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1358,10 +1365,14 @@ page_not_uptodate: goto success; } ClearPageError(page); - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1444,10 +1455,14 @@ page_not_uptodate: goto success; } - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1470,10 +1485,14 @@ page_not_uptodate: } ClearPageError(page); - if (!mapping->a_ops->readpage(file, page)) { + error = mapping->a_ops->readpage(file, page); + if (!error) { wait_on_page_locked(page); if (PageUptodate(page)) goto success; + } else if (error == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + goto retry_find; } /* @@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, status = a_ops->prepare_write(file, page, offset, offset+bytes); if (unlikely(status)) { loff_t isize = i_size_read(inode); + + if (status != AOP_TRUNCATED_PAGE) + unlock_page(page); + page_cache_release(page); + if (status == AOP_TRUNCATED_PAGE) + continue; /* * prepare_write() may have instantiated a few blocks * outside i_size. Trim these off again. */ - unlock_page(page); - page_cache_release(page); if (pos + bytes > isize) vmtruncate(inode, isize); break; @@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov, cur_iov, iov_base, bytes); flush_dcache_page(page); status = a_ops->commit_write(file, page, offset, offset+bytes); + if (status == AOP_TRUNCATED_PAGE) { + page_cache_release(page); + continue; + } if (likely(copied > 0)) { if (!status) status = copied; diff --git a/mm/readahead.c b/mm/readahead.c index 72e7adbb87c7..8d6eeaaa6296 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp, { unsigned page_idx; struct pagevec lru_pvec; - int ret = 0; + int ret; if (mapping->a_ops->readpages) { ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages); @@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp, list_del(&page->lru); if (!add_to_page_cache(page, mapping, page->index, GFP_KERNEL)) { - mapping->a_ops->readpage(filp, page); - if (!pagevec_add(&lru_pvec, page)) - __pagevec_lru_add(&lru_pvec); - } else { - page_cache_release(page); + ret = mapping->a_ops->readpage(filp, page); + if (ret != AOP_TRUNCATED_PAGE) { + if (!pagevec_add(&lru_pvec, page)) + __pagevec_lru_add(&lru_pvec); + continue; + } /* else fall through to release */ } + page_cache_release(page); } pagevec_lru_add(&lru_pvec); + ret = 0; out: return ret; } diff --git a/mm/shmem.c b/mm/shmem.c index dc25565a61e9..d9fc277940da 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -855,7 +855,7 @@ unlock: swap_free(swap); redirty: set_page_dirty(page); - return WRITEPAGE_ACTIVATE; /* Return with the page locked */ + return AOP_WRITEPAGE_ACTIVATE; /* Return with the page locked */ } #ifdef CONFIG_NUMA diff --git a/mm/vmscan.c b/mm/vmscan.c index b0cd81c32de6..795a050fe471 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) res = mapping->a_ops->writepage(page, &wbc); if (res < 0) handle_write_error(mapping, page, res); - if (res == WRITEPAGE_ACTIVATE) { + if (res == AOP_WRITEPAGE_ACTIVATE) { ClearPageReclaim(page); return PAGE_ACTIVATE; } -- cgit v1.2.3