diff options
Diffstat (limited to 'fs/nilfs2')
-rw-r--r-- | fs/nilfs2/alloc.c | 104 | ||||
-rw-r--r-- | fs/nilfs2/alloc.h | 2 | ||||
-rw-r--r-- | fs/nilfs2/bmap.c | 15 | ||||
-rw-r--r-- | fs/nilfs2/bmap.h | 20 | ||||
-rw-r--r-- | fs/nilfs2/btnode.c | 129 | ||||
-rw-r--r-- | fs/nilfs2/btree.c | 27 | ||||
-rw-r--r-- | fs/nilfs2/btree.h | 1 | ||||
-rw-r--r-- | fs/nilfs2/cpfile.c | 389 | ||||
-rw-r--r-- | fs/nilfs2/cpfile.h | 10 | ||||
-rw-r--r-- | fs/nilfs2/dat.c | 57 | ||||
-rw-r--r-- | fs/nilfs2/dir.c | 285 | ||||
-rw-r--r-- | fs/nilfs2/direct.c | 3 | ||||
-rw-r--r-- | fs/nilfs2/file.c | 28 | ||||
-rw-r--r-- | fs/nilfs2/gcinode.c | 5 | ||||
-rw-r--r-- | fs/nilfs2/ifile.c | 21 | ||||
-rw-r--r-- | fs/nilfs2/ifile.h | 10 | ||||
-rw-r--r-- | fs/nilfs2/inode.c | 181 | ||||
-rw-r--r-- | fs/nilfs2/ioctl.c | 123 | ||||
-rw-r--r-- | fs/nilfs2/mdt.c | 97 | ||||
-rw-r--r-- | fs/nilfs2/namei.c | 62 | ||||
-rw-r--r-- | fs/nilfs2/nilfs.h | 52 | ||||
-rw-r--r-- | fs/nilfs2/page.c | 217 | ||||
-rw-r--r-- | fs/nilfs2/page.h | 25 | ||||
-rw-r--r-- | fs/nilfs2/recovery.c | 40 | ||||
-rw-r--r-- | fs/nilfs2/segbuf.c | 4 | ||||
-rw-r--r-- | fs/nilfs2/segment.c | 585 | ||||
-rw-r--r-- | fs/nilfs2/segment.h | 10 | ||||
-rw-r--r-- | fs/nilfs2/sufile.c | 149 | ||||
-rw-r--r-- | fs/nilfs2/super.c | 438 | ||||
-rw-r--r-- | fs/nilfs2/sysfs.c | 6 | ||||
-rw-r--r-- | fs/nilfs2/the_nilfs.c | 35 | ||||
-rw-r--r-- | fs/nilfs2/the_nilfs.h | 12 |
32 files changed, 1673 insertions, 1469 deletions
diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 25881bdd212b..ba50388ee4bf 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -529,60 +529,62 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, ret = nilfs_palloc_get_desc_block(inode, group, 1, &desc_bh); if (ret < 0) return ret; - desc_kaddr = kmap(desc_bh->b_page); + desc_kaddr = kmap_local_page(desc_bh->b_page); desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); n = nilfs_palloc_rest_groups_in_desc_block(inode, group, maxgroup); - for (j = 0; j < n; j++, desc++, group++) { + for (j = 0; j < n; j++, desc++, group++, group_offset = 0) { lock = nilfs_mdt_bgl_lock(inode, group); - if (nilfs_palloc_group_desc_nfrees(desc, lock) > 0) { - ret = nilfs_palloc_get_bitmap_block( - inode, group, 1, &bitmap_bh); - if (ret < 0) - goto out_desc; - bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = bitmap_kaddr + bh_offset(bitmap_bh); - pos = nilfs_palloc_find_available_slot( - bitmap, group_offset, - entries_per_group, lock, wrap); - /* - * Since the search for a free slot in the - * second and subsequent bitmap blocks always - * starts from the beginning, the wrap flag - * only has an effect on the first search. - */ - if (pos >= 0) { - /* found a free entry */ - nilfs_palloc_group_desc_add_entries( - desc, lock, -1); - req->pr_entry_nr = - entries_per_group * group + pos; - kunmap(desc_bh->b_page); - kunmap(bitmap_bh->b_page); - - req->pr_desc_bh = desc_bh; - req->pr_bitmap_bh = bitmap_bh; - return 0; - } - kunmap(bitmap_bh->b_page); - brelse(bitmap_bh); + if (nilfs_palloc_group_desc_nfrees(desc, lock) == 0) + continue; + + kunmap_local(desc_kaddr); + ret = nilfs_palloc_get_bitmap_block(inode, group, 1, + &bitmap_bh); + if (unlikely(ret < 0)) { + brelse(desc_bh); + return ret; } - group_offset = 0; + desc_kaddr = kmap_local_page(desc_bh->b_page); + desc = nilfs_palloc_block_get_group_desc( + inode, group, desc_bh, desc_kaddr); + + bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); + pos = nilfs_palloc_find_available_slot( + bitmap, group_offset, entries_per_group, lock, + wrap); + /* + * Since the search for a free slot in the second and + * subsequent bitmap blocks always starts from the + * beginning, the wrap flag only has an effect on the + * first search. + */ + kunmap_local(bitmap_kaddr); + if (pos >= 0) + goto found; + + brelse(bitmap_bh); } - kunmap(desc_bh->b_page); + kunmap_local(desc_kaddr); brelse(desc_bh); } /* no entries left */ return -ENOSPC; - out_desc: - kunmap(desc_bh->b_page); - brelse(desc_bh); - return ret; +found: + /* found a free entry */ + nilfs_palloc_group_desc_add_entries(desc, lock, -1); + req->pr_entry_nr = entries_per_group * group + pos; + kunmap_local(desc_kaddr); + + req->pr_desc_bh = desc_bh; + req->pr_bitmap_bh = bitmap_bh; + return 0; } /** @@ -616,10 +618,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); + desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page); desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); + bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); lock = nilfs_mdt_bgl_lock(inode, group); @@ -631,8 +633,8 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap_kaddr); + kunmap_local(desc_kaddr); mark_buffer_dirty(req->pr_desc_bh); mark_buffer_dirty(req->pr_bitmap_bh); @@ -657,10 +659,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, spinlock_t *lock; group = nilfs_palloc_group(inode, req->pr_entry_nr, &group_offset); - desc_kaddr = kmap(req->pr_desc_bh->b_page); + desc_kaddr = kmap_local_page(req->pr_desc_bh->b_page); desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); - bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); + bitmap_kaddr = kmap_local_page(req->pr_bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); lock = nilfs_mdt_bgl_lock(inode, group); @@ -672,8 +674,8 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, else nilfs_palloc_group_desc_add_entries(desc, lock, 1); - kunmap(req->pr_bitmap_bh->b_page); - kunmap(req->pr_desc_bh->b_page); + kunmap_local(bitmap_kaddr); + kunmap_local(desc_kaddr); brelse(req->pr_bitmap_bh); brelse(req->pr_desc_bh); @@ -765,7 +767,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) /* Get the first entry number of the group */ group_min_nr = (__u64)group * epg; - bitmap_kaddr = kmap(bitmap_bh->b_page); + bitmap_kaddr = kmap_local_page(bitmap_bh->b_page); bitmap = bitmap_kaddr + bh_offset(bitmap_bh); lock = nilfs_mdt_bgl_lock(inode, group); @@ -811,7 +813,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) entry_start = rounddown(group_offset, epb); } while (true); - kunmap(bitmap_bh->b_page); + kunmap_local(bitmap_kaddr); mark_buffer_dirty(bitmap_bh); brelse(bitmap_bh); @@ -825,11 +827,11 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) inode->i_ino); } - desc_kaddr = kmap_atomic(desc_bh->b_page); + desc_kaddr = kmap_local_page(desc_bh->b_page); desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); nfree = nilfs_palloc_group_desc_add_entries(desc, lock, n); - kunmap_atomic(desc_kaddr); + kunmap_local(desc_kaddr); mark_buffer_dirty(desc_bh); nilfs_mdt_mark_dirty(inode); brelse(desc_bh); diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index d825a9faca6d..e19d7eb10084 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -37,7 +37,7 @@ void *nilfs_palloc_block_get_entry(const struct inode *, __u64, int nilfs_palloc_count_max_entries(struct inode *, u64, u64 *); /** - * nilfs_palloc_req - persistent allocator request and reply + * struct nilfs_palloc_req - persistent allocator request and reply * @pr_entry_nr: entry number (vblocknr or inode number) * @pr_desc_bh: buffer head of the buffer containing block group descriptors * @pr_bitmap_bh: buffer head of the buffer containing a block group bitmap diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 7a8f166f2c8d..c9e8d9a7d820 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -349,7 +349,7 @@ int nilfs_bmap_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) } /** - * nilfs_bmap_lookup_dirty_buffers - + * nilfs_bmap_lookup_dirty_buffers - collect dirty block buffers * @bmap: bmap * @listp: pointer to buffer head list */ @@ -450,15 +450,9 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, const struct buffer_head *bh) { - struct buffer_head *pbh; - __u64 key; + loff_t pos = folio_pos(bh->b_folio) + bh_offset(bh); - key = page_index(bh->b_page) << (PAGE_SHIFT - - bmap->b_inode->i_blkbits); - for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page) - key++; - - return key; + return pos >> bmap->b_inode->i_blkbits; } __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key) @@ -548,13 +542,10 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) */ void nilfs_bmap_write(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode) { - down_write(&bmap->b_sem); memcpy(raw_inode->i_bmap, bmap->b_u.u_data, NILFS_INODE_BMAP_SIZE * sizeof(__le64)); if (bmap->b_inode->i_ino == NILFS_DAT_INO) bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT; - - up_write(&bmap->b_sem); } void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 608168a5cb88..4656df392722 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -44,6 +44,19 @@ struct nilfs_bmap_stats { /** * struct nilfs_bmap_operations - bmap operation table + * @bop_lookup: single block search operation + * @bop_lookup_contig: consecutive block search operation + * @bop_insert: block insertion operation + * @bop_delete: block delete operation + * @bop_clear: block mapping resource release operation + * @bop_propagate: operation to propagate dirty state towards the + * mapping root + * @bop_lookup_dirty_buffers: operation to collect dirty block buffers + * @bop_assign: disk block address assignment operation + * @bop_mark: operation to mark in-use blocks as dirty for + * relocation by GC + * @bop_seek_key: find valid block key operation + * @bop_last_key: find last valid block key operation */ struct nilfs_bmap_operations { int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *); @@ -66,7 +79,7 @@ struct nilfs_bmap_operations { int (*bop_seek_key)(const struct nilfs_bmap *, __u64, __u64 *); int (*bop_last_key)(const struct nilfs_bmap *, __u64 *); - /* The following functions are internal use only. */ + /* private: internal use only */ int (*bop_check_insert)(const struct nilfs_bmap *, __u64); int (*bop_check_delete)(struct nilfs_bmap *, __u64); int (*bop_gather_data)(struct nilfs_bmap *, __u64 *, __u64 *, int); @@ -74,9 +87,8 @@ struct nilfs_bmap_operations { #define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64)) -#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */) -#define NILFS_BMAP_NEW_PTR_INIT \ - (1UL << (sizeof(unsigned long) * 8 /* CHAR_BIT */ - 1)) +#define NILFS_BMAP_KEY_BIT BITS_PER_LONG +#define NILFS_BMAP_NEW_PTR_INIT (1UL << (BITS_PER_LONG - 1)) static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) { diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 3dda7c39089b..54a3fa0cf67e 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -73,13 +73,13 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) set_buffer_mapped(bh); set_buffer_uptodate(bh); - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); return bh; failed: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); return ERR_PTR(-EIO); } @@ -90,7 +90,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, { struct buffer_head *bh; struct inode *inode = btnc->host; - struct page *page; + struct folio *folio; int err; bh = nilfs_grab_buffer(inode, btnc, blocknr, BIT(BH_NILFS_Node)); @@ -98,7 +98,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ - page = bh->b_page; + folio = bh->b_folio; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -144,8 +144,8 @@ found: *pbh = bh; out_locked: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return err; } @@ -159,30 +159,51 @@ out_locked: void nilfs_btnode_delete(struct buffer_head *bh) { struct address_space *mapping; - struct page *page = bh->b_page; - pgoff_t index = page_index(page); + struct folio *folio = bh->b_folio; + pgoff_t index = folio->index; int still_dirty; - get_page(page); - lock_page(page); - wait_on_page_writeback(page); + folio_get(folio); + folio_lock(folio); + folio_wait_writeback(folio); nilfs_forget_buffer(bh); - still_dirty = PageDirty(page); - mapping = page->mapping; - unlock_page(page); - put_page(page); + still_dirty = folio_test_dirty(folio); + mapping = folio->mapping; + folio_unlock(folio); + folio_put(folio); if (!still_dirty && mapping) invalidate_inode_pages2_range(mapping, index, index); } /** - * nilfs_btnode_prepare_change_key - * prepare to move contents of the block for old key to one of new key. - * the old buffer will not be removed, but might be reused for new buffer. - * it might return -ENOMEM because of memory allocation errors, - * and might return -EIO because of disk read errors. + * nilfs_btnode_prepare_change_key - prepare to change the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_prepare_change_key() prepares to move the contents of the + * b-tree node block of the old key given in the "oldkey" member of @ctxt to + * the position of the new key given in the "newkey" member of @ctxt in the + * page cache @btnc. Here, the key of the block is an index in units of + * blocks, and if the page and block sizes match, it matches the page index + * in the page cache. + * + * If the page size and block size match, this function attempts to move the + * entire folio, and in preparation for this, inserts the original folio into + * the new index of the cache. If this insertion fails or if the page size + * and block size are different, it falls back to a copy preparation using + * nilfs_btnode_create_block(), inserts a new block at the position + * corresponding to "newkey", and stores the buffer head pointer in the + * "newbh" member of @ctxt. + * + * Note that the current implementation does not support folio sizes larger + * than the page size. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EIO - I/O error (metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_btnode_prepare_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -199,23 +220,23 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc, ctxt->newbh = NULL; if (inode->i_blkbits == PAGE_SHIFT) { - struct page *opage = obh->b_page; - lock_page(opage); + struct folio *ofolio = obh->b_folio; + folio_lock(ofolio); retry: /* BUG_ON(oldkey != obh->b_folio->index); */ - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); xa_lock_irq(&btnc->i_pages); - err = __xa_insert(&btnc->i_pages, newkey, opage, GFP_NOFS); + err = __xa_insert(&btnc->i_pages, newkey, ofolio, GFP_NOFS); xa_unlock_irq(&btnc->i_pages); /* - * Note: page->index will not change to newkey until + * Note: folio->index will not change to newkey until * nilfs_btnode_commit_change_key() will be called. - * To protect the page in intermediate state, the page lock + * To protect the folio in intermediate state, the folio lock * is held. */ if (!err) @@ -227,7 +248,7 @@ retry: if (!err) goto retry; /* fallback to copy mode */ - unlock_page(opage); + folio_unlock(ofolio); } nbh = nilfs_btnode_create_block(btnc, newkey); @@ -239,28 +260,41 @@ retry: return 0; failed_unlock: - unlock_page(obh->b_page); + folio_unlock(obh->b_folio); return err; } /** - * nilfs_btnode_commit_change_key - * commit the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_commit_change_key - commit the change of the search key of + * a b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_commit_change_key() executes the key change based on the + * context @ctxt prepared by nilfs_btnode_prepare_change_key(). If no valid + * block buffer is prepared in "newbh" of @ctxt (i.e., a full folio move), + * this function removes the folio from the old index and completes the move. + * Otherwise, it copies the block data and inherited flag states of "oldbh" + * to "newbh" and clears the "oldbh" from the cache. In either case, the + * relocated buffer is marked as dirty. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_commit_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) { struct buffer_head *obh = ctxt->bh, *nbh = ctxt->newbh; __u64 oldkey = ctxt->oldkey, newkey = ctxt->newkey; - struct page *opage; + struct folio *ofolio; if (oldkey == newkey) return; if (nbh == NULL) { /* blocksize == pagesize */ - opage = obh->b_page; - if (unlikely(oldkey != opage->index)) - NILFS_PAGE_BUG(opage, + ofolio = obh->b_folio; + if (unlikely(oldkey != ofolio->index)) + NILFS_FOLIO_BUG(ofolio, "invalid oldkey %lld (newkey=%lld)", (unsigned long long)oldkey, (unsigned long long)newkey); @@ -271,8 +305,8 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, __xa_set_mark(&btnc->i_pages, newkey, PAGECACHE_TAG_DIRTY); xa_unlock_irq(&btnc->i_pages); - opage->index = obh->b_blocknr = newkey; - unlock_page(opage); + ofolio->index = obh->b_blocknr = newkey; + folio_unlock(ofolio); } else { nilfs_copy_buffer(nbh, obh); mark_buffer_dirty(nbh); @@ -284,8 +318,19 @@ void nilfs_btnode_commit_change_key(struct address_space *btnc, } /** - * nilfs_btnode_abort_change_key - * abort the change_key operation prepared by prepare_change_key(). + * nilfs_btnode_abort_change_key - abort the change of the search key of a + * b-tree node block + * @btnc: page cache in which the b-tree node block is buffered + * @ctxt: structure for exchanging context information for key change + * + * nilfs_btnode_abort_change_key() cancels the key change associated with the + * context @ctxt prepared via nilfs_btnode_prepare_change_key() and performs + * any necessary cleanup. If no valid block buffer is prepared in "newbh" of + * @ctxt, this function removes the folio from the destination index and aborts + * the move. Otherwise, it clears "newbh" from the cache. + * + * As with nilfs_btnode_prepare_change_key(), the current implementation does + * not support folio sizes larger than the page size. */ void nilfs_btnode_abort_change_key(struct address_space *btnc, struct nilfs_btnode_chkey_ctxt *ctxt) @@ -298,7 +343,7 @@ void nilfs_btnode_abort_change_key(struct address_space *btnc, if (nbh == NULL) { /* blocksize == pagesize */ xa_erase_irq(&btnc->i_pages, newkey); - unlock_page(ctxt->bh->b_page); + folio_unlock(ctxt->bh->b_folio); } else { /* * When canceling a buffer that a prepare operation has diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index dbd27a44632f..9c51a4ac2627 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -1861,13 +1861,22 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, } /** - * nilfs_btree_convert_and_insert - - * @bmap: - * @key: - * @ptr: - * @keys: - * @ptrs: - * @n: + * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree + * @btree: NILFS B-tree structure + * @key: Key of the new entry to be inserted + * @ptr: Pointer (block number) associated with the key to be inserted + * @keys: Array of keys to be inserted in addition to @key + * @ptrs: Array of pointers associated with @keys + * @n: Number of keys and pointers in @keys and @ptrs + * + * This function is used to insert a new entry specified by @key and @ptr, + * along with additional entries specified by @keys and @ptrs arrays, into a + * NILFS B-tree. + * It prepares the necessary changes by allocating the required blocks and any + * necessary intermediate nodes. It converts configurations from other forms of + * block mapping (the one that currently exists is direct mapping) to a B-tree. + * + * Return: 0 on success or a negative error code on failure. */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, @@ -2094,11 +2103,13 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree, ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { - if (unlikely(ret == -ENOENT)) + if (unlikely(ret == -ENOENT)) { nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); + ret = -EINVAL; + } goto out; } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 92868e1a48ca..2a220f716c91 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -24,6 +24,7 @@ * @bp_index: index of child node * @bp_oldreq: ptr end request for old ptr * @bp_newreq: ptr alloc request for new ptr + * @bp_ctxt: context information for changing the key of a b-tree node block * @bp_op: rebalance operation */ struct nilfs_btree_path { diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 9ebefb3acb0e..f0ce37552446 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -28,7 +28,7 @@ nilfs_cpfile_get_blkoff(const struct inode *cpfile, __u64 cno) { __u64 tcno = cno + NILFS_MDT(cpfile)->mi_first_entry_offset - 1; - do_div(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); + tcno = div64_ul(tcno, nilfs_cpfile_checkpoints_per_block(cpfile)); return (unsigned long)tcno; } @@ -125,10 +125,17 @@ static void nilfs_cpfile_block_init(struct inode *cpfile, } } -static inline int nilfs_cpfile_get_header_block(struct inode *cpfile, - struct buffer_head **bhp) +static int nilfs_cpfile_get_header_block(struct inode *cpfile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(cpfile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(cpfile->i_sb, + "missing header block in checkpoint metadata"); + err = -EIO; + } + return err; } static inline int nilfs_cpfile_get_checkpoint_block(struct inode *cpfile, @@ -187,35 +194,90 @@ static inline int nilfs_cpfile_delete_checkpoint_block(struct inode *cpfile, } /** - * nilfs_cpfile_get_checkpoint - get a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @create: create flag - * @cpp: pointer to a checkpoint - * @bhp: pointer to a buffer head - * - * Description: nilfs_cpfile_get_checkpoint() acquires the checkpoint - * specified by @cno. A new checkpoint will be created if @cno is the current - * checkpoint number and @create is nonzero. + * nilfs_cpfile_read_checkpoint - read a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint entry to read + * @root: nilfs root object + * @ifile: ifile's inode to read and attach to @root * - * Return Value: On success, 0 is returned, and the checkpoint and the - * buffer head of the buffer on which the checkpoint is located are stored in - * the place pointed by @cpp and @bhp, respectively. On error, one of the - * following negative error codes is returned. + * This function imports checkpoint information from the checkpoint file and + * stores it to the inode file given by @ifile and the nilfs root object + * given by @root. * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + */ +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile) +{ + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + void *kaddr; + int ret; + + if (cno < 1 || cno > nilfs_mdt_cno(cpfile)) + return -EINVAL; + + down_read(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + ret = -EINVAL; + goto out_sem; + } + + kaddr = kmap_local_page(cp_bh->b_page); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + if (nilfs_checkpoint_invalid(cp)) { + ret = -EINVAL; + goto put_cp; + } + + ret = nilfs_read_inode_common(ifile, &cp->cp_ifile_inode); + if (unlikely(ret)) { + /* + * Since this inode is on a checkpoint entry, treat errors + * as metadata corruption. + */ + nilfs_err(cpfile->i_sb, + "ifile inode (checkpoint number=%llu) corrupted", + (unsigned long long)cno); + ret = -EIO; + goto put_cp; + } + + /* Configure the nilfs root object */ + atomic64_set(&root->inodes_count, le64_to_cpu(cp->cp_inodes_count)); + atomic64_set(&root->blocks_count, le64_to_cpu(cp->cp_blocks_count)); + root->ifile = ifile; + +put_cp: + kunmap_local(kaddr); + brelse(cp_bh); +out_sem: + up_read(&NILFS_MDT(cpfile)->mi_sem); + return ret; +} + +/** + * nilfs_cpfile_create_checkpoint - create a checkpoint entry on cpfile + * @cpfile: checkpoint file inode + * @cno: number of checkpoint to set up * - * %-ENOENT - No such checkpoint. + * This function creates a checkpoint with the number specified by @cno on + * cpfile. If the specified checkpoint entry already exists due to a past + * failure, it will be reused without returning an error. + * In either case, the buffer of the block containing the checkpoint entry + * and the cpfile inode are made dirty for inclusion in the write log. * - * %-EINVAL - invalid checkpoint. + * Return: 0 on success, or the following negative error code on failure. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-EROFS - Read only filesystem */ -int nilfs_cpfile_get_checkpoint(struct inode *cpfile, - __u64 cno, - int create, - struct nilfs_checkpoint **cpp, - struct buffer_head **bhp) +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno) { struct buffer_head *header_bh, *cp_bh; struct nilfs_cpfile_header *header; @@ -223,70 +285,123 @@ int nilfs_cpfile_get_checkpoint(struct inode *cpfile, void *kaddr; int ret; - if (unlikely(cno < 1 || cno > nilfs_mdt_cno(cpfile) || - (cno < nilfs_mdt_cno(cpfile) && create))) - return -EINVAL; + if (WARN_ON_ONCE(cno < 1)) + return -EIO; down_write(&NILFS_MDT(cpfile)->mi_sem); - ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); - if (ret < 0) + if (unlikely(ret < 0)) goto out_sem; - ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, create, &cp_bh); - if (ret < 0) + + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 1, &cp_bh); + if (unlikely(ret < 0)) goto out_header; - kaddr = kmap(cp_bh->b_page); + + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { - if (!create) { - kunmap(cp_bh->b_page); - brelse(cp_bh); - ret = -ENOENT; - goto out_header; - } /* a newly-created checkpoint */ nilfs_checkpoint_clear_invalid(cp); if (!nilfs_cpfile_is_in_first(cpfile, cno)) nilfs_cpfile_block_add_valid_checkpoints(cpfile, cp_bh, kaddr, 1); - mark_buffer_dirty(cp_bh); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, 1); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(header_bh); - nilfs_mdt_mark_dirty(cpfile); + } else { + kunmap_local(kaddr); } - if (cpp != NULL) - *cpp = cp; - *bhp = cp_bh; + /* Force the buffer and the inode to become dirty */ + mark_buffer_dirty(cp_bh); + brelse(cp_bh); + nilfs_mdt_mark_dirty(cpfile); - out_header: +out_header: brelse(header_bh); - out_sem: +out_sem: up_write(&NILFS_MDT(cpfile)->mi_sem); return ret; } /** - * nilfs_cpfile_put_checkpoint - put a checkpoint - * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * @bh: buffer head + * nilfs_cpfile_finalize_checkpoint - fill in a checkpoint entry in cpfile + * @cpfile: checkpoint file inode + * @cno: checkpoint number + * @root: nilfs root object + * @blkinc: number of blocks added by this checkpoint + * @ctime: checkpoint creation time + * @minor: minor checkpoint flag * - * Description: nilfs_cpfile_put_checkpoint() releases the checkpoint - * specified by @cno. @bh must be the buffer head which has been returned by - * a previous call to nilfs_cpfile_get_checkpoint() with @cno. + * This function completes the checkpoint entry numbered by @cno in the + * cpfile with the data given by the arguments @root, @blkinc, @ctime, and + * @minor. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ -void nilfs_cpfile_put_checkpoint(struct inode *cpfile, __u64 cno, - struct buffer_head *bh) +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor) { - kunmap(bh->b_page); - brelse(bh); + struct buffer_head *cp_bh; + struct nilfs_checkpoint *cp; + void *kaddr; + int ret; + + if (WARN_ON_ONCE(cno < 1)) + return -EIO; + + down_write(&NILFS_MDT(cpfile)->mi_sem); + ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); + if (unlikely(ret < 0)) { + if (ret == -ENOENT) + goto error; + goto out_sem; + } + + kaddr = kmap_local_page(cp_bh->b_page); + cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); + if (unlikely(nilfs_checkpoint_invalid(cp))) { + kunmap_local(kaddr); + brelse(cp_bh); + goto error; + } + + cp->cp_snapshot_list.ssl_next = 0; + cp->cp_snapshot_list.ssl_prev = 0; + cp->cp_inodes_count = cpu_to_le64(atomic64_read(&root->inodes_count)); + cp->cp_blocks_count = cpu_to_le64(atomic64_read(&root->blocks_count)); + cp->cp_nblk_inc = cpu_to_le64(blkinc); + cp->cp_create = cpu_to_le64(ctime); + cp->cp_cno = cpu_to_le64(cno); + + if (minor) + nilfs_checkpoint_set_minor(cp); + else + nilfs_checkpoint_clear_minor(cp); + + nilfs_write_inode_common(root->ifile, &cp->cp_ifile_inode); + nilfs_bmap_write(NILFS_I(root->ifile)->i_bmap, &cp->cp_ifile_inode); + + kunmap_local(kaddr); + brelse(cp_bh); +out_sem: + up_write(&NILFS_MDT(cpfile)->mi_sem); + return ret; + +error: + nilfs_error(cpfile->i_sb, + "checkpoint finalization failed due to metadata corruption."); + ret = -EIO; + goto out_sem; } /** @@ -347,7 +462,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, continue; } - kaddr = kmap_atomic(cp_bh->b_page); + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint( cpfile, cno, cp_bh, kaddr); nicps = 0; @@ -369,7 +484,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, cpfile, cp_bh, kaddr, nicps); if (count == 0) { /* make hole */ - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(cp_bh); ret = nilfs_cpfile_delete_checkpoint_block( @@ -384,18 +499,18 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile, } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(cp_bh); } if (tnicps > 0) { - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_ncheckpoints, -(u64)tnicps); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(cpfile); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } brelse(header_bh); @@ -447,7 +562,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, } ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, cur_cno); - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) { if (!nilfs_checkpoint_invalid(cp)) { @@ -457,7 +572,7 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop, n++; } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); } @@ -491,10 +606,10 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); curr = le64_to_cpu(header->ch_snapshot_list.ssl_next); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); if (curr == 0) { ret = 0; @@ -512,7 +627,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, ret = 0; /* No snapshots (started from a hole block) */ goto out; } - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); while (n < nci) { cp = nilfs_cpfile_block_get_checkpoint(cpfile, curr, bh, kaddr); curr = ~(__u64)0; /* Terminator */ @@ -528,7 +643,7 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, next_blkoff = nilfs_cpfile_get_blkoff(cpfile, next); if (curr_blkoff != next_blkoff) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, next, 0, &bh); @@ -536,12 +651,12 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, WARN_ON(ret == -ENOENT); goto out; } - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); } curr = next; curr_blkoff = next_blkoff; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); *cnop = curr; ret = n; @@ -552,11 +667,29 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop, } /** - * nilfs_cpfile_get_cpinfo - - * @cpfile: - * @cno: - * @ci: - * @nci: + * nilfs_cpfile_get_cpinfo - get information on checkpoints + * @cpfile: checkpoint file inode + * @cnop: place to pass a starting checkpoint number and receive a + * checkpoint number to continue the search + * @mode: mode of checkpoints that the caller wants to retrieve + * @buf: buffer for storing checkpoints' information + * @cisz: byte size of one checkpoint info item in array + * @nci: number of checkpoint info items to retrieve + * + * nilfs_cpfile_get_cpinfo() searches for checkpoints in @mode state + * starting from the checkpoint number stored in @cnop, and stores + * information about found checkpoints in @buf. + * The buffer pointed to by @buf must be large enough to store information + * for @nci checkpoints. If at least one checkpoint information is + * successfully retrieved, @cnop is updated to point to the checkpoint + * number to continue searching. + * + * Return: Count of checkpoint info items stored in the output buffer on + * success, or the following negative error code on failure. + * * %-EINVAL - Invalid checkpoint mode. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - Invalid checkpoint number specified. */ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, @@ -573,9 +706,15 @@ ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode, } /** - * nilfs_cpfile_delete_checkpoint - - * @cpfile: - * @cno: + * nilfs_cpfile_delete_checkpoint - delete a checkpoint + * @cpfile: checkpoint file inode + * @cno: checkpoint number to delete + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EBUSY - Checkpoint in use (snapshot specified). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No valid checkpoint found. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno) { @@ -632,24 +771,24 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(kaddr); goto out_cp; } if (nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(kaddr); goto out_cp; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) goto out_cp; - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); list = &header->ch_snapshot_list; curr_bh = header_bh; @@ -661,13 +800,13 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) prev_blkoff = nilfs_cpfile_get_blkoff(cpfile, prev); curr = prev; if (curr_blkoff != prev_blkoff) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(curr_bh); ret = nilfs_cpfile_get_checkpoint_block(cpfile, curr, 0, &curr_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(curr_bh->b_page); + kaddr = kmap_local_page(curr_bh->b_page); } curr_blkoff = prev_blkoff; cp = nilfs_cpfile_block_get_checkpoint( @@ -675,7 +814,7 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) list = &cp->cp_snapshot_list; prev = le64_to_cpu(list->ssl_prev); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (prev != 0) { ret = nilfs_cpfile_get_checkpoint_block(cpfile, prev, 0, @@ -687,29 +826,29 @@ static int nilfs_cpfile_set_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(curr_bh->b_page); + kaddr = kmap_local_page(curr_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, curr, curr_bh, kaddr); list->ssl_prev = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(cp_bh->b_page); + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(curr); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(prev); nilfs_checkpoint_set_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(prev_bh->b_page); + kaddr = kmap_local_page(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(cno); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, 1); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(prev_bh); mark_buffer_dirty(curr_bh); @@ -750,23 +889,23 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &cp_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(cp_bh->b_page); + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); if (nilfs_checkpoint_invalid(cp)) { ret = -ENOENT; - kunmap_atomic(kaddr); + kunmap_local(kaddr); goto out_cp; } if (!nilfs_checkpoint_snapshot(cp)) { ret = 0; - kunmap_atomic(kaddr); + kunmap_local(kaddr); goto out_cp; } list = &cp->cp_snapshot_list; next = le64_to_cpu(list->ssl_next); prev = le64_to_cpu(list->ssl_prev); - kunmap_atomic(kaddr); + kunmap_local(kaddr); ret = nilfs_cpfile_get_header_block(cpfile, &header_bh); if (ret < 0) @@ -790,29 +929,29 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) get_bh(prev_bh); } - kaddr = kmap_atomic(next_bh->b_page); + kaddr = kmap_local_page(next_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, next, next_bh, kaddr); list->ssl_prev = cpu_to_le64(prev); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(prev_bh->b_page); + kaddr = kmap_local_page(prev_bh->b_page); list = nilfs_cpfile_block_get_snapshot_list( cpfile, prev, prev_bh, kaddr); list->ssl_next = cpu_to_le64(next); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(cp_bh->b_page); + kaddr = kmap_local_page(cp_bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, cp_bh, kaddr); cp->cp_snapshot_list.ssl_next = cpu_to_le64(0); cp->cp_snapshot_list.ssl_prev = cpu_to_le64(0); nilfs_checkpoint_clear_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, header_bh, kaddr); le64_add_cpu(&header->ch_nsnapshots, -1); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(next_bh); mark_buffer_dirty(prev_bh); @@ -837,21 +976,15 @@ static int nilfs_cpfile_clear_snapshot(struct inode *cpfile, __u64 cno) } /** - * nilfs_cpfile_is_snapshot - + * nilfs_cpfile_is_snapshot - determine if checkpoint is a snapshot * @cpfile: inode of checkpoint file - * @cno: checkpoint number - * - * Description: - * - * Return Value: On success, 1 is returned if the checkpoint specified by - * @cno is a snapshot, or 0 if not. On error, one of the following negative - * error codes is returned. + * @cno: checkpoint number * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-ENOENT - No such checkpoint. + * Return: 1 if the checkpoint specified by @cno is a snapshot, 0 if not, or + * the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOENT - No such checkpoint. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) { @@ -871,13 +1004,13 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) ret = nilfs_cpfile_get_checkpoint_block(cpfile, cno, 0, &bh); if (ret < 0) goto out; - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr); if (nilfs_checkpoint_invalid(cp)) ret = -ENOENT; else ret = nilfs_checkpoint_snapshot(cp); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); out: @@ -954,12 +1087,12 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) ret = nilfs_cpfile_get_header_block(cpfile, &bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); header = nilfs_cpfile_block_get_header(cpfile, bh, kaddr); cpstat->cs_cno = nilfs_mdt_cno(cpfile); cpstat->cs_ncps = le64_to_cpu(header->ch_ncheckpoints); cpstat->cs_nsss = le64_to_cpu(header->ch_nsnapshots); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); out_sem: diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index edabb2dc5756..f5b1d59289eb 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -16,10 +16,12 @@ #include <linux/nilfs2_ondisk.h> /* nilfs_inode, nilfs_checkpoint */ -int nilfs_cpfile_get_checkpoint(struct inode *, __u64, int, - struct nilfs_checkpoint **, - struct buffer_head **); -void nilfs_cpfile_put_checkpoint(struct inode *, __u64, struct buffer_head *); +int nilfs_cpfile_read_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, struct inode *ifile); +int nilfs_cpfile_create_checkpoint(struct inode *cpfile, __u64 cno); +int nilfs_cpfile_finalize_checkpoint(struct inode *cpfile, __u64 cno, + struct nilfs_root *root, __u64 blkinc, + time64_t ctime, bool minor); int nilfs_cpfile_delete_checkpoints(struct inode *, __u64, __u64); int nilfs_cpfile_delete_checkpoint(struct inode *, __u64); int nilfs_cpfile_change_cpmode(struct inode *, __u64, int); diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 351010828d88..0bef662176a4 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -91,13 +91,13 @@ void nilfs_dat_commit_alloc(struct inode *dat, struct nilfs_palloc_req *req) struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MAX); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nilfs_palloc_commit_alloc_entry(dat, req); nilfs_dat_commit_entry(dat, req); @@ -115,13 +115,13 @@ static void nilfs_dat_commit_free(struct inode *dat, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(NILFS_CNO_MIN); entry->de_end = cpu_to_le64(NILFS_CNO_MIN); entry->de_blocknr = cpu_to_le64(0); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nilfs_dat_commit_entry(dat, req); @@ -145,12 +145,12 @@ void nilfs_dat_commit_start(struct inode *dat, struct nilfs_palloc_req *req, struct nilfs_dat_entry *entry; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); entry->de_start = cpu_to_le64(nilfs_mdt_cno(dat)); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nilfs_dat_commit_entry(dat, req); } @@ -167,12 +167,12 @@ int nilfs_dat_prepare_end(struct inode *dat, struct nilfs_palloc_req *req) if (ret < 0) return ret; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (blocknr == 0) { ret = nilfs_palloc_prepare_free_entry(dat, req); @@ -202,7 +202,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); end = start = le64_to_cpu(entry->de_start); @@ -212,7 +212,7 @@ void nilfs_dat_commit_end(struct inode *dat, struct nilfs_palloc_req *req, } entry->de_end = cpu_to_le64(end); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (blocknr == 0) nilfs_dat_commit_free(dat, req); @@ -227,12 +227,12 @@ void nilfs_dat_abort_end(struct inode *dat, struct nilfs_palloc_req *req) sector_t blocknr; void *kaddr; - kaddr = kmap_atomic(req->pr_entry_bh->b_page); + kaddr = kmap_local_page(req->pr_entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, req->pr_entry_nr, req->pr_entry_bh, kaddr); start = le64_to_cpu(entry->de_start); blocknr = le64_to_cpu(entry->de_blocknr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (start == nilfs_mdt_cno(dat) && blocknr == 0) nilfs_palloc_abort_free_entry(dat, req); @@ -271,18 +271,15 @@ void nilfs_dat_abort_update(struct inode *dat, } /** - * nilfs_dat_mark_dirty - - * @dat: DAT file inode + * nilfs_dat_mark_dirty - mark the DAT block buffer containing the specified + * virtual block address entry as dirty + * @dat: DAT file inode * @vblocknr: virtual block number * - * Description: - * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid DAT entry (internal code). + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ int nilfs_dat_mark_dirty(struct inode *dat, __u64 vblocknr) { @@ -362,7 +359,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) } } - kaddr = kmap_atomic(entry_bh->b_page); + kaddr = kmap_local_page(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { nilfs_crit(dat->i_sb, @@ -370,13 +367,13 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) __func__, (unsigned long long)vblocknr, (unsigned long long)le64_to_cpu(entry->de_start), (unsigned long long)le64_to_cpu(entry->de_end)); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(entry_bh); return -EINVAL; } WARN_ON(blocknr == 0); entry->de_blocknr = cpu_to_le64(blocknr); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(entry_bh); nilfs_mdt_mark_dirty(dat); @@ -426,7 +423,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) } } - kaddr = kmap_atomic(entry_bh->b_page); + kaddr = kmap_local_page(entry_bh->b_page); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); if (blocknr == 0) { @@ -436,7 +433,7 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) *blocknrp = blocknr; out: - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(entry_bh); return ret; } @@ -457,10 +454,10 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, 0, &entry_bh); if (ret < 0) return ret; - kaddr = kmap_atomic(entry_bh->b_page); + kaddr = kmap_local_page(entry_bh->b_page); /* last virtual block number in this block */ first = vinfo->vi_vblocknr; - do_div(first, entries_per_block); + first = div64_ul(first, entries_per_block); first *= entries_per_block; last = first + entries_per_block - 1; for (j = i, n = 0; @@ -473,7 +470,7 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned int visz, vinfo->vi_end = le64_to_cpu(entry->de_end); vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(entry_bh); } diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 652279c8b168..0cc32e9c71cb 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -64,12 +64,6 @@ static inline unsigned int nilfs_chunk_size(struct inode *inode) return inode->i_sb->s_blocksize; } -static inline void nilfs_put_page(struct page *page) -{ - kunmap(page); - put_page(page); -} - /* * Return the offset into page `page_nr' of the last valid * byte in that page, plus one. @@ -84,48 +78,46 @@ static unsigned int nilfs_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int nilfs_prepare_chunk(struct page *page, unsigned int from, +static int nilfs_prepare_chunk(struct folio *folio, unsigned int from, unsigned int to) { - loff_t pos = page_offset(page) + from; + loff_t pos = folio_pos(folio) + from; - return __block_write_begin(page, pos, to - from, nilfs_get_block); + return __block_write_begin(folio, pos, to - from, nilfs_get_block); } -static void nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned int from, unsigned int to) +static void nilfs_commit_chunk(struct folio *folio, + struct address_space *mapping, size_t from, size_t to) { struct inode *dir = mapping->host; - loff_t pos = page_offset(page) + from; - unsigned int len = to - from; - unsigned int nr_dirty, copied; + loff_t pos = folio_pos(folio) + from; + size_t copied, len = to - from; + unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, from, to); - copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); + nr_dirty = nilfs_page_count_clean_buffers(&folio->page, from, to); + copied = block_write_end(NULL, mapping, pos, len, len, folio, NULL); if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(dir, nr_dirty); WARN_ON(err); /* do not happen */ - unlock_page(page); + folio_unlock(folio); } -static bool nilfs_check_page(struct page *page) +static bool nilfs_check_folio(struct folio *folio, char *kaddr) { - struct inode *dir = page->mapping->host; + struct inode *dir = folio->mapping->host; struct super_block *sb = dir->i_sb; unsigned int chunk_size = nilfs_chunk_size(dir); - char *kaddr = page_address(page); - unsigned int offs, rec_len; - unsigned int limit = PAGE_SIZE; + size_t offs, rec_len; + size_t limit = folio_size(folio); struct nilfs_dir_entry *p; char *error; - if ((dir->i_size >> PAGE_SHIFT) == page->index) { - limit = dir->i_size & ~PAGE_MASK; + if (dir->i_size < folio_pos(folio) + limit) { + limit = dir->i_size - folio_pos(folio); if (limit & (chunk_size - 1)) goto Ebadsize; if (!limit) @@ -150,7 +142,7 @@ static bool nilfs_check_page(struct page *page) if (offs != limit) goto Eend; out: - SetPageChecked(page); + folio_set_checked(folio); return true; /* Too bad, we had an error */ @@ -176,8 +168,8 @@ Einumber: error = "disallowed inode number"; bad_entry: nilfs_error(sb, - "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%d, name_len=%d", - dir->i_ino, error, (page->index << PAGE_SHIFT) + offs, + "bad entry in directory #%lu: %s - offset=%lu, inode=%lu, rec_len=%zd, name_len=%d", + dir->i_ino, error, (folio->index << PAGE_SHIFT) + offs, (unsigned long)le64_to_cpu(p->inode), rec_len, p->name_len); goto fail; @@ -185,34 +177,33 @@ Eend: p = (struct nilfs_dir_entry *)(kaddr + offs); nilfs_error(sb, "entry in directory #%lu spans the page boundary offset=%lu, inode=%lu", - dir->i_ino, (page->index << PAGE_SHIFT) + offs, + dir->i_ino, (folio->index << PAGE_SHIFT) + offs, (unsigned long)le64_to_cpu(p->inode)); fail: - SetPageError(page); return false; } -static void *nilfs_get_page(struct inode *dir, unsigned long n, - struct page **pagep) +static void *nilfs_get_folio(struct inode *dir, unsigned long n, + struct folio **foliop) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_mapping_page(mapping, n, NULL); + struct folio *folio = read_mapping_folio(mapping, n, NULL); void *kaddr; - if (IS_ERR(page)) - return page; + if (IS_ERR(folio)) + return folio; - kaddr = kmap(page); - if (unlikely(!PageChecked(page))) { - if (!nilfs_check_page(page)) + kaddr = kmap_local_folio(folio, 0); + if (unlikely(!folio_test_checked(folio))) { + if (!nilfs_check_folio(folio, kaddr)) goto fail; } - *pagep = page; + *foliop = folio; return kaddr; fail: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return ERR_PTR(-EIO); } @@ -240,37 +231,6 @@ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) nilfs_rec_len_from_disk(p->rec_len)); } -static unsigned char -nilfs_filetype_table[NILFS_FT_MAX] = { - [NILFS_FT_UNKNOWN] = DT_UNKNOWN, - [NILFS_FT_REG_FILE] = DT_REG, - [NILFS_FT_DIR] = DT_DIR, - [NILFS_FT_CHRDEV] = DT_CHR, - [NILFS_FT_BLKDEV] = DT_BLK, - [NILFS_FT_FIFO] = DT_FIFO, - [NILFS_FT_SOCK] = DT_SOCK, - [NILFS_FT_SYMLINK] = DT_LNK, -}; - -#define S_SHIFT 12 -static unsigned char -nilfs_type_by_mode[(S_IFMT >> S_SHIFT) + 1] = { - [S_IFREG >> S_SHIFT] = NILFS_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = NILFS_FT_DIR, - [S_IFCHR >> S_SHIFT] = NILFS_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = NILFS_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = NILFS_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = NILFS_FT_SOCK, - [S_IFLNK >> S_SHIFT] = NILFS_FT_SYMLINK, -}; - -static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) -{ - umode_t mode = inode->i_mode; - - de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; -} - static int nilfs_readdir(struct file *file, struct dir_context *ctx) { loff_t pos = ctx->pos; @@ -286,9 +246,9 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; - kaddr = nilfs_get_page(inode, n, &page); + kaddr = nilfs_get_folio(inode, n, &folio); if (IS_ERR(kaddr)) { nilfs_error(sb, "bad page in #%lu", inode->i_ino); ctx->pos += PAGE_SIZE - offset; @@ -300,65 +260,57 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) for ( ; (char *)de <= limit; de = nilfs_next_entry(de)) { if (de->rec_len == 0) { nilfs_error(sb, "zero-length directory entry"); - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return -EIO; } if (de->inode) { unsigned char t; - if (de->file_type < NILFS_FT_MAX) - t = nilfs_filetype_table[de->file_type]; - else - t = DT_UNKNOWN; + t = fs_ftype_to_dtype(de->file_type); if (!dir_emit(ctx, de->name, de->name_len, le64_to_cpu(de->inode), t)) { - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } } ctx->pos += nilfs_rec_len_from_disk(de->rec_len); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 0; } /* - * nilfs_find_entry() + * nilfs_find_entry() * - * finds an entry in the specified directory with the wanted name. It - * returns the page in which the entry was found, and the entry itself - * (as a parameter - res_dir). Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. + * Finds an entry in the specified directory with the wanted name. It + * returns the folio in which the entry was found, and the entry itself. + * The folio is mapped and unlocked. When the caller is finished with + * the entry, it should call folio_release_kmap(). * - * On failure, returns an error pointer and the caller should ignore res_page. + * On failure, returns an error pointer and the caller should ignore foliop. */ -struct nilfs_dir_entry * -nilfs_find_entry(struct inode *dir, const struct qstr *qstr, - struct page **res_page) +struct nilfs_dir_entry *nilfs_find_entry(struct inode *dir, + const struct qstr *qstr, struct folio **foliop) { const unsigned char *name = qstr->name; int namelen = qstr->len; unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); - struct page *page = NULL; struct nilfs_inode_info *ei = NILFS_I(dir); struct nilfs_dir_entry *de; if (npages == 0) goto out; - /* OFFSET_CACHE */ - *res_page = NULL; - start = ei->i_dir_start_lookup; if (start >= npages) start = 0; n = start; do { - char *kaddr = nilfs_get_page(dir, n, &page); + char *kaddr = nilfs_get_folio(dir, n, foliop); if (IS_ERR(kaddr)) return ERR_CAST(kaddr); @@ -369,18 +321,18 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, if (de->rec_len == 0) { nilfs_error(dir->i_sb, "zero-length directory entry"); - nilfs_put_page(page); + folio_release_kmap(*foliop, kaddr); goto out; } if (nilfs_match(namelen, name, de)) goto found; de = nilfs_next_entry(de); } - nilfs_put_page(page); + folio_release_kmap(*foliop, kaddr); if (++n >= npages) n = 0; - /* next page is past the blocks we've got */ + /* next folio is past the blocks we've got */ if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) { nilfs_error(dir->i_sb, "dir %lu size %lld exceeds block count %llu", @@ -393,19 +345,18 @@ out: return ERR_PTR(-ENOENT); found: - *res_page = page; ei->i_dir_start_lookup = n; return de; } -struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) +struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct folio **foliop) { - struct page *page; + struct folio *folio; struct nilfs_dir_entry *de, *next_de; size_t limit; char *msg; - de = nilfs_get_page(dir, 0, &page); + de = nilfs_get_folio(dir, 0, &folio); if (IS_ERR(de)) return NULL; @@ -426,47 +377,48 @@ struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) msg = "missing '..'"; goto fail; } - *p = page; + *foliop = folio; return next_de; fail: nilfs_error(dir->i_sb, "directory #%lu %s", dir->i_ino, msg); - nilfs_put_page(page); + folio_release_kmap(folio, de); return NULL; } int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino) { struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; - de = nilfs_find_entry(dir, qstr, &page); + de = nilfs_find_entry(dir, qstr, &folio); if (IS_ERR(de)) return PTR_ERR(de); *ino = le64_to_cpu(de->inode); - kunmap(page); - put_page(page); + folio_release_kmap(folio, de); return 0; } -/* Releases the page */ -void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, - struct page *page, struct inode *inode) +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode) { - unsigned int from = (char *)de - (char *)page_address(page); - unsigned int to = from + nilfs_rec_len_from_disk(de->rec_len); - struct address_space *mapping = page->mapping; + size_t from = offset_in_folio(folio, de); + size_t to = from + nilfs_rec_len_from_disk(de->rec_len); + struct address_space *mapping = folio->mapping; int err; - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + return err; + } de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, mapping, from, to); - nilfs_put_page(page); - dir->i_mtime = inode_set_ctime_current(dir); + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); + return 0; } /* @@ -480,30 +432,28 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) unsigned int chunk_size = nilfs_chunk_size(dir); unsigned int reclen = NILFS_DIR_REC_LEN(namelen); unsigned short rec_len, name_len; - struct page *page = NULL; + struct folio *folio = NULL; struct nilfs_dir_entry *de; unsigned long npages = dir_pages(dir); unsigned long n; - char *kaddr; - unsigned int from, to; + size_t from, to; int err; /* * We take care of directory expansion in the same loop. - * This code plays outside i_size, so it locks the page + * This code plays outside i_size, so it locks the folio * to protect that region. */ for (n = 0; n <= npages; n++) { + char *kaddr = nilfs_get_folio(dir, n, &folio); char *dir_end; - kaddr = nilfs_get_page(dir, n, &page); - err = PTR_ERR(kaddr); if (IS_ERR(kaddr)) - goto out; - lock_page(page); + return PTR_ERR(kaddr); + folio_lock(folio); dir_end = kaddr + nilfs_last_byte(dir, n); de = (struct nilfs_dir_entry *)kaddr; - kaddr += PAGE_SIZE - reclen; + kaddr += folio_size(folio) - reclen; while ((char *)de <= kaddr) { if ((char *)de == dir_end) { /* We hit i_size */ @@ -530,16 +480,16 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) goto got_it; de = (struct nilfs_dir_entry *)((char *)de + rec_len); } - unlock_page(page); - nilfs_put_page(page); + folio_unlock(folio); + folio_release_kmap(folio, kaddr); } BUG(); return -EINVAL; got_it: - from = (char *)de - (char *)page_address(page); + from = offset_in_folio(folio, de); to = from + rec_len; - err = nilfs_prepare_chunk(page, from, to); + err = nilfs_prepare_chunk(folio, from, to); if (err) goto out_unlock; if (de->inode) { @@ -553,30 +503,29 @@ got_it: de->name_len = namelen; memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); - nilfs_commit_chunk(page, page->mapping, from, to); - dir->i_mtime = inode_set_ctime_current(dir); + de->file_type = fs_umode_to_ftype(inode->i_mode); + nilfs_commit_chunk(folio, folio->mapping, from, to); + inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: - nilfs_put_page(page); -out: + folio_release_kmap(folio, de); return err; out_unlock: - unlock_page(page); + folio_unlock(folio); goto out_put; } /* * nilfs_delete_entry deletes a directory entry by merging it with the - * previous entry. Page is up-to-date. Releases the page. + * previous entry. Folio is up-to-date. */ -int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) +int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; - char *kaddr = page_address(page); - unsigned int from, to; + char *kaddr = (char *)((unsigned long)dir & ~(folio_size(folio) - 1)); + size_t from, to; struct nilfs_dir_entry *de, *pde = NULL; int err; @@ -595,17 +544,19 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) de = nilfs_next_entry(de); } if (pde) - from = (char *)pde - (char *)page_address(page); - lock_page(page); - err = nilfs_prepare_chunk(page, from, to); - BUG_ON(err); + from = (char *)pde - kaddr; + folio_lock(folio); + err = nilfs_prepare_chunk(folio, from, to); + if (unlikely(err)) { + folio_unlock(folio); + goto out; + } if (pde) pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; - nilfs_commit_chunk(page, mapping, from, to); - inode->i_mtime = inode_set_ctime_current(inode); + nilfs_commit_chunk(folio, mapping, from, to); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); out: - nilfs_put_page(page); return err; } @@ -615,39 +566,39 @@ out: int nilfs_make_empty(struct inode *inode, struct inode *parent) { struct address_space *mapping = inode->i_mapping; - struct page *page = grab_cache_page(mapping, 0); + struct folio *folio = filemap_grab_folio(mapping, 0); unsigned int chunk_size = nilfs_chunk_size(inode); struct nilfs_dir_entry *de; int err; void *kaddr; - if (!page) - return -ENOMEM; + if (IS_ERR(folio)) + return PTR_ERR(folio); - err = nilfs_prepare_chunk(page, 0, chunk_size); + err = nilfs_prepare_chunk(folio, 0, chunk_size); if (unlikely(err)) { - unlock_page(page); + folio_unlock(folio); goto fail; } - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); - nilfs_set_de_type(de, inode); + de->file_type = fs_umode_to_ftype(inode->i_mode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); - nilfs_set_de_type(de, inode); - kunmap_atomic(kaddr); - nilfs_commit_chunk(page, mapping, 0, chunk_size); + de->file_type = fs_umode_to_ftype(inode->i_mode); + kunmap_local(kaddr); + nilfs_commit_chunk(folio, mapping, 0, chunk_size); fail: - put_page(page); + folio_put(folio); return err; } @@ -656,14 +607,14 @@ fail: */ int nilfs_empty_dir(struct inode *inode) { - struct page *page = NULL; + struct folio *folio = NULL; + char *kaddr; unsigned long i, npages = dir_pages(inode); for (i = 0; i < npages; i++) { - char *kaddr; struct nilfs_dir_entry *de; - kaddr = nilfs_get_page(inode, i, &page); + kaddr = nilfs_get_folio(inode, i, &folio); if (IS_ERR(kaddr)) return 0; @@ -692,12 +643,12 @@ int nilfs_empty_dir(struct inode *inode) } de = nilfs_next_entry(de); } - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); } return 1; not_empty: - nilfs_put_page(page); + folio_release_kmap(folio, kaddr); return 0; } diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 893ab36824cc..2d8dc6b35b54 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -273,6 +273,9 @@ static int nilfs_direct_propagate(struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); ptr = nilfs_direct_get_ptr(bmap, key); + if (ptr == NILFS_BMAP_INVALID_PTR) + return -EINVAL; + if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 0505feef79f4..0e3fc5ba33c7 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -45,34 +45,36 @@ int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vma->vm_file); struct nilfs_transaction_info ti; + struct buffer_head *bh, *head; int ret = 0; if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info))) return VM_FAULT_SIGBUS; /* -ENOSPC */ sb_start_pagefault(inode->i_sb); - lock_page(page); - if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) { - unlock_page(page); + folio_lock(folio); + if (folio->mapping != inode->i_mapping || + folio_pos(folio) >= i_size_read(inode) || + !folio_test_uptodate(folio)) { + folio_unlock(folio); ret = -EFAULT; /* make the VM retry the fault */ goto out; } /* - * check to see if the page is mapped already (no holes) + * check to see if the folio is mapped already (no holes) */ - if (PageMappedToDisk(page)) + if (folio_test_mappedtodisk(folio)) goto mapped; - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int fully_mapped = 1; - bh = head = page_buffers(page); + bh = head; do { if (!buffer_mapped(bh)) { fully_mapped = 0; @@ -81,11 +83,11 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) } while (bh = bh->b_this_page, bh != head); if (fully_mapped) { - SetPageMappedToDisk(page); + folio_set_mappedtodisk(folio); goto mapped; } } - unlock_page(page); + folio_unlock(folio); /* * fill hole blocks @@ -111,7 +113,7 @@ static vm_fault_t nilfs_page_mkwrite(struct vm_fault *vmf) * necessary to wait for writeback to finish here, regardless of the * stable write requirement of the backing device. */ - wait_on_page_writeback(page); + folio_wait_writeback(folio); out: sb_end_pagefault(inode->i_sb); return vmf_fs_error(ret); diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 3a50fe042477..2dbb15767df1 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -96,8 +96,8 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, *out_bh = bh; failed: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); if (unlikely(err)) brelse(bh); return err; @@ -173,6 +173,7 @@ int nilfs_init_gcinode(struct inode *inode) /** * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes + * @nilfs: NILFS filesystem instance */ void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index ac10a62a41e9..1e86b9303b7c 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -15,6 +15,7 @@ #include "mdt.h" #include "alloc.h" #include "ifile.h" +#include "cpfile.h" /** * struct nilfs_ifile_info - on-memory private data of ifile @@ -112,11 +113,11 @@ int nilfs_ifile_delete_inode(struct inode *ifile, ino_t ino) return ret; } - kaddr = kmap_atomic(req.pr_entry_bh->b_page); + kaddr = kmap_local_page(req.pr_entry_bh->b_page); raw_inode = nilfs_palloc_block_get_entry(ifile, req.pr_entry_nr, req.pr_entry_bh, kaddr); raw_inode->i_flags = 0; - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(req.pr_entry_bh); brelse(req.pr_entry_bh); @@ -170,14 +171,18 @@ int nilfs_ifile_count_free_inodes(struct inode *ifile, * nilfs_ifile_read - read or get ifile inode * @sb: super block instance * @root: root object + * @cno: number of checkpoint entry to read * @inode_size: size of an inode - * @raw_inode: on-disk ifile inode - * @inodep: buffer to store the inode + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid checkpoint. + * * %-ENOMEM - Insufficient memory available. + * * %-EIO - I/O error (including metadata corruption). */ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep) + __u64 cno, size_t inode_size) { + struct the_nilfs *nilfs; struct inode *ifile; int err; @@ -198,13 +203,13 @@ int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); - err = nilfs_read_inode_common(ifile, raw_inode); + nilfs = sb->s_fs_info; + err = nilfs_cpfile_read_checkpoint(nilfs->ns_cpfile, cno, root, ifile); if (err) goto failed; unlock_new_inode(ifile); out: - *inodep = ifile; return 0; failed: iget_failed(ifile); diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index 35c5273f4821..625545cc2a98 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -21,15 +21,14 @@ static inline struct nilfs_inode * nilfs_ifile_map_inode(struct inode *ifile, ino_t ino, struct buffer_head *ibh) { - void *kaddr = kmap(ibh->b_page); + void *kaddr = kmap_local_page(ibh->b_page); return nilfs_palloc_block_get_entry(ifile, ino, ibh, kaddr); } -static inline void nilfs_ifile_unmap_inode(struct inode *ifile, ino_t ino, - struct buffer_head *ibh) +static inline void nilfs_ifile_unmap_inode(struct nilfs_inode *raw_inode) { - kunmap(ibh->b_page); + kunmap_local(raw_inode); } int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); @@ -39,7 +38,6 @@ int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); int nilfs_ifile_count_free_inodes(struct inode *, u64 *, u64 *); int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, - size_t inode_size, struct nilfs_inode *raw_inode, - struct inode **inodep); + __u64 cno, size_t inode_size); #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 489592644b68..afb3b9637740 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -15,6 +15,7 @@ #include <linux/writeback.h> #include <linux/uio.h> #include <linux/fiemap.h> +#include <linux/random.h> #include "nilfs.h" #include "btnode.h" #include "segment.h" @@ -28,17 +29,13 @@ * @ino: inode number * @cno: checkpoint number * @root: pointer on NILFS root object (mounted checkpoint) - * @for_gc: inode for GC flag - * @for_btnc: inode for B-tree node cache flag - * @for_shadow: inode for shadowed page cache flag + * @type: inode type */ struct nilfs_iget_args { u64 ino; __u64 cno; struct nilfs_root *root; - bool for_gc; - bool for_btnc; - bool for_shadow; + unsigned int type; }; static int nilfs_iget_test(struct inode *inode, void *opaque); @@ -162,7 +159,7 @@ static int nilfs_writepages(struct address_space *mapping, int err = 0; if (sb_rdonly(inode->i_sb)) { - nilfs_clear_dirty_pages(mapping, false); + nilfs_clear_dirty_pages(mapping); return -EROFS; } @@ -175,7 +172,8 @@ static int nilfs_writepages(struct address_space *mapping, static int nilfs_writepage(struct page *page, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(page); + struct inode *inode = folio->mapping->host; int err; if (sb_rdonly(inode->i_sb)) { @@ -185,13 +183,13 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc) * have dirty pages that try to be flushed in background. * So, here we simply discard this dirty page. */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); + nilfs_clear_folio_dirty(folio); + folio_unlock(folio); return -EROFS; } - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); if (wbc->sync_mode == WB_SYNC_ALL) { err = nilfs_construct_segment(inode->i_sb); @@ -214,7 +212,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping, /* * The page may not be locked, eg if called from try_to_unmap_one() */ - spin_lock(&mapping->private_lock); + spin_lock(&mapping->i_private_lock); head = folio_buffers(folio); if (head) { struct buffer_head *bh = head; @@ -230,7 +228,7 @@ static bool nilfs_dirty_folio(struct address_space *mapping, } else if (ret) { nr_dirty = 1 << (folio_shift(folio) - inode->i_blkbits); } - spin_unlock(&mapping->private_lock); + spin_unlock(&mapping->i_private_lock); if (nr_dirty) nilfs_set_file_dirty(inode, nr_dirty); @@ -249,7 +247,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to) static int nilfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { struct inode *inode = mapping->host; @@ -258,7 +256,7 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - err = block_write_begin(mapping, pos, len, pagep, nilfs_get_block); + err = block_write_begin(mapping, pos, len, foliop, nilfs_get_block); if (unlikely(err)) { nilfs_write_failed(mapping, pos + len); nilfs_transaction_abort(inode->i_sb); @@ -268,16 +266,16 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, static int nilfs_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { struct inode *inode = mapping->host; unsigned int start = pos & (PAGE_SIZE - 1); unsigned int nr_dirty; int err; - nr_dirty = nilfs_page_count_clean_buffers(page, start, + nr_dirty = nilfs_page_count_clean_buffers(&folio->page, start, start + copied); - copied = generic_write_end(file, mapping, pos, len, copied, page, + copied = generic_write_end(file, mapping, pos, len, copied, folio, fsdata); nilfs_set_file_dirty(inode, nr_dirty); err = nilfs_transaction_commit(inode->i_sb); @@ -318,8 +316,7 @@ static int nilfs_insert_inode_locked(struct inode *inode, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return insert_inode_locked4(inode, ino, nilfs_iget_test, &args); @@ -328,7 +325,6 @@ static int nilfs_insert_inode_locked(struct inode *inode, struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) { struct super_block *sb = dir->i_sb; - struct the_nilfs *nilfs = sb->s_fs_info; struct inode *inode; struct nilfs_inode_info *ii; struct nilfs_root *root; @@ -346,31 +342,19 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = BIT(NILFS_I_NEW); + ii->i_type = NILFS_I_TYPE_NORMAL; ii->i_root = root; err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - - if (unlikely(ino < NILFS_USER_INO)) { - nilfs_warn(sb, - "inode bitmap is inconsistent for reserved inodes"); - do { - brelse(bh); - err = nilfs_ifile_create_inode(root->ifile, &ino, &bh); - if (unlikely(err)) - goto failed_ifile_create_inode; - } while (ino < NILFS_USER_INO); - - nilfs_info(sb, "repaired inode bitmap for reserved inodes"); - } ii->i_bh = bh; atomic64_inc(&root->inodes_count); inode_init_owner(&nop_mnt_idmap, inode, dir, mode); inode->i_ino = ino; - inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode); + simple_inode_init_ts(inode); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { err = nilfs_bmap_read(ii->i_bmap, NULL); @@ -388,9 +372,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode) /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; nilfs_set_inode_flags(inode); - spin_lock(&nilfs->ns_next_gen_lock); - inode->i_generation = nilfs->ns_next_generation++; - spin_unlock(&nilfs->ns_next_gen_lock); + inode->i_generation = get_random_u32(); if (nilfs_insert_inode_locked(inode, root, ino) < 0) { err = -EIO; goto failed_after_creation; @@ -453,12 +435,12 @@ int nilfs_read_inode_common(struct inode *inode, i_gid_write(inode, le32_to_cpu(raw_inode->i_gid)); set_nlink(inode, le16_to_cpu(raw_inode->i_links_count)); inode->i_size = le64_to_cpu(raw_inode->i_size); - inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); + inode_set_atime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime), le32_to_cpu(raw_inode->i_ctime_nsec)); - inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime); - inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); + inode_set_mtime(inode, le64_to_cpu(raw_inode->i_mtime), + le32_to_cpu(raw_inode->i_mtime_nsec)); if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode)) return -EIO; /* this inode is for metadata and corrupted */ if (inode->i_nlink == 0) @@ -517,13 +499,20 @@ static int __nilfs_read_inode(struct super_block *sb, inode->i_op = &nilfs_symlink_inode_operations; inode_nohighmem(inode); inode->i_mapping->a_ops = &nilfs_aops; - } else { + } else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) || + S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) { inode->i_op = &nilfs_special_inode_operations; init_special_inode( inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); + } else { + nilfs_error(sb, + "invalid file type bits in mode 0%o for inode %lu", + inode->i_mode, ino); + err = -EIO; + goto failed_unmap; } - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); up_read(&NILFS_MDT(nilfs->ns_dat)->mi_sem); nilfs_set_inode_flags(inode); @@ -532,7 +521,7 @@ static int __nilfs_read_inode(struct super_block *sb, return 0; failed_unmap: - nilfs_ifile_unmap_inode(root->ifile, ino, bh); + nilfs_ifile_unmap_inode(raw_inode); brelse(bh); bad_inode: @@ -549,23 +538,10 @@ static int nilfs_iget_test(struct inode *inode, void *opaque) return 0; ii = NILFS_I(inode); - if (test_bit(NILFS_I_BTNC, &ii->i_state)) { - if (!args->for_btnc) - return 0; - } else if (args->for_btnc) { - return 0; - } - if (test_bit(NILFS_I_SHADOW, &ii->i_state)) { - if (!args->for_shadow) - return 0; - } else if (args->for_shadow) { + if (ii->i_type != args->type) return 0; - } - - if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) - return !args->for_gc; - return args->for_gc && args->cno == ii->i_cno; + return !(args->type & NILFS_I_TYPE_GC) || args->cno == ii->i_cno; } static int nilfs_iget_set(struct inode *inode, void *opaque) @@ -575,15 +551,9 @@ static int nilfs_iget_set(struct inode *inode, void *opaque) inode->i_ino = args->ino; NILFS_I(inode)->i_cno = args->cno; NILFS_I(inode)->i_root = args->root; + NILFS_I(inode)->i_type = args->type; if (args->root && args->ino == NILFS_ROOT_INO) nilfs_get_root(args->root); - - if (args->for_gc) - NILFS_I(inode)->i_state = BIT(NILFS_I_GCINODE); - if (args->for_btnc) - NILFS_I(inode)->i_state |= BIT(NILFS_I_BTNC); - if (args->for_shadow) - NILFS_I(inode)->i_state |= BIT(NILFS_I_SHADOW); return 0; } @@ -591,8 +561,7 @@ struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return ilookup5(sb, ino, nilfs_iget_test, &args); @@ -602,8 +571,7 @@ struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, unsigned long ino) { struct nilfs_iget_args args = { - .ino = ino, .root = root, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = root, .cno = 0, .type = NILFS_I_TYPE_NORMAL }; return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -640,8 +608,7 @@ struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, __u64 cno) { struct nilfs_iget_args args = { - .ino = ino, .root = NULL, .cno = cno, .for_gc = true, - .for_btnc = false, .for_shadow = false + .ino = ino, .root = NULL, .cno = cno, .type = NILFS_I_TYPE_GC }; struct inode *inode; int err; @@ -686,9 +653,7 @@ int nilfs_attach_btree_node_cache(struct inode *inode) args.ino = inode->i_ino; args.root = ii->i_root; args.cno = ii->i_cno; - args.for_gc = test_bit(NILFS_I_GCINODE, &ii->i_state) != 0; - args.for_btnc = true; - args.for_shadow = test_bit(NILFS_I_SHADOW, &ii->i_state) != 0; + args.type = ii->i_type | NILFS_I_TYPE_BTNC; btnc_inode = iget5_locked(inode->i_sb, inode->i_ino, nilfs_iget_test, nilfs_iget_set, &args); @@ -742,8 +707,8 @@ void nilfs_detach_btree_node_cache(struct inode *inode) struct inode *nilfs_iget_for_shadow(struct inode *inode) { struct nilfs_iget_args args = { - .ino = inode->i_ino, .root = NULL, .cno = 0, .for_gc = false, - .for_btnc = false, .for_shadow = true + .ino = inode->i_ino, .root = NULL, .cno = 0, + .type = NILFS_I_TYPE_SHADOW }; struct inode *s_inode; int err; @@ -769,8 +734,18 @@ struct inode *nilfs_iget_for_shadow(struct inode *inode) return s_inode; } +/** + * nilfs_write_inode_common - export common inode information to on-disk inode + * @inode: inode object + * @raw_inode: on-disk inode + * + * This function writes standard information from the on-memory inode @inode + * to @raw_inode on ifile, cpfile or a super root block. Since inode bmap + * data is not exported, nilfs_bmap_write() must be called separately during + * log writing. + */ void nilfs_write_inode_common(struct inode *inode, - struct nilfs_inode *raw_inode, int has_bmap) + struct nilfs_inode *raw_inode) { struct nilfs_inode_info *ii = NILFS_I(inode); @@ -779,30 +754,15 @@ void nilfs_write_inode_common(struct inode *inode, raw_inode->i_gid = cpu_to_le32(i_gid_read(inode)); raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); raw_inode->i_size = cpu_to_le64(inode->i_size); - raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec); - raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); - raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec); - raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); + raw_inode->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); + raw_inode->i_mtime = cpu_to_le64(inode_get_mtime_sec(inode)); + raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + raw_inode->i_mtime_nsec = cpu_to_le32(inode_get_mtime_nsec(inode)); raw_inode->i_blocks = cpu_to_le64(inode->i_blocks); raw_inode->i_flags = cpu_to_le32(ii->i_flags); raw_inode->i_generation = cpu_to_le32(inode->i_generation); - if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) { - struct the_nilfs *nilfs = inode->i_sb->s_fs_info; - - /* zero-fill unused portion in the case of super root block */ - raw_inode->i_xattr = 0; - raw_inode->i_pad = 0; - memset((void *)raw_inode + sizeof(*raw_inode), 0, - nilfs->ns_inode_size - sizeof(*raw_inode)); - } - - if (has_bmap) - nilfs_bmap_write(ii->i_bmap, raw_inode); - else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) - raw_inode->i_device_code = - cpu_to_le64(huge_encode_dev(inode->i_rdev)); /* * When extending inode, nilfs->ns_inode_size should be checked * for substitutions of appended fields. @@ -823,14 +783,13 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh, int flags) if (flags & I_DIRTY_DATASYNC) set_bit(NILFS_I_INODE_SYNC, &ii->i_state); - nilfs_write_inode_common(inode, raw_inode, 0); - /* - * XXX: call with has_bmap = 0 is a workaround to avoid - * deadlock of bmap. This delays update of i_bmap to just - * before writing. - */ + nilfs_write_inode_common(inode, raw_inode); + + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) + raw_inode->i_device_code = + cpu_to_le64(huge_encode_dev(inode->i_rdev)); - nilfs_ifile_unmap_inode(ifile, ino, ibh); + nilfs_ifile_unmap_inode(raw_inode); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -886,7 +845,7 @@ void nilfs_truncate(struct inode *inode) nilfs_truncate_bmap(ii, blkoff); - inode->i_mtime = inode_set_ctime_current(inode); + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); @@ -916,7 +875,7 @@ static void nilfs_clear_inode(struct inode *inode) if (test_bit(NILFS_I_BMAP, &ii->i_state)) nilfs_bmap_clear(ii->i_bmap); - if (!test_bit(NILFS_I_BTNC, &ii->i_state)) + if (!(ii->i_type & NILFS_I_TYPE_BTNC)) nilfs_detach_btree_node_cache(inode); if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) @@ -1267,7 +1226,7 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (size) { if (phys && blkphy << blkbits == phys + size) { /* The current extent goes on */ - size += n << blkbits; + size += (u64)n << blkbits; } else { /* Terminate the current extent */ ret = fiemap_fill_next_extent( @@ -1280,14 +1239,14 @@ int nilfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } } else { /* Start a new extent */ flags = FIEMAP_EXTENT_MERGED; logical = blkoff << blkbits; phys = blkphy << blkbits; - size = n << blkbits; + size = (u64)n << blkbits; } blkoff += n; } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 53022bfe0b72..fa77f78df681 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -17,6 +17,7 @@ #include <linux/mount.h> /* mnt_want_write_file(), mnt_drop_write_file() */ #include <linux/buffer_head.h> #include <linux/fileattr.h> +#include <linux/string.h> #include "nilfs.h" #include "segment.h" #include "bmap.h" @@ -114,7 +115,11 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, } /** - * nilfs_fileattr_get - ioctl to support lsattr + * nilfs_fileattr_get - retrieve miscellaneous file attributes + * @dentry: the object to retrieve from + * @fa: fileattr pointer + * + * Return: always 0 as success. */ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) { @@ -126,7 +131,12 @@ int nilfs_fileattr_get(struct dentry *dentry, struct fileattr *fa) } /** - * nilfs_fileattr_set - ioctl to support chattr + * nilfs_fileattr_set - change miscellaneous file attributes + * @idmap: idmap of the mount + * @dentry: the object to change + * @fa: fileattr pointer + * + * Return: 0 on success, or a negative error code on failure. */ int nilfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) @@ -159,6 +169,10 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap, /** * nilfs_ioctl_getversion - get info about a file's version (generation number) + * @inode: inode object + * @argp: userspace memory where the generation number of @inode is stored + * + * Return: 0 on success, or %-EFAULT on error. */ static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp) { @@ -872,16 +886,14 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, nsegs = argv[4].v_nmembs; if (argv[4].v_size != argsz[4]) goto out; - if (nsegs > UINT_MAX / sizeof(__u64)) - goto out; /* * argv[4] points to segment numbers this ioctl cleans. We - * use kmalloc() for its buffer because memory used for the - * segment numbers is enough small. + * use kmalloc() for its buffer because the memory used for the + * segment numbers is small enough. */ - kbufs[4] = memdup_user((void __user *)(unsigned long)argv[4].v_base, - nsegs * sizeof(__u64)); + kbufs[4] = memdup_array_user((void __user *)(unsigned long)argv[4].v_base, + nsegs, sizeof(__u64)); if (IS_ERR(kbufs[4])) { ret = PTR_ERR(kbufs[4]); goto out; @@ -1113,7 +1125,7 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) segbytes = nilfs->ns_blocks_per_segment * nilfs->ns_blocksize; minseg = range[0] + segbytes - 1; - do_div(minseg, segbytes); + minseg = div64_ul(minseg, segbytes); if (range[1] < 4096) goto out; @@ -1122,7 +1134,7 @@ static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp) if (maxseg < segbytes) goto out; - do_div(maxseg, segbytes); + maxseg = div64_ul(maxseg, segbytes); maxseg--; ret = nilfs_sufile_set_alloc_range(nilfs->ns_sufile, minseg, maxseg); @@ -1268,6 +1280,91 @@ out: return ret; } +/** + * nilfs_ioctl_get_fslabel - get the volume name of the file system + * @sb: super block instance + * @argp: pointer to userspace memory where the volume name should be stored + * + * Return: 0 on success, %-EFAULT if copying to userspace memory fails. + */ +static int nilfs_ioctl_get_fslabel(struct super_block *sb, void __user *argp) +{ + struct the_nilfs *nilfs = sb->s_fs_info; + char label[NILFS_MAX_VOLUME_NAME + 1]; + + BUILD_BUG_ON(NILFS_MAX_VOLUME_NAME >= FSLABEL_MAX); + + down_read(&nilfs->ns_sem); + memtostr_pad(label, nilfs->ns_sbp[0]->s_volume_name); + up_read(&nilfs->ns_sem); + + if (copy_to_user(argp, label, sizeof(label))) + return -EFAULT; + return 0; +} + +/** + * nilfs_ioctl_set_fslabel - set the volume name of the file system + * @sb: super block instance + * @filp: file object + * @argp: pointer to userspace memory that contains the volume name + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EFAULT - Error copying input data. + * * %-EINVAL - Label length exceeds record size in superblock. + * * %-EIO - I/O error. + * * %-EPERM - Operation not permitted (insufficient permissions). + * * %-EROFS - Read only file system. + */ +static int nilfs_ioctl_set_fslabel(struct super_block *sb, struct file *filp, + void __user *argp) +{ + char label[NILFS_MAX_VOLUME_NAME + 1]; + struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_super_block **sbp; + size_t len; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (copy_from_user(label, argp, NILFS_MAX_VOLUME_NAME + 1)) { + ret = -EFAULT; + goto out_drop_write; + } + + len = strnlen(label, NILFS_MAX_VOLUME_NAME + 1); + if (len > NILFS_MAX_VOLUME_NAME) { + nilfs_err(sb, "unable to set label with more than %zu bytes", + NILFS_MAX_VOLUME_NAME); + ret = -EINVAL; + goto out_drop_write; + } + + down_write(&nilfs->ns_sem); + sbp = nilfs_prepare_super(sb, false); + if (unlikely(!sbp)) { + ret = -EIO; + goto out_unlock; + } + + strtomem_pad(sbp[0]->s_volume_name, label, 0); + if (sbp[1]) + strtomem_pad(sbp[1]->s_volume_name, label, 0); + + ret = nilfs_commit_super(sb, NILFS_SB_COMMIT_ALL); + +out_unlock: + up_write(&nilfs->ns_sem); +out_drop_write: + mnt_drop_write_file(filp); + return ret; +} + long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct inode *inode = file_inode(filp); @@ -1310,6 +1407,10 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return nilfs_ioctl_set_alloc_range(inode, argp); case FITRIM: return nilfs_ioctl_trim_fs(inode, argp); + case FS_IOC_GETFSLABEL: + return nilfs_ioctl_get_fslabel(inode->i_sb, argp); + case FS_IOC_SETFSLABEL: + return nilfs_ioctl_set_fslabel(inode->i_sb, filp, argp); default: return -ENOTTY; } @@ -1336,6 +1437,8 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: case FITRIM: + case FS_IOC_GETFSLABEL: + case FS_IOC_SETFSLABEL: break; default: return -ENOIOCTLCMD; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 75a2ed5ee6e0..2db6350b5ac2 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -47,12 +47,12 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, set_buffer_mapped(bh); - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); memset(kaddr + bh_offset(bh), 0, i_blocksize(inode)); if (init_block) init_block(inode, bh, kaddr); flush_dcache_page(bh->b_page); - kunmap_atomic(kaddr); + kunmap_local(kaddr); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -96,8 +96,8 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, } failed_bh: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed_unlock: @@ -157,8 +157,8 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, blk_opf_t opf, *out_bh = bh; failed_bh: - unlock_page(bh->b_page); - put_page(bh->b_page); + folio_unlock(bh->b_folio); + folio_put(bh->b_folio); brelse(bh); failed: return ret; @@ -355,30 +355,28 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) */ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) { - pgoff_t index = (pgoff_t)block >> - (PAGE_SHIFT - inode->i_blkbits); - struct page *page; - unsigned long first_block; + pgoff_t index = block >> (PAGE_SHIFT - inode->i_blkbits); + struct folio *folio; + struct buffer_head *bh; int ret = 0; int still_dirty; - page = find_lock_page(inode->i_mapping, index); - if (!page) + folio = filemap_lock_folio(inode->i_mapping, index); + if (IS_ERR(folio)) return -ENOENT; - wait_on_page_writeback(page); - - first_block = (unsigned long)index << - (PAGE_SHIFT - inode->i_blkbits); - if (page_has_buffers(page)) { - struct buffer_head *bh; + folio_wait_writeback(folio); - bh = nilfs_page_get_nth_block(page, block - first_block); + bh = folio_buffers(folio); + if (bh) { + unsigned long first_block = index << + (PAGE_SHIFT - inode->i_blkbits); + bh = get_nth_bh(bh, block - first_block); nilfs_forget_buffer(bh); } - still_dirty = PageDirty(page); - unlock_page(page); - put_page(page); + still_dirty = folio_test_dirty(folio); + folio_unlock(folio); + folio_put(folio); if (still_dirty || invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) @@ -400,7 +398,8 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = page->mapping->host; + struct folio *folio = page_folio(page); + struct inode *inode = folio->mapping->host; struct super_block *sb; int err = 0; @@ -408,16 +407,16 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) /* * It means that filesystem was remounted in read-only * mode because of error or metadata corruption. But we - * have dirty pages that try to be flushed in background. - * So, here we simply discard this dirty page. + * have dirty folios that try to be flushed in background. + * So, here we simply discard this dirty folio. */ - nilfs_clear_dirty_page(page, false); - unlock_page(page); + nilfs_clear_folio_dirty(folio); + folio_unlock(folio); return -EROFS; } - redirty_page_for_writepage(wbc, page); - unlock_page(page); + folio_redirty_for_writepage(wbc, folio); + folio_unlock(folio); if (!inode) return 0; @@ -559,17 +558,19 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen; - struct page *page; + struct folio *folio; int blkbits = inode->i_blkbits; - page = grab_cache_page(shadow->inode->i_mapping, bh->b_folio->index); - if (!page) - return -ENOMEM; + folio = filemap_grab_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, 0); + bh_frozen = folio_buffers(folio); + if (!bh_frozen) + bh_frozen = create_empty_buffers(folio, 1 << blkbits, 0); - bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + bh_frozen = get_nth_bh(bh_frozen, bh_offset(bh) >> blkbits); if (!buffer_uptodate(bh_frozen)) nilfs_copy_buffer(bh_frozen, bh); @@ -581,8 +582,8 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) brelse(bh_frozen); /* already frozen */ } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return 0; } @@ -591,17 +592,19 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) { struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; struct buffer_head *bh_frozen = NULL; - struct page *page; + struct folio *folio; int n; - page = find_lock_page(shadow->inode->i_mapping, bh->b_folio->index); - if (page) { - if (page_has_buffers(page)) { + folio = filemap_lock_folio(shadow->inode->i_mapping, + bh->b_folio->index); + if (!IS_ERR(folio)) { + bh_frozen = folio_buffers(folio); + if (bh_frozen) { n = bh_offset(bh) >> inode->i_blkbits; - bh_frozen = nilfs_page_get_nth_block(page, n); + bh_frozen = get_nth_bh(bh_frozen, n); } - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); } return bh_frozen; } @@ -634,10 +637,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode *inode) if (mi->mi_palloc_cache) nilfs_palloc_clear_cache(inode); - nilfs_clear_dirty_pages(inode->i_mapping, true); + nilfs_clear_dirty_pages(inode->i_mapping); nilfs_copy_back_pages(inode->i_mapping, shadow->inode->i_mapping); - nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping, true); + nilfs_clear_dirty_pages(ii->i_assoc_inode->i_mapping); nilfs_copy_back_pages(ii->i_assoc_inode->i_mapping, NILFS_I(shadow->inode)->i_assoc_inode->i_mapping); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ac0adeb58e41..e02fae6757f1 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -276,10 +276,10 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; - struct page *page; + struct folio *folio; int err; - de = nilfs_find_entry(dir, &dentry->d_name, &page); + de = nilfs_find_entry(dir, &dentry->d_name, &folio); if (IS_ERR(de)) { err = PTR_ERR(de); goto out; @@ -296,7 +296,8 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) inode->i_ino, inode->i_nlink); set_nlink(inode, 1); } - err = nilfs_delete_entry(de, page); + err = nilfs_delete_entry(de, folio); + folio_release_kmap(folio, de); if (err) goto out; @@ -364,9 +365,9 @@ static int nilfs_rename(struct mnt_idmap *idmap, { struct inode *old_inode = d_inode(old_dentry); struct inode *new_inode = d_inode(new_dentry); - struct page *dir_page = NULL; + struct folio *dir_folio = NULL; struct nilfs_dir_entry *dir_de = NULL; - struct page *old_page; + struct folio *old_folio; struct nilfs_dir_entry *old_de; struct nilfs_transaction_info ti; int err; @@ -378,7 +379,7 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (unlikely(err)) return err; - old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_page); + old_de = nilfs_find_entry(old_dir, &old_dentry->d_name, &old_folio); if (IS_ERR(old_de)) { err = PTR_ERR(old_de); goto out; @@ -386,13 +387,13 @@ static int nilfs_rename(struct mnt_idmap *idmap, if (S_ISDIR(old_inode->i_mode)) { err = -EIO; - dir_de = nilfs_dotdot(old_inode, &dir_page); + dir_de = nilfs_dotdot(old_inode, &dir_folio); if (!dir_de) goto out_old; } if (new_inode) { - struct page *new_page; + struct folio *new_folio; struct nilfs_dir_entry *new_de; err = -ENOTEMPTY; @@ -400,12 +401,15 @@ static int nilfs_rename(struct mnt_idmap *idmap, goto out_dir; new_de = nilfs_find_entry(new_dir, &new_dentry->d_name, - &new_page); + &new_folio); if (IS_ERR(new_de)) { err = PTR_ERR(new_de); goto out_dir; } - nilfs_set_link(new_dir, new_de, new_page, old_inode); + err = nilfs_set_link(new_dir, new_de, new_folio, old_inode); + folio_release_kmap(new_folio, new_de); + if (unlikely(err)) + goto out_dir; nilfs_mark_inode_dirty(new_dir); inode_set_ctime_current(new_inode); if (dir_de) @@ -428,28 +432,27 @@ static int nilfs_rename(struct mnt_idmap *idmap, */ inode_set_ctime_current(old_inode); - nilfs_delete_entry(old_de, old_page); - - if (dir_de) { - nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - drop_nlink(old_dir); + err = nilfs_delete_entry(old_de, old_folio); + if (likely(!err)) { + if (dir_de) { + err = nilfs_set_link(old_inode, dir_de, dir_folio, + new_dir); + drop_nlink(old_dir); + } + nilfs_mark_inode_dirty(old_dir); } - nilfs_mark_inode_dirty(old_dir); nilfs_mark_inode_dirty(old_inode); - err = nilfs_transaction_commit(old_dir->i_sb); - return err; - out_dir: - if (dir_de) { - kunmap(dir_page); - put_page(dir_page); - } + if (dir_de) + folio_release_kmap(dir_folio, dir_de); out_old: - kunmap(old_page); - put_page(old_page); + folio_release_kmap(old_folio, old_de); out: - nilfs_transaction_abort(old_dir->i_sb); + if (likely(!err)) + err = nilfs_transaction_commit(old_dir->i_sb); + else + nilfs_transaction_abort(old_dir->i_sb); return err; } @@ -460,7 +463,6 @@ static struct dentry *nilfs_get_parent(struct dentry *child) { ino_t ino; int res; - struct inode *inode; struct nilfs_root *root; res = nilfs_inode_by_name(d_inode(child), &dotdot_name, &ino); @@ -469,11 +471,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child) root = NILFS_I(d_inode(child))->i_root; - inode = nilfs_iget(child->d_sb, root, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - return d_obtain_alias(inode); + return d_obtain_alias(nilfs_iget(child->d_sb, root, ino)); } static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index e2c5376b56cd..cb6ed54accd7 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -22,6 +22,7 @@ /** * struct nilfs_inode_info - nilfs inode data in memory * @i_flags: inode flags + * @i_type: inode type (combination of flags that inidicate usage) * @i_state: dynamic state flags * @i_bmap: pointer on i_bmap_data * @i_bmap_data: raw block mapping @@ -37,6 +38,7 @@ */ struct nilfs_inode_info { __u32 i_flags; + unsigned int i_type; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; struct nilfs_bmap i_bmap_data; @@ -90,9 +92,16 @@ enum { NILFS_I_UPDATED, /* The file has been written back */ NILFS_I_INODE_SYNC, /* dsync is not allowed for inode */ NILFS_I_BMAP, /* has bmap and btnode_cache */ - NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_BTNC, /* inode for btree node cache */ - NILFS_I_SHADOW, /* inode for shadowed page cache */ +}; + +/* + * Flags to identify the usage of on-memory inodes (i_type) + */ +enum { + NILFS_I_TYPE_NORMAL = 0, + NILFS_I_TYPE_GC = 0x0001, /* For data caching during GC */ + NILFS_I_TYPE_BTNC = 0x0002, /* For btree node cache */ + NILFS_I_TYPE_SHADOW = 0x0004, /* For shadowed page cache */ }; /* @@ -103,6 +112,18 @@ enum { NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; +/** + * define NILFS_MAX_VOLUME_NAME - maximum number of characters (bytes) in a + * file system volume name + * + * Defined by the size of the volume name field in the on-disk superblocks. + * This volume name does not include the terminating NULL byte if the string + * length matches the field size, so use (NILFS_MAX_VOLUME_NAME + 1) for the + * size of the buffer that requires a NULL byte termination. + */ +#define NILFS_MAX_VOLUME_NAME \ + sizeof_field(struct nilfs_super_block, s_volume_name) + /* * Macros to check inode numbers */ @@ -232,16 +253,16 @@ static inline __u32 nilfs_mask_flags(umode_t mode, __u32 flags) } /* dir.c */ -extern int nilfs_add_link(struct dentry *, struct inode *); +int nilfs_add_link(struct dentry *, struct inode *); int nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr, ino_t *ino); -extern int nilfs_make_empty(struct inode *, struct inode *); -extern struct nilfs_dir_entry * -nilfs_find_entry(struct inode *, const struct qstr *, struct page **); -extern int nilfs_delete_entry(struct nilfs_dir_entry *, struct page *); -extern int nilfs_empty_dir(struct inode *); -extern struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct page **); -extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *, - struct page *, struct inode *); +int nilfs_make_empty(struct inode *, struct inode *); +struct nilfs_dir_entry *nilfs_find_entry(struct inode *, const struct qstr *, + struct folio **); +int nilfs_delete_entry(struct nilfs_dir_entry *, struct folio *); +int nilfs_empty_dir(struct inode *); +struct nilfs_dir_entry *nilfs_dotdot(struct inode *, struct folio **); +int nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, + struct folio *folio, struct inode *inode); /* file.c */ extern int nilfs_sync_file(struct file *, loff_t, loff_t, int); @@ -262,7 +283,8 @@ extern struct inode *nilfs_new_inode(struct inode *, umode_t); extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); -extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); +void nilfs_write_inode_common(struct inode *inode, + struct nilfs_inode *raw_inode); struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, unsigned long ino); struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, @@ -340,8 +362,8 @@ void __nilfs_error(struct super_block *sb, const char *function, extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); -extern int nilfs_store_magic_and_option(struct super_block *, - struct nilfs_super_block *, char *); +extern int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp); extern int nilfs_check_feature_compatibility(struct super_block *, struct nilfs_super_block *); extern void nilfs_set_log_cursor(struct nilfs_super_block *, diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 144e200c4909..06f18fe86407 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -25,19 +25,19 @@ (BIT(BH_Uptodate) | BIT(BH_Mapped) | BIT(BH_NILFS_Node) | \ BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked)) -static struct buffer_head * -__nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, - int blkbits, unsigned long b_state) +static struct buffer_head *__nilfs_get_folio_block(struct folio *folio, + unsigned long block, pgoff_t index, int blkbits, + unsigned long b_state) { unsigned long first_block; - struct buffer_head *bh; + struct buffer_head *bh = folio_buffers(folio); - if (!page_has_buffers(page)) - create_empty_buffers(page, 1 << blkbits, b_state); + if (!bh) + bh = create_empty_buffers(folio, 1 << blkbits, b_state); first_block = (unsigned long)index << (PAGE_SHIFT - blkbits); - bh = nilfs_page_get_nth_block(page, block - first_block); + bh = get_nth_bh(bh, block - first_block); wait_on_buffer(bh); return bh; @@ -50,17 +50,17 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits); - struct page *page; + struct folio *folio; struct buffer_head *bh; - page = grab_cache_page(mapping, index); - if (unlikely(!page)) + folio = filemap_grab_folio(mapping, index); + if (IS_ERR(folio)) return NULL; - bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); + bh = __nilfs_get_folio_block(folio, blkoff, index, blkbits, b_state); if (unlikely(!bh)) { - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); return NULL; } bh->b_bdev = inode->i_sb->s_bdev; @@ -73,7 +73,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, */ void nilfs_forget_buffer(struct buffer_head *bh) { - struct page *page = bh->b_page; + struct folio *folio = bh->b_folio; const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | @@ -82,12 +82,12 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); bh->b_blocknr = -1; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); unlock_buffer(bh); brelse(bh); } @@ -104,11 +104,11 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) struct page *spage = sbh->b_page, *dpage = dbh->b_page; struct buffer_head *bh; - kaddr0 = kmap_atomic(spage); - kaddr1 = kmap_atomic(dpage); + kaddr0 = kmap_local_page(spage); + kaddr1 = kmap_local_page(dpage); memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); - kunmap_atomic(kaddr1); - kunmap_atomic(kaddr0); + kunmap_local(kaddr1); + kunmap_local(kaddr0); dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; dbh->b_blocknr = sbh->b_blocknr; @@ -132,48 +132,49 @@ void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) } /** - * nilfs_page_buffers_clean - check if a page has dirty buffers or not. - * @page: page to be checked + * nilfs_folio_buffers_clean - Check if a folio has dirty buffers or not. + * @folio: Folio to be checked. * - * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. - * Otherwise, it returns non-zero value. + * nilfs_folio_buffers_clean() returns false if the folio has dirty buffers. + * Otherwise, it returns true. */ -int nilfs_page_buffers_clean(struct page *page) +bool nilfs_folio_buffers_clean(struct folio *folio) { struct buffer_head *bh, *head; - bh = head = page_buffers(page); + bh = head = folio_buffers(folio); do { if (buffer_dirty(bh)) - return 0; + return false; bh = bh->b_this_page; } while (bh != head); - return 1; + return true; } -void nilfs_page_bug(struct page *page) +void nilfs_folio_bug(struct folio *folio) { + struct buffer_head *bh, *head; struct address_space *m; unsigned long ino; - if (unlikely(!page)) { - printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); + if (unlikely(!folio)) { + printk(KERN_CRIT "NILFS_FOLIO_BUG(NULL)\n"); return; } - m = page->mapping; + m = folio->mapping; ino = m ? m->host->i_ino : 0; - printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " + printk(KERN_CRIT "NILFS_FOLIO_BUG(%p): cnt=%d index#=%llu flags=0x%lx " "mapping=%p ino=%lu\n", - page, page_ref_count(page), - (unsigned long long)page->index, page->flags, m, ino); + folio, folio_ref_count(folio), + (unsigned long long)folio->index, folio->flags, m, ino); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { int i = 0; - bh = head = page_buffers(page); + bh = head; do { printk(KERN_CRIT " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", @@ -185,30 +186,32 @@ void nilfs_page_bug(struct page *page) } /** - * nilfs_copy_page -- copy the page with buffers - * @dst: destination page - * @src: source page - * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. + * nilfs_copy_folio -- copy the folio with buffers + * @dst: destination folio + * @src: source folio + * @copy_dirty: flag whether to copy dirty states on the folio's buffer heads. * - * This function is for both data pages and btnode pages. The dirty flag - * should be treated by caller. The page must not be under i/o. - * Both src and dst page must be locked + * This function is for both data folios and btnode folios. The dirty flag + * should be treated by caller. The folio must not be under i/o. + * Both src and dst folio must be locked */ -static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) +static void nilfs_copy_folio(struct folio *dst, struct folio *src, + bool copy_dirty) { struct buffer_head *dbh, *dbufs, *sbh; unsigned long mask = NILFS_BUFFER_INHERENT_BITS; - BUG_ON(PageWriteback(dst)); + BUG_ON(folio_test_writeback(dst)); - sbh = page_buffers(src); - if (!page_has_buffers(dst)) - create_empty_buffers(dst, sbh->b_size, 0); + sbh = folio_buffers(src); + dbh = folio_buffers(dst); + if (!dbh) + dbh = create_empty_buffers(dst, sbh->b_size, 0); if (copy_dirty) mask |= BIT(BH_Dirty); - dbh = dbufs = page_buffers(dst); + dbufs = dbh; do { lock_buffer(sbh); lock_buffer(dbh); @@ -219,16 +222,16 @@ static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) dbh = dbh->b_this_page; } while (dbh != dbufs); - copy_highpage(dst, src); + folio_copy(dst, src); - if (PageUptodate(src) && !PageUptodate(dst)) - SetPageUptodate(dst); - else if (!PageUptodate(src) && PageUptodate(dst)) - ClearPageUptodate(dst); - if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) - SetPageMappedToDisk(dst); - else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) - ClearPageMappedToDisk(dst); + if (folio_test_uptodate(src) && !folio_test_uptodate(dst)) + folio_mark_uptodate(dst); + else if (!folio_test_uptodate(src) && folio_test_uptodate(dst)) + folio_clear_uptodate(dst); + if (folio_test_mappedtodisk(src) && !folio_test_mappedtodisk(dst)) + folio_set_mappedtodisk(dst); + else if (!folio_test_mappedtodisk(src) && folio_test_mappedtodisk(dst)) + folio_clear_mappedtodisk(dst); do { unlock_buffer(sbh); @@ -257,20 +260,20 @@ repeat: folio_lock(folio); if (unlikely(!folio_test_dirty(folio))) - NILFS_PAGE_BUG(&folio->page, "inconsistent dirty state"); + NILFS_FOLIO_BUG(folio, "inconsistent dirty state"); dfolio = filemap_grab_folio(dmap, folio->index); - if (unlikely(IS_ERR(dfolio))) { + if (IS_ERR(dfolio)) { /* No empty page is added to the page cache */ folio_unlock(folio); err = PTR_ERR(dfolio); break; } if (unlikely(!folio_buffers(folio))) - NILFS_PAGE_BUG(&folio->page, + NILFS_FOLIO_BUG(folio, "found empty page in dat page cache"); - nilfs_copy_page(&dfolio->page, &folio->page, 1); + nilfs_copy_folio(dfolio, folio, true); filemap_dirty_folio(folio_mapping(dfolio), dfolio); folio_unlock(dfolio); @@ -315,7 +318,7 @@ repeat: if (!IS_ERR(dfolio)) { /* overwrite existing folio in the destination cache */ WARN_ON(folio_test_dirty(dfolio)); - nilfs_copy_page(&dfolio->page, &folio->page, 0); + nilfs_copy_folio(dfolio, folio, false); folio_unlock(dfolio); folio_put(dfolio); /* Do we not need to remove folio from smap here? */ @@ -355,9 +358,8 @@ repeat: /** * nilfs_clear_dirty_pages - discard dirty pages in address space * @mapping: address space with dirty pages for discarding - * @silent: suppress [true] or print [false] warning messages */ -void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) +void nilfs_clear_dirty_pages(struct address_space *mapping) { struct folio_batch fbatch; unsigned int i; @@ -378,7 +380,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) * was acquired. Skip processing in that case. */ if (likely(folio->mapping == mapping)) - nilfs_clear_dirty_page(&folio->page, silent); + nilfs_clear_folio_dirty(folio); folio_unlock(folio); } @@ -388,47 +390,59 @@ void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent) } /** - * nilfs_clear_dirty_page - discard dirty page - * @page: dirty page that will be discarded - * @silent: suppress [true] or print [false] warning messages + * nilfs_clear_folio_dirty - discard dirty folio + * @folio: dirty folio that will be discarded + * + * nilfs_clear_folio_dirty() clears working states including dirty state for + * the folio and its buffers. If the folio has buffers, clear only if it is + * confirmed that none of the buffer heads are busy (none have valid + * references and none are locked). */ -void nilfs_clear_dirty_page(struct page *page, bool silent) +void nilfs_clear_folio_dirty(struct folio *folio) { - struct inode *inode = page->mapping->host; - struct super_block *sb = inode->i_sb; - - BUG_ON(!PageLocked(page)); - - if (!silent) - nilfs_warn(sb, "discard dirty page: offset=%lld, ino=%lu", - page_offset(page), inode->i_ino); + struct buffer_head *bh, *head; - ClearPageUptodate(page); - ClearPageMappedToDisk(page); - ClearPageChecked(page); + BUG_ON(!folio_test_locked(folio)); - if (page_has_buffers(page)) { - struct buffer_head *bh, *head; + head = folio_buffers(folio); + if (head) { const unsigned long clear_bits = (BIT(BH_Uptodate) | BIT(BH_Dirty) | BIT(BH_Mapped) | BIT(BH_Async_Write) | BIT(BH_NILFS_Volatile) | BIT(BH_NILFS_Checked) | BIT(BH_NILFS_Redirected) | BIT(BH_Delay)); + bool busy, invalidated = false; - bh = head = page_buffers(page); +recheck_buffers: + busy = false; + bh = head; do { - lock_buffer(bh); - if (!silent) - nilfs_warn(sb, - "discard dirty block: blocknr=%llu, size=%zu", - (u64)bh->b_blocknr, bh->b_size); + if (atomic_read(&bh->b_count) | buffer_locked(bh)) { + busy = true; + break; + } + } while (bh = bh->b_this_page, bh != head); + + if (busy) { + if (invalidated) + return; + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } + bh = head; + do { + lock_buffer(bh); set_mask_bits(&bh->b_state, clear_bits, 0); unlock_buffer(bh); } while (bh = bh->b_this_page, bh != head); } - __nilfs_clear_page_dirty(page); + folio_clear_uptodate(folio); + folio_clear_mappedtodisk(folio); + folio_clear_checked(folio); + __nilfs_clear_folio_dirty(folio); } unsigned int nilfs_page_count_clean_buffers(struct page *page, @@ -458,22 +472,23 @@ unsigned int nilfs_page_count_clean_buffers(struct page *page, * 2) Some B-tree operations like insertion or deletion may dispose buffers * in dirty state, and this needs to cancel the dirty state of their pages. */ -int __nilfs_clear_page_dirty(struct page *page) +void __nilfs_clear_folio_dirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping) { xa_lock_irq(&mapping->i_pages); - if (test_bit(PG_dirty, &page->flags)) { - __xa_clear_mark(&mapping->i_pages, page_index(page), + if (folio_test_dirty(folio)) { + __xa_clear_mark(&mapping->i_pages, folio->index, PAGECACHE_TAG_DIRTY); xa_unlock_irq(&mapping->i_pages); - return clear_page_dirty_for_io(page); + folio_clear_dirty_for_io(folio); + return; } xa_unlock_irq(&mapping->i_pages); - return 0; + return; } - return TestClearPageDirty(page); + folio_clear_dirty(folio); } /** diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 21ddcdd4d63e..64521a03a19e 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -30,37 +30,26 @@ BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ -int __nilfs_clear_page_dirty(struct page *); +void __nilfs_clear_folio_dirty(struct folio *); struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *, unsigned long, unsigned long); void nilfs_forget_buffer(struct buffer_head *); void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *); -int nilfs_page_buffers_clean(struct page *); -void nilfs_page_bug(struct page *); +bool nilfs_folio_buffers_clean(struct folio *); +void nilfs_folio_bug(struct folio *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); -void nilfs_clear_dirty_page(struct page *, bool); -void nilfs_clear_dirty_pages(struct address_space *, bool); +void nilfs_clear_folio_dirty(struct folio *folio); +void nilfs_clear_dirty_pages(struct address_space *mapping); unsigned int nilfs_page_count_clean_buffers(struct page *, unsigned int, unsigned int); unsigned long nilfs_find_uncommitted_extent(struct inode *inode, sector_t start_blk, sector_t *blkoff); -#define NILFS_PAGE_BUG(page, m, a...) \ - do { nilfs_page_bug(page); BUG(); } while (0) - -static inline struct buffer_head * -nilfs_page_get_nth_block(struct page *page, unsigned int count) -{ - struct buffer_head *bh = page_buffers(page); - - while (count-- > 0) - bh = bh->b_this_page; - get_bh(bh); - return bh; -} +#define NILFS_FOLIO_BUG(folio, m, a...) \ + do { nilfs_folio_bug(folio); BUG(); } while (0) #endif /* _NILFS_PAGE_H */ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index ce30b51ac593..21d81097a89f 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -433,8 +433,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, * The next segment is invalidated by this recovery. */ err = nilfs_sufile_free(sufile, segnum[1]); - if (unlikely(err)) + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "checkpoint log inconsistency at block %llu (segment %llu): next segment %llu is unallocated", + (unsigned long long)nilfs->ns_last_pseg, + (unsigned long long)nilfs->ns_segnum, + (unsigned long long)segnum[1]); + err = -EINVAL; + } goto failed; + } for (i = 1; i < 4; i++) { err = nilfs_segment_list_add(head, segnum[i]); @@ -482,9 +491,9 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, if (unlikely(!bh_org)) return -EIO; - kaddr = kmap_atomic(page); + kaddr = kmap_local_page(page); memcpy(kaddr + from, bh_org->b_data, bh_org->b_size); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh_org); return 0; } @@ -498,7 +507,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, struct inode *inode; struct nilfs_recovery_block *rb, *n; unsigned int blocksize = nilfs->ns_blocksize; - struct page *page; + struct folio *folio; loff_t pos; int err = 0, err2 = 0; @@ -512,7 +521,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, pos = rb->blkoff << inode->i_blkbits; err = block_write_begin(inode->i_mapping, pos, blocksize, - &page, nilfs_get_block); + &folio, nilfs_get_block); if (unlikely(err)) { loff_t isize = inode->i_size; @@ -522,7 +531,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_inode; } - err = nilfs_recovery_copy_block(nilfs, rb, pos, page); + err = nilfs_recovery_copy_block(nilfs, rb, pos, &folio->page); if (unlikely(err)) goto failed_page; @@ -531,17 +540,17 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, goto failed_page; block_write_end(NULL, inode->i_mapping, pos, blocksize, - blocksize, page, NULL); + blocksize, folio, NULL); - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); (*nr_salvaged_blocks)++; goto next; failed_page: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); failed_inode: nilfs_warn(sb, @@ -563,6 +572,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, * checkpoint * @nilfs: nilfs object * @sb: super block instance + * @root: NILFS root instance * @ri: pointer to a nilfs_recovery_info */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, @@ -698,9 +708,15 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, return; bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); - BUG_ON(!bh); + if (WARN_ON(!bh)) + return; /* should never happen */ + + lock_buffer(bh); memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); set_buffer_dirty(bh); + unlock_buffer(bh); + err = sync_dirty_buffer(bh); if (unlikely(err)) nilfs_warn(nilfs->ns_sb, diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 6e59dc19a732..dc431b4c34c9 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -220,9 +220,9 @@ static void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, crc = crc32_le(crc, bh->b_data, bh->b_size); } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); crc = crc32_le(crc, kaddr + bh_offset(bh), bh->b_size); - kunmap_atomic(kaddr); + kunmap_local(kaddr); } raw_sum->ss_datasum = cpu_to_le32(crc); } diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 0610cb12c11c..58a598b548fa 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -519,7 +519,7 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, ii = NILFS_I(inode); - if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + if (ii->i_type & NILFS_I_TYPE_GC) cno = ii->i_cno; else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) cno = 0; @@ -731,11 +731,9 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, continue; } head = folio_buffers(folio); - if (!head) { - create_empty_buffers(&folio->page, i_blocksize(inode), 0); - head = folio_buffers(folio); - } - folio_unlock(folio); + if (!head) + head = create_empty_buffers(folio, + i_blocksize(inode), 0); bh = head; do { @@ -745,11 +743,14 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, list_add_tail(&bh->b_assoc_buffers, listp); ndirties++; if (unlikely(ndirties >= nlimit)) { + folio_unlock(folio); folio_batch_release(&fbatch); cond_resched(); return ndirties; } } while (bh = bh->b_this_page, bh != head); + + folio_unlock(folio); } folio_batch_release(&fbatch); cond_resched(); @@ -881,76 +882,6 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) nilfs_mdt_clear_dirty(nilfs->ns_dat); } -static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - /* XXX: this interface will be changed */ - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1, - &raw_cp, &bh_cp); - if (likely(!err)) { - /* - * The following code is duplicated with cpfile. But, it is - * needed to collect the checkpoint even if it was not newly - * created. - */ - mark_buffer_dirty(bh_cp); - nilfs_mdt_mark_dirty(nilfs->ns_cpfile); - nilfs_cpfile_put_checkpoint( - nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - } else if (err == -EINVAL || err == -ENOENT) { - nilfs_error(sci->sc_super, - "checkpoint creation failed due to metadata corruption."); - err = -EIO; - } - return err; -} - -static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) -{ - struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - struct buffer_head *bh_cp; - struct nilfs_checkpoint *raw_cp; - int err; - - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0, - &raw_cp, &bh_cp); - if (unlikely(err)) { - if (err == -EINVAL || err == -ENOENT) { - nilfs_error(sci->sc_super, - "checkpoint finalization failed due to metadata corruption."); - err = -EIO; - } - goto failed_ibh; - } - raw_cp->cp_snapshot_list.ssl_next = 0; - raw_cp->cp_snapshot_list.ssl_prev = 0; - raw_cp->cp_inodes_count = - cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count)); - raw_cp->cp_blocks_count = - cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count)); - raw_cp->cp_nblk_inc = - cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); - raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); - raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno); - - if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)) - nilfs_checkpoint_clear_minor(raw_cp); - else - nilfs_checkpoint_set_minor(raw_cp); - - nilfs_write_inode_common(sci->sc_root->ifile, - &raw_cp->cp_ifile_inode, 1); - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); - return 0; - - failed_ibh: - return err; -} - static void nilfs_fill_in_file_bmap(struct inode *ifile, struct nilfs_inode_info *ii) @@ -964,7 +895,7 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino, ibh); nilfs_bmap_write(ii->i_bmap, raw_inode); - nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh); + nilfs_ifile_unmap_inode(raw_inode); } } @@ -978,6 +909,33 @@ static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) } } +/** + * nilfs_write_root_mdt_inode - export root metadata inode information to + * the on-disk inode + * @inode: inode object of the root metadata file + * @raw_inode: on-disk inode + * + * nilfs_write_root_mdt_inode() writes inode information and bmap data of + * @inode to the inode area of the metadata file allocated on the super root + * block created to finalize the log. Since super root blocks are configured + * each time, this function zero-fills the unused area of @raw_inode. + */ +static void nilfs_write_root_mdt_inode(struct inode *inode, + struct nilfs_inode *raw_inode) +{ + struct the_nilfs *nilfs = inode->i_sb->s_fs_info; + + nilfs_write_inode_common(inode, raw_inode); + + /* zero-fill unused portion of raw_inode */ + raw_inode->i_xattr = 0; + raw_inode->i_pad = 0; + memset((void *)raw_inode + sizeof(*raw_inode), 0, + nilfs->ns_inode_size - sizeof(*raw_inode)); + + nilfs_bmap_write(NILFS_I(inode)->i_bmap, raw_inode); +} + static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { @@ -999,12 +957,13 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr + - NILFS_SR_DAT_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + - NILFS_SR_CPFILE_OFFSET(isz), 1); - nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + - NILFS_SR_SUFILE_OFFSET(isz), 1); + nilfs_write_root_mdt_inode(nilfs->ns_dat, (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz)); + nilfs_write_root_mdt_inode(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz)); + memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz); set_buffer_uptodate(bh_sr); unlock_buffer(bh_sr); @@ -1145,12 +1104,64 @@ static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci, return err; } +/** + * nilfs_free_segments - free the segments given by an array of segment numbers + * @nilfs: nilfs object + * @segnumv: array of segment numbers to be freed + * @nsegs: number of segments to be freed in @segnumv + * + * nilfs_free_segments() wraps nilfs_sufile_freev() and + * nilfs_sufile_cancel_freev(), and edits the segment usage metadata file + * (sufile) to free all segments given by @segnumv and @nsegs at once. If + * it fails midway, it cancels the changes so that none of the segments are + * freed. If @nsegs is 0, this function does nothing. + * + * The freeing of segments is not finalized until the writing of a log with + * a super root block containing this sufile change is complete, and it can + * be canceled with nilfs_sufile_cancel_freev() until then. + * + * Return: 0 on success, or the following negative error code on failure. + * * %-EINVAL - Invalid segment number. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. + */ +static int nilfs_free_segments(struct the_nilfs *nilfs, __u64 *segnumv, + size_t nsegs) +{ + size_t ndone; + int ret; + + if (!nsegs) + return 0; + + ret = nilfs_sufile_freev(nilfs->ns_sufile, segnumv, nsegs, &ndone); + if (unlikely(ret)) { + nilfs_sufile_cancel_freev(nilfs->ns_sufile, segnumv, ndone, + NULL); + /* + * If a segment usage of the segments to be freed is in a + * hole block, nilfs_sufile_freev() will return -ENOENT. + * In this case, -EINVAL should be returned to the caller + * since there is something wrong with the given segment + * number array. This error can only occur during GC, so + * there is no need to worry about it propagating to other + * callers (such as fsync). + */ + if (ret == -ENOENT) { + nilfs_err(nilfs->ns_sb, + "The segment usage entry %llu to be freed is invalid (in a hole)", + (unsigned long long)segnumv[ndone]); + ret = -EINVAL; + } + } + return ret; +} + static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) { struct the_nilfs *nilfs = sci->sc_super->s_fs_info; struct list_head *head; struct nilfs_inode_info *ii; - size_t ndone; int err = 0; switch (nilfs_sc_cstage_get(sci)) { @@ -1231,7 +1242,8 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) break; nilfs_sc_cstage_inc(sci); /* Creating a checkpoint */ - err = nilfs_segctor_create_checkpoint(sci); + err = nilfs_cpfile_create_checkpoint(nilfs->ns_cpfile, + nilfs->ns_cno); if (unlikely(err)) break; fallthrough; @@ -1243,14 +1255,10 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) nilfs_sc_cstage_inc(sci); fallthrough; case NILFS_ST_SUFILE: - err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs, - sci->sc_nfreesegs, &ndone); - if (unlikely(err)) { - nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, ndone, - NULL); + err = nilfs_free_segments(nilfs, sci->sc_freesegs, + sci->sc_nfreesegs); + if (unlikely(err)) break; - } sci->sc_stage.flags |= NILFS_CF_SUFREED; err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile, @@ -1666,71 +1674,95 @@ static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode) return 0; } -static void nilfs_begin_page_io(struct page *page) +static void nilfs_begin_folio_io(struct folio *folio) { - if (!page || PageWriteback(page)) + if (!folio || folio_test_writeback(folio)) /* * For split b-tree node pages, this function may be called * twice. We ignore the 2nd or later calls by this check. */ return; - lock_page(page); - clear_page_dirty_for_io(page); - set_page_writeback(page); - unlock_page(page); + folio_lock(folio); + folio_clear_dirty_for_io(folio); + folio_start_writeback(folio); + folio_unlock(folio); } -static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) +/** + * nilfs_prepare_write_logs - prepare to write logs + * @logs: logs to prepare for writing + * @seed: checksum seed value + * + * nilfs_prepare_write_logs() adds checksums and prepares the block + * buffers/folios for writing logs. In order to stabilize folios of + * memory-mapped file blocks by putting them in writeback state before + * calculating the checksums, first prepare to write payload blocks other + * than segment summary and super root blocks in which the checksums will + * be embedded. + */ +static void nilfs_prepare_write_logs(struct list_head *logs, u32 seed) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + struct folio *bd_folio = NULL, *fs_folio = NULL; + struct buffer_head *bh; - list_for_each_entry(bh, &segbuf->sb_segsum_buffers, + /* Prepare to write payload blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh->b_page != bd_page) { - if (bd_page) { - lock_page(bd_page); - wait_on_page_writeback(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - } - bd_page = bh->b_page; + if (bh == segbuf->sb_super_root) + break; + set_buffer_async_write(bh); + if (bh->b_folio != fs_folio) { + nilfs_begin_folio_io(fs_folio); + fs_folio = bh->b_folio; } } + } + nilfs_begin_folio_io(fs_folio); - list_for_each_entry(bh, &segbuf->sb_payload_buffers, + nilfs_add_checksums_on_logs(logs, seed); + + /* Prepare to write segment summary blocks */ + list_for_each_entry(segbuf, logs, sb_list) { + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - if (bh == segbuf->sb_super_root) { - if (bh->b_page != bd_page) { - lock_page(bd_page); - wait_on_page_writeback(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); - bd_page = bh->b_page; - } - break; - } - set_buffer_async_write(bh); - if (bh->b_page != fs_page) { - nilfs_begin_page_io(fs_page); - fs_page = bh->b_page; + mark_buffer_dirty(bh); + if (bh->b_folio == bd_folio) + continue; + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } + bd_folio = bh->b_folio; } } - if (bd_page) { - lock_page(bd_page); - wait_on_page_writeback(bd_page); - clear_page_dirty_for_io(bd_page); - set_page_writeback(bd_page); - unlock_page(bd_page); + + /* Prepare to write super root block */ + bh = NILFS_LAST_SEGBUF(logs)->sb_super_root; + if (bh) { + mark_buffer_dirty(bh); + if (bh->b_folio != bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); + bd_folio = bh->b_folio; + } + } + + if (bd_folio) { + folio_lock(bd_folio); + folio_wait_writeback(bd_folio); + folio_clear_dirty_for_io(bd_folio); + folio_start_writeback(bd_folio); + folio_unlock(bd_folio); } - nilfs_begin_page_io(fs_page); } static int nilfs_segctor_write(struct nilfs_sc_info *sci, @@ -1743,17 +1775,18 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, return ret; } -static void nilfs_end_page_io(struct page *page, int err) +static void nilfs_end_folio_io(struct folio *folio, int err) { - if (!page) + if (!folio) return; - if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) { + if (buffer_nilfs_node(folio_buffers(folio)) && + !folio_test_writeback(folio)) { /* * For b-tree node pages, this function may be called twice * or more because they might be split in a segment. */ - if (PageDirty(page)) { + if (folio_test_dirty(folio)) { /* * For pages holding split b-tree node buffers, dirty * flag on the buffers may be cleared discretely. @@ -1761,30 +1794,24 @@ static void nilfs_end_page_io(struct page *page, int err) * remaining buffers, and it must be cancelled if * all the buffers get cleaned later. */ - lock_page(page); - if (nilfs_page_buffers_clean(page)) - __nilfs_clear_page_dirty(page); - unlock_page(page); + folio_lock(folio); + if (nilfs_folio_buffers_clean(folio)) + __nilfs_clear_folio_dirty(folio); + folio_unlock(folio); } return; } - if (!err) { - if (!nilfs_page_buffers_clean(page)) - __set_page_dirty_nobuffers(page); - ClearPageError(page); - } else { - __set_page_dirty_nobuffers(page); - SetPageError(page); - } + if (err || !nilfs_folio_buffers_clean(folio)) + filemap_dirty_folio(folio->mapping, folio); - end_page_writeback(page); + folio_end_writeback(folio); } static void nilfs_abort_logs(struct list_head *logs, int err) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct buffer_head *bh; if (list_empty(logs)) @@ -1794,10 +1821,10 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { clear_buffer_uptodate(bh); - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } @@ -1805,23 +1832,23 @@ static void nilfs_abort_logs(struct list_head *logs, int err) b_assoc_buffers) { if (bh == segbuf->sb_super_root) { clear_buffer_uptodate(bh); - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } break; } clear_buffer_async_write(bh); - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, err); - fs_page = bh->b_page; + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, err); + fs_folio = bh->b_folio; } } } - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, err); + nilfs_end_folio_io(fs_folio, err); } static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, @@ -1866,7 +1893,7 @@ static void nilfs_set_next_segment(struct the_nilfs *nilfs, static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) { struct nilfs_segment_buffer *segbuf; - struct page *bd_page = NULL, *fs_page = NULL; + struct folio *bd_folio = NULL, *fs_folio = NULL; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; int update_sr = false; @@ -1877,21 +1904,21 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); - if (bh->b_page != bd_page) { - if (bd_page) - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + if (bd_folio) + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } } /* - * We assume that the buffers which belong to the same page + * We assume that the buffers which belong to the same folio * continue over the buffer list. - * Under this assumption, the last BHs of pages is - * identifiable by the discontinuity of bh->b_page - * (page != fs_page). + * Under this assumption, the last BHs of folios is + * identifiable by the discontinuity of bh->b_folio + * (folio != fs_folio). * * For B-tree node blocks, however, this assumption is not - * guaranteed. The cleanup code of B-tree node pages needs + * guaranteed. The cleanup code of B-tree node folios needs * special care. */ list_for_each_entry(bh, &segbuf->sb_payload_buffers, @@ -1905,17 +1932,17 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) if (bh == segbuf->sb_super_root) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); - if (bh->b_page != bd_page) { - end_page_writeback(bd_page); - bd_page = bh->b_page; + if (bh->b_folio != bd_folio) { + folio_end_writeback(bd_folio); + bd_folio = bh->b_folio; } update_sr = true; break; } set_mask_bits(&bh->b_state, clear_bits, set_bits); - if (bh->b_page != fs_page) { - nilfs_end_page_io(fs_page, 0); - fs_page = bh->b_page; + if (bh->b_folio != fs_folio) { + nilfs_end_folio_io(fs_folio, 0); + fs_folio = bh->b_folio; } } @@ -1929,13 +1956,13 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) } } /* - * Since pages may continue over multiple segment buffers, - * end of the last page must be checked outside of the loop. + * Since folios may continue over multiple segment buffers, + * end of the last folio must be checked outside of the loop. */ - if (bd_page) - end_page_writeback(bd_page); + if (bd_folio) + folio_end_writeback(bd_folio); - nilfs_end_page_io(fs_page, 0); + nilfs_end_folio_io(fs_folio, 0); nilfs_drop_collected_inodes(&sci->sc_dirty_files); @@ -2107,7 +2134,11 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (mode == SC_LSEG_SR && nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) { - err = nilfs_segctor_fill_in_checkpoint(sci); + err = nilfs_cpfile_finalize_checkpoint( + nilfs->ns_cpfile, nilfs->ns_cno, sci->sc_root, + sci->sc_nblk_inc + sci->sc_nblk_this_inc, + sci->sc_seg_ctime, + !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags)); if (unlikely(err)) goto failed_to_write; @@ -2116,10 +2147,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_update_segusage(sci, nilfs->ns_sufile); /* Write partial segments */ - nilfs_segctor_prepare_write(sci); - - nilfs_add_checksums_on_logs(&sci->sc_segbufs, - nilfs->ns_crc_seed); + nilfs_prepare_write_logs(&sci->sc_segbufs, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) @@ -2478,7 +2506,7 @@ static void nilfs_construction_timeout(struct timer_list *t) { struct nilfs_sc_info *sci = from_timer(sci, t, sc_timer); - wake_up_process(sci->sc_timer_task); + wake_up_process(sci->sc_task); } static void @@ -2604,122 +2632,85 @@ static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci) } /** - * nilfs_segctor_thread - main loop of the segment constructor thread. + * nilfs_log_write_required - determine whether log writing is required + * @sci: nilfs_sc_info struct + * @modep: location for storing log writing mode + * + * Return: true if log writing is required, false otherwise. If log writing + * is required, the mode is stored in the location pointed to by @modep. + */ +static bool nilfs_log_write_required(struct nilfs_sc_info *sci, int *modep) +{ + bool timedout, ret = true; + + spin_lock(&sci->sc_state_lock); + timedout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + time_after_eq(jiffies, sci->sc_timer.expires)); + if (timedout || sci->sc_seq_request != sci->sc_seq_done) + *modep = SC_LSEG_SR; + else if (sci->sc_flush_request) + *modep = nilfs_segctor_flush_mode(sci); + else + ret = false; + + spin_unlock(&sci->sc_state_lock); + return ret; +} + +/** + * nilfs_segctor_thread - main loop of the log writer thread * @arg: pointer to a struct nilfs_sc_info. * - * nilfs_segctor_thread() initializes a timer and serves as a daemon - * to execute segment constructions. + * nilfs_segctor_thread() is the main loop function of the log writer kernel + * thread, which determines whether log writing is necessary, and if so, + * performs the log write in the background, or waits if not. It is also + * used to decide the background writeback of the superblock. + * + * Return: Always 0. */ static int nilfs_segctor_thread(void *arg) { struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg; struct the_nilfs *nilfs = sci->sc_super->s_fs_info; - int timeout = 0; - - sci->sc_timer_task = current; - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - /* start sync. */ - sci->sc_task = current; - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */ nilfs_info(sci->sc_super, "segctord starting. Construction interval = %lu seconds, CP frequency < %lu seconds", sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ); - spin_lock(&sci->sc_state_lock); - loop: - for (;;) { - int mode; + set_freezable(); - if (sci->sc_state & NILFS_SEGCTOR_QUIT) - goto end_thread; - - if (timeout || sci->sc_seq_request != sci->sc_seq_done) - mode = SC_LSEG_SR; - else if (sci->sc_flush_request) - mode = nilfs_segctor_flush_mode(sci); - else - break; - - spin_unlock(&sci->sc_state_lock); - nilfs_segctor_thread_construct(sci, mode); - spin_lock(&sci->sc_state_lock); - timeout = 0; - } - - - if (freezing(current)) { - spin_unlock(&sci->sc_state_lock); - try_to_freeze(); - spin_lock(&sci->sc_state_lock); - } else { + while (!kthread_should_stop()) { DEFINE_WAIT(wait); - int should_sleep = 1; + bool should_write; + int mode; + + if (freezing(current)) { + try_to_freeze(); + continue; + } prepare_to_wait(&sci->sc_wait_daemon, &wait, TASK_INTERRUPTIBLE); - - if (sci->sc_seq_request != sci->sc_seq_done) - should_sleep = 0; - else if (sci->sc_flush_request) - should_sleep = 0; - else if (sci->sc_state & NILFS_SEGCTOR_COMMIT) - should_sleep = time_before(jiffies, - sci->sc_timer.expires); - - if (should_sleep) { - spin_unlock(&sci->sc_state_lock); + should_write = nilfs_log_write_required(sci, &mode); + if (!should_write) schedule(); - spin_lock(&sci->sc_state_lock); - } finish_wait(&sci->sc_wait_daemon, &wait); - timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && - time_after_eq(jiffies, sci->sc_timer.expires)); if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs)) set_nilfs_discontinued(nilfs); + + if (should_write) + nilfs_segctor_thread_construct(sci, mode); } - goto loop; - end_thread: /* end sync. */ + spin_lock(&sci->sc_state_lock); sci->sc_task = NULL; timer_shutdown_sync(&sci->sc_timer); - wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; } -static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) -{ - struct task_struct *t; - - t = kthread_run(nilfs_segctor_thread, sci, "segctord"); - if (IS_ERR(t)) { - int err = PTR_ERR(t); - - nilfs_err(sci->sc_super, "error %d creating segctord thread", - err); - return err; - } - wait_event(sci->sc_wait_task, sci->sc_task != NULL); - return 0; -} - -static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) - __acquires(&sci->sc_state_lock) - __releases(&sci->sc_state_lock) -{ - sci->sc_state |= NILFS_SEGCTOR_QUIT; - - while (sci->sc_task) { - wake_up(&sci->sc_wait_daemon); - spin_unlock(&sci->sc_state_lock); - wait_event(sci->sc_wait_task, sci->sc_task == NULL); - spin_lock(&sci->sc_state_lock); - } -} - /* * Setup & clean-up functions */ @@ -2740,7 +2731,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); - init_waitqueue_head(&sci->sc_wait_task); spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); @@ -2795,8 +2785,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) up_write(&nilfs->ns_segctor_sem); + if (sci->sc_task) { + wake_up(&sci->sc_wait_daemon); + kthread_stop(sci->sc_task); + } + spin_lock(&sci->sc_state_lock); - nilfs_segctor_kill_thread(sci); flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); @@ -2844,14 +2838,15 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * This allocates a log writer object, initializes it, and starts the * log writer. * - * Return Value: On success, 0 is returned. On error, one of the following - * negative error code is returned. - * - * %-ENOMEM - Insufficient memory available. + * Return: 0 on success, or the following negative error code on failure. + * * %-EINTR - Log writer thread creation failed due to interruption. + * * %-ENOMEM - Insufficient memory available. */ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) { struct the_nilfs *nilfs = sb->s_fs_info; + struct nilfs_sc_info *sci; + struct task_struct *t; int err; if (nilfs->ns_writer) { @@ -2864,17 +2859,23 @@ int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root) return 0; } - nilfs->ns_writer = nilfs_segctor_new(sb, root); - if (!nilfs->ns_writer) + sci = nilfs_segctor_new(sb, root); + if (unlikely(!sci)) return -ENOMEM; - inode_attach_wb(nilfs->ns_bdev->bd_inode, NULL); - - err = nilfs_segctor_start_thread(nilfs->ns_writer); - if (unlikely(err)) + nilfs->ns_writer = sci; + t = kthread_create(nilfs_segctor_thread, sci, "segctord"); + if (IS_ERR(t)) { + err = PTR_ERR(t); + nilfs_err(sb, "error %d creating segctord thread", err); nilfs_detach_log_writer(sb); + return err; + } + sci->sc_task = t; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); - return err; + wake_up_process(sci->sc_task); + return 0; } /** diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 1060f72ebf5a..f723f47ddc4e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -22,10 +22,10 @@ struct nilfs_root; * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status * @ri_super_root: Block number of the last super root - * @ri_ri_cno: Number of the last checkpoint + * @ri_cno: Number of the last checkpoint * @ri_lsegs_start: Region for roll-forwarding (start block number) * @ri_lsegs_end: Region for roll-forwarding (end block number) - * @ri_lseg_start_seq: Sequence value of the segment at ri_lsegs_start + * @ri_lsegs_start_seq: Sequence value of the segment at ri_lsegs_start * @ri_used_segments: List of segments to be mark active * @ri_pseg_start: Block number of the last partial segment * @ri_seq: Sequence number on the last partial segment @@ -105,9 +105,8 @@ struct nilfs_segsum_pointer { * @sc_flush_request: inode bitmap of metadata files to be flushed * @sc_wait_request: Client request queue * @sc_wait_daemon: Daemon wait queue - * @sc_wait_task: Start/end wait queue to control segctord task * @sc_seq_request: Request counter - * @sc_seq_accept: Accepted request count + * @sc_seq_accepted: Accepted request count * @sc_seq_done: Completion counter * @sc_sync: Request of explicit sync operation * @sc_interval: Timeout value of background construction @@ -158,7 +157,6 @@ struct nilfs_sc_info { wait_queue_head_t sc_wait_request; wait_queue_head_t sc_wait_daemon; - wait_queue_head_t sc_wait_task; __u32 sc_seq_request; __u32 sc_seq_accepted; @@ -171,7 +169,6 @@ struct nilfs_sc_info { unsigned long sc_watermark; struct timer_list sc_timer; - struct task_struct *sc_timer_task; struct task_struct *sc_task; }; @@ -192,7 +189,6 @@ enum { }; /* sc_state */ -#define NILFS_SEGCTOR_QUIT 0x0001 /* segctord is being destroyed */ #define NILFS_SEGCTOR_COMMIT 0x0004 /* committed transaction exists */ /* diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 58ca7c936393..eea5a6a12f7b 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -48,7 +48,7 @@ nilfs_sufile_get_blkoff(const struct inode *sufile, __u64 segnum) { __u64 t = segnum + NILFS_MDT(sufile)->mi_first_entry_offset; - do_div(t, nilfs_sufile_segment_usages_per_block(sufile)); + t = div64_ul(t, nilfs_sufile_segment_usages_per_block(sufile)); return (unsigned long)t; } @@ -79,10 +79,17 @@ nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, NILFS_MDT(sufile)->mi_entry_size; } -static inline int nilfs_sufile_get_header_block(struct inode *sufile, - struct buffer_head **bhp) +static int nilfs_sufile_get_header_block(struct inode *sufile, + struct buffer_head **bhp) { - return nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + int err = nilfs_mdt_get_block(sufile, 0, 0, NULL, bhp); + + if (unlikely(err == -ENOENT)) { + nilfs_error(sufile->i_sb, + "missing header block in segment usage metadata"); + err = -EIO; + } + return err; } static inline int @@ -107,11 +114,11 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, struct nilfs_sufile_header *header; void *kaddr; - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, ncleanadd); le64_add_cpu(&header->sh_ndirtysegs, ndirtyadd); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(header_bh); } @@ -315,10 +322,10 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) ret = nilfs_sufile_get_header_block(sufile, &header_bh); if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); last_alloc = le64_to_cpu(header->sh_last_alloc); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nsegments = nilfs_sufile_get_nsegments(sufile); maxsegnum = sui->allocmax; @@ -352,7 +359,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) &su_bh); if (ret < 0) goto out_header; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); @@ -363,14 +370,14 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) continue; /* found a clean segment */ nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); - kunmap_atomic(kaddr); + kunmap_local(kaddr); sui->ncleansegs--; mark_buffer_dirty(header_bh); @@ -384,7 +391,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) goto out_header; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } @@ -406,16 +413,16 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, struct nilfs_segment_usage *su; void *kaddr; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (unlikely(!nilfs_segment_usage_clean(su))) { nilfs_warn(sufile->i_sb, "%s: segment %llu must be clean", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + kunmap_local(kaddr); return; } nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nilfs_sufile_mod_counter(header_bh, -1, 1); NILFS_SUI(sufile)->ncleansegs--; @@ -432,11 +439,11 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, void *kaddr; int clean, dirty; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (su->su_flags == cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)) && su->su_nblocks == cpu_to_le32(0)) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); return; } clean = nilfs_segment_usage_clean(su); @@ -446,7 +453,7 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(0); su->su_nblocks = cpu_to_le32(0); su->su_flags = cpu_to_le32(BIT(NILFS_SEGMENT_USAGE_DIRTY)); - kunmap_atomic(kaddr); + kunmap_local(kaddr); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); NILFS_SUI(sufile)->ncleansegs -= clean; @@ -463,20 +470,25 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, void *kaddr; int sudirty; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_clean(su)) { nilfs_warn(sufile->i_sb, "%s: segment %llu is already clean", __func__, (unsigned long long)segnum); - kunmap_atomic(kaddr); + kunmap_local(kaddr); return; } - WARN_ON(nilfs_segment_usage_error(su)); - WARN_ON(!nilfs_segment_usage_dirty(su)); + if (unlikely(nilfs_segment_usage_error(su))) + nilfs_warn(sufile->i_sb, "free segment %llu marked in error", + (unsigned long long)segnum); sudirty = nilfs_segment_usage_dirty(su); + if (unlikely(!sudirty)) + nilfs_warn(sufile->i_sb, "free unallocated segment %llu", + (unsigned long long)segnum); + nilfs_segment_usage_set_clean(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); @@ -501,15 +513,22 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) down_write(&NILFS_MDT(sufile)->mi_sem); ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); - if (ret) + if (unlikely(ret)) { + if (ret == -ENOENT) { + nilfs_error(sufile->i_sb, + "segment usage for segment %llu is unreadable due to a hole block", + (unsigned long long)segnum); + ret = -EIO; + } goto out_sem; + } - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); if (unlikely(nilfs_segment_usage_error(su))) { struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(bh); if (nilfs_segment_is_active(nilfs, segnum)) { nilfs_error(sufile->i_sb, @@ -527,7 +546,7 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) ret = -EIO; } else { nilfs_segment_usage_set_dirty(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); brelse(bh); @@ -557,7 +576,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, if (ret < 0) goto out_sem; - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); if (modtime) { /* @@ -568,7 +587,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, su->su_lastmod = cpu_to_le64(modtime); } su->su_nblocks = cpu_to_le32(nblocks); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(bh); nilfs_mdt_mark_dirty(sufile); @@ -609,7 +628,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) if (ret < 0) goto out_sem; - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); @@ -619,7 +638,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) spin_lock(&nilfs->ns_last_segment_lock); sustat->ss_prot_seq = nilfs->ns_prot_seq; spin_unlock(&nilfs->ns_last_segment_lock); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(header_bh); out_sem: @@ -635,15 +654,15 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, void *kaddr; int suclean; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); if (nilfs_segment_usage_error(su)) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); return; } suclean = nilfs_segment_usage_clean(su); nilfs_segment_usage_set_error(su); - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); @@ -712,7 +731,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, /* hole */ continue; } - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); su2 = su; @@ -721,7 +740,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, ~BIT(NILFS_SEGMENT_USAGE_ERROR)) || nilfs_segment_is_active(nilfs, segnum + j)) { ret = -EBUSY; - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); goto out_header; } @@ -733,7 +752,7 @@ static int nilfs_sufile_truncate_range(struct inode *sufile, nc++; } } - kunmap_atomic(kaddr); + kunmap_local(kaddr); if (nc > 0) { mark_buffer_dirty(su_bh); ncleaned += nc; @@ -818,10 +837,10 @@ int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs) sui->allocmin = 0; } - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); header->sh_ncleansegs = cpu_to_le64(sui->ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(kaddr); mark_buffer_dirty(header_bh); nilfs_mdt_mark_dirty(sufile); @@ -835,21 +854,17 @@ out: } /** - * nilfs_sufile_get_suinfo - + * nilfs_sufile_get_suinfo - get segment usage information * @sufile: inode of segment usage file * @segnum: segment number to start looking - * @buf: array of suinfo - * @sisz: byte size of suinfo - * @nsi: size of suinfo array - * - * Description: + * @buf: array of suinfo + * @sisz: byte size of suinfo + * @nsi: size of suinfo array * - * Return Value: On success, 0 is returned and .... On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. + * Return: Count of segment usage info items stored in the output buffer on + * success, or the following negative error code on failure. + * * %-EIO - I/O error (including metadata corruption). + * * %-ENOMEM - Insufficient memory available. */ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, unsigned int sisz, size_t nsi) @@ -886,7 +901,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, continue; } - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); for (j = 0; j < n; @@ -899,7 +914,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, si->sui_flags |= BIT(NILFS_SEGMENT_USAGE_ACTIVE); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(su_bh); } ret = nsegs; @@ -968,7 +983,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, goto out_header; for (;;) { - kaddr = kmap_atomic(bh->b_page); + kaddr = kmap_local_page(bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, sup->sup_segnum, bh, kaddr); @@ -1005,7 +1020,7 @@ ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); sup = (void *)sup + supsz; if (sup >= supend) @@ -1110,7 +1125,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) continue; } - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage(sufile, segnum, su_bh, kaddr); for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { @@ -1140,7 +1155,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) } if (nblocks >= minlen) { - kunmap_atomic(kaddr); + kunmap_local(kaddr); ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, @@ -1152,7 +1167,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) } ndiscarded += nblocks; - kaddr = kmap_atomic(su_bh->b_page); + kaddr = kmap_local_page(su_bh->b_page); su = nilfs_sufile_block_get_segment_usage( sufile, segnum, su_bh, kaddr); } @@ -1161,7 +1176,7 @@ int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) start = seg_start; nblocks = seg_end - seg_start + 1; } - kunmap_atomic(kaddr); + kunmap_local(kaddr); put_bh(su_bh); } @@ -1236,15 +1251,21 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, if (err) goto failed; - err = nilfs_sufile_get_header_block(sufile, &header_bh); - if (err) + err = nilfs_mdt_get_block(sufile, 0, 0, NULL, &header_bh); + if (unlikely(err)) { + if (err == -ENOENT) { + nilfs_err(sb, + "missing header block in segment usage metadata"); + err = -EINVAL; + } goto failed; + } sui = NILFS_SUI(sufile); - kaddr = kmap_atomic(header_bh->b_page); + kaddr = kmap_local_page(header_bh->b_page); header = kaddr + bh_offset(header_bh); sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr); + kunmap_local(kaddr); brelse(header_bh); sui->allocmax = nilfs_sufile_get_nsegments(sufile) - 1; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index a5d1fa4e7552..eca79cca3803 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -29,13 +29,13 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <linux/parser.h> #include <linux/crc32.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/seq_file.h> #include <linux/mount.h> #include <linux/fs_context.h> +#include <linux/fs_parser.h> #include "nilfs.h" #include "export.h" #include "mdt.h" @@ -61,7 +61,6 @@ struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; static int nilfs_setup_super(struct super_block *sb, int is_mount); -static int nilfs_remount(struct super_block *sb, int *flags, char *data); void __nilfs_msg(struct super_block *sb, const char *fmt, ...) { @@ -106,6 +105,10 @@ static void nilfs_set_error(struct super_block *sb) /** * __nilfs_error() - report failure condition on a filesystem + * @sb: super block instance + * @function: name of calling function + * @fmt: format string for message to be output + * @...: optional arguments to @fmt * * __nilfs_error() sets an ERROR_FS flag on the superblock as well as * reporting an error message. This function should be called when @@ -157,6 +160,7 @@ struct inode *nilfs_alloc_inode(struct super_block *sb) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_type = 0; ii->i_cno = 0; ii->i_assoc_inode = NULL; ii->i_bmap = &ii->i_bmap_data; @@ -448,7 +452,7 @@ int nilfs_resize_fs(struct super_block *sb, __u64 newsize) sb2off = NILFS_SB2_OFFSET_BYTES(newsize); newnsegs = sb2off >> nilfs->ns_blocksize_bits; - do_div(newnsegs, nilfs->ns_blocks_per_segment); + newnsegs = div64_ul(newnsegs, nilfs->ns_blocks_per_segment); ret = nilfs_sufile_resize(nilfs->ns_sufile, newnsegs); up_write(&nilfs->ns_segctor_sem); @@ -544,8 +548,6 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, { struct the_nilfs *nilfs = sb->s_fs_info; struct nilfs_root *root; - struct nilfs_checkpoint *raw_cp; - struct buffer_head *bh_cp; int err = -ENOMEM; root = nilfs_find_or_create_root( @@ -557,38 +559,19 @@ int nilfs_attach_checkpoint(struct super_block *sb, __u64 cno, int curr_mnt, goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); - err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, - &bh_cp); + err = nilfs_ifile_read(sb, root, cno, nilfs->ns_inode_size); up_read(&nilfs->ns_segctor_sem); - if (unlikely(err)) { - if (err == -ENOENT || err == -EINVAL) { - nilfs_err(sb, - "Invalid checkpoint (checkpoint number=%llu)", - (unsigned long long)cno); - err = -EINVAL; - } + if (unlikely(err)) goto failed; - } - - err = nilfs_ifile_read(sb, root, nilfs->ns_inode_size, - &raw_cp->cp_ifile_inode, &root->ifile); - if (err) - goto failed_bh; - - atomic64_set(&root->inodes_count, - le64_to_cpu(raw_cp->cp_inodes_count)); - atomic64_set(&root->blocks_count, - le64_to_cpu(raw_cp->cp_blocks_count)); - - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); reuse: *rootp = root; return 0; - failed_bh: - nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: + if (err == -EINVAL) + nilfs_err(sb, "Invalid checkpoint (checkpoint number=%llu)", + (unsigned long long)cno); nilfs_put_root(root); return err; @@ -723,105 +706,98 @@ static const struct super_operations nilfs_sops = { .freeze_fs = nilfs_freeze, .unfreeze_fs = nilfs_unfreeze, .statfs = nilfs_statfs, - .remount_fs = nilfs_remount, .show_options = nilfs_show_options }; enum { - Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_discard, Opt_nodiscard, Opt_err, + Opt_err, Opt_barrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, }; -static match_table_t tokens = { - {Opt_err_cont, "errors=continue"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier"}, - {Opt_nobarrier, "nobarrier"}, - {Opt_snapshot, "cp=%u"}, - {Opt_order, "order=%s"}, - {Opt_norecovery, "norecovery"}, - {Opt_discard, "discard"}, - {Opt_nodiscard, "nodiscard"}, - {Opt_err, NULL} +static const struct constant_table nilfs_param_err[] = { + {"continue", NILFS_MOUNT_ERRORS_CONT}, + {"panic", NILFS_MOUNT_ERRORS_PANIC}, + {"remount-ro", NILFS_MOUNT_ERRORS_RO}, + {} }; -static int parse_options(char *options, struct super_block *sb, int is_remount) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - char *p; - substring_t args[MAX_OPT_ARGS]; - - if (!options) - return 1; - - while ((p = strsep(&options, ",")) != NULL) { - int token; +static const struct fs_parameter_spec nilfs_param_spec[] = { + fsparam_enum ("errors", Opt_err, nilfs_param_err), + fsparam_flag_no ("barrier", Opt_barrier), + fsparam_u64 ("cp", Opt_snapshot), + fsparam_string ("order", Opt_order), + fsparam_flag ("norecovery", Opt_norecovery), + fsparam_flag_no ("discard", Opt_discard), + {} +}; - if (!*p) - continue; +struct nilfs_fs_context { + unsigned long ns_mount_opt; + __u64 cno; +}; - token = match_token(p, tokens, args); - switch (token) { - case Opt_barrier: - nilfs_set_opt(nilfs, BARRIER); - break; - case Opt_nobarrier: +static int nilfs_parse_param(struct fs_context *fc, struct fs_parameter *param) +{ + struct nilfs_fs_context *nilfs = fc->fs_private; + int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + struct fs_parse_result result; + int opt; + + opt = fs_parse(fc, nilfs_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_barrier: + if (result.negated) nilfs_clear_opt(nilfs, BARRIER); - break; - case Opt_order: - if (strcmp(args[0].from, "relaxed") == 0) - /* Ordered data semantics */ - nilfs_clear_opt(nilfs, STRICT_ORDER); - else if (strcmp(args[0].from, "strict") == 0) - /* Strict in-order semantics */ - nilfs_set_opt(nilfs, STRICT_ORDER); - else - return 0; - break; - case Opt_err_panic: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_PANIC); - break; - case Opt_err_ro: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_RO); - break; - case Opt_err_cont: - nilfs_write_opt(nilfs, ERROR_MODE, ERRORS_CONT); - break; - case Opt_snapshot: - if (is_remount) { - nilfs_err(sb, - "\"%s\" option is invalid for remount", - p); - return 0; - } - break; - case Opt_norecovery: - nilfs_set_opt(nilfs, NORECOVERY); - break; - case Opt_discard: - nilfs_set_opt(nilfs, DISCARD); - break; - case Opt_nodiscard: - nilfs_clear_opt(nilfs, DISCARD); - break; - default: - nilfs_err(sb, "unrecognized mount option \"%s\"", p); - return 0; + else + nilfs_set_opt(nilfs, BARRIER); + break; + case Opt_order: + if (strcmp(param->string, "relaxed") == 0) + /* Ordered data semantics */ + nilfs_clear_opt(nilfs, STRICT_ORDER); + else if (strcmp(param->string, "strict") == 0) + /* Strict in-order semantics */ + nilfs_set_opt(nilfs, STRICT_ORDER); + else + return -EINVAL; + break; + case Opt_err: + nilfs->ns_mount_opt &= ~NILFS_MOUNT_ERROR_MODE; + nilfs->ns_mount_opt |= result.uint_32; + break; + case Opt_snapshot: + if (is_remount) { + struct super_block *sb = fc->root->d_sb; + + nilfs_err(sb, + "\"%s\" option is invalid for remount", + param->key); + return -EINVAL; + } + if (result.uint_64 == 0) { + nilfs_err(NULL, + "invalid option \"cp=0\": invalid checkpoint number 0"); + return -EINVAL; } + nilfs->cno = result.uint_64; + break; + case Opt_norecovery: + nilfs_set_opt(nilfs, NORECOVERY); + break; + case Opt_discard: + if (result.negated) + nilfs_clear_opt(nilfs, DISCARD); + else + nilfs_set_opt(nilfs, DISCARD); + break; + default: + return -EINVAL; } - return 1; -} -static inline void -nilfs_set_default_options(struct super_block *sb, - struct nilfs_super_block *sbp) -{ - struct the_nilfs *nilfs = sb->s_fs_info; - - nilfs->ns_mount_opt = - NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + return 0; } static int nilfs_setup_super(struct super_block *sb, int is_mount) @@ -878,9 +854,8 @@ struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, return (struct nilfs_super_block *)((char *)(*pbh)->b_data + offset); } -int nilfs_store_magic_and_option(struct super_block *sb, - struct nilfs_super_block *sbp, - char *data) +int nilfs_store_magic(struct super_block *sb, + struct nilfs_super_block *sbp) { struct the_nilfs *nilfs = sb->s_fs_info; @@ -891,14 +866,12 @@ int nilfs_store_magic_and_option(struct super_block *sb, sb->s_flags |= SB_NOATIME; #endif - nilfs_set_default_options(sb, sbp); - nilfs->ns_resuid = le16_to_cpu(sbp->s_def_resuid); nilfs->ns_resgid = le16_to_cpu(sbp->s_def_resgid); nilfs->ns_interval = le32_to_cpu(sbp->s_c_interval); nilfs->ns_watermark = le32_to_cpu(sbp->s_c_block_max); - return !parse_options(data, sb, 0) ? -EINVAL : 0; + return 0; } int nilfs_check_feature_compatibility(struct super_block *sb, @@ -1056,17 +1029,17 @@ int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) /** * nilfs_fill_super() - initialize a super block instance * @sb: super_block - * @data: mount options - * @silent: silent mode flag + * @fc: filesystem context * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent) +nilfs_fill_super(struct super_block *sb, struct fs_context *fc) { struct the_nilfs *nilfs; struct nilfs_root *fsroot; + struct nilfs_fs_context *ctx = fc->fs_private; __u64 cno; int err; @@ -1076,10 +1049,13 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = nilfs; - err = init_nilfs(nilfs, sb, (char *)data); + err = init_nilfs(nilfs, sb); if (err) goto failed_nilfs; + /* Copy in parsed mount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; + sb->s_op = &nilfs_sops; sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; @@ -1092,6 +1068,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto failed_nilfs; + super_set_uuid(sb, nilfs->ns_sbp[0]->s_uuid, + sizeof(nilfs->ns_sbp[0]->s_uuid)); + super_set_sysfs_name_bdev(sb); + cno = nilfs_last_cno(nilfs); err = nilfs_attach_checkpoint(sb, cno, true, &fsroot); if (err) { @@ -1138,34 +1118,25 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent) return err; } -static int nilfs_remount(struct super_block *sb, int *flags, char *data) +static int nilfs_reconfigure(struct fs_context *fc) { + struct nilfs_fs_context *ctx = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct the_nilfs *nilfs = sb->s_fs_info; - unsigned long old_sb_flags; - unsigned long old_mount_opt; int err; sync_filesystem(sb); - old_sb_flags = sb->s_flags; - old_mount_opt = nilfs->ns_mount_opt; - - if (!parse_options(data, sb, 1)) { - err = -EINVAL; - goto restore_opts; - } - sb->s_flags = (sb->s_flags & ~SB_POSIXACL); err = -EINVAL; if (!nilfs_valid_fs(nilfs)) { nilfs_warn(sb, "couldn't remount because the filesystem is in an incomplete recovery state"); - goto restore_opts; + goto ignore_opts; } - - if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) + if ((bool)(fc->sb_flags & SB_RDONLY) == sb_rdonly(sb)) goto out; - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; /* @@ -1193,146 +1164,67 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) "couldn't remount RDWR because of unsupported optional features (%llx)", (unsigned long long)features); err = -EROFS; - goto restore_opts; + goto ignore_opts; } sb->s_flags &= ~SB_RDONLY; root = NILFS_I(d_inode(sb->s_root))->i_root; err = nilfs_attach_log_writer(sb, root); - if (err) - goto restore_opts; + if (err) { + sb->s_flags |= SB_RDONLY; + goto ignore_opts; + } down_write(&nilfs->ns_sem); nilfs_setup_super(sb, true); up_write(&nilfs->ns_sem); } out: - return 0; - - restore_opts: - sb->s_flags = old_sb_flags; - nilfs->ns_mount_opt = old_mount_opt; - return err; -} - -struct nilfs_super_data { - __u64 cno; - int flags; -}; - -static int nilfs_parse_snapshot_option(const char *option, - const substring_t *arg, - struct nilfs_super_data *sd) -{ - unsigned long long val; - const char *msg = NULL; - int err; - - if (!(sd->flags & SB_RDONLY)) { - msg = "read-only option is not specified"; - goto parse_error; - } - - err = kstrtoull(arg->from, 0, &val); - if (err) { - if (err == -ERANGE) - msg = "too large checkpoint number"; - else - msg = "malformed argument"; - goto parse_error; - } else if (val == 0) { - msg = "invalid checkpoint number 0"; - goto parse_error; - } - sd->cno = val; - return 0; - -parse_error: - nilfs_err(NULL, "invalid option \"%s\": %s", option, msg); - return 1; -} - -/** - * nilfs_identify - pre-read mount options needed to identify mount instance - * @data: mount options - * @sd: nilfs_super_data - */ -static int nilfs_identify(char *data, struct nilfs_super_data *sd) -{ - char *p, *options = data; - substring_t args[MAX_OPT_ARGS]; - int token; - int ret = 0; - - do { - p = strsep(&options, ","); - if (p != NULL && *p) { - token = match_token(p, tokens, args); - if (token == Opt_snapshot) - ret = nilfs_parse_snapshot_option(p, &args[0], - sd); - } - if (!options) - break; - BUG_ON(options == data); - *(options - 1) = ','; - } while (!ret); - return ret; -} + sb->s_flags = (sb->s_flags & ~SB_POSIXACL); + /* Copy over parsed remount options */ + nilfs->ns_mount_opt = ctx->ns_mount_opt; -static int nilfs_set_bdev_super(struct super_block *s, void *data) -{ - s->s_dev = *(dev_t *)data; return 0; -} -static int nilfs_test_bdev_super(struct super_block *s, void *data) -{ - return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data; + ignore_opts: + return err; } -static struct dentry * -nilfs_mount(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int +nilfs_get_tree(struct fs_context *fc) { - struct nilfs_super_data sd = { .flags = flags }; + struct nilfs_fs_context *ctx = fc->fs_private; struct super_block *s; dev_t dev; int err; - if (nilfs_identify(data, &sd)) - return ERR_PTR(-EINVAL); + if (ctx->cno && !(fc->sb_flags & SB_RDONLY)) { + nilfs_err(NULL, + "invalid option \"cp=%llu\": read-only option is not specified", + ctx->cno); + return -EINVAL; + } - err = lookup_bdev(dev_name, &dev); + err = lookup_bdev(fc->source, &dev); if (err) - return ERR_PTR(err); + return err; - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, - &dev); + s = sget_dev(fc, dev); if (IS_ERR(s)) - return ERR_CAST(s); + return PTR_ERR(s); if (!s->s_root) { - /* - * We drop s_umount here because we need to open the bdev and - * bdev->open_mutex ranks above s_umount (blkdev_put() -> - * __invalidate_device()). It is safe because we have active sb - * reference and SB_BORN is not set yet. - */ - up_write(&s->s_umount); - err = setup_bdev_super(s, flags, NULL); - down_write(&s->s_umount); + err = setup_bdev_super(s, fc->sb_flags, fc); if (!err) - err = nilfs_fill_super(s, data, - flags & SB_SILENT ? 1 : 0); + err = nilfs_fill_super(s, fc); if (err) goto failed_super; s->s_flags |= SB_ACTIVE; - } else if (!sd.cno) { + } else if (!ctx->cno) { if (nilfs_tree_is_busy(s->s_root)) { - if ((flags ^ s->s_flags) & SB_RDONLY) { + if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) { nilfs_err(s, "the device already has a %s mount.", sb_rdonly(s) ? "read-only" : "read/write"); @@ -1341,37 +1233,75 @@ nilfs_mount(struct file_system_type *fs_type, int flags, } } else { /* - * Try remount to setup mount states if the current + * Try reconfigure to setup mount states if the current * tree is not mounted and only snapshots use this sb. + * + * Since nilfs_reconfigure() requires fc->root to be + * set, set it first and release it on failure. */ - err = nilfs_remount(s, &flags, data); - if (err) + fc->root = dget(s->s_root); + err = nilfs_reconfigure(fc); + if (err) { + dput(fc->root); + fc->root = NULL; /* prevent double release */ goto failed_super; + } + return 0; } } - if (sd.cno) { + if (ctx->cno) { struct dentry *root_dentry; - err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + err = nilfs_attach_snapshot(s, ctx->cno, &root_dentry); if (err) goto failed_super; - return root_dentry; + fc->root = root_dentry; + return 0; } - return dget(s->s_root); + fc->root = dget(s->s_root); + return 0; failed_super: deactivate_locked_super(s); - return ERR_PTR(err); + return err; +} + +static void nilfs_free_fc(struct fs_context *fc) +{ + kfree(fc->fs_private); +} + +static const struct fs_context_operations nilfs_context_ops = { + .parse_param = nilfs_parse_param, + .get_tree = nilfs_get_tree, + .reconfigure = nilfs_reconfigure, + .free = nilfs_free_fc, +}; + +static int nilfs_init_fs_context(struct fs_context *fc) +{ + struct nilfs_fs_context *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->ns_mount_opt = NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; + fc->fs_private = ctx; + fc->ops = &nilfs_context_ops; + + return 0; } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", - .mount = nilfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, + .init_fs_context = nilfs_init_fs_context, + .parameters = nilfs_param_spec, }; MODULE_ALIAS_FS("nilfs2"); diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c index 905c7eadf967..14868a3dd592 100644 --- a/fs/nilfs2/sysfs.c +++ b/fs/nilfs2/sysfs.c @@ -56,7 +56,7 @@ static void nilfs_##name##_attr_release(struct kobject *kobj) \ sg_##name##_kobj); \ complete(&subgroups->sg_##name##_kobj_unregister); \ } \ -static struct kobj_type nilfs_##name##_ktype = { \ +static const struct kobj_type nilfs_##name##_ktype = { \ .default_groups = nilfs_##name##_groups, \ .sysfs_ops = &nilfs_##name##_attr_ops, \ .release = nilfs_##name##_attr_release, \ @@ -166,7 +166,7 @@ static const struct sysfs_ops nilfs_snapshot_attr_ops = { .store = nilfs_snapshot_attr_store, }; -static struct kobj_type nilfs_snapshot_ktype = { +static const struct kobj_type nilfs_snapshot_ktype = { .default_groups = nilfs_snapshot_groups, .sysfs_ops = &nilfs_snapshot_attr_ops, .release = nilfs_snapshot_attr_release, @@ -990,7 +990,7 @@ static const struct sysfs_ops nilfs_dev_attr_ops = { .store = nilfs_dev_attr_store, }; -static struct kobj_type nilfs_dev_ktype = { +static const struct kobj_type nilfs_dev_ktype = { .default_groups = nilfs_dev_groups, .sysfs_ops = &nilfs_dev_attr_ops, .release = nilfs_dev_attr_release, diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index be41e26b7824..ecd71c190885 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> -#include <linux/random.h> #include <linux/log2.h> #include <linux/crc32.h> #include "nilfs.h" @@ -69,7 +68,6 @@ struct the_nilfs *alloc_nilfs(struct super_block *sb) INIT_LIST_HEAD(&nilfs->ns_dirty_files); INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_inode_lock); - spin_lock_init(&nilfs->ns_next_gen_lock); spin_lock_init(&nilfs->ns_last_segment_lock); nilfs->ns_cptree = RB_ROOT; spin_lock_init(&nilfs->ns_cptree_lock); @@ -413,7 +411,7 @@ static u64 nilfs_max_segment_count(struct the_nilfs *nilfs) { u64 max_count = U64_MAX; - do_div(max_count, nilfs->ns_blocks_per_segment); + max_count = div64_ul(max_count, nilfs->ns_blocks_per_segment); return min_t(u64, max_count, ULONG_MAX); } @@ -598,7 +596,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, struct nilfs_super_block **sbp = nilfs->ns_sbp; struct buffer_head **sbh = nilfs->ns_sbh; u64 sb2off, devsize = bdev_nr_bytes(nilfs->ns_bdev); - int valid[2], swp = 0; + int valid[2], swp = 0, older; if (devsize < NILFS_SEG_MIN_BLOCKS * NILFS_MIN_BLOCK_SIZE + 4096) { nilfs_err(sb, "device size too small"); @@ -654,9 +652,25 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, if (swp) nilfs_swap_super_block(nilfs); + /* + * Calculate the array index of the older superblock data. + * If one has been dropped, set index 0 pointing to the remaining one, + * otherwise set index 1 pointing to the old one (including if both + * are the same). + * + * Divided case valid[0] valid[1] swp -> older + * ------------------------------------------------------------- + * Both SBs are invalid 0 0 N/A (Error) + * SB1 is invalid 0 1 1 0 + * SB2 is invalid 1 0 0 0 + * SB2 is newer 1 1 1 0 + * SB2 is older or the same 1 1 0 1 + */ + older = valid[1] ^ swp; + nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); + nilfs->ns_prot_seq = le64_to_cpu(sbp[older]->s_last_seq); *sbpp = sbp[0]; return 0; } @@ -665,7 +679,6 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * init_nilfs - initialize a NILFS instance. * @nilfs: the_nilfs structure * @sb: super block - * @data: mount options * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing @@ -674,14 +687,12 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * Return Value: On success, 0 is returned. On error, a negative error * code is returned. */ -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb) { struct nilfs_super_block *sbp; int blocksize; int err; - down_write(&nilfs->ns_sem); - blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { nilfs_err(sb, "unable to set blocksize"); @@ -692,7 +703,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) if (err) goto out; - err = nilfs_store_magic_and_option(sb, sbp, data); + err = nilfs_store_magic(sb, sbp); if (err) goto failed_sbh; @@ -739,9 +750,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) nilfs->ns_blocksize_bits = sb->s_blocksize_bits; nilfs->ns_blocksize = blocksize; - get_random_bytes(&nilfs->ns_next_generation, - sizeof(nilfs->ns_next_generation)); - err = nilfs_store_disk_layout(nilfs, sbp); if (err) goto failed_sbh; @@ -757,7 +765,6 @@ int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data) set_nilfs_init(nilfs); err = 0; out: - up_write(&nilfs->ns_sem); return err; failed_sbh: diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 17fee562ee50..4776a70f01ae 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -71,8 +71,6 @@ enum { * @ns_dirty_files: list of dirty files * @ns_inode_lock: lock protecting @ns_dirty_files * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_next_generation: next generation number for inodes - * @ns_next_gen_lock: lock protecting @ns_next_generation * @ns_mount_opt: mount options * @ns_resuid: uid for reserved blocks * @ns_resgid: gid for reserved blocks @@ -161,10 +159,6 @@ struct the_nilfs { /* GC inode list */ struct list_head ns_gc_inodes; - /* Inode allocator */ - u32 ns_next_generation; - spinlock_t ns_next_gen_lock; - /* Mount options */ unsigned long ns_mount_opt; @@ -219,10 +213,6 @@ THE_NILFS_FNS(PURGING, purging) #define nilfs_set_opt(nilfs, opt) \ ((nilfs)->ns_mount_opt |= NILFS_MOUNT_##opt) #define nilfs_test_opt(nilfs, opt) ((nilfs)->ns_mount_opt & NILFS_MOUNT_##opt) -#define nilfs_write_opt(nilfs, mask, opt) \ - ((nilfs)->ns_mount_opt = \ - (((nilfs)->ns_mount_opt & ~NILFS_MOUNT_##mask) | \ - NILFS_MOUNT_##opt)) \ /** * struct nilfs_root - nilfs root object @@ -276,7 +266,7 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); struct the_nilfs *alloc_nilfs(struct super_block *sb); void destroy_nilfs(struct the_nilfs *nilfs); -int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb, char *data); +int init_nilfs(struct the_nilfs *nilfs, struct super_block *sb); int load_nilfs(struct the_nilfs *nilfs, struct super_block *sb); unsigned long nilfs_nrsvsegs(struct the_nilfs *nilfs, unsigned long nsegs); void nilfs_set_nsegments(struct the_nilfs *nilfs, unsigned long nsegs); |