summaryrefslogtreecommitdiff
path: root/fs/f2fs/data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/f2fs/data.c')
-rw-r--r--fs/f2fs/data.c1268
1 files changed, 1079 insertions, 189 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 985ed023a750..9bedfa8dd3a5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,12 +12,13 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
-#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
+#include <linux/uio.h>
+#include <linux/cleancache.h>
#include "f2fs.h"
#include "node.h"
@@ -25,11 +26,23 @@
#include "trace.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *extent_tree_slab;
+static struct kmem_cache *extent_node_slab;
+
static void f2fs_read_end_io(struct bio *bio, int err)
{
struct bio_vec *bvec;
int i;
+ if (f2fs_bio_encrypted(bio)) {
+ if (err) {
+ f2fs_release_crypto_ctx(bio->bi_private);
+ } else {
+ f2fs_end_io_crypto_work(bio->bi_private, bio);
+ return;
+ }
+ }
+
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
@@ -53,6 +66,8 @@ static void f2fs_write_end_io(struct bio *bio, int err)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
+ f2fs_restore_and_release_control_page(&page);
+
if (unlikely(err)) {
set_page_dirty(page);
set_bit(AS_EIO, &page->mapping->flags);
@@ -83,7 +98,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
bio->bi_bdev = sbi->sb->s_bdev;
bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
- bio->bi_private = sbi;
+ bio->bi_private = is_read ? NULL : sbi;
return bio;
}
@@ -130,16 +145,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
* Fill the locked page with data located in the block address.
* Return unlocked page.
*/
-int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_io_info *fio)
+int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio;
+ struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
trace_f2fs_submit_page_bio(page, fio);
- f2fs_trace_ios(page, fio, 0);
+ f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
- bio = __bio_alloc(sbi, fio->blk_addr, 1, is_read_io(fio->rw));
+ bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
bio_put(bio);
@@ -151,12 +166,13 @@ int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
return 0;
}
-void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
- struct f2fs_io_info *fio)
+void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
+ struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->rw);
+ struct page *bio_page;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -178,17 +194,19 @@ alloc_new:
io->fio = *fio;
}
- if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
+ bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+ if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
PAGE_CACHE_SIZE) {
__submit_merged_bio(io);
goto alloc_new;
}
io->last_block_in_bio = fio->blk_addr;
- f2fs_trace_ios(page, fio, 0);
+ f2fs_trace_ios(fio, 0);
up_write(&io->io_rwsem);
- trace_f2fs_submit_page_mbio(page, fio);
+ trace_f2fs_submit_page_mbio(fio->page, fio);
}
/*
@@ -197,7 +215,7 @@ alloc_new:
* ->node_page
* update block addresses in the node page
*/
-static void __set_data_blkaddr(struct dnode_of_data *dn)
+void set_data_blkaddr(struct dnode_of_data *dn)
{
struct f2fs_node *rn;
__le32 *addr_array;
@@ -226,7 +244,7 @@ int reserve_new_block(struct dnode_of_data *dn)
trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
dn->data_blkaddr = NEW_ADDR;
- __set_data_blkaddr(dn);
+ set_data_blkaddr(dn);
mark_inode_dirty(dn->inode);
sync_inode_page(dn);
return 0;
@@ -248,73 +266,49 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
return err;
}
-static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
- struct buffer_head *bh_result)
+static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
pgoff_t start_fofs, end_fofs;
block_t start_blkaddr;
- if (is_inode_flag_set(fi, FI_NO_EXTENT))
- return 0;
-
- read_lock(&fi->ext.ext_lock);
+ read_lock(&fi->ext_lock);
if (fi->ext.len == 0) {
- read_unlock(&fi->ext.ext_lock);
- return 0;
+ read_unlock(&fi->ext_lock);
+ return false;
}
stat_inc_total_hit(inode->i_sb);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
- start_blkaddr = fi->ext.blk_addr;
+ start_blkaddr = fi->ext.blk;
if (pgofs >= start_fofs && pgofs <= end_fofs) {
- unsigned int blkbits = inode->i_sb->s_blocksize_bits;
- size_t count;
-
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb,
- start_blkaddr + pgofs - start_fofs);
- count = end_fofs - pgofs + 1;
- if (count < (UINT_MAX >> blkbits))
- bh_result->b_size = (count << blkbits);
- else
- bh_result->b_size = UINT_MAX;
-
+ *ei = fi->ext;
stat_inc_read_hit(inode->i_sb);
- read_unlock(&fi->ext.ext_lock);
- return 1;
+ read_unlock(&fi->ext_lock);
+ return true;
}
- read_unlock(&fi->ext.ext_lock);
- return 0;
+ read_unlock(&fi->ext_lock);
+ return false;
}
-void update_extent_cache(struct dnode_of_data *dn)
+static bool update_extent_info(struct inode *inode, pgoff_t fofs,
+ block_t blkaddr)
{
- struct f2fs_inode_info *fi = F2FS_I(dn->inode);
- pgoff_t fofs, start_fofs, end_fofs;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ pgoff_t start_fofs, end_fofs;
block_t start_blkaddr, end_blkaddr;
int need_update = true;
- f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
-
- /* Update the page address in the parent node */
- __set_data_blkaddr(dn);
-
- if (is_inode_flag_set(fi, FI_NO_EXTENT))
- return;
-
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
- dn->ofs_in_node;
-
- write_lock(&fi->ext.ext_lock);
+ write_lock(&fi->ext_lock);
start_fofs = fi->ext.fofs;
end_fofs = fi->ext.fofs + fi->ext.len - 1;
- start_blkaddr = fi->ext.blk_addr;
- end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;
+ start_blkaddr = fi->ext.blk;
+ end_blkaddr = fi->ext.blk + fi->ext.len - 1;
/* Drop and initialize the matched extent */
if (fi->ext.len == 1 && fofs == start_fofs)
@@ -322,24 +316,24 @@ void update_extent_cache(struct dnode_of_data *dn)
/* Initial extent */
if (fi->ext.len == 0) {
- if (dn->data_blkaddr != NULL_ADDR) {
+ if (blkaddr != NULL_ADDR) {
fi->ext.fofs = fofs;
- fi->ext.blk_addr = dn->data_blkaddr;
+ fi->ext.blk = blkaddr;
fi->ext.len = 1;
}
goto end_update;
}
/* Front merge */
- if (fofs == start_fofs - 1 && dn->data_blkaddr == start_blkaddr - 1) {
+ if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) {
fi->ext.fofs--;
- fi->ext.blk_addr--;
+ fi->ext.blk--;
fi->ext.len++;
goto end_update;
}
/* Back merge */
- if (fofs == end_fofs + 1 && dn->data_blkaddr == end_blkaddr + 1) {
+ if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) {
fi->ext.len++;
goto end_update;
}
@@ -351,8 +345,7 @@ void update_extent_cache(struct dnode_of_data *dn)
fi->ext.len = fofs - start_fofs;
} else {
fi->ext.fofs = fofs + 1;
- fi->ext.blk_addr = start_blkaddr +
- fofs - start_fofs + 1;
+ fi->ext.blk = start_blkaddr + fofs - start_fofs + 1;
fi->ext.len -= fofs - start_fofs + 1;
}
} else {
@@ -366,85 +359,580 @@ void update_extent_cache(struct dnode_of_data *dn)
need_update = true;
}
end_update:
- write_unlock(&fi->ext.ext_lock);
- if (need_update)
- sync_inode_page(dn);
- return;
+ write_unlock(&fi->ext_lock);
+ return need_update;
}
-struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
+static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_info *ei,
+ struct rb_node *parent, struct rb_node **p)
{
- struct address_space *mapping = inode->i_mapping;
- struct dnode_of_data dn;
- struct page *page;
- int err;
- struct f2fs_io_info fio = {
- .type = DATA,
- .rw = sync ? READ_SYNC : READA,
- };
+ struct extent_node *en;
- page = find_get_page(mapping, index);
- if (page && PageUptodate(page))
- return page;
- f2fs_put_page(page, 0);
+ en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
+ if (!en)
+ return NULL;
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
- if (err)
- return ERR_PTR(err);
- f2fs_put_dnode(&dn);
+ en->ei = *ei;
+ INIT_LIST_HEAD(&en->list);
- if (dn.data_blkaddr == NULL_ADDR)
- return ERR_PTR(-ENOENT);
+ rb_link_node(&en->rb_node, parent, p);
+ rb_insert_color(&en->rb_node, &et->root);
+ et->count++;
+ atomic_inc(&sbi->total_ext_node);
+ return en;
+}
- /* By fallocate(), there is no cached page, but with NEW_ADDR */
- if (unlikely(dn.data_blkaddr == NEW_ADDR))
- return ERR_PTR(-EINVAL);
+static void __detach_extent_node(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ rb_erase(&en->rb_node, &et->root);
+ et->count--;
+ atomic_dec(&sbi->total_ext_node);
- page = grab_cache_page(mapping, index);
- if (!page)
- return ERR_PTR(-ENOMEM);
+ if (et->cached_en == en)
+ et->cached_en = NULL;
+}
- if (PageUptodate(page)) {
- unlock_page(page);
- return page;
+static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi,
+ nid_t ino)
+{
+ struct extent_tree *et;
+
+ down_read(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+ if (!et) {
+ up_read(&sbi->extent_tree_lock);
+ return NULL;
}
+ atomic_inc(&et->refcount);
+ up_read(&sbi->extent_tree_lock);
- fio.blk_addr = dn.data_blkaddr;
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
- if (err)
- return ERR_PTR(err);
+ return et;
+}
- if (sync) {
- wait_on_page_locked(page);
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 0);
- return ERR_PTR(-EIO);
+static struct extent_tree *__grab_extent_tree(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ nid_t ino = inode->i_ino;
+
+ down_write(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+ if (!et) {
+ et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
+ f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
+ memset(et, 0, sizeof(struct extent_tree));
+ et->ino = ino;
+ et->root = RB_ROOT;
+ et->cached_en = NULL;
+ rwlock_init(&et->lock);
+ atomic_set(&et->refcount, 0);
+ et->count = 0;
+ sbi->total_ext_tree++;
+ }
+ atomic_inc(&et->refcount);
+ up_write(&sbi->extent_tree_lock);
+
+ return et;
+}
+
+static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
+ unsigned int fofs)
+{
+ struct rb_node *node = et->root.rb_node;
+ struct extent_node *en;
+
+ if (et->cached_en) {
+ struct extent_info *cei = &et->cached_en->ei;
+
+ if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
+ return et->cached_en;
+ }
+
+ while (node) {
+ en = rb_entry(node, struct extent_node, rb_node);
+
+ if (fofs < en->ei.fofs) {
+ node = node->rb_left;
+ } else if (fofs >= en->ei.fofs + en->ei.len) {
+ node = node->rb_right;
+ } else {
+ et->cached_en = en;
+ return en;
}
}
- return page;
+ return NULL;
}
-/*
- * If it tries to access a hole, return an error.
- * Because, the callers, functions in dir.c and GC, should be able to know
- * whether this page exists or not.
- */
-struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ struct extent_node *prev;
+ struct rb_node *node;
+
+ node = rb_prev(&en->rb_node);
+ if (!node)
+ return NULL;
+
+ prev = rb_entry(node, struct extent_node, rb_node);
+ if (__is_back_mergeable(&en->ei, &prev->ei)) {
+ en->ei.fofs = prev->ei.fofs;
+ en->ei.blk = prev->ei.blk;
+ en->ei.len += prev->ei.len;
+ __detach_extent_node(sbi, et, prev);
+ return prev;
+ }
+ return NULL;
+}
+
+static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ struct extent_node *next;
+ struct rb_node *node;
+
+ node = rb_next(&en->rb_node);
+ if (!node)
+ return NULL;
+
+ next = rb_entry(node, struct extent_node, rb_node);
+ if (__is_front_mergeable(&en->ei, &next->ei)) {
+ en->ei.len += next->ei.len;
+ __detach_extent_node(sbi, et, next);
+ return next;
+ }
+ return NULL;
+}
+
+static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_info *ei,
+ struct extent_node **den)
+{
+ struct rb_node **p = &et->root.rb_node;
+ struct rb_node *parent = NULL;
+ struct extent_node *en;
+
+ while (*p) {
+ parent = *p;
+ en = rb_entry(parent, struct extent_node, rb_node);
+
+ if (ei->fofs < en->ei.fofs) {
+ if (__is_front_mergeable(ei, &en->ei)) {
+ f2fs_bug_on(sbi, !den);
+ en->ei.fofs = ei->fofs;
+ en->ei.blk = ei->blk;
+ en->ei.len += ei->len;
+ *den = __try_back_merge(sbi, et, en);
+ return en;
+ }
+ p = &(*p)->rb_left;
+ } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
+ if (__is_back_mergeable(ei, &en->ei)) {
+ f2fs_bug_on(sbi, !den);
+ en->ei.len += ei->len;
+ *den = __try_front_merge(sbi, et, en);
+ return en;
+ }
+ p = &(*p)->rb_right;
+ } else {
+ f2fs_bug_on(sbi, 1);
+ }
+ }
+
+ return __attach_extent_node(sbi, et, ei, parent, p);
+}
+
+static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, bool free_all)
+{
+ struct rb_node *node, *next;
+ struct extent_node *en;
+ unsigned int count = et->count;
+
+ node = rb_first(&et->root);
+ while (node) {
+ next = rb_next(node);
+ en = rb_entry(node, struct extent_node, rb_node);
+
+ if (free_all) {
+ spin_lock(&sbi->extent_lock);
+ if (!list_empty(&en->list))
+ list_del_init(&en->list);
+ spin_unlock(&sbi->extent_lock);
+ }
+
+ if (free_all || list_empty(&en->list)) {
+ __detach_extent_node(sbi, et, en);
+ kmem_cache_free(extent_node_slab, en);
+ }
+ node = next;
+ }
+
+ return count - et->count;
+}
+
+static void f2fs_init_extent_tree(struct inode *inode,
+ struct f2fs_extent *i_ext)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en;
+ struct extent_info ei;
+
+ if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
+ return;
+
+ et = __grab_extent_tree(inode);
+
+ write_lock(&et->lock);
+ if (et->count)
+ goto out;
+
+ set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
+ le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));
+
+ en = __insert_extent_tree(sbi, et, &ei, NULL);
+ if (en) {
+ et->cached_en = en;
+
+ spin_lock(&sbi->extent_lock);
+ list_add_tail(&en->list, &sbi->extent_list);
+ spin_unlock(&sbi->extent_lock);
+ }
+out:
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+}
+
+static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en;
+
+ trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+
+ et = __find_extent_tree(sbi, inode->i_ino);
+ if (!et)
+ return false;
+
+ read_lock(&et->lock);
+ en = __lookup_extent_tree(et, pgofs);
+ if (en) {
+ *ei = en->ei;
+ spin_lock(&sbi->extent_lock);
+ if (!list_empty(&en->list))
+ list_move_tail(&en->list, &sbi->extent_list);
+ spin_unlock(&sbi->extent_lock);
+ stat_inc_read_hit(sbi->sb);
+ }
+ stat_inc_total_hit(sbi->sb);
+ read_unlock(&et->lock);
+
+ trace_f2fs_lookup_extent_tree_end(inode, pgofs, en);
+
+ atomic_dec(&et->refcount);
+ return en ? true : false;
+}
+
+static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
+ block_t blkaddr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
+ struct extent_node *den = NULL;
+ struct extent_info ei, dei;
+ unsigned int endofs;
+
+ trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
+
+ et = __grab_extent_tree(inode);
+
+ write_lock(&et->lock);
+
+ /* 1. lookup and remove existing extent info in cache */
+ en = __lookup_extent_tree(et, fofs);
+ if (!en)
+ goto update_extent;
+
+ dei = en->ei;
+ __detach_extent_node(sbi, et, en);
+
+ /* 2. if extent can be split more, split and insert the left part */
+ if (dei.len > 1) {
+ /* insert left part of split extent into cache */
+ if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
+ set_extent_info(&ei, dei.fofs, dei.blk,
+ fofs - dei.fofs);
+ en1 = __insert_extent_tree(sbi, et, &ei, NULL);
+ }
+
+ /* insert right part of split extent into cache */
+ endofs = dei.fofs + dei.len - 1;
+ if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
+ set_extent_info(&ei, fofs + 1,
+ fofs - dei.fofs + dei.blk, endofs - fofs);
+ en2 = __insert_extent_tree(sbi, et, &ei, NULL);
+ }
+ }
+
+update_extent:
+ /* 3. update extent in extent cache */
+ if (blkaddr) {
+ set_extent_info(&ei, fofs, blkaddr, 1);
+ en3 = __insert_extent_tree(sbi, et, &ei, &den);
+ }
+
+ /* 4. update in global extent list */
+ spin_lock(&sbi->extent_lock);
+ if (en && !list_empty(&en->list))
+ list_del(&en->list);
+ /*
+ * en1 and en2 split from en, they will become more and more smaller
+ * fragments after splitting several times. So if the length is smaller
+ * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
+ */
+ if (en1)
+ list_add_tail(&en1->list, &sbi->extent_list);
+ if (en2)
+ list_add_tail(&en2->list, &sbi->extent_list);
+ if (en3) {
+ if (list_empty(&en3->list))
+ list_add_tail(&en3->list, &sbi->extent_list);
+ else
+ list_move_tail(&en3->list, &sbi->extent_list);
+ }
+ if (den && !list_empty(&den->list))
+ list_del(&den->list);
+ spin_unlock(&sbi->extent_lock);
+
+ /* 5. release extent node */
+ if (en)
+ kmem_cache_free(extent_node_slab, en);
+ if (den)
+ kmem_cache_free(extent_node_slab, den);
+
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+}
+
+void f2fs_preserve_extent_tree(struct inode *inode)
+{
+ struct extent_tree *et;
+ struct extent_info *ext = &F2FS_I(inode)->ext;
+ bool sync = false;
+
+ if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ return;
+
+ et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino);
+ if (!et) {
+ if (ext->len) {
+ ext->len = 0;
+ update_inode_page(inode);
+ }
+ return;
+ }
+
+ read_lock(&et->lock);
+ if (et->count) {
+ struct extent_node *en;
+
+ if (et->cached_en) {
+ en = et->cached_en;
+ } else {
+ struct rb_node *node = rb_first(&et->root);
+
+ if (!node)
+ node = rb_last(&et->root);
+ en = rb_entry(node, struct extent_node, rb_node);
+ }
+
+ if (__is_extent_same(ext, &en->ei))
+ goto out;
+
+ *ext = en->ei;
+ sync = true;
+ } else if (ext->len) {
+ ext->len = 0;
+ sync = true;
+ }
+out:
+ read_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+
+ if (sync)
+ update_inode_page(inode);
+}
+
+void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+ struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
+ struct extent_node *en, *tmp;
+ unsigned long ino = F2FS_ROOT_INO(sbi);
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned int found;
+ unsigned int node_cnt = 0, tree_cnt = 0;
+
+ if (!test_opt(sbi, EXTENT_CACHE))
+ return;
+
+ if (available_free_memory(sbi, EXTENT_CACHE))
+ return;
+
+ spin_lock(&sbi->extent_lock);
+ list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
+ if (!nr_shrink--)
+ break;
+ list_del_init(&en->list);
+ }
+ spin_unlock(&sbi->extent_lock);
+
+ down_read(&sbi->extent_tree_lock);
+ while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
+ (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
+ unsigned i;
+
+ ino = treevec[found - 1]->ino + 1;
+ for (i = 0; i < found; i++) {
+ struct extent_tree *et = treevec[i];
+
+ atomic_inc(&et->refcount);
+ write_lock(&et->lock);
+ node_cnt += __free_extent_tree(sbi, et, false);
+ write_unlock(&et->lock);
+ atomic_dec(&et->refcount);
+ }
+ }
+ up_read(&sbi->extent_tree_lock);
+
+ down_write(&sbi->extent_tree_lock);
+ radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter,
+ F2FS_ROOT_INO(sbi)) {
+ struct extent_tree *et = (struct extent_tree *)*slot;
+
+ if (!atomic_read(&et->refcount) && !et->count) {
+ radix_tree_delete(&sbi->extent_tree_root, et->ino);
+ kmem_cache_free(extent_tree_slab, et);
+ sbi->total_ext_tree--;
+ tree_cnt++;
+ }
+ }
+ up_write(&sbi->extent_tree_lock);
+
+ trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
+}
+
+void f2fs_destroy_extent_tree(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct extent_tree *et;
+ unsigned int node_cnt = 0;
+
+ if (!test_opt(sbi, EXTENT_CACHE))
+ return;
+
+ et = __find_extent_tree(sbi, inode->i_ino);
+ if (!et)
+ goto out;
+
+ /* free all extent info belong to this extent tree */
+ write_lock(&et->lock);
+ node_cnt = __free_extent_tree(sbi, et, true);
+ write_unlock(&et->lock);
+
+ atomic_dec(&et->refcount);
+
+ /* try to find and delete extent tree entry in radix tree */
+ down_write(&sbi->extent_tree_lock);
+ et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino);
+ if (!et) {
+ up_write(&sbi->extent_tree_lock);
+ goto out;
+ }
+ f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
+ radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+ kmem_cache_free(extent_tree_slab, et);
+ sbi->total_ext_tree--;
+ up_write(&sbi->extent_tree_lock);
+out:
+ trace_f2fs_destroy_extent_tree(inode, node_cnt);
+ return;
+}
+
+void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext)
+{
+ if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ f2fs_init_extent_tree(inode, i_ext);
+
+ write_lock(&F2FS_I(inode)->ext_lock);
+ get_extent_info(&F2FS_I(inode)->ext, *i_ext);
+ write_unlock(&F2FS_I(inode)->ext_lock);
+}
+
+static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
+ struct extent_info *ei)
+{
+ if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
+ return false;
+
+ if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE))
+ return f2fs_lookup_extent_tree(inode, pgofs, ei);
+
+ return lookup_extent_info(inode, pgofs, ei);
+}
+
+void f2fs_update_extent_cache(struct dnode_of_data *dn)
+{
+ struct f2fs_inode_info *fi = F2FS_I(dn->inode);
+ pgoff_t fofs;
+
+ f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
+
+ if (is_inode_flag_set(fi, FI_NO_EXTENT))
+ return;
+
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ dn->ofs_in_node;
+
+ if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE))
+ return f2fs_update_extent_tree(dn->inode, fofs,
+ dn->data_blkaddr);
+
+ if (update_extent_info(dn->inode, fofs, dn->data_blkaddr))
+ sync_inode_page(dn);
+}
+
+struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
{
struct address_space *mapping = inode->i_mapping;
struct dnode_of_data dn;
struct page *page;
+ struct extent_info ei;
int err;
struct f2fs_io_info fio = {
+ .sbi = F2FS_I_SB(inode),
.type = DATA,
- .rw = READ_SYNC,
+ .rw = rw,
+ .encrypted_page = NULL,
};
-repeat:
+
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return read_mapping_page(mapping, index, NULL);
+
page = grab_cache_page(mapping, index);
if (!page)
return ERR_PTR(-ENOMEM);
+ if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+ dn.data_blkaddr = ei.blk + index - ei.fofs;
+ goto got_it;
+ }
+
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
if (err) {
@@ -457,9 +945,11 @@ repeat:
f2fs_put_page(page, 1);
return ERR_PTR(-ENOENT);
}
-
- if (PageUptodate(page))
+got_it:
+ if (PageUptodate(page)) {
+ unlock_page(page);
return page;
+ }
/*
* A new dentry page is allocated but not able to be written, since its
@@ -470,14 +960,58 @@ repeat:
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
+ unlock_page(page);
return page;
}
fio.blk_addr = dn.data_blkaddr;
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
+ fio.page = page;
+ err = f2fs_submit_page_bio(&fio);
if (err)
return ERR_PTR(err);
+ return page;
+}
+
+struct page *find_data_page(struct inode *inode, pgoff_t index)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+
+ page = find_get_page(mapping, index);
+ if (page && PageUptodate(page))
+ return page;
+ f2fs_put_page(page, 0);
+
+ page = get_read_data_page(inode, index, READ_SYNC);
+ if (IS_ERR(page))
+ return page;
+ if (PageUptodate(page))
+ return page;
+
+ wait_on_page_locked(page);
+ if (unlikely(!PageUptodate(page))) {
+ f2fs_put_page(page, 0);
+ return ERR_PTR(-EIO);
+ }
+ return page;
+}
+
+/*
+ * If it tries to access a hole, return an error.
+ * Because, the callers, functions in dir.c and GC, should be able to know
+ * whether this page exists or not.
+ */
+struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+repeat:
+ page = get_read_data_page(inode, index, READ_SYNC);
+ if (IS_ERR(page))
+ return page;
+
+ /* wait for read completion */
lock_page(page);
if (unlikely(!PageUptodate(page))) {
f2fs_put_page(page, 1);
@@ -505,46 +1039,37 @@ struct page *get_new_data_page(struct inode *inode,
struct page *page;
struct dnode_of_data dn;
int err;
+repeat:
+ page = grab_cache_page(mapping, index);
+ if (!page)
+ return ERR_PTR(-ENOMEM);
set_new_dnode(&dn, inode, ipage, NULL, 0);
err = f2fs_reserve_block(&dn, index);
- if (err)
+ if (err) {
+ f2fs_put_page(page, 1);
return ERR_PTR(err);
-repeat:
- page = grab_cache_page(mapping, index);
- if (!page) {
- err = -ENOMEM;
- goto put_err;
}
+ if (!ipage)
+ f2fs_put_dnode(&dn);
if (PageUptodate(page))
- return page;
+ goto got_it;
if (dn.data_blkaddr == NEW_ADDR) {
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
SetPageUptodate(page);
} else {
- struct f2fs_io_info fio = {
- .type = DATA,
- .rw = READ_SYNC,
- .blk_addr = dn.data_blkaddr,
- };
- err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, &fio);
- if (err)
- goto put_err;
+ f2fs_put_page(page, 1);
- lock_page(page);
- if (unlikely(!PageUptodate(page))) {
- f2fs_put_page(page, 1);
- err = -EIO;
- goto put_err;
- }
- if (unlikely(page->mapping != mapping)) {
- f2fs_put_page(page, 1);
+ page = get_read_data_page(inode, index, READ_SYNC);
+ if (IS_ERR(page))
goto repeat;
- }
- }
+ /* wait for read completion */
+ lock_page(page);
+ }
+got_it:
if (new_i_size &&
i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
@@ -552,10 +1077,6 @@ repeat:
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
return page;
-
-put_err:
- f2fs_put_dnode(&dn);
- return ERR_PTR(err);
}
static int __allocate_data_block(struct dnode_of_data *dn)
@@ -569,19 +1090,26 @@ static int __allocate_data_block(struct dnode_of_data *dn)
if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
return -EPERM;
+
+ dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
+ if (dn->data_blkaddr == NEW_ADDR)
+ goto alloc;
+
if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
return -ENOSPC;
+alloc:
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
seg = CURSEG_DIRECT_IO;
- allocate_data_block(sbi, NULL, NULL_ADDR, &dn->data_blkaddr, &sum, seg);
+ allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
+ &sum, seg);
/* direct IO doesn't use extent cache to maximize the performance */
- __set_data_blkaddr(dn);
+ set_data_blkaddr(dn);
/* update i_size */
fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
@@ -615,7 +1143,10 @@ static void __allocate_data_blocks(struct inode *inode, loff_t offset,
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
while (dn.ofs_in_node < end_offset && len) {
- if (dn.data_blkaddr == NULL_ADDR) {
+ block_t blkaddr;
+
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+ if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
if (__allocate_data_block(&dn))
goto sync_out;
allocated = true;
@@ -643,29 +1174,37 @@ out:
}
/*
- * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
+ * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
+ * f2fs_map_blocks structure.
* If original data blocks are allocated, then give them to blockdev.
* Otherwise,
* a. preallocate requested block addresses
* b. do not use extent cache for better performance
* c. give the block addresses to blockdev
*/
-static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create, bool fiemap)
+static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
+ int create, bool fiemap)
{
- unsigned int blkbits = inode->i_sb->s_blocksize_bits;
- unsigned maxblocks = bh_result->b_size >> blkbits;
+ unsigned int maxblocks = map->m_len;
struct dnode_of_data dn;
int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
pgoff_t pgofs, end_offset;
int err = 0, ofs = 1;
+ struct extent_info ei;
bool allocated = false;
- /* Get the page offset from the block offset(iblock) */
- pgofs = (pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));
+ map->m_len = 0;
+ map->m_flags = 0;
+
+ /* it only supports block size == page size */
+ pgofs = (pgoff_t)map->m_lblk;
- if (check_extent_cache(inode, pgofs, bh_result))
+ if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ map->m_pblk = ei.blk + pgofs - ei.fofs;
+ map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
+ map->m_flags = F2FS_MAP_MAPPED;
goto out;
+ }
if (create)
f2fs_lock_op(F2FS_I_SB(inode));
@@ -682,21 +1221,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
goto put_out;
if (dn.data_blkaddr != NULL_ADDR) {
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ map->m_flags = F2FS_MAP_MAPPED;
+ map->m_pblk = dn.data_blkaddr;
+ if (dn.data_blkaddr == NEW_ADDR)
+ map->m_flags |= F2FS_MAP_UNWRITTEN;
} else if (create) {
err = __allocate_data_block(&dn);
if (err)
goto put_out;
allocated = true;
- set_buffer_new(bh_result);
- map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
+ map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
+ map->m_pblk = dn.data_blkaddr;
} else {
goto put_out;
}
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
- bh_result->b_size = (((size_t)1) << blkbits);
+ map->m_len = 1;
dn.ofs_in_node++;
pgofs++;
@@ -720,21 +1261,25 @@ get_next:
end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
}
- if (maxblocks > (bh_result->b_size >> blkbits)) {
+ if (maxblocks > map->m_len) {
block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
if (blkaddr == NULL_ADDR && create) {
err = __allocate_data_block(&dn);
if (err)
goto sync_out;
allocated = true;
+ map->m_flags |= F2FS_MAP_NEW;
blkaddr = dn.data_blkaddr;
}
/* Give more consecutive addresses for the readahead */
- if (blkaddr == (bh_result->b_blocknr + ofs)) {
+ if ((map->m_pblk != NEW_ADDR &&
+ blkaddr == (map->m_pblk + ofs)) ||
+ (map->m_pblk == NEW_ADDR &&
+ blkaddr == NEW_ADDR)) {
ofs++;
dn.ofs_in_node++;
pgofs++;
- bh_result->b_size += (((size_t)1) << blkbits);
+ map->m_len++;
goto get_next;
}
}
@@ -747,10 +1292,28 @@ unlock_out:
if (create)
f2fs_unlock_op(F2FS_I_SB(inode));
out:
- trace_f2fs_get_data_block(inode, iblock, bh_result, err);
+ trace_f2fs_map_blocks(inode, map, err);
return err;
}
+static int __get_data_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create, bool fiemap)
+{
+ struct f2fs_map_blocks map;
+ int ret;
+
+ map.m_lblk = iblock;
+ map.m_len = bh->b_size >> inode->i_blkbits;
+
+ ret = f2fs_map_blocks(inode, &map, create, fiemap);
+ if (!ret) {
+ map_bh(bh, inode->i_sb, map.m_pblk);
+ bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
+ bh->b_size = map.m_len << inode->i_blkbits;
+ }
+ return ret;
+}
+
static int get_data_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -763,11 +1326,268 @@ static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create, true);
}
+static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
+{
+ return (offset >> inode->i_blkbits);
+}
+
+static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
+{
+ return (blk << inode->i_blkbits);
+}
+
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
u64 start, u64 len)
{
- return generic_block_fiemap(inode, fieinfo,
- start, len, get_data_block_fiemap);
+ struct buffer_head map_bh;
+ sector_t start_blk, last_blk;
+ loff_t isize = i_size_read(inode);
+ u64 logical = 0, phys = 0, size = 0;
+ u32 flags = 0;
+ bool past_eof = false, whole_file = false;
+ int ret = 0;
+
+ ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (len >= isize) {
+ whole_file = true;
+ len = isize;
+ }
+
+ if (logical_to_blk(inode, len) == 0)
+ len = blk_to_logical(inode, 1);
+
+ start_blk = logical_to_blk(inode, start);
+ last_blk = logical_to_blk(inode, start + len - 1);
+next:
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len;
+
+ ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
+ if (ret)
+ goto out;
+
+ /* HOLE */
+ if (!buffer_mapped(&map_bh)) {
+ start_blk++;
+
+ if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
+ past_eof = 1;
+
+ if (past_eof && size) {
+ flags |= FIEMAP_EXTENT_LAST;
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ } else if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ size = 0;
+ }
+
+ /* if we have holes up to/past EOF then we're done */
+ if (start_blk > last_blk || past_eof || ret)
+ goto out;
+ } else {
+ if (start_blk > last_blk && !whole_file) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ goto out;
+ }
+
+ /*
+ * if size != 0 then we know we already have an extent
+ * to add, so add it.
+ */
+ if (size) {
+ ret = fiemap_fill_next_extent(fieinfo, logical,
+ phys, size, flags);
+ if (ret)
+ goto out;
+ }
+
+ logical = blk_to_logical(inode, start_blk);
+ phys = blk_to_logical(inode, map_bh.b_blocknr);
+ size = map_bh.b_size;
+ flags = 0;
+ if (buffer_unwritten(&map_bh))
+ flags = FIEMAP_EXTENT_UNWRITTEN;
+
+ start_blk += logical_to_blk(inode, size);
+
+ /*
+ * If we are past the EOF, then we need to make sure as
+ * soon as we find a hole that the last extent we found
+ * is marked with FIEMAP_EXTENT_LAST
+ */
+ if (!past_eof && logical + size >= isize)
+ past_eof = true;
+ }
+ cond_resched();
+ if (fatal_signal_pending(current))
+ ret = -EINTR;
+ else
+ goto next;
+out:
+ if (ret == 1)
+ ret = 0;
+
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+}
+
+/*
+ * This function was originally taken from fs/mpage.c, and customized for f2fs.
+ * Major change was from block_size == page_size in f2fs by default.
+ */
+static int f2fs_mpage_readpages(struct address_space *mapping,
+ struct list_head *pages, struct page *page,
+ unsigned nr_pages)
+{
+ struct bio *bio = NULL;
+ unsigned page_idx;
+ sector_t last_block_in_bio = 0;
+ struct inode *inode = mapping->host;
+ const unsigned blkbits = inode->i_blkbits;
+ const unsigned blocksize = 1 << blkbits;
+ sector_t block_in_file;
+ sector_t last_block;
+ sector_t last_block_in_file;
+ sector_t block_nr;
+ struct block_device *bdev = inode->i_sb->s_bdev;
+ struct f2fs_map_blocks map;
+
+ map.m_pblk = 0;
+ map.m_lblk = 0;
+ map.m_len = 0;
+ map.m_flags = 0;
+
+ for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
+
+ prefetchw(&page->flags);
+ if (pages) {
+ page = list_entry(pages->prev, struct page, lru);
+ list_del(&page->lru);
+ if (add_to_page_cache_lru(page, mapping,
+ page->index, GFP_KERNEL))
+ goto next_page;
+ }
+
+ block_in_file = (sector_t)page->index;
+ last_block = block_in_file + nr_pages;
+ last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
+ blkbits;
+ if (last_block > last_block_in_file)
+ last_block = last_block_in_file;
+
+ /*
+ * Map blocks using the previous result first.
+ */
+ if ((map.m_flags & F2FS_MAP_MAPPED) &&
+ block_in_file > map.m_lblk &&
+ block_in_file < (map.m_lblk + map.m_len))
+ goto got_it;
+
+ /*
+ * Then do more f2fs_map_blocks() calls until we are
+ * done with this page.
+ */
+ map.m_flags = 0;
+
+ if (block_in_file < last_block) {
+ map.m_lblk = block_in_file;
+ map.m_len = last_block - block_in_file;
+
+ if (f2fs_map_blocks(inode, &map, 0, false))
+ goto set_error_page;
+ }
+got_it:
+ if ((map.m_flags & F2FS_MAP_MAPPED)) {
+ block_nr = map.m_pblk + block_in_file - map.m_lblk;
+ SetPageMappedToDisk(page);
+
+ if (!PageUptodate(page) && !cleancache_get_page(page)) {
+ SetPageUptodate(page);
+ goto confused;
+ }
+ } else {
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ unlock_page(page);
+ goto next_page;
+ }
+
+ /*
+ * This page will go to BIO. Do we need to send this
+ * BIO off first?
+ */
+ if (bio && (last_block_in_bio != block_nr - 1)) {
+submit_and_realloc:
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ if (bio == NULL) {
+ struct f2fs_crypto_ctx *ctx = NULL;
+
+ if (f2fs_encrypted_inode(inode) &&
+ S_ISREG(inode->i_mode)) {
+ struct page *cpage;
+
+ ctx = f2fs_get_crypto_ctx(inode);
+ if (IS_ERR(ctx))
+ goto set_error_page;
+
+ /* wait the page to be moved by cleaning */
+ cpage = find_lock_page(
+ META_MAPPING(F2FS_I_SB(inode)),
+ block_nr);
+ if (cpage) {
+ f2fs_wait_on_page_writeback(cpage,
+ DATA);
+ f2fs_put_page(cpage, 1);
+ }
+ }
+
+ bio = bio_alloc(GFP_KERNEL,
+ min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
+ if (!bio) {
+ if (ctx)
+ f2fs_release_crypto_ctx(ctx);
+ goto set_error_page;
+ }
+ bio->bi_bdev = bdev;
+ bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
+ bio->bi_end_io = f2fs_read_end_io;
+ bio->bi_private = ctx;
+ }
+
+ if (bio_add_page(bio, page, blocksize, 0) < blocksize)
+ goto submit_and_realloc;
+
+ last_block_in_bio = block_nr;
+ goto next_page;
+set_error_page:
+ SetPageError(page);
+ zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ unlock_page(page);
+ goto next_page;
+confused:
+ if (bio) {
+ submit_bio(READ, bio);
+ bio = NULL;
+ }
+ unlock_page(page);
+next_page:
+ if (pages)
+ page_cache_release(page);
+ }
+ BUG_ON(pages && !list_empty(pages));
+ if (bio)
+ submit_bio(READ, bio);
+ return 0;
}
static int f2fs_read_data_page(struct file *file, struct page *page)
@@ -781,8 +1601,7 @@ static int f2fs_read_data_page(struct file *file, struct page *page)
if (f2fs_has_inline_data(inode))
ret = f2fs_read_inline_data(inode, page);
if (ret == -EAGAIN)
- ret = mpage_readpage(page, get_data_block);
-
+ ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
return ret;
}
@@ -796,11 +1615,12 @@ static int f2fs_read_data_pages(struct file *file,
if (f2fs_has_inline_data(inode))
return 0;
- return mpage_readpages(mapping, pages, nr_pages, get_data_block);
+ return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
}
-int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
+int do_write_data_page(struct f2fs_io_info *fio)
{
+ struct page *page = fio->page;
struct inode *inode = page->mapping->host;
struct dnode_of_data dn;
int err = 0;
@@ -813,8 +1633,18 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
fio->blk_addr = dn.data_blkaddr;
/* This page is already truncated */
- if (fio->blk_addr == NULL_ADDR)
+ if (fio->blk_addr == NULL_ADDR) {
+ ClearPageUptodate(page);
goto out_writepage;
+ }
+
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ fio->encrypted_page = f2fs_encrypt(inode, fio->page);
+ if (IS_ERR(fio->encrypted_page)) {
+ err = PTR_ERR(fio->encrypted_page);
+ goto out_writepage;
+ }
+ }
set_page_writeback(page);
@@ -825,12 +1655,17 @@ int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
if (unlikely(fio->blk_addr != NEW_ADDR &&
!is_cold_data(page) &&
need_inplace_update(inode))) {
- rewrite_data_page(page, fio);
+ rewrite_data_page(fio);
set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
+ trace_f2fs_do_write_data_page(page, IPU);
} else {
- write_data_page(page, &dn, fio);
- update_extent_cache(&dn);
+ write_data_page(&dn, fio);
+ set_data_blkaddr(&dn);
+ f2fs_update_extent_cache(&dn);
+ trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
+ if (page->index == 0)
+ set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
}
out_writepage:
f2fs_put_dnode(&dn);
@@ -849,8 +1684,11 @@ static int f2fs_write_data_page(struct page *page,
bool need_balance_fs = false;
int err = 0;
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = DATA,
.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
+ .page = page,
+ .encrypted_page = NULL,
};
trace_f2fs_writepage(page, DATA);
@@ -880,7 +1718,7 @@ write:
if (S_ISDIR(inode->i_mode)) {
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- err = do_write_data_page(page, &fio);
+ err = do_write_data_page(&fio);
goto done;
}
@@ -900,7 +1738,7 @@ write:
if (f2fs_has_inline_data(inode))
err = f2fs_write_inline_data(inode, page);
if (err == -EAGAIN)
- err = do_write_data_page(page, &fio);
+ err = do_write_data_page(&fio);
f2fs_unlock_op(sbi);
done:
if (err && err != -ENOENT)
@@ -909,6 +1747,8 @@ done:
clear_cold_data(page);
out:
inode_dec_dirty_pages(inode);
+ if (err)
+ ClearPageUptodate(page);
unlock_page(page);
if (need_balance_fs)
f2fs_balance_fs(sbi);
@@ -950,6 +1790,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,
available_free_memory(sbi, DIRTY_DENTS))
goto skip_write;
+ /* during POR, we don't need to trigger writepage at all. */
+ if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
+ goto skip_write;
+
diff = nr_pages_to_write(sbi, DATA, wbc);
if (!S_ISDIR(inode->i_mode)) {
@@ -1063,11 +1907,14 @@ put_next:
zero_user_segment(page, 0, PAGE_CACHE_SIZE);
} else {
struct f2fs_io_info fio = {
+ .sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
.blk_addr = dn.data_blkaddr,
+ .page = page,
+ .encrypted_page = NULL,
};
- err = f2fs_submit_page_bio(sbi, page, &fio);
+ err = f2fs_submit_page_bio(&fio);
if (err)
goto fail;
@@ -1081,6 +1928,15 @@ put_next:
f2fs_put_page(page, 1);
goto repeat;
}
+
+ /* avoid symlink page */
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ err = f2fs_decrypt_one(inode, page);
+ if (err) {
+ f2fs_put_page(page, 1);
+ goto fail;
+ }
+ }
}
out:
SetPageUptodate(page);
@@ -1118,12 +1974,12 @@ static int f2fs_write_end(struct file *file,
return copied;
}
-static int check_direct_IO(struct inode *inode, int rw,
- struct iov_iter *iter, loff_t offset)
+static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
+ loff_t offset)
{
unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
- if (rw == READ)
+ if (iov_iter_rw(iter) == READ)
return 0;
if (offset & blocksize_mask)
@@ -1135,8 +1991,8 @@ static int check_direct_IO(struct inode *inode, int rw,
return 0;
}
-static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
- struct iov_iter *iter, loff_t offset)
+static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -1151,19 +2007,22 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
return err;
}
- if (check_direct_IO(inode, rw, iter, offset))
+ if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+ return 0;
+
+ if (check_direct_IO(inode, iter, offset))
return 0;
- trace_f2fs_direct_IO_enter(inode, offset, count, rw);
+ trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
- if (rw & WRITE)
+ if (iov_iter_rw(iter) == WRITE)
__allocate_data_blocks(inode, offset, count);
- err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
- if (err < 0 && (rw & WRITE))
+ err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
+ if (err < 0 && iov_iter_rw(iter) == WRITE)
f2fs_write_failed(mapping, offset + count);
- trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
+ trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
return err;
}
@@ -1236,6 +2095,37 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
return generic_block_bmap(mapping, block, get_data_block);
}
+void init_extent_cache_info(struct f2fs_sb_info *sbi)
+{
+ INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
+ init_rwsem(&sbi->extent_tree_lock);
+ INIT_LIST_HEAD(&sbi->extent_list);
+ spin_lock_init(&sbi->extent_lock);
+ sbi->total_ext_tree = 0;
+ atomic_set(&sbi->total_ext_node, 0);
+}
+
+int __init create_extent_cache(void)
+{
+ extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
+ sizeof(struct extent_tree));
+ if (!extent_tree_slab)
+ return -ENOMEM;
+ extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
+ sizeof(struct extent_node));
+ if (!extent_node_slab) {
+ kmem_cache_destroy(extent_tree_slab);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void destroy_extent_cache(void)
+{
+ kmem_cache_destroy(extent_node_slab);
+ kmem_cache_destroy(extent_tree_slab);
+}
+
const struct address_space_operations f2fs_dblock_aops = {
.readpage = f2fs_read_data_page,
.readpages = f2fs_read_data_pages,