summaryrefslogtreecommitdiff
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
authorTahsin Erdogan <tahsin@google.com>2017-06-22 18:44:55 +0300
committerTheodore Ts'o <tytso@mit.edu>2017-06-22 18:44:55 +0300
commitdec214d00e0d78a08b947d7dccdfdb84407a9f4d (patch)
treed0912312aa9d0deb0b6544445859e09090c1d404 /fs/ext4/inode.c
parent30a7eb970c3aae6f1b74b2edea896fdca1cbea38 (diff)
downloadlinux-dec214d00e0d78a08b947d7dccdfdb84407a9f4d.tar.xz
ext4: xattr inode deduplication
Ext4 now supports xattr values that are up to 64k in size (vfs limit). Large xattr values are stored in external inodes each one holding a single value. Once written the data blocks of these inodes are immutable. The real world use cases are expected to have a lot of value duplication such as inherited acls etc. To reduce data duplication on disk, this patch implements a deduplicator that allows sharing of xattr inodes. The deduplication is based on an in-memory hash lookup that is a best effort sharing scheme. When a xattr inode is read from disk (i.e. getxattr() call), its crc32c hash is added to a hash table. Before creating a new xattr inode for a value being set, the hash table is checked to see if an existing inode holds an identical value. If such an inode is found, the ref count on that inode is incremented. On value removal the ref count is decremented and if it reaches zero the inode is deleted. The quota charging for such inodes is manually managed. Every reference holder is charged the full size as if there was no sharing happening. This is consistent with how xattr blocks are also charged. [ Fixed up journal credits calculation to handle inline data and the rare case where an shared xattr block can get freed when two thread race on breaking the xattr block sharing. --tytso ] Signed-off-by: Tahsin Erdogan <tahsin@google.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c13
1 files changed, 11 insertions, 2 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 46def73d3472..962f28a0e176 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -139,6 +139,8 @@ static void ext4_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
static int __ext4_journalled_writepage(struct page *page, unsigned int len);
static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
+static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
+ int pextents);
/*
* Test whether an inode is a fast symlink.
@@ -4843,8 +4845,15 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
}
brelse(iloc.bh);
ext4_set_inode_flags(inode);
- if (ei->i_flags & EXT4_EA_INODE_FL)
+
+ if (ei->i_flags & EXT4_EA_INODE_FL) {
ext4_xattr_inode_set_class(inode);
+
+ inode_lock(inode);
+ inode->i_flags |= S_NOQUOTA;
+ inode_unlock(inode);
+ }
+
unlock_new_inode(inode);
return inode;
@@ -5503,7 +5512,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int lblocks,
*
* Also account for superblock, inode, quota and xattr blocks
*/
-int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
+static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
int pextents)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);