diff options
author | Tahsin Erdogan <tahsin@google.com> | 2017-06-22 18:44:55 +0300 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2017-06-22 18:44:55 +0300 |
commit | dec214d00e0d78a08b947d7dccdfdb84407a9f4d (patch) | |
tree | d0912312aa9d0deb0b6544445859e09090c1d404 /fs/ext4/super.c | |
parent | 30a7eb970c3aae6f1b74b2edea896fdca1cbea38 (diff) | |
download | linux-dec214d00e0d78a08b947d7dccdfdb84407a9f4d.tar.xz |
ext4: xattr inode deduplication
Ext4 now supports xattr values that are up to 64k in size (vfs limit).
Large xattr values are stored in external inodes each one holding a
single value. Once written the data blocks of these inodes are immutable.
The real world use cases are expected to have a lot of value duplication
such as inherited acls etc. To reduce data duplication on disk, this patch
implements a deduplicator that allows sharing of xattr inodes.
The deduplication is based on an in-memory hash lookup that is a best
effort sharing scheme. When a xattr inode is read from disk (i.e.
getxattr() call), its crc32c hash is added to a hash table. Before
creating a new xattr inode for a value being set, the hash table is
checked to see if an existing inode holds an identical value. If such an
inode is found, the ref count on that inode is incremented. On value
removal the ref count is decremented and if it reaches zero the inode is
deleted.
The quota charging for such inodes is manually managed. Every reference
holder is charged the full size as if there was no sharing happening.
This is consistent with how xattr blocks are also charged.
[ Fixed up journal credits calculation to handle inline data and the
rare case where an shared xattr block can get freed when two thread
race on breaking the xattr block sharing. --tytso ]
Signed-off-by: Tahsin Erdogan <tahsin@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r-- | fs/ext4/super.c | 37 |
1 files changed, 34 insertions, 3 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 380389740575..d501f8256dc4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -927,6 +927,10 @@ static void ext4_put_super(struct super_block *sb) invalidate_bdev(sbi->journal_bdev); ext4_blkdev_remove(sbi); } + if (sbi->s_ea_inode_cache) { + ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); + sbi->s_ea_inode_cache = NULL; + } if (sbi->s_ea_block_cache) { ext4_xattr_destroy_cache(sbi->s_ea_block_cache); sbi->s_ea_block_cache = NULL; @@ -1178,7 +1182,10 @@ static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, if (res) return res; retry: - credits = ext4_xattr_set_credits(inode, len); + res = ext4_xattr_set_credits(inode, len, &credits); + if (res) + return res; + handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -3445,7 +3452,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } /* Load the checksum driver */ - if (ext4_has_feature_metadata_csum(sb)) { + if (ext4_has_feature_metadata_csum(sb) || + ext4_has_feature_ea_inode(sb)) { sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(sbi->s_chksum_driver)) { ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); @@ -3467,7 +3475,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Precompute checksum seed for all metadata */ if (ext4_has_feature_csum_seed(sb)) sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); - else if (ext4_has_metadata_csum(sb)) + else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, sizeof(es->s_uuid)); @@ -3597,6 +3605,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) "The Hurd can't support 64-bit file systems"); goto failed_mount; } + + /* + * ea_inode feature uses l_i_version field which is not + * available in HURD_COMPAT mode. + */ + if (ext4_has_feature_ea_inode(sb)) { + ext4_msg(sb, KERN_ERR, + "ea_inode feature is not supported for Hurd"); + goto failed_mount; + } } if (IS_EXT2_SB(sb)) { @@ -4067,6 +4085,15 @@ no_journal: goto failed_mount_wq; } + if (ext4_has_feature_ea_inode(sb)) { + sbi->s_ea_inode_cache = ext4_xattr_create_cache(); + if (!sbi->s_ea_inode_cache) { + ext4_msg(sb, KERN_ERR, + "Failed to create ea_inode_cache"); + goto failed_mount_wq; + } + } + if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) && (blocksize != PAGE_SIZE)) { ext4_msg(sb, KERN_ERR, @@ -4296,6 +4323,10 @@ failed_mount4: if (EXT4_SB(sb)->rsv_conversion_wq) destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); failed_mount_wq: + if (sbi->s_ea_inode_cache) { + ext4_xattr_destroy_cache(sbi->s_ea_inode_cache); + sbi->s_ea_inode_cache = NULL; + } if (sbi->s_ea_block_cache) { ext4_xattr_destroy_cache(sbi->s_ea_block_cache); sbi->s_ea_block_cache = NULL; |