diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-18 02:37:06 +0300 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-02-18 02:37:06 +0300 |
| commit | 75a452d31ba697fc986609dd4905294e07687992 (patch) | |
| tree | 01043ab6ecabecc81ab9aeb3e2310d223704a9d6 | |
| parent | 87a367f1bffadf1db5def15bd0cd2148acd057ad (diff) | |
| parent | 10d7c95af043b45a85dc738c3271bf760ff3577e (diff) | |
| download | linux-75a452d31ba697fc986609dd4905294e07687992.tar.xz | |
Merge tag 'ntfs3_for_7.0' of https://github.com/Paragon-Software-Group/linux-ntfs3
Pull ntfs3 updates from Konstantin Komarov:
"New code:
- improve readahead for bitmap initialization and large directory scans
- fsync files by syncing parent inodes
- drop preallocated clusters for sparse and compressed files
- zero-fill folios beyond i_valid in ntfs_read_folio()
- implement llseek SEEK_DATA/SEEK_HOLE by scanning data runs
- implement iomap-based file operations
- allow explicit boolean acl/prealloc mount options
- fall-through between switch labels
- delayed-allocation (delalloc) support
Fixes:
- check return value of indx_find to avoid infinite loop
- initialize new folios before use
- infinite loop in attr_load_runs_range on inconsistent metadata
- infinite loop triggered by zero-sized ATTR_LIST
- ntfs_mount_options leak in ntfs_fill_super()
- deadlock in ni_read_folio_cmpr
- circular locking dependency in run_unpack_ex
- prevent infinite loops caused by the next valid being the same
- restore NULL folio initialization in ntfs_writepages()
- slab-out-of-bounds read in DeleteIndexEntryRoot
Updates:
- allow readdir() to finish after directory mutations without rewinddir()
- handle attr_set_size() errors when truncating files
- make ntfs_writeback_ops static
- refactor duplicate kmemdup pattern in do_action()
- avoid calling run_get_entry() when run == NULL in ntfs_read_run_nb_ra()
Replaced:
- use wait_on_buffer() directly
- rename ni_readpage_cmpr into ni_read_folio_cmpr"
* tag 'ntfs3_for_7.0' of https://github.com/Paragon-Software-Group/linux-ntfs3: (26 commits)
fs/ntfs3: add delayed-allocation (delalloc) support
fs/ntfs3: avoid calling run_get_entry() when run == NULL in ntfs_read_run_nb_ra()
fs/ntfs3: add fall-through between switch labels
fs/ntfs3: allow explicit boolean acl/prealloc mount options
fs/ntfs3: Fix slab-out-of-bounds read in DeleteIndexEntryRoot
ntfs3: Restore NULL folio initialization in ntfs_writepages()
ntfs3: Refactor duplicate kmemdup pattern in do_action()
fs/ntfs3: prevent infinite loops caused by the next valid being the same
fs/ntfs3: make ntfs_writeback_ops static
ntfs3: fix circular locking dependency in run_unpack_ex
fs/ntfs3: implement iomap-based file operations
fs/ntfs3: fix deadlock in ni_read_folio_cmpr
fs/ntfs3: implement llseek SEEK_DATA/SEEK_HOLE by scanning data runs
fs/ntfs3: zero-fill folios beyond i_valid in ntfs_read_folio()
fs/ntfs3: handle attr_set_size() errors when truncating files
fs/ntfs3: drop preallocated clusters for sparse and compressed files
fs/ntfs3: fsync files by syncing parent inodes
fs/ntfs3: fix ntfs_mount_options leak in ntfs_fill_super()
fs/ntfs3: allow readdir() to finish after directory mutations without rewinddir()
fs/ntfs3: improve readahead for bitmap initialization and large directory scans
...
| -rw-r--r-- | fs/ntfs3/attrib.c | 412 | ||||
| -rw-r--r-- | fs/ntfs3/attrlist.c | 17 | ||||
| -rw-r--r-- | fs/ntfs3/bitmap.c | 17 | ||||
| -rw-r--r-- | fs/ntfs3/dir.c | 108 | ||||
| -rw-r--r-- | fs/ntfs3/file.c | 599 | ||||
| -rw-r--r-- | fs/ntfs3/frecord.c | 382 | ||||
| -rw-r--r-- | fs/ntfs3/fslog.c | 65 | ||||
| -rw-r--r-- | fs/ntfs3/fsntfs.c | 112 | ||||
| -rw-r--r-- | fs/ntfs3/index.c | 49 | ||||
| -rw-r--r-- | fs/ntfs3/inode.c | 800 | ||||
| -rw-r--r-- | fs/ntfs3/ntfs.h | 4 | ||||
| -rw-r--r-- | fs/ntfs3/ntfs_fs.h | 153 | ||||
| -rw-r--r-- | fs/ntfs3/run.c | 163 | ||||
| -rw-r--r-- | fs/ntfs3/super.c | 73 | ||||
| -rw-r--r-- | fs/ntfs3/xattr.c | 2 |
15 files changed, 1822 insertions, 1134 deletions
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c index 980ae9157248..6cb9bc5d605c 100644 --- a/fs/ntfs3/attrib.c +++ b/fs/ntfs3/attrib.c @@ -91,7 +91,8 @@ static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, * run_deallocate_ex - Deallocate clusters. */ static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST len, CLST *done, bool trim) + CLST vcn, CLST len, CLST *done, bool trim, + struct runs_tree *run_da) { int err = 0; CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0; @@ -120,6 +121,16 @@ failed: if (sbi) { /* mark bitmap range [lcn + clen) as free and trim clusters. */ mark_as_free_ex(sbi, lcn, clen, trim); + + if (run_da) { + CLST da_len; + if (!run_remove_range(run_da, vcn, clen, + &da_len)) { + err = -ENOMEM; + goto failed; + } + ntfs_sub_da(sbi, da_len); + } } dn += clen; } @@ -147,9 +158,10 @@ out: * attr_allocate_clusters - Find free space, mark it as used and store in @run. */ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn, CLST *new_len) + struct runs_tree *run_da, CLST vcn, CLST lcn, + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, + CLST *alen, const size_t fr, CLST *new_lcn, + CLST *new_len) { int err; CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; @@ -166,6 +178,12 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, continue; } + if (err == -ENOSPC && new_len && vcn - vcn0) { + /* Keep already allocated clusters. */ + *alen = vcn - vcn0; + return 0; + } + if (err) goto out; @@ -179,12 +197,21 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, /* Add new fragment into run storage. 
*/ if (!run_add_entry(run, vcn, lcn, flen, opt & ALLOCATE_MFT)) { +undo_alloc: /* Undo last 'ntfs_look_for_free_space' */ mark_as_free_ex(sbi, lcn, len, false); err = -ENOMEM; goto out; } + if (run_da) { + CLST da_len; + if (!run_remove_range(run_da, vcn, flen, &da_len)) { + goto undo_alloc; + } + ntfs_sub_da(sbi, da_len); + } + if (opt & ALLOCATE_ZERO) { u8 shift = sbi->cluster_bits - SECTOR_SHIFT; @@ -199,7 +226,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, vcn += flen; if (flen >= len || (opt & ALLOCATE_MFT) || - (fr && run->count - cnt >= fr)) { + (opt & ALLOCATE_ONE_FR) || (fr && run->count - cnt >= fr)) { *alen = vcn - vcn0; return 0; } @@ -210,7 +237,8 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, out: /* Undo 'ntfs_look_for_free_space' */ if (vcn - vcn0) { - run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false); + run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false, + run_da); run_truncate(run, vcn0); } @@ -275,7 +303,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, } else { const char *data = resident_data(attr); - err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL, + err = attr_allocate_clusters(sbi, run, NULL, 0, 0, len, NULL, ALLOCATE_DEF, &alen, 0, NULL, NULL); if (err) @@ -391,7 +419,7 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr, } /* - * attr_set_size - Change the size of attribute. + * attr_set_size_ex - Change the size of attribute. * * Extend: * - Sparse/compressed: No allocated clusters. @@ -399,24 +427,28 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr, * Shrink: * - No deallocate if @keep_prealloc is set. 
*/ -int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, struct runs_tree *run, - u64 new_size, const u64 *new_valid, bool keep_prealloc, - struct ATTRIB **ret) +int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret, bool no_da) { int err = 0; struct ntfs_sb_info *sbi = ni->mi.sbi; u8 cluster_bits = sbi->cluster_bits; bool is_mft = ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && !name_len; - u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp; + u64 old_valid, old_size, old_alloc, new_alloc_tmp; + u64 new_alloc = 0; struct ATTRIB *attr = NULL, *attr_b; struct ATTR_LIST_ENTRY *le, *le_b; struct mft_inode *mi, *mi_b; CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; CLST next_svcn, pre_alloc = -1, done = 0; - bool is_ext, is_bad = false; + bool is_ext = false, is_bad = false; bool dirty = false; + struct runs_tree *run_da = run == &ni->file.run ? &ni->file.run_da : + NULL; + bool da = !is_mft && sbi->options->delalloc && run_da && !no_da; u32 align; struct MFT_REC *rec; @@ -448,8 +480,11 @@ again: is_ext = is_attr_ext(attr_b); align = sbi->cluster_size; - if (is_ext) + if (is_ext) { align <<= attr_b->nres.c_unit; + keep_prealloc = false; + da = false; + } old_valid = le64_to_cpu(attr_b->nres.valid_size); old_size = le64_to_cpu(attr_b->nres.data_size); @@ -467,6 +502,37 @@ again_1: goto ok; } + if (da && + (vcn = old_alen + run_len(&ni->file.run_da), new_alen > vcn)) { + /* Resize up normal file. Delay new clusters allocation. 
*/ + alen = new_alen - vcn; + + if (ntfs_check_free_space(sbi, alen, 0, true)) { + if (!run_add_entry(&ni->file.run_da, vcn, SPARSE_LCN, + alen, false)) { + err = -ENOMEM; + goto out; + } + + ntfs_add_da(sbi, alen); + goto ok1; + } + } + + if (!keep_prealloc && run_da && run_da->count && + (vcn = run_get_max_vcn(run_da), new_alen < vcn)) { + /* Shrink delayed clusters. */ + + /* Try to remove fragment from delay allocated run. */ + if (!run_remove_range(run_da, new_alen, vcn - new_alen, + &alen)) { + err = -ENOMEM; + goto out; + } + + ntfs_sub_da(sbi, alen); + } + vcn = old_alen - 1; svcn = le64_to_cpu(attr_b->nres.svcn); @@ -572,7 +638,8 @@ add_alloc_in_same_attr_seg: } else { /* ~3 bytes per fragment. */ err = attr_allocate_clusters( - sbi, run, vcn, lcn, to_allocate, &pre_alloc, + sbi, run, run_da, vcn, lcn, to_allocate, + &pre_alloc, is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen, is_mft ? 0 : (sbi->record_size - @@ -751,14 +818,14 @@ pack_runs: mi_b->dirty = dirty = true; err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen, - true); + true, run_da); if (err) goto out; if (is_ext) { /* dlen - really deallocated clusters. */ le64_sub_cpu(&attr_b->nres.total_size, - ((u64)dlen << cluster_bits)); + (u64)dlen << cluster_bits); } run_truncate(run, vcn); @@ -813,14 +880,14 @@ ok1: if (((type == ATTR_DATA && !name_len) || (type == ATTR_ALLOC && name == I30_NAME))) { /* Update inode_set_bytes. */ - if (attr_b->non_res) { - new_alloc = le64_to_cpu(attr_b->nres.alloc_size); - if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { - inode_set_bytes(&ni->vfs_inode, new_alloc); - dirty = true; - } + if (attr_b->non_res && + inode_get_bytes(&ni->vfs_inode) != new_alloc) { + inode_set_bytes(&ni->vfs_inode, new_alloc); + dirty = true; } + i_size_write(&ni->vfs_inode, new_size); + /* Don't forget to update duplicate information in parent. 
*/ if (dirty) { ni->ni_flags |= NI_FLAG_UPDATE_PARENT; @@ -861,7 +928,7 @@ restore_run: is_bad = true; undo_1: - run_deallocate_ex(sbi, run, vcn, alen, NULL, false); + run_deallocate_ex(sbi, run, vcn, alen, NULL, false, run_da); run_truncate(run, vcn); out: @@ -884,43 +951,74 @@ bad_inode: * - new allocated clusters are zeroed via blkdev_issue_zeroout. */ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero) + CLST *len, bool *new, bool zero, void **res, bool no_da) { - int err = 0; - struct runs_tree *run = &ni->file.run; - struct ntfs_sb_info *sbi; - u8 cluster_bits; - struct ATTRIB *attr, *attr_b; - struct ATTR_LIST_ENTRY *le, *le_b; - struct mft_inode *mi, *mi_b; - CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen; - CLST alloc, evcn; - unsigned fr; - u64 total_size, total_size0; - int step = 0; + int err; if (new) *new = false; + if (res) + *res = NULL; /* Try to find in cache. */ down_read(&ni->file.run_lock); - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + if (!no_da && run_lookup_entry(&ni->file.run_da, vcn, lcn, len, NULL)) { + /* The requested vcn is delay allocated. */ + *lcn = DELALLOC_LCN; + } else if (run_lookup_entry(&ni->file.run, vcn, lcn, len, NULL)) { + /* The requested vcn is known in current run. */ + } else { *len = 0; + } up_read(&ni->file.run_lock); if (*len && (*lcn != SPARSE_LCN || !new)) return 0; /* Fast normal way without allocation. */ /* No cluster in cache or we need to allocate cluster in hole. */ - sbi = ni->mi.sbi; - cluster_bits = sbi->cluster_bits; - ni_lock(ni); down_write(&ni->file.run_lock); - /* Repeat the code above (under write lock). */ - if (!run_lookup_entry(run, vcn, lcn, len, NULL)) + err = attr_data_get_block_locked(ni, vcn, clen, lcn, len, new, zero, + res, no_da); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; +} + +/* + * attr_data_get_block_locked - Helper for attr_data_get_block. 
+ */ +int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen, + CLST *lcn, CLST *len, bool *new, bool zero, + void **res, bool no_da) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct runs_tree *run = &ni->file.run; + struct runs_tree *run_da = &ni->file.run_da; + bool da = sbi->options->delalloc && !no_da; + u8 cluster_bits; + struct ATTRIB *attr, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0; + CLST alloc, evcn; + unsigned fr; + u64 total_size, total_size0; + int step; + +again: + if (da && run_lookup_entry(run_da, vcn, lcn, len, NULL)) { + /* The requested vcn is delay allocated. */ + *lcn = DELALLOC_LCN; + } else if (run_lookup_entry(run, vcn, lcn, len, NULL)) { + /* The requested vcn is known in current run. */ + } else { *len = 0; + } if (*len) { if (*lcn != SPARSE_LCN || !new) @@ -929,6 +1027,9 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, clen = *len; } + cluster_bits = sbi->cluster_bits; + step = 0; + le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); if (!attr_b) { @@ -937,8 +1038,15 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, } if (!attr_b->non_res) { + u32 data_size = le32_to_cpu(attr_b->res.data_size); *lcn = RESIDENT_LCN; - *len = 1; + *len = data_size; + if (res && data_size) { + *res = kmemdup(resident_data(attr_b), data_size, + GFP_KERNEL); + if (!*res) + err = -ENOMEM; + } goto out; } @@ -948,7 +1056,7 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, err = -EINVAL; } else { *len = 1; - *lcn = SPARSE_LCN; + *lcn = EOF_LCN; } goto out; } @@ -1026,7 +1134,8 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, to_alloc = ((vcn0 + clen + clst_per_frame - 1) & cmask) - vcn; if (fr < clst_per_frame) fr = clst_per_frame; - zero = true; + if (vcn != vcn0) + 
zero = true; /* Check if 'vcn' and 'vcn0' in different attribute segments. */ if (vcn < svcn || evcn1 <= vcn) { @@ -1043,11 +1152,38 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, if (err) goto out; } + da = false; /* no delalloc for compressed file. */ } if (vcn + to_alloc > asize) to_alloc = asize - vcn; + if (da) { + CLST rlen1, rlen2; + if (!ntfs_check_free_space(sbi, to_alloc, 0, true)) { + err = ni_allocate_da_blocks_locked(ni); + if (err) + goto out; + /* Layout of records may be changed. Start again without 'da'. */ + da = false; + goto again; + } + + /* run_add_entry consolidates existed ranges. */ + rlen1 = run_len(run_da); + if (!run_add_entry(run_da, vcn, SPARSE_LCN, to_alloc, false)) { + err = -ENOMEM; + goto out; + } + rlen2 = run_len(run_da); + + /* new added delay clusters = rlen2 - rlen1. */ + ntfs_add_da(sbi, rlen2 - rlen1); + *len = to_alloc; + *lcn = DELALLOC_LCN; + goto ok; + } + /* Get the last LCN to allocate from. */ hint = 0; @@ -1062,18 +1198,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, } /* Allocate and zeroout new clusters. */ - err = attr_allocate_clusters(sbi, run, vcn, hint + 1, to_alloc, NULL, - zero ? ALLOCATE_ZERO : ALLOCATE_DEF, &alen, - fr, lcn, len); + err = attr_allocate_clusters(sbi, run, run_da, vcn, hint + 1, to_alloc, + NULL, + zero ? ALLOCATE_ZERO : ALLOCATE_ONE_FR, + len, fr, lcn, len); if (err) goto out; *new = true; step = 1; - end = vcn + alen; + end = vcn + *len; /* Save 'total_size0' to restore if error. */ total_size0 = le64_to_cpu(attr_b->nres.total_size); - total_size = total_size0 + ((u64)alen << cluster_bits); + total_size = total_size0 + ((u64)*len << cluster_bits); if (vcn != vcn0) { if (!run_lookup_entry(run, vcn0, lcn, len, NULL)) { @@ -1139,7 +1276,7 @@ repack: * in 'ni_insert_nonresident'. * Return in advance -ENOSPC here if there are no free cluster and no free MFT. 
*/ - if (!ntfs_check_for_free_space(sbi, 1, 1)) { + if (!ntfs_check_free_space(sbi, 1, 1, false)) { /* Undo step 1. */ err = -ENOSPC; goto undo1; @@ -1224,8 +1361,6 @@ out: /* Too complex to restore. */ _ntfs_bad_inode(&ni->vfs_inode); } - up_write(&ni->file.run_lock); - ni_unlock(ni); return err; @@ -1234,41 +1369,14 @@ undo1: attr_b->nres.total_size = cpu_to_le64(total_size0); inode_set_bytes(&ni->vfs_inode, total_size0); - if (run_deallocate_ex(sbi, run, vcn, alen, NULL, false) || - !run_add_entry(run, vcn, SPARSE_LCN, alen, false) || + if (run_deallocate_ex(sbi, run, vcn, *len, NULL, false, run_da) || + !run_add_entry(run, vcn, SPARSE_LCN, *len, false) || mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn)) { _ntfs_bad_inode(&ni->vfs_inode); } goto out; } -int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio) -{ - u64 vbo; - struct ATTRIB *attr; - u32 data_size; - size_t len; - - attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); - if (!attr) - return -EINVAL; - - if (attr->non_res) - return E_NTFS_NONRESIDENT; - - vbo = folio->index << PAGE_SHIFT; - data_size = le32_to_cpu(attr->res.data_size); - if (vbo > data_size) - len = 0; - else - len = min(data_size - vbo, folio_size(folio)); - - folio_fill_tail(folio, 0, resident_data(attr) + vbo, len); - folio_mark_uptodate(folio); - - return 0; -} - int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) { u64 vbo; @@ -1285,7 +1393,7 @@ int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio) return E_NTFS_NONRESIDENT; } - vbo = folio->index << PAGE_SHIFT; + vbo = folio_pos(folio); data_size = le32_to_cpu(attr->res.data_size); if (vbo < data_size) { char *data = resident_data(attr); @@ -1354,19 +1462,27 @@ int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn; CLST vcn_last = (to - 1) >> cluster_bits; CLST lcn, clen; - int err; + int err = 0; + int retry = 0; for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) { - 
if (!run_lookup_entry(run, vcn, &lcn, &clen, NULL)) { - err = attr_load_runs_vcn(ni, type, name, name_len, run, - vcn); - if (err) - return err; - clen = 0; /* Next run_lookup_entry(vcn) must be success. */ + if (run_lookup_entry(run, vcn, &lcn, &clen, NULL)) { + retry = 0; + continue; + } + if (retry) { + err = -EINVAL; + break; } + err = attr_load_runs_vcn(ni, type, name, name_len, run, vcn); + if (err) + break; + + clen = 0; /* Next run_lookup_entry(vcn) must be success. */ + retry++; } - return 0; + return err; } #ifdef CONFIG_NTFS3_LZX_XPRESS @@ -1689,7 +1805,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, if (len < clst_data) { err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len, - NULL, true); + NULL, true, NULL); if (err) goto out; @@ -1709,7 +1825,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, hint = -1; } - err = attr_allocate_clusters(sbi, run, vcn + clst_data, + err = attr_allocate_clusters(sbi, run, NULL, vcn + clst_data, hint + 1, len - clst_data, NULL, ALLOCATE_DEF, &alen, 0, NULL, NULL); @@ -1864,6 +1980,7 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) CLST vcn, end; u64 valid_size, data_size, alloc_size, total_size; u32 mask; + u64 i_size; __le16 a_flags; if (!bytes) @@ -1879,52 +1996,79 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) return 0; } - data_size = le64_to_cpu(attr_b->nres.data_size); - alloc_size = le64_to_cpu(attr_b->nres.alloc_size); - a_flags = attr_b->flags; - - if (is_attr_ext(attr_b)) { - total_size = le64_to_cpu(attr_b->nres.total_size); - mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; - } else { - total_size = alloc_size; - mask = sbi->cluster_mask; - } - - if ((vbo & mask) || (bytes & mask)) { + mask = is_attr_ext(attr_b) ? + ((sbi->cluster_size << attr_b->nres.c_unit) - 1) : + sbi->cluster_mask; + if ((vbo | bytes) & mask) { /* Allow to collapse only cluster aligned ranges. 
*/ return -EINVAL; } - if (vbo > data_size) + /* i_size - size of file with delay allocated clusters. */ + i_size = ni->vfs_inode.i_size; + + if (vbo > i_size) return -EINVAL; down_write(&ni->file.run_lock); - if (vbo + bytes >= data_size) { - u64 new_valid = min(ni->i_valid, vbo); + if (vbo + bytes >= i_size) { + valid_size = min(ni->i_valid, vbo); /* Simple truncate file at 'vbo'. */ truncate_setsize(&ni->vfs_inode, vbo); err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo, - &new_valid, true, NULL); + &valid_size, true); - if (!err && new_valid < ni->i_valid) - ni->i_valid = new_valid; + if (!err && valid_size < ni->i_valid) + ni->i_valid = valid_size; goto out; } - /* - * Enumerate all attribute segments and collapse. - */ - alen = alloc_size >> sbi->cluster_bits; vcn = vbo >> sbi->cluster_bits; len = bytes >> sbi->cluster_bits; end = vcn + len; dealloc = 0; done = 0; + /* + * Check delayed clusters. + */ + if (ni->file.run_da.count) { + struct runs_tree *run_da = &ni->file.run_da; + if (run_is_mapped_full(run_da, vcn, end - 1)) { + /* + * The requested range is full in delayed clusters. + */ + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, run, + i_size - bytes, NULL, false, + NULL, true); + goto out; + } + + /* Collapse request crosses real and delayed clusters. */ + err = ni_allocate_da_blocks_locked(ni); + if (err) + goto out; + + /* Layout of records maybe changed. */ + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b || !attr_b->non_res) { + err = -ENOENT; + goto out; + } + } + + data_size = le64_to_cpu(attr_b->nres.data_size); + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + total_size = is_attr_ext(attr_b) ? 
+ le64_to_cpu(attr_b->nres.total_size) : + alloc_size; + alen = alloc_size >> sbi->cluster_bits; + a_flags = attr_b->flags; svcn = le64_to_cpu(attr_b->nres.svcn); evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; @@ -1947,6 +2091,9 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) goto out; } + /* + * Enumerate all attribute segments and collapse. + */ for (;;) { CLST vcn1, eat, next_svcn; @@ -1974,13 +2121,13 @@ check_seg: vcn1 = vcn + done; /* original vcn in attr/run. */ eat = min(end, evcn1) - vcn1; - err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true); + err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true, + NULL); if (err) goto out; if (svcn + eat < evcn1) { /* Collapse a part of this attribute segment. */ - if (!run_collapse_range(run, vcn1, eat, done)) { err = -ENOMEM; goto out; @@ -2161,9 +2308,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) bytes = alloc_size; bytes -= vbo; - if ((vbo & mask) || (bytes & mask)) { + if ((vbo | bytes) & mask) { /* We have to zero a range(s). */ - if (frame_size == NULL) { + if (!frame_size) { /* Caller insists range is aligned. */ return -EINVAL; } @@ -2222,7 +2369,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) * Calculate how many clusters there are. * Don't do any destructive actions. */ - err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false); + err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false, + NULL); if (err) goto done; @@ -2260,7 +2408,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size) } /* Real deallocate. Should not fail. */ - run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true); + run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true, + &ni->file.run_da); next_attr: /* Free all allocated memory. 
*/ @@ -2372,7 +2521,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) return -EINVAL; } - if ((vbo & mask) || (bytes & mask)) { + if ((vbo | bytes) & mask) { /* Allow to insert only frame aligned ranges. */ return -EINVAL; } @@ -2391,7 +2540,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (!attr_b->non_res) { err = attr_set_size(ni, ATTR_DATA, NULL, 0, run, - data_size + bytes, NULL, false, NULL); + data_size + bytes, NULL, false); le_b = NULL; attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, @@ -2414,7 +2563,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) goto done; } - /* Resident files becomes nonresident. */ + /* Resident file becomes nonresident. */ data_size = le64_to_cpu(attr_b->nres.data_size); alloc_size = le64_to_cpu(attr_b->nres.alloc_size); } @@ -2451,10 +2600,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) if (err) goto out; - if (!run_insert_range(run, vcn, len)) { - err = -ENOMEM; + err = run_insert_range(run, vcn, len); + if (err) + goto out; + + err = run_insert_range_da(&ni->file.run_da, vcn, len); + if (err) goto out; - } /* Try to pack in current record as much as possible. 
*/ err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn); diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c index a4d74bed74fa..270a29323530 100644 --- a/fs/ntfs3/attrlist.c +++ b/fs/ntfs3/attrlist.c @@ -52,6 +52,11 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) if (!attr->non_res) { lsize = le32_to_cpu(attr->res.data_size); + if (!lsize) { + err = -EINVAL; + goto out; + } + /* attr is resident: lsize < record_size (1K or 4K) */ le = kvmalloc(al_aligned(lsize), GFP_KERNEL); if (!le) { @@ -66,6 +71,10 @@ int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) u16 run_off = le16_to_cpu(attr->nres.run_off); lsize = le64_to_cpu(attr->nres.data_size); + if (!lsize) { + err = -EINVAL; + goto out; + } run_init(&ni->attr_list.run); @@ -336,8 +345,8 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, le->id = id; memcpy(le->name, name, sizeof(short) * name_len); - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size, - &new_size, true, &attr); + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, new_size, + &new_size, true, &attr, false); if (err) { /* Undo memmove above. */ memmove(le, Add2Ptr(le, sz), old_size - off); @@ -395,8 +404,8 @@ int al_update(struct ntfs_inode *ni, int sync) * Attribute list increased on demand in al_add_le. * Attribute list decreased here. 
*/ - err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, - false, &attr); + err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, + false, &attr, false); if (err) goto out; diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c index 65d05e6a0566..db7d0ecfb469 100644 --- a/fs/ntfs3/bitmap.c +++ b/fs/ntfs3/bitmap.c @@ -508,6 +508,8 @@ static int wnd_rescan(struct wnd_bitmap *wnd) size_t wpos, wbit, iw, vbo; struct buffer_head *bh = NULL; CLST lcn, clen; + struct file_ra_state *ra; + struct address_space *mapping = sb->s_bdev->bd_mapping; wnd->uptodated = 0; wnd->extent_max = 0; @@ -516,6 +518,13 @@ static int wnd_rescan(struct wnd_bitmap *wnd) vbo = 0; + /* Allocate in memory instead of stack. Not critical if failed. */ + ra = kzalloc(sizeof(*ra), GFP_NOFS); + if (ra) { + file_ra_state_init(ra, mapping); + ra->ra_pages = (wnd->nbits / 8 + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + for (iw = 0; iw < wnd->nwnd; iw++) { if (iw + 1 == wnd->nwnd) wbits = wnd->bits_last; @@ -552,6 +561,13 @@ static int wnd_rescan(struct wnd_bitmap *wnd) len = ((u64)clen << cluster_bits) - off; } + if (ra) { + pgoff_t idx = lbo >> PAGE_SHIFT; + if (!ra_has_index(ra, idx)) + page_cache_sync_readahead(mapping, ra, NULL, + idx, 1); + } + bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); if (!bh) { err = -EIO; @@ -638,6 +654,7 @@ next_wnd: } out: + kfree(ra); return err; } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 596f8c62f033..4652a56ad105 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -393,33 +393,77 @@ static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, * ntfs_readdir - file_operations::iterate_shared * * Use non sorted enumeration. - * We have an example of broken volume where sorted enumeration - * counts each name twice. + * Sorted enumeration may result infinite loop if names tree contains loop. 
*/ static int ntfs_readdir(struct file *file, struct dir_context *ctx) { const struct INDEX_ROOT *root; - u64 vbo; size_t bit; - loff_t eod; int err = 0; struct inode *dir = file_inode(file); struct ntfs_inode *ni = ntfs_i(dir); struct super_block *sb = dir->i_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; loff_t i_size = i_size_read(dir); - u32 pos = ctx->pos; + u64 pos = ctx->pos; u8 *name = NULL; struct indx_node *node = NULL; u8 index_bits = ni->dir.index_bits; + size_t max_bit = i_size >> ni->dir.index_bits; + loff_t eod = i_size + sbi->record_size; /* Name is a buffer of PATH_MAX length. */ static_assert(NTFS_NAME_LEN * 4 < PATH_MAX); - eod = i_size + sbi->record_size; + if (!pos) { + /* + * ni->dir.version increments each directory change. + * Save the initial value of ni->dir.version. + */ + file->private_data = (void *)ni->dir.version; + } - if (pos >= eod) - return 0; + if (pos >= eod) { + if (file->private_data == (void *)ni->dir.version) { + /* No changes since first readdir. */ + return 0; + } + + /* + * Handle directories that changed after the initial readdir(). + * + * Some user space code implements recursive removal like this instead + * of calling rmdir(2) directly: + * + * fd = opendir(path); + * while ((dent = readdir(fd))) + * unlinkat(dirfd(fd), dent->d_name, 0); + * closedir(fd); + * + * POSIX leaves unspecified what readdir() should return once the + * directory has been modified after opendir()/rewinddir(), so this + * pattern is not guaranteed to work on all filesystems or platforms. + * + * In ntfs3 the internal name tree may be reshaped while entries are + * being removed, so there is no stable anchor for continuing a + * single-pass walk based on the original readdir() order. + * + * In practice some widely used tools (for example certain rm(1) + * implementations) have used this readdir()/unlink() loop, and some + * filesystems behave in a way that effectively makes it work in the + * common case. 
+ * + * The code below follows that practice and tries to provide + * "rmdir-like" behaviour for such callers on ntfs3, even though the + * situation is not strictly defined by the APIs. + * + * Apple documents the same readdir()/unlink() issue and a workaround + * for HFS file systems in: + * https://web.archive.org/web/20220122122948/https:/support.apple.com/kb/TA21420?locale=en_US + */ + ctx->pos = pos = 3; + file->private_data = (void *)ni->dir.version; + } if (!dir_emit_dots(file, ctx)) return 0; @@ -454,58 +498,58 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) if (pos >= sbi->record_size) { bit = (pos - sbi->record_size) >> index_bits; } else { + /* + * Add each name from root in 'ctx'. + */ err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx); if (err) goto out; bit = 0; } - if (!i_size) { - ctx->pos = eod; - goto out; - } - - for (;;) { - vbo = (u64)bit << index_bits; - if (vbo >= i_size) { - ctx->pos = eod; - goto out; - } - + /* + * Enumerate indexes until the end of dir. + */ + for (; bit < max_bit; bit += 1) { + /* Get the next used index. */ err = indx_used_bit(&ni->dir, ni, &bit); if (err) goto out; if (bit == MINUS_ONE_T) { - ctx->pos = eod; - goto out; + /* no more used indexes. end of dir. */ + break; } - vbo = (u64)bit << index_bits; - if (vbo >= i_size) { + if (bit >= max_bit) { + /* Corrupted directory. */ err = -EINVAL; goto out; } - err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, - &node); + err = indx_read_ra(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, + &node, &file->f_ra); if (err) goto out; + /* + * Add each name from index in 'ctx'. + */ err = ntfs_read_hdr(sbi, ni, &node->index->ihdr, - vbo + sbi->record_size, pos, name, ctx); + ((u64)bit << index_bits) + sbi->record_size, + pos, name, ctx); if (err) goto out; - - bit += 1; } out: - kfree(name); put_indx_node(node); - if (err == 1) { + if (!err) { + /* End of directory. */ + ctx->pos = eod; + } else if (err == 1) { /* 'ctx' is full. 
*/ err = 0; } else if (err == -ENOENT) { @@ -624,7 +668,7 @@ const struct file_operations ntfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = ntfs_readdir, - .fsync = generic_file_fsync, + .fsync = ntfs_file_fsync, .open = ntfs_file_open, .unlocked_ioctl = ntfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 6cb4479072a6..ae8c47cac406 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -15,6 +15,7 @@ #include <linux/fiemap.h> #include <linux/fileattr.h> #include <linux/filelock.h> +#include <linux/iomap.h> #include "debug.h" #include "ntfs.h" @@ -26,6 +27,38 @@ */ #define NTFS3_IOC_SHUTDOWN _IOR('X', 125, __u32) +/* + * Helper for ntfs_should_use_dio. + */ +static u32 ntfs_dio_alignment(struct inode *inode) +{ + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_resident(ni)) { + /* Check delalloc. */ + if (!ni->file.run_da.count) + return 0; + } + + /* In most cases this is bdev_logical_block_size(bdev). */ + return ni->mi.sbi->bdev_blocksize; +} + +/* + * Returns %true if the given DIO request should be attempted with DIO, or + * %false if it should fall back to buffered I/O. 
+ */ +static bool ntfs_should_use_dio(struct kiocb *iocb, struct iov_iter *iter) +{ + struct inode *inode = file_inode(iocb->ki_filp); + u32 dio_align = ntfs_dio_alignment(inode); + + if (!dio_align) + return false; + + return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align); +} + static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) { struct fstrim_range __user *user_range; @@ -186,13 +219,10 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, static int ntfs_extend_initialized_size(struct file *file, struct ntfs_inode *ni, - const loff_t valid, const loff_t new_valid) { struct inode *inode = &ni->vfs_inode; - struct address_space *mapping = inode->i_mapping; - struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; - loff_t pos = valid; + const loff_t valid = ni->i_valid; int err; if (valid >= new_valid) @@ -203,142 +233,41 @@ static int ntfs_extend_initialized_size(struct file *file, return 0; } - WARN_ON(is_compressed(ni)); - - for (;;) { - u32 zerofrom, len; - struct folio *folio; - u8 bits; - CLST vcn, lcn, clen; - - if (is_sparsed(ni)) { - bits = sbi->cluster_bits; - vcn = pos >> bits; - - err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, - false); - if (err) - goto out; - - if (lcn == SPARSE_LCN) { - pos = ((loff_t)clen + vcn) << bits; - ni->i_valid = pos; - goto next; - } - } - - zerofrom = pos & (PAGE_SIZE - 1); - len = PAGE_SIZE - zerofrom; - - if (pos + len > new_valid) - len = new_valid - pos; - - err = ntfs_write_begin(NULL, mapping, pos, len, &folio, NULL); - if (err) - goto out; - - folio_zero_range(folio, zerofrom, folio_size(folio) - zerofrom); - - err = ntfs_write_end(NULL, mapping, pos, len, len, folio, NULL); - if (err < 0) - goto out; - pos += len; - -next: - if (pos >= new_valid) - break; - - balance_dirty_pages_ratelimited(mapping); - cond_resched(); + err = iomap_zero_range(inode, valid, new_valid - valid, NULL, + &ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL); + if (err) { + ni->i_valid 
= valid; + ntfs_inode_warn(inode, + "failed to extend initialized size to %llx.", + new_valid); + return err; } return 0; - -out: - ni->i_valid = valid; - ntfs_inode_warn(inode, "failed to extend initialized size to %llx.", - new_valid); - return err; } -/* - * ntfs_zero_range - Helper function for punch_hole. - * - * It zeroes a range [vbo, vbo_to). - */ -static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) +static void ntfs_filemap_close(struct vm_area_struct *vma) { - int err = 0; - struct address_space *mapping = inode->i_mapping; - u32 blocksize = i_blocksize(inode); - pgoff_t idx = vbo >> PAGE_SHIFT; - u32 from = vbo & (PAGE_SIZE - 1); - pgoff_t idx_end = (vbo_to + PAGE_SIZE - 1) >> PAGE_SHIFT; - loff_t page_off; - struct buffer_head *head, *bh; - u32 bh_next, bh_off, to; - sector_t iblock; - struct folio *folio; - bool dirty = false; - - for (; idx < idx_end; idx += 1, from = 0) { - page_off = (loff_t)idx << PAGE_SHIFT; - to = (page_off + PAGE_SIZE) > vbo_to ? (vbo_to - page_off) : - PAGE_SIZE; - iblock = page_off >> inode->i_blkbits; - - folio = __filemap_get_folio( - mapping, idx, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - mapping_gfp_constraint(mapping, ~__GFP_FS)); - if (IS_ERR(folio)) - return PTR_ERR(folio); - - head = folio_buffers(folio); - if (!head) - head = create_empty_buffers(folio, blocksize, 0); - - bh = head; - bh_off = 0; - do { - bh_next = bh_off + blocksize; - - if (bh_next <= from || bh_off >= to) - continue; - - if (!buffer_mapped(bh)) { - ntfs_get_block(inode, iblock, bh, 0); - /* Unmapped? It's a hole - nothing to do. */ - if (!buffer_mapped(bh)) - continue; - } - - /* Ok, it's mapped. Make sure it's up-to-date. 
*/ - if (folio_test_uptodate(folio)) - set_buffer_uptodate(bh); - else if (bh_read(bh, 0) < 0) { - err = -EIO; - folio_unlock(folio); - folio_put(folio); - goto out; - } - - mark_buffer_dirty(bh); - } while (bh_off = bh_next, iblock += 1, - head != (bh = bh->b_this_page)); - - folio_zero_segment(folio, from, to); - dirty = true; + struct inode *inode = file_inode(vma->vm_file); + struct ntfs_inode *ni = ntfs_i(inode); + u64 from = (u64)vma->vm_pgoff << PAGE_SHIFT; + u64 to = min_t(u64, i_size_read(inode), + from + vma->vm_end - vma->vm_start); - folio_unlock(folio); - folio_put(folio); - cond_resched(); - } -out: - if (dirty) + if (ni->i_valid < to) { + ni->i_valid = to; mark_inode_dirty(inode); - return err; + } } +/* Copy of generic_file_vm_ops. */ +static const struct vm_operations_struct ntfs_file_vm_ops = { + .close = ntfs_filemap_close, + .fault = filemap_fault, + .map_pages = filemap_map_pages, + .page_mkwrite = filemap_page_mkwrite, +}; + /* * ntfs_file_mmap_prepare - file_operations::mmap_prepare */ @@ -347,7 +276,6 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) struct file *file = desc->file; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - u64 from = ((u64)desc->pgoff << PAGE_SHIFT); bool rw = desc->vm_flags & VM_WRITE; int err; @@ -379,7 +307,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) } if (rw) { - u64 to = min_t(loff_t, i_size_read(inode), + u64 from = (u64)desc->pgoff << PAGE_SHIFT; + u64 to = min_t(u64, i_size_read(inode), from + vma_desc_size(desc)); if (is_sparsed(ni)) { @@ -392,7 +321,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) for (; vcn < end; vcn += len) { err = attr_data_get_block(ni, vcn, 1, &lcn, - &len, &new, true); + &len, &new, true, + NULL, false); if (err) goto out; } @@ -403,8 +333,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) err = -EAGAIN; goto out; } - err = ntfs_extend_initialized_size(file, ni, - ni->i_valid, to); + 
err = ntfs_extend_initialized_size(file, ni, to); inode_unlock(inode); if (err) goto out; @@ -412,6 +341,8 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc) } err = generic_file_mmap_prepare(desc); + if (!err && rw) + desc->vm_ops = &ntfs_file_vm_ops; out: return err; } @@ -432,55 +363,23 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); if (end > inode->i_size) { + /* + * Normal files: increase file size, allocate space. + * Sparse/Compressed: increase file size. No space allocated. + */ err = ntfs_set_size(inode, end); if (err) goto out; } if (extend_init && !is_compressed(ni)) { - err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); + err = ntfs_extend_initialized_size(file, ni, pos); if (err) goto out; } else { err = 0; } - if (file && is_sparsed(ni)) { - /* - * This code optimizes large writes to sparse file. - * TODO: merge this fragment with fallocate fragment. - */ - struct ntfs_sb_info *sbi = ni->mi.sbi; - CLST vcn = pos >> sbi->cluster_bits; - CLST cend = bytes_to_cluster(sbi, end); - CLST cend_v = bytes_to_cluster(sbi, ni->i_valid); - CLST lcn, clen; - bool new; - - if (cend_v > cend) - cend_v = cend; - - /* - * Allocate and zero new clusters. - * Zeroing these clusters may be too long. - */ - for (; vcn < cend_v; vcn += clen) { - err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, - &clen, &new, true); - if (err) - goto out; - } - /* - * Allocate but not zero new clusters. 
- */ - for (; vcn < cend; vcn += clen) { - err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false); - if (err) - goto out; - } - } - inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); mark_inode_dirty(inode); @@ -504,53 +403,37 @@ out: static int ntfs_truncate(struct inode *inode, loff_t new_size) { - struct super_block *sb = inode->i_sb; + int err; struct ntfs_inode *ni = ntfs_i(inode); - int err, dirty = 0; - u64 new_valid; - - if (!S_ISREG(inode->i_mode)) - return 0; - - if (is_compressed(ni)) { - if (ni->i_valid > new_size) - ni->i_valid = new_size; - } else { - err = block_truncate_page(inode->i_mapping, new_size, - ntfs_get_block); - if (err) - return err; - } - - new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); + u64 new_valid = min_t(u64, ni->i_valid, new_size); truncate_setsize(inode, new_size); ni_lock(ni); down_write(&ni->file.run_lock); - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &new_valid, ni->mi.sbi->options->prealloc, NULL); + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &new_valid, ni->mi.sbi->options->prealloc, NULL, + false); up_write(&ni->file.run_lock); - if (new_valid < ni->i_valid) - ni->i_valid = new_valid; + ni->i_valid = new_valid; ni_unlock(ni); + if (err) + return err; + ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); if (!IS_DIRSYNC(inode)) { - dirty = 1; + mark_inode_dirty(inode); } else { err = ntfs_sync_inode(inode); if (err) return err; } - if (dirty) - mark_inode_dirty(inode); - return 0; } @@ -623,7 +506,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (mode & FALLOC_FL_PUNCH_HOLE) { u32 frame_size; - loff_t mask, vbo_a, end_a, tmp; + loff_t mask, vbo_a, end_a, tmp, from; err = filemap_write_and_wait_range(mapping, vbo_down, LLONG_MAX); @@ -643,24 +526,24 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) 
/* Process not aligned punch. */ err = 0; + if (end > i_size) + end = i_size; mask = frame_size - 1; vbo_a = (vbo + mask) & ~mask; end_a = end & ~mask; tmp = min(vbo_a, end); - if (tmp > vbo) { - err = ntfs_zero_range(inode, vbo, tmp); + from = min_t(loff_t, ni->i_valid, vbo); + /* Zero head of punch. */ + if (tmp > from) { + err = iomap_zero_range(inode, from, tmp - from, NULL, + &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); if (err) goto out; } - if (vbo < end_a && end_a < end) { - err = ntfs_zero_range(inode, end_a, end); - if (err) - goto out; - } - - /* Aligned punch_hole */ + /* Aligned punch_hole. Deallocate clusters. */ if (end_a > vbo_a) { ni_lock(ni); err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL); @@ -668,6 +551,15 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) if (err) goto out; } + + /* Zero tail of punch. */ + if (vbo < end_a && end_a < end) { + err = iomap_zero_range(inode, end_a, end - end_a, NULL, + &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + if (err) + goto out; + } } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { /* * Write tail of the last page before removed range since @@ -765,17 +657,26 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) for (; vcn < cend_v; vcn += clen) { err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn, &clen, &new, - true); + true, NULL, false); if (err) goto out; } + + /* + * Moving up 'valid size'. + */ + err = ntfs_extend_initialized_size( + file, ni, (u64)cend_v << cluster_bits); + if (err) + goto out; + /* * Allocate but not zero new clusters. */ for (; vcn < cend; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, &clen, &new, - false); + false, NULL, false); if (err) goto out; } @@ -786,10 +687,11 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) /* True - Keep preallocated. 
*/ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, i_size, &ni->i_valid, - true, NULL); + true); ni_unlock(ni); if (err) goto out; + i_size_write(inode, i_size); } else if (new_size > i_size) { i_size_write(inode, new_size); } @@ -926,12 +828,18 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); + size_t bytes = iov_iter_count(iter); + loff_t valid, i_size, vbo, end; + unsigned int dio_flags; ssize_t err; err = check_read_restriction(inode); if (err) return err; + if (!bytes) + return 0; /* skip atime */ + if (is_compressed(ni)) { if (iocb->ki_flags & IOCB_DIRECT) { ntfs_inode_warn( @@ -942,17 +850,63 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) file->f_ra.ra_pages = 0; } - /* Check minimum alignment for dio. */ - if (iocb->ki_flags & IOCB_DIRECT) { - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; - if ((iocb->ki_pos | iov_iter_alignment(iter)) & - sbi->bdev_blocksize_mask) { - iocb->ki_flags &= ~IOCB_DIRECT; + /* Fallback to buffered I/O if the inode does not support direct I/O. */ + if (!(iocb->ki_flags & IOCB_DIRECT) || + !ntfs_should_use_dio(iocb, iter)) { + iocb->ki_flags &= ~IOCB_DIRECT; + return generic_file_read_iter(iocb, iter); + } + + if (iocb->ki_flags & IOCB_NOWAIT) { + if (!inode_trylock_shared(inode)) + return -EAGAIN; + } else { + inode_lock_shared(inode); + } + + vbo = iocb->ki_pos; + end = vbo + bytes; + dio_flags = 0; + valid = ni->i_valid; + i_size = inode->i_size; + + if (vbo < valid) { + if (valid < end) { + /* read cross 'valid' size. */ + dio_flags |= IOMAP_DIO_FORCE_WAIT; + } + + if (ni->file.run_da.count) { + /* Direct I/O is not compatible with delalloc. 
*/ + err = ni_allocate_da_blocks(ni); + if (err) + goto out; } + + err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, dio_flags, + NULL, 0); + + if (err <= 0) + goto out; + end = vbo + err; + if (valid < end) { + size_t to_zero = end - valid; + /* Fix iter. */ + iov_iter_revert(iter, to_zero); + iov_iter_zero(to_zero, iter); + } + } else if (vbo < i_size) { + if (end > i_size) + bytes = i_size - vbo; + iov_iter_zero(bytes, iter); + iocb->ki_pos += bytes; + err = bytes; } - return generic_file_read_iter(iocb, iter); +out: + inode_unlock_shared(inode); + file_accessed(iocb->ki_filp); + return err; } /* @@ -996,7 +950,7 @@ static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index, folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, - gfp_mask); + gfp_mask | __GFP_ZERO); if (IS_ERR(folio)) { while (npages--) { folio = page_folio(pages[npages]); @@ -1073,7 +1027,7 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) off = valid & (frame_size - 1); err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn, - &clen, NULL, false); + &clen, NULL, false, NULL, false); if (err) goto out; @@ -1265,6 +1219,9 @@ static int check_write_restriction(struct inode *inode) return -EOPNOTSUPP; } + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + return 0; } @@ -1276,8 +1233,7 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct ntfs_inode *ni = ntfs_i(inode); - ssize_t ret; - int err; + ssize_t ret, err; if (!inode_trylock(inode)) { if (iocb->ki_flags & IOCB_NOWAIT) @@ -1315,15 +1271,75 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret) goto out; - ret = is_compressed(ni) ? 
ntfs_compress_write(iocb, from) : - __generic_file_write_iter(iocb, from); + if (is_compressed(ni)) { + ret = ntfs_compress_write(iocb, from); + goto out; + } + + /* Fallback to buffered I/O if the inode does not support direct I/O. */ + if (!(iocb->ki_flags & IOCB_DIRECT) || + !ntfs_should_use_dio(iocb, from)) { + iocb->ki_flags &= ~IOCB_DIRECT; + + ret = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + inode_unlock(inode); + + if (likely(ret > 0)) + ret = generic_write_sync(iocb, ret); + + return ret; + } + + if (ni->file.run_da.count) { + /* Direct I/O is not compatible with delalloc. */ + ret = ni_allocate_da_blocks(ni); + if (ret) + goto out; + } + + ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, 0, NULL, 0); + + if (ret == -ENOTBLK) { + /* Returns -ENOTBLK in case of a page invalidation failure for writes.*/ + /* The callers needs to fall back to buffered I/O in this case. */ + ret = 0; + } + + if (ret >= 0 && iov_iter_count(from)) { + loff_t offset = iocb->ki_pos, endbyte; + + iocb->ki_flags &= ~IOCB_DIRECT; + err = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops, + &ntfs_iomap_folio_ops, NULL); + if (err < 0) { + ret = err; + goto out; + } + + /* + * We need to ensure that the pages within the page cache for + * the range covered by this I/O are written to disk and + * invalidated. This is in attempt to preserve the expected + * direct I/O semantics in the case we fallback to buffered I/O + * to complete off the I/O request. 
+ */ + ret += err; + endbyte = offset + err - 1; + err = filemap_write_and_wait_range(inode->i_mapping, offset, + endbyte); + if (err) { + ret = err; + goto out; + } + + invalidate_mapping_pages(inode->i_mapping, offset >> PAGE_SHIFT, + endbyte >> PAGE_SHIFT); + } out: inode_unlock(inode); - if (ret > 0) - ret = generic_write_sync(iocb, ret); - return ret; } @@ -1362,39 +1378,49 @@ int ntfs_file_open(struct inode *inode, struct file *file) #endif } + file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT; + return generic_file_open(inode, file); } /* * ntfs_file_release - file_operations::release + * + * Called when an inode is released. Note that this is different + * from ntfs_file_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. */ static int ntfs_file_release(struct inode *inode, struct file *file) { - struct ntfs_inode *ni = ntfs_i(inode); - struct ntfs_sb_info *sbi = ni->mi.sbi; - int err = 0; - - /* If we are last writer on the inode, drop the block reservation. */ - if (sbi->options->prealloc && - ((file->f_mode & FMODE_WRITE) && - atomic_read(&inode->i_writecount) == 1) - /* - * The only file when inode->i_fop = &ntfs_file_operations and - * init_rwsem(&ni->file.run_lock) is not called explicitly is MFT. - * - * Add additional check here. - */ - && inode->i_ino != MFT_REC_MFT) { + int err; + struct ntfs_inode *ni; + + if (!(file->f_mode & FMODE_WRITE) || + atomic_read(&inode->i_writecount) != 1 || + inode->i_ino == MFT_REC_MFT) { + return 0; + } + + /* Close the last writer on the inode. */ + ni = ntfs_i(inode); + + /* Allocate delayed blocks (clusters). */ + err = ni_allocate_da_blocks(ni); + if (err) + goto out; + + if (ni->mi.sbi->options->prealloc) { ni_lock(ni); down_write(&ni->file.run_lock); + /* Deallocate preallocated. 
*/ err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - i_size_read(inode), &ni->i_valid, false, - NULL); + inode->i_size, &ni->i_valid, false); up_write(&ni->file.run_lock); ni_unlock(ni); } +out: return err; } @@ -1411,16 +1437,30 @@ int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, if (unlikely(is_bad_ni(ni))) return -EINVAL; - err = fiemap_prep(inode, fieinfo, start, &len, ~FIEMAP_FLAG_XATTR); - if (err) - return err; + if (is_compressed(ni)) { + /* Unfortunately cp -r incorrectly treats compressed clusters. */ + ntfs_inode_warn(inode, + "fiemap is not supported for compressed file"); + return -EOPNOTSUPP; + } - ni_lock(ni); + if (S_ISDIR(inode->i_mode)) { + /* TODO: add support for dirs (ATTR_ALLOC). */ + ntfs_inode_warn(inode, + "fiemap is not supported for directories"); + return -EOPNOTSUPP; + } - err = ni_fiemap(ni, fieinfo, start, len); + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + ntfs_inode_warn(inode, "fiemap(xattr) is not supported"); + return -EOPNOTSUPP; + } - ni_unlock(ni); + inode_lock_shared(inode); + err = iomap_fiemap(inode, fieinfo, start, len, &ntfs_iomap_ops); + + inode_unlock_shared(inode); return err; } @@ -1444,13 +1484,62 @@ static ssize_t ntfs_file_splice_write(struct pipe_inode_info *pipe, /* * ntfs_file_fsync - file_operations::fsync */ -static int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) +int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file_inode(file); - if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + int err, ret; + + if (unlikely(ntfs3_forced_shutdown(sb))) return -EIO; - return generic_file_fsync(file, start, end, datasync); + ret = file_write_and_wait_range(file, start, end); + if (ret) + return ret; + + ret = write_inode_now(inode, !datasync); + + if (!ret) { + ret = ni_write_parents(ntfs_i(inode), !datasync); + } + + if 
(!ret) { + ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); + ntfs_update_mftmirr(sbi); + } + + err = sync_blockdev(sb->s_bdev); + if (unlikely(err && !ret)) + ret = err; + if (!ret) + blkdev_issue_flush(sb->s_bdev); + return ret; +} + +/* + * ntfs_llseek - file_operations::llseek + */ +static loff_t ntfs_llseek(struct file *file, loff_t offset, int whence) +{ + struct inode *inode = file->f_mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + loff_t maxbytes = ntfs_get_maxbytes(ni); + loff_t ret; + + if (whence == SEEK_DATA || whence == SEEK_HOLE) { + inode_lock_shared(inode); + /* Scan file for hole or data. */ + ret = ni_seek_data_or_hole(ni, offset, whence == SEEK_DATA); + inode_unlock_shared(inode); + + if (ret >= 0) + ret = vfs_setpos(file, ret, maxbytes); + } else { + ret = generic_file_llseek_size(file, offset, whence, maxbytes, + i_size_read(inode)); + } + return ret; } // clang-format off @@ -1464,7 +1553,7 @@ const struct inode_operations ntfs_file_inode_operations = { }; const struct file_operations ntfs_file_operations = { - .llseek = generic_file_llseek, + .llseek = ntfs_llseek, .read_iter = ntfs_file_read_iter, .write_iter = ntfs_file_write_iter, .unlocked_ioctl = ntfs_ioctl, diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c index 641ddaf8d4a0..bd0fa481e4b3 100644 --- a/fs/ntfs3/frecord.c +++ b/fs/ntfs3/frecord.c @@ -123,6 +123,8 @@ void ni_clear(struct ntfs_inode *ni) indx_clear(&ni->dir); else { run_close(&ni->file.run); + ntfs_sub_da(ni->mi.sbi, run_len(&ni->file.run_da)); + run_close(&ni->file.run_da); #ifdef CONFIG_NTFS3_LZX_XPRESS if (ni->file.offs_folio) { /* On-demand allocated page for offsets. */ @@ -1850,183 +1852,11 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, return REPARSE_LINK; } -/* - * ni_fiemap - Helper for file_fiemap(). - * - * Assumed ni_lock. - * TODO: Less aggressive locks. 
- */ -int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - __u64 vbo, __u64 len) -{ - int err = 0; - struct ntfs_sb_info *sbi = ni->mi.sbi; - u8 cluster_bits = sbi->cluster_bits; - struct runs_tree run; - struct ATTRIB *attr; - CLST vcn = vbo >> cluster_bits; - CLST lcn, clen; - u64 valid = ni->i_valid; - u64 lbo, bytes; - u64 end, alloc_size; - size_t idx = -1; - u32 flags; - bool ok; - - run_init(&run); - if (S_ISDIR(ni->vfs_inode.i_mode)) { - attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME, - ARRAY_SIZE(I30_NAME), NULL, NULL); - } else { - attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, - NULL); - if (!attr) { - err = -EINVAL; - goto out; - } - if (is_attr_compressed(attr)) { - /* Unfortunately cp -r incorrectly treats compressed clusters. */ - err = -EOPNOTSUPP; - ntfs_inode_warn( - &ni->vfs_inode, - "fiemap is not supported for compressed file (cp -r)"); - goto out; - } - } - - if (!attr || !attr->non_res) { - err = fiemap_fill_next_extent( - fieinfo, 0, 0, - attr ? le32_to_cpu(attr->res.data_size) : 0, - FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST | - FIEMAP_EXTENT_MERGED); - goto out; - } - - end = vbo + len; - alloc_size = le64_to_cpu(attr->nres.alloc_size); - if (end > alloc_size) - end = alloc_size; - - while (vbo < end) { - if (idx == -1) { - ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); - } else { - CLST vcn_next = vcn; - - ok = run_get_entry(&run, ++idx, &vcn, &lcn, &clen) && - vcn == vcn_next; - if (!ok) - vcn = vcn_next; - } - - if (!ok) { - err = attr_load_runs_vcn(ni, attr->type, - attr_name(attr), - attr->name_len, &run, vcn); - - if (err) - break; - - ok = run_lookup_entry(&run, vcn, &lcn, &clen, &idx); - - if (!ok) { - err = -EINVAL; - break; - } - } - - if (!clen) { - err = -EINVAL; // ? 
- break; - } - - if (lcn == SPARSE_LCN) { - vcn += clen; - vbo = (u64)vcn << cluster_bits; - continue; - } - - flags = FIEMAP_EXTENT_MERGED; - if (S_ISDIR(ni->vfs_inode.i_mode)) { - ; - } else if (is_attr_compressed(attr)) { - CLST clst_data; - - err = attr_is_frame_compressed(ni, attr, - vcn >> attr->nres.c_unit, - &clst_data, &run); - if (err) - break; - if (clst_data < NTFS_LZNT_CLUSTERS) - flags |= FIEMAP_EXTENT_ENCODED; - } else if (is_attr_encrypted(attr)) { - flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; - } - - vbo = (u64)vcn << cluster_bits; - bytes = (u64)clen << cluster_bits; - lbo = (u64)lcn << cluster_bits; - - vcn += clen; - - if (vbo + bytes >= end) - bytes = end - vbo; - - if (vbo + bytes <= valid) { - ; - } else if (vbo >= valid) { - flags |= FIEMAP_EXTENT_UNWRITTEN; - } else { - /* vbo < valid && valid < vbo + bytes */ - u64 dlen = valid - vbo; - - if (vbo + dlen >= end) - flags |= FIEMAP_EXTENT_LAST; - - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, - flags); - - if (err < 0) - break; - if (err == 1) { - err = 0; - break; - } - - vbo = valid; - bytes -= dlen; - if (!bytes) - continue; - - lbo += dlen; - flags |= FIEMAP_EXTENT_UNWRITTEN; - } - - if (vbo + bytes >= end) - flags |= FIEMAP_EXTENT_LAST; - - err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); - if (err < 0) - break; - if (err == 1) { - err = 0; - break; - } - - vbo += bytes; - } - -out: - run_close(&run); - return err; -} - static struct page *ntfs_lock_new_page(struct address_space *mapping, - pgoff_t index, gfp_t gfp) + pgoff_t index, gfp_t gfp) { - struct folio *folio = __filemap_get_folio(mapping, index, - FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); + struct folio *folio = __filemap_get_folio( + mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); struct page *page; if (IS_ERR(folio)) @@ -2046,18 +1876,18 @@ static struct page *ntfs_lock_new_page(struct address_space *mapping, } /* - * ni_readpage_cmpr + * ni_read_folio_cmpr * * When decompressing, we typically 
obtain more than one page per reference. * We inject the additional pages into the page cache. */ -int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio) +int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio) { int err; struct ntfs_sb_info *sbi = ni->mi.sbi; struct address_space *mapping = folio->mapping; - pgoff_t index = folio->index; - u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT; + pgoff_t index; + u64 frame_vbo, vbo = folio_pos(folio); struct page **pages = NULL; /* Array of at most 16 pages. stack? */ u8 frame_bits; CLST frame; @@ -2107,7 +1937,9 @@ int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio) pages[i] = pg; } + ni_lock(ni); err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame, 0); + ni_unlock(ni); out1: for (i = 0; i < pages_per_frame; i++) { @@ -2184,7 +2016,8 @@ int ni_decompress_file(struct ntfs_inode *ni) for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, - &clen, &new, false); + &clen, &new, false, NULL, + false); if (err) goto out; } @@ -2405,7 +2238,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, struct runs_tree *run = &ni->file.run; u64 valid_size = ni->i_valid; u64 vbo_disk; - size_t unc_size; + size_t unc_size = 0; u32 frame_size, i, ondisk_size; struct page *pg; struct ATTRIB *attr; @@ -3002,6 +2835,134 @@ bool ni_is_dirty(struct inode *inode) } /* + * ni_seek_data_or_hole + * + * Helper function for ntfs_llseek( SEEK_DATA/SEEK_HOLE ) + */ +loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data) +{ + int err; + u8 cluster_bits = ni->mi.sbi->cluster_bits; + CLST vcn, lcn, clen; + loff_t vbo; + + /* Enumerate all fragments. */ + for (vcn = offset >> cluster_bits;; vcn += clen) { + err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false, + NULL, false); + if (err) { + return err; + } + + if (lcn == RESIDENT_LCN) { + /* clen - resident size in bytes. 
clen == ni->vfs_inode.i_size */ + if (offset >= clen) { + /* check eof. */ + return -ENXIO; + } + + if (data) { + return offset; + } + + return clen; + } + + if (lcn == EOF_LCN) { + if (data) { + return -ENXIO; + } + + /* implicit hole at the end of file. */ + return ni->vfs_inode.i_size; + } + + if (data) { + /* + * Adjust the file offset to the next location in the file greater than + * or equal to offset containing data. If offset points to data, then + * the file offset is set to offset. + */ + if (lcn != SPARSE_LCN) { + vbo = (u64)vcn << cluster_bits; + return max(vbo, offset); + } + } else { + /* + * Adjust the file offset to the next hole in the file greater than or + * equal to offset. If offset points into the middle of a hole, then the + * file offset is set to offset. If there is no hole past offset, then the + * file offset is adjusted to the end of the file + * (i.e., there is an implicit hole at the end of any file). + */ + if (lcn == SPARSE_LCN && + /* native compression hole begins at aligned vcn. */ + (!(ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) || + !(vcn & (NTFS_LZNT_CLUSTERS - 1)))) { + vbo = (u64)vcn << cluster_bits; + return max(vbo, offset); + } + } + + if (!clen) { + /* Corrupted file. */ + return -EINVAL; + } + } +} + +/* + * ni_write_parents + * + * Helper function for ntfs_file_fsync. + */ +int ni_write_parents(struct ntfs_inode *ni, int sync) +{ + int err = 0; + struct ATTRIB *attr = NULL; + struct ATTR_LIST_ENTRY *le = NULL; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct super_block *sb = sbi->sb; + + while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL, + NULL))) { + struct inode *dir; + struct ATTR_FILE_NAME *fname; + + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (!fname) + continue; + + /* Check simple case when parent inode equals current inode. 
*/ + if (ino_get(&fname->home) == ni->vfs_inode.i_ino) { + if (MFT_REC_ROOT != ni->vfs_inode.i_ino) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + } + continue; + } + + dir = ntfs_iget5(sb, &fname->home, NULL); + if (IS_ERR(dir)) { + ntfs_inode_warn( + &ni->vfs_inode, + "failed to open parent directory r=%lx to write", + (long)ino_get(&fname->home)); + continue; + } + + if (!is_bad_inode(dir)) { + int err2 = write_inode_now(dir, sync); + if (!err) + err = err2; + } + iput(dir); + } + + return err; +} + +/* * ni_update_parent * * Update duplicate info of ATTR_FILE_NAME in MFT and in parent directories. @@ -3277,3 +3238,62 @@ out: return 0; } + +/* + * Force to allocate all delay allocated clusters. + */ +int ni_allocate_da_blocks(struct ntfs_inode *ni) +{ + int err; + + ni_lock(ni); + down_write(&ni->file.run_lock); + + err = ni_allocate_da_blocks_locked(ni); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; +} + +/* + * Force to allocate all delay allocated clusters. + */ +int ni_allocate_da_blocks_locked(struct ntfs_inode *ni) +{ + int err; + + if (!ni->file.run_da.count) + return 0; + + if (is_sparsed(ni)) { + CLST vcn, lcn, clen, alen; + bool new; + + /* + * Sparse file allocates clusters in 'attr_data_get_block_locked' + */ + while (run_get_entry(&ni->file.run_da, 0, &vcn, &lcn, &clen)) { + /* TODO: zero=true? 
*/ + err = attr_data_get_block_locked(ni, vcn, clen, &lcn, + &alen, &new, true, + NULL, true); + if (err) + break; + if (!new) { + err = -EINVAL; + break; + } + } + } else { + /* + * Normal file allocates clusters in 'attr_set_size' + */ + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, + ni->vfs_inode.i_size, &ni->i_valid, + false, NULL, true); + } + + return err; +} diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 38934e6978ec..10863c83c315 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1074,6 +1074,8 @@ struct ntfs_log { u32 client_undo_commit; struct restart_info rst_info, rst_info2; + + struct file_ra_state read_ahead; }; static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn) @@ -1164,8 +1166,8 @@ static int read_log_page(struct ntfs_log *log, u32 vbo, page_buf = page_off ? log->one_page_buf : *buffer; - err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf, - log->page_size, NULL); + err = ntfs_read_run_nb_ra(ni->mi.sbi, &ni->file.run, page_vbo, page_buf, + log->page_size, NULL, &log->read_ahead); if (err) goto out; @@ -3029,6 +3031,26 @@ static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi, } /* + * update_oa_attr - Synchronize OpenAttr's attribute pointer with modified attribute + * @oa2: OpenAttr structure in memory that needs to be updated + * @attr: Modified attribute from MFT record to duplicate + * + * Returns true on success, false on allocation failure. + */ +static bool update_oa_attr(struct OpenAttr *oa2, struct ATTRIB *attr) +{ + void *p2; + + p2 = kmemdup(attr, le32_to_cpu(attr->size), GFP_NOFS); + if (p2) { + kfree(oa2->attr); + oa2->attr = p2; + return true; + } + return false; +} + +/* * do_action - Common routine for the Redo and Undo Passes. * @rlsn: If it is NULL then undo. 
*/ @@ -3251,15 +3273,8 @@ skip_load_parent: le16_add_cpu(&rec->hard_links, 1); oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - // run_close(oa2->run1); - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2) + update_oa_attr(oa2, attr); mi->dirty = true; break; @@ -3318,16 +3333,8 @@ move_data: memmove(Add2Ptr(attr, aoff), data, dlen); oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - // run_close(&oa2->run0); - oa2->run1 = &oa2->run0; - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2 && update_oa_attr(oa2, attr)) + oa2->run1 = &oa2->run0; mi->dirty = true; break; @@ -3377,14 +3384,9 @@ move_data: attr->nres.total_size = new_sz->total_size; oa2 = find_loaded_attr(log, attr, rno_base); - if (oa2) { - void *p2 = kmemdup(attr, le32_to_cpu(attr->size), - GFP_NOFS); - if (p2) { - kfree(oa2->attr); - oa2->attr = p2; - } - } + if (oa2) + update_oa_attr(oa2, attr); + mi->dirty = true; break; @@ -3429,6 +3431,9 @@ move_data: e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); esize = le16_to_cpu(e1->size); + if (PtrOffset(e1, Add2Ptr(hdr, used)) < esize) + goto dirty_vol; + e2 = Add2Ptr(e1, esize); memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used))); @@ -5128,7 +5133,7 @@ commit_undo: undo_action_done: - ntfs_update_mftmirr(sbi, 0); + ntfs_update_mftmirr(sbi); sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY; diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c index bd67ba7b5015..0df2aa81d884 100644 --- a/fs/ntfs3/fsntfs.c +++ b/fs/ntfs3/fsntfs.c @@ -445,36 +445,59 @@ up_write: } /* - * ntfs_check_for_free_space + * ntfs_check_free_space * * Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records */ -bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen) +bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen, + bool da) { size_t free, zlen, avail; 
struct wnd_bitmap *wnd; + CLST da_clusters = ntfs_get_da(sbi); wnd = &sbi->used.bitmap; down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); free = wnd_zeroes(wnd); + + if (free >= da_clusters) { + free -= da_clusters; + } else { + free = 0; + } + zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd)); up_read(&wnd->rw_lock); - if (free < zlen + clen) + if (free < zlen + clen) { return false; + } avail = free - (zlen + clen); - wnd = &sbi->mft.bitmap; - down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); - free = wnd_zeroes(wnd); - zlen = wnd_zone_len(wnd); - up_read(&wnd->rw_lock); + /* + * When delalloc is active then keep in mind some reserved space. + * The worst case: 1 mft record per each ~500 clusters. + */ + if (da) { + /* 1 mft record per each 1024 clusters. */ + mlen += da_clusters >> 10; + } + + if (mlen || !avail) { + wnd = &sbi->mft.bitmap; + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + free = wnd_zeroes(wnd); + zlen = wnd_zone_len(wnd); + up_read(&wnd->rw_lock); - if (free >= zlen + mlen) - return true; + if (free < zlen + mlen && + avail < bytes_to_cluster(sbi, mlen << sbi->record_bits)) { + return false; + } + } - return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits); + return true; } /* @@ -509,8 +532,8 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) /* Step 1: Resize $MFT::DATA. */ down_write(&ni->file.run_lock); - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, - new_mft_bytes, NULL, false, &attr); + err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, + new_mft_bytes, NULL, false, &attr, false); if (err) { up_write(&ni->file.run_lock); @@ -525,7 +548,7 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi) new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total); err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, - new_bitmap_bytes, &new_bitmap_bytes, true, NULL); + new_bitmap_bytes, &new_bitmap_bytes, true); /* Refresh MFT Zone if necessary. 
*/ down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); @@ -843,9 +866,8 @@ int ntfs_refresh_zone(struct ntfs_sb_info *sbi) /* * ntfs_update_mftmirr - Update $MFTMirr data. */ -void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi) { - int err; struct super_block *sb = sbi->sb; u32 blocksize, bytes; sector_t block1, block2; @@ -875,9 +897,7 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) return; } - if (buffer_locked(bh2)) - __wait_on_buffer(bh2); - + wait_on_buffer(bh2); lock_buffer(bh2); memcpy(bh2->b_data, bh1->b_data, blocksize); set_buffer_uptodate(bh2); @@ -886,12 +906,7 @@ void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) put_bh(bh1); bh1 = NULL; - - err = wait ? sync_dirty_buffer(bh2) : 0; - put_bh(bh2); - if (err) - return; } sbi->flags &= ~NTFS_FLAGS_MFTMIRR; @@ -1069,9 +1084,7 @@ int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, return -ENOMEM; } - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); if (buf) { memcpy(bh->b_data + off, buf, op); @@ -1168,11 +1181,13 @@ struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, return ntfs_bread(sb, lbo >> sb->s_blocksize_bits); } -int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, - u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb) +int ntfs_read_run_nb_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb, + struct file_ra_state *ra) { int err; struct super_block *sb = sbi->sb; + struct address_space *mapping = sb->s_bdev->bd_mapping; u32 blocksize = sb->s_blocksize; u8 cluster_bits = sbi->cluster_bits; u32 off = vbo & sbi->cluster_mask; @@ -1212,10 +1227,22 @@ int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, nb->bytes = bytes; } + if (ra && !ra->ra_pages) + file_ra_state_init(ra, mapping); + for (;;) { u32 len32 = len >= bytes ? 
bytes : len; sector_t block = lbo >> sb->s_blocksize_bits; + if (ra) { + pgoff_t index = lbo >> PAGE_SHIFT; + if (!ra_has_index(ra, index)) { + page_cache_sync_readahead(mapping, ra, NULL, + index, 1); + ra->prev_pos = (loff_t)index << PAGE_SHIFT; + } + } + do { u32 op = blocksize - off; @@ -1252,6 +1279,12 @@ int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, } while (len32); + if (!run) { + err = -EINVAL; + goto out; + } + + /* Get next fragment to read. */ vcn_next = vcn + clen; if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || vcn != vcn_next) { @@ -1286,11 +1319,11 @@ out: * * Return: < 0 if error, 0 if ok, -E_NTFS_FIXUP if need to update fixups. */ -int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, - struct NTFS_RECORD_HEADER *rhdr, u32 bytes, - struct ntfs_buffers *nb) +int ntfs_read_bh_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb, struct file_ra_state *ra) { - int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb); + int err = ntfs_read_run_nb_ra(sbi, run, vbo, rhdr, bytes, nb, ra); if (err) return err; @@ -1347,12 +1380,9 @@ int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, err = -ENOMEM; goto out; } - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); - if (!buffer_uptodate(bh)) - { + if (!buffer_uptodate(bh)) { memset(bh->b_data, 0, blocksize); set_buffer_uptodate(bh); } @@ -1427,9 +1457,7 @@ int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, if (op > bytes) op = bytes; - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); bh_data = bh->b_data + off; @@ -2186,7 +2214,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi, if (new_sds_size > ni->vfs_inode.i_size) { err = attr_set_size(ni, ATTR_DATA, SDS_NAME, ARRAY_SIZE(SDS_NAME), &ni->file.run, - new_sds_size, 
&new_sds_size, false, NULL); + new_sds_size, &new_sds_size, false); if (err) goto out; } diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index 7157cfd70fdc..2416c61050f1 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -252,9 +252,7 @@ static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni, bbuf->bh = bh; - if (buffer_locked(bh)) - __wait_on_buffer(bh); - + wait_on_buffer(bh); lock_buffer(bh); sb = sbi->sb; @@ -1028,17 +1026,18 @@ static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, } /* - * indx_read + * indx_read_ra * * If ntfs_readdir calls this function * inode is shared locked and no ni_lock. * Use rw_semaphore for read/write access to alloc_run. */ -int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, - struct indx_node **node) +int indx_read_ra(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node, struct file_ra_state *ra) { int err; struct INDEX_BUFFER *ib; + struct ntfs_sb_info *sbi = ni->mi.sbi; struct runs_tree *run = &indx->alloc_run; struct rw_semaphore *lock = &indx->run_lock; u64 vbo = (u64)vbn << indx->vbn2vbo_bits; @@ -1064,7 +1063,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, } down_read(lock); - err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + err = ntfs_read_bh_ra(sbi, run, vbo, &ib->rhdr, bytes, &in->nb, ra); up_read(lock); if (!err) goto ok; @@ -1084,7 +1083,7 @@ int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, goto out; down_read(lock); - err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + err = ntfs_read_bh_ra(sbi, run, vbo, &ib->rhdr, bytes, &in->nb, ra); up_read(lock); if (err == -E_NTFS_FIXUP) goto ok; @@ -1100,7 +1099,7 @@ ok: } if (err == -E_NTFS_FIXUP) { - ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); + ntfs_write_bh(sbi, &ib->rhdr, &in->nb, 0); err = 0; } @@ -1190,7 +1189,12 @@ int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, return 
-EINVAL; } - fnd_push(fnd, node, e); + err = fnd_push(fnd, node, e); + + if (err) { + put_indx_node(node); + return err; + } } *entry = e; @@ -1442,8 +1446,8 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, run_init(&run); - err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF, - &alen, 0, NULL, NULL); + err = attr_allocate_clusters(sbi, &run, NULL, 0, 0, len, NULL, + ALLOCATE_DEF, &alen, 0, NULL, NULL); if (err) goto out; @@ -1527,8 +1531,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, /* Increase bitmap. */ err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, &indx->bitmap_run, - ntfs3_bitmap_size(bit + 1), NULL, true, - NULL); + ntfs3_bitmap_size(bit + 1), NULL, true); if (err) goto out1; } @@ -1549,8 +1552,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, /* Increase allocation. */ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, data_size, &data_size, true, - NULL); + &indx->alloc_run, data_size, &data_size, true); if (err) { if (bmp) goto out2; @@ -1568,7 +1570,7 @@ out: out2: /* Ops. No space? 
*/ attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL); + &indx->bitmap_run, bmp_size, &bmp_size_v, false); out1: return err; @@ -1998,6 +2000,7 @@ int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni, fnd->level - 1, fnd); } + indx->version += 1; out: fnd_put(fnd_a); out1: @@ -2101,7 +2104,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, new_data = (u64)bit << indx->index_bits; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, new_data, &new_data, false, NULL); + &indx->alloc_run, new_data, &new_data, false); if (err) return err; @@ -2113,7 +2116,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, return 0; err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, bpb, &bpb, false, NULL); + &indx->bitmap_run, bpb, &bpb, false); return err; } @@ -2328,6 +2331,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, hdr = &root->ihdr; e = fnd->root_de; n = NULL; + ib = NULL; } e_size = le16_to_cpu(e->size); @@ -2350,7 +2354,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, * Check to see if removing that entry made * the leaf empty. 
*/ - if (ib_is_leaf(ib) && ib_is_empty(ib)) { + if (ib && ib_is_leaf(ib) && ib_is_empty(ib)) { fnd_pop(fnd); fnd_push(fnd2, n, e); } @@ -2598,7 +2602,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, in = &s_index_names[indx->type]; err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, - &indx->alloc_run, 0, NULL, false, NULL); + &indx->alloc_run, 0, NULL, false); if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, 0); @@ -2607,7 +2611,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, run_close(&indx->alloc_run); err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, - &indx->bitmap_run, 0, NULL, false, NULL); + &indx->bitmap_run, 0, NULL, false); err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, false, NULL); run_close(&indx->bitmap_run); @@ -2645,6 +2649,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, mi->dirty = true; } + indx->version += 1; out: fnd_put(fnd2); out1: diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index edfb973e4e82..6e65066ebcc1 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -12,6 +12,7 @@ #include <linux/nls.h> #include <linux/uio.h> #include <linux/writeback.h> +#include <linux/iomap.h> #include "debug.h" #include "ntfs.h" @@ -39,7 +40,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, u32 rp_fa = 0, asize, t32; u16 roff, rsize, names = 0, links = 0; const struct ATTR_FILE_NAME *fname = NULL; - const struct INDEX_ROOT *root; + const struct INDEX_ROOT *root = NULL; struct REPARSE_DATA_BUFFER rp; // 0x18 bytes u64 t64; struct MFT_REC *rec; @@ -166,9 +167,7 @@ next_attr: std5 = Add2Ptr(attr, roff); -#ifdef STATX_BTIME nt2kernel(std5->cr_time, &ni->i_crtime); -#endif nt2kernel(std5->a_time, &ts); inode_set_atime_to_ts(inode, ts); nt2kernel(std5->c_time, &ts); @@ -555,194 +554,148 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, return inode; } -enum get_block_ctx { - GET_BLOCK_GENERAL = 0, - 
GET_BLOCK_WRITE_BEGIN = 1, - GET_BLOCK_DIRECT_IO_R = 2, - GET_BLOCK_DIRECT_IO_W = 3, - GET_BLOCK_BMAP = 4, -}; - -static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, - struct buffer_head *bh, int create, - enum get_block_ctx ctx) +static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) { - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; + struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); - struct folio *folio = bh->b_folio; - u8 cluster_bits = sbi->cluster_bits; - u32 block_size = sb->s_blocksize; - u64 bytes, lbo, valid; - u32 off; - int err; - CLST vcn, lcn, len; - bool new; - - /* Clear previous state. */ - clear_buffer_new(bh); - clear_buffer_uptodate(bh); - - if (is_resident(ni)) { - bh->b_blocknr = RESIDENT_LCN; - bh->b_size = block_size; - if (!folio) { - /* direct io (read) or bmap call */ - err = 0; - } else { - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - - if (!err) - set_buffer_uptodate(bh); - } - return err; - } - vcn = vbo >> cluster_bits; - off = vbo & sbi->cluster_mask; - new = false; - - err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL, - create && sbi->cluster_size > PAGE_SIZE); - if (err) - goto out; - - if (!len) + /* + * We can get here for an inline file via the FIBMAP ioctl + */ + if (is_resident(ni)) return 0; - bytes = ((u64)len << cluster_bits) - off; - - if (lcn >= sbi->used.bitmap.nbits) { - /* This case includes resident/compressed/sparse. */ - if (!create) { - if (bh->b_size > bytes) - bh->b_size = bytes; - return 0; - } - WARN_ON(1); - } - - if (new) - set_buffer_new(bh); - - lbo = ((u64)lcn << cluster_bits) + off; - - set_buffer_mapped(bh); - bh->b_bdev = sb->s_bdev; - bh->b_blocknr = lbo >> sb->s_blocksize_bits; - - valid = ni->i_valid; - - if (ctx == GET_BLOCK_DIRECT_IO_W) { - /* ntfs_direct_IO will update ni->i_valid. 
*/ - if (vbo >= valid) - set_buffer_new(bh); - } else if (create) { - /* Normal write. */ - if (bytes > bh->b_size) - bytes = bh->b_size; - - if (vbo >= valid) - set_buffer_new(bh); - - if (vbo + bytes > valid) { - ni->i_valid = vbo + bytes; - mark_inode_dirty(inode); - } - } else if (vbo >= valid) { - /* Read out of valid data. */ - clear_buffer_mapped(bh); - } else if (vbo + bytes <= valid) { - /* Normal read. */ - } else if (vbo + block_size <= valid) { - /* Normal short read. */ - bytes = block_size; - } else { + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !run_is_empty(&ni->file.run_da)) { /* - * Read across valid size: vbo < valid && valid < vbo + block_size + * With delalloc data we want to sync the file so + * that we can make sure we allocate blocks for file and data + * is in place for the user to see it */ - bytes = block_size; - - if (folio) { - u32 voff = valid - vbo; + ni_allocate_da_blocks(ni); + } - bh->b_size = block_size; - off = vbo & (PAGE_SIZE - 1); - folio_set_bh(bh, folio, off); + return iomap_bmap(mapping, block, &ntfs_iomap_ops); +} - if (bh_read(bh, 0) < 0) { - err = -EIO; - goto out; - } - folio_zero_segment(folio, off + voff, off + block_size); +static void ntfs_iomap_read_end_io(struct bio *bio) +{ + int error = blk_status_to_errno(bio->bi_status); + struct folio_iter fi; + + bio_for_each_folio_all(fi, bio) { + struct folio *folio = fi.folio; + struct inode *inode = folio->mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + u64 valid = ni->i_valid; + u32 f_size = folio_size(folio); + loff_t f_pos = folio_pos(folio); + + + if (valid < f_pos + f_size) { + u32 z_from = valid <= f_pos ? + 0 : + offset_in_folio(folio, valid); + /* The only thing ntfs_iomap_read_end_io used for. 
*/ + folio_zero_segment(folio, z_from, f_size); } - } - if (bh->b_size > bytes) - bh->b_size = bytes; - -#ifndef __LP64__ - if (ctx == GET_BLOCK_DIRECT_IO_W || ctx == GET_BLOCK_DIRECT_IO_R) { - static_assert(sizeof(size_t) < sizeof(loff_t)); - if (bytes > 0x40000000u) - bh->b_size = 0x40000000u; + iomap_finish_folio_read(folio, fi.offset, fi.length, error); } -#endif - - return 0; - -out: - return err; + bio_put(bio); } -int ntfs_get_block(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create) +/* + * Copied from iomap/bio.c. + */ +static int ntfs_iomap_bio_read_folio_range(const struct iomap_iter *iter, + struct iomap_read_folio_ctx *ctx, + size_t plen) { - return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, - bh_result, create, GET_BLOCK_GENERAL); + struct folio *folio = ctx->cur_folio; + const struct iomap *iomap = &iter->iomap; + loff_t pos = iter->pos; + size_t poff = offset_in_folio(folio, pos); + loff_t length = iomap_length(iter); + sector_t sector; + struct bio *bio = ctx->read_ctx; + + sector = iomap_sector(iomap, pos); + if (!bio || bio_end_sector(bio) != sector || + !bio_add_folio(bio, folio, plen, poff)) { + gfp_t gfp = mapping_gfp_constraint(folio->mapping, GFP_KERNEL); + gfp_t orig_gfp = gfp; + unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE); + + if (bio) + submit_bio(bio); + + if (ctx->rac) /* same as readahead_gfp_mask */ + gfp |= __GFP_NORETRY | __GFP_NOWARN; + bio = bio_alloc(iomap->bdev, bio_max_segs(nr_vecs), REQ_OP_READ, + gfp); + /* + * If the bio_alloc fails, try it again for a single page to + * avoid having to deal with partial page reads. This emulates + * what do_mpage_read_folio does. 
+ */ + if (!bio) + bio = bio_alloc(iomap->bdev, 1, REQ_OP_READ, orig_gfp); + if (ctx->rac) + bio->bi_opf |= REQ_RAHEAD; + bio->bi_iter.bi_sector = sector; + bio->bi_end_io = ntfs_iomap_read_end_io; + bio_add_folio_nofail(bio, folio, plen, poff); + ctx->read_ctx = bio; + } + return 0; } -static int ntfs_get_block_bmap(struct inode *inode, sector_t vsn, - struct buffer_head *bh_result, int create) +static void ntfs_iomap_bio_submit_read(struct iomap_read_folio_ctx *ctx) { - return ntfs_get_block_vbo(inode, - (u64)vsn << inode->i_sb->s_blocksize_bits, - bh_result, create, GET_BLOCK_BMAP); -} + struct bio *bio = ctx->read_ctx; -static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) -{ - return generic_block_bmap(mapping, block, ntfs_get_block_bmap); + if (bio) + submit_bio(bio); } +static const struct iomap_read_ops ntfs_iomap_bio_read_ops = { + .read_folio_range = ntfs_iomap_bio_read_folio_range, + .submit_read = ntfs_iomap_bio_submit_read, +}; + static int ntfs_read_folio(struct file *file, struct folio *folio) { int err; struct address_space *mapping = folio->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + loff_t vbo = folio_pos(folio); + struct iomap_read_folio_ctx ctx = { + .cur_folio = folio, + .ops = &ntfs_iomap_bio_read_ops, + }; - if (is_resident(ni)) { - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - if (err != E_NTFS_NONRESIDENT) { - folio_unlock(folio); - return err; - } + if (unlikely(is_bad_ni(ni))) { + folio_unlock(folio); + return -EIO; + } + + if (ni->i_valid <= vbo) { + folio_zero_range(folio, 0, folio_size(folio)); + folio_mark_uptodate(folio); + folio_unlock(folio); + return 0; } if (is_compressed(ni)) { - ni_lock(ni); - err = ni_readpage_cmpr(ni, folio); - ni_unlock(ni); + /* ni_lock is taken inside ni_read_folio_cmpr after page locks */ + err = ni_read_folio_cmpr(ni, folio); return err; } - /* Normal + sparse files. 
*/ - return mpage_read_folio(folio, ntfs_get_block); + iomap_read_folio(&ntfs_iomap_ops, &ctx, NULL); + return 0; } static void ntfs_readahead(struct readahead_control *rac) @@ -750,8 +703,10 @@ static void ntfs_readahead(struct readahead_control *rac) struct address_space *mapping = rac->mapping; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); - u64 valid; - loff_t pos; + struct iomap_read_folio_ctx ctx = { + .ops = &ntfs_iomap_bio_read_ops, + .rac = rac, + }; if (is_resident(ni)) { /* No readahead for resident. */ @@ -763,115 +718,324 @@ static void ntfs_readahead(struct readahead_control *rac) return; } - valid = ni->i_valid; - pos = readahead_pos(rac); + iomap_readahead(&ntfs_iomap_ops, &ctx, NULL); +} - if (valid < i_size_read(inode) && pos <= valid && - valid < pos + readahead_length(rac)) { - /* Range cross 'valid'. Read it page by page. */ - return; +int ntfs_set_size(struct inode *inode, u64 new_size) +{ + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(inode); + int err; + + /* Check for maximum file size. 
*/ + if (is_sparsed(ni) || is_compressed(ni)) { + if (new_size > sbi->maxbytes_sparse) { + return -EFBIG; + } + } else if (new_size > sbi->maxbytes) { + return -EFBIG; } - mpage_readahead(rac, ntfs_get_block); -} + ni_lock(ni); + down_write(&ni->file.run_lock); -static int ntfs_get_block_direct_IO_R(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, - bh_result, create, GET_BLOCK_DIRECT_IO_R); -} + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &ni->i_valid, true); -static int ntfs_get_block_direct_IO_W(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, - bh_result, create, GET_BLOCK_DIRECT_IO_W); + if (!err) { + i_size_write(inode, new_size); + mark_inode_dirty(inode); + } + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; } -static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +/* + * Special value to detect ntfs_writeback_range call + */ +#define WB_NO_DA (struct iomap *)1 +/* + * Function to get mapping vbo -> lbo. 
+ * used with: + * - iomap_zero_range + * - iomap_truncate_page + * - iomap_dio_rw + * - iomap_file_buffered_write + * - iomap_bmap + * - iomap_fiemap + * - iomap_bio_read_folio + * - iomap_bio_readahead + */ +static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, + unsigned int flags, struct iomap *iomap, + struct iomap *srcmap) { - struct file *file = iocb->ki_filp; - struct address_space *mapping = file->f_mapping; - struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); - loff_t vbo = iocb->ki_pos; - loff_t end; - int wr = iov_iter_rw(iter) & WRITE; - size_t iter_count = iov_iter_count(iter); - loff_t valid; - ssize_t ret; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + CLST vcn = offset >> cluster_bits; + u32 off = offset & sbi->cluster_mask; + bool rw = flags & IOMAP_WRITE; + loff_t endbyte = offset + length; + void *res = NULL; + int err; + CLST lcn, clen, clen_max = 1; + bool new_clst = false; + bool no_da; + bool zero = false; + if (unlikely(ntfs3_forced_shutdown(sbi->sb))) + return -EIO; - if (is_resident(ni)) { - /* Switch to buffered write. */ - ret = 0; - goto out; + if (flags & IOMAP_REPORT) { + if (offset > ntfs_get_maxbytes(ni)) { + /* called from fiemap/bmap. */ + return -EINVAL; + } + + if (offset >= inode->i_size) { + /* special code for report. */ + return -ENOENT; + } } - if (is_compressed(ni)) { - ret = 0; - goto out; + + if (IOMAP_ZERO == flags && (endbyte & sbi->cluster_mask)) { + rw = true; + } else if (rw) { + clen_max = bytes_to_cluster(sbi, endbyte) - vcn; } - ret = blockdev_direct_IO(iocb, inode, iter, - wr ? ntfs_get_block_direct_IO_W : - ntfs_get_block_direct_IO_R); + /* + * Force to allocate clusters if directIO(write) or writeback_range. + * NOTE: attr_data_get_block allocates clusters only for sparse file. + * Normal file allocates clusters in attr_set_size. 
+ */ + no_da = flags == (IOMAP_DIRECT | IOMAP_WRITE) || srcmap == WB_NO_DA; - if (ret > 0) - end = vbo + ret; - else if (wr && ret == -EIOCBQUEUED) - end = vbo + iter_count; - else - goto out; + err = attr_data_get_block(ni, vcn, clen_max, &lcn, &clen, + rw ? &new_clst : NULL, zero, &res, no_da); - valid = ni->i_valid; - if (wr) { - if (end > valid && !S_ISBLK(inode->i_mode)) { - ni->i_valid = end; - mark_inode_dirty(inode); + if (err) { + return err; + } + + if (lcn == EOF_LCN) { + /* request out of file. */ + if (flags & IOMAP_REPORT) { + /* special code for report. */ + return -ENOENT; + } + + if (rw) { + /* should never be here. */ + return -EINVAL; } - } else if (vbo < valid && valid < end) { - /* Fix page. */ - iov_iter_revert(iter, end - valid); - iov_iter_zero(end - valid, iter); + lcn = SPARSE_LCN; } -out: - return ret; + iomap->flags = new_clst ? IOMAP_F_NEW : 0; + + if (lcn == RESIDENT_LCN) { + if (offset >= clen) { + kfree(res); + if (flags & IOMAP_REPORT) { + /* special code for report. */ + return -ENOENT; + } + return -EFAULT; + } + + iomap->private = iomap->inline_data = res; + iomap->type = IOMAP_INLINE; + iomap->offset = 0; + iomap->length = clen; /* resident size in bytes. */ + return 0; + } + + if (!clen) { + /* broken file? */ + return -EINVAL; + } + + iomap->bdev = inode->i_sb->s_bdev; + iomap->offset = offset; + iomap->length = ((loff_t)clen << cluster_bits) - off; + + if (lcn == COMPRESSED_LCN) { + /* should never be here. */ + return -EOPNOTSUPP; + } + + if (lcn == DELALLOC_LCN) { + iomap->type = IOMAP_DELALLOC; + iomap->addr = IOMAP_NULL_ADDR; + } else { + + /* Translate clusters into bytes. 
*/ + iomap->addr = ((loff_t)lcn << cluster_bits) + off; + if (length && iomap->length > length) + iomap->length = length; + else + endbyte = offset + iomap->length; + + if (lcn == SPARSE_LCN) { + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; + // if (IOMAP_ZERO == flags && !off) { + // iomap->length = (endbyte - offset) & + // sbi->cluster_mask_inv; + // } + } else if (endbyte <= ni->i_valid) { + iomap->type = IOMAP_MAPPED; + } else if (offset < ni->i_valid) { + iomap->type = IOMAP_MAPPED; + if (flags & IOMAP_REPORT) + iomap->length = ni->i_valid - offset; + } else if (rw || (flags & IOMAP_ZERO)) { + iomap->type = IOMAP_MAPPED; + } else { + iomap->type = IOMAP_UNWRITTEN; + } + } + + if ((flags & IOMAP_ZERO) && + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { + /* Avoid too large requests. */ + u32 tail; + u32 off_a = offset & (PAGE_SIZE - 1); + if (off_a) + tail = PAGE_SIZE - off_a; + else + tail = PAGE_SIZE; + + if (iomap->length > tail) + iomap->length = tail; + } + + return 0; } -int ntfs_set_size(struct inode *inode, u64 new_size) +static int ntfs_iomap_end(struct inode *inode, loff_t pos, loff_t length, + ssize_t written, unsigned int flags, + struct iomap *iomap) { - struct super_block *sb = inode->i_sb; - struct ntfs_sb_info *sbi = sb->s_fs_info; + int err = 0; struct ntfs_inode *ni = ntfs_i(inode); - int err; + loff_t endbyte = pos + written; - /* Check for maximum file size. 
*/ - if (is_sparsed(ni) || is_compressed(ni)) { - if (new_size > sbi->maxbytes_sparse) { - err = -EFBIG; - goto out; - } - } else if (new_size > sbi->maxbytes) { - err = -EFBIG; - goto out; - } + if ((flags & IOMAP_WRITE) || (flags & IOMAP_ZERO)) { + if (iomap->type == IOMAP_INLINE) { + u32 data_size; + struct ATTRIB *attr; + struct mft_inode *mi; - ni_lock(ni); - down_write(&ni->file.run_lock); + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, + NULL, &mi); + if (!attr || attr->non_res) { + err = -EINVAL; + goto out; + } - err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, - &ni->i_valid, true, NULL); + data_size = le32_to_cpu(attr->res.data_size); + if (!(pos < data_size && endbyte <= data_size)) { + err = -EINVAL; + goto out; + } - up_write(&ni->file.run_lock); - ni_unlock(ni); + /* Update resident data. */ + memcpy(resident_data(attr) + pos, + iomap_inline_data(iomap, pos), written); + mi->dirty = true; + ni->i_valid = data_size; + } else if (ni->i_valid < endbyte) { + ni->i_valid = endbyte; + mark_inode_dirty(inode); + } + } - mark_inode_dirty(inode); + if ((flags & IOMAP_ZERO) && + (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) { + /* Pair for code in ntfs_iomap_begin. */ + balance_dirty_pages_ratelimited(inode->i_mapping); + cond_resched(); + } out: + if (iomap->type == IOMAP_INLINE) { + kfree(iomap->private); + iomap->private = NULL; + } + return err; } +/* + * write_begin + put_folio + write_end. + * iomap_zero_range + * iomap_truncate_page + * iomap_file_buffered_write + */ +static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos, + unsigned int len, struct folio *folio) +{ + struct ntfs_inode *ni = ntfs_i(inode); + loff_t end = pos + len; + u32 f_size = folio_size(folio); + loff_t f_pos = folio_pos(folio); + loff_t f_end = f_pos + f_size; + + if (ni->i_valid <= end && end < f_end) { + /* zero range [end - f_end). */ + /* The only thing ntfs_iomap_put_folio used for. 
*/ + folio_zero_segment(folio, offset_in_folio(folio, end), f_size); + } + folio_unlock(folio); + folio_put(folio); +} + +/* + * iomap_writeback_ops::writeback_range + */ +static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc, + struct folio *folio, u64 offset, + unsigned int len, u64 end_pos) +{ + struct iomap *iomap = &wpc->iomap; + /* Check iomap position. */ + if (iomap->offset + iomap->length <= offset || offset < iomap->offset) { + int err; + struct inode *inode = wpc->inode; + struct ntfs_inode *ni = ntfs_i(inode); + struct ntfs_sb_info *sbi = ntfs_sb(inode->i_sb); + loff_t i_size_up = ntfs_up_cluster(sbi, inode->i_size); + loff_t len_max = i_size_up - offset; + + err = ni->file.run_da.count ? ni_allocate_da_blocks(ni) : 0; + + if (!err) { + /* Use local special value 'WB_NO_DA' to disable delalloc. */ + err = ntfs_iomap_begin(inode, offset, len_max, + IOMAP_WRITE, iomap, WB_NO_DA); + } + + if (err) { + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + return err; + } + } + + return iomap_add_to_ioend(wpc, folio, offset, end_pos, len); +} + + +static const struct iomap_writeback_ops ntfs_writeback_ops = { + .writeback_range = ntfs_writeback_range, + .writeback_submit = iomap_ioend_writeback_submit, +}; + static int ntfs_resident_writepage(struct folio *folio, struct writeback_control *wbc) { @@ -900,39 +1064,14 @@ static int ntfs_resident_writepage(struct folio *folio, static int ntfs_writepages(struct address_space *mapping, struct writeback_control *wbc) { - struct inode *inode = mapping->host; - - /* Avoid any operation if inode is bad. 
*/ - if (unlikely(is_bad_ni(ntfs_i(inode)))) - return -EINVAL; - - if (unlikely(ntfs3_forced_shutdown(inode->i_sb))) - return -EIO; - - if (is_resident(ntfs_i(inode))) { - struct folio *folio = NULL; - int error; - - while ((folio = writeback_iter(mapping, wbc, folio, &error))) - error = ntfs_resident_writepage(folio, wbc); - return error; - } - return mpage_writepages(mapping, wbc, ntfs_get_block); -} - -static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create) -{ - return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, - bh_result, create, GET_BLOCK_WRITE_BEGIN); -} - -int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, struct folio **foliop, void **fsdata) -{ int err; struct inode *inode = mapping->host; struct ntfs_inode *ni = ntfs_i(inode); + struct iomap_writepage_ctx wpc = { + .inode = mapping->host, + .wbc = wbc, + .ops = &ntfs_writeback_ops, + }; /* Avoid any operation if inode is bad. */ if (unlikely(is_bad_ni(ni))) @@ -942,100 +1081,15 @@ int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, return -EIO; if (is_resident(ni)) { - struct folio *folio = __filemap_get_folio( - mapping, pos >> PAGE_SHIFT, FGP_WRITEBEGIN, - mapping_gfp_mask(mapping)); - - if (IS_ERR(folio)) { - err = PTR_ERR(folio); - goto out; - } - - ni_lock(ni); - err = attr_data_read_resident(ni, folio); - ni_unlock(ni); - - if (!err) { - *foliop = folio; - goto out; - } - folio_unlock(folio); - folio_put(folio); - - if (err != E_NTFS_NONRESIDENT) - goto out; - } - - err = block_write_begin(mapping, pos, len, foliop, - ntfs_get_block_write_begin); - -out: - return err; -} - -/* - * ntfs_write_end - Address_space_operations::write_end. 
- */ -int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, u32 copied, struct folio *folio, - void *fsdata) -{ - struct inode *inode = mapping->host; - struct ntfs_inode *ni = ntfs_i(inode); - u64 valid = ni->i_valid; - bool dirty = false; - int err; - - if (is_resident(ni)) { - ni_lock(ni); - err = attr_data_write_resident(ni, folio); - ni_unlock(ni); - if (!err) { - struct buffer_head *head = folio_buffers(folio); - dirty = true; - /* Clear any buffers in folio. */ - if (head) { - struct buffer_head *bh = head; - - do { - clear_buffer_dirty(bh); - clear_buffer_mapped(bh); - set_buffer_uptodate(bh); - } while (head != (bh = bh->b_this_page)); - } - folio_mark_uptodate(folio); - err = copied; - } - folio_unlock(folio); - folio_put(folio); - } else { - err = generic_write_end(iocb, mapping, pos, len, copied, folio, - fsdata); - } - - if (err >= 0) { - if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) { - inode_set_mtime_to_ts(inode, - inode_set_ctime_current(inode)); - ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; - dirty = true; - } - - if (valid != ni->i_valid) { - /* ni->i_valid is changed in ntfs_get_block_vbo. */ - dirty = true; - } + struct folio *folio = NULL; - if (pos + err > inode->i_size) { - i_size_write(inode, pos + err); - dirty = true; - } + while ((folio = writeback_iter(mapping, wbc, folio, &err))) + err = ntfs_resident_writepage(folio, wbc); - if (dirty) - mark_inode_dirty(inode); + return err; } - return err; + return iomap_writepages(&wpc); } int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc) @@ -1050,6 +1104,7 @@ int ntfs_sync_inode(struct inode *inode) /* * Helper function to read file. 
+ * Used to read $AttrDef and $UpCase */ int inode_read_data(struct inode *inode, void *data, size_t bytes) { @@ -1539,9 +1594,10 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, nsize)); - err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0, - clst, NULL, ALLOCATE_DEF, - &alen, 0, NULL, NULL); + err = attr_allocate_clusters(sbi, &ni->file.run, NULL, + 0, 0, clst, NULL, + ALLOCATE_DEF, &alen, 0, + NULL, NULL); if (err) goto out5; @@ -1682,7 +1738,7 @@ out6: /* Delete ATTR_EA, if non-resident. */ struct runs_tree run; run_init(&run); - attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false, NULL); + attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false); run_close(&run); } @@ -2094,18 +2150,26 @@ const struct address_space_operations ntfs_aops = { .read_folio = ntfs_read_folio, .readahead = ntfs_readahead, .writepages = ntfs_writepages, - .write_begin = ntfs_write_begin, - .write_end = ntfs_write_end, - .direct_IO = ntfs_direct_IO, .bmap = ntfs_bmap, - .dirty_folio = block_dirty_folio, - .migrate_folio = buffer_migrate_folio, - .invalidate_folio = block_invalidate_folio, + .dirty_folio = iomap_dirty_folio, + .migrate_folio = filemap_migrate_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, }; const struct address_space_operations ntfs_aops_cmpr = { .read_folio = ntfs_read_folio, - .dirty_folio = block_dirty_folio, - .direct_IO = ntfs_direct_IO, + .dirty_folio = iomap_dirty_folio, + .release_folio = iomap_release_folio, + .invalidate_folio = iomap_invalidate_folio, +}; + +const struct iomap_ops ntfs_iomap_ops = { + .iomap_begin = ntfs_iomap_begin, + .iomap_end = ntfs_iomap_end, +}; + +const struct iomap_write_ops ntfs_iomap_folio_ops = { + .put_folio = ntfs_iomap_put_folio, }; // clang-format on diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 552b97905813..892f13e65d42 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -77,10 +77,14 
@@ static_assert(sizeof(size_t) == 8); typedef u32 CLST; #endif +/* On-disk sparse cluster is marked as -1. */ #define SPARSE_LCN64 ((u64)-1) #define SPARSE_LCN ((CLST)-1) +/* Below are virtual (not on-disk) values. */ #define RESIDENT_LCN ((CLST)-2) #define COMPRESSED_LCN ((CLST)-3) +#define EOF_LCN ((CLST)-4) +#define DELALLOC_LCN ((CLST)-5) enum RECORD_NUM { MFT_REC_MFT = 0, diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index f18349689458..921b526eb0f4 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -109,6 +109,7 @@ struct ntfs_mount_options { unsigned force : 1; /* RW mount dirty volume. */ unsigned prealloc : 1; /* Preallocate space when file is growing. */ unsigned nocase : 1; /* case insensitive. */ + unsigned delalloc : 1; /* delay allocation. */ }; /* Special value to unpack and deallocate. */ @@ -133,7 +134,8 @@ struct ntfs_buffers { enum ALLOCATE_OPT { ALLOCATE_DEF = 0, // Allocate all clusters. ALLOCATE_MFT = 1, // Allocate for MFT. - ALLOCATE_ZERO = 2, // Zeroout new allocated clusters + ALLOCATE_ZERO = 2, // Zeroout new allocated clusters. + ALLOCATE_ONE_FR = 4, // Allocate one fragment only. }; enum bitmap_mutex_classes { @@ -192,6 +194,7 @@ struct ntfs_index { struct runs_tree alloc_run; /* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */ struct rw_semaphore run_lock; + size_t version; /* increment each change */ /*TODO: Remove 'cmp'. */ NTFS_CMP_FUNC cmp; @@ -213,7 +216,7 @@ struct ntfs_sb_info { u32 discard_granularity; u64 discard_granularity_mask_inv; // ~(discard_granularity - 1) - u32 bdev_blocksize_mask; // bdev_logical_block_size(bdev) - 1; + u32 bdev_blocksize; // bdev_logical_block_size(bdev) u32 cluster_size; // bytes per cluster u32 cluster_mask; // == cluster_size - 1 @@ -272,6 +275,12 @@ struct ntfs_sb_info { struct { struct wnd_bitmap bitmap; // $Bitmap::Data CLST next_free_lcn; + /* Total sum of delay allocated clusters in all files. 
*/ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_t da; +#else + atomic_t da; +#endif } used; struct { @@ -379,7 +388,7 @@ struct ntfs_inode { */ u8 mi_loaded; - /* + /* * Use this field to avoid any write(s). * If inode is bad during initialization - use make_bad_inode * If inode is bad during operations - use this field @@ -390,7 +399,14 @@ struct ntfs_inode { struct ntfs_index dir; struct { struct rw_semaphore run_lock; + /* Unpacked runs from just one record. */ struct runs_tree run; + /* + * Pairs [vcn, len] for all delay allocated clusters. + * Normal file always contains delayed clusters in one fragment. + * TODO: use 2 CLST per pair instead of 3. + */ + struct runs_tree run_da; #ifdef CONFIG_NTFS3_LZX_XPRESS struct folio *offs_folio; #endif @@ -430,20 +446,32 @@ enum REPARSE_SIGN { /* Functions from attrib.c */ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, - CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, - enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, - CLST *new_lcn, CLST *new_len); + struct runs_tree *run_da, CLST vcn, CLST lcn, + CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt, + CLST *alen, const size_t fr, CLST *new_lcn, + CLST *new_len); int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, u64 new_size, struct runs_tree *run, struct ATTRIB **ins_attr, struct page *page); -int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, - const __le16 *name, u8 name_len, struct runs_tree *run, - u64 new_size, const u64 *new_valid, bool keep_prealloc, - struct ATTRIB **ret); +int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret, bool no_da); +static inline int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, + struct runs_tree *run, u64 new_size, + const u64 
*new_valid, bool keep_prealloc) +{ + return attr_set_size_ex(ni, type, name, name_len, run, new_size, + new_valid, keep_prealloc, NULL, false); +} int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, - CLST *len, bool *new, bool zero); -int attr_data_read_resident(struct ntfs_inode *ni, struct folio *folio); + CLST *len, bool *new, bool zero, void **res, + bool no_da); +int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen, + CLST *lcn, CLST *len, bool *new, bool zero, + void **res, bool no_da); int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio); int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, u8 name_len, struct runs_tree *run, @@ -512,6 +540,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ntfs_file_open(struct inode *inode, struct file *file); int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); +int ntfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg); long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg); extern const struct inode_operations ntfs_special_inode_operations; @@ -567,9 +596,7 @@ enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, struct REPARSE_DATA_BUFFER *buffer); int ni_write_inode(struct inode *inode, int sync, const char *hint); #define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) -int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, - __u64 vbo, __u64 len); -int ni_readpage_cmpr(struct ntfs_inode *ni, struct folio *folio); +int ni_read_folio_cmpr(struct ntfs_inode *ni, struct folio *folio); int ni_decompress_file(struct ntfs_inode *ni); int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, u32 pages_per_frame, int copy); @@ -590,6 +617,10 @@ int ni_rename(struct ntfs_inode *dir_ni, struct 
ntfs_inode *new_dir_ni, struct NTFS_DE *new_de); bool ni_is_dirty(struct inode *inode); +loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data); +int ni_write_parents(struct ntfs_inode *ni, int sync); +int ni_allocate_da_blocks(struct ntfs_inode *ni); +int ni_allocate_da_blocks_locked(struct ntfs_inode *ni); /* Globals from fslog.c */ bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes); @@ -605,13 +636,14 @@ int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi); int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, CLST *new_lcn, CLST *new_len, enum ALLOCATE_OPT opt); -bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen); +bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen, + bool da); int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, struct ntfs_inode *ni, struct mft_inode **mi); void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft); int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to); int ntfs_refresh_zone(struct ntfs_sb_info *sbi); -void ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait); +void ntfs_update_mftmirr(struct ntfs_sb_info *sbi); void ntfs_bad_inode(struct inode *inode, const char *hint); #define _ntfs_bad_inode(i) ntfs_bad_inode(i, __func__) enum NTFS_DIRTY_FLAGS { @@ -626,11 +658,27 @@ int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, const void *buf, size_t bytes, int sync); struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo); -int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, - u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb); -int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, - struct NTFS_RECORD_HEADER *rhdr, u32 bytes, - struct ntfs_buffers *nb); +int ntfs_read_run_nb_ra(struct ntfs_sb_info *sbi, const 
struct runs_tree *run, + u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb, + struct file_ra_state *ra); +static inline int ntfs_read_run_nb(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo, + void *buf, u32 bytes, + struct ntfs_buffers *nb) +{ + return ntfs_read_run_nb_ra(sbi, run, vbo, buf, bytes, nb, NULL); +} +int ntfs_read_bh_ra(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb, struct file_ra_state *ra); +static inline int ntfs_read_bh(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo, + struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb) +{ + return ntfs_read_bh_ra(sbi, run, vbo, rhdr, bytes, nb, NULL); +} + int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, u32 bytes, struct ntfs_buffers *nb); int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, @@ -696,8 +744,13 @@ int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, const struct ATTRIB *attr, enum index_mutex_classed type); struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, struct ATTRIB **attr, struct mft_inode **mi); -int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn, - struct indx_node **node); +int indx_read_ra(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node, struct file_ra_state *ra); +static inline int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, + CLST vbn, struct indx_node **node) +{ + return indx_read_ra(idx, ni, vbn, node, NULL); +} int indx_find(struct ntfs_index *indx, struct ntfs_inode *dir, const struct INDEX_ROOT *root, const void *Key, size_t KeyLen, const void *param, int *diff, struct NTFS_DE **entry, @@ -721,13 +774,6 @@ int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi, struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, const struct cpu_str 
*name); int ntfs_set_size(struct inode *inode, u64 new_size); -int ntfs_get_block(struct inode *inode, sector_t vbn, - struct buffer_head *bh_result, int create); -int ntfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, struct folio **foliop, void **fsdata); -int ntfs_write_end(const struct kiocb *iocb, struct address_space *mapping, - loff_t pos, u32 len, u32 copied, struct folio *folio, - void *fsdata); int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); int ntfs_sync_inode(struct inode *inode); int inode_read_data(struct inode *inode, void *data, size_t bytes); @@ -738,6 +784,8 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir, int ntfs_link_inode(struct inode *inode, struct dentry *dentry); int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry); void ntfs_evict_inode(struct inode *inode); +extern const struct iomap_ops ntfs_iomap_ops; +extern const struct iomap_write_ops ntfs_iomap_folio_ops; extern const struct inode_operations ntfs_link_inode_operations; extern const struct address_space_operations ntfs_aops; extern const struct address_space_operations ntfs_aops_cmpr; @@ -815,7 +863,8 @@ void run_truncate_around(struct runs_tree *run, CLST vcn); bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len, bool is_mft); bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub); -bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len); +int run_insert_range(struct runs_tree *run, CLST vcn, CLST len); +int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len); bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn, CLST *lcn, CLST *len); bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn); @@ -835,6 +884,9 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, #endif int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn); int 
run_clone(const struct runs_tree *run, struct runs_tree *new_run); +bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done); +CLST run_len(const struct runs_tree *run); +CLST run_get_max_vcn(const struct runs_tree *run); /* Globals from super.c */ void *ntfs_set_shared(void *ptr, u32 bytes); @@ -1011,6 +1063,36 @@ static inline int ntfs3_forced_shutdown(struct super_block *sb) return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags); } +/* Returns total sum of delay allocated clusters in all files. */ +static inline CLST ntfs_get_da(struct ntfs_sb_info *sbi) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + return atomic64_read(&sbi->used.da); +#else + return atomic_read(&sbi->used.da); +#endif +} + +/* Update total count of delay allocated clusters. */ +static inline void ntfs_add_da(struct ntfs_sb_info *sbi, CLST da) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_add(da, &sbi->used.da); +#else + atomic_add(da, &sbi->used.da); +#endif +} + +/* Update total count of delay allocated clusters. */ +static inline void ntfs_sub_da(struct ntfs_sb_info *sbi, CLST da) +{ +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + atomic64_sub(da, &sbi->used.da); +#else + atomic_sub(da, &sbi->used.da); +#endif +} + /* * ntfs_up_cluster - Align up on cluster boundary. */ @@ -1084,6 +1166,13 @@ static inline int is_resident(struct ntfs_inode *ni) return ni->ni_flags & NI_FLAG_RESIDENT; } +static inline loff_t ntfs_get_maxbytes(struct ntfs_inode *ni) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + return is_sparsed(ni) || is_compressed(ni) ? sbi->maxbytes_sparse : + sbi->maxbytes; +} + static inline void le16_sub_cpu(__le16 *var, u16 val) { *var = cpu_to_le16(le16_to_cpu(*var) - val); diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c index 395b20492525..c0324cdc174d 100644 --- a/fs/ntfs3/run.c +++ b/fs/ntfs3/run.c @@ -454,7 +454,7 @@ requires_new_range: /* * If existing range fits then were done. - * Otherwise extend found one and fall back to range jocode. 
+ * Otherwise extend found one and fall back to range join code. */ if (r->vcn + r->len < vcn + len) r->len += len - ((r->vcn + r->len) - vcn); @@ -482,7 +482,8 @@ requires_new_range: return true; } -/* run_collapse_range +/* + * run_collapse_range * * Helper for attr_collapse_range(), * which is helper for fallocate(collapse_range). @@ -493,8 +494,9 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub) struct ntfs_run *r, *e, *eat_start, *eat_end; CLST end; - if (WARN_ON(!run_lookup(run, vcn, &index))) - return true; /* Should never be here. */ + if (!run_lookup(run, vcn, &index) && index >= run->count) { + return true; + } e = run->runs + run->count; r = run->runs + index; @@ -560,13 +562,13 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub) * Helper for attr_insert_range(), * which is helper for fallocate(insert_range). */ -bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len) +int run_insert_range(struct runs_tree *run, CLST vcn, CLST len) { size_t index; struct ntfs_run *r, *e; if (WARN_ON(!run_lookup(run, vcn, &index))) - return false; /* Should never be here. */ + return -EINVAL; /* Should never be here. */ e = run->runs + run->count; r = run->runs + index; @@ -588,13 +590,49 @@ bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len) r->len = len1; if (!run_add_entry(run, vcn + len, lcn2, len2, false)) - return false; + return -ENOMEM; } if (!run_add_entry(run, vcn, SPARSE_LCN, len, false)) - return false; + return -ENOMEM; - return true; + return 0; +} + +/* run_insert_range_da + * + * Helper for attr_insert_range(), + * which is helper for fallocate(insert_range). 
+ */ +int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len) +{ + struct ntfs_run *r, *r0 = NULL, *e = run->runs + run->count; + ; + + for (r = run->runs; r < e; r++) { + CLST end = r->vcn + r->len; + + if (vcn >= end) + continue; + + if (!r0 && r->vcn < vcn) { + r0 = r; + } else { + r->vcn += len; + } + } + + if (r0) { + /* Split fragment. */ + CLST len1 = vcn - r0->vcn; + CLST len2 = r0->len - len1; + + r0->len = len1; + if (!run_add_entry(run, vcn + len, SPARSE_LCN, len2, false)) + return -ENOMEM; + } + + return 0; +} /* @@ -1131,11 +1169,14 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, struct rw_semaphore *lock = is_mounted(sbi) ? &sbi->mft.ni->file.run_lock : NULL; - if (lock) - down_read(lock); - ntfs_refresh_zone(sbi); - if (lock) - up_read(lock); + if (lock) { + if (down_read_trylock(lock)) { + ntfs_refresh_zone(sbi); + up_read(lock); + } + } else { + ntfs_refresh_zone(sbi); + } } up_write(&wnd->rw_lock); if (err) @@ -1206,3 +1247,97 @@ int run_clone(const struct runs_tree *run, struct runs_tree *new_run) new_run->count = run->count; return 0; } + +/* + * run_remove_range + * + * Remove/trim fragments that overlap [vcn, vcn+len); the total number + * of removed clusters is accumulated in 'done'. + */ +bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done) +{ + size_t index, eat; + struct ntfs_run *r, *e, *eat_start, *eat_end; + CLST end, d; + + *done = 0; + + /* Fast check. */ + if (!run->count) + return true; + + if (!run_lookup(run, vcn, &index) && index >= run->count) { + /* No entries in this run. */ + return true; + } + + + e = run->runs + run->count; + r = run->runs + index; + end = vcn + len; + + if (vcn > r->vcn) { + CLST r_end = r->vcn + r->len; + d = vcn - r->vcn; + + if (r_end > end) { + /* Remove a middle part, split. 
*/ + *done += len; + r->len = d; + return run_add_entry(run, end, r->lcn, r_end - end, + false); + } + /* Remove tail of run. */ + *done += r->len - d; + r->len = d; + r += 1; + } + + eat_start = r; + eat_end = r; + + for (; r < e; r++) { + if (r->vcn >= end) + continue; + + if (r->vcn + r->len <= end) { + /* Eat this run. */ + *done += r->len; + eat_end = r + 1; + continue; + } + + d = end - r->vcn; + *done += d; + if (r->lcn != SPARSE_LCN) + r->lcn += d; + r->len -= d; + r->vcn = end; + } + + eat = eat_end - eat_start; + memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r)); + run->count -= eat; + + return true; +} + +CLST run_len(const struct runs_tree *run) +{ + const struct ntfs_run *r, *e; + CLST len = 0; + + for (r = run->runs, e = r + run->count; r < e; r++) { + len += r->len; + } + + return len; +} + +CLST run_get_max_vcn(const struct runs_tree *run) +{ + const struct ntfs_run *r; + if (!run->count) + return 0; + + r = run->runs + run->count - 1; + return r->vcn + r->len; +} diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index 8b0cf0ed4f72..27411203082a 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -58,9 +58,9 @@ #include <linux/buffer_head.h> #include <linux/exportfs.h> #include <linux/fs.h> -#include <linux/fs_struct.h> #include <linux/fs_context.h> #include <linux/fs_parser.h> +#include <linux/fs_struct.h> #include <linux/log2.h> #include <linux/minmax.h> #include <linux/module.h> @@ -264,9 +264,13 @@ enum Opt { Opt_windows_names, Opt_showmeta, Opt_acl, + Opt_acl_bool, Opt_iocharset, Opt_prealloc, + Opt_prealloc_bool, Opt_nocase, + Opt_delalloc, + Opt_delalloc_bool, Opt_err, }; @@ -285,10 +289,14 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = { fsparam_flag("hide_dot_files", Opt_hide_dot_files), fsparam_flag("windows_names", Opt_windows_names), fsparam_flag("showmeta", Opt_showmeta), - fsparam_flag_no("acl", Opt_acl), + fsparam_flag("acl", Opt_acl), + fsparam_bool("acl", Opt_acl_bool), fsparam_string("iocharset", 
Opt_iocharset), - fsparam_flag_no("prealloc", Opt_prealloc), + fsparam_flag("prealloc", Opt_prealloc), + fsparam_bool("prealloc", Opt_prealloc_bool), fsparam_flag("nocase", Opt_nocase), + fsparam_flag("delalloc", Opt_delalloc), + fsparam_bool("delalloc", Opt_delalloc_bool), {} }; // clang-format on @@ -379,15 +387,17 @@ static int ntfs_fs_parse_param(struct fs_context *fc, case Opt_showmeta: opts->showmeta = 1; break; - case Opt_acl: - if (!result.negated) + case Opt_acl_bool: + if (result.boolean) { + fallthrough; + case Opt_acl: #ifdef CONFIG_NTFS3_FS_POSIX_ACL fc->sb_flags |= SB_POSIXACL; #else return invalf( fc, "ntfs3: Support for ACL not compiled in!"); #endif - else + } else fc->sb_flags &= ~SB_POSIXACL; break; case Opt_iocharset: @@ -396,11 +406,20 @@ static int ntfs_fs_parse_param(struct fs_context *fc, param->string = NULL; break; case Opt_prealloc: - opts->prealloc = !result.negated; + opts->prealloc = 1; + break; + case Opt_prealloc_bool: + opts->prealloc = result.boolean; break; case Opt_nocase: opts->nocase = 1; break; + case Opt_delalloc: + opts->delalloc = 1; + break; + case Opt_delalloc_bool: + opts->delalloc = result.boolean; + break; default: /* Should not be here unless we forget add case. 
*/ return -EINVAL; @@ -674,7 +693,7 @@ static noinline void ntfs3_put_sbi(struct ntfs_sb_info *sbi) sbi->volume.ni = NULL; } - ntfs_update_mftmirr(sbi, 0); + ntfs_update_mftmirr(sbi); indx_clear(&sbi->security.index_sii); indx_clear(&sbi->security.index_sdh); @@ -705,9 +724,7 @@ static void ntfs_put_super(struct super_block *sb) ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); if (sbi->options) { - unload_nls(sbi->options->nls); - kfree(sbi->options->nls_name); - kfree(sbi->options); + put_mount_options(sbi->options); sbi->options = NULL; } @@ -719,14 +736,22 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *sb = dentry->d_sb; struct ntfs_sb_info *sbi = sb->s_fs_info; struct wnd_bitmap *wnd = &sbi->used.bitmap; + CLST da_clusters = ntfs_get_da(sbi); buf->f_type = sb->s_magic; - buf->f_bsize = sbi->cluster_size; + buf->f_bsize = buf->f_frsize = sbi->cluster_size; buf->f_blocks = wnd->nbits; - buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd); + buf->f_bfree = wnd_zeroes(wnd); + if (buf->f_bfree > da_clusters) { + buf->f_bfree -= da_clusters; + } else { + buf->f_bfree = 0; + } + buf->f_bavail = buf->f_bfree; + buf->f_fsid.val[0] = sbi->volume.ser_num; - buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32); + buf->f_fsid.val[1] = sbi->volume.ser_num >> 32; buf->f_namelen = NTFS_NAME_LEN; return 0; @@ -771,6 +796,8 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root) seq_puts(m, ",prealloc"); if (opts->nocase) seq_puts(m, ",nocase"); + if (opts->delalloc) + seq_puts(m, ",delalloc"); return 0; } @@ -823,7 +850,12 @@ static int ntfs_sync_fs(struct super_block *sb, int wait) if (!err) ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); - ntfs_update_mftmirr(sbi, wait); + ntfs_update_mftmirr(sbi); + + if (wait) { + sync_blockdev(sb->s_bdev); + blkdev_issue_flush(sb->s_bdev); + } return err; } @@ -1076,7 +1108,7 @@ read_boot: dev_size += sector_size - 1; } - sbi->bdev_blocksize_mask = max(boot_sector_size, sector_size) - 1; + 
sbi->bdev_blocksize = max(boot_sector_size, sector_size); sbi->mft.lbo = mlcn << cluster_bits; sbi->mft.lbo2 = mlcn2 << cluster_bits; @@ -1253,7 +1285,6 @@ static int ntfs_fill_super(struct super_block *sb, struct fs_context *fc) } } sbi->options = options; - fc->fs_private = NULL; sb->s_flags |= SB_NODIRATIME; sb->s_magic = 0x7366746e; // "ntfs" sb->s_op = &ntfs_sops; @@ -1652,9 +1683,7 @@ load_root: */ struct buffer_head *bh0 = sb_getblk(sb, 0); if (bh0) { - if (buffer_locked(bh0)) - __wait_on_buffer(bh0); - + wait_on_buffer(bh0); lock_buffer(bh0); memcpy(bh0->b_data, boot2, sizeof(*boot2)); set_buffer_uptodate(bh0); @@ -1679,9 +1708,7 @@ put_inode_out: out: /* sbi->options == options */ if (options) { - unload_nls(options->nls); - kfree(options->nls_name); - kfree(options); + put_mount_options(sbi->options); sbi->options = NULL; } diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c index b96e6ac90e9f..3fffda784892 100644 --- a/fs/ntfs3/xattr.c +++ b/fs/ntfs3/xattr.c @@ -460,7 +460,7 @@ update_ea: new_sz = size; err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz, - false, NULL); + false); if (err) goto out; |
