Diffstat (limited to 'fs/ocfs2')
42 files changed, 848 insertions, 808 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 395e23920632..821cb7874685 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -566,7 +566,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle, struct ocfs2_path *path, struct ocfs2_extent_rec *insert_rec); /* - * Reset the actual path elements so that we can re-use the structure + * Reset the actual path elements so that we can reuse the structure * to build another path. Generally, this involves freeing the buffer * heads. */ @@ -1182,7 +1182,7 @@ static int ocfs2_add_branch(handle_t *handle, /* * If there is a gap before the root end and the real end - * of the righmost leaf block, we need to remove the gap + * of the rightmost leaf block, we need to remove the gap * between new_cpos and root_end first so that the tree * is consistent after we add a new branch(it will start * from new_cpos). @@ -1238,7 +1238,7 @@ static int ocfs2_add_branch(handle_t *handle, /* Note: new_eb_bhs[new_blocks - 1] is the guy which will be * linked with the rest of the tree. - * conversly, new_eb_bhs[0] is the new bottommost leaf. + * conversely, new_eb_bhs[0] is the new bottommost leaf. * * when we leave the loop, new_last_eb_blk will point to the * newest leaf, and next_blkno will point to the topmost extent @@ -1803,6 +1803,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, el = root_el; while (el->l_tree_depth) { + if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) { + ocfs2_error(ocfs2_metadata_cache_get_super(ci), + "Owner %llu has invalid tree depth %u in extent list\n", + (unsigned long long)ocfs2_metadata_cache_owner(ci), + le16_to_cpu(el->l_tree_depth)); + ret = -EROFS; + goto out; + } if (le16_to_cpu(el->l_next_free_rec) == 0) { ocfs2_error(ocfs2_metadata_cache_get_super(ci), "Owner %llu has empty extent list at depth %u\n", @@ -3712,7 +3720,7 @@ static int ocfs2_try_to_merge_extent(handle_t *handle, * update split_index here. * * When the split_index is zero, we need to merge it to the - * prevoius extent block. It is more efficient and easier + * previous extent block. It is more efficient and easier * if we do merge_right first and merge_left later. */ ret = ocfs2_merge_rec_right(path, handle, et, split_rec, @@ -4517,7 +4525,7 @@ static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et, } /* - * This should only be called against the righmost leaf extent list. + * This should only be called against the rightmost leaf extent list. * * ocfs2_figure_appending_type() will figure out whether we'll have to * insert at the tail of the rightmost leaf. 
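The validation hunks in this file — the tree-depth check added to __ocfs2_find_path above and the tl_count check added to ocfs2_get_truncate_log_info in the next hunk — follow one pattern: decode the untrusted little-endian on-disk field, bound it against a compile-time limit before using it to walk or index a structure, and fail with a corruption error (-EROFS / -EFSCORRUPTED) rather than trusting bad metadata. A minimal stand-alone sketch of that pattern, assuming a little-endian host and using illustrative names rather than the kernel's:

    /*
     * Sketch of the bounds-check-before-walk pattern; MAX_PATH_DEPTH,
     * EROFS_ERR and the struct layout are illustrative stand-ins.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define MAX_PATH_DEPTH  5       /* stand-in for OCFS2_MAX_PATH_DEPTH */
    #define EROFS_ERR       (-30)   /* stand-in for -EROFS */

    struct extent_list_sketch {
            uint16_t l_tree_depth;          /* little-endian on disk */
            uint16_t l_next_free_rec;       /* records in use */
            uint16_t l_count;               /* record capacity */
    };

    /* assume a little-endian host, so the decode is the identity */
    static uint16_t sketch_le16_to_cpu(uint16_t v)
    {
            return v;
    }

    static int sketch_check_extent_list(const struct extent_list_sketch *el)
    {
            /* a depth at or past the limit would overrun the path array */
            if (sketch_le16_to_cpu(el->l_tree_depth) >= MAX_PATH_DEPTH)
                    return EROFS_ERR;
            /* records in use can never exceed the list's capacity */
            if (sketch_le16_to_cpu(el->l_next_free_rec) >
                sketch_le16_to_cpu(el->l_count))
                    return EROFS_ERR;
            return 0;
    }

    int main(void)
    {
            struct extent_list_sketch bad = { 9, 3, 2 };

            printf("check: %d\n", sketch_check_extent_list(&bad)); /* -30 */
            return 0;
    }

The extent_map.c hunk later in this series applies the same shape, bounding l_next_free_rec by l_count before searching the extent list.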
@@ -6154,6 +6162,9 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, int status; struct inode *inode = NULL; struct buffer_head *bh = NULL; + struct ocfs2_dinode *di; + struct ocfs2_truncate_log *tl; + unsigned int tl_count; inode = ocfs2_get_system_file_inode(osb, TRUNCATE_LOG_SYSTEM_INODE, @@ -6171,6 +6182,18 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, goto bail; } + di = (struct ocfs2_dinode *)bh->b_data; + tl = &di->id2.i_dealloc; + tl_count = le16_to_cpu(tl->tl_count); + if (unlikely(tl_count > ocfs2_truncate_recs_per_inode(osb->sb) || + tl_count == 0)) { + status = -EFSCORRUPTED; + iput(inode); + brelse(bh); + mlog_errno(status); + goto bail; + } + *tl_inode = inode; *tl_bh = bh; bail: @@ -6808,27 +6831,27 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) return 0; } -void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, - unsigned int from, unsigned int to, - struct page *page, int zero, u64 *phys) +void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle, + size_t from, size_t to, struct folio *folio, int zero, + u64 *phys) { int ret, partial = 0; - loff_t start_byte = ((loff_t)page->index << PAGE_SHIFT) + from; + loff_t start_byte = folio_pos(folio) + from; loff_t length = to - from; - ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0); + ret = ocfs2_map_folio_blocks(folio, phys, inode, from, to, 0); if (ret) mlog_errno(ret); if (zero) - zero_user_segment(page, from, to); + folio_zero_segment(folio, from, to); /* * Need to set the buffers we zero'd into uptodate * here if they aren't - ocfs2_map_page_blocks() * might've skipped some */ - ret = walk_page_buffers(handle, page_buffers(page), + ret = walk_page_buffers(handle, folio_buffers(folio), from, to, &partial, ocfs2_zero_func); if (ret < 0) @@ -6841,92 +6864,89 @@ void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, } if (!partial) - SetPageUptodate(page); + folio_mark_uptodate(folio); - flush_dcache_page(page); + flush_dcache_folio(folio); } -static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, - loff_t end, struct page **pages, - int numpages, u64 phys, handle_t *handle) +static void ocfs2_zero_cluster_folios(struct inode *inode, loff_t start, + loff_t end, struct folio **folios, int numfolios, + u64 phys, handle_t *handle) { int i; - struct page *page; - unsigned int from, to = PAGE_SIZE; struct super_block *sb = inode->i_sb; BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb))); - if (numpages == 0) + if (numfolios == 0) goto out; - to = PAGE_SIZE; - for(i = 0; i < numpages; i++) { - page = pages[i]; + for (i = 0; i < numfolios; i++) { + struct folio *folio = folios[i]; + size_t to = folio_size(folio); + size_t from = offset_in_folio(folio, start); - from = start & (PAGE_SIZE - 1); - if ((end >> PAGE_SHIFT) == page->index) - to = end & (PAGE_SIZE - 1); + if (to > end - folio_pos(folio)) + to = end - folio_pos(folio); - BUG_ON(from > PAGE_SIZE); - BUG_ON(to > PAGE_SIZE); + ocfs2_map_and_dirty_folio(inode, handle, from, to, folio, 1, + &phys); - ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1, - &phys); - - start = (page->index + 1) << PAGE_SHIFT; + start = folio_next_index(folio) << PAGE_SHIFT; } out: - if (pages) - ocfs2_unlock_and_free_pages(pages, numpages); + if (folios) + ocfs2_unlock_and_free_folios(folios, numfolios); } -int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, - struct page **pages, int *num) +static int ocfs2_grab_folios(struct inode *inode, loff_t start, 
loff_t end, + struct folio **folios, int *num) { - int numpages, ret = 0; + int numfolios, ret = 0; struct address_space *mapping = inode->i_mapping; unsigned long index; loff_t last_page_bytes; BUG_ON(start > end); - numpages = 0; + numfolios = 0; last_page_bytes = PAGE_ALIGN(end); index = start >> PAGE_SHIFT; do { - pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS); - if (!pages[numpages]) { - ret = -ENOMEM; + folios[numfolios] = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); + if (IS_ERR(folios[numfolios])) { + ret = PTR_ERR(folios[numfolios]); mlog_errno(ret); + folios[numfolios] = NULL; goto out; } - numpages++; - index++; + index = folio_next_index(folios[numfolios]); + numfolios++; } while (index < (last_page_bytes >> PAGE_SHIFT)); out: if (ret != 0) { - if (pages) - ocfs2_unlock_and_free_pages(pages, numpages); - numpages = 0; + if (folios) + ocfs2_unlock_and_free_folios(folios, numfolios); + numfolios = 0; } - *num = numpages; + *num = numfolios; return ret; } -static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, - struct page **pages, int *num) +static int ocfs2_grab_eof_folios(struct inode *inode, loff_t start, loff_t end, + struct folio **folios, int *num) { struct super_block *sb = inode->i_sb; BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); - return ocfs2_grab_pages(inode, start, end, pages, num); + return ocfs2_grab_folios(inode, start, end, folios, num); } /* @@ -6940,8 +6960,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, u64 range_start, u64 range_end) { - int ret = 0, numpages; - struct page **pages = NULL; + int ret = 0, numfolios; + struct folio **folios = NULL; u64 phys; unsigned int ext_flags; struct super_block *sb = inode->i_sb; @@ -6954,17 +6974,17 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, return 0; /* - * Avoid zeroing pages fully beyond current i_size. It is pointless as - * underlying blocks of those pages should be already zeroed out and + * Avoid zeroing folios fully beyond current i_size. It is pointless as + * underlying blocks of those folios should be already zeroed out and * page writeback will skip them anyway. */ range_end = min_t(u64, range_end, i_size_read(inode)); if (range_start >= range_end) return 0; - pages = kcalloc(ocfs2_pages_per_cluster(sb), - sizeof(struct page *), GFP_NOFS); - if (pages == NULL) { + folios = kcalloc(ocfs2_pages_per_cluster(sb), + sizeof(struct folio *), GFP_NOFS); + if (folios == NULL) { ret = -ENOMEM; mlog_errno(ret); goto out; @@ -6985,18 +7005,18 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN) goto out; - ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, - &numpages); + ret = ocfs2_grab_eof_folios(inode, range_start, range_end, folios, + &numfolios); if (ret) { mlog_errno(ret); goto out; } - ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, - numpages, phys, handle); + ocfs2_zero_cluster_folios(inode, range_start, range_end, folios, + numfolios, phys, handle); /* - * Initiate writeout of the pages we zero'd here. We don't + * Initiate writeout of the folios we zero'd here. We don't * wait on them - the truncate_inode_pages() call later will * do that for us. 
*/ @@ -7006,7 +7026,7 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, mlog_errno(ret); out: - kfree(pages); + kfree(folios); return ret; } @@ -7059,7 +7079,7 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct buffer_head *di_bh) { - int ret, has_data, num_pages = 0; + int ret, has_data, num_folios = 0; int need_free = 0; u32 bit_off, num; handle_t *handle; @@ -7068,7 +7088,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; struct ocfs2_alloc_context *data_ac = NULL; - struct page *page = NULL; + struct folio *folio = NULL; struct ocfs2_extent_tree et; int did_quota = 0; @@ -7119,12 +7139,12 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, /* * Save two copies, one for insert, and one that can - * be changed by ocfs2_map_and_dirty_page() below. + * be changed by ocfs2_map_and_dirty_folio() below. */ block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); - ret = ocfs2_grab_eof_pages(inode, 0, page_end, &page, - &num_pages); + ret = ocfs2_grab_eof_folios(inode, 0, page_end, &folio, + &num_folios); if (ret) { mlog_errno(ret); need_free = 1; @@ -7135,15 +7155,15 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, * This should populate the 1st page for us and mark * it up to date. */ - ret = ocfs2_read_inline_data(inode, page, di_bh); + ret = ocfs2_read_inline_data(inode, folio, di_bh); if (ret) { mlog_errno(ret); need_free = 1; goto out_unlock; } - ocfs2_map_and_dirty_page(inode, handle, 0, page_end, page, 0, - &phys); + ocfs2_map_and_dirty_folio(inode, handle, 0, page_end, folio, 0, + &phys); } spin_lock(&oi->ip_lock); @@ -7174,8 +7194,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, } out_unlock: - if (page) - ocfs2_unlock_and_free_pages(&page, num_pages); + if (folio) + ocfs2_unlock_and_free_folios(&folio, num_folios); out_commit: if (ret < 0 && did_quota) diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 4af7abaa6e40..1c0c83362904 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h @@ -254,11 +254,9 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) return !rec->e_leaf_clusters; } -int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end, - struct page **pages, int *num); -void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, - unsigned int from, unsigned int to, - struct page *page, int zero, u64 *phys); +void ocfs2_map_and_dirty_folio(struct inode *inode, handle_t *handle, + size_t from, size_t to, struct folio *folio, int zero, + u64 *phys); /* * Structures which describe a path through a btree, and functions to * manipulate them. diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index db72b3e924b3..40b6bce12951 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -46,7 +46,6 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh = NULL; struct buffer_head *buffer_cache_bh = NULL; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - void *kaddr; trace_ocfs2_symlink_get_block( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -91,17 +90,11 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, * could've happened. Since we've got a reference on * the bh, even if it commits while we're doing the * copy, the data is still good. 
*/ - if (buffer_jbd(buffer_cache_bh) - && ocfs2_inode_is_new(inode)) { - kaddr = kmap_atomic(bh_result->b_page); - if (!kaddr) { - mlog(ML_ERROR, "couldn't kmap!\n"); - goto bail; - } - memcpy(kaddr + (bh_result->b_size * iblock), - buffer_cache_bh->b_data, - bh_result->b_size); - kunmap_atomic(kaddr); + if (buffer_jbd(buffer_cache_bh) && ocfs2_inode_is_new(inode)) { + memcpy_to_folio(bh_result->b_folio, + bh_result->b_size * iblock, + buffer_cache_bh->b_data, + bh_result->b_size); set_buffer_uptodate(bh_result); } brelse(buffer_cache_bh); @@ -215,10 +208,9 @@ bail: return err; } -int ocfs2_read_inline_data(struct inode *inode, struct page *page, +int ocfs2_read_inline_data(struct inode *inode, struct folio *folio, struct buffer_head *di_bh) { - void *kaddr; loff_t size; struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; @@ -230,7 +222,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, size = i_size_read(inode); - if (size > PAGE_SIZE || + if (size > folio_size(folio) || size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) { ocfs2_error(inode->i_sb, "Inode %llu has with inline data has bad size: %Lu\n", @@ -239,25 +231,18 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page, return -EROFS; } - kaddr = kmap_atomic(page); - if (size) - memcpy(kaddr, di->id2.i_data.id_data, size); - /* Clear the remaining part of the page */ - memset(kaddr + size, 0, PAGE_SIZE - size); - flush_dcache_page(page); - kunmap_atomic(kaddr); - - SetPageUptodate(page); + folio_fill_tail(folio, 0, di->id2.i_data.id_data, size); + folio_mark_uptodate(folio); return 0; } -static int ocfs2_readpage_inline(struct inode *inode, struct page *page) +static int ocfs2_readpage_inline(struct inode *inode, struct folio *folio) { int ret; struct buffer_head *di_bh = NULL; - BUG_ON(!PageLocked(page)); + BUG_ON(!folio_test_locked(folio)); BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); ret = ocfs2_read_inode_block(inode, &di_bh); @@ -266,9 +251,9 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) goto out; } - ret = ocfs2_read_inline_data(inode, page, di_bh); + ret = ocfs2_read_inline_data(inode, folio, di_bh); out: - unlock_page(page); + folio_unlock(folio); brelse(di_bh); return ret; @@ -283,7 +268,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) trace_ocfs2_readpage((unsigned long long)oi->ip_blkno, folio->index); - ret = ocfs2_inode_lock_with_page(inode, NULL, 0, &folio->page); + ret = ocfs2_inode_lock_with_folio(inode, NULL, 0, folio); if (ret != 0) { if (ret == AOP_TRUNCATED_PAGE) unlock = 0; @@ -305,7 +290,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) } /* - * i_size might have just been updated as we grabed the meta lock. We + * i_size might have just been updated as we grabbed the meta lock. We * might now be discovering a truncate that hit on another node. * block_read_full_folio->get_block freaks out if it is asked to read * beyond the end of a file, so we check here. 
Callers @@ -322,7 +307,7 @@ static int ocfs2_read_folio(struct file *file, struct folio *folio) } if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - ret = ocfs2_readpage_inline(inode, &folio->page); + ret = ocfs2_readpage_inline(inode, folio); else ret = block_read_full_folio(folio, ocfs2_get_block); unlock = 0; @@ -534,7 +519,7 @@ static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb, * * from == to == 0 is code for "zero the entire cluster region" */ -static void ocfs2_clear_page_regions(struct page *page, +static void ocfs2_clear_folio_regions(struct folio *folio, struct ocfs2_super *osb, u32 cpos, unsigned from, unsigned to) { @@ -543,7 +528,7 @@ static void ocfs2_clear_page_regions(struct page *page, ocfs2_figure_cluster_boundaries(osb, cpos, &cluster_start, &cluster_end); - kaddr = kmap_atomic(page); + kaddr = kmap_local_folio(folio, 0); if (from || to) { if (from > cluster_start) @@ -554,13 +539,13 @@ static void ocfs2_clear_page_regions(struct page *page, memset(kaddr + cluster_start, 0, cluster_end - cluster_start); } - kunmap_atomic(kaddr); + kunmap_local(kaddr); } /* * Nonsparse file systems fully allocate before we get to the write * code. This prevents ocfs2_write() from tagging the write as an - * allocating one, which means ocfs2_map_page_blocks() might try to + * allocating one, which means ocfs2_map_folio_blocks() might try to * read-in the blocks at the tail of our file. Avoid reading them by * testing i_size against each block offset. */ @@ -585,11 +570,10 @@ static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio, * * This will also skip zeroing, which is handled externally. */ -int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, +int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno, struct inode *inode, unsigned int from, unsigned int to, int new) { - struct folio *folio = page_folio(page); int ret = 0; struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; unsigned int block_end, block_start; @@ -729,24 +713,24 @@ struct ocfs2_write_ctxt { unsigned int w_large_pages; /* - * Pages involved in this write. + * Folios involved in this write. * - * w_target_page is the page being written to by the user. + * w_target_folio is the folio being written to by the user. * - * w_pages is an array of pages which always contains - * w_target_page, and in the case of an allocating write with + * w_folios is an array of folios which always contains + * w_target_folio, and in the case of an allocating write with * page_size < cluster size, it will contain zero'd and mapped - * pages adjacent to w_target_page which need to be written + * pages adjacent to w_target_folio which need to be written * out in so that future reads from that region will get * zero's. */ - unsigned int w_num_pages; - struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; - struct page *w_target_page; + unsigned int w_num_folios; + struct folio *w_folios[OCFS2_MAX_CTXT_PAGES]; + struct folio *w_target_folio; /* * w_target_locked is used for page_mkwrite path indicating no unlocking - * against w_target_page in ocfs2_write_end_nolock. + * against w_target_folio in ocfs2_write_end_nolock. 
*/ unsigned int w_target_locked:1; @@ -771,40 +755,40 @@ struct ocfs2_write_ctxt { unsigned int w_unwritten_count; }; -void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) +void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios) { int i; - for(i = 0; i < num_pages; i++) { - if (pages[i]) { - unlock_page(pages[i]); - mark_page_accessed(pages[i]); - put_page(pages[i]); - } + for(i = 0; i < num_folios; i++) { + if (!folios[i]) + continue; + folio_unlock(folios[i]); + folio_mark_accessed(folios[i]); + folio_put(folios[i]); } } -static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_folios(struct ocfs2_write_ctxt *wc) { int i; /* * w_target_locked is only set to true in the page_mkwrite() case. * The intent is to allow us to lock the target page from write_begin() - * to write_end(). The caller must hold a ref on w_target_page. + * to write_end(). The caller must hold a ref on w_target_folio. */ if (wc->w_target_locked) { - BUG_ON(!wc->w_target_page); - for (i = 0; i < wc->w_num_pages; i++) { - if (wc->w_target_page == wc->w_pages[i]) { - wc->w_pages[i] = NULL; + BUG_ON(!wc->w_target_folio); + for (i = 0; i < wc->w_num_folios; i++) { + if (wc->w_target_folio == wc->w_folios[i]) { + wc->w_folios[i] = NULL; break; } } - mark_page_accessed(wc->w_target_page); - put_page(wc->w_target_page); + folio_mark_accessed(wc->w_target_folio); + folio_put(wc->w_target_folio); } - ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); + ocfs2_unlock_and_free_folios(wc->w_folios, wc->w_num_folios); } static void ocfs2_free_unwritten_list(struct inode *inode, @@ -826,7 +810,7 @@ static void ocfs2_free_write_ctxt(struct inode *inode, struct ocfs2_write_ctxt *wc) { ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list); - ocfs2_unlock_pages(wc); + ocfs2_unlock_folios(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -869,29 +853,30 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, * and dirty so they'll be written out (in order to prevent uninitialised * block data from leaking). And clear the new bit. 
*/ -static void ocfs2_zero_new_buffers(struct page *page, unsigned from, unsigned to) +static void ocfs2_zero_new_buffers(struct folio *folio, size_t from, size_t to) { unsigned int block_start, block_end; struct buffer_head *head, *bh; - BUG_ON(!PageLocked(page)); - if (!page_has_buffers(page)) + BUG_ON(!folio_test_locked(folio)); + head = folio_buffers(folio); + if (!head) return; - bh = head = page_buffers(page); + bh = head; block_start = 0; do { block_end = block_start + bh->b_size; if (buffer_new(bh)) { if (block_end > from && block_start < to) { - if (!PageUptodate(page)) { + if (!folio_test_uptodate(folio)) { unsigned start, end; start = max(from, block_start); end = min(to, block_end); - zero_user_segment(page, start, end); + folio_zero_segment(folio, start, end); set_buffer_uptodate(bh); } @@ -916,29 +901,26 @@ static void ocfs2_write_failure(struct inode *inode, int i; unsigned from = user_pos & (PAGE_SIZE - 1), to = user_pos + user_len; - struct page *tmppage; - if (wc->w_target_page) - ocfs2_zero_new_buffers(wc->w_target_page, from, to); + if (wc->w_target_folio) + ocfs2_zero_new_buffers(wc->w_target_folio, from, to); - for(i = 0; i < wc->w_num_pages; i++) { - tmppage = wc->w_pages[i]; + for (i = 0; i < wc->w_num_folios; i++) { + struct folio *folio = wc->w_folios[i]; - if (tmppage && page_has_buffers(tmppage)) { + if (folio && folio_buffers(folio)) { if (ocfs2_should_order_data(inode)) ocfs2_jbd2_inode_add_write(wc->w_handle, inode, user_pos, user_len); - block_commit_write(tmppage, from, to); + block_commit_write(folio, from, to); } } } -static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, - struct ocfs2_write_ctxt *wc, - struct page *page, u32 cpos, - loff_t user_pos, unsigned user_len, - int new) +static int ocfs2_prepare_folio_for_write(struct inode *inode, u64 *p_blkno, + struct ocfs2_write_ctxt *wc, struct folio *folio, u32 cpos, + loff_t user_pos, unsigned user_len, int new) { int ret; unsigned int map_from = 0, map_to = 0; @@ -951,20 +933,19 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, /* treat the write as new if the a hole/lseek spanned across * the page boundary. */ - new = new | ((i_size_read(inode) <= page_offset(page)) && - (page_offset(page) <= user_pos)); + new = new | ((i_size_read(inode) <= folio_pos(folio)) && + (folio_pos(folio) <= user_pos)); - if (page == wc->w_target_page) { + if (folio == wc->w_target_folio) { map_from = user_pos & (PAGE_SIZE - 1); map_to = map_from + user_len; if (new) - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - cluster_start, cluster_end, - new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + cluster_start, cluster_end, new); else - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - map_from, map_to, new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + map_from, map_to, new); if (ret) { mlog_errno(ret); goto out; @@ -978,7 +959,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, } } else { /* - * If we haven't allocated the new page yet, we + * If we haven't allocated the new folio yet, we * shouldn't be writing it out without copying user * data. This is likely a math error from the caller. 
*/ @@ -987,8 +968,8 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, map_from = cluster_start; map_to = cluster_end; - ret = ocfs2_map_page_blocks(page, p_blkno, inode, - cluster_start, cluster_end, new); + ret = ocfs2_map_folio_blocks(folio, p_blkno, inode, + cluster_start, cluster_end, new); if (ret) { mlog_errno(ret); goto out; @@ -996,20 +977,20 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, } /* - * Parts of newly allocated pages need to be zero'd. + * Parts of newly allocated folios need to be zero'd. * * Above, we have also rewritten 'to' and 'from' - as far as * the rest of the function is concerned, the entire cluster - * range inside of a page needs to be written. + * range inside of a folio needs to be written. * - * We can skip this if the page is up to date - it's already + * We can skip this if the folio is uptodate - it's already * been zero'd from being read in as a hole. */ - if (new && !PageUptodate(page)) - ocfs2_clear_page_regions(page, OCFS2_SB(inode->i_sb), + if (new && !folio_test_uptodate(folio)) + ocfs2_clear_folio_regions(folio, OCFS2_SB(inode->i_sb), cpos, user_data_from, user_data_to); - flush_dcache_page(page); + flush_dcache_folio(folio); out: return ret; @@ -1018,11 +999,9 @@ out: /* * This function will only grab one clusters worth of pages. */ -static int ocfs2_grab_pages_for_write(struct address_space *mapping, - struct ocfs2_write_ctxt *wc, - u32 cpos, loff_t user_pos, - unsigned user_len, int new, - struct page *mmap_page) +static int ocfs2_grab_folios_for_write(struct address_space *mapping, + struct ocfs2_write_ctxt *wc, u32 cpos, loff_t user_pos, + unsigned user_len, int new, struct folio *mmap_folio) { int ret = 0, i; unsigned long start, target_index, end_index, index; @@ -1039,7 +1018,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, * last page of the write. */ if (new) { - wc->w_num_pages = ocfs2_pages_per_cluster(inode->i_sb); + wc->w_num_folios = ocfs2_pages_per_cluster(inode->i_sb); start = ocfs2_align_clusters_to_page_index(inode->i_sb, cpos); /* * We need the index *past* the last page we could possibly @@ -1049,15 +1028,15 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, last_byte = max(user_pos + user_len, i_size_read(inode)); BUG_ON(last_byte < 1); end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1; - if ((start + wc->w_num_pages) > end_index) - wc->w_num_pages = end_index - start; + if ((start + wc->w_num_folios) > end_index) + wc->w_num_folios = end_index - start; } else { - wc->w_num_pages = 1; + wc->w_num_folios = 1; start = target_index; } end_index = (user_pos + user_len - 1) >> PAGE_SHIFT; - for(i = 0; i < wc->w_num_pages; i++) { + for(i = 0; i < wc->w_num_folios; i++) { index = start + i; if (index >= target_index && index <= end_index && @@ -1067,37 +1046,38 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping, * and wants us to directly use the page * passed in. 
*/ - lock_page(mmap_page); + folio_lock(mmap_folio); /* Exit and let the caller retry */ - if (mmap_page->mapping != mapping) { - WARN_ON(mmap_page->mapping); - unlock_page(mmap_page); + if (mmap_folio->mapping != mapping) { + WARN_ON(mmap_folio->mapping); + folio_unlock(mmap_folio); ret = -EAGAIN; goto out; } - get_page(mmap_page); - wc->w_pages[i] = mmap_page; + folio_get(mmap_folio); + wc->w_folios[i] = mmap_folio; wc->w_target_locked = true; } else if (index >= target_index && index <= end_index && wc->w_type == OCFS2_WRITE_DIRECT) { /* Direct write has no mapping page. */ - wc->w_pages[i] = NULL; + wc->w_folios[i] = NULL; continue; } else { - wc->w_pages[i] = find_or_create_page(mapping, index, - GFP_NOFS); - if (!wc->w_pages[i]) { - ret = -ENOMEM; + wc->w_folios[i] = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, + GFP_NOFS); + if (IS_ERR(wc->w_folios[i])) { + ret = PTR_ERR(wc->w_folios[i]); mlog_errno(ret); goto out; } } - wait_for_stable_page(wc->w_pages[i]); + folio_wait_stable(wc->w_folios[i]); if (index == target_index) - wc->w_target_page = wc->w_pages[i]; + wc->w_target_folio = wc->w_folios[i]; } out: if (ret) @@ -1181,19 +1161,18 @@ static int ocfs2_write_cluster(struct address_space *mapping, if (!should_zero) p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1); - for(i = 0; i < wc->w_num_pages; i++) { + for (i = 0; i < wc->w_num_folios; i++) { int tmpret; /* This is the direct io target page. */ - if (wc->w_pages[i] == NULL) { + if (wc->w_folios[i] == NULL) { p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits)); continue; } - tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc, - wc->w_pages[i], cpos, - user_pos, user_len, - should_zero); + tmpret = ocfs2_prepare_folio_for_write(inode, &p_blkno, wc, + wc->w_folios[i], cpos, user_pos, user_len, + should_zero); if (tmpret) { mlog_errno(tmpret); if (ret == 0) @@ -1472,7 +1451,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, { int ret; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct page *page; + struct folio *folio; handle_t *handle; struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; @@ -1483,19 +1462,21 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, goto out; } - page = find_or_create_page(mapping, 0, GFP_NOFS); - if (!page) { + folio = __filemap_get_folio(mapping, 0, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); + if (IS_ERR(folio)) { ocfs2_commit_trans(osb, handle); - ret = -ENOMEM; + ret = PTR_ERR(folio); mlog_errno(ret); goto out; } /* - * If we don't set w_num_pages then this page won't get unlocked + * If we don't set w_num_folios then this folio won't get unlocked * and freed on cleanup of the write context. 
*/ - wc->w_pages[0] = wc->w_target_page = page; - wc->w_num_pages = 1; + wc->w_target_folio = folio; + wc->w_folios[0] = folio; + wc->w_num_folios = 1; ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE); @@ -1509,8 +1490,8 @@ static int ocfs2_write_begin_inline(struct address_space *mapping, if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)) ocfs2_set_inode_data_inline(inode, di); - if (!PageUptodate(page)) { - ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh); + if (!folio_test_uptodate(folio)) { + ret = ocfs2_read_inline_data(inode, folio, wc->w_di_bh); if (ret) { ocfs2_commit_trans(osb, handle); @@ -1533,9 +1514,8 @@ int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size) } static int ocfs2_try_to_write_inline_data(struct address_space *mapping, - struct inode *inode, loff_t pos, - unsigned len, struct page *mmap_page, - struct ocfs2_write_ctxt *wc) + struct inode *inode, loff_t pos, size_t len, + struct folio *mmap_folio, struct ocfs2_write_ctxt *wc) { int ret, written = 0; loff_t end = pos + len; @@ -1550,7 +1530,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping, * Handle inodes which already have inline data 1st. */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { - if (mmap_page == NULL && + if (mmap_folio == NULL && ocfs2_size_fits_inline_data(wc->w_di_bh, end)) goto do_inline_write; @@ -1574,7 +1554,7 @@ static int ocfs2_try_to_write_inline_data(struct address_space *mapping, * Check whether the write can fit. */ di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; - if (mmap_page || + if (mmap_folio || end > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) return 0; @@ -1641,9 +1621,9 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, } int ocfs2_write_begin_nolock(struct address_space *mapping, - loff_t pos, unsigned len, ocfs2_write_type_t type, - struct folio **foliop, void **fsdata, - struct buffer_head *di_bh, struct page *mmap_page) + loff_t pos, unsigned len, ocfs2_write_type_t type, + struct folio **foliop, void **fsdata, + struct buffer_head *di_bh, struct folio *mmap_folio) { int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; unsigned int clusters_to_alloc, extents_to_split, clusters_need = 0; @@ -1666,7 +1646,7 @@ try_again: if (ocfs2_supports_inline_data(osb)) { ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len, - mmap_page, wc); + mmap_folio, wc); if (ret == 1) { ret = 0; goto success; @@ -1718,7 +1698,7 @@ try_again: (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), - pos, len, type, mmap_page, + pos, len, type, mmap_folio, clusters_to_alloc, extents_to_split); /* @@ -1789,21 +1769,21 @@ try_again: } /* - * Fill our page array first. That way we've grabbed enough so + * Fill our folio array first. That way we've grabbed enough so * that we can zero and flush if we error after adding the * extent. */ - ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len, - cluster_of_pages, mmap_page); + ret = ocfs2_grab_folios_for_write(mapping, wc, wc->w_cpos, pos, len, + cluster_of_pages, mmap_folio); if (ret) { /* - * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock - * the target page. In this case, we exit with no error and no target - * page. This will trigger the caller, page_mkwrite(), to re-try - * the operation. + * ocfs2_grab_folios_for_write() returns -EAGAIN if it + * could not lock the target folio. 
In this case, we exit + * with no error and no target folio. This will trigger + * the caller, page_mkwrite(), to re-try the operation. */ if (type == OCFS2_WRITE_MMAP && ret == -EAGAIN) { - BUG_ON(wc->w_target_page); + BUG_ON(wc->w_target_folio); ret = 0; goto out_quota; } @@ -1826,7 +1806,7 @@ try_again: success: if (foliop) - *foliop = page_folio(wc->w_target_page); + *foliop = wc->w_target_folio; *fsdata = wc; return 0; out_quota: @@ -1845,7 +1825,7 @@ out: * to VM code. */ if (wc->w_target_locked) - unlock_page(mmap_page); + folio_unlock(mmap_folio); ocfs2_free_write_ctxt(inode, wc); @@ -1924,18 +1904,15 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, struct ocfs2_dinode *di, struct ocfs2_write_ctxt *wc) { - void *kaddr; - if (unlikely(*copied < len)) { - if (!PageUptodate(wc->w_target_page)) { + if (!folio_test_uptodate(wc->w_target_folio)) { *copied = 0; return; } } - kaddr = kmap_atomic(wc->w_target_page); - memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied); - kunmap_atomic(kaddr); + memcpy_from_folio(di->id2.i_data.id_data + pos, wc->w_target_folio, + pos, *copied); trace_ocfs2_write_end_inline( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -1944,17 +1921,16 @@ static void ocfs2_write_end_inline(struct inode *inode, loff_t pos, le16_to_cpu(di->i_dyn_features)); } -int ocfs2_write_end_nolock(struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, void *fsdata) +int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos, + unsigned len, unsigned copied, void *fsdata) { int i, ret; - unsigned from, to, start = pos & (PAGE_SIZE - 1); + size_t from, to, start = pos & (PAGE_SIZE - 1); struct inode *inode = mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); struct ocfs2_write_ctxt *wc = fsdata; struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data; handle_t *handle = wc->w_handle; - struct page *tmppage; BUG_ON(!list_empty(&wc->w_unwritten_list)); @@ -1973,44 +1949,44 @@ int ocfs2_write_end_nolock(struct address_space *mapping, goto out_write_size; } - if (unlikely(copied < len) && wc->w_target_page) { + if (unlikely(copied < len) && wc->w_target_folio) { loff_t new_isize; - if (!PageUptodate(wc->w_target_page)) + if (!folio_test_uptodate(wc->w_target_folio)) copied = 0; new_isize = max_t(loff_t, i_size_read(inode), pos + copied); - if (new_isize > page_offset(wc->w_target_page)) - ocfs2_zero_new_buffers(wc->w_target_page, start+copied, + if (new_isize > folio_pos(wc->w_target_folio)) + ocfs2_zero_new_buffers(wc->w_target_folio, start+copied, start+len); else { /* - * When page is fully beyond new isize (data copy - * failed), do not bother zeroing the page. Invalidate + * When folio is fully beyond new isize (data copy + * failed), do not bother zeroing the folio. Invalidate * it instead so that writeback does not get confused * put page & buffer dirty bits into inconsistent * state. */ - block_invalidate_folio(page_folio(wc->w_target_page), - 0, PAGE_SIZE); + block_invalidate_folio(wc->w_target_folio, 0, + folio_size(wc->w_target_folio)); } } - if (wc->w_target_page) - flush_dcache_page(wc->w_target_page); + if (wc->w_target_folio) + flush_dcache_folio(wc->w_target_folio); - for(i = 0; i < wc->w_num_pages; i++) { - tmppage = wc->w_pages[i]; + for (i = 0; i < wc->w_num_folios; i++) { + struct folio *folio = wc->w_folios[i]; - /* This is the direct io target page. 
*/ - if (tmppage == NULL) + /* This is the direct io target folio */ + if (folio == NULL) continue; - if (tmppage == wc->w_target_page) { + if (folio == wc->w_target_folio) { from = wc->w_target_from; to = wc->w_target_to; - BUG_ON(from > PAGE_SIZE || - to > PAGE_SIZE || + BUG_ON(from > folio_size(folio) || + to > folio_size(folio) || to < from); } else { /* @@ -2019,19 +1995,17 @@ int ocfs2_write_end_nolock(struct address_space *mapping, * to flush their entire range. */ from = 0; - to = PAGE_SIZE; + to = folio_size(folio); } - if (page_has_buffers(tmppage)) { + if (folio_buffers(folio)) { if (handle && ocfs2_should_order_data(inode)) { - loff_t start_byte = - ((loff_t)tmppage->index << PAGE_SHIFT) + - from; + loff_t start_byte = folio_pos(folio) + from; loff_t length = to - from; ocfs2_jbd2_inode_add_write(handle, inode, start_byte, length); } - block_commit_write(tmppage, from, to); + block_commit_write(folio, from, to); } } @@ -2060,7 +2034,7 @@ out: * this lock and will ask for the page lock when flushing the data. * put it here to preserve the unlock order. */ - ocfs2_unlock_pages(wc); + ocfs2_unlock_folios(wc); if (handle) ocfs2_commit_trans(osb, handle); diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 1d1b4b7edba0..114efc9111e4 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -8,16 +8,11 @@ #include <linux/fs.h> -handle_t *ocfs2_start_walk_page_trans(struct inode *inode, - struct page *page, - unsigned from, - unsigned to); - -int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, +int ocfs2_map_folio_blocks(struct folio *folio, u64 *p_blkno, struct inode *inode, unsigned int from, unsigned int to, int new); -void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages); +void ocfs2_unlock_and_free_folios(struct folio **folios, int num_folios); int walk_page_buffers( handle_t *handle, struct buffer_head *head, @@ -37,11 +32,11 @@ typedef enum { } ocfs2_write_type_t; int ocfs2_write_begin_nolock(struct address_space *mapping, - loff_t pos, unsigned len, ocfs2_write_type_t type, - struct folio **foliop, void **fsdata, - struct buffer_head *di_bh, struct page *mmap_page); + loff_t pos, unsigned len, ocfs2_write_type_t type, + struct folio **foliop, void **fsdata, + struct buffer_head *di_bh, struct folio *mmap_folio); -int ocfs2_read_inline_data(struct inode *inode, struct page *page, +int ocfs2_read_inline_data(struct inode *inode, struct folio *folio, struct buffer_head *di_bh); int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4200a0341343..724350925aff 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -3,6 +3,7 @@ * Copyright (C) 2004, 2005 Oracle. All rights reserved. */ +#include "linux/kstrtox.h" #include <linux/kernel.h> #include <linux/sched.h> #include <linux/jiffies.h> @@ -1020,7 +1021,7 @@ fire_callbacks: if (list_empty(&slot->ds_live_item)) goto out; - /* live nodes only go dead after enough consequtive missed + /* live nodes only go dead after enough consecutive missed * samples.. 
reset the missed counter whenever we see * activity */ if (slot->ds_equal_samples >= o2hb_dead_threshold || gen_changed) { @@ -1535,10 +1536,11 @@ static int o2hb_read_block_input(struct o2hb_region *reg, { unsigned long bytes; char *p = (char *)page; + int ret; - bytes = simple_strtoul(p, &p, 0); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; + ret = kstrtoul(p, 0, &bytes); + if (ret) + return ret; /* Heartbeat and fs min / max block sizes are the same. */ if (bytes > 4096 || bytes < 512) @@ -1622,13 +1624,14 @@ static ssize_t o2hb_region_blocks_store(struct config_item *item, struct o2hb_region *reg = to_o2hb_region(item); unsigned long tmp; char *p = (char *)page; + int ret; if (reg->hr_bdev_file) return -EINVAL; - tmp = simple_strtoul(p, &p, 0); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; + ret = kstrtoul(p, 0, &tmp); + if (ret) + return ret; if (tmp > O2NM_MAX_NODES || tmp == 0) return -ERANGE; @@ -1776,8 +1779,8 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, if (o2nm_this_node() == O2NM_MAX_NODES) return -EINVAL; - fd = simple_strtol(p, &p, 0); - if (!p || (*p && (*p != '\n'))) + ret = kstrtol(p, 0, &fd); + if (ret < 0) return -EINVAL; if (fd < 0 || fd >= INT_MAX) @@ -2136,10 +2139,11 @@ static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *ite { unsigned long tmp; char *p = (char *)page; + int ret; - tmp = simple_strtoul(p, &p, 10); - if (!p || (*p && (*p != '\n'))) - return -EINVAL; + ret = kstrtoul(p, 10, &tmp); + if (ret) + return ret; /* this will validate ranges for us. */ o2hb_dead_threshold_set((unsigned int) tmp); diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index b73fc42e46ff..630bd5a3dd0d 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -29,7 +29,7 @@ * just calling printk() so that this can eventually make its way through * relayfs along with the debugging messages. Everything else gets KERN_DEBUG. * The inline tests and macro dance give GCC the opportunity to quite cleverly - * only emit the appropriage printk() when the caller passes in a constant + * only emit the appropriate printk() when the caller passes in a constant * mask, as is almost always the case. * * All this bitmask nonsense is managed from the files under diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c index 8bf17231d7b7..bfb8b456876c 100644 --- a/fs/ocfs2/cluster/quorum.c +++ b/fs/ocfs2/cluster/quorum.c @@ -23,7 +23,7 @@ * race between when we see a node start heartbeating and when we connect * to it. * - * So nodes that are in this transtion put a hold on the quorum decision + * So nodes that are in this transition put a hold on the quorum decision * with a counter. As they fall out of this transition they drop the count * and if they're the last, they fire off the decision. */ @@ -189,7 +189,7 @@ static void o2quo_clear_hold(struct o2quo_state *qs, u8 node) } /* as a node comes up we delay the quorum decision until we know the fate of - * the connection. the hold will be droped in conn_up or hb_down. it might be + * the connection. the hold will be dropped in conn_up or hb_down. it might be * perpetuated by con_err until hb_down. if we already have a conn, we might * be dropping a hold that conn_up got. */ void o2quo_hb_up(u8 node) @@ -256,7 +256,7 @@ void o2quo_hb_still_up(u8 node) } /* This is analogous to hb_up. as a node's connection comes up we delay the - * quorum decision until we see it heartbeating. 
the hold will be droped in + * quorum decision until we see it heartbeating. the hold will be dropped in * hb_up or hb_down. it might be perpetuated by con_err until hb_down. if * it's already heartbeating we might be dropping a hold that conn_up got. * */ diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2b8fa3e782fb..fce9beb214f0 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -5,13 +5,13 @@ * * ---- * - * Callers for this were originally written against a very simple synchronus + * Callers for this were originally written against a very simple synchronous * API. This implementation reflects those simple callers. Some day I'm sure * we'll need to move to a more robust posting/callback mechanism. * * Transmit calls pass in kernel virtual addresses and block copying this into * the socket's tx buffers via a usual blocking sendmsg. They'll block waiting - * for a failed socket to timeout. TX callers can also pass in a poniter to an + * for a failed socket to timeout. TX callers can also pass in a pointer to an * 'int' which gets filled with an errno off the wire in response to the * message they send. * @@ -101,7 +101,7 @@ static struct socket *o2net_listen_sock; * o2net_wq. teardown detaches the callbacks before destroying the workqueue. * quorum work is queued as sock containers are shutdown.. stop_listening * tears down all the node's sock containers, preventing future shutdowns - * and queued quroum work, before canceling delayed quorum work and + * and queued quorum work, before canceling delayed quorum work and * destroying the work queue. */ static struct workqueue_struct *o2net_wq; @@ -724,7 +724,7 @@ static void o2net_shutdown_sc(struct work_struct *work) if (o2net_unregister_callbacks(sc->sc_sock->sk, sc)) { /* we shouldn't flush as we're in the thread, the * races with pending sc work structs are harmless */ - del_timer_sync(&sc->sc_idle_timeout); + timer_delete_sync(&sc->sc_idle_timeout); o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); sc_put(sc); kernel_sock_shutdown(sc->sc_sock, SHUT_RDWR); @@ -1419,7 +1419,7 @@ out: return ret; } -/* this work func is triggerd by data ready. it reads until it can read no +/* this work func is triggered by data ready. it reads until it can read no * more. it interprets 0, eof, as fatal. if data_ready hits while we're doing * our work the work struct will be marked and we'll be called again. 
*/ static void o2net_rx_until_empty(struct work_struct *work) diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index a9b8688aaf30..1873bbbb7e5b 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -32,7 +32,8 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) } -static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) +static int ocfs2_dentry_revalidate(struct inode *dir, const struct qstr *name, + struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ @@ -44,8 +45,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); osb = OCFS2_SB(dentry->d_sb); - trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, - dentry->d_name.name); + trace_ocfs2_dentry_revalidate(dentry, name->len, name->name); /* For a negative dentry - * check the generation number of the parent and compare with the @@ -53,12 +53,8 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) */ if (inode == NULL) { unsigned long gen = (unsigned long) dentry->d_fsdata; - unsigned long pgen; - spin_lock(&dentry->d_lock); - pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; - spin_unlock(&dentry->d_lock); - trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, - dentry->d_name.name, + unsigned long pgen = OCFS2_I(dir)->ip_dir_lock_gen; + trace_ocfs2_dentry_revalidate_negative(name->len, name->name, pgen, gen); if (gen != pgen) goto bail; diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h index 847a52dcbe7d..1969db8ffa9c 100644 --- a/fs/ocfs2/dlm/dlmapi.h +++ b/fs/ocfs2/dlm/dlmapi.h @@ -118,7 +118,7 @@ struct dlm_lockstatus { #define LKM_VALBLK 0x00000100 /* lock value block request */ #define LKM_NOQUEUE 0x00000200 /* non blocking request */ #define LKM_CONVERT 0x00000400 /* conversion request */ -#define LKM_NODLCKWT 0x00000800 /* this lock wont deadlock (U) */ +#define LKM_NODLCKWT 0x00000800 /* this lock won't deadlock (U) */ #define LKM_UNLOCK 0x00001000 /* deallocate this lock */ #define LKM_CANCEL 0x00002000 /* cancel conversion request */ #define LKM_DEQALL 0x00004000 /* remove all locks held by proc (U) */ diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e9ef4e2b0e75..fe4fdd09bae3 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -14,6 +14,7 @@ #include <linux/spinlock.h> #include <linux/debugfs.h> #include <linux/export.h> +#include <linux/string_choices.h> #include "../cluster/heartbeat.h" #include "../cluster/nodemanager.h" @@ -90,12 +91,12 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res) buf, res->owner, res->state); printk(" last used: %lu, refcnt: %u, on purge list: %s\n", res->last_used, kref_read(&res->refs), - list_empty(&res->purge) ? "no" : "yes"); + str_no_yes(list_empty(&res->purge))); printk(" on dirty list: %s, on reco list: %s, " "migrating pending: %s\n", - list_empty(&res->dirty) ? "no" : "yes", - list_empty(&res->recovering) ? "no" : "yes", - res->migration_pending ? 
"yes" : "no"); + str_no_yes(list_empty(&res->dirty)), + str_no_yes(list_empty(&res->recovering)), + str_yes_no(res->migration_pending)); printk(" inflight locks: %d, asts reserved: %d\n", res->inflight_locks, atomic_read(&res->asts_reserved)); dlm_print_lockres_refmap(res); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index d610da8e2f24..86bb1a03bcc1 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -21,7 +21,7 @@ #include <linux/inet.h> #include <linux/spinlock.h> #include <linux/delay.h> - +#include <linux/string_choices.h> #include "../cluster/heartbeat.h" #include "../cluster/nodemanager.h" @@ -2859,7 +2859,7 @@ static int dlm_mark_lockres_migrating(struct dlm_ctxt *dlm, dlm_lockres_release_ast(dlm, res); mlog(0, "about to wait on migration_wq, dirty=%s\n", - res->state & DLM_LOCK_RES_DIRTY ? "yes" : "no"); + str_yes_no(res->state & DLM_LOCK_RES_DIRTY)); /* if the extra ref we just put was the final one, this * will pass thru immediately. otherwise, we need to wait * for the last ast to finish. */ @@ -2869,12 +2869,12 @@ again: msecs_to_jiffies(1000)); if (ret < 0) { mlog(0, "woken again: migrating? %s, dead? %s\n", - res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no", - test_bit(target, dlm->domain_map) ? "no":"yes"); + str_yes_no(res->state & DLM_LOCK_RES_MIGRATING), + str_no_yes(test_bit(target, dlm->domain_map))); } else { mlog(0, "all is well: migrating? %s, dead? %s\n", - res->state & DLM_LOCK_RES_MIGRATING ? "yes":"no", - test_bit(target, dlm->domain_map) ? "no":"yes"); + str_yes_no(res->state & DLM_LOCK_RES_MIGRATING), + str_no_yes(test_bit(target, dlm->domain_map))); } if (!dlm_migration_can_proceed(dlm, res, target)) { mlog(0, "trying again...\n"); diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 50da8af988c1..67fc62a49a76 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -22,7 +22,7 @@ #include <linux/timer.h> #include <linux/kthread.h> #include <linux/delay.h> - +#include <linux/string_choices.h> #include "../cluster/heartbeat.h" #include "../cluster/nodemanager.h" @@ -207,7 +207,7 @@ void dlm_complete_recovery_thread(struct dlm_ctxt *dlm) * 1) all recovery threads cluster wide will work on recovering * ONE node at a time * 2) negotiate who will take over all the locks for the dead node. - * thats right... ALL the locks. + * that's right... ALL the locks. * 3) once a new master is chosen, everyone scans all locks * and moves aside those mastered by the dead guy * 4) each of these locks should be locked until recovery is done @@ -581,8 +581,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) msecs_to_jiffies(1000)); mlog(0, "waited 1 sec for %u, " "dead? %s\n", ndata->node_num, - dlm_is_node_dead(dlm, ndata->node_num) ? - "yes" : "no"); + str_yes_no(dlm_is_node_dead(dlm, ndata->node_num))); } else { /* -ENOMEM on the other node */ mlog(0, "%s: node %u returned " @@ -677,7 +676,7 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) spin_unlock(&dlm_reco_state_lock); mlog(0, "pass #%d, all_nodes_done?: %s\n", ++pass, - all_nodes_done?"yes":"no"); + str_yes_no(all_nodes_done)); if (all_nodes_done) { int ret; @@ -1469,7 +1468,7 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data, * The first one is handled at the end of this function. The * other two are handled in the worker thread after locks have * been attached. Yes, we don't wait for purge time to match - * kref_init. The lockres will still have atleast one ref + * kref_init. 
The lockres will still have at least one ref * added because it is in the hash __dlm_insert_lockres() */ extra_refs++; @@ -1735,7 +1734,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&res->spinlock); } } else { - /* put.. incase we are not the master */ + /* put.. in case we are not the master */ spin_unlock(&res->spinlock); dlm_lockres_put(res); } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 7fc0e920eda7..5130ec44e5e1 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -20,6 +20,7 @@ #include <linux/module.h> #include <linux/fs.h> +#include <linux/fs_context.h> #include <linux/pagemap.h> #include <linux/types.h> #include <linux/slab.h> @@ -401,10 +402,10 @@ static struct inode *dlmfs_get_inode(struct inode *parent, * File creation. Allocate an inode, and we're done.. */ /* SMP-safe */ -static int dlmfs_mkdir(struct mnt_idmap * idmap, - struct inode * dir, - struct dentry * dentry, - umode_t mode) +static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap, + struct inode * dir, + struct dentry * dentry, + umode_t mode) { int status; struct inode *inode = NULL; @@ -447,7 +448,7 @@ static int dlmfs_mkdir(struct mnt_idmap * idmap, bail: if (status < 0) iput(inode); - return status; + return ERR_PTR(status); } static int dlmfs_create(struct mnt_idmap *idmap, @@ -506,9 +507,7 @@ bail: return status; } -static int dlmfs_fill_super(struct super_block * sb, - void * data, - int silent) +static int dlmfs_fill_super(struct super_block *sb, struct fs_context *fc) { sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_SIZE; @@ -556,17 +555,27 @@ static const struct inode_operations dlmfs_file_inode_operations = { .setattr = dlmfs_file_setattr, }; -static struct dentry *dlmfs_mount(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int dlmfs_get_tree(struct fs_context *fc) +{ + return get_tree_nodev(fc, dlmfs_fill_super); +} + +static const struct fs_context_operations dlmfs_context_ops = { + .get_tree = dlmfs_get_tree, +}; + +static int dlmfs_init_fs_context(struct fs_context *fc) { - return mount_nodev(fs_type, flags, data, dlmfs_fill_super); + fc->ops = &dlmfs_context_ops; + + return 0; } static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", - .mount = dlmfs_mount, .kill_sb = kill_litter_super, + .init_fs_context = dlmfs_init_fs_context, }; MODULE_ALIAS_FS("ocfs2_dlmfs"); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 764ecbd5ad41..92a6149da9c1 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -19,6 +19,7 @@ #include <linux/delay.h> #include <linux/quotaops.h> #include <linux/sched/signal.h> +#include <linux/string_choices.h> #define MLOG_MASK_PREFIX ML_DLM_GLUE #include <cluster/masklog.h> @@ -794,7 +795,7 @@ void ocfs2_lock_res_free(struct ocfs2_lock_res *res) /* * Keep a list of processes who have interest in a lockres. - * Note: this is now only uesed for check recursive cluster locking. + * Note: this is now only used for check recursive cluster locking. */ static inline void ocfs2_add_holder(struct ocfs2_lock_res *lockres, struct ocfs2_lock_holder *oh) @@ -2529,30 +2530,28 @@ bail: /* * This is working around a lock inversion between tasks acquiring DLM - * locks while holding a page lock and the downconvert thread which - * blocks dlm lock acquiry while acquiring page locks. + * locks while holding a folio lock and the downconvert thread which + * blocks dlm lock acquiry while acquiring folio locks. 
* - * ** These _with_page variantes are only intended to be called from aop - * methods that hold page locks and return a very specific *positive* error + * ** These _with_folio variants are only intended to be called from aop + * methods that hold folio locks and return a very specific *positive* error * code that aop methods pass up to the VFS -- test for errors with != 0. ** * * The DLM is called such that it returns -EAGAIN if it would have * blocked waiting for the downconvert thread. In that case we unlock - * our page so the downconvert thread can make progress. Once we've + * our folio so the downconvert thread can make progress. Once we've * done this we have to return AOP_TRUNCATED_PAGE so the aop method * that called us can bubble that back up into the VFS who will then * immediately retry the aop call. */ -int ocfs2_inode_lock_with_page(struct inode *inode, - struct buffer_head **ret_bh, - int ex, - struct page *page) +int ocfs2_inode_lock_with_folio(struct inode *inode, + struct buffer_head **ret_bh, int ex, struct folio *folio) { int ret; ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); if (ret == -EAGAIN) { - unlock_page(page); + folio_unlock(folio); /* * If we can't get inode lock immediately, we should not return * directly here, since this will lead to a softlockup problem. @@ -2630,7 +2629,7 @@ void ocfs2_inode_unlock(struct inode *inode, } /* - * This _tracker variantes are introduced to deal with the recursive cluster + * This _tracker variants are introduced to deal with the recursive cluster * locking issue. The idea is to keep track of a lock holder on the stack of * the current process. If there's a lock holder on the stack, we know the * task context is already protected by cluster locking. Currently, they're @@ -2735,7 +2734,7 @@ void ocfs2_inode_unlock_tracker(struct inode *inode, struct ocfs2_lock_res *lockres; lockres = &OCFS2_I(inode)->ip_inode_lockres; - /* had_lock means that the currect process already takes the cluster + /* had_lock means that the current process already takes the cluster * lock previously. * If had_lock is 1, we have nothing to do here. * If had_lock is 0, we will release the lock. @@ -3802,9 +3801,9 @@ recheck: * set when the ast is received for an upconvert just before the * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast * on the heels of the ast, we want to delay the downconvert just - * enough to allow the up requestor to do its task. Because this + * enough to allow the up requester to do its task. Because this * lock is in the blocked queue, the lock will be downconverted - * as soon as the requestor is done with the lock. + * as soon as the requester is done with the lock. */ if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) goto leave_requeue; @@ -4339,7 +4338,7 @@ unqueue: ocfs2_schedule_blocked_lock(osb, lockres); mlog(ML_BASTS, "lockres %s, requeue = %s.\n", lockres->l_name, - ctl.requeue ? 
"yes" : "no"); + str_yes_no(ctl.requeue)); spin_unlock_irqrestore(&lockres->l_lock, flags); if (ctl.unblock_action != UNBLOCK_CONTINUE diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index e5da5809ed95..a3ebd7303ea2 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -137,10 +137,8 @@ int ocfs2_inode_lock_full_nested(struct inode *inode, int ex, int arg_flags, int subclass); -int ocfs2_inode_lock_with_page(struct inode *inode, - struct buffer_head **ret_bh, - int ex, - struct page *page); +int ocfs2_inode_lock_with_folio(struct inode *inode, + struct buffer_head **ret_bh, int ex, struct folio *folio); /* Variants without special locking class or flags */ #define ocfs2_inode_lock_full(i, r, e, f)\ ocfs2_inode_lock_full_nested(i, r, e, f, OI_LS_NORMAL) diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index f7672472fa82..930150ed5db1 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -435,6 +435,16 @@ static int ocfs2_get_clusters_nocache(struct inode *inode, } } + if (le16_to_cpu(el->l_next_free_rec) > le16_to_cpu(el->l_count)) { + ocfs2_error(inode->i_sb, + "Inode %lu has an invalid extent (next_free_rec %u, count %u)\n", + inode->i_ino, + le16_to_cpu(el->l_next_free_rec), + le16_to_cpu(el->l_count)); + ret = -EROFS; + goto out; + } + i = ocfs2_search_extent_list(el, v_cluster); if (i == -1) { /* diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 957ced628eb1..2056cf08ac1e 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -782,11 +782,11 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, goto out_commit_trans; } - /* Get the offsets within the page that we want to zero */ - zero_from = abs_from & (PAGE_SIZE - 1); - zero_to = abs_to & (PAGE_SIZE - 1); + /* Get the offsets within the folio that we want to zero */ + zero_from = offset_in_folio(folio, abs_from); + zero_to = offset_in_folio(folio, abs_to); if (!zero_to) - zero_to = PAGE_SIZE; + zero_to = folio_size(folio); trace_ocfs2_write_zero_page( (unsigned long long)OCFS2_I(inode)->ip_blkno, @@ -813,7 +813,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* must not update i_size! */ - block_commit_write(&folio->page, block_start + 1, block_start + 1); + block_commit_write(folio, block_start + 1, block_start + 1); } /* diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 2cc5c99fe941..12e5d1f73325 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -200,6 +200,20 @@ bail: return inode; } +static int ocfs2_dinode_has_extents(struct ocfs2_dinode *di) +{ + /* inodes flagged with other stuff in id2 */ + if (di->i_flags & (OCFS2_SUPER_BLOCK_FL | OCFS2_LOCAL_ALLOC_FL | + OCFS2_CHAIN_FL | OCFS2_DEALLOC_FL)) + return 0; + /* i_flags doesn't indicate when id2 is a fast symlink */ + if (S_ISLNK(di->i_mode) && di->i_size && di->i_clusters == 0) + return 0; + if (di->i_dyn_features & OCFS2_INLINE_DATA_FL) + return 0; + + return 1; +} /* * here's how inodes get read from disk: @@ -1122,7 +1136,7 @@ static void ocfs2_clear_inode(struct inode *inode) dquot_drop(inode); - /* To preven remote deletes we hold open lock before, now it + /* To prevent remote deletes we hold open lock before, now it * is time to unlock PR and EX open locks. 
*/ ocfs2_open_unlock(inode); @@ -1437,7 +1451,7 @@ static int ocfs2_filecheck_validate_inode_block(struct super_block *sb, * Call ocfs2_validate_meta_ecc() first since it has ecc repair * function, but we should not return error immediately when ecc * validation fails, because the reason is quite likely the invalid - * inode number inputed. + * inode number inputted. */ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check); if (rc) { @@ -1547,6 +1561,16 @@ static int ocfs2_filecheck_repair_inode_block(struct super_block *sb, le32_to_cpu(di->i_fs_generation)); } + if (ocfs2_dinode_has_extents(di) && + le16_to_cpu(di->id2.i_list.l_next_free_rec) > le16_to_cpu(di->id2.i_list.l_count)) { + di->id2.i_list.l_next_free_rec = di->id2.i_list.l_count; + changed = 1; + mlog(ML_ERROR, + "Filecheck: reset dinode #%llu: l_next_free_rec to %u\n", + (unsigned long long)bh->b_blocknr, + le16_to_cpu(di->id2.i_list.l_next_free_rec)); + } + if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) { ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check); mark_buffer_dirty(bh); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 71beef7f8a60..7ae96fb8807a 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -796,7 +796,7 @@ bail: /* * OCFS2_IOC_INFO handles an array of requests passed from userspace. * - * ocfs2_info_handle() recevies a large info aggregation, grab and + * ocfs2_info_handle() receives a large info aggregation, grab and * validate the request count from header, then break it into small * pieces, later specific handlers can handle them one by one. * diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 1bf188b6866a..e5f58ff2175f 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } -void ocfs2_recovery_exit(struct ocfs2_super *osb) +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) { - struct ocfs2_recovery_map *rm; - - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. 
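The extent_map.c check earlier and the filecheck repair above enforce the same invariant from opposite sides: l_next_free_rec, the number of extent records in use, must never exceed l_count, the number of record slots. A userspace toy of the invariant and the clamp-style repair — the kernel fields are really __le16 on disk and go through le16_to_cpu():

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy model of the ocfs2_extent_list fields involved. */
struct extent_list {
	uint16_t l_count;		/* record slots available */
	uint16_t l_next_free_rec;	/* record slots in use */
};

static bool extent_list_valid(const struct extent_list *el)
{
	return el->l_next_free_rec <= el->l_count;
}

/* Filecheck-style repair: clamp the used count back into range. */
static void extent_list_repair(struct extent_list *el)
{
	if (!extent_list_valid(el))
		el->l_next_free_rec = el->l_count;
}

int main(void)
{
	struct extent_list el = { .l_count = 16, .l_next_free_rec = 40 };

	extent_list_repair(&el);
	assert(el.l_next_free_rec == 16);
	return 0;
}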
*/ + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); +} + +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1249,7 +1271,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh) @@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1569,27 +1603,29 @@ bail: ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) @@ -1956,7 +1992,7 @@ bail: /* * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some - * randomness to the timeout to minimize multple nodes firing the timer at the + * randomness to the timeout to minimize multiple nodes firing the timer at the * same time. 
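The recovery_state machinery above replaces the old disable_recovery flag with a request/acknowledge handshake: the disabling side publishes a *_WANT_DISABLE state and sleeps, and the recovery thread acknowledges by advancing the state to want + 1 — the ordering rule the enum comments added to ocfs2.h later in this series insist on. A userspace toy of the same protocol, with pthreads standing in for the kernel mutex and wait queue (a model, not the kernel code):

#include <pthread.h>
#include <stdbool.h>

/* Enum order encodes the rule: "acknowledged" == "requested" + 1. */
enum rec_state {
	REC_ENABLED = 0,
	REC_QUOTA_WANT_DISABLE,
	REC_QUOTA_DISABLED,
	REC_WANT_DISABLE,
	REC_DISABLED,
};

struct rec_ctx {
	pthread_mutex_t lock;	/* plays osb->recovery_lock */
	pthread_cond_t event;	/* plays osb->recovery_event */
	enum rec_state state;
	bool thread_running;	/* plays recovery_thread_task != NULL */
};

/* Disabling side: mirrors ocfs2_recovery_disable(). */
static void rec_disable(struct rec_ctx *c, enum rec_state want)
{
	pthread_mutex_lock(&c->lock);
	if (!c->thread_running) {
		c->state = want + 1;	/* nobody to acknowledge: finish now */
	} else {
		c->state = want;
		/* wait_event_cmd() analogue: the lock drops while sleeping */
		while (c->thread_running && c->state < want + 1)
			pthread_cond_wait(&c->event, &c->lock);
	}
	pthread_mutex_unlock(&c->lock);
}

/* Recovery-thread side: acknowledge a quota-only disable request. */
static void rec_ack_quota(struct rec_ctx *c)
{
	pthread_mutex_lock(&c->lock);
	if (c->state == REC_QUOTA_WANT_DISABLE) {
		c->state = REC_QUOTA_DISABLED;
		pthread_cond_broadcast(&c->event);
	}
	pthread_mutex_unlock(&c->lock);
}

wait_event_cmd() in the kernel version drops recovery_lock while sleeping exactly the way pthread_cond_wait() drops the mutex here, which is what keeps the handshake deadlock-free.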
*/ static inline unsigned long ocfs2_orphan_scan_timeout(void) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0..6397170f302f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 6ef4cb045ccd..6a314e9f2b49 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -44,13 +44,13 @@ static vm_fault_t ocfs2_fault(struct vm_fault *vmf) } static vm_fault_t __ocfs2_page_mkwrite(struct file *file, - struct buffer_head *di_bh, struct page *page) + struct buffer_head *di_bh, struct folio *folio) { int err; vm_fault_t ret = VM_FAULT_NOPAGE; struct inode *inode = file_inode(file); struct address_space *mapping = inode->i_mapping; - loff_t pos = page_offset(page); + loff_t pos = folio_pos(folio); unsigned int len = PAGE_SIZE; pgoff_t last_index; struct folio *locked_folio = NULL; @@ -72,9 +72,9 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, * * Let VM retry with these cases. */ - if ((page->mapping != inode->i_mapping) || - (!PageUptodate(page)) || - (page_offset(page) >= size)) + if ((folio->mapping != inode->i_mapping) || + !folio_test_uptodate(folio) || + (pos >= size)) goto out; /* @@ -87,11 +87,11 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, * worry about ocfs2_write_begin() skipping some buffer reads * because the "write" would invalidate their data. */ - if (page->index == last_index) + if (folio->index == last_index) len = ((size - 1) & ~PAGE_MASK) + 1; err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, - &locked_folio, &fsdata, di_bh, page); + &locked_folio, &fsdata, di_bh, folio); if (err) { if (err != -ENOSPC) mlog_errno(err); @@ -112,7 +112,7 @@ out: static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf) { - struct page *page = vmf->page; + struct folio *folio = page_folio(vmf->page); struct inode *inode = file_inode(vmf->vma->vm_file); struct buffer_head *di_bh = NULL; sigset_t oldset; @@ -141,7 +141,7 @@ static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf) */ down_write(&OCFS2_I(inode)->ip_alloc_sem); - ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, page); + ret = __ocfs2_page_mkwrite(vmf->vma->vm_file, di_bh, folio); up_write(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index f9d6a4f9ca92..369c7d27befd 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -492,7 +492,7 @@ static int ocfs2_validate_and_adjust_move_goal(struct inode *inode, bg = (struct ocfs2_group_desc *)gd_bh->b_data; /* - * moving goal is not allowd to start with a group desc blok(#0 blk) + * moving goal is not allowed to start with a group desc blok(#0 blk) * let's compromise to the latter cluster. */ if (range->me_goal == le64_to_cpu(bg->bg_blkno)) @@ -658,7 +658,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, /* * probe the victim cluster group to find a proper - * region to fit wanted movement, it even will perfrom + * region to fit wanted movement, it even will perform * a best-effort attempt by compromising to a threshold * around the goal. 
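Both the ocfs2_write_zero_page() hunk earlier and the __ocfs2_page_mkwrite() hunk above turn on the same offset arithmetic. A userspace toy of the two computations, assuming 4K pages; the folio versions use offset_in_folio() and folio_size(), so the same math also holds for folios larger than one page:

#include <assert.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* offset_in_folio() analogue for an order-0 folio */
static unsigned long off_in_page(unsigned long long pos)
{
	return pos & (PAGE_SIZE - 1);
}

/* bytes of a size-byte file that live in its final page */
static unsigned long bytes_in_last_page(unsigned long long size)
{
	return ((size - 1) & ~PAGE_MASK) + 1;
}

int main(void)
{
	assert(off_in_page(PAGE_SIZE + 10) == 10);
	assert(bytes_in_last_page(1) == 1);
	assert(bytes_in_last_page(PAGE_SIZE) == PAGE_SIZE);	/* exact multiple */
	assert(bytes_in_last_page(PAGE_SIZE + 10) == 10);
	return 0;
}

The remaining special case in the write-zero hunk — a zero_to of 0 meaning "end of folio" — is why the conversion substitutes folio_size(folio) rather than PAGE_SIZE.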
*/ @@ -920,7 +920,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) } /* - * rememer ip_xattr_sem also needs to be held if necessary + * remember ip_xattr_sem also needs to be held if necessary */ down_write(&OCFS2_I(inode)->ip_alloc_sem); @@ -1022,7 +1022,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->range = &range; /* - * ok, the default theshold for the defragmentation + * ok, the default threshold for the defragmentation * is 1M, since our maximum clustersize was 1M also. * any thought? */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 5550f8afa438..99278c8f0e24 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -508,7 +508,6 @@ static int __ocfs2_mknod_locked(struct inode *dir, struct inode *inode, dev_t dev, struct buffer_head **new_fe_bh, - struct buffer_head *parent_fe_bh, handle_t *handle, struct ocfs2_alloc_context *inode_ac, u64 fe_blkno, u64 suballoc_loc, u16 suballoc_bit) @@ -641,14 +640,14 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb, } return __ocfs2_mknod_locked(dir, inode, dev, new_fe_bh, - parent_fe_bh, handle, inode_ac, - fe_blkno, suballoc_loc, suballoc_bit); + handle, inode_ac, fe_blkno, + suballoc_loc, suballoc_bit); } -static int ocfs2_mkdir(struct mnt_idmap *idmap, - struct inode *dir, - struct dentry *dentry, - umode_t mode) +static struct dentry *ocfs2_mkdir(struct mnt_idmap *idmap, + struct inode *dir, + struct dentry *dentry, + umode_t mode) { int ret; @@ -658,7 +657,7 @@ static int ocfs2_mkdir(struct mnt_idmap *idmap, if (ret) mlog_errno(ret); - return ret; + return ERR_PTR(ret); } static int ocfs2_create(struct mnt_idmap *idmap, @@ -2576,7 +2575,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir, clear_nlink(inode); /* do the real work now. */ status = __ocfs2_mknod_locked(dir, inode, - 0, &new_di_bh, parent_di_bh, handle, + 0, &new_di_bh, handle, inode_ac, di_blkno, suballoc_loc, suballoc_bit); if (status < 0) { diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 51c52768132d..6aaa94c554c1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +385,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index c93689b568fe..e8e94599e907 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -132,7 +132,7 @@ * well as the name of the cluster being joined. * mount.ocfs2 must pass in a matching stack name. * - * If not set, the classic stack will be used. This is compatbile with + * If not set, the classic stack will be used. This is compatible with * all older versions.
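Both mkdir conversions in this series (dlmfs above and namei.c here) change the return type from int to struct dentry *, per the newer VFS convention, and encode errors with ERR_PTR(). A hedged sketch of the pattern — example_do_mkdir() is a hypothetical stand-in for the real allocation work:

#include <linux/err.h>
#include <linux/fs.h>

/* Hypothetical stand-in for the real work. */
static int example_do_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	return 0;	/* allocate the inode, add the dirent, ... */
}

/* The int -> struct dentry * convention both mkdir hunks adopt. */
static struct dentry *example_mkdir(struct mnt_idmap *idmap, struct inode *dir,
				    struct dentry *dentry, umode_t mode)
{
	int status = example_do_mkdir(dir, dentry, mode);

	/*
	 * ERR_PTR(0) is NULL: "keep using the dentry you passed me".
	 * Callers test with IS_ERR() instead of status < 0.
	 */
	return ERR_PTR(status);
}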
*/ #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 @@ -143,7 +143,7 @@ /* Support for extended attributes */ #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 -/* Support for indexed directores */ +/* Support for indexed directories */ #define OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS 0x0400 /* Metadata checksum and error correction */ @@ -156,7 +156,7 @@ #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 /* - * Incompat bit to indicate useable clusterinfo with stackflags for all + * Incompat bit to indicate usable clusterinfo with stackflags for all * cluster stacks (userspace adnd o2cb). If this bit is set, * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. */ @@ -1083,7 +1083,7 @@ struct ocfs2_xattr_block { struct ocfs2_xattr_header xb_header; /* xattr header if this block contains xattr */ struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this - block cotains xattr + block contains xattr tree. */ } xb_attrs; }; diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 9680797bc531..2de2f8733283 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -215,7 +215,7 @@ struct ocfs2_move_extents { movement less likely to fail, may make fs even more fragmented */ -#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmenation +#define OCFS2_MOVE_EXT_FL_COMPLETE (0x00000004) /* Move or defragmentation completely gets done. */ diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h index 8ac357ce6a30..9b234c03d693 100644 --- a/fs/ocfs2/ocfs2_lockid.h +++ b/fs/ocfs2/ocfs2_lockid.h @@ -93,7 +93,7 @@ static char *ocfs2_lock_type_strings[] = { [OCFS2_LOCK_TYPE_DATA] = "Data", [OCFS2_LOCK_TYPE_SUPER] = "Super", [OCFS2_LOCK_TYPE_RENAME] = "Rename", - /* Need to differntiate from [R]ename.. serializing writes is the + /* Need to differentiate from [R]ename.. serializing writes is the * important job it does, anyway. 
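The INCOMPAT comments above follow the usual feature-bit contract: an implementation must refuse to mount a volume whose incompat field contains any bit it does not understand. A toy model of that check; the SUPPORTED_INCOMPAT mask value here is hypothetical, not the real ocfs2 mask:

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical mask of the incompat bits this implementation knows. */
#define SUPPORTED_INCOMPAT	0x3fffu

static bool can_mount(uint32_t incompat)
{
	/* any unknown incompat bit must make the mount fail */
	return (incompat & ~SUPPORTED_INCOMPAT) == 0;
}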
*/ [OCFS2_LOCK_TYPE_RW] = "Write/Read", [OCFS2_LOCK_TYPE_DENTRY] = "Dentry", diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 0511c69c9fde..54ed1495de9a 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -1658,34 +1658,34 @@ TRACE_EVENT(ocfs2_remount, ); TRACE_EVENT(ocfs2_fill_super, - TP_PROTO(void *sb, void *data, int silent), - TP_ARGS(sb, data, silent), + TP_PROTO(void *sb, void *fc, int silent), + TP_ARGS(sb, fc, silent), TP_STRUCT__entry( __field(void *, sb) - __field(void *, data) + __field(void *, fc) __field(int, silent) ), TP_fast_assign( __entry->sb = sb; - __entry->data = data; + __entry->fc = fc; __entry->silent = silent; ), TP_printk("%p %p %d", __entry->sb, - __entry->data, __entry->silent) + __entry->fc, __entry->silent) ); TRACE_EVENT(ocfs2_parse_options, - TP_PROTO(int is_remount, char *options), - TP_ARGS(is_remount, options), + TP_PROTO(int is_remount, const char *option), + TP_ARGS(is_remount, option), TP_STRUCT__entry( __field(int, is_remount) - __string(options, options) + __string(option, option) ), TP_fast_assign( __entry->is_remount = is_remount; - __assign_str(options); + __assign_str(option); ), - TP_printk("%d %s", __entry->is_remount, __get_str(options)) + TP_printk("%d %s", __entry->is_remount, __get_str(option)) ); DEFINE_OCFS2_POINTER_EVENT(ocfs2_put_super); diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 15d9acd456ec..e85b1ccf81be 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -273,7 +273,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type, if (new) memset(bh->b_data, 0, sb->s_blocksize); memcpy(bh->b_data + offset, data, len); - flush_dcache_page(bh->b_page); + flush_dcache_folio(bh->b_folio); set_buffer_uptodate(bh); unlock_buffer(bh); ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh); diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2956d888c131..de7f12858729 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -588,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -600,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -677,8 +674,7 @@ out_put: break; } out: - up_read(&sb->s_umount); - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } @@ -843,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... 
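This replacement comment is backed up by a super.c hunk later in the series, which inserts ocfs2_recovery_disable_quota() ahead of ocfs2_disable_quotas() in ocfs2_dismount_volume(). Reduced to a sketch (abridged; the real dismount path does much more in between):

/* Abridged sketch of the dismount ordering the quota hunks establish. */
static void example_dismount_order(struct ocfs2_super *osb)
{
	/* Stop quota recovery so that we can disable quotas */
	ocfs2_recovery_disable_quota(osb);

	/* Safe now: no recovery thread can touch oinfo->dqi_rec anymore */
	ocfs2_disable_quotas(osb);
}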
*/ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 004393b13c0a..8f732742b26e 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -2420,7 +2420,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, * * If we will insert a new one, this is easy and only happens * during adding refcounted flag to the extent, so we don't - * have a chance of spliting. We just need one record. + * have a chance of splitting. We just need one record. * * If the refcount rec already exists, that would be a little * complicated. we may have to: @@ -2610,11 +2610,11 @@ static inline unsigned int ocfs2_cow_align_length(struct super_block *sb, /* * Calculate out the start and number of virtual clusters we need to CoW. * - * cpos is vitual start cluster position we want to do CoW in a + * cpos is virtual start cluster position we want to do CoW in a * file and write_len is the cluster length. * max_cpos is the place where we want to stop CoW intentionally. * - * Normal we will start CoW from the beginning of extent record cotaining cpos. + * Normal we will start CoW from the beginning of extent record containing cpos. * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we * get good I/O from the resulting extent tree. */ @@ -2902,7 +2902,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, int ret = 0, partial; struct super_block *sb = inode->i_sb; u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); - struct page *page; pgoff_t page_index; unsigned int from, to; loff_t offset, end, map_end; @@ -2921,6 +2920,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, end = i_size_read(inode); while (offset < end) { + struct folio *folio; page_index = offset >> PAGE_SHIFT; map_end = ((loff_t)page_index + 1) << PAGE_SHIFT; if (map_end > end) @@ -2933,9 +2933,10 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle, to = map_end & (PAGE_SIZE - 1); retry: - page = find_or_create_page(mapping, page_index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; + folio = __filemap_get_folio(mapping, page_index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); mlog_errno(ret); break; } @@ -2945,9 +2946,9 @@ retry: * page, so write it back. 
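The retry loop above swaps find_or_create_page() for __filemap_get_folio(); the three FGP flags reproduce the old helper's behavior, and failure becomes an ERR_PTR() rather than NULL. A minimal sketch of the translation, as a hypothetical wrapper:

#include <linux/err.h>
#include <linux/pagemap.h>

/* Hypothetical wrapper showing the page -> folio translation. */
static int example_grab_folio(struct address_space *mapping, pgoff_t index,
			      struct folio **ret)
{
	struct folio *folio;

	/* FGP_LOCK | FGP_ACCESSED | FGP_CREAT == find_or_create_page() */
	folio = __filemap_get_folio(mapping, index,
				    FGP_LOCK | FGP_ACCESSED | FGP_CREAT,
				    GFP_NOFS);
	if (IS_ERR(folio))
		return PTR_ERR(folio);	/* was: !page ? -ENOMEM */

	*ret = folio;	/* returned locked, referenced, marked accessed */
	return 0;
}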
*/ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize) { - if (PageDirty(page)) { - unlock_page(page); - put_page(page); + if (folio_test_dirty(folio)) { + folio_unlock(folio); + folio_put(folio); ret = filemap_write_and_wait_range(mapping, offset, map_end - 1); @@ -2955,9 +2956,7 @@ retry: } } - if (!PageUptodate(page)) { - struct folio *folio = page_folio(page); - + if (!folio_test_uptodate(folio)) { ret = block_read_full_folio(folio, ocfs2_get_block); if (ret) { mlog_errno(ret); @@ -2966,8 +2965,8 @@ retry: folio_lock(folio); } - if (page_has_buffers(page)) { - ret = walk_page_buffers(handle, page_buffers(page), + if (folio_buffers(folio)) { + ret = walk_page_buffers(handle, folio_buffers(folio), from, to, &partial, ocfs2_clear_cow_buffer); if (ret) { @@ -2976,14 +2975,12 @@ retry: } } - ocfs2_map_and_dirty_page(inode, - handle, from, to, - page, 0, &new_block); - mark_page_accessed(page); + ocfs2_map_and_dirty_folio(inode, handle, from, to, + folio, 0, &new_block); + folio_mark_accessed(folio); unlock: - unlock_page(page); - put_page(page); - page = NULL; + folio_unlock(folio); + folio_put(folio); offset = map_end; if (ret) break; diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h index ec8101ef5717..4fce17180342 100644 --- a/fs/ocfs2/reservations.h +++ b/fs/ocfs2/reservations.h @@ -31,7 +31,7 @@ struct ocfs2_alloc_reservation { #define OCFS2_RESV_FLAG_INUSE 0x01 /* Set when r_node is part of a btree */ #define OCFS2_RESV_FLAG_TMP 0x02 /* Temporary reservation, will be - * destroyed immedately after use */ + * destroyed immediately after use */ #define OCFS2_RESV_FLAG_DIR 0x04 /* Reservation is for an unindexed * directory btree */ @@ -125,7 +125,7 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, /** * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used. * @resmap: reservations bitmap - * @resv: optional reservation to recalulate based on new bitmap + * @resv: optional reservation to recalculate based on new bitmap * @cstart: start of allocation in clusters * @clen: end of allocation in clusters. * diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 10157d9d7a9c..f58e891aa2da 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -227,7 +227,7 @@ static int o2cb_dlm_lock_status(struct ocfs2_dlm_lksb *lksb) } /* - * o2dlm aways has a "valid" LVB. If the dlm loses track of the LVB + * o2dlm always has a "valid" LVB. If the dlm loses track of the LVB * contents, it will zero out the LVB. Thus the caller can always trust * the contents. */ diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 20aa37b67cfb..ddd761cf44c8 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -650,7 +650,7 @@ error: * and easier to preserve the name. */ -static struct ctl_table ocfs2_nm_table[] = { +static const struct ctl_table ocfs2_nm_table[] = { { .procname = "hb_ctl_path", .data = ocfs2_hb_ctl_path, diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 02ab072c528a..5486a6dce70a 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -210,7 +210,7 @@ struct ocfs2_stack_operations { struct file_lock *fl); /* - * This is an optoinal debugging hook. If provided, the + * This is an optional debugging hook. If provided, the * stack can dump debugging information about this lock. 
*/ void (*dump_lksb)(struct ocfs2_dlm_lksb *lksb); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index f7b483f0de2a..6ac4dcd54588 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -698,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1794,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1819,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1982,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -2022,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. 
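The search-chain hunk above clamps bits_wanted down to the longest contiguous free run once a discontiguous allocation is acceptable, falling back to scanning the bitmap when bg_contig_free_bits is unset. A userspace toy of that scan, assuming (as I believe holds for ocfs2 group bitmaps) that a set bit means allocated:

#include <stdio.h>

/* Toy model of ocfs2_find_max_contig_free_bits(): longest run of
 * clear (free) bits in a group bitmap. */
static unsigned longest_free_run(const unsigned char *map, unsigned nbits)
{
	unsigned best = 0, run = 0;

	for (unsigned i = 0; i < nbits; i++) {
		if (map[i / 8] & (1u << (i % 8)))
			run = 0;		/* allocated: run broken */
		else if (++run > best)
			best = run;
	}
	return best;
}

int main(void)
{
	unsigned char map[] = { 0x0f, 0x00 };	/* bits 0-3 used, 4-15 free */

	printf("%u\n", longest_free_run(map, 16));	/* prints 12 */
	return 0;
}

With that clamp, a request for more bits than any single run can supply degrades gracefully instead of forcing an immediate -ENOSPC, and the OCFS2_AC_USE_MAIN_DISCONTIG retry in ocfs2_claim_suballoc_bits() picks up the remainder.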
*/ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2365,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index b481b834857d..bcf2ed4a8631 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 1e87554f6f41..3d2533950bae 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -19,10 +19,10 @@ #include <linux/blkdev.h> #include <linux/socket.h> #include <linux/inet.h> -#include <linux/parser.h> +#include <linux/fs_parser.h> +#include <linux/fs_context.h> #include <linux/crc32.h> #include <linux/debugfs.h> -#include <linux/mount.h> #include <linux/seq_file.h> #include <linux/quotaops.h> #include <linux/signal.h> @@ -80,17 +80,15 @@ struct mount_options unsigned int resv_level; int dir_resv_level; char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; + bool user_stack; }; -static int ocfs2_parse_options(struct super_block *sb, char *options, - struct mount_options *mopt, - int is_remount); +static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options); static int ocfs2_show_options(struct seq_file *s, struct dentry *root); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); -static int ocfs2_remount(struct super_block *sb, int *flags, char *data); static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err); static int ocfs2_initialize_mem_caches(void); static void ocfs2_free_mem_caches(void); @@ -135,7 +133,6 @@ static const struct super_operations ocfs2_sops = { .evict_inode = ocfs2_evict_inode, .sync_fs = ocfs2_sync_fs, .put_super = ocfs2_put_super, - .remount_fs = ocfs2_remount, .show_options = ocfs2_show_options, .quota_read = ocfs2_quota_read, .quota_write = ocfs2_quota_write, @@ -144,15 +141,10 @@ static const struct super_operations ocfs2_sops = { enum { Opt_barrier, - Opt_err_panic, - Opt_err_ro, + Opt_errors, Opt_intr, - Opt_nointr, - Opt_hb_none, - Opt_hb_local, - Opt_hb_global, - Opt_data_ordered, - Opt_data_writeback, + Opt_heartbeat, + Opt_data, Opt_atime_quantum, Opt_slot, Opt_commit, @@ -160,52 +152,64 @@ enum { Opt_localflocks, Opt_stack, Opt_user_xattr, - Opt_nouser_xattr, Opt_inode64, Opt_acl, - Opt_noacl, Opt_usrquota, Opt_grpquota, - Opt_coherency_buffered, - Opt_coherency_full, + Opt_coherency, Opt_resv_level, Opt_dir_resv_level, Opt_journal_async_commit, - Opt_err_cont, - Opt_err, }; -static const match_table_t tokens = { - {Opt_barrier, "barrier=%u"}, - {Opt_err_panic, "errors=panic"}, - {Opt_err_ro, "errors=remount-ro"}, - {Opt_intr, "intr"}, - {Opt_nointr, "nointr"}, - {Opt_hb_none, OCFS2_HB_NONE}, - {Opt_hb_local, OCFS2_HB_LOCAL}, - 
{Opt_hb_global, OCFS2_HB_GLOBAL}, - {Opt_data_ordered, "data=ordered"}, - {Opt_data_writeback, "data=writeback"}, - {Opt_atime_quantum, "atime_quantum=%u"}, - {Opt_slot, "preferred_slot=%u"}, - {Opt_commit, "commit=%u"}, - {Opt_localalloc, "localalloc=%d"}, - {Opt_localflocks, "localflocks"}, - {Opt_stack, "cluster_stack=%s"}, - {Opt_user_xattr, "user_xattr"}, - {Opt_nouser_xattr, "nouser_xattr"}, - {Opt_inode64, "inode64"}, - {Opt_acl, "acl"}, - {Opt_noacl, "noacl"}, - {Opt_usrquota, "usrquota"}, - {Opt_grpquota, "grpquota"}, - {Opt_coherency_buffered, "coherency=buffered"}, - {Opt_coherency_full, "coherency=full"}, - {Opt_resv_level, "resv_level=%u"}, - {Opt_dir_resv_level, "dir_resv_level=%u"}, - {Opt_journal_async_commit, "journal_async_commit"}, - {Opt_err_cont, "errors=continue"}, - {Opt_err, NULL} +static const struct constant_table ocfs2_param_errors[] = { + {"panic", OCFS2_MOUNT_ERRORS_PANIC}, + {"remount-ro", OCFS2_MOUNT_ERRORS_ROFS}, + {"continue", OCFS2_MOUNT_ERRORS_CONT}, + {} +}; + +static const struct constant_table ocfs2_param_heartbeat[] = { + {"local", OCFS2_MOUNT_HB_LOCAL}, + {"none", OCFS2_MOUNT_HB_NONE}, + {"global", OCFS2_MOUNT_HB_GLOBAL}, + {} +}; + +static const struct constant_table ocfs2_param_data[] = { + {"writeback", OCFS2_MOUNT_DATA_WRITEBACK}, + {"ordered", 0}, + {} +}; + +static const struct constant_table ocfs2_param_coherency[] = { + {"buffered", OCFS2_MOUNT_COHERENCY_BUFFERED}, + {"full", 0}, + {} +}; + +static const struct fs_parameter_spec ocfs2_param_spec[] = { + fsparam_u32 ("barrier", Opt_barrier), + fsparam_enum ("errors", Opt_errors, ocfs2_param_errors), + fsparam_flag_no ("intr", Opt_intr), + fsparam_enum ("heartbeat", Opt_heartbeat, ocfs2_param_heartbeat), + fsparam_enum ("data", Opt_data, ocfs2_param_data), + fsparam_u32 ("atime_quantum", Opt_atime_quantum), + fsparam_u32 ("preferred_slot", Opt_slot), + fsparam_u32 ("commit", Opt_commit), + fsparam_s32 ("localalloc", Opt_localalloc), + fsparam_flag ("localflocks", Opt_localflocks), + fsparam_string ("cluster_stack", Opt_stack), + fsparam_flag_no ("user_xattr", Opt_user_xattr), + fsparam_flag ("inode64", Opt_inode64), + fsparam_flag_no ("acl", Opt_acl), + fsparam_flag ("usrquota", Opt_usrquota), + fsparam_flag ("grpquota", Opt_grpquota), + fsparam_enum ("coherency", Opt_coherency, ocfs2_param_coherency), + fsparam_u32 ("resv_level", Opt_resv_level), + fsparam_u32 ("dir_resv_level", Opt_dir_resv_level), + fsparam_flag ("journal_async_commit", Opt_journal_async_commit), + {} }; #ifdef CONFIG_DEBUG_FS @@ -600,32 +604,32 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits, return (((unsigned long long)bytes) << bitshift) - trim; } -static int ocfs2_remount(struct super_block *sb, int *flags, char *data) +static int ocfs2_reconfigure(struct fs_context *fc) { int incompat_features; int ret = 0; - struct mount_options parsed_options; + struct mount_options *parsed_options = fc->fs_private; + struct super_block *sb = fc->root->d_sb; struct ocfs2_super *osb = OCFS2_SB(sb); u32 tmp; sync_filesystem(sb); - if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || - !ocfs2_check_set_options(sb, &parsed_options)) { + if (!ocfs2_check_set_options(sb, parsed_options)) { ret = -EINVAL; goto out; } tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; - if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { + if ((osb->s_mount_opt & tmp) != (parsed_options->mount_opt & tmp)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; } 
if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != - (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { + (parsed_options->mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change data mode on remount\n"); goto out; @@ -634,16 +638,16 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) /* Probably don't want this on remount; it might * mess with other nodes */ if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && - (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) { + (parsed_options->mount_opt & OCFS2_MOUNT_INODE64)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); goto out; } /* We're going to/from readonly mode. */ - if ((bool)(*flags & SB_RDONLY) != sb_rdonly(sb)) { + if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) { /* Disable quota accounting before remounting RO */ - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { ret = ocfs2_susp_quotas(osb, 0); if (ret < 0) goto out; @@ -657,7 +661,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto unlock_osb; } - if (*flags & SB_RDONLY) { + if (fc->sb_flags & SB_RDONLY) { sb->s_flags |= SB_RDONLY; osb->osb_flags |= OCFS2_OSB_SOFT_RO; } else { @@ -678,11 +682,11 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) sb->s_flags &= ~SB_RDONLY; osb->osb_flags &= ~OCFS2_OSB_SOFT_RO; } - trace_ocfs2_remount(sb->s_flags, osb->osb_flags, *flags); + trace_ocfs2_remount(sb->s_flags, osb->osb_flags, fc->sb_flags); unlock_osb: spin_unlock(&osb->osb_lock); /* Enable quota accounting after remounting RW */ - if (!ret && !(*flags & SB_RDONLY)) { + if (!ret && !(fc->sb_flags & SB_RDONLY)) { if (sb_any_quota_suspended(sb)) ret = ocfs2_susp_quotas(osb, 1); else @@ -701,11 +705,11 @@ unlock_osb: if (!ret) { /* Only save off the new mount options in case of a successful * remount. 
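ocfs2_reconfigure() above carries over three remount invariants: the heartbeat mode and the data journalling mode are fixed for the life of a mount, and inode64 can be turned on only at mount time. A userspace toy of the combined check; the flag values are hypothetical, only the relationships matter:

#include <stdbool.h>
#include <stdint.h>

#define MNT_HB_MASK	0x07	/* local | global | none heartbeat bits */
#define MNT_WRITEBACK	0x08	/* data=writeback */
#define MNT_INODE64	0x10

static bool remount_allowed(uint32_t cur, uint32_t req)
{
	if ((cur & MNT_HB_MASK) != (req & MNT_HB_MASK))
		return false;	/* "Cannot change heartbeat mode on remount" */
	if ((cur & MNT_WRITEBACK) != (req & MNT_WRITEBACK))
		return false;	/* "Cannot change data mode on remount" */
	if (!(cur & MNT_INODE64) && (req & MNT_INODE64))
		return false;	/* "Cannot enable inode64 on remount" */
	return true;
}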
*/ - osb->s_mount_opt = parsed_options.mount_opt; - osb->s_atime_quantum = parsed_options.atime_quantum; - osb->preferred_slot = parsed_options.slot; - if (parsed_options.commit_interval) - osb->osb_commit_interval = parsed_options.commit_interval; + osb->s_mount_opt = parsed_options->mount_opt; + osb->s_atime_quantum = parsed_options->atime_quantum; + osb->preferred_slot = parsed_options->slot; + if (parsed_options->commit_interval) + osb->osb_commit_interval = parsed_options->commit_interval; if (!ocfs2_is_hard_readonly(osb)) ocfs2_set_journal_params(osb); @@ -966,23 +970,18 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb) } } -static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) +static int ocfs2_fill_super(struct super_block *sb, struct fs_context *fc) { struct dentry *root; int status, sector_size; - struct mount_options parsed_options; + struct mount_options *parsed_options = fc->fs_private; struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; char nodestr[12]; struct ocfs2_blockcheck_stats stats; - trace_ocfs2_fill_super(sb, data, silent); - - if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { - status = -EINVAL; - goto out; - } + trace_ocfs2_fill_super(sb, fc, fc->sb_flags & SB_SILENT); /* probe for superblock */ status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats); @@ -999,24 +998,24 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb = OCFS2_SB(sb); - if (!ocfs2_check_set_options(sb, &parsed_options)) { + if (!ocfs2_check_set_options(sb, parsed_options)) { status = -EINVAL; goto out_super; } - osb->s_mount_opt = parsed_options.mount_opt; - osb->s_atime_quantum = parsed_options.atime_quantum; - osb->preferred_slot = parsed_options.slot; - osb->osb_commit_interval = parsed_options.commit_interval; + osb->s_mount_opt = parsed_options->mount_opt; + osb->s_atime_quantum = parsed_options->atime_quantum; + osb->preferred_slot = parsed_options->slot; + osb->osb_commit_interval = parsed_options->commit_interval; - ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt); - osb->osb_resv_level = parsed_options.resv_level; - osb->osb_dir_resv_level = parsed_options.resv_level; - if (parsed_options.dir_resv_level == -1) - osb->osb_dir_resv_level = parsed_options.resv_level; + ocfs2_la_set_sizes(osb, parsed_options->localalloc_opt); + osb->osb_resv_level = parsed_options->resv_level; + osb->osb_dir_resv_level = parsed_options->resv_level; + if (parsed_options->dir_resv_level == -1) + osb->osb_dir_resv_level = parsed_options->resv_level; else - osb->osb_dir_resv_level = parsed_options.dir_resv_level; + osb->osb_dir_resv_level = parsed_options->dir_resv_level; - status = ocfs2_verify_userspace_stack(osb, &parsed_options); + status = ocfs2_verify_userspace_stack(osb, parsed_options); if (status) goto out_super; @@ -1180,27 +1179,72 @@ out: return status; } -static struct dentry *ocfs2_mount(struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) +static int ocfs2_get_tree(struct fs_context *fc) +{ + return get_tree_bdev(fc, ocfs2_fill_super); +} + +static void ocfs2_free_fc(struct fs_context *fc) { - return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); + kfree(fc->fs_private); +} + +static const struct fs_context_operations ocfs2_context_ops = { + .parse_param = ocfs2_parse_param, + .get_tree = ocfs2_get_tree, + .reconfigure = ocfs2_reconfigure, + .free = ocfs2_free_fc, +}; + +static int ocfs2_init_fs_context(struct fs_context *fc) +{ + struct
mount_options *mopt; + + mopt = kzalloc(sizeof(struct mount_options), GFP_KERNEL); + if (!mopt) + return -EINVAL; + + mopt->commit_interval = 0; + mopt->mount_opt = OCFS2_MOUNT_NOINTR; + mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; + mopt->slot = OCFS2_INVALID_SLOT; + mopt->localalloc_opt = -1; + mopt->cluster_stack[0] = '\0'; + mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; + mopt->dir_resv_level = -1; + + fc->fs_private = mopt; + fc->ops = &ocfs2_context_ops; + + return 0; } static struct file_system_type ocfs2_fs_type = { .owner = THIS_MODULE, .name = "ocfs2", - .mount = ocfs2_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, - .next = NULL + .next = NULL, + .init_fs_context = ocfs2_init_fs_context, + .parameters = ocfs2_param_spec, }; MODULE_ALIAS_FS("ocfs2"); static int ocfs2_check_set_options(struct super_block *sb, struct mount_options *options) { + if (options->user_stack == 0) { + u32 tmp; + + /* Ensure only one heartbeat mode */ + tmp = options->mount_opt & (OCFS2_MOUNT_HB_LOCAL | + OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE); + if (hweight32(tmp) != 1) { + mlog(ML_ERROR, "Invalid heartbeat mount options\n"); + return 0; + } + } if (options->mount_opt & OCFS2_MOUNT_USRQUOTA && !OCFS2_HAS_RO_COMPAT_FEATURE(sb, OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { @@ -1232,241 +1276,142 @@ static int ocfs2_check_set_options(struct super_block *sb, return 1; } -static int ocfs2_parse_options(struct super_block *sb, - char *options, - struct mount_options *mopt, - int is_remount) +static int ocfs2_parse_param(struct fs_context *fc, struct fs_parameter *param) { - int status, user_stack = 0; - char *p; - u32 tmp; - int token, option; - substring_t args[MAX_OPT_ARGS]; - - trace_ocfs2_parse_options(is_remount, options ? 
options : "(none)"); - - mopt->commit_interval = 0; - mopt->mount_opt = OCFS2_MOUNT_NOINTR; - mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; - mopt->slot = OCFS2_INVALID_SLOT; - mopt->localalloc_opt = -1; - mopt->cluster_stack[0] = '\0'; - mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL; - mopt->dir_resv_level = -1; - - if (!options) { - status = 1; - goto bail; - } - - while ((p = strsep(&options, ",")) != NULL) { - if (!*p) - continue; - - token = match_token(p, tokens, args); - switch (token) { - case Opt_hb_local: - mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; - break; - case Opt_hb_none: - mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; - break; - case Opt_hb_global: - mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; - break; - case Opt_barrier: - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option) - mopt->mount_opt |= OCFS2_MOUNT_BARRIER; - else - mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; - break; - case Opt_intr: - mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; - break; - case Opt_nointr: + struct fs_parse_result result; + int opt; + struct mount_options *mopt = fc->fs_private; + bool is_remount = (fc->purpose & FS_CONTEXT_FOR_RECONFIGURE); + + trace_ocfs2_parse_options(is_remount, param->key); + + opt = fs_parse(fc, ocfs2_param_spec, param, &result); + if (opt < 0) + return opt; + + switch (opt) { + case Opt_heartbeat: + mopt->mount_opt |= result.uint_32; + break; + case Opt_barrier: + if (result.uint_32) + mopt->mount_opt |= OCFS2_MOUNT_BARRIER; + else + mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; + break; + case Opt_intr: + if (result.negated) mopt->mount_opt |= OCFS2_MOUNT_NOINTR; - break; - case Opt_err_panic: - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT; - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS; - mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; - break; - case Opt_err_ro: - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_CONT; - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; - mopt->mount_opt |= OCFS2_MOUNT_ERRORS_ROFS; - break; - case Opt_err_cont: - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_ROFS; - mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; - mopt->mount_opt |= OCFS2_MOUNT_ERRORS_CONT; - break; - case Opt_data_ordered: - mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; - break; - case Opt_data_writeback: - mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; - break; - case Opt_user_xattr: - mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; - break; - case Opt_nouser_xattr: + else + mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; + break; + case Opt_errors: + mopt->mount_opt &= ~(OCFS2_MOUNT_ERRORS_CONT | + OCFS2_MOUNT_ERRORS_ROFS | + OCFS2_MOUNT_ERRORS_PANIC); + mopt->mount_opt |= result.uint_32; + break; + case Opt_data: + mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; + mopt->mount_opt |= result.uint_32; + break; + case Opt_user_xattr: + if (result.negated) mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; - break; - case Opt_atime_quantum: - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option >= 0) - mopt->atime_quantum = option; - break; - case Opt_slot: - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option) - mopt->slot = (u16)option; - break; - case Opt_commit: - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option < 0) - return 0; - if (option == 0) - option = JBD2_DEFAULT_MAX_COMMIT_AGE; - mopt->commit_interval = HZ * option; - break; - case Opt_localalloc: - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option >= 0) - mopt->localalloc_opt = option; - break; - case Opt_localflocks: - /* - * 
Changing this during remount could race - * flock() requests, or "unbalance" existing - * ones (e.g., a lock is taken in one mode but - * dropped in the other). If users care enough - * to flip locking modes during remount, we - * could add a "local" flag to individual - * flock structures for proper tracking of - * state. - */ - if (!is_remount) - mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; - break; - case Opt_stack: - /* Check both that the option we were passed - * is of the right length and that it is a proper - * string of the right length. - */ - if (((args[0].to - args[0].from) != - OCFS2_STACK_LABEL_LEN) || - (strnlen(args[0].from, - OCFS2_STACK_LABEL_LEN) != - OCFS2_STACK_LABEL_LEN)) { - mlog(ML_ERROR, - "Invalid cluster_stack option\n"); - status = 0; - goto bail; - } - memcpy(mopt->cluster_stack, args[0].from, - OCFS2_STACK_LABEL_LEN); - mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; - /* - * Open code the memcmp here as we don't have - * an osb to pass to - * ocfs2_userspace_stack(). - */ - if (memcmp(mopt->cluster_stack, - OCFS2_CLASSIC_CLUSTER_STACK, - OCFS2_STACK_LABEL_LEN)) - user_stack = 1; - break; - case Opt_inode64: - mopt->mount_opt |= OCFS2_MOUNT_INODE64; - break; - case Opt_usrquota: - mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; - break; - case Opt_grpquota: - mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; - break; - case Opt_coherency_buffered: - mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; - break; - case Opt_coherency_full: - mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; - break; - case Opt_acl: - mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; - mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; - break; - case Opt_noacl: + else + mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; + break; + case Opt_atime_quantum: + mopt->atime_quantum = result.uint_32; + break; + case Opt_slot: + if (result.uint_32) + mopt->slot = (u16)result.uint_32; + break; + case Opt_commit: + if (result.uint_32 == 0) + mopt->commit_interval = HZ * JBD2_DEFAULT_MAX_COMMIT_AGE; + else + mopt->commit_interval = HZ * result.uint_32; + break; + case Opt_localalloc: + if (result.int_32 >= 0) + mopt->localalloc_opt = result.int_32; + break; + case Opt_localflocks: + /* + * Changing this during remount could race flock() requests, or + * "unbalance" existing ones (e.g., a lock is taken in one mode + * but dropped in the other). If users care enough to flip + * locking modes during remount, we could add a "local" flag to + * individual flock structures for proper tracking of state. + */ + if (!is_remount) + mopt->mount_opt |= OCFS2_MOUNT_LOCALFLOCKS; + break; + case Opt_stack: + /* Check both that the option we were passed is of the right + * length and that it is a proper string of the right length. + */ + if (strlen(param->string) != OCFS2_STACK_LABEL_LEN) { + mlog(ML_ERROR, "Invalid cluster_stack option\n"); + return -EINVAL; + } + memcpy(mopt->cluster_stack, param->string, OCFS2_STACK_LABEL_LEN); + mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; + /* + * Open code the memcmp here as we don't have an osb to pass + * to ocfs2_userspace_stack(). 
+ */ + if (memcmp(mopt->cluster_stack, + OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + mopt->user_stack = 1; + break; + case Opt_inode64: + mopt->mount_opt |= OCFS2_MOUNT_INODE64; + break; + case Opt_usrquota: + mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA; + break; + case Opt_grpquota: + mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; + break; + case Opt_coherency: + mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; + mopt->mount_opt |= result.uint_32; + break; + case Opt_acl: + if (result.negated) { mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; + } else { + mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; + mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; + } + break; + case Opt_resv_level: + if (is_remount) break; - case Opt_resv_level: - if (is_remount) - break; - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option >= OCFS2_MIN_RESV_LEVEL && - option < OCFS2_MAX_RESV_LEVEL) - mopt->resv_level = option; - break; - case Opt_dir_resv_level: - if (is_remount) - break; - if (match_int(&args[0], &option)) { - status = 0; - goto bail; - } - if (option >= OCFS2_MIN_RESV_LEVEL && - option < OCFS2_MAX_RESV_LEVEL) - mopt->dir_resv_level = option; - break; - case Opt_journal_async_commit: - mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL && + result.uint_32 < OCFS2_MAX_RESV_LEVEL) + mopt->resv_level = result.uint_32; + break; + case Opt_dir_resv_level: + if (is_remount) break; - default: - mlog(ML_ERROR, - "Unrecognized mount option \"%s\" " - "or missing value\n", p); - status = 0; - goto bail; - } - } - - if (user_stack == 0) { - /* Ensure only one heartbeat mode */ - tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | - OCFS2_MOUNT_HB_GLOBAL | - OCFS2_MOUNT_HB_NONE); - if (hweight32(tmp) != 1) { - mlog(ML_ERROR, "Invalid heartbeat mount options\n"); - status = 0; - goto bail; - } + if (result.uint_32 >= OCFS2_MIN_RESV_LEVEL && + result.uint_32 < OCFS2_MAX_RESV_LEVEL) + mopt->dir_resv_level = result.uint_32; + break; + case Opt_journal_async_commit: + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + break; + default: + return -EINVAL; } - status = 1; - -bail: - return status; + return 0; } static int ocfs2_show_options(struct seq_file *s, struct dentry *root) @@ -1858,7 +1803,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) osb = OCFS2_SB(sb); BUG_ON(!osb); - /* Remove file check sysfs related directores/files, + /* Remove file check sysfs related directories/files, * and wait for the pending file check operations */ ocfs2_filecheck_remove_sysfs(osb); @@ -1867,6 +1812,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index f5cf2255dc09..ad8be3300b49 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -54,13 +54,11 @@ static int ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio) { - struct page *page = &folio->page; - struct inode *inode = page->mapping->host; + struct inode *inode = folio->mapping->host; struct buffer_head *bh = NULL; int status = ocfs2_read_inode_block(inode, &bh); struct ocfs2_dinode *fe; const char *link; - void *kaddr; size_t len; if (status < 0) { @@ -72,12 +70,9 @@ static int 
ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio) link = (char *) fe->id2.i_symlink; /* will be less than a page size */ len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb)); - kaddr = kmap_atomic(page); - memcpy(kaddr, link, len + 1); - kunmap_atomic(kaddr); - SetPageUptodate(page); + memcpy_to_folio(folio, 0, link, len + 1); out: - unlock_page(page); + folio_end_read(folio, status == 0); brelse(bh); return status; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 73a6f6fd8a8e..d70a20d29e3e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -648,7 +648,7 @@ int ocfs2_calc_xattr_init(struct inode *dir, * 256(name) + 80(value) + 16(entry) = 352 bytes, * The max space of acl xattr taken inline is * 80(value) + 16(entry) * 2(if directory) = 192 bytes, - * when blocksize = 512, may reserve one more cluser for + * when blocksize = 512, may reserve one more cluster for * xattr bucket, otherwise reserve one metadata block * for them is ok. * If this is a new directory with inline data, @@ -4371,7 +4371,7 @@ static int cmp_xe_offset(const void *a, const void *b) /* * defrag a xattr bucket if we find that the bucket has some - * holes beteen name/value pairs. + * holes between name/value pairs. * We will move all the name/value pairs to the end of the bucket * so that we can spare some space for insertion. */ @@ -5011,7 +5011,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode, * 2. If cluster_size == bucket_size: * a) If the previous extent rec has more than one cluster and the insert * place isn't in the last cluster, copy the entire last cluster to the - * new one. This time, we don't need to upate the first_bh and header_bh + * new one. This time, we don't need to update the first_bh and header_bh * since they will not be moved into the new cluster. * b) Otherwise, move the bottom half of the xattrs in the last cluster into * the new one. And we set the extend flag to zero if the insert place is @@ -6189,7 +6189,7 @@ struct ocfs2_xattr_reflink { /* * Given a xattr header and xe offset, * return the proper xv and the corresponding bh. - * xattr in inode, block and xattr tree have different implementaions. + * xattr in inode, block and xattr tree have different implementations. */ typedef int (get_xattr_value_root)(struct super_block *sb, struct buffer_head *bh, @@ -6269,7 +6269,7 @@ static int ocfs2_get_xattr_value_root(struct super_block *sb, } /* - * Lock the meta_ac and caculate how much credits we need for reflink xattrs. + * Lock the meta_ac and calculate how much credits we need for reflink xattrs. * It is only used for inline xattr and xattr block. */ static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb, |