diff options
Diffstat (limited to 'fs/ocfs2')
43 files changed, 543 insertions, 332 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index f0937902f7b4..5d9388b44e5b 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -967,7 +967,14 @@ int ocfs2_num_free_extents(struct ocfs2_extent_tree *et) el = &eb->h_list; } - BUG_ON(el->l_tree_depth != 0); + if (el->l_tree_depth != 0) { + retval = ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci), + "Owner %llu has leaf extent block %llu with an invalid l_tree_depth of %u\n", + (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), + (unsigned long long)last_eb_blk, + le16_to_cpu(el->l_tree_depth)); + goto bail; + } retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); bail: @@ -1796,6 +1803,14 @@ static int __ocfs2_find_path(struct ocfs2_caching_info *ci, el = root_el; while (el->l_tree_depth) { + if (unlikely(le16_to_cpu(el->l_tree_depth) >= OCFS2_MAX_PATH_DEPTH)) { + ocfs2_error(ocfs2_metadata_cache_get_super(ci), + "Owner %llu has invalid tree depth %u in extent list\n", + (unsigned long long)ocfs2_metadata_cache_owner(ci), + le16_to_cpu(el->l_tree_depth)); + ret = -EROFS; + goto out; + } if (le16_to_cpu(el->l_next_free_rec) == 0) { ocfs2_error(ocfs2_metadata_cache_get_super(ci), "Owner %llu has empty extent list at depth %u\n", @@ -6927,7 +6942,7 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, * nonzero data on subsequent file extends. * * We need to call this before i_size is updated on the inode because - * otherwise block_write_full_page() will skip writeout of pages past + * otherwise block_write_full_folio() will skip writeout of pages past * i_size. */ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, @@ -7642,7 +7657,7 @@ out_mutex: goto next_group; } out: - range->len = trimmed * sb->s_blocksize; + range->len = trimmed * osb->s_clustersize; return ret; } diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 315f7c2f6a02..db72b3e924b3 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -388,21 +388,18 @@ out_unlock: /* Note: Because we don't support holes, our allocation has * already happened (allocation writes zeros to the file data) * so we don't have to worry about ordered writes in - * ocfs2_writepage. + * ocfs2_writepages. * - * ->writepage is called during the process of invalidating the page cache + * ->writepages is called during the process of invalidating the page cache * during blocked lock processing. It can't block on any cluster locks * to during block mapping. It's relying on the fact that the block * mapping can't have disappeared under the dirty pages that it is * being asked to write back. */ -static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) +static int ocfs2_writepages(struct address_space *mapping, + struct writeback_control *wbc) { - trace_ocfs2_writepage( - (unsigned long long)OCFS2_I(page->mapping->host)->ip_blkno, - page->index); - - return block_write_full_page(page, ocfs2_get_block, wbc); + return mpage_writepages(mapping, wbc, ocfs2_get_block); } /* Taken from ext3. We don't necessarily need the full blown @@ -567,10 +564,10 @@ static void ocfs2_clear_page_regions(struct page *page, * read-in the blocks at the tail of our file. Avoid reading them by * testing i_size against each block offset. */ -static int ocfs2_should_read_blk(struct inode *inode, struct page *page, +static int ocfs2_should_read_blk(struct inode *inode, struct folio *folio, unsigned int block_start) { - u64 offset = page_offset(page) + block_start; + u64 offset = folio_pos(folio) + block_start; if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) return 1; @@ -592,15 +589,16 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, struct inode *inode, unsigned int from, unsigned int to, int new) { + struct folio *folio = page_folio(page); int ret = 0; struct buffer_head *head, *bh, *wait[2], **wait_bh = wait; unsigned int block_end, block_start; unsigned int bsize = i_blocksize(inode); - if (!page_has_buffers(page)) - create_empty_buffers(page, bsize, 0); + head = folio_buffers(folio); + if (!head) + head = create_empty_buffers(folio, bsize, 0); - head = page_buffers(page); for (bh = head, block_start = 0; bh != head || !block_start; bh = bh->b_this_page, block_start += bsize) { block_end = block_start + bsize; @@ -612,7 +610,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, * they may belong to unallocated clusters. */ if (block_start >= to || block_end <= from) { - if (PageUptodate(page)) + if (folio_test_uptodate(folio)) set_buffer_uptodate(bh); continue; } @@ -629,11 +627,11 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, clean_bdev_bh_alias(bh); } - if (PageUptodate(page)) { + if (folio_test_uptodate(folio)) { set_buffer_uptodate(bh); } else if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_new(bh) && - ocfs2_should_read_blk(inode, page, block_start) && + ocfs2_should_read_blk(inode, folio, block_start) && (block_start < from || block_end > to)) { bh_read_nowait(bh, 0); *wait_bh++=bh; @@ -667,7 +665,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, if (block_start >= to) break; - zero_user(page, block_start, bh->b_size); + folio_zero_range(folio, block_start, bh->b_size); set_buffer_uptodate(bh); mark_buffer_dirty(bh); @@ -1188,7 +1186,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, /* This is the direct io target page. */ if (wc->w_pages[i] == NULL) { - p_blkno++; + p_blkno += (1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits)); continue; } @@ -1644,7 +1642,7 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, int ocfs2_write_begin_nolock(struct address_space *mapping, loff_t pos, unsigned len, ocfs2_write_type_t type, - struct page **pagep, void **fsdata, + struct folio **foliop, void **fsdata, struct buffer_head *di_bh, struct page *mmap_page) { int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS; @@ -1827,8 +1825,8 @@ try_again: ocfs2_free_alloc_context(meta_ac); success: - if (pagep) - *pagep = wc->w_target_page; + if (foliop) + *foliop = page_folio(wc->w_target_page); *fsdata = wc; return 0; out_quota: @@ -1880,7 +1878,7 @@ out: static int ocfs2_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata) + struct folio **foliop, void **fsdata) { int ret; struct buffer_head *di_bh = NULL; @@ -1902,7 +1900,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, down_write(&OCFS2_I(inode)->ip_alloc_sem); ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER, - pagep, fsdata, di_bh, NULL); + foliop, fsdata, di_bh, NULL); if (ret) { mlog_errno(ret); goto out_fail; @@ -2077,7 +2075,7 @@ out: static int ocfs2_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) + struct folio *folio, void *fsdata) { int ret; struct inode *inode = mapping->host; @@ -2284,8 +2282,6 @@ unlock: ocfs2_inode_unlock(inode, 1); brelse(di_bh); out: - if (ret < 0) - ret = -EIO; return ret; } @@ -2474,7 +2470,7 @@ const struct address_space_operations ocfs2_aops = { .dirty_folio = block_dirty_folio, .read_folio = ocfs2_read_folio, .readahead = ocfs2_readahead, - .writepage = ocfs2_writepage, + .writepages = ocfs2_writepages, .write_begin = ocfs2_write_begin, .write_end = ocfs2_write_end, .bmap = ocfs2_bmap, @@ -2483,5 +2479,5 @@ const struct address_space_operations ocfs2_aops = { .release_folio = ocfs2_release_folio, .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, - .error_remove_page = generic_error_remove_page, + .error_remove_folio = generic_error_remove_folio, }; diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index a9ce7947228c..1d1b4b7edba0 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -38,7 +38,7 @@ typedef enum { int ocfs2_write_begin_nolock(struct address_space *mapping, loff_t pos, unsigned len, ocfs2_write_type_t type, - struct page **pagep, void **fsdata, + struct folio **foliop, void **fsdata, struct buffer_head *di_bh, struct page *mmap_page); int ocfs2_read_inline_data(struct inode *inode, struct page *page, diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 45ca3cb7c097..8f714406528d 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -158,7 +158,7 @@ read_failure: if (new_bh && bh) { /* If middle bh fails, let previous bh * finish its read and then put it to - * aovoid bh leak + * avoid bh leak */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -344,7 +344,7 @@ read_failure: if (new_bh && bh) { /* If middle bh fails, let previous bh * finish its read and then put it to - * aovoid bh leak + * avoid bh leak */ if (!buffer_jbd(bh)) wait_on_buffer(bh); diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 21472e3ed182..4b9f45d7049e 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -213,7 +213,7 @@ struct o2hb_region { unsigned int hr_num_pages; struct page **hr_slot_data; - struct block_device *hr_bdev; + struct file *hr_bdev_file; struct o2hb_disk_slot *hr_slots; /* live node map of this region */ @@ -261,6 +261,11 @@ struct o2hb_region { int hr_last_hb_status; }; +static inline struct block_device *reg_bdev(struct o2hb_region *reg) +{ + return reg->hr_bdev_file ? file_bdev(reg->hr_bdev_file) : NULL; +} + struct o2hb_bio_wait_ctxt { atomic_t wc_num_reqs; struct completion wc_io_complete; @@ -286,7 +291,7 @@ static void o2hb_write_timeout(struct work_struct *work) hr_write_timeout_work.work); mlog(ML_ERROR, "Heartbeat write timeout to device %pg after %u " - "milliseconds\n", reg->hr_bdev, + "milliseconds\n", reg_bdev(reg), jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); if (o2hb_global_heartbeat_active()) { @@ -383,7 +388,7 @@ static void o2hb_nego_timeout(struct work_struct *work) if (!test_bit(master_node, reg->hr_nego_node_bitmap)) { printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg).\n", o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, - config_item_name(®->hr_item), reg->hr_bdev); + config_item_name(®->hr_item), reg_bdev(reg)); set_bit(master_node, reg->hr_nego_node_bitmap); } if (!bitmap_equal(reg->hr_nego_node_bitmap, live_node_bitmap, @@ -398,7 +403,8 @@ static void o2hb_nego_timeout(struct work_struct *work) } printk(KERN_NOTICE "o2hb: all nodes hb write hung, maybe region %s (%pg) is down.\n", - config_item_name(®->hr_item), reg->hr_bdev); + config_item_name(®->hr_item), + reg_bdev(reg)); /* approve negotiate timeout request. */ o2hb_arm_timeout(reg); @@ -419,7 +425,7 @@ static void o2hb_nego_timeout(struct work_struct *work) /* negotiate timeout with master node. */ printk(KERN_NOTICE "o2hb: node %d hb write hung for %ds on region %s (%pg), negotiate timeout with node %d.\n", o2nm_this_node(), O2HB_NEGO_TIMEOUT_MS/1000, config_item_name(®->hr_item), - reg->hr_bdev, master_node); + reg_bdev(reg), master_node); ret = o2hb_send_nego_msg(reg->hr_key, O2HB_NEGO_TIMEOUT_MSG, master_node); if (ret) @@ -436,7 +442,8 @@ static int o2hb_nego_timeout_handler(struct o2net_msg *msg, u32 len, void *data, nego_msg = (struct o2hb_nego_msg *)msg->buf; printk(KERN_NOTICE "o2hb: receive negotiate timeout message from node %d on region %s (%pg).\n", - nego_msg->node_num, config_item_name(®->hr_item), reg->hr_bdev); + nego_msg->node_num, config_item_name(®->hr_item), + reg_bdev(reg)); if (nego_msg->node_num < O2NM_MAX_NODES) set_bit(nego_msg->node_num, reg->hr_nego_node_bitmap); else @@ -451,7 +458,7 @@ static int o2hb_nego_approve_handler(struct o2net_msg *msg, u32 len, void *data, struct o2hb_region *reg = data; printk(KERN_NOTICE "o2hb: negotiate timeout approved by master node on region %s (%pg).\n", - config_item_name(®->hr_item), reg->hr_bdev); + config_item_name(®->hr_item), reg_bdev(reg)); o2hb_arm_timeout(reg); return 0; } @@ -515,7 +522,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg, * GFP_KERNEL that the local node can get fenced. It would be * nicest if we could pre-allocate these bios and avoid this * all together. */ - bio = bio_alloc(reg->hr_bdev, 16, opf, GFP_ATOMIC); + bio = bio_alloc(reg_bdev(reg), 16, opf, GFP_ATOMIC); if (!bio) { mlog(ML_ERROR, "Could not alloc slots BIO!\n"); bio = ERR_PTR(-ENOMEM); @@ -687,7 +694,7 @@ static int o2hb_check_own_slot(struct o2hb_region *reg) errstr = ERRSTR3; mlog(ML_ERROR, "%s (%pg): expected(%u:0x%llx, 0x%llx), " - "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg->hr_bdev, + "ondisk(%u:0x%llx, 0x%llx)\n", errstr, reg_bdev(reg), slot->ds_node_num, (unsigned long long)slot->ds_last_generation, (unsigned long long)slot->ds_last_time, hb_block->hb_node, (unsigned long long)le64_to_cpu(hb_block->hb_generation), @@ -861,7 +868,7 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg) goto unlock; printk(KERN_NOTICE "o2hb: Region %s (%pg) is now a quorum device\n", - config_item_name(®->hr_item), reg->hr_bdev); + config_item_name(®->hr_item), reg_bdev(reg)); set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); @@ -920,7 +927,7 @@ static int o2hb_check_slot(struct o2hb_region *reg, * consider it a transient miss but don't populate any * other values as they may be junk. */ mlog(ML_ERROR, "Node %d has written a bad crc to %pg\n", - slot->ds_node_num, reg->hr_bdev); + slot->ds_node_num, reg_bdev(reg)); o2hb_dump_slot(hb_block); slot->ds_equal_samples++; @@ -1003,8 +1010,8 @@ fire_callbacks: "of %u ms, but our count is %u ms.\n" "Please double check your configuration values " "for 'O2CB_HEARTBEAT_THRESHOLD'\n", - slot->ds_node_num, reg->hr_bdev, slot_dead_ms, - dead_ms); + slot->ds_node_num, reg_bdev(reg), + slot_dead_ms, dead_ms); } goto out; } @@ -1143,7 +1150,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) * can't be sure that the new block ever made it to * disk */ mlog(ML_ERROR, "Write error %d on device \"%pg\"\n", - write_wc.wc_error, reg->hr_bdev); + write_wc.wc_error, reg_bdev(reg)); ret = write_wc.wc_error; goto bail; } @@ -1169,7 +1176,7 @@ bail: printk(KERN_NOTICE "o2hb: Unable to stabilize " "heartbeat on region %s (%pg)\n", config_item_name(®->hr_item), - reg->hr_bdev); + reg_bdev(reg)); atomic_set(®->hr_steady_iterations, 0); reg->hr_aborted_start = 1; wake_up(&o2hb_steady_queue); @@ -1489,7 +1496,7 @@ static void o2hb_region_release(struct config_item *item) struct page *page; struct o2hb_region *reg = to_o2hb_region(item); - mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg->hr_bdev); + mlog(ML_HEARTBEAT, "hb region release (%pg)\n", reg_bdev(reg)); kfree(reg->hr_tmp_block); @@ -1502,8 +1509,8 @@ static void o2hb_region_release(struct config_item *item) kfree(reg->hr_slot_data); } - if (reg->hr_bdev) - blkdev_put(reg->hr_bdev, NULL); + if (reg->hr_bdev_file) + fput(reg->hr_bdev_file); kfree(reg->hr_slots); @@ -1562,7 +1569,7 @@ static ssize_t o2hb_region_block_bytes_store(struct config_item *item, unsigned long block_bytes; unsigned int block_bits; - if (reg->hr_bdev) + if (reg->hr_bdev_file) return -EINVAL; status = o2hb_read_block_input(reg, page, &block_bytes, @@ -1591,7 +1598,7 @@ static ssize_t o2hb_region_start_block_store(struct config_item *item, char *p = (char *)page; ssize_t ret; - if (reg->hr_bdev) + if (reg->hr_bdev_file) return -EINVAL; ret = kstrtoull(p, 0, &tmp); @@ -1616,7 +1623,7 @@ static ssize_t o2hb_region_blocks_store(struct config_item *item, unsigned long tmp; char *p = (char *)page; - if (reg->hr_bdev) + if (reg->hr_bdev_file) return -EINVAL; tmp = simple_strtoul(p, &p, 0); @@ -1635,8 +1642,8 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page) { unsigned int ret = 0; - if (to_o2hb_region(item)->hr_bdev) - ret = sprintf(page, "%pg\n", to_o2hb_region(item)->hr_bdev); + if (to_o2hb_region(item)->hr_bdev_file) + ret = sprintf(page, "%pg\n", reg_bdev(to_o2hb_region(item))); return ret; } @@ -1745,7 +1752,10 @@ out: return ret; } -/* this is acting as commit; we set up all of hr_bdev and hr_task or nothing */ +/* + * this is acting as commit; we set up all of hr_bdev_file and hr_task or + * nothing + */ static ssize_t o2hb_region_dev_store(struct config_item *item, const char *page, size_t count) @@ -1759,7 +1769,7 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, ssize_t ret = -EINVAL; int live_threshold; - if (reg->hr_bdev) + if (reg->hr_bdev_file) goto out; /* We can't heartbeat without having had our node number @@ -1775,26 +1785,25 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, goto out; f = fdget(fd); - if (f.file == NULL) + if (fd_file(f) == NULL) goto out; if (reg->hr_blocks == 0 || reg->hr_start_block == 0 || reg->hr_block_bytes == 0) goto out2; - if (!S_ISBLK(f.file->f_mapping->host->i_mode)) + if (!S_ISBLK(fd_file(f)->f_mapping->host->i_mode)) goto out2; - reg->hr_bdev = blkdev_get_by_dev(f.file->f_mapping->host->i_rdev, - BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, - NULL); - if (IS_ERR(reg->hr_bdev)) { - ret = PTR_ERR(reg->hr_bdev); - reg->hr_bdev = NULL; + reg->hr_bdev_file = bdev_file_open_by_dev(fd_file(f)->f_mapping->host->i_rdev, + BLK_OPEN_WRITE | BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(reg->hr_bdev_file)) { + ret = PTR_ERR(reg->hr_bdev_file); + reg->hr_bdev_file = NULL; goto out2; } - sectsize = bdev_logical_block_size(reg->hr_bdev); + sectsize = bdev_logical_block_size(reg_bdev(reg)); if (sectsize != reg->hr_block_bytes) { mlog(ML_ERROR, "blocksize %u incorrect for device, expected %d", @@ -1890,12 +1899,12 @@ static ssize_t o2hb_region_dev_store(struct config_item *item, if (hb_task && o2hb_global_heartbeat_active()) printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%pg)\n", - config_item_name(®->hr_item), reg->hr_bdev); + config_item_name(®->hr_item), reg_bdev(reg)); out3: if (ret < 0) { - blkdev_put(reg->hr_bdev, NULL); - reg->hr_bdev = NULL; + fput(reg->hr_bdev_file); + reg->hr_bdev_file = NULL; } out2: fdput(f); @@ -2085,7 +2094,7 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%pg)\n", ((atomic_read(®->hr_steady_iterations) == 0) ? "stopped" : "start aborted"), config_item_name(item), - reg->hr_bdev); + reg_bdev(reg)); } /* diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 960080753d3b..2b8fa3e782fb 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1784,6 +1784,9 @@ static int o2net_accept_one(struct socket *sock, int *more) struct o2nm_node *node = NULL; struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; + struct proto_accept_arg arg = { + .flags = O_NONBLOCK, + }; struct o2net_node *nn; unsigned int nofs_flag; @@ -1802,7 +1805,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); + ret = sock->ops->accept(sock, new_sock, &arg); if (ret < 0) goto out; diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 04fc8344063a..a9b8688aaf30 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -124,17 +124,10 @@ static int ocfs2_match_dentry(struct dentry *dentry, if (!dentry->d_fsdata) return 0; - if (!dentry->d_parent) - return 0; - if (skip_unhashed && d_unhashed(dentry)) return 0; parent = d_inode(dentry->d_parent); - /* Negative parent dentry? */ - if (!parent) - return 0; - /* Name is in a different directory. */ if (OCFS2_I(parent)->ip_blkno != parent_blkno) return 0; diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 429c22f911fd..7799f4d16ce9 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -1065,26 +1065,39 @@ int ocfs2_find_entry(const char *name, int namelen, { struct buffer_head *bh; struct ocfs2_dir_entry *res_dir = NULL; + int ret = 0; if (ocfs2_dir_indexed(dir)) return ocfs2_find_entry_dx(name, namelen, dir, lookup); + if (unlikely(i_size_read(dir) <= 0)) { + ret = -EFSCORRUPTED; + mlog_errno(ret); + goto out; + } /* * The unindexed dir code only uses part of the lookup * structure, so there's no reason to push it down further * than this. */ - if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) + if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { + if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) { + ret = -EFSCORRUPTED; + mlog_errno(ret); + goto out; + } bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir); - else + } else { bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir); + } if (bh == NULL) return -ENOENT; lookup->dl_leaf_bh = bh; lookup->dl_entry = res_dir; - return 0; +out: + return ret; } /* @@ -1599,9 +1612,6 @@ int __ocfs2_add_entry(handle_t *handle, struct buffer_head *insert_bh = lookup->dl_leaf_bh; char *data_start = insert_bh->b_data; - if (!namelen) - return -EINVAL; - if (ocfs2_dir_indexed(dir)) { struct buffer_head *bh; @@ -1935,6 +1945,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) { int error = 0; struct inode *inode = file_inode(file); + struct ocfs2_file_private *fp = file->private_data; int lock_level = 0; trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); @@ -1955,7 +1966,7 @@ int ocfs2_readdir(struct file *file, struct dir_context *ctx) goto bail_nolock; } - error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); + error = ocfs2_dir_foreach_blk(inode, &fp->cookie, ctx, false); ocfs2_inode_unlock(inode, lock_level); if (error) @@ -2012,6 +2023,7 @@ int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, * * Return 0 if the name does not exist * Return -EEXIST if the directory contains the name + * Return -EFSCORRUPTED if found corruption * * Callers should have i_rwsem + a cluster lock on dir */ @@ -2025,9 +2037,12 @@ int ocfs2_check_dir_for_entry(struct inode *dir, trace_ocfs2_check_dir_for_entry( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name); - if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) { + ret = ocfs2_find_entry(name, namelen, dir, &lookup); + if (ret == 0) { ret = -EEXIST; mlog_errno(ret); + } else if (ret == -ENOENT) { + ret = 0; } ocfs2_free_dir_lookup_result(&lookup); @@ -3514,16 +3529,6 @@ static int dx_leaf_sort_cmp(const void *a, const void *b) return 0; } -static void dx_leaf_sort_swap(void *a, void *b, int size) -{ - struct ocfs2_dx_entry *entry1 = a; - struct ocfs2_dx_entry *entry2 = b; - - BUG_ON(size != sizeof(*entry1)); - - swap(*entry1, *entry2); -} - static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf) { struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list; @@ -3784,7 +3789,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, */ sort(dx_leaf->dl_list.de_entries, num_used, sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, - dx_leaf_sort_swap); + NULL); ocfs2_journal_dirty(handle, dx_leaf_bh); @@ -4257,12 +4262,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, trace_ocfs2_prepare_dir_for_insert( (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen); - if (!namelen) { - ret = -EINVAL; - mlog_errno(ret); - goto out; - } - /* * Do this up front to reduce confusion. * diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 5c04dde99981..2018501b2249 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -1274,7 +1274,7 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, { struct dlm_query_nodeinfo *qn; struct dlm_ctxt *dlm = NULL; - int locked = 0, status = -EINVAL; + int status = -EINVAL; qn = (struct dlm_query_nodeinfo *) msg->buf; @@ -1290,12 +1290,11 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, } spin_lock(&dlm->spinlock); - locked = 1; if (dlm->joining_node != qn->qn_nodenum) { mlog(ML_ERROR, "Node %d queried nodes on domain %s but " "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, dlm->joining_node); - goto bail; + goto unlock; } /* Support for node query was added in 1.1 */ @@ -1305,14 +1304,14 @@ static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, "but active dlm protocol is %d.%d\n", qn->qn_nodenum, qn->qn_domain, dlm->dlm_locking_proto.pv_major, dlm->dlm_locking_proto.pv_minor); - goto bail; + goto unlock; } status = dlm_match_nodes(dlm, qn); +unlock: + spin_unlock(&dlm->spinlock); bail: - if (locked) - spin_unlock(&dlm->spinlock); spin_unlock(&dlm_domain_lock); return status; @@ -1528,7 +1527,6 @@ static void dlm_send_join_asserts(struct dlm_ctxt *dlm, { int status, node, live; - status = 0; node = -1; while ((node = find_next_bit(node_map, O2NM_MAX_NODES, node + 1)) < O2NM_MAX_NODES) { diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index 9b57d012fd5c..7fc0e920eda7 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -80,8 +80,7 @@ static int param_set_dlmfs_capabilities(const char *val, static int param_get_dlmfs_capabilities(char *buffer, const struct kernel_param *kp) { - return strlcpy(buffer, DLMFS_CAPABILITIES, - strlen(DLMFS_CAPABILITIES) + 1); + return sysfs_emit(buffer, DLMFS_CAPABILITIES); } module_param_call(capabilities, param_set_dlmfs_capabilities, param_get_dlmfs_capabilities, NULL, 0444); @@ -579,7 +578,7 @@ static int __init init_dlmfs_fs(void) dlmfs_inode_cache = kmem_cache_create("dlmfs_inode_cache", sizeof(struct dlmfs_inode_private), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), dlmfs_init_once); if (!dlmfs_inode_cache) { status = -ENOMEM; diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 9148a98a5fbe..764ecbd5ad41 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -221,12 +221,12 @@ struct ocfs2_lock_res_ops { */ #define LOCK_TYPE_USES_LVB 0x2 -static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .get_osb = ocfs2_get_inode_osb, .check_downconvert = ocfs2_check_meta_downconvert, .set_lvb = ocfs2_set_meta_lvb, @@ -234,50 +234,50 @@ static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_super_lops = { +static const struct ocfs2_lock_res_ops ocfs2_super_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH, }; -static struct ocfs2_lock_res_ops ocfs2_rename_lops = { +static const struct ocfs2_lock_res_ops ocfs2_rename_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { +static const struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = { .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { +static const struct ocfs2_lock_res_ops ocfs2_trim_fs_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { +static const struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = { .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_dentry_lops = { +static const struct ocfs2_lock_res_ops ocfs2_dentry_lops = { .get_osb = ocfs2_get_dentry_osb, .post_unlock = ocfs2_dentry_post_unlock, .downconvert_worker = ocfs2_dentry_convert_worker, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { +static const struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { .get_osb = ocfs2_get_inode_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_flock_lops = { +static const struct ocfs2_lock_res_ops ocfs2_flock_lops = { .get_osb = ocfs2_get_file_osb, .flags = 0, }; -static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { +static const struct ocfs2_lock_res_ops ocfs2_qinfo_lops = { .set_lvb = ocfs2_set_qinfo_lvb, .get_osb = ocfs2_get_qinfo_osb, .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, }; -static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { +static const struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = { .check_downconvert = ocfs2_check_refcount_downconvert, .downconvert_worker = ocfs2_refcount_convert_worker, .flags = 0, @@ -510,7 +510,7 @@ static inline void ocfs2_init_start_time(struct ocfs2_mask_waiter *mw) static void ocfs2_lock_res_init_common(struct ocfs2_super *osb, struct ocfs2_lock_res *res, enum ocfs2_lock_type type, - struct ocfs2_lock_res_ops *ops, + const struct ocfs2_lock_res_ops *ops, void *priv) { res->l_type = type; @@ -553,7 +553,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, unsigned int generation, struct inode *inode) { - struct ocfs2_lock_res_ops *ops; + const struct ocfs2_lock_res_ops *ops; switch(type) { case OCFS2_LOCK_TYPE_RW: @@ -1615,7 +1615,7 @@ update_holders: unlock: lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING); - /* ocfs2_unblock_lock reques on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ + /* ocfs2_unblock_lock request on seeing OCFS2_LOCK_UPCONVERT_FINISHING */ kick_dc = (lockres->l_flags & OCFS2_LOCK_BLOCKED); spin_unlock_irqrestore(&lockres->l_lock, flags); @@ -3152,11 +3152,8 @@ static int ocfs2_dlm_seq_show(struct seq_file *m, void *v) #ifdef CONFIG_OCFS2_FS_STATS if (!lockres->l_lock_wait && dlm_debug->d_filter_secs) { now = ktime_to_us(ktime_get_real()); - if (lockres->l_lock_prmode.ls_last > - lockres->l_lock_exmode.ls_last) - last = lockres->l_lock_prmode.ls_last; - else - last = lockres->l_lock_exmode.ls_last; + last = max(lockres->l_lock_prmode.ls_last, + lockres->l_lock_exmode.ls_last); /* * Use d_filter_secs field to filter lock resources dump, * the default d_filter_secs(0) value filters nothing, diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index eaa8c80ace3c..96b684763b39 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -255,9 +255,9 @@ static struct dentry *ocfs2_fh_to_dentry(struct super_block *sb, if (fh_len < 3 || fh_type > 2) return NULL; - handle.ih_blkno = (u64)le32_to_cpu(fid->raw[0]) << 32; - handle.ih_blkno |= (u64)le32_to_cpu(fid->raw[1]); - handle.ih_generation = le32_to_cpu(fid->raw[2]); + handle.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[0]) << 32; + handle.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[1]); + handle.ih_generation = le32_to_cpu((__force __le32)fid->raw[2]); return ocfs2_get_dentry(sb, &handle); } @@ -269,9 +269,9 @@ static struct dentry *ocfs2_fh_to_parent(struct super_block *sb, if (fh_type != 2 || fh_len < 6) return NULL; - parent.ih_blkno = (u64)le32_to_cpu(fid->raw[3]) << 32; - parent.ih_blkno |= (u64)le32_to_cpu(fid->raw[4]); - parent.ih_generation = le32_to_cpu(fid->raw[5]); + parent.ih_blkno = (u64)le32_to_cpu((__force __le32)fid->raw[3]) << 32; + parent.ih_blkno |= (u64)le32_to_cpu((__force __le32)fid->raw[4]); + parent.ih_generation = le32_to_cpu((__force __le32)fid->raw[5]); return ocfs2_get_dentry(sb, &parent); } @@ -280,4 +280,5 @@ const struct export_operations ocfs2_export_ops = { .fh_to_dentry = ocfs2_fh_to_dentry, .fh_to_parent = ocfs2_fh_to_parent, .get_parent = ocfs2_get_parent, + .flags = EXPORT_OP_ASYNC_LOCK, }; diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 70a768b623cf..f7672472fa82 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -973,7 +973,13 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, } while (done < nr) { - down_read(&OCFS2_I(inode)->ip_alloc_sem); + if (!down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem)) { + rc = -EAGAIN; + mlog(ML_ERROR, + "Inode #%llu ip_alloc_sem is temporarily unavailable\n", + (unsigned long long)OCFS2_I(inode)->ip_blkno); + break; + } rc = ocfs2_extent_map_get_blocks(inode, v_block + done, &p_block, &p_count, NULL); up_read(&OCFS2_I(inode)->ip_alloc_sem); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0585f281ff62..cb09330a0861 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -755,7 +755,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, u64 abs_to, struct buffer_head *di_bh) { struct address_space *mapping = inode->i_mapping; - struct page *page; + struct folio *folio; unsigned long index = abs_from >> PAGE_SHIFT; handle_t *handle; int ret = 0; @@ -774,9 +774,10 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, goto out; } - page = find_or_create_page(mapping, index, GFP_NOFS); - if (!page) { - ret = -ENOMEM; + folio = __filemap_get_folio(mapping, index, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_NOFS); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); mlog_errno(ret); goto out_commit_trans; } @@ -803,7 +804,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, * __block_write_begin and block_commit_write to zero the * whole block. */ - ret = __block_write_begin(page, block_start + 1, 0, + ret = __block_write_begin(folio, block_start + 1, 0, ocfs2_get_block); if (ret < 0) { mlog_errno(ret); @@ -812,13 +813,13 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, /* must not update i_size! */ - block_commit_write(page, block_start + 1, block_start + 1); + block_commit_write(&folio->page, block_start + 1, block_start + 1); } /* * fs-writeback will release the dirty pages without page lock * whose offset are over inode size, the release happens at - * block_write_full_page(). + * block_write_full_folio(). */ i_size_write(inode, abs_to); inode->i_blocks = ocfs2_inode_sector_count(inode); @@ -833,8 +834,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, } out_unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); out_commit_trans: if (handle) ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); @@ -2765,6 +2766,13 @@ out_unlock: return remapped > 0 ? remapped : ret; } +static loff_t ocfs2_dir_llseek(struct file *file, loff_t offset, int whence) +{ + struct ocfs2_file_private *fp = file->private_data; + + return generic_llseek_cookie(file, offset, whence, &fp->cookie); +} + const struct inode_operations ocfs2_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, @@ -2780,6 +2788,7 @@ const struct inode_operations ocfs2_file_iops = { const struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, + .listxattr = ocfs2_listxattr, .permission = ocfs2_permission, .get_inode_acl = ocfs2_iop_get_acl, .set_acl = ocfs2_iop_set_acl, @@ -2811,7 +2820,7 @@ const struct file_operations ocfs2_fops = { WRAP_DIR_ITER(ocfs2_readdir) // FIXME! const struct file_operations ocfs2_dops = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, @@ -2857,7 +2866,7 @@ const struct file_operations ocfs2_fops_no_plocks = { }; const struct file_operations ocfs2_dops_no_plocks = { - .llseek = generic_file_llseek, + .llseek = ocfs2_dir_llseek, .read = generic_read_dir, .iterate_shared = shared_ocfs2_readdir, .fsync = ocfs2_sync_file, diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 8e53e4ac1120..41e65e45a9f3 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -20,6 +20,7 @@ struct ocfs2_alloc_context; enum ocfs2_alloc_restarted; struct ocfs2_file_private { + u64 cookie; struct file *fp_file; struct mutex fp_mutex; struct ocfs2_lock_res fp_flock; diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 999111bfc271..2cc5c99fe941 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -1621,6 +1621,7 @@ static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info } static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci) +__acquires(&oi->ip_lock) { struct ocfs2_inode_info *oi = cache_info_to_inode(ci); @@ -1628,6 +1629,7 @@ static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci) } static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci) +__releases(&oi->ip_lock) { struct ocfs2_inode_info *oi = cache_info_to_inode(ci); diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index b1550ba73f96..71beef7f8a60 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -125,6 +125,7 @@ int ocfs2_fileattr_set(struct mnt_idmap *idmap, ocfs2_inode->ip_attr = flags; ocfs2_set_inode_flags(inode); + inode_set_ctime_current(inode); status = ocfs2_mark_inode_dirty(handle, inode, bh); if (status < 0) diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index cbe3c12ff5f7..c2a73bfb16aa 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -90,7 +90,7 @@ enum ocfs2_replay_state { struct ocfs2_replay_map { unsigned int rm_slots; enum ocfs2_replay_state rm_state; - unsigned char rm_replay_slots[]; + unsigned char rm_replay_slots[] __counted_by(rm_slots); }; static void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state) @@ -174,7 +174,7 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) struct ocfs2_recovery_map *rm; mutex_init(&osb->recovery_lock); - osb->disable_recovery = 0; + osb->recovery_state = OCFS2_REC_ENABLED; osb->recovery_thread_task = NULL; init_waitqueue_head(&osb->recovery_event); @@ -190,31 +190,53 @@ int ocfs2_recovery_init(struct ocfs2_super *osb) return 0; } -/* we can't grab the goofy sem lock from inside wait_event, so we use - * memory barriers to make sure that we'll see the null task before - * being woken up */ static int ocfs2_recovery_thread_running(struct ocfs2_super *osb) { - mb(); return osb->recovery_thread_task != NULL; } -void ocfs2_recovery_exit(struct ocfs2_super *osb) +static void ocfs2_recovery_disable(struct ocfs2_super *osb, + enum ocfs2_recovery_state state) { - struct ocfs2_recovery_map *rm; - - /* disable any new recovery threads and wait for any currently - * running ones to exit. Do this before setting the vol_state. */ mutex_lock(&osb->recovery_lock); - osb->disable_recovery = 1; + /* + * If recovery thread is not running, we can directly transition to + * final state. + */ + if (!ocfs2_recovery_thread_running(osb)) { + osb->recovery_state = state + 1; + goto out_lock; + } + osb->recovery_state = state; + /* Wait for recovery thread to acknowledge state transition */ + wait_event_cmd(osb->recovery_event, + !ocfs2_recovery_thread_running(osb) || + osb->recovery_state >= state + 1, + mutex_unlock(&osb->recovery_lock), + mutex_lock(&osb->recovery_lock)); +out_lock: mutex_unlock(&osb->recovery_lock); - wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb)); - /* At this point, we know that no more recovery threads can be - * launched, so wait for any recovery completion work to - * complete. */ + /* + * At this point we know that no more recovery work can be queued so + * wait for any recovery completion work to complete. + */ if (osb->ocfs2_wq) flush_workqueue(osb->ocfs2_wq); +} + +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb) +{ + ocfs2_recovery_disable(osb, OCFS2_REC_QUOTA_WANT_DISABLE); +} + +void ocfs2_recovery_exit(struct ocfs2_super *osb) +{ + struct ocfs2_recovery_map *rm; + + /* disable any new recovery threads and wait for any currently + * running ones to exit. Do this before setting the vol_state. */ + ocfs2_recovery_disable(osb, OCFS2_REC_WANT_DISABLE); /* * Now that recovery is shut down, and the osb is about to be @@ -1249,7 +1271,7 @@ static int ocfs2_force_read_journal(struct inode *inode) } for (i = 0; i < p_blocks; i++, p_blkno++) { - bh = __find_get_block(osb->sb->s_bdev, p_blkno, + bh = __find_get_block_nonatomic(osb->sb->s_bdev, p_blkno, osb->sb->s_blocksize); /* block not cached. */ if (!bh) @@ -1472,6 +1494,18 @@ static int __ocfs2_recovery_thread(void *arg) } } restart: + if (quota_enabled) { + mutex_lock(&osb->recovery_lock); + /* Confirm that recovery thread will no longer recover quotas */ + if (osb->recovery_state == OCFS2_REC_QUOTA_WANT_DISABLE) { + osb->recovery_state = OCFS2_REC_QUOTA_DISABLED; + wake_up(&osb->recovery_event); + } + if (osb->recovery_state >= OCFS2_REC_QUOTA_DISABLED) + quota_enabled = 0; + mutex_unlock(&osb->recovery_lock); + } + status = ocfs2_super_lock(osb, 1); if (status < 0) { mlog_errno(status); @@ -1569,27 +1603,29 @@ bail: ocfs2_free_replay_slots(osb); osb->recovery_thread_task = NULL; - mb(); /* sync with ocfs2_recovery_thread_running */ + if (osb->recovery_state == OCFS2_REC_WANT_DISABLE) + osb->recovery_state = OCFS2_REC_DISABLED; wake_up(&osb->recovery_event); mutex_unlock(&osb->recovery_lock); - if (quota_enabled) - kfree(rm_quota); + kfree(rm_quota); return status; } void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { + int was_set = -1; + mutex_lock(&osb->recovery_lock); + if (osb->recovery_state < OCFS2_REC_WANT_DISABLE) + was_set = ocfs2_recovery_map_set(osb, node_num); trace_ocfs2_recovery_thread(node_num, osb->node_num, - osb->disable_recovery, osb->recovery_thread_task, - osb->disable_recovery ? - -1 : ocfs2_recovery_map_set(osb, node_num)); + osb->recovery_state, osb->recovery_thread_task, was_set); - if (osb->disable_recovery) + if (osb->recovery_state >= OCFS2_REC_WANT_DISABLE) goto out; if (osb->recovery_thread_task) diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index e3c3a35dc5e0..6397170f302f 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -148,6 +148,7 @@ void ocfs2_wait_for_recovery(struct ocfs2_super *osb); int ocfs2_recovery_init(struct ocfs2_super *osb); void ocfs2_recovery_exit(struct ocfs2_super *osb); +void ocfs2_recovery_disable_quota(struct ocfs2_super *osb); int ocfs2_compute_replay_slots(struct ocfs2_super *osb); void ocfs2_free_replay_slots(struct ocfs2_super *osb); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index c803c10dd97e..d1aa04a5af1b 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -212,14 +212,15 @@ static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, unsigned int num_clusters) { - spin_lock(&osb->osb_lock); - if (osb->local_alloc_state == OCFS2_LA_DISABLED || - osb->local_alloc_state == OCFS2_LA_THROTTLED) - if (num_clusters >= osb->local_alloc_default_bits) { + if (num_clusters >= osb->local_alloc_default_bits) { + spin_lock(&osb->osb_lock); + if (osb->local_alloc_state == OCFS2_LA_DISABLED || + osb->local_alloc_state == OCFS2_LA_THROTTLED) { cancel_delayed_work(&osb->la_enable_wq); osb->local_alloc_state = OCFS2_LA_ENABLED; } - spin_unlock(&osb->osb_lock); + spin_unlock(&osb->osb_lock); + } } void ocfs2_la_enable_worker(struct work_struct *work) @@ -335,7 +336,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) "found = %u, set = %u, taken = %u, off = %u\n", num_used, le32_to_cpu(alloc->id1.bitmap1.i_used), le32_to_cpu(alloc->id1.bitmap1.i_total), - OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); + le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off)); status = -EINVAL; goto bail; @@ -863,14 +864,8 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, numfound = bitoff = startoff = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); - while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { - if (bitoff == left) { - /* mlog(0, "bitoff (%d) == left", bitoff); */ - break; - } - /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " - "numfound = %d\n", bitoff, startoff, numfound);*/ - + while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) < + left) { /* Ok, we found a zero bit... is it contig. or do we * start over?*/ if (bitoff == startoff) { @@ -976,8 +971,8 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, start = count = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); - while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) - != -1) { + while (1) { + bit_off = ocfs2_find_next_zero_bit(bitmap, left, start); if ((bit_off < left) && (bit_off == start)) { count++; start++; @@ -1002,6 +997,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, goto bail; } } + if (bit_off >= left) break; count = 1; @@ -1220,7 +1216,7 @@ retry_enospc: OCFS2_LOCAL_ALLOC(alloc)->la_bitmap); trace_ocfs2_local_alloc_new_window_result( - OCFS2_LOCAL_ALLOC(alloc)->la_bm_off, + le32_to_cpu(OCFS2_LOCAL_ALLOC(alloc)->la_bm_off), le32_to_cpu(alloc->id1.bitmap1.i_total)); bail: diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index f37174e79fad..6de944818c56 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -27,7 +27,7 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, struct ocfs2_file_private *fp = file->private_data; struct ocfs2_lock_res *lockres = &fp->fp_flock; - if (fl->fl_type == F_WRLCK) + if (lock_is_write(fl)) level = 1; if (!IS_SETLKW(cmd)) trylock = 1; @@ -53,8 +53,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, */ locks_init_lock(&request); - request.fl_type = F_UNLCK; - request.fl_flags = FL_FLOCK; + request.c.flc_type = F_UNLCK; + request.c.flc_flags = FL_FLOCK; locks_lock_file_wait(file, &request); ocfs2_file_unlock(file); @@ -100,14 +100,14 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if (!(fl->fl_flags & FL_FLOCK)) + if (!(fl->c.flc_flags & FL_FLOCK)) return -ENOLCK; if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || ocfs2_mount_local(osb)) return locks_lock_file_wait(file, fl); - if (fl->fl_type == F_UNLCK) + if (lock_is_unlock(fl)) return ocfs2_do_funlock(file, cmd, fl); else return ocfs2_do_flock(file, inode, cmd, fl); @@ -118,7 +118,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) struct inode *inode = file->f_mapping->host; struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - if (!(fl->fl_flags & FL_POSIX)) + if (!(fl->c.flc_flags & FL_POSIX)) return -ENOLCK; return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 1834f26522ed..6ef4cb045ccd 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c @@ -53,7 +53,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, loff_t pos = page_offset(page); unsigned int len = PAGE_SIZE; pgoff_t last_index; - struct page *locked_page = NULL; + struct folio *locked_folio = NULL; void *fsdata; loff_t size = i_size_read(inode); @@ -91,7 +91,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, len = ((size - 1) & ~PAGE_MASK) + 1; err = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP, - &locked_page, &fsdata, di_bh, page); + &locked_folio, &fsdata, di_bh, page); if (err) { if (err != -ENOSPC) mlog_errno(err); @@ -99,7 +99,7 @@ static vm_fault_t __ocfs2_page_mkwrite(struct file *file, goto out; } - if (!locked_page) { + if (!locked_folio) { ret = VM_FAULT_NOPAGE; goto out; } diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 1f9ed117e78b..f9d6a4f9ca92 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -685,7 +685,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, } ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, - goal_bit, len); + goal_bit, len, 0, 0); if (ret) { ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, le16_to_cpu(gd->bg_chain)); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 4e6d8a3f727d..5550f8afa438 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1340,7 +1340,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap, goto bail; } - if (S_ISDIR(old_inode->i_mode)) { + if (S_ISDIR(old_inode->i_mode) && new_dir != old_dir) { u64 old_inode_parent; update_dot_dot = 1; @@ -1357,8 +1357,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap, goto bail; } - if (!new_inode && new_dir != old_dir && - new_dir->i_nlink >= ocfs2_link_max(osb)) { + if (!new_inode && new_dir->i_nlink >= ocfs2_link_max(osb)) { status = -EMLINK; goto bail; } @@ -1601,6 +1600,13 @@ static int ocfs2_rename(struct mnt_idmap *idmap, if (update_dot_dot) { status = ocfs2_update_entry(old_inode, handle, &old_inode_dot_dot_res, new_dir); + if (status < 0) { + mlog_errno(status); + goto bail; + } + } + + if (S_ISDIR(old_inode->i_mode)) { drop_nlink(old_dir); if (new_inode) { drop_nlink(new_inode); @@ -1640,6 +1646,10 @@ static int ocfs2_rename(struct mnt_idmap *idmap, INODE_CACHE(old_dir), old_dir_bh, OCFS2_JOURNAL_ACCESS_WRITE); + if (status < 0) { + mlog_errno(status); + goto bail; + } fe = (struct ocfs2_dinode *) old_dir_bh->b_data; ocfs2_set_links_count(fe, old_dir->i_nlink); ocfs2_journal_dirty(handle, old_dir_bh); @@ -2181,8 +2191,10 @@ static int __ocfs2_prepare_orphan_dir(struct inode *orphan_dir_inode, * @osb: ocfs2 file system * @ret_orphan_dir: Orphan dir inode - returned locked! * @blkno: Actual block number of the inode to be inserted into orphan dir. + * @name: Buffer to store the name of the orphan. * @lookup: dir lookup result, to be passed back into functions like * ocfs2_orphan_add + * @dio: Flag indicating if direct IO is being used or not. * * Returns zero on success and the ret_orphan_dir, name and lookup * fields will be populated. diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 8fe826143d7b..6aaa94c554c1 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -154,7 +154,7 @@ struct ocfs2_lock_stats { struct ocfs2_lock_res { void *l_priv; - struct ocfs2_lock_res_ops *l_ops; + const struct ocfs2_lock_res_ops *l_ops; struct list_head l_blocked_list; @@ -308,6 +308,21 @@ enum ocfs2_journal_trigger_type { void ocfs2_initialize_journal_triggers(struct super_block *sb, struct ocfs2_triggers triggers[]); +enum ocfs2_recovery_state { + OCFS2_REC_ENABLED = 0, + OCFS2_REC_QUOTA_WANT_DISABLE, + /* + * Must be OCFS2_REC_QUOTA_WANT_DISABLE + 1 for + * ocfs2_recovery_disable_quota() to work. + */ + OCFS2_REC_QUOTA_DISABLED, + OCFS2_REC_WANT_DISABLE, + /* + * Must be OCFS2_REC_WANT_DISABLE + 1 for ocfs2_recovery_exit() to work + */ + OCFS2_REC_DISABLED, +}; + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -370,7 +385,7 @@ struct ocfs2_super struct ocfs2_recovery_map *recovery_map; struct ocfs2_replay_map *replay_map; struct task_struct *recovery_thread_task; - int disable_recovery; + enum ocfs2_recovery_state recovery_state; wait_queue_head_t checkpoint_event; struct ocfs2_journal *journal; unsigned long osb_commit_interval; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 7aebdbf5cc0a..c93689b568fe 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -883,7 +883,8 @@ struct ocfs2_group_desc __le16 bg_free_bits_count; /* Free bits count */ __le16 bg_chain; /* What chain I am in. */ /*10*/ __le32 bg_generation; - __le32 bg_reserved1; + __le16 bg_contig_free_bits; /* max contig free bits length */ + __le16 bg_reserved1; __le64 bg_next_group; /* Next group in my list, in blocks */ /*20*/ __le64 bg_parent_dinode; /* dinode which owns me, in diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h index 6c3f4d7df7d6..0511c69c9fde 100644 --- a/fs/ocfs2/ocfs2_trace.h +++ b/fs/ocfs2/ocfs2_trace.h @@ -82,7 +82,7 @@ DECLARE_EVENT_CLASS(ocfs2__string, __string(name,name) ), TP_fast_assign( - __assign_str(name, name); + __assign_str(name); ), TP_printk("%s", __get_str(name)) ); @@ -1157,8 +1157,6 @@ DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_get_block_end); DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_readpage); -DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_writepage); - DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_bmap); TRACE_EVENT(ocfs2_try_to_write_inline_data, @@ -1291,7 +1289,7 @@ DECLARE_EVENT_CLASS(ocfs2__file_ops, __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; - __assign_str(d_name, d_name); + __assign_str(d_name); __entry->para = para; ), TP_printk("%p %p %p %llu %llu %.*s", __entry->inode, __entry->file, @@ -1427,7 +1425,7 @@ TRACE_EVENT(ocfs2_setattr, __entry->dentry = dentry; __entry->ino = ino; __entry->d_len = d_len; - __assign_str(d_name, d_name); + __assign_str(d_name); __entry->ia_valid = ia_valid; __entry->ia_mode = ia_mode; __entry->ia_uid = ia_uid; @@ -1685,7 +1683,7 @@ TRACE_EVENT(ocfs2_parse_options, ), TP_fast_assign( __entry->is_remount = is_remount; - __assign_str(options, options); + __assign_str(options); ), TP_printk("%d %s", __entry->is_remount, __get_str(options)) ); @@ -1720,8 +1718,8 @@ TRACE_EVENT(ocfs2_initialize_super, __field(int, cluster_bits) ), TP_fast_assign( - __assign_str(label, label); - __assign_str(uuid_str, uuid_str); + __assign_str(label); + __assign_str(uuid_str); __entry->root_dir = root_dir; __entry->system_dir = system_dir; __entry->cluster_bits = cluster_bits; @@ -1748,7 +1746,7 @@ TRACE_EVENT(ocfs2_init_xattr_set_ctxt, __field(int, credits) ), TP_fast_assign( - __assign_str(name, name); + __assign_str(name); __entry->meta = meta; __entry->clusters = clusters; __entry->credits = credits; @@ -1772,7 +1770,7 @@ DECLARE_EVENT_CLASS(ocfs2__xattr_find, ), TP_fast_assign( __entry->ino = ino; - __assign_str(name, name); + __assign_str(name); __entry->name_index = name_index; __entry->hash = hash; __entry->location = location; @@ -2021,7 +2019,7 @@ TRACE_EVENT(ocfs2_sync_dquot_helper, __entry->dq_id = dq_id; __entry->dq_type = dq_type; __entry->type = type; - __assign_str(s_id, s_id); + __assign_str(s_id); ), TP_printk("%u %u %lu %s", __entry->dq_id, __entry->dq_type, __entry->type, __get_str(s_id)) @@ -2062,7 +2060,7 @@ TRACE_EVENT(ocfs2_dx_dir_search, TP_fast_assign( __entry->ino = ino; __entry->namelen = namelen; - __assign_str(name, name); + __assign_str(name); __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->blkno = blkno; @@ -2090,7 +2088,7 @@ TRACE_EVENT(ocfs2_find_files_on_disk, ), TP_fast_assign( __entry->namelen = namelen; - __assign_str(name, name); + __assign_str(name); __entry->blkno = blkno; __entry->dir = dir; ), @@ -2109,7 +2107,7 @@ TRACE_EVENT(ocfs2_check_dir_for_entry, TP_fast_assign( __entry->dir = dir; __entry->namelen = namelen; - __assign_str(name, name); + __assign_str(name); ), TP_printk("%llu %.*s", __entry->dir, __entry->namelen, __get_str(name)) @@ -2137,7 +2135,7 @@ TRACE_EVENT(ocfs2_dx_dir_index_root_block, __entry->major_hash = major_hash; __entry->minor_hash = minor_hash; __entry->namelen = namelen; - __assign_str(name, name); + __assign_str(name); __entry->num_used = num_used; ), TP_printk("%llu %x %x %.*s %u", __entry->dir, @@ -2173,7 +2171,7 @@ DECLARE_EVENT_CLASS(ocfs2__dentry_ops, __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; - __assign_str(name, name); + __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->extra = extra; ), @@ -2219,7 +2217,7 @@ TRACE_EVENT(ocfs2_mknod, __entry->dir = dir; __entry->dentry = dentry; __entry->name_len = name_len; - __assign_str(name, name); + __assign_str(name); __entry->dir_blkno = dir_blkno; __entry->dev = dev; __entry->mode = mode; @@ -2243,9 +2241,9 @@ TRACE_EVENT(ocfs2_link, TP_fast_assign( __entry->ino = ino; __entry->old_len = old_len; - __assign_str(old_name, old_name); + __assign_str(old_name); __entry->name_len = name_len; - __assign_str(name, name); + __assign_str(name); ), TP_printk("%llu %.*s %.*s", __entry->ino, __entry->old_len, __get_str(old_name), @@ -2281,9 +2279,9 @@ TRACE_EVENT(ocfs2_rename, __entry->new_dir = new_dir; __entry->new_dentry = new_dentry; __entry->old_len = old_len; - __assign_str(old_name, old_name); + __assign_str(old_name); __entry->new_len = new_len; - __assign_str(new_name, new_name); + __assign_str(new_name); ), TP_printk("%p %p %p %p %.*s %.*s", __entry->old_dir, __entry->old_dentry, @@ -2303,7 +2301,7 @@ TRACE_EVENT(ocfs2_rename_target_exists, ), TP_fast_assign( __entry->new_len = new_len; - __assign_str(new_name, new_name); + __assign_str(new_name); ), TP_printk("%.*s", __entry->new_len, __get_str(new_name)) ); @@ -2346,7 +2344,7 @@ TRACE_EVENT(ocfs2_symlink_begin, __entry->dentry = dentry; __entry->symname = symname; __entry->len = len; - __assign_str(name, name); + __assign_str(name); ), TP_printk("%p %p %s %.*s", __entry->dir, __entry->dentry, __entry->symname, __entry->len, __get_str(name)) @@ -2362,7 +2360,7 @@ TRACE_EVENT(ocfs2_blkno_stringify, ), TP_fast_assign( __entry->blkno = blkno; - __assign_str(name, name); + __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->blkno, __get_str(name), @@ -2383,7 +2381,7 @@ TRACE_EVENT(ocfs2_orphan_del, ), TP_fast_assign( __entry->dir = dir; - __assign_str(name, name); + __assign_str(name); __entry->namelen = namelen; ), TP_printk("%llu %s %d", __entry->dir, __get_str(name), @@ -2405,7 +2403,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate, TP_fast_assign( __entry->dentry = dentry; __entry->len = len; - __assign_str(name, name); + __assign_str(name); ), TP_printk("%p %.*s", __entry->dentry, __entry->len, __get_str(name)) ); @@ -2422,7 +2420,7 @@ TRACE_EVENT(ocfs2_dentry_revalidate_negative, ), TP_fast_assign( __entry->len = len; - __assign_str(name, name); + __assign_str(name); __entry->pgen = pgen; __entry->gen = gen; ), @@ -2447,7 +2445,7 @@ TRACE_EVENT(ocfs2_find_local_alias, ), TP_fast_assign( __entry->len = len; - __assign_str(name, name); + __assign_str(name); ), TP_printk("%.*s", __entry->len, __get_str(name)) ); @@ -2464,7 +2462,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock, ), TP_fast_assign( __entry->len = len; - __assign_str(name, name); + __assign_str(name); __entry->parent = parent; __entry->fsdata = fsdata; ), @@ -2482,7 +2480,7 @@ TRACE_EVENT(ocfs2_dentry_attach_lock_found, __field(unsigned long long, ino) ), TP_fast_assign( - __assign_str(name, name); + __assign_str(name); __entry->parent = parent; __entry->ino = ino; ), @@ -2529,7 +2527,7 @@ TRACE_EVENT(ocfs2_get_parent, TP_fast_assign( __entry->child = child; __entry->len = len; - __assign_str(name, name); + __assign_str(name); __entry->ino = ino; ), TP_printk("%p %.*s %llu", __entry->child, __entry->len, @@ -2553,7 +2551,7 @@ TRACE_EVENT(ocfs2_encode_fh_begin, TP_fast_assign( __entry->dentry = dentry; __entry->name_len = name_len; - __assign_str(name, name); + __assign_str(name); __entry->fh = fh; __entry->len = len; __entry->connectable = connectable; diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index dc9f76ab7e13..15d9acd456ec 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -371,12 +371,16 @@ int ocfs2_global_read_info(struct super_block *sb, int type) status = ocfs2_extent_map_get_blocks(oinfo->dqi_gqinode, 0, &oinfo->dqi_giblk, &pcount, NULL); - if (status < 0) + if (status < 0) { + mlog_errno(status); goto out_unlock; + } status = ocfs2_qinfo_lock(oinfo, 0); - if (status < 0) + if (status < 0) { + mlog_errno(status); goto out_unlock; + } status = sb->s_op->quota_read(sb, type, (char *)&dinfo, sizeof(struct ocfs2_global_disk_dqinfo), OCFS2_GLOBAL_INFO_OFF); @@ -404,12 +408,11 @@ int ocfs2_global_read_info(struct super_block *sb, int type) schedule_delayed_work(&oinfo->dqi_sync_work, msecs_to_jiffies(oinfo->dqi_syncms)); -out_err: - return status; + return 0; out_unlock: ocfs2_unlock_global_qf(oinfo, 0); - mlog_errno(status); - goto out_err; +out_err: + return status; } /* Write information to global quota file. Expects exclusive lock on quota @@ -447,14 +450,17 @@ int ocfs2_global_write_info(struct super_block *sb, int type) int err; struct quota_info *dqopt = sb_dqopt(sb); struct ocfs2_mem_dqinfo *info = dqopt->info[type].dqi_priv; + unsigned int memalloc; down_write(&dqopt->dqio_sem); + memalloc = memalloc_nofs_save(); err = ocfs2_qinfo_lock(info, 1); if (err < 0) goto out_sem; err = __ocfs2_global_write_info(sb, type); ocfs2_qinfo_unlock(info, 1); out_sem: + memalloc_nofs_restore(memalloc); up_write(&dqopt->dqio_sem); return err; } @@ -601,6 +607,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; struct ocfs2_super *osb = OCFS2_SB(sb); int status = 0; + unsigned int memalloc; trace_ocfs2_sync_dquot_helper(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type, @@ -618,6 +625,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) goto out_ilock; } down_write(&sb_dqopt(sb)->dqio_sem); + memalloc = memalloc_nofs_save(); status = ocfs2_sync_dquot(dquot); if (status < 0) mlog_errno(status); @@ -625,6 +633,7 @@ static int ocfs2_sync_dquot_helper(struct dquot *dquot, unsigned long type) status = ocfs2_local_write_dquot(dquot); if (status < 0) mlog_errno(status); + memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out_ilock: @@ -662,6 +671,7 @@ static int ocfs2_write_dquot(struct dquot *dquot) handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb); int status = 0; + unsigned int memalloc; trace_ocfs2_write_dquot(from_kqid(&init_user_ns, dquot->dq_id), dquot->dq_id.type); @@ -673,7 +683,9 @@ static int ocfs2_write_dquot(struct dquot *dquot) goto out; } down_write(&sb_dqopt(dquot->dq_sb)->dqio_sem); + memalloc = memalloc_nofs_save(); status = ocfs2_local_write_dquot(dquot); + memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(dquot->dq_sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out: @@ -749,6 +761,11 @@ static int ocfs2_release_dquot(struct dquot *dquot) handle = ocfs2_start_trans(osb, ocfs2_calc_qdel_credits(dquot->dq_sb, dquot->dq_id.type)); if (IS_ERR(handle)) { + /* + * Mark dquot as inactive to avoid endless cycle in + * quota_release_workfn(). + */ + clear_bit(DQ_ACTIVE_B, &dquot->dq_flags); status = PTR_ERR(handle); mlog_errno(status); goto out_ilock; @@ -881,7 +898,7 @@ static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid) int status = 0; trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type); - if (!sb_has_quota_loaded(sb, type)) { + if (!sb_has_quota_active(sb, type)) { status = -ESRCH; goto out; } @@ -920,6 +937,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv; handle_t *handle; struct ocfs2_super *osb = OCFS2_SB(sb); + unsigned int memalloc; trace_ocfs2_mark_dquot_dirty(from_kqid(&init_user_ns, dquot->dq_id), type); @@ -946,6 +964,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) goto out_ilock; } down_write(&sb_dqopt(sb)->dqio_sem); + memalloc = memalloc_nofs_save(); status = ocfs2_sync_dquot(dquot); if (status < 0) { mlog_errno(status); @@ -954,6 +973,7 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) /* Now write updated local dquot structure */ status = ocfs2_local_write_dquot(dquot); out_dlock: + memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(sb)->dqio_sem); ocfs2_commit_trans(osb, handle); out_ilock: diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 257f13cdd14c..de7f12858729 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -453,8 +453,7 @@ out: /* Sync changes in local quota file into global quota file and * reinitialize local quota file. - * The function expects local quota file to be already locked and - * s_umount locked in shared mode. */ + * The function expects local quota file to be already locked. */ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int type, struct ocfs2_quota_recovery *rec) @@ -470,6 +469,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, int bit, chunk; struct ocfs2_recovery_chunk *rchunk, *next; qsize_t spacechange, inodechange; + unsigned int memalloc; trace_ocfs2_recover_local_quota_file((unsigned long)lqinode->i_ino, type); @@ -521,6 +521,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, goto out_drop_lock; } down_write(&sb_dqopt(sb)->dqio_sem); + memalloc = memalloc_nofs_save(); spin_lock(&dquot->dq_dqb_lock); /* Add usage from quota entry into quota changes * of our node. Auxiliary variables are important @@ -553,6 +554,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, unlock_buffer(qbh); ocfs2_journal_dirty(handle, qbh); out_commit: + memalloc_nofs_restore(memalloc); up_write(&sb_dqopt(sb)->dqio_sem); ocfs2_commit_trans(OCFS2_SB(sb), handle); out_drop_lock: @@ -585,7 +587,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, { unsigned int ino[OCFS2_MAXQUOTAS] = { LOCAL_USER_QUOTA_SYSTEM_INODE, LOCAL_GROUP_QUOTA_SYSTEM_INODE }; - struct super_block *sb = osb->sb; struct ocfs2_local_disk_dqinfo *ldinfo; struct buffer_head *bh; handle_t *handle; @@ -597,7 +598,6 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb, printk(KERN_NOTICE "ocfs2: Finishing quota recovery on device (%s) for " "slot %u\n", osb->dev_str, slot_num); - down_read(&sb->s_umount); for (type = 0; type < OCFS2_MAXQUOTAS; type++) { if (list_empty(&(rec->r_list[type]))) continue; @@ -674,8 +674,7 @@ out_put: break; } out: - up_read(&sb->s_umount); - kfree(rec); + ocfs2_free_quota_recovery(rec); return status; } @@ -840,8 +839,7 @@ static int ocfs2_local_free_info(struct super_block *sb, int type) ocfs2_release_local_quota_bitmaps(&oinfo->dqi_chunk); /* - * s_umount held in exclusive mode protects us against racing with - * recovery thread... + * ocfs2_dismount_volume() has already aborted quota recovery... */ if (oinfo->dqi_rec) { ocfs2_free_quota_recovery(oinfo->dqi_rec); @@ -864,6 +862,7 @@ out: brelse(oinfo->dqi_libh); brelse(oinfo->dqi_lqi_bh); kfree(oinfo); + info->dqi_priv = NULL; return status; } @@ -1244,6 +1243,10 @@ int ocfs2_create_local_dquot(struct dquot *dquot) &od->dq_local_phys_blk, &pcount, NULL); + if (status < 0) { + mlog_errno(status); + goto out; + } /* Initialize dquot structure on disk */ status = ocfs2_local_write_dquot(dquot); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index c71b79b5fb9b..004393b13c0a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -631,7 +631,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode, rb->rf_records.rl_count = cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb)); spin_lock(&osb->osb_lock); - rb->rf_generation = osb->s_next_generation++; + rb->rf_generation = cpu_to_le32(osb->s_next_generation++); spin_unlock(&osb->osb_lock); ocfs2_journal_dirty(handle, new_bh); @@ -1393,13 +1393,6 @@ static int cmp_refcount_rec_by_cpos(const void *a, const void *b) return 0; } -static void swap_refcount_rec(void *a, void *b, int size) -{ - struct ocfs2_refcount_rec *l = a, *r = b; - - swap(*l, *r); -} - /* * The refcount cpos are ordered by their 64bit cpos, * But we will use the low 32 bit to be the e_cpos in the b-tree. @@ -1475,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, */ sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_low_cpos, swap_refcount_rec); + cmp_refcount_rec_by_low_cpos, NULL); ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index); if (ret) { @@ -1500,11 +1493,11 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, sort(&rl->rl_recs, le16_to_cpu(rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_cpos, swap_refcount_rec); + cmp_refcount_rec_by_cpos, NULL); sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used), sizeof(struct ocfs2_refcount_rec), - cmp_refcount_rec_by_cpos, swap_refcount_rec); + cmp_refcount_rec_by_cpos, NULL); *split_cpos = cpos; return 0; diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index a9d1296d736d..1fe61974d9f0 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -414,7 +414,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap, start = search_start; while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len, - start)) != -1) { + start)) < resmap->m_bitmap_len) { /* Search reached end of the region */ if (offset >= (search_start + search_len)) break; diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index b2b47bb79529..b0733c08ed13 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -91,6 +91,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, u16 cl_bpc = le16_to_cpu(cl->cl_bpc); u16 cl_cpg = le16_to_cpu(cl->cl_cpg); u16 old_bg_clusters; + u16 contig_bits; + __le16 old_bg_contig_free_bits; trace_ocfs2_update_last_group_and_inode(new_clusters, first_new_cluster); @@ -122,6 +124,11 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, le16_add_cpu(&group->bg_free_bits_count, -1 * backups); } + contig_bits = ocfs2_find_max_contig_free_bits(group->bg_bitmap, + le16_to_cpu(group->bg_bits), 0); + old_bg_contig_free_bits = group->bg_contig_free_bits; + group->bg_contig_free_bits = cpu_to_le16(contig_bits); + ocfs2_journal_dirty(handle, group_bh); /* update the inode accordingly. */ @@ -160,6 +167,7 @@ out_rollback: le16_add_cpu(&group->bg_free_bits_count, backups); le16_add_cpu(&group->bg_bits, -1 * num_bits); le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); + group->bg_contig_free_bits = old_bg_contig_free_bits; } out: if (ret) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index da7718cef735..e544c704b583 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -37,7 +37,7 @@ struct ocfs2_slot_info { unsigned int si_blocks; struct buffer_head **si_bh; unsigned int si_num_slots; - struct ocfs2_slot si_slots[]; + struct ocfs2_slot si_slots[] __counted_by(si_num_slots); }; diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index c973c03f6fd8..10157d9d7a9c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -404,7 +404,7 @@ static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations o2cb_stack_ops = { +static const struct ocfs2_stack_operations o2cb_stack_ops = { .connect = o2cb_cluster_connect, .disconnect = o2cb_cluster_disconnect, .this_node = o2cb_cluster_this_node, diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 9b76ee66aeb2..77edcd70f72c 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c @@ -744,7 +744,7 @@ static int user_plock(struct ocfs2_cluster_connection *conn, return dlm_posix_cancel(conn->cc_lockspace, ino, file, fl); else if (IS_GETLK(cmd)) return dlm_posix_get(conn->cc_lockspace, ino, file, fl); - else if (fl->fl_type == F_UNLCK) + else if (lock_is_unlock(fl)) return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); else return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); @@ -1065,7 +1065,7 @@ static int user_cluster_this_node(struct ocfs2_cluster_connection *conn, return 0; } -static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { +static const struct ocfs2_stack_operations ocfs2_user_plugin_ops = { .connect = user_cluster_connect, .disconnect = user_cluster_disconnect, .this_node = user_cluster_this_node, diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index a8d5ca98fa57..20aa37b67cfb 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -658,7 +658,6 @@ static struct ctl_table ocfs2_nm_table[] = { .mode = 0644, .proc_handler = proc_dostring, }, - { } }; static struct ctl_table_header *ocfs2_table_header; diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index 3636847fae19..02ab072c528a 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h @@ -223,7 +223,7 @@ struct ocfs2_stack_operations { */ struct ocfs2_stack_plugin { char *sp_name; - struct ocfs2_stack_operations *sp_ops; + const struct ocfs2_stack_operations *sp_ops; struct module *sp_owner; /* These are managed by the stackglue code. */ diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 166c8918c825..6ac4dcd54588 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -50,6 +50,10 @@ struct ocfs2_suballoc_result { u64 sr_blkno; /* The first allocated block */ unsigned int sr_bit_offset; /* The bit in the bg */ unsigned int sr_bits; /* How many bits we claimed */ + unsigned int sr_max_contig_bits; /* The length for contiguous + * free bits, only available + * for cluster group + */ }; static u64 ocfs2_group_from_res(struct ocfs2_suballoc_result *res) @@ -694,10 +698,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode, ac, cl); - if (PTR_ERR(bg_bh) == -ENOSPC) + if (PTR_ERR(bg_bh) == -ENOSPC) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; bg_bh = ocfs2_block_group_alloc_discontig(handle, alloc_inode, ac, cl); + } if (IS_ERR(bg_bh)) { status = PTR_ERR(bg_bh); bg_bh = NULL; @@ -1272,6 +1278,26 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, return ret; } +u16 ocfs2_find_max_contig_free_bits(void *bitmap, + u16 total_bits, u16 start) +{ + u16 offset, free_bits; + u16 contig_bits = 0; + + while (start < total_bits) { + offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start); + if (offset == total_bits) + break; + + start = ocfs2_find_next_bit(bitmap, total_bits, offset); + free_bits = start - offset; + if (contig_bits < free_bits) + contig_bits = free_bits; + } + + return contig_bits; +} + static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, struct buffer_head *bg_bh, unsigned int bits_wanted, @@ -1280,6 +1306,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, { void *bitmap; u16 best_offset, best_size; + u16 prev_best_size = 0; int offset, start, found, status = 0; struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; @@ -1290,10 +1317,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { - if (offset == total_bits) - break; - + while ((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) < + total_bits) { if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { /* We found a zero, but we can't use it as it * hasn't been put to disk yet! */ @@ -1308,6 +1333,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, /* got a zero after some ones */ found = 1; start = offset + 1; + prev_best_size = best_size; } if (found > best_size) { best_size = found; @@ -1320,6 +1346,8 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, } } + /* best_size will be allocated, we save prev_best_size */ + res->sr_max_contig_bits = prev_best_size; if (best_size) { res->sr_bit_offset = best_offset; res->sr_bits = best_size; @@ -1337,11 +1365,16 @@ int ocfs2_block_group_set_bits(handle_t *handle, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, unsigned int bit_off, - unsigned int num_bits) + unsigned int num_bits, + unsigned int max_contig_bits, + int fastpath) { int status; void *bitmap = bg->bg_bitmap; int journal_type = OCFS2_JOURNAL_ACCESS_WRITE; + unsigned int start = bit_off + num_bits; + u16 contig_bits; + struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); /* All callers get the descriptor via * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ @@ -1373,6 +1406,29 @@ int ocfs2_block_group_set_bits(handle_t *handle, while(num_bits--) ocfs2_set_bit(bit_off++, bitmap); + /* + * this is optimize path, caller set old contig value + * in max_contig_bits to bypass finding action. + */ + if (fastpath) { + bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits); + } else if (ocfs2_is_cluster_bitmap(alloc_inode)) { + /* + * Usually, the block group bitmap allocates only 1 bit + * at a time, while the cluster group allocates n bits + * each time. Therefore, we only save the contig bits for + * the cluster group. + */ + contig_bits = ocfs2_find_max_contig_free_bits(bitmap, + le16_to_cpu(bg->bg_bits), start); + if (contig_bits > max_contig_bits) + max_contig_bits = contig_bits; + bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits); + ocfs2_local_alloc_seen_free_bits(osb, max_contig_bits); + } else { + bg->bg_contig_free_bits = 0; + } + ocfs2_journal_dirty(handle, group_bh); bail: @@ -1486,7 +1542,12 @@ static int ocfs2_cluster_group_search(struct inode *inode, BUG_ON(!ocfs2_is_cluster_bitmap(inode)); - if (gd->bg_free_bits_count) { + if (le16_to_cpu(gd->bg_contig_free_bits) && + le16_to_cpu(gd->bg_contig_free_bits) < bits_wanted) + return -ENOSPC; + + /* ->bg_contig_free_bits may un-initialized, so compare again */ + if (le16_to_cpu(gd->bg_free_bits_count) >= bits_wanted) { max_bits = le16_to_cpu(gd->bg_bits); /* Tail groups in cluster bitmaps which aren't cpg @@ -1530,13 +1591,6 @@ static int ocfs2_cluster_group_search(struct inode *inode, * of bits. */ if (min_bits <= res->sr_bits) search = 0; /* success */ - else if (res->sr_bits) { - /* - * Don't show bits which we'll be returning - * for allocation to the local alloc bitmap. - */ - ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits); - } } return search; @@ -1555,7 +1609,7 @@ static int ocfs2_block_group_search(struct inode *inode, BUG_ON(min_bits != 1); BUG_ON(ocfs2_is_cluster_bitmap(inode)); - if (bg->bg_free_bits_count) { + if (le16_to_cpu(bg->bg_free_bits_count) >= bits_wanted) { ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), group_bh, bits_wanted, le16_to_cpu(bg->bg_bits), @@ -1715,7 +1769,8 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, } ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, - res->sr_bit_offset, res->sr_bits); + res->sr_bit_offset, res->sr_bits, + res->sr_max_contig_bits, 0); if (ret < 0) { ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh, res->sr_bits, @@ -1741,6 +1796,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, { int status; u16 chain; + u32 contig_bits; u64 next_group; struct inode *alloc_inode = ac->ac_inode; struct buffer_head *group_bh = NULL; @@ -1766,10 +1822,21 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, status = -ENOSPC; /* for now, the chain search is a bit simplistic. We just use * the 1st group with any empty bits. */ - while ((status = ac->ac_group_search(alloc_inode, group_bh, - bits_wanted, min_bits, - ac->ac_max_block, - res)) == -ENOSPC) { + while (1) { + if (ac->ac_which == OCFS2_AC_USE_MAIN_DISCONTIG) { + contig_bits = le16_to_cpu(bg->bg_contig_free_bits); + if (!contig_bits) + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (bits_wanted > contig_bits && contig_bits >= min_bits) + bits_wanted = contig_bits; + } + + status = ac->ac_group_search(alloc_inode, group_bh, + bits_wanted, min_bits, + ac->ac_max_block, res); + if (status != -ENOSPC) + break; if (!bg->bg_next_group) break; @@ -1849,7 +1916,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, bg, group_bh, res->sr_bit_offset, - res->sr_bits); + res->sr_bits, + res->sr_max_contig_bits, + 0); if (status < 0) { ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh, res->sr_bits, chain); @@ -1927,6 +1996,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, victim = ocfs2_find_victim_chain(cl); ac->ac_chain = victim; +search: status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, res, &bits_left); if (!status) { @@ -1951,7 +2021,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) { if (i == victim) continue; - if (!cl->cl_recs[i].c_free) + if (le32_to_cpu(cl->cl_recs[i].c_free) < bits_wanted) continue; ac->ac_chain = i; @@ -1967,6 +2037,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac, } } + /* Chains can't supply the bits_wanted contiguous space. + * We should switch to using every single bit when allocating + * from the global bitmap. */ + if (i == le16_to_cpu(cl->cl_next_free_rec) && + status == -ENOSPC && ac->ac_which == OCFS2_AC_USE_MAIN) { + ac->ac_which = OCFS2_AC_USE_MAIN_DISCONTIG; + ac->ac_chain = victim; + goto search; + } + set_hint: if (status != -ENOSPC) { /* If the next search of this group is not likely to @@ -2163,7 +2243,9 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle, bg, bg_bh, res->sr_bit_offset, - res->sr_bits); + res->sr_bits, + res->sr_max_contig_bits, + 0); if (ret < 0) { ocfs2_rollback_alloc_dinode_counts(ac->ac_inode, ac->ac_bh, res->sr_bits, chain); @@ -2308,7 +2390,8 @@ int __ocfs2_claim_clusters(handle_t *handle, BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL - && ac->ac_which != OCFS2_AC_USE_MAIN); + && ac->ac_which != OCFS2_AC_USE_MAIN + && ac->ac_which != OCFS2_AC_USE_MAIN_DISCONTIG); if (ac->ac_which == OCFS2_AC_USE_LOCAL) { WARN_ON(min_clusters > 1); @@ -2382,11 +2465,13 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, struct buffer_head *group_bh, unsigned int bit_off, unsigned int num_bits, + unsigned int max_contig_bits, void (*undo_fn)(unsigned int bit, unsigned long *bmap)) { int status; unsigned int tmp; + u16 contig_bits; struct ocfs2_group_desc *undo_bg = NULL; struct journal_head *jh; @@ -2433,6 +2518,20 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, num_bits); } + /* + * TODO: even 'num_bits == 1' (the worst case, release 1 cluster), + * we still need to rescan whole bitmap. + */ + if (ocfs2_is_cluster_bitmap(alloc_inode)) { + contig_bits = ocfs2_find_max_contig_free_bits(bg->bg_bitmap, + le16_to_cpu(bg->bg_bits), 0); + if (contig_bits > max_contig_bits) + max_contig_bits = contig_bits; + bg->bg_contig_free_bits = cpu_to_le16(max_contig_bits); + } else { + bg->bg_contig_free_bits = 0; + } + if (undo_fn) spin_unlock(&jh->b_state_lock); @@ -2459,6 +2558,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, struct ocfs2_chain_list *cl = &fe->id2.i_chain; struct buffer_head *group_bh = NULL; struct ocfs2_group_desc *group; + __le16 old_bg_contig_free_bits = 0; /* The alloc_bh comes from ocfs2_free_dinode() or * ocfs2_free_clusters(). The callers have all locked the @@ -2483,9 +2583,11 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits)); + if (ocfs2_is_cluster_bitmap(alloc_inode)) + old_bg_contig_free_bits = group->bg_contig_free_bits; status = ocfs2_block_group_clear_bits(handle, alloc_inode, group, group_bh, - start_bit, count, undo_fn); + start_bit, count, 0, undo_fn); if (status < 0) { mlog_errno(status); goto bail; @@ -2496,7 +2598,8 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle, if (status < 0) { mlog_errno(status); ocfs2_block_group_set_bits(handle, alloc_inode, group, group_bh, - start_bit, count); + start_bit, count, + le16_to_cpu(old_bg_contig_free_bits), 1); goto bail; } diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 9c74eace3adc..bcf2ed4a8631 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -29,6 +29,7 @@ struct ocfs2_alloc_context { #define OCFS2_AC_USE_MAIN 2 #define OCFS2_AC_USE_INODE 3 #define OCFS2_AC_USE_META 4 +#define OCFS2_AC_USE_MAIN_DISCONTIG 5 u32 ac_which; /* these are used by the chain search */ @@ -79,12 +80,16 @@ void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, struct buffer_head *di_bh, u32 num_bits, u16 chain); +u16 ocfs2_find_max_contig_free_bits(void *bitmap, + u16 total_bits, u16 start); int ocfs2_block_group_set_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, struct buffer_head *group_bh, unsigned int bit_off, - unsigned int num_bits); + unsigned int num_bits, + unsigned int max_contig_bits, + int fastpath); int ocfs2_claim_metadata(handle_t *handle, struct ocfs2_alloc_context *ac, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 9f6bbb4a0844..868ccdf44738 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1571,15 +1571,13 @@ static int __init ocfs2_init(void) ocfs2_set_locking_protocol(); - status = register_quota_format(&ocfs2_quota_format); - if (status < 0) - goto out3; + register_quota_format(&ocfs2_quota_format); + status = register_filesystem(&ocfs2_fs_type); if (!status) return 0; unregister_quota_format(&ocfs2_quota_format); -out3: debugfs_remove(ocfs2_debugfs_root); ocfs2_free_mem_caches(); out2: @@ -1708,18 +1706,17 @@ static int ocfs2_initialize_mem_caches(void) sizeof(struct ocfs2_inode_info), 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD|SLAB_ACCOUNT), + SLAB_ACCOUNT), ocfs2_inode_init_once); ocfs2_dquot_cachep = kmem_cache_create("ocfs2_dquot_cache", sizeof(struct ocfs2_dquot), 0, - (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| - SLAB_MEM_SPREAD), + SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT, NULL); ocfs2_qf_chunk_cachep = kmem_cache_create("ocfs2_qf_chunk_cache", sizeof(struct ocfs2_quota_chunk), 0, - (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), + SLAB_RECLAIM_ACCOUNT, NULL); if (!ocfs2_inode_cachep || !ocfs2_dquot_cachep || !ocfs2_qf_chunk_cachep) { @@ -1870,6 +1867,9 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) /* Orphan scan should be stopped as early as possible */ ocfs2_orphan_scan_stop(osb); + /* Stop quota recovery so that we can disable quotas */ + ocfs2_recovery_disable_quota(osb); + ocfs2_disable_quotas(osb); /* All dquots should be freed by now */ @@ -2029,8 +2029,8 @@ static int ocfs2_initialize_super(struct super_block *sb, cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits); bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits); sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits); - memcpy(&sb->s_uuid, di->id2.i_super.s_uuid, - sizeof(di->id2.i_super.s_uuid)); + super_set_uuid(sb, di->id2.i_super.s_uuid, + sizeof(di->id2.i_super.s_uuid)); osb->osb_dx_mask = (1 << (cbits - bbits)) - 1; @@ -2343,7 +2343,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, mlog(ML_ERROR, "found superblock with incorrect block " "size bits: found %u, should be 9, 10, 11, or 12\n", blksz_bits); - } else if ((1 << le32_to_cpu(blksz_bits)) != blksz) { + } else if ((1 << blksz_bits) != blksz) { mlog(ML_ERROR, "found superblock with incorrect block " "size: found %u, should be %u\n", 1 << blksz_bits, blksz); } else if (le16_to_cpu(di->id2.i_super.s_major_rev_level) != @@ -2363,8 +2363,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di, (unsigned long long)bh->b_blocknr); } else if (le32_to_cpu(di->id2.i_super.s_clustersize_bits) < 12 || le32_to_cpu(di->id2.i_super.s_clustersize_bits) > 20) { - mlog(ML_ERROR, "bad cluster size found: %u\n", - 1 << le32_to_cpu(di->id2.i_super.s_clustersize_bits)); + mlog(ML_ERROR, "bad cluster size bit found: %u\n", + le32_to_cpu(di->id2.i_super.s_clustersize_bits)); } else if (!le64_to_cpu(di->id2.i_super.s_root_blkno)) { mlog(ML_ERROR, "bad root_blkno: 0\n"); } else if (!le64_to_cpu(di->id2.i_super.s_system_dir_blkno)) { diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index d4c5fdcfa1e4..f5cf2255dc09 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -65,7 +65,7 @@ static int ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio) if (status < 0) { mlog_errno(status); - return status; + goto out; } fe = (struct ocfs2_dinode *) bh->b_data; @@ -76,9 +76,10 @@ static int ocfs2_fast_symlink_read_folio(struct file *f, struct folio *folio) memcpy(kaddr, link, len + 1); kunmap_atomic(kaddr); SetPageUptodate(page); +out: unlock_page(page); brelse(bh); - return 0; + return status; } const struct address_space_operations ocfs2_fast_symlink_aops = { diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index c2268b9e20a6..73a6f6fd8a8e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -87,14 +87,14 @@ static struct ocfs2_xattr_def_value_root def_xv = { .xv.xr_list.l_count = cpu_to_le16(1), }; -const struct xattr_handler *ocfs2_xattr_handlers[] = { +const struct xattr_handler * const ocfs2_xattr_handlers[] = { &ocfs2_xattr_user_handler, &ocfs2_xattr_trusted_handler, &ocfs2_xattr_security_handler, NULL }; -static const struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { +static const struct xattr_handler * const ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = { [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS] = &nop_posix_acl_access, [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &nop_posix_acl_default, @@ -4166,15 +4166,6 @@ static int cmp_xe(const void *a, const void *b) return 0; } -static void swap_xe(void *a, void *b, int size) -{ - struct ocfs2_xattr_entry *l = a, *r = b, tmp; - - tmp = *l; - memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); - memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); -} - /* * When the ocfs2_xattr_block is filled up, new bucket will be created * and all the xattr entries will be moved to the new bucket. @@ -4240,7 +4231,7 @@ static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, trace_ocfs2_cp_xattr_block_to_bucket_end(offset, size, off_change); sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), - cmp_xe, swap_xe); + cmp_xe, NULL); } /* @@ -4435,7 +4426,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, */ sort(entries, le16_to_cpu(xh->xh_count), sizeof(struct ocfs2_xattr_entry), - cmp_xe_offset, swap_xe); + cmp_xe_offset, NULL); /* Move all name/values to the end of the bucket. */ xe = xh->xh_entries; @@ -4477,7 +4468,7 @@ static int ocfs2_defrag_xattr_bucket(struct inode *inode, /* sort the entries by their name_hash. */ sort(entries, le16_to_cpu(xh->xh_count), sizeof(struct ocfs2_xattr_entry), - cmp_xe, swap_xe); + cmp_xe, NULL); buf = bucket_buf; for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize) diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h index 00308b57f64f..65e9aa743919 100644 --- a/fs/ocfs2/xattr.h +++ b/fs/ocfs2/xattr.h @@ -30,7 +30,7 @@ struct ocfs2_security_xattr_info { extern const struct xattr_handler ocfs2_xattr_user_handler; extern const struct xattr_handler ocfs2_xattr_trusted_handler; extern const struct xattr_handler ocfs2_xattr_security_handler; -extern const struct xattr_handler *ocfs2_xattr_handlers[]; +extern const struct xattr_handler * const ocfs2_xattr_handlers[]; ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); int ocfs2_xattr_get_nolock(struct inode *, struct buffer_head *, int, |