Diffstat (limited to 'fs/xfs/xfs_super.c')
-rw-r--r-- | fs/xfs/xfs_super.c | 536
1 file changed, 317 insertions, 219 deletions
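The hunks below replace the old per-mount mp->m_flags word with two fields: mp->m_features holds XFS_FEAT_* bits for mount options and on-disk features and is queried with xfs_has_*() helpers, while mp->m_opstate holds XFS_OPSTATE_* bits for runtime state (readonly, inode32, shutdown) that is changed with set_bit()/clear_bit() and queried with xfs_is_*() helpers. The standalone sketch below models that idiom to make the hunks easier to read; the names mirror the patch, but the real definitions live elsewhere in the series (fs/xfs/xfs_mount.h), the bit values here are illustrative, and plain bit operations stand in for the kernel's atomic set_bit()/clear_bit()/test_bit().

#include <stdbool.h>
#include <stdio.h>

/* illustrative feature bits: mount options / on-disk features, fixed after mount */
#define XFS_FEAT_SMALL_INUMS    (1ULL << 0)

/* illustrative operational-state bit numbers: runtime state that can change */
#define XFS_OPSTATE_INODE32     0

struct xfs_mount {
        unsigned long long      m_features;     /* XFS_FEAT_* */
        unsigned long           m_opstate;      /* XFS_OPSTATE_* */
};

/* predicates in the style of the patch; the kernel generates these via macros */
static bool xfs_has_small_inums(const struct xfs_mount *mp)
{
        return mp->m_features & XFS_FEAT_SMALL_INUMS;
}

static bool xfs_is_inode32(const struct xfs_mount *mp)
{
        return mp->m_opstate & (1UL << XFS_OPSTATE_INODE32);    /* kernel: test_bit() */
}

int main(void)
{
        struct xfs_mount mp = { .m_features = XFS_FEAT_SMALL_INUMS };
        unsigned long long ino = 1ULL << 40;    /* stand-in for an inode above XFS_MAXINUMBER_32 */

        /* same shape as the xfs_set_inode_alloc() hunk in the diff */
        if (xfs_has_small_inums(&mp) && ino > 0xffffffffULL)
                mp.m_opstate |= 1UL << XFS_OPSTATE_INODE32;     /* kernel: set_bit() */
        else
                mp.m_opstate &= ~(1UL << XFS_OPSTATE_INODE32);  /* kernel: clear_bit() */

        printf("inode allocation mode: %s\n", xfs_is_inode32(&mp) ? "inode32" : "inode64");
        return 0;
}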
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 102cbd606633..9a86d3ec2cb6 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -49,6 +49,28 @@ static struct kset *xfs_kset; /* top-level xfs sysfs dir */ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */ #endif +#ifdef CONFIG_HOTPLUG_CPU +static LIST_HEAD(xfs_mount_list); +static DEFINE_SPINLOCK(xfs_mount_list_lock); + +static inline void xfs_mount_list_add(struct xfs_mount *mp) +{ + spin_lock(&xfs_mount_list_lock); + list_add(&mp->m_mount_list, &xfs_mount_list); + spin_unlock(&xfs_mount_list_lock); +} + +static inline void xfs_mount_list_del(struct xfs_mount *mp) +{ + spin_lock(&xfs_mount_list_lock); + list_del(&mp->m_mount_list); + spin_unlock(&xfs_mount_list_lock); +} +#else /* !CONFIG_HOTPLUG_CPU */ +static inline void xfs_mount_list_add(struct xfs_mount *mp) {} +static inline void xfs_mount_list_del(struct xfs_mount *mp) {} +#endif + enum xfs_dax_mode { XFS_DAX_INODE = 0, XFS_DAX_ALWAYS = 1, @@ -62,15 +84,15 @@ xfs_mount_set_dax_mode( { switch (mode) { case XFS_DAX_INODE: - mp->m_flags &= ~(XFS_MOUNT_DAX_ALWAYS | XFS_MOUNT_DAX_NEVER); + mp->m_features &= ~(XFS_FEAT_DAX_ALWAYS | XFS_FEAT_DAX_NEVER); break; case XFS_DAX_ALWAYS: - mp->m_flags |= XFS_MOUNT_DAX_ALWAYS; - mp->m_flags &= ~XFS_MOUNT_DAX_NEVER; + mp->m_features |= XFS_FEAT_DAX_ALWAYS; + mp->m_features &= ~XFS_FEAT_DAX_NEVER; break; case XFS_DAX_NEVER: - mp->m_flags |= XFS_MOUNT_DAX_NEVER; - mp->m_flags &= ~XFS_MOUNT_DAX_ALWAYS; + mp->m_features |= XFS_FEAT_DAX_NEVER; + mp->m_features &= ~XFS_FEAT_DAX_ALWAYS; break; } } @@ -154,33 +176,32 @@ xfs_fs_show_options( { static struct proc_xfs_info xfs_info_set[] = { /* the few simple ones we can get from the mount struct */ - { XFS_MOUNT_IKEEP, ",ikeep" }, - { XFS_MOUNT_WSYNC, ",wsync" }, - { XFS_MOUNT_NOALIGN, ",noalign" }, - { XFS_MOUNT_SWALLOC, ",swalloc" }, - { XFS_MOUNT_NOUUID, ",nouuid" }, - { XFS_MOUNT_NORECOVERY, ",norecovery" }, - { XFS_MOUNT_ATTR2, ",attr2" }, - { XFS_MOUNT_FILESTREAMS, ",filestreams" }, - { XFS_MOUNT_GRPID, ",grpid" }, - { XFS_MOUNT_DISCARD, ",discard" }, - { XFS_MOUNT_LARGEIO, ",largeio" }, - { XFS_MOUNT_DAX_ALWAYS, ",dax=always" }, - { XFS_MOUNT_DAX_NEVER, ",dax=never" }, + { XFS_FEAT_IKEEP, ",ikeep" }, + { XFS_FEAT_WSYNC, ",wsync" }, + { XFS_FEAT_NOALIGN, ",noalign" }, + { XFS_FEAT_SWALLOC, ",swalloc" }, + { XFS_FEAT_NOUUID, ",nouuid" }, + { XFS_FEAT_NORECOVERY, ",norecovery" }, + { XFS_FEAT_ATTR2, ",attr2" }, + { XFS_FEAT_FILESTREAMS, ",filestreams" }, + { XFS_FEAT_GRPID, ",grpid" }, + { XFS_FEAT_DISCARD, ",discard" }, + { XFS_FEAT_LARGE_IOSIZE, ",largeio" }, + { XFS_FEAT_DAX_ALWAYS, ",dax=always" }, + { XFS_FEAT_DAX_NEVER, ",dax=never" }, { 0, NULL } }; struct xfs_mount *mp = XFS_M(root->d_sb); struct proc_xfs_info *xfs_infop; for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) { - if (mp->m_flags & xfs_infop->flag) + if (mp->m_features & xfs_infop->flag) seq_puts(m, xfs_infop->str); } - seq_printf(m, ",inode%d", - (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64); + seq_printf(m, ",inode%d", xfs_has_small_inums(mp) ? 
32 : 64); - if (mp->m_flags & XFS_MOUNT_ALLOCSIZE) + if (xfs_has_allocsize(mp)) seq_printf(m, ",allocsize=%dk", (1 << mp->m_allocsize_log) >> 10); @@ -201,25 +222,20 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & XFS_UQUOTA_ACCT) { - if (mp->m_qflags & XFS_UQUOTA_ENFD) - seq_puts(m, ",usrquota"); - else - seq_puts(m, ",uqnoenforce"); - } + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else if (mp->m_qflags & XFS_UQUOTA_ACCT) + seq_puts(m, ",uqnoenforce"); - if (mp->m_qflags & XFS_PQUOTA_ACCT) { - if (mp->m_qflags & XFS_PQUOTA_ENFD) - seq_puts(m, ",prjquota"); - else - seq_puts(m, ",pqnoenforce"); - } - if (mp->m_qflags & XFS_GQUOTA_ACCT) { - if (mp->m_qflags & XFS_GQUOTA_ENFD) - seq_puts(m, ",grpquota"); - else - seq_puts(m, ",gqnoenforce"); - } + if (mp->m_qflags & XFS_PQUOTA_ENFD) + seq_puts(m, ",prjquota"); + else if (mp->m_qflags & XFS_PQUOTA_ACCT) + seq_puts(m, ",pqnoenforce"); + + if (mp->m_qflags & XFS_GQUOTA_ENFD) + seq_puts(m, ",grpquota"); + else if (mp->m_qflags & XFS_GQUOTA_ACCT) + seq_puts(m, ",gqnoenforce"); if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT)) seq_puts(m, ",noquota"); @@ -230,11 +246,11 @@ xfs_fs_show_options( /* * Set parameters for inode allocation heuristics, taking into account * filesystem size and inode32/inode64 mount options; i.e. specifically - * whether or not XFS_MOUNT_SMALL_INUMS is set. + * whether or not XFS_FEAT_SMALL_INUMS is set. * * Inode allocation patterns are altered only if inode32 is requested - * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large. - * If altered, XFS_MOUNT_32BITINODES is set as well. + * (XFS_FEAT_SMALL_INUMS), and the filesystem is sufficiently large. + * If altered, XFS_OPSTATE_INODE32 is set as well. * * An agcount independent of that in the mount structure is provided * because in the growfs case, mp->m_sb.sb_agcount is not yet updated @@ -276,13 +292,13 @@ xfs_set_inode_alloc( /* * If user asked for no more than 32-bit inodes, and the fs is - * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter + * sufficiently large, set XFS_OPSTATE_INODE32 if we must alter * the allocator to accommodate the request. */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32) - mp->m_flags |= XFS_MOUNT_32BITINODES; + if (xfs_has_small_inums(mp) && ino > XFS_MAXINUMBER_32) + set_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); else - mp->m_flags &= ~XFS_MOUNT_32BITINODES; + clear_bit(XFS_OPSTATE_INODE32, &mp->m_opstate); for (index = 0; index < agcount; index++) { struct xfs_perag *pag; @@ -291,7 +307,7 @@ xfs_set_inode_alloc( pag = xfs_perag_get(mp, index); - if (mp->m_flags & XFS_MOUNT_32BITINODES) { + if (xfs_is_inode32(mp)) { if (ino > XFS_MAXINUMBER_32) { pag->pagi_inodeok = 0; pag->pagf_metadata = 0; @@ -311,7 +327,7 @@ xfs_set_inode_alloc( xfs_perag_put(pag); } - return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount; + return xfs_is_inode32(mp) ? 
maxagi : agcount; } STATIC int @@ -468,7 +484,7 @@ xfs_setup_devices( if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { unsigned int log_sector_size = BBSIZE; - if (xfs_sb_version_hassector(&mp->m_sb)) + if (xfs_has_sector(mp)) log_sector_size = mp->m_sb.sb_logsectsize; error = xfs_setsize_buftarg(mp->m_logdev_targp, log_sector_size); @@ -501,37 +517,37 @@ xfs_init_mount_workqueues( if (!mp->m_unwritten_workqueue) goto out_destroy_buf; - mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s", - XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_UNBOUND), - 0, mp->m_super->s_id); - if (!mp->m_cil_workqueue) - goto out_destroy_unwritten; - mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, mp->m_super->s_id); if (!mp->m_reclaim_workqueue) - goto out_destroy_cil; + goto out_destroy_unwritten; - mp->m_gc_workqueue = alloc_workqueue("xfs-gc/%s", - WQ_SYSFS | WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM, + mp->m_blockgc_wq = alloc_workqueue("xfs-blockgc/%s", + XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM), 0, mp->m_super->s_id); - if (!mp->m_gc_workqueue) + if (!mp->m_blockgc_wq) goto out_destroy_reclaim; + mp->m_inodegc_wq = alloc_workqueue("xfs-inodegc/%s", + XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM), + 1, mp->m_super->s_id); + if (!mp->m_inodegc_wq) + goto out_destroy_blockgc; + mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", XFS_WQFLAGS(WQ_FREEZABLE), 0, mp->m_super->s_id); if (!mp->m_sync_workqueue) - goto out_destroy_eofb; + goto out_destroy_inodegc; return 0; -out_destroy_eofb: - destroy_workqueue(mp->m_gc_workqueue); +out_destroy_inodegc: + destroy_workqueue(mp->m_inodegc_wq); +out_destroy_blockgc: + destroy_workqueue(mp->m_blockgc_wq); out_destroy_reclaim: destroy_workqueue(mp->m_reclaim_workqueue); -out_destroy_cil: - destroy_workqueue(mp->m_cil_workqueue); out_destroy_unwritten: destroy_workqueue(mp->m_unwritten_workqueue); out_destroy_buf: @@ -545,9 +561,9 @@ xfs_destroy_mount_workqueues( struct xfs_mount *mp) { destroy_workqueue(mp->m_sync_workqueue); - destroy_workqueue(mp->m_gc_workqueue); + destroy_workqueue(mp->m_blockgc_wq); + destroy_workqueue(mp->m_inodegc_wq); destroy_workqueue(mp->m_reclaim_workqueue); - destroy_workqueue(mp->m_cil_workqueue); destroy_workqueue(mp->m_unwritten_workqueue); destroy_workqueue(mp->m_buf_workqueue); } @@ -596,32 +612,6 @@ xfs_fs_alloc_inode( return NULL; } -#ifdef DEBUG -static void -xfs_check_delalloc( - struct xfs_inode *ip, - int whichfork) -{ - struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); - struct xfs_bmbt_irec got; - struct xfs_iext_cursor icur; - - if (!ifp || !xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got)) - return; - do { - if (isnullstartblock(got.br_startblock)) { - xfs_warn(ip->i_mount, - "ino %llx %s fork has delalloc extent at [0x%llx:0x%llx]", - ip->i_ino, - whichfork == XFS_DATA_FORK ? "data" : "cow", - got.br_startoff, got.br_blockcount); - } - } while (xfs_iext_next_extent(ifp, &icur, &got)); -} -#else -#define xfs_check_delalloc(ip, whichfork) do { } while (0) -#endif - /* * Now that the generic code is guaranteed not to be accessing * the linux inode, we can inactivate and reclaim the inode. 
@@ -637,30 +627,6 @@ xfs_fs_destroy_inode( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); XFS_STATS_INC(ip->i_mount, vn_rele); XFS_STATS_INC(ip->i_mount, vn_remove); - - xfs_inactive(ip); - - if (!XFS_FORCED_SHUTDOWN(ip->i_mount) && ip->i_delayed_blks) { - xfs_check_delalloc(ip, XFS_DATA_FORK); - xfs_check_delalloc(ip, XFS_COW_FORK); - ASSERT(0); - } - - XFS_STATS_INC(ip->i_mount, vn_reclaim); - - /* - * We should never get here with one of the reclaim flags already set. - */ - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE)); - ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); - - /* - * We always use background reclaim here because even if the inode is - * clean, it still may be under IO and hence we have wait for IO - * completion to occur before we can reclaim the inode. The background - * reclaim path handles this more efficiently than we can here, so - * simply let background reclaim tear down all inodes. - */ xfs_inode_mark_reclaimable(ip); } @@ -732,7 +698,7 @@ xfs_fs_drop_inode( * that. See the comment for this inode flag. */ if (ip->i_flags & XFS_IRECOVERY) { - ASSERT(ip->i_mount->m_log->l_flags & XLOG_RECOVERY_NEEDED); + ASSERT(xlog_recovery_needed(ip->i_mount->m_log)); return 0; } @@ -755,6 +721,8 @@ xfs_fs_sync_fs( { struct xfs_mount *mp = XFS_M(sb); + trace_xfs_fs_sync_fs(mp, __return_address); + /* * Doing anything during the async pass would be counterproductive. */ @@ -771,6 +739,25 @@ xfs_fs_sync_fs( flush_delayed_work(&mp->m_log->l_work); } + /* + * If we are called with page faults frozen out, it means we are about + * to freeze the transaction subsystem. Take the opportunity to shut + * down inodegc because once SB_FREEZE_FS is set it's too late to + * prevent inactivation races with freeze. The fs doesn't get called + * again by the freezing process until after SB_FREEZE_FS has been set, + * so it's now or never. Same logic applies to speculative allocation + * garbage collection. + * + * We don't care if this is a normal syncfs call that does this or + * freeze that does this - we can run this multiple times without issue + * and we won't race with a restart because a restart can only occur + * when the state is either SB_FREEZE_FS or SB_FREEZE_COMPLETE. + */ + if (sb->s_writers.frozen == SB_FREEZE_PAGEFAULT) { + xfs_inodegc_stop(mp); + xfs_blockgc_stop(mp); + } + return 0; } @@ -789,6 +776,9 @@ xfs_fs_statfs( xfs_extlen_t lsize; int64_t ffree; + /* Wait for whatever inactivations are in progress. */ + xfs_inodegc_flush(mp); + statp->f_type = XFS_SUPER_MAGIC; statp->f_namelen = MAXNAMELEN - 1; @@ -884,10 +874,22 @@ xfs_fs_freeze( * set a GFP_NOFS context here to avoid recursion deadlocks. */ flags = memalloc_nofs_save(); - xfs_blockgc_stop(mp); xfs_save_resvblks(mp); ret = xfs_log_quiesce(mp); memalloc_nofs_restore(flags); + + /* + * For read-write filesystems, we need to restart the inodegc on error + * because we stopped it at SB_FREEZE_PAGEFAULT level and a thaw is not + * going to be run to restart it now. We are at SB_FREEZE_FS level + * here, so we can restart safely without racing with a stop in + * xfs_fs_sync_fs(). + */ + if (ret && !xfs_is_readonly(mp)) { + xfs_blockgc_start(mp); + xfs_inodegc_start(mp); + } + return ret; } @@ -899,7 +901,18 @@ xfs_fs_unfreeze( xfs_restore_resvblks(mp); xfs_log_work_queue(mp); - xfs_blockgc_start(mp); + + /* + * Don't reactivate the inodegc worker on a readonly filesystem because + * inodes are sent directly to reclaim. 
Don't reactivate the blockgc + * worker because there are no speculative preallocations on a readonly + * filesystem. + */ + if (!xfs_is_readonly(mp)) { + xfs_blockgc_start(mp); + xfs_inodegc_start(mp); + } + return 0; } @@ -911,10 +924,8 @@ STATIC int xfs_finish_flags( struct xfs_mount *mp) { - int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - /* Fail a mount where the logbuf is smaller than the log stripe */ - if (xfs_sb_version_haslogv2(&mp->m_sb)) { + if (xfs_has_logv2(mp)) { if (mp->m_logbsize <= 0 && mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; @@ -936,33 +947,24 @@ xfs_finish_flags( /* * V5 filesystems always use attr2 format for attributes. */ - if (xfs_sb_version_hascrc(&mp->m_sb) && - (mp->m_flags & XFS_MOUNT_NOATTR2)) { + if (xfs_has_crc(mp) && xfs_has_noattr2(mp)) { xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. " "attr2 is always enabled for V5 filesystems."); return -EINVAL; } /* - * mkfs'ed attr2 will turn on attr2 mount unless explicitly - * told by noattr2 to turn it off - */ - if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(mp->m_flags & XFS_MOUNT_NOATTR2)) - mp->m_flags |= XFS_MOUNT_ATTR2; - - /* * prohibit r/w mounts of read-only filesystems */ - if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { + if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !xfs_is_readonly(mp)) { xfs_warn(mp, "cannot mount a read-only filesystem as read-write"); return -EROFS; } - if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && - (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE)) && - !xfs_sb_version_has_pquotino(&mp->m_sb)) { + if ((mp->m_qflags & XFS_GQUOTA_ACCT) && + (mp->m_qflags & XFS_PQUOTA_ACCT) && + !xfs_has_pquotino(mp)) { xfs_warn(mp, "Super block does not support project and group quota together"); return -EINVAL; @@ -1020,11 +1022,40 @@ xfs_destroy_percpu_counters( percpu_counter_destroy(&mp->m_icount); percpu_counter_destroy(&mp->m_ifree); percpu_counter_destroy(&mp->m_fdblocks); - ASSERT(XFS_FORCED_SHUTDOWN(mp) || + ASSERT(xfs_is_shutdown(mp) || percpu_counter_sum(&mp->m_delalloc_blks) == 0); percpu_counter_destroy(&mp->m_delalloc_blks); } +static int +xfs_inodegc_init_percpu( + struct xfs_mount *mp) +{ + struct xfs_inodegc *gc; + int cpu; + + mp->m_inodegc = alloc_percpu(struct xfs_inodegc); + if (!mp->m_inodegc) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + gc = per_cpu_ptr(mp->m_inodegc, cpu); + init_llist_head(&gc->list); + gc->items = 0; + INIT_WORK(&gc->work, xfs_inodegc_worker); + } + return 0; +} + +static void +xfs_inodegc_free_percpu( + struct xfs_mount *mp) +{ + if (!mp->m_inodegc) + return; + free_percpu(mp->m_inodegc); +} + static void xfs_fs_put_super( struct super_block *sb) @@ -1041,6 +1072,8 @@ xfs_fs_put_super( xfs_freesb(mp); free_percpu(mp->m_stats.xs_stats); + xfs_mount_list_del(mp); + xfs_inodegc_free_percpu(mp); xfs_destroy_percpu_counters(mp); xfs_destroy_mount_workqueues(mp); xfs_close_devices(mp); @@ -1129,7 +1162,7 @@ xfs_fs_warn_deprecated( * already had the flag set */ if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) && - !!(XFS_M(fc->root->d_sb)->m_flags & flag) == value) + !!(XFS_M(fc->root->d_sb)->m_features & flag) == value) return; xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key); } @@ -1177,27 +1210,27 @@ xfs_fs_parse_param( if (suffix_kstrtoint(param->string, 10, &size)) return -EINVAL; parsing_mp->m_allocsize_log = ffs(size) - 1; - parsing_mp->m_flags |= XFS_MOUNT_ALLOCSIZE; + parsing_mp->m_features |= XFS_FEAT_ALLOCSIZE; return 0; case Opt_grpid: case 
Opt_bsdgroups: - parsing_mp->m_flags |= XFS_MOUNT_GRPID; + parsing_mp->m_features |= XFS_FEAT_GRPID; return 0; case Opt_nogrpid: case Opt_sysvgroups: - parsing_mp->m_flags &= ~XFS_MOUNT_GRPID; + parsing_mp->m_features &= ~XFS_FEAT_GRPID; return 0; case Opt_wsync: - parsing_mp->m_flags |= XFS_MOUNT_WSYNC; + parsing_mp->m_features |= XFS_FEAT_WSYNC; return 0; case Opt_norecovery: - parsing_mp->m_flags |= XFS_MOUNT_NORECOVERY; + parsing_mp->m_features |= XFS_FEAT_NORECOVERY; return 0; case Opt_noalign: - parsing_mp->m_flags |= XFS_MOUNT_NOALIGN; + parsing_mp->m_features |= XFS_FEAT_NOALIGN; return 0; case Opt_swalloc: - parsing_mp->m_flags |= XFS_MOUNT_SWALLOC; + parsing_mp->m_features |= XFS_FEAT_SWALLOC; return 0; case Opt_sunit: parsing_mp->m_dalign = result.uint_32; @@ -1206,62 +1239,58 @@ xfs_fs_parse_param( parsing_mp->m_swidth = result.uint_32; return 0; case Opt_inode32: - parsing_mp->m_flags |= XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_features |= XFS_FEAT_SMALL_INUMS; return 0; case Opt_inode64: - parsing_mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; + parsing_mp->m_features &= ~XFS_FEAT_SMALL_INUMS; return 0; case Opt_nouuid: - parsing_mp->m_flags |= XFS_MOUNT_NOUUID; + parsing_mp->m_features |= XFS_FEAT_NOUUID; return 0; case Opt_largeio: - parsing_mp->m_flags |= XFS_MOUNT_LARGEIO; + parsing_mp->m_features |= XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_nolargeio: - parsing_mp->m_flags &= ~XFS_MOUNT_LARGEIO; + parsing_mp->m_features &= ~XFS_FEAT_LARGE_IOSIZE; return 0; case Opt_filestreams: - parsing_mp->m_flags |= XFS_MOUNT_FILESTREAMS; + parsing_mp->m_features |= XFS_FEAT_FILESTREAMS; return 0; case Opt_noquota: parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD; - parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE; return 0; case Opt_quota: case Opt_uquota: case Opt_usrquota: - parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | - XFS_UQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); return 0; case Opt_qnoenforce: case Opt_uqnoenforce: - parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_UQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD; return 0; case Opt_pquota: case Opt_prjquota: - parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | - XFS_PQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD); return 0; case Opt_pqnoenforce: - parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_PQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD; return 0; case Opt_gquota: case Opt_grpquota: - parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | - XFS_GQUOTA_ENFD); + parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD); return 0; case Opt_gqnoenforce: - parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + parsing_mp->m_qflags |= XFS_GQUOTA_ACCT; parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD; return 0; case Opt_discard: - parsing_mp->m_flags |= XFS_MOUNT_DISCARD; + parsing_mp->m_features |= XFS_FEAT_DISCARD; return 0; case Opt_nodiscard: - parsing_mp->m_flags &= ~XFS_MOUNT_DISCARD; + parsing_mp->m_features &= ~XFS_FEAT_DISCARD; return 0; #ifdef CONFIG_FS_DAX case Opt_dax: @@ -1273,21 +1302,20 @@ xfs_fs_parse_param( #endif /* Following mount options will be removed in September 2025 */ case Opt_ikeep: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, true); - parsing_mp->m_flags |= XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, true); + parsing_mp->m_features |= 
XFS_FEAT_IKEEP; return 0; case Opt_noikeep: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_IKEEP, false); - parsing_mp->m_flags &= ~XFS_MOUNT_IKEEP; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_IKEEP, false); + parsing_mp->m_features &= ~XFS_FEAT_IKEEP; return 0; case Opt_attr2: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_ATTR2, true); - parsing_mp->m_flags |= XFS_MOUNT_ATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_ATTR2, true); + parsing_mp->m_features |= XFS_FEAT_ATTR2; return 0; case Opt_noattr2: - xfs_fs_warn_deprecated(fc, param, XFS_MOUNT_NOATTR2, true); - parsing_mp->m_flags &= ~XFS_MOUNT_ATTR2; - parsing_mp->m_flags |= XFS_MOUNT_NOATTR2; + xfs_fs_warn_deprecated(fc, param, XFS_FEAT_NOATTR2, true); + parsing_mp->m_features |= XFS_FEAT_NOATTR2; return 0; default: xfs_warn(parsing_mp, "unknown mount option [%s].", param->key); @@ -1301,17 +1329,23 @@ static int xfs_fs_validate_params( struct xfs_mount *mp) { + /* No recovery flag requires a read-only mount */ + if (xfs_has_norecovery(mp) && !xfs_is_readonly(mp)) { + xfs_warn(mp, "no-recovery mounts must be read-only."); + return -EINVAL; + } + /* - * no recovery flag requires a read-only mount + * We have not read the superblock at this point, so only the attr2 + * mount option can set the attr2 feature by this stage. */ - if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && - !(mp->m_flags & XFS_MOUNT_RDONLY)) { - xfs_warn(mp, "no-recovery mounts must be read-only."); + if (xfs_has_attr2(mp) && xfs_has_noattr2(mp)) { + xfs_warn(mp, "attr2 and noattr2 cannot both be specified."); return -EINVAL; } - if ((mp->m_flags & XFS_MOUNT_NOALIGN) && - (mp->m_dalign || mp->m_swidth)) { + + if (xfs_has_noalign(mp) && (mp->m_dalign || mp->m_swidth)) { xfs_warn(mp, "sunit and swidth options incompatible with the noalign option"); return -EINVAL; @@ -1355,7 +1389,7 @@ xfs_fs_validate_params( return -EINVAL; } - if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) && + if (xfs_has_allocsize(mp) && (mp->m_allocsize_log > XFS_MAX_IO_LOG || mp->m_allocsize_log < XFS_MIN_IO_LOG)) { xfs_warn(mp, "invalid log iosize: %d [not %d-%d]", @@ -1416,11 +1450,22 @@ xfs_fs_fill_super( if (error) goto out_destroy_workqueues; + error = xfs_inodegc_init_percpu(mp); + if (error) + goto out_destroy_counters; + + /* + * All percpu data structures requiring cleanup when a cpu goes offline + * must be allocated before adding this @mp to the cpu-dead handler's + * mount list. + */ + xfs_mount_list_add(mp); + /* Allocate stats memory before we do operations that might use it */ mp->m_stats.xs_stats = alloc_percpu(struct xfsstats); if (!mp->m_stats.xs_stats) { error = -ENOMEM; - goto out_destroy_counters; + goto out_destroy_inodegc; } error = xfs_readsb(mp, flags); @@ -1436,7 +1481,7 @@ xfs_fs_fill_super( goto out_free_sb; /* V4 support is undergoing deprecation. */ - if (!xfs_sb_version_hascrc(&mp->m_sb)) { + if (!xfs_has_crc(mp)) { #ifdef CONFIG_XFS_SUPPORT_V4 xfs_warn_once(mp, "Deprecated V4 format (crc=0) will not be supported after September 2030."); @@ -1449,7 +1494,7 @@ xfs_fs_fill_super( } /* Filesystem claims it needs repair, so refuse the mount. */ - if (xfs_sb_version_needsrepair(&mp->m_sb)) { + if (xfs_has_needsrepair(mp)) { xfs_warn(mp, "Filesystem needs repair. 
Please run xfs_repair."); error = -EFSCORRUPTED; goto out_free_sb; @@ -1521,7 +1566,7 @@ xfs_fs_fill_super( sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_max_links = XFS_MAXLINK; sb->s_time_gran = 1; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) { + if (xfs_has_bigtime(mp)) { sb->s_time_min = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MIN); sb->s_time_max = xfs_bigtime_to_unix(XFS_BIGTIME_TIME_MAX); } else { @@ -1534,14 +1579,10 @@ xfs_fs_fill_super( set_posix_acl_flag(sb); /* version 5 superblocks support inode version counters. */ - if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + if (xfs_has_crc(mp)) sb->s_flags |= SB_I_VERSION; - if (xfs_sb_version_hasbigtime(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL big timestamp feature in use. Use at your own risk!"); - - if (mp->m_flags & XFS_MOUNT_DAX_ALWAYS) { + if (xfs_has_dax_always(mp)) { bool rtdev_is_dax = false, datadev_is_dax; xfs_warn(mp, @@ -1557,7 +1598,7 @@ xfs_fs_fill_super( "DAX unsupported by block device. Turning off DAX."); xfs_mount_set_dax_mode(mp, XFS_DAX_NEVER); } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { xfs_alert(mp, "DAX and reflink cannot be used together!"); error = -EINVAL; @@ -1565,17 +1606,17 @@ xfs_fs_fill_super( } } - if (mp->m_flags & XFS_MOUNT_DISCARD) { + if (xfs_has_discard(mp)) { struct request_queue *q = bdev_get_queue(sb->s_bdev); if (!blk_queue_discard(q)) { xfs_warn(mp, "mounting with \"discard\" option, but " "the device does not support discard"); - mp->m_flags &= ~XFS_MOUNT_DISCARD; + mp->m_features &= ~XFS_FEAT_DISCARD; } } - if (xfs_sb_version_hasreflink(&mp->m_sb)) { + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, "reflink not compatible with realtime device!"); @@ -1589,17 +1630,13 @@ xfs_fs_fill_super( } } - if (xfs_sb_version_hasrmapbt(&mp->m_sb) && mp->m_sb.sb_rblocks) { + if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) { xfs_alert(mp, "reverse mapping btree not compatible with realtime device!"); error = -EINVAL; goto out_filestream_unmount; } - if (xfs_sb_version_hasinobtcounts(&mp->m_sb)) - xfs_warn(mp, - "EXPERIMENTAL inode btree counters feature in use. Use at your own risk!"); - error = xfs_mountfs(mp); if (error) goto out_filestream_unmount; @@ -1623,6 +1660,9 @@ xfs_fs_fill_super( xfs_freesb(mp); out_free_stats: free_percpu(mp->m_stats.xs_stats); + out_destroy_inodegc: + xfs_mount_list_del(mp); + xfs_inodegc_free_percpu(mp); out_destroy_counters: xfs_destroy_percpu_counters(mp); out_destroy_workqueues: @@ -1654,13 +1694,13 @@ xfs_remount_rw( struct xfs_sb *sbp = &mp->m_sb; int error; - if (mp->m_flags & XFS_MOUNT_NORECOVERY) { + if (xfs_has_norecovery(mp)) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); return -EINVAL; } - if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + if (xfs_sb_is_v5(sbp) && xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_warn(mp, "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem", @@ -1669,7 +1709,7 @@ xfs_remount_rw( return -EINVAL; } - mp->m_flags &= ~XFS_MOUNT_RDONLY; + clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* * If this is the first remount to writeable state we might have some @@ -1706,6 +1746,9 @@ xfs_remount_rw( if (error && error != -ENOSPC) return error; + /* Re-enable the background inode inactivation worker. */ + xfs_inodegc_start(mp); + return 0; } @@ -1728,6 +1771,15 @@ xfs_remount_ro( return error; } + /* + * Stop the inodegc background worker. 
xfs_fs_reconfigure already + * flushed all pending inodegc work when it sync'd the filesystem. + * The VFS holds s_umount, so we know that inodes cannot enter + * xfs_fs_destroy_inode during a remount operation. In readonly mode + * we send inodes straight to reclaim, so no inodes will be queued. + */ + xfs_inodegc_stop(mp); + /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { @@ -1745,7 +1797,7 @@ xfs_remount_ro( xfs_save_resvblks(mp); xfs_log_clean(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); return 0; } @@ -1768,12 +1820,11 @@ xfs_fs_reconfigure( { struct xfs_mount *mp = XFS_M(fc->root->d_sb); struct xfs_mount *new_mp = fc->s_fs_info; - xfs_sb_t *sbp = &mp->m_sb; int flags = fc->sb_flags; int error; /* version 5 superblocks always support version counters. */ - if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) + if (xfs_has_crc(mp)) fc->sb_flags |= SB_I_VERSION; error = xfs_fs_validate_params(new_mp); @@ -1783,28 +1834,26 @@ xfs_fs_reconfigure( sync_filesystem(mp->m_super); /* inode32 -> inode64 */ - if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && - !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { - mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + if (xfs_has_small_inums(mp) && !xfs_has_small_inums(new_mp)) { + mp->m_features &= ~XFS_FEAT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* inode64 -> inode32 */ - if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) && - (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) { - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); + if (!xfs_has_small_inums(mp) && xfs_has_small_inums(new_mp)) { + mp->m_features |= XFS_FEAT_SMALL_INUMS; + mp->m_maxagi = xfs_set_inode_alloc(mp, mp->m_sb.sb_agcount); } /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) { + if (xfs_is_readonly(mp) && !(flags & SB_RDONLY)) { error = xfs_remount_rw(mp); if (error) return error; } /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) { + if (!xfs_is_readonly(mp) && (flags & SB_RDONLY)) { error = xfs_remount_ro(mp); if (error) return error; @@ -1871,11 +1920,11 @@ static int xfs_init_fs_context( * Copy binary VFS mount flags we are interested in. */ if (fc->sb_flags & SB_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; + set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (fc->sb_flags & SB_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; + mp->m_features |= XFS_FEAT_DIRSYNC; if (fc->sb_flags & SB_SYNCHRONOUS) - mp->m_flags |= XFS_MOUNT_WSYNC; + mp->m_features |= XFS_FEAT_WSYNC; fc->s_fs_info = mp; fc->ops = &xfs_context_ops; @@ -2118,6 +2167,48 @@ xfs_destroy_workqueues(void) destroy_workqueue(xfs_alloc_wq); } +#ifdef CONFIG_HOTPLUG_CPU +static int +xfs_cpu_dead( + unsigned int cpu) +{ + struct xfs_mount *mp, *n; + + spin_lock(&xfs_mount_list_lock); + list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { + spin_unlock(&xfs_mount_list_lock); + xfs_inodegc_cpu_dead(mp, cpu); + spin_lock(&xfs_mount_list_lock); + } + spin_unlock(&xfs_mount_list_lock); + return 0; +} + +static int __init +xfs_cpu_hotplug_init(void) +{ + int error; + + error = cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL, + xfs_cpu_dead); + if (error < 0) + xfs_alert(NULL, +"Failed to initialise CPU hotplug, error %d. 
XFS is non-functional.", + error); + return error; +} + +static void +xfs_cpu_hotplug_destroy(void) +{ + cpuhp_remove_state_nocalls(CPUHP_XFS_DEAD); +} + +#else /* !CONFIG_HOTPLUG_CPU */ +static inline int xfs_cpu_hotplug_init(void) { return 0; } +static inline void xfs_cpu_hotplug_destroy(void) {} +#endif + STATIC int __init init_xfs_fs(void) { @@ -2130,10 +2221,14 @@ init_xfs_fs(void) xfs_dir_startup(); - error = xfs_init_zones(); + error = xfs_cpu_hotplug_init(); if (error) goto out; + error = xfs_init_zones(); + if (error) + goto out_destroy_hp; + error = xfs_init_workqueues(); if (error) goto out_destroy_zones; @@ -2213,6 +2308,8 @@ init_xfs_fs(void) xfs_destroy_workqueues(); out_destroy_zones: xfs_destroy_zones(); + out_destroy_hp: + xfs_cpu_hotplug_destroy(); out: return error; } @@ -2235,6 +2332,7 @@ exit_xfs_fs(void) xfs_destroy_workqueues(); xfs_destroy_zones(); xfs_uuid_table_free(); + xfs_cpu_hotplug_destroy(); } module_init(init_xfs_fs); |
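A note on the quota hunk in xfs_fs_show_options(): the nested accounting-then-enforcement tests collapse into else-if chains because, as the option-parsing hunks show, an *_ENFD flag is only ever set together with the matching *_ACCT flag, so testing enforcement first is sufficient. A minimal sketch of the resulting logic, with illustrative (not the kernel's) flag values:

#include <stdio.h>

#define XFS_UQUOTA_ACCT (1u << 0)       /* illustrative values */
#define XFS_UQUOTA_ENFD (1u << 1)

static void show_uquota(unsigned int qflags)
{
        /* same shape as the simplified xfs_fs_show_options() hunk */
        if (qflags & XFS_UQUOTA_ENFD)
                puts(",usrquota");
        else if (qflags & XFS_UQUOTA_ACCT)
                puts(",uqnoenforce");
}

int main(void)
{
        show_uquota(XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD); /* ,usrquota */
        show_uquota(XFS_UQUOTA_ACCT);                   /* ,uqnoenforce */
        show_uquota(0);                                 /* prints nothing */
        return 0;
}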
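The CPU hotplug additions are spread over several hunks; the flow is: init_xfs_fs() registers a CPUHP_XFS_DEAD callback before any other setup, xfs_fs_fill_super() allocates the per-cpu inodegc structures and only then adds the mount to xfs_mount_list, and xfs_fs_put_super() removes the mount from the list before freeing them. When a CPU goes offline, the callback walks the mount list and lets each mount drain that CPU's queued inactivation work. The following is condensed from the hunks above with added comments; it is kernel-internal code, not a standalone example, and xfs_inodegc_cpu_dead() is introduced elsewhere in the same series.

#ifdef CONFIG_HOTPLUG_CPU
/* every mounted xfs filesystem the cpu-dead callback must visit */
static LIST_HEAD(xfs_mount_list);
static DEFINE_SPINLOCK(xfs_mount_list_lock);

static int
xfs_cpu_dead(
        unsigned int            cpu)
{
        struct xfs_mount        *mp, *n;

        spin_lock(&xfs_mount_list_lock);
        list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
                /* drop the lock while each mount flushes the dead CPU's inodegc queue */
                spin_unlock(&xfs_mount_list_lock);
                xfs_inodegc_cpu_dead(mp, cpu);
                spin_lock(&xfs_mount_list_lock);
        }
        spin_unlock(&xfs_mount_list_lock);
        return 0;
}

static int __init
xfs_cpu_hotplug_init(void)
{
        /* one global callback; per-mount state hangs off xfs_mount_list */
        return cpuhp_setup_state_nocalls(CPUHP_XFS_DEAD, "xfs:dead", NULL,
                        xfs_cpu_dead);
}
#endif  /* CONFIG_HOTPLUG_CPU */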