diff options
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 219 |
1 files changed, 118 insertions, 101 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index d11de0fa5c5f..704aaadb61cf 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -361,7 +361,7 @@ xlog_find_verify_cycle( *new_blk = -1; out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -477,7 +477,7 @@ xlog_find_verify_log_record( *last_blk = i; out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -731,7 +731,7 @@ validate_head: goto out_free_buffer; } - kmem_free(buffer); + kvfree(buffer); if (head_blk == log_bbnum) *return_head_blk = 0; else @@ -745,7 +745,7 @@ validate_head: return 0; out_free_buffer: - kmem_free(buffer); + kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to find log head"); return error; @@ -999,7 +999,7 @@ xlog_verify_tail( "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", orig_tail, *tail_blk); out: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -1046,7 +1046,7 @@ xlog_verify_head( error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, XLOG_MAX_ICLOGS, tmp_buffer, &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); - kmem_free(tmp_buffer); + kvfree(tmp_buffer); if (error < 0) return error; @@ -1177,8 +1177,8 @@ xlog_check_unmount_rec( */ xlog_assign_atomic_lsn(&log->l_tail_lsn, log->l_curr_cycle, after_umount_blk); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, - log->l_curr_cycle, after_umount_blk); + log->l_ailp->ail_head_lsn = + atomic64_read(&log->l_tail_lsn); *tail_blk = after_umount_blk; *clean = true; @@ -1212,11 +1212,7 @@ xlog_set_state( if (bump_cycle) log->l_curr_cycle++; atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); + log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn); } /* @@ -1340,7 +1336,7 @@ xlog_find_tail( * headers if we have a filesystem using non-persistent counters. */ if (clean) - set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); + xfs_set_clean(log->l_mp); /* * Make sure that there are no blocks in front of the head @@ -1365,7 +1361,7 @@ xlog_find_tail( error = xlog_clear_stale_blocks(log, tail_lsn); done: - kmem_free(buffer); + kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to locate log tail"); @@ -1399,6 +1395,7 @@ xlog_find_zeroed( xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t num_scan_bblks; int error, log_bbnum = log->l_logBBsize; + int ret = 1; *blk_no = 0; @@ -1413,8 +1410,7 @@ xlog_find_zeroed( first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; - kmem_free(buffer); - return 1; + goto out_free_buffer; } /* check partially zeroed log */ @@ -1424,8 +1420,8 @@ xlog_find_zeroed( last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ - kmem_free(buffer); - return 0; + ret = 0; + goto out_free_buffer; } /* we have a partially zeroed log */ @@ -1471,10 +1467,10 @@ xlog_find_zeroed( *blk_no = last_blk; out_free_buffer: - kmem_free(buffer); + kvfree(buffer); if (error) return error; - return 1; + return ret; } /* @@ -1583,7 +1579,7 @@ xlog_write_log_records( } out_free_buffer: - kmem_free(buffer); + kvfree(buffer); return error; } @@ -1767,6 +1763,37 @@ xlog_recover_iget( return 0; } +/* + * Get an inode so that we can recover a log operation. + * + * Log intent items that target inodes effectively contain a file handle. + * Check that the generation number matches the intent item like we do for + * other file handles. Log intent items defined after this validation weakness + * was identified must use this function. + */ +int +xlog_recover_iget_handle( + struct xfs_mount *mp, + xfs_ino_t ino, + uint32_t gen, + struct xfs_inode **ipp) +{ + struct xfs_inode *ip; + int error; + + error = xlog_recover_iget(mp, ino, &ip); + if (error) + return error; + + if (VFS_I(ip)->i_generation != gen) { + xfs_irele(ip); + return -EFSCORRUPTED; + } + + *ipp = ip; + return 0; +} + /****************************************************************************** * * Log recover routines @@ -1789,6 +1816,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { &xlog_bud_item_ops, &xlog_attri_item_ops, &xlog_attrd_item_ops, + &xlog_xmi_item_ops, + &xlog_xmd_item_ops, }; static const struct xlog_recover_item_ops * @@ -1820,7 +1849,7 @@ xlog_find_item_ops( * from the transaction. However, we can't do that until after we've * replayed all the other items because they may be dependent on the * cancelled buffer and replaying the cancelled buffer can remove it - * form the cancelled buffer table. Hence they have tobe done last. + * form the cancelled buffer table. Hence they have to be done last. * * 3. Inode allocation buffers must be replayed before inode items that * read the buffer and replay changes into it. For filesystems using the @@ -1942,11 +1971,11 @@ xlog_recover_intent_item( struct xlog *log, struct xfs_log_item *lip, xfs_lsn_t lsn, - unsigned int dfp_type) + const struct xfs_defer_op_type *ops) { ASSERT(xlog_item_is_intent(lip)); - xfs_defer_start_recovery(lip, dfp_type, &log->r_dfops); + xfs_defer_start_recovery(lip, &log->r_dfops, ops); /* * Insert the intent into the AIL directly and drop one reference so @@ -2057,7 +2086,8 @@ xlog_recover_add_item( { struct xlog_recover_item *item; - item = kmem_zalloc(sizeof(struct xlog_recover_item), 0); + item = kzalloc(sizeof(struct xlog_recover_item), + GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&item->ri_list); list_add_tail(&item->ri_list, head); } @@ -2098,7 +2128,7 @@ xlog_recover_add_to_cont_trans( old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; - ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); + ptr = kvrealloc(old_ptr, len + old_len, GFP_KERNEL); if (!ptr) return -ENOMEM; memcpy(&ptr[old_len], dp, len); @@ -2160,7 +2190,7 @@ xlog_recover_add_to_trans( return 0; } - ptr = kmem_alloc(len, 0); + ptr = xlog_kvmalloc(len); memcpy(ptr, dp, len); in_f = (struct xfs_inode_log_format *)ptr; @@ -2182,14 +2212,13 @@ xlog_recover_add_to_trans( "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); - kmem_free(ptr); + kvfree(ptr); return -EFSCORRUPTED; } item->ri_total = in_f->ilf_size; - item->ri_buf = - kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t), - 0); + item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t), + GFP_KERNEL | __GFP_NOFAIL); } if (item->ri_total <= item->ri_cnt) { @@ -2197,7 +2226,7 @@ xlog_recover_add_to_trans( "log item region count (%d) overflowed size (%d)", item->ri_cnt, item->ri_total); ASSERT(0); - kmem_free(ptr); + kvfree(ptr); return -EFSCORRUPTED; } @@ -2227,13 +2256,13 @@ xlog_recover_free_trans( /* Free the regions in the item. */ list_del(&item->ri_list); for (i = 0; i < item->ri_cnt; i++) - kmem_free(item->ri_buf[i].i_addr); + kvfree(item->ri_buf[i].i_addr); /* Free the item itself */ - kmem_free(item->ri_buf); - kmem_free(item); + kfree(item->ri_buf); + kfree(item); } /* Free the transaction recover structure */ - kmem_free(trans); + kfree(trans); } /* @@ -2332,7 +2361,7 @@ xlog_recover_ophdr_to_trans( * This is a new transaction so allocate a new recovery container to * hold the recovery ops that will follow. */ - trans = kmem_zalloc(sizeof(struct xlog_recover), 0); + trans = kzalloc(sizeof(struct xlog_recover), GFP_KERNEL | __GFP_NOFAIL); trans->r_log_tid = tid; trans->r_lsn = be64_to_cpu(rhead->h_lsn); INIT_LIST_HEAD(&trans->r_itemq); @@ -2565,17 +2594,14 @@ xlog_recover_process_intents( #endif list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { - struct xfs_log_item *lip = dfp->dfp_intent; - const struct xfs_item_ops *ops = lip->li_ops; - - ASSERT(xlog_item_is_intent(lip)); + ASSERT(xlog_item_is_intent(dfp->dfp_intent)); /* * We should never see a redo item with a LSN higher than * the last transaction we found in the log at the start * of recovery. */ - ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + ASSERT(XFS_LSN_CMP(last_lsn, dfp->dfp_intent->li_lsn) >= 0); /* * NOTE: If your intent processing routine can create more @@ -2584,16 +2610,13 @@ xlog_recover_process_intents( * replayed in the wrong order! * * The recovery function can free the log item, so we must not - * access lip after it returns. + * access dfp->dfp_intent after it returns. It must dispose of + * @dfp if it returns 0. */ - error = ops->iop_recover(dfp, &capture_list); - if (error) { - trace_xlog_intent_recovery_failed(log->l_mp, error, - ops->iop_recover); + error = xfs_defer_finish_recovery(log->l_mp, dfp, + &capture_list); + if (error) break; - } - - xfs_defer_cancel_recovery(log->l_mp, dfp); } if (error) goto err; @@ -2627,15 +2650,22 @@ xlog_recover_cancel_intents( } /* - * Transfer ownership of the recovered log intent item to the recovery - * transaction. + * Transfer ownership of the recovered pending work to the recovery transaction + * and try to finish the work. If there is more work to be done, the dfp will + * remain attached to the transaction. If not, the dfp is freed. */ -void -xlog_recover_transfer_intent( +int +xlog_recover_finish_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { - dfp->dfp_intent = NULL; + int error; + + list_move(&dfp->dfp_list, &tp->t_dfops); + error = xfs_defer_finish_one(tp, dfp); + if (error == -EAGAIN) + return 0; + return error; } /* @@ -2658,7 +2688,7 @@ xlog_recover_clear_agi_bucket( if (error) goto out_error; - error = xfs_read_agi(pag, tp, &agibp); + error = xfs_read_agi(pag, tp, 0, &agibp); if (error) goto out_abort; @@ -2774,7 +2804,7 @@ xlog_recover_iunlink_ag( int bucket; int error; - error = xfs_read_agi(pag, NULL, &agibp); + error = xfs_read_agi(pag, NULL, 0, &agibp); if (error) { /* * AGI is b0rked. Don't process it. @@ -2979,6 +3009,10 @@ xlog_do_recovery_pass( for (i = 0; i < XLOG_RHASH_SIZE; i++) INIT_HLIST_HEAD(&rhash[i]); + hbp = xlog_alloc_buffer(log, hblks); + if (!hbp) + return -ENOMEM; + /* * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. @@ -2989,10 +3023,6 @@ xlog_do_recovery_pass( * iclog header and extract the header size from it. Get a * new hbp that is the correct size. */ - hbp = xlog_alloc_buffer(log, 1); - if (!hbp) - return -ENOMEM; - error = xlog_bread(log, tail_blk, 1, hbp, &offset); if (error) goto bread_err1; @@ -3034,21 +3064,20 @@ xlog_do_recovery_pass( (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) { hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); if (hblks > 1) { - kmem_free(hbp); + kvfree(hbp); hbp = xlog_alloc_buffer(log, hblks); + if (!hbp) + return -ENOMEM; } } } else { ASSERT(log->l_sectBBsize == 1); - hbp = xlog_alloc_buffer(log, 1); h_size = XLOG_BIG_RECORD_BSIZE; } - if (!hbp) - return -ENOMEM; dbp = xlog_alloc_buffer(log, BTOBB(h_size)); if (!dbp) { - kmem_free(hbp); + kvfree(hbp); return -ENOMEM; } @@ -3209,9 +3238,9 @@ xlog_do_recovery_pass( } bread_err2: - kmem_free(dbp); + kvfree(dbp); bread_err1: - kmem_free(hbp); + kvfree(hbp); /* * Submit buffers that have been dirtied by the last record recovered. @@ -3333,14 +3362,13 @@ xlog_do_recover( /* * We now update the tail_lsn since much of the recovery has completed - * and there may be space available to use. If there were no extent - * or iunlinks, we can free up the entire log and set the tail_lsn to - * be the last_sync_lsn. This was set in xlog_find_tail to be the - * lsn of the last known good LR on disk. If there are extent frees - * or iunlinks they will have some entries in the AIL; so we look at - * the AIL to determine how to set the tail_lsn. + * and there may be space available to use. If there were no extent or + * iunlinks, we can free up the entire log. This was set in + * xlog_find_tail to be the lsn of the last known good LR on disk. If + * there are extent frees or iunlinks they will have some entries in the + * AIL; so we look at the AIL to determine how to set the tail_lsn. */ - xlog_assign_tail_lsn(mp); + xfs_ail_assign_tail_lsn(log->l_ailp); /* * Now that we've finished replaying all buffer and inode updates, @@ -3365,13 +3393,6 @@ xlog_do_recover( /* re-initialise in-core superblock and geometry structures */ mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); - error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, - &mp->m_maxagi); - if (error) { - xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); - return error; - } - mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); /* Normal transactions can now occur */ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); @@ -3470,12 +3491,19 @@ xlog_recover( * part of recovery so that the root and real-time bitmap inodes can be read in * from disk in between the two stages. This is necessary so that we can free * space in the real-time portion of the file system. + * + * We run this whole process under GFP_NOFS allocation context. We do a + * combination of non-transactional and transactional work, yet we really don't + * want to recurse into the filesystem from direct reclaim during any of this + * processing. This allows all the recovery code run here not to care about the + * memory allocation context it is running in. */ int xlog_recover_finish( struct xlog *log) { - int error; + unsigned int nofs_flags = memalloc_nofs_save(); + int error; error = xlog_recover_process_intents(log); if (error) { @@ -3489,7 +3517,7 @@ xlog_recover_finish( xlog_recover_cancel_intents(log); xfs_alert(log->l_mp, "Failed to recover intents"); xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); - return error; + goto out_error; } /* @@ -3499,21 +3527,6 @@ xlog_recover_finish( */ xfs_log_force(log->l_mp, XFS_LOG_SYNC); - /* - * Now that we've recovered the log and all the intents, we can clear - * the log incompat feature bits in the superblock because there's no - * longer anything to protect. We rely on the AIL push to write out the - * updated superblock after everything else. - */ - if (xfs_clear_incompat_log_features(log->l_mp)) { - error = xfs_sync_sb(log->l_mp, false); - if (error < 0) { - xfs_alert(log->l_mp, - "Failed to clear log incompat features on recovery"); - return error; - } - } - xlog_recover_process_iunlinks(log); /* @@ -3535,9 +3548,13 @@ xlog_recover_finish( * and AIL. */ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + error = 0; + goto out_error; } - return 0; +out_error: + memalloc_nofs_restore(nofs_flags); + return error; } void |