summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--fs/xfs/xfs_log.c195
1 files changed, 105 insertions, 90 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ad3d26ddfe31..503ea89e8b9a 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
STATIC int xlog_iclogs_empty(xlog_t *log);
#if defined(XFS_LOG_TRACE)
+
+#define XLOG_TRACE_LOGGRANT_SIZE 2048
+#define XLOG_TRACE_ICLOG_SIZE 256
+
+void
+xlog_trace_loggrant_alloc(xlog_t *log)
+{
+ log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS);
+}
+
+void
+xlog_trace_loggrant_dealloc(xlog_t *log)
+{
+ ktrace_free(log->l_grant_trace);
+}
+
void
xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
{
unsigned long cnts;
- if (!log->l_grant_trace) {
- log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP);
- if (!log->l_grant_trace)
- return;
- }
/* ticket counts are 1 byte each */
cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8;
@@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string)
}
void
+xlog_trace_iclog_alloc(xlog_in_core_t *iclog)
+{
+ iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS);
+}
+
+void
+xlog_trace_iclog_dealloc(xlog_in_core_t *iclog)
+{
+ ktrace_free(iclog->ic_trace);
+}
+
+void
xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
{
- if (!iclog->ic_trace)
- iclog->ic_trace = ktrace_alloc(256, KM_SLEEP);
ktrace_enter(iclog->ic_trace,
(void *)((unsigned long)state),
(void *)((unsigned long)current_pid()),
@@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state)
(void *)NULL, (void *)NULL);
}
#else
+
+#define xlog_trace_loggrant_alloc(log)
+#define xlog_trace_loggrant_dealloc(log)
#define xlog_trace_loggrant(log,tic,string)
+
+#define xlog_trace_iclog_alloc(iclog)
+#define xlog_trace_iclog_dealloc(iclog)
#define xlog_trace_iclog(iclog,state)
+
#endif /* XFS_LOG_TRACE */
@@ -226,20 +254,24 @@ xlog_grant_sub_space(struct log *log, int bytes)
static void
xlog_grant_add_space_write(struct log *log, int bytes)
{
- log->l_grant_write_bytes += bytes;
- if (log->l_grant_write_bytes > log->l_logsize) {
- log->l_grant_write_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_write_bytes;
+ if (tmp > bytes)
+ log->l_grant_write_bytes += bytes;
+ else {
log->l_grant_write_cycle++;
+ log->l_grant_write_bytes = bytes - tmp;
}
}
static void
xlog_grant_add_space_reserve(struct log *log, int bytes)
{
- log->l_grant_reserve_bytes += bytes;
- if (log->l_grant_reserve_bytes > log->l_logsize) {
- log->l_grant_reserve_bytes -= log->l_logsize;
+ int tmp = log->l_logsize - log->l_grant_reserve_bytes;
+ if (tmp > bytes)
+ log->l_grant_reserve_bytes += bytes;
+ else {
log->l_grant_reserve_cycle++;
+ log->l_grant_reserve_bytes = bytes - tmp;
}
}
@@ -332,15 +364,12 @@ xfs_log_done(xfs_mount_t *mp,
} else {
xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)");
xlog_regrant_reserve_log_space(log, ticket);
- }
-
- /* If this ticket was a permanent reservation and we aren't
- * trying to release it, reset the inited flags; so next time
- * we write, a start record will be written out.
- */
- if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) &&
- (flags & XFS_LOG_REL_PERM_RESERV) == 0)
+ /* If this ticket was a permanent reservation and we aren't
+ * trying to release it, reset the inited flags; so next time
+ * we write, a start record will be written out.
+ */
ticket->t_flags |= XLOG_TIC_INITED;
+ }
return lsn;
} /* xfs_log_done */
@@ -353,11 +382,11 @@ xfs_log_done(xfs_mount_t *mp,
* Asynchronous forces are implemented by setting the WANT_SYNC
* bit in the appropriate in-core log and then returning.
*
- * Synchronous forces are implemented with a semaphore. All callers
- * to force a given lsn to disk will wait on a semaphore attached to the
+ * Synchronous forces are implemented with a signal variable. All callers
+ * to force a given lsn to disk will wait on a the sv attached to the
* specific in-core log. When given in-core log finally completes its
* write to disk, that thread will wake up all threads waiting on the
- * semaphore.
+ * sv.
*/
int
_xfs_log_force(
@@ -584,12 +613,12 @@ error:
* mp - ubiquitous xfs mount point structure
*/
int
-xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags)
+xfs_log_mount_finish(xfs_mount_t *mp)
{
int error;
if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
- error = xlog_recover_finish(mp->m_log, mfsi_flags);
+ error = xlog_recover_finish(mp->m_log);
else {
error = 0;
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
@@ -703,7 +732,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
if (!(iclog->ic_state == XLOG_STATE_ACTIVE ||
iclog->ic_state == XLOG_STATE_DIRTY)) {
if (!XLOG_FORCED_SHUTDOWN(log)) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -744,7 +773,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
|| iclog->ic_state == XLOG_STATE_DIRTY
|| iclog->ic_state == XLOG_STATE_IOERROR) ) {
- sv_wait(&iclog->ic_forcesema, PMEM,
+ sv_wait(&iclog->ic_force_wait, PMEM,
&log->l_icloglock, s);
} else {
spin_unlock(&log->l_icloglock);
@@ -834,7 +863,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= tic->t_unit_res;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}
@@ -855,7 +884,7 @@ xfs_log_move_tail(xfs_mount_t *mp,
break;
tail_lsn = 0;
free_bytes -= need_bytes;
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
@@ -1008,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp)
* layer, it means the underlyin device no longer supports
* barrier I/O. Warn loudly and turn off barriers.
*/
- if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) {
+ if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) {
l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
xfs_fs_cmn_err(CE_WARN, l->l_mp,
"xlog_iodone: Barriers are no longer supported"
@@ -1228,8 +1257,9 @@ xlog_alloc_log(xfs_mount_t *mp,
spin_lock_init(&log->l_icloglock);
spin_lock_init(&log->l_grant_lock);
- initnsema(&log->l_flushsema, 0, "ic-flush");
+ sv_init(&log->l_flush_wait, 0, "flush_wait");
+ xlog_trace_loggrant_alloc(log);
/* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
@@ -1281,8 +1311,10 @@ xlog_alloc_log(xfs_mount_t *mp,
ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
- sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force");
- sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write");
+ sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force");
+ sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write");
+
+ xlog_trace_iclog_alloc(iclog);
iclogp = &iclog->ic_next;
}
@@ -1561,33 +1593,21 @@ xlog_dealloc_log(xlog_t *log)
iclog = log->l_iclog;
for (i=0; i<log->l_iclog_bufs; i++) {
- sv_destroy(&iclog->ic_forcesema);
- sv_destroy(&iclog->ic_writesema);
+ sv_destroy(&iclog->ic_force_wait);
+ sv_destroy(&iclog->ic_write_wait);
xfs_buf_free(iclog->ic_bp);
-#ifdef XFS_LOG_TRACE
- if (iclog->ic_trace != NULL) {
- ktrace_free(iclog->ic_trace);
- }
-#endif
+ xlog_trace_iclog_dealloc(iclog);
next_iclog = iclog->ic_next;
- kmem_free(iclog, sizeof(xlog_in_core_t));
+ kmem_free(iclog);
iclog = next_iclog;
}
- freesema(&log->l_flushsema);
spinlock_destroy(&log->l_icloglock);
spinlock_destroy(&log->l_grant_lock);
xfs_buf_free(log->l_xbuf);
-#ifdef XFS_LOG_TRACE
- if (log->l_trace != NULL) {
- ktrace_free(log->l_trace);
- }
- if (log->l_grant_trace != NULL) {
- ktrace_free(log->l_grant_trace);
- }
-#endif
+ xlog_trace_loggrant_dealloc(log);
log->l_mp->m_log = NULL;
- kmem_free(log, sizeof(xlog_t));
+ kmem_free(log);
} /* xlog_dealloc_log */
/*
@@ -1973,7 +1993,7 @@ xlog_write(xfs_mount_t * mp,
/* Clean iclogs starting from the head. This ordering must be
* maintained, so an iclog doesn't become ACTIVE beyond one that
* is SYNCING. This is also required to maintain the notion that we use
- * a counting semaphore to hold off would be writers to the log when every
+ * a ordered wait queue to hold off would be writers to the log when every
* iclog is trying to sync to disk.
*
* State Change: DIRTY -> ACTIVE
@@ -2097,6 +2117,7 @@ xlog_state_do_callback(
int funcdidcallbacks; /* flag: function did callbacks */
int repeats; /* for issuing console warnings if
* looping too many times */
+ int wake = 0;
spin_lock(&log->l_icloglock);
first_iclog = iclog = log->l_iclog;
@@ -2236,7 +2257,7 @@ xlog_state_do_callback(
xlog_state_clean_log(log);
/* wake up threads waiting in xfs_log_force() */
- sv_broadcast(&iclog->ic_forcesema);
+ sv_broadcast(&iclog->ic_force_wait);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2278,15 +2299,13 @@ xlog_state_do_callback(
}
#endif
- flushcnt = 0;
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) {
- flushcnt = log->l_flushcnt;
- log->l_flushcnt = 0;
- }
+ if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
+ wake = 1;
spin_unlock(&log->l_icloglock);
- while (flushcnt--)
- vsema(&log->l_flushsema);
-} /* xlog_state_do_callback */
+
+ if (wake)
+ sv_broadcast(&log->l_flush_wait);
+}
/*
@@ -2300,8 +2319,7 @@ xlog_state_do_callback(
* the second completion goes through.
*
* Callbacks could take time, so they are done outside the scope of the
- * global state machine log lock. Assume that the calls to cvsema won't
- * take a long time. At least we know it won't sleep.
+ * global state machine log lock.
*/
STATIC void
xlog_state_done_syncing(
@@ -2337,7 +2355,7 @@ xlog_state_done_syncing(
* iclog buffer, we wake them all, one will get to do the
* I/O, the others get to wait for the result.
*/
- sv_broadcast(&iclog->ic_writesema);
+ sv_broadcast(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2345,11 +2363,9 @@ xlog_state_done_syncing(
/*
* If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must
- * sleep. The flush semaphore is set to the number of in-core buffers and
- * decremented around disk syncing. Therefore, if all buffers are syncing,
- * this semaphore will cause new writes to sleep until a sync completes.
- * Otherwise, this code just does p() followed by v(). This approximates
- * a sleep/wakeup except we can't race.
+ * sleep. We wait on the flush queue on the head iclog as that should be
+ * the first iclog to complete flushing. Hence if all iclogs are syncing,
+ * we will wait here and all new writes will sleep until a sync completes.
*
* The in-core logs are used in a circular fashion. They are not used
* out-of-order even when an iclog past the head is free.
@@ -2384,16 +2400,15 @@ restart:
}
iclog = log->l_iclog;
- if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) {
- log->l_flushcnt++;
- spin_unlock(&log->l_icloglock);
+ if (iclog->ic_state != XLOG_STATE_ACTIVE) {
xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH);
XFS_STATS_INC(xs_log_noiclogs);
- /* Ensure that log writes happen */
- psema(&log->l_flushsema, PINOD);
+
+ /* Wait for log writes to have flushed */
+ sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0);
goto restart;
}
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+
head = &iclog->ic_header;
atomic_inc(&iclog->ic_refcnt); /* prevents sync */
@@ -2507,7 +2522,7 @@ xlog_grant_log_space(xlog_t *log,
goto error_return;
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/*
* If we got an error, and the filesystem is shutting down,
* we'll catch it down below. So just continue...
@@ -2533,7 +2548,7 @@ redo:
xlog_trace_loggrant(log, tic,
"xlog_grant_log_space: sleep 2");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
if (XLOG_FORCED_SHUTDOWN(log)) {
spin_lock(&log->l_grant_lock);
@@ -2632,7 +2647,7 @@ xlog_regrant_write_log_space(xlog_t *log,
if (free_bytes < ntic->t_unit_res)
break;
free_bytes -= ntic->t_unit_res;
- sv_signal(&ntic->t_sema);
+ sv_signal(&ntic->t_wait);
ntic = ntic->t_next;
} while (ntic != log->l_write_headq);
@@ -2643,7 +2658,7 @@ xlog_regrant_write_log_space(xlog_t *log,
xlog_trace_loggrant(log, tic,
"xlog_regrant_write_log_space: sleep 1");
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT,
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT,
&log->l_grant_lock, s);
/* If we're shutting down, this tic is already
@@ -2672,7 +2687,7 @@ redo:
if ((tic->t_flags & XLOG_TIC_IN_Q) == 0)
xlog_ins_ticketq(&log->l_write_headq, tic);
XFS_STATS_INC(xs_sleep_logspace);
- sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s);
+ sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s);
/* If we're shutting down, this tic is already off the queue */
if (XLOG_FORCED_SHUTDOWN(log)) {
@@ -2915,7 +2930,7 @@ xlog_state_switch_iclogs(xlog_t *log,
* 2. the current iclog is drity, and the previous iclog is in the
* active or dirty state.
*
- * We may sleep (call psema) if:
+ * We may sleep if:
*
* 1. the current iclog is not in the active nor dirty state.
* 2. the current iclog dirty, and the previous iclog is not in the
@@ -3012,7 +3027,7 @@ maybe_sleep:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3095,7 +3110,7 @@ try_again:
XLOG_STATE_SYNCING))) {
ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
+ sv_wait(&iclog->ic_prev->ic_write_wait, PSWP,
&log->l_icloglock, s);
*log_flushed = 1;
already_slept = 1;
@@ -3115,7 +3130,7 @@ try_again:
!(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) {
/*
- * Don't wait on the forcesema if we know that we've
+ * Don't wait on completion if we know that we've
* gotten a log write error.
*/
if (iclog->ic_state & XLOG_STATE_IOERROR) {
@@ -3123,7 +3138,7 @@ try_again:
return XFS_ERROR(EIO);
}
XFS_STATS_INC(xs_log_force_sleep);
- sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s);
+ sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s);
/*
* No need to grab the log lock here since we're
* only deciding whether or not to return EIO
@@ -3179,7 +3194,7 @@ STATIC void
xlog_ticket_put(xlog_t *log,
xlog_ticket_t *ticket)
{
- sv_destroy(&ticket->t_sema);
+ sv_destroy(&ticket->t_wait);
kmem_zone_free(xfs_log_ticket_zone, ticket);
} /* xlog_ticket_put */
@@ -3269,7 +3284,7 @@ xlog_ticket_get(xlog_t *log,
tic->t_trans_type = 0;
if (xflags & XFS_LOG_PERM_RESERV)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- sv_init(&(tic->t_sema), SV_DEFAULT, "logtick");
+ sv_init(&(tic->t_wait), SV_DEFAULT, "logtick");
xlog_tic_reset_res(tic);
@@ -3556,14 +3571,14 @@ xfs_log_force_umount(
*/
if ((tic = log->l_reserve_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_reserve_headq);
}
if ((tic = log->l_write_headq)) {
do {
- sv_signal(&tic->t_sema);
+ sv_signal(&tic->t_wait);
tic = tic->t_next;
} while (tic != log->l_write_headq);
}