diff options
| -rw-r--r-- | fs/f2fs/checkpoint.c | 106 | ||||
| -rw-r--r-- | fs/f2fs/f2fs.h | 51 | ||||
| -rw-r--r-- | include/trace/events/f2fs.h | 68 |
3 files changed, 221 insertions, 4 deletions
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 300664269eb6..bc6058a3122b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -14,6 +14,9 @@ #include <linux/pagevec.h> #include <linux/swap.h> #include <linux/kthread.h> +#include <linux/delayacct.h> +#include <linux/ioprio.h> +#include <linux/math64.h> #include "f2fs.h" #include "node.h" @@ -21,6 +24,109 @@ #include "iostat.h" #include <trace/events/f2fs.h> +static inline void get_lock_elapsed_time(struct f2fs_time_stat *ts) +{ + ts->total_time = ktime_get(); +#ifdef CONFIG_64BIT + ts->running_time = current->se.sum_exec_runtime; +#endif +#if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) + ts->runnable_time = current->sched_info.run_delay; +#endif +#ifdef CONFIG_TASK_DELAY_ACCT + if (current->delays) + ts->io_sleep_time = current->delays->blkio_delay; +#endif +} + +static inline void trace_lock_elapsed_time_start(struct f2fs_rwsem *sem, + struct f2fs_lock_context *lc) +{ + lc->lock_trace = trace_f2fs_lock_elapsed_time_enabled(); + if (!lc->lock_trace) + return; + + get_lock_elapsed_time(&lc->ts); +} + +static inline void trace_lock_elapsed_time_end(struct f2fs_rwsem *sem, + struct f2fs_lock_context *lc, bool is_write) +{ + struct f2fs_time_stat tts; + unsigned long long total_time; + unsigned long long running_time = 0; + unsigned long long runnable_time = 0; + unsigned long long io_sleep_time = 0; + unsigned long long other_time = 0; + unsigned npm = NSEC_PER_MSEC; + + if (!lc->lock_trace) + return; + + get_lock_elapsed_time(&tts); + + total_time = div_u64(tts.total_time - lc->ts.total_time, npm); + if (total_time <= MAX_LOCK_ELAPSED_TIME) + return; + +#ifdef CONFIG_64BIT + running_time = div_u64(tts.running_time - lc->ts.running_time, npm); +#endif +#if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) + runnable_time = div_u64(tts.runnable_time - lc->ts.runnable_time, npm); +#endif +#ifdef CONFIG_TASK_DELAY_ACCT + io_sleep_time = div_u64(tts.io_sleep_time - lc->ts.io_sleep_time, npm); +#endif + if (total_time > running_time + io_sleep_time + runnable_time) + other_time = total_time - running_time - + io_sleep_time - runnable_time; + + trace_f2fs_lock_elapsed_time(sem->sbi, sem->name, is_write, current, + get_current_ioprio(), total_time, running_time, + runnable_time, io_sleep_time, other_time); +} + +void f2fs_down_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + f2fs_down_read(sem); + trace_lock_elapsed_time_start(sem, lc); +} + +int f2fs_down_read_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + if (!f2fs_down_read_trylock(sem)) + return 0; + trace_lock_elapsed_time_start(sem, lc); + return 1; +} + +void f2fs_up_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + f2fs_up_read(sem); + trace_lock_elapsed_time_end(sem, lc, false); +} + +void f2fs_down_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + f2fs_down_write(sem); + trace_lock_elapsed_time_start(sem, lc); +} + +int f2fs_down_write_trylock_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + if (!f2fs_down_write_trylock(sem)) + return 0; + trace_lock_elapsed_time_start(sem, lc); + return 1; +} + +void f2fs_up_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc) +{ + f2fs_up_write(sem); + trace_lock_elapsed_time_end(sem, lc, true); +} + #define DEFAULT_CHECKPOINT_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 3)) static struct kmem_cache *ino_entry_slab; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d81de7cee70..fc3f532972ed 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -173,6 +173,10 @@ enum device_allocation_policy { ALLOCATE_FORWARD_FROM_HINT, }; +enum f2fs_lock_name { + LOCK_NAME_NONE, +}; + /* * An implementation of an rwsem that is explicitly unfair to readers. This * prevents priority inversion when a low-priority reader acquires the read lock @@ -181,6 +185,8 @@ enum device_allocation_policy { */ struct f2fs_rwsem { + struct f2fs_sb_info *sbi; + enum f2fs_lock_name name; struct rw_semaphore internal_rwsem; #ifdef CONFIG_F2FS_UNFAIR_RWSEM wait_queue_head_t read_waiters; @@ -1409,6 +1415,24 @@ struct f2fs_gc_control { unsigned int nr_free_secs; /* # of free sections to do GC */ }; +struct f2fs_time_stat { + unsigned long long total_time; /* total wall clock time */ +#ifdef CONFIG_64BIT + unsigned long long running_time; /* running time */ +#endif +#if defined(CONFIG_SCHED_INFO) && defined(CONFIG_SCHEDSTATS) + unsigned long long runnable_time; /* runnable(including preempted) time */ +#endif +#ifdef CONFIG_TASK_DELAY_ACCT + unsigned long long io_sleep_time; /* IO sleep time */ +#endif +}; + +struct f2fs_lock_context { + struct f2fs_time_stat ts; + bool lock_trace; +}; + /* * For s_flag in struct f2fs_sb_info * Modification on enum should be synchronized with s_flag array @@ -1525,6 +1549,9 @@ enum f2fs_lookup_mode { LOOKUP_AUTO, }; +/* a threshold of maximum elapsed time in critical region to print tracepoint */ +#define MAX_LOCK_ELAPSED_TIME 500 + static inline int f2fs_test_bit(unsigned int nr, char *addr); static inline void f2fs_set_bit(unsigned int nr, char *addr); static inline void f2fs_clear_bit(unsigned int nr, char *addr); @@ -2263,16 +2290,22 @@ static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) spin_unlock_irqrestore(&sbi->cp_lock, flags); } -#define init_f2fs_rwsem(sem) \ +#define init_f2fs_rwsem(sem) __init_f2fs_rwsem(sem, NULL, LOCK_NAME_NONE) +#define init_f2fs_rwsem_trace __init_f2fs_rwsem + +#define __init_f2fs_rwsem(sem, sbi, name) \ do { \ static struct lock_class_key __key; \ \ - __init_f2fs_rwsem((sem), #sem, &__key); \ + do_init_f2fs_rwsem((sem), #sem, &__key, sbi, name); \ } while (0) -static inline void __init_f2fs_rwsem(struct f2fs_rwsem *sem, - const char *sem_name, struct lock_class_key *key) +static inline void do_init_f2fs_rwsem(struct f2fs_rwsem *sem, + const char *sem_name, struct lock_class_key *key, + struct f2fs_sb_info *sbi, enum f2fs_lock_name name) { + sem->sbi = sbi; + sem->name = name; __init_rwsem(&sem->internal_rwsem, sem_name, key); #ifdef CONFIG_F2FS_UNFAIR_RWSEM init_waitqueue_head(&sem->read_waiters); @@ -2341,6 +2374,16 @@ static inline void f2fs_up_write(struct f2fs_rwsem *sem) #endif } +void f2fs_down_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); +int f2fs_down_read_trylock_trace(struct f2fs_rwsem *sem, + struct f2fs_lock_context *lc); +void f2fs_up_read_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); +void f2fs_down_write_trace(struct f2fs_rwsem *sem, + struct f2fs_lock_context *lc); +int f2fs_down_write_trylock_trace(struct f2fs_rwsem *sem, + struct f2fs_lock_context *lc); +void f2fs_up_write_trace(struct f2fs_rwsem *sem, struct f2fs_lock_context *lc); + static inline void disable_nat_bits(struct f2fs_sb_info *sbi, bool lock) { unsigned long flags; diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 635dcabcf1c7..9a852a16df9c 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -184,6 +184,10 @@ TRACE_DEFINE_ENUM(CP_PHASE_FINISH_CHECKPOINT); { CP_PHASE_FINISH_BLOCK_OPS, "finish block_ops" }, \ { CP_PHASE_FINISH_CHECKPOINT, "finish checkpoint" }) +#define show_lock_name(lock) \ + __print_symbolic(lock, \ + { LOCK_NAME_NONE, "none" }) + struct f2fs_sb_info; struct f2fs_io_info; struct extent_info; @@ -2452,6 +2456,70 @@ DEFINE_EVENT(f2fs__rw_end, f2fs_datawrite_end, TP_ARGS(inode, offset, bytes) ); +TRACE_EVENT(f2fs_lock_elapsed_time, + + TP_PROTO(struct f2fs_sb_info *sbi, enum f2fs_lock_name lock_name, + bool is_write, struct task_struct *p, int ioprio, + unsigned long long total_time, + unsigned long long running_time, + unsigned long long runnable_time, + unsigned long long io_sleep_time, + unsigned long long other_time), + + TP_ARGS(sbi, lock_name, is_write, p, ioprio, total_time, running_time, + runnable_time, io_sleep_time, other_time), + + TP_STRUCT__entry( + __field(dev_t, dev) + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, prio) + __field(int, ioprio_class) + __field(int, ioprio_data) + __field(unsigned int, lock_name) + __field(bool, is_write) + __field(unsigned long long, total_time) + __field(unsigned long long, running_time) + __field(unsigned long long, runnable_time) + __field(unsigned long long, io_sleep_time) + __field(unsigned long long, other_time) + ), + + TP_fast_assign( + __entry->dev = sbi->sb->s_dev; + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->prio = p->prio; + __entry->ioprio_class = IOPRIO_PRIO_CLASS(ioprio); + __entry->ioprio_data = IOPRIO_PRIO_DATA(ioprio); + __entry->lock_name = lock_name; + __entry->is_write = is_write; + __entry->total_time = total_time; + __entry->running_time = running_time; + __entry->runnable_time = runnable_time; + __entry->io_sleep_time = io_sleep_time; + __entry->other_time = other_time; + ), + + TP_printk("dev = (%d,%d), comm: %s, pid: %d, prio: %d, " + "ioprio_class: %d, ioprio_data: %d, lock_name: %s, " + "lock_type: %s, total: %llu, running: %llu, " + "runnable: %llu, io_sleep: %llu, other: %llu", + show_dev(__entry->dev), + __entry->comm, + __entry->pid, + __entry->prio, + __entry->ioprio_class, + __entry->ioprio_data, + show_lock_name(__entry->lock_name), + __entry->is_write ? "wlock" : "rlock", + __entry->total_time, + __entry->running_time, + __entry->runnable_time, + __entry->io_sleep_time, + __entry->other_time) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ |
