diff options
Diffstat (limited to 'kernel/trace/trace.c')
| -rw-r--r-- | kernel/trace/trace.c | 275 | 
1 files changed, 219 insertions, 56 deletions
| diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a2f0b9f33e9b..8a4bd6b68a0b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -253,6 +253,9 @@ unsigned long long ns2usecs(cycle_t nsec)  #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\  	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD) +/* trace_flags that are default zero for instances */ +#define ZEROED_TRACE_FLAGS \ +	TRACE_ITER_EVENT_FORK  /*   * The global_trace is the descriptor that holds the tracing @@ -303,33 +306,18 @@ void trace_array_put(struct trace_array *this_tr)  	mutex_unlock(&trace_types_lock);  } -int filter_check_discard(struct trace_event_file *file, void *rec, -			 struct ring_buffer *buffer, -			 struct ring_buffer_event *event) -{ -	if (unlikely(file->flags & EVENT_FILE_FL_FILTERED) && -	    !filter_match_preds(file->filter, rec)) { -		ring_buffer_discard_commit(buffer, event); -		return 1; -	} - -	return 0; -} -EXPORT_SYMBOL_GPL(filter_check_discard); -  int call_filter_check_discard(struct trace_event_call *call, void *rec,  			      struct ring_buffer *buffer,  			      struct ring_buffer_event *event)  {  	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&  	    !filter_match_preds(call->filter, rec)) { -		ring_buffer_discard_commit(buffer, event); +		__trace_event_discard_commit(buffer, event);  		return 1;  	}  	return 0;  } -EXPORT_SYMBOL_GPL(call_filter_check_discard);  static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)  { @@ -1672,6 +1660,16 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,  }  EXPORT_SYMBOL_GPL(tracing_generic_entry_update); +static __always_inline void +trace_event_setup(struct ring_buffer_event *event, +		  int type, unsigned long flags, int pc) +{ +	struct trace_entry *ent = ring_buffer_event_data(event); + +	tracing_generic_entry_update(ent, flags, pc); +	ent->type = type; +} +  struct ring_buffer_event *  trace_buffer_lock_reserve(struct ring_buffer *buffer,  			  int type, @@ -1681,34 +1679,137 @@ trace_buffer_lock_reserve(struct ring_buffer *buffer,  	struct ring_buffer_event *event;  	event = ring_buffer_lock_reserve(buffer, len); -	if (event != NULL) { -		struct trace_entry *ent = ring_buffer_event_data(event); +	if (event != NULL) +		trace_event_setup(event, type, flags, pc); + +	return event; +} + +DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event); +DEFINE_PER_CPU(int, trace_buffered_event_cnt); +static int trace_buffered_event_ref; + +/** + * trace_buffered_event_enable - enable buffering events + * + * When events are being filtered, it is quicker to use a temporary + * buffer to write the event data into if there's a likely chance + * that it will not be committed. The discard of the ring buffer + * is not as fast as committing, and is much slower than copying + * a commit. + * + * When an event is to be filtered, allocate per cpu buffers to + * write the event data into, and if the event is filtered and discarded + * it is simply dropped, otherwise, the entire data is to be committed + * in one shot. + */ +void trace_buffered_event_enable(void) +{ +	struct ring_buffer_event *event; +	struct page *page; +	int cpu; -		tracing_generic_entry_update(ent, flags, pc); -		ent->type = type; +	WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); + +	if (trace_buffered_event_ref++) +		return; + +	for_each_tracing_cpu(cpu) { +		page = alloc_pages_node(cpu_to_node(cpu), +					GFP_KERNEL | __GFP_NORETRY, 0); +		if (!page) +			goto failed; + +		event = page_address(page); +		memset(event, 0, sizeof(*event)); + +		per_cpu(trace_buffered_event, cpu) = event; + +		preempt_disable(); +		if (cpu == smp_processor_id() && +		    this_cpu_read(trace_buffered_event) != +		    per_cpu(trace_buffered_event, cpu)) +			WARN_ON_ONCE(1); +		preempt_enable();  	} -	return event; +	return; + failed: +	trace_buffered_event_disable();  } -void -__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +static void enable_trace_buffered_event(void *data)  { -	__this_cpu_write(trace_cmdline_save, true); -	ring_buffer_unlock_commit(buffer, event); +	/* Probably not needed, but do it anyway */ +	smp_rmb(); +	this_cpu_dec(trace_buffered_event_cnt);  } -void trace_buffer_unlock_commit(struct trace_array *tr, -				struct ring_buffer *buffer, -				struct ring_buffer_event *event, -				unsigned long flags, int pc) +static void disable_trace_buffered_event(void *data)  { -	__buffer_unlock_commit(buffer, event); +	this_cpu_inc(trace_buffered_event_cnt); +} -	ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL); -	ftrace_trace_userstack(buffer, flags, pc); +/** + * trace_buffered_event_disable - disable buffering events + * + * When a filter is removed, it is faster to not use the buffered + * events, and to commit directly into the ring buffer. Free up + * the temp buffers when there are no more users. This requires + * special synchronization with current events. + */ +void trace_buffered_event_disable(void) +{ +	int cpu; + +	WARN_ON_ONCE(!mutex_is_locked(&event_mutex)); + +	if (WARN_ON_ONCE(!trace_buffered_event_ref)) +		return; + +	if (--trace_buffered_event_ref) +		return; + +	preempt_disable(); +	/* For each CPU, set the buffer as used. */ +	smp_call_function_many(tracing_buffer_mask, +			       disable_trace_buffered_event, NULL, 1); +	preempt_enable(); + +	/* Wait for all current users to finish */ +	synchronize_sched(); + +	for_each_tracing_cpu(cpu) { +		free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); +		per_cpu(trace_buffered_event, cpu) = NULL; +	} +	/* +	 * Make sure trace_buffered_event is NULL before clearing +	 * trace_buffered_event_cnt. +	 */ +	smp_wmb(); + +	preempt_disable(); +	/* Do the work on each cpu */ +	smp_call_function_many(tracing_buffer_mask, +			       enable_trace_buffered_event, NULL, 1); +	preempt_enable(); +} + +void +__buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event) +{ +	__this_cpu_write(trace_cmdline_save, true); + +	/* If this is the temp buffer, we need to commit fully */ +	if (this_cpu_read(trace_buffered_event) == event) { +		/* Length is in event->array[0] */ +		ring_buffer_write(buffer, event->array[0], &event->array[1]); +		/* Release the temp buffer */ +		this_cpu_dec(trace_buffered_event_cnt); +	} else +		ring_buffer_unlock_commit(buffer, event);  } -EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);  static struct ring_buffer *temp_buffer; @@ -1719,8 +1820,23 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,  			  unsigned long flags, int pc)  {  	struct ring_buffer_event *entry; +	int val;  	*current_rb = trace_file->tr->trace_buffer.buffer; + +	if ((trace_file->flags & +	     (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) && +	    (entry = this_cpu_read(trace_buffered_event))) { +		/* Try to use the per cpu buffer first */ +		val = this_cpu_inc_return(trace_buffered_event_cnt); +		if (val == 1) { +			trace_event_setup(entry, type, flags, pc); +			entry->array[0] = len; +			return entry; +		} +		this_cpu_dec(trace_buffered_event_cnt); +	} +  	entry = trace_buffer_lock_reserve(*current_rb,  					 type, len, flags, pc);  	/* @@ -1738,17 +1854,6 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,  }  EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve); -struct ring_buffer_event * -trace_current_buffer_lock_reserve(struct ring_buffer **current_rb, -				  int type, unsigned long len, -				  unsigned long flags, int pc) -{ -	*current_rb = global_trace.trace_buffer.buffer; -	return trace_buffer_lock_reserve(*current_rb, -					 type, len, flags, pc); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_lock_reserve); -  void trace_buffer_unlock_commit_regs(struct trace_array *tr,  				     struct ring_buffer *buffer,  				     struct ring_buffer_event *event, @@ -1760,14 +1865,6 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,  	ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);  	ftrace_trace_userstack(buffer, flags, pc);  } -EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs); - -void trace_current_buffer_discard_commit(struct ring_buffer *buffer, -					 struct ring_buffer_event *event) -{ -	ring_buffer_discard_commit(buffer, event); -} -EXPORT_SYMBOL_GPL(trace_current_buffer_discard_commit);  void  trace_function(struct trace_array *tr, @@ -3571,6 +3668,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)  	if (mask == TRACE_ITER_RECORD_CMD)  		trace_event_enable_cmd_record(enabled); +	if (mask == TRACE_ITER_EVENT_FORK) +		trace_event_follow_fork(tr, enabled); +  	if (mask == TRACE_ITER_OVERWRITE) {  		ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);  #ifdef CONFIG_TRACER_MAX_TRACE @@ -3658,7 +3758,7 @@ tracing_trace_options_write(struct file *filp, const char __user *ubuf,  	if (cnt >= sizeof(buf))  		return -EINVAL; -	if (copy_from_user(&buf, ubuf, cnt)) +	if (copy_from_user(buf, ubuf, cnt))  		return -EFAULT;  	buf[cnt] = 0; @@ -3804,12 +3904,19 @@ static const char readme_msg[] =  	"\t   trigger: traceon, traceoff\n"  	"\t            enable_event:<system>:<event>\n"  	"\t            disable_event:<system>:<event>\n" +#ifdef CONFIG_HIST_TRIGGERS +	"\t            enable_hist:<system>:<event>\n" +	"\t            disable_hist:<system>:<event>\n" +#endif  #ifdef CONFIG_STACKTRACE  	"\t\t    stacktrace\n"  #endif  #ifdef CONFIG_TRACER_SNAPSHOT  	"\t\t    snapshot\n"  #endif +#ifdef CONFIG_HIST_TRIGGERS +	"\t\t    hist (see below)\n" +#endif  	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"  	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"  	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n" @@ -3825,6 +3932,56 @@ static const char readme_msg[] =  	"\t   To remove a trigger with a count:\n"  	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"  	"\t   Filters can be ignored when removing a trigger.\n" +#ifdef CONFIG_HIST_TRIGGERS +	"      hist trigger\t- If set, event hits are aggregated into a hash table\n" +	"\t    Format: hist:keys=<field1[,field2,...]>\n" +	"\t            [:values=<field1[,field2,...]>]\n" +	"\t            [:sort=<field1[,field2,...]>]\n" +	"\t            [:size=#entries]\n" +	"\t            [:pause][:continue][:clear]\n" +	"\t            [:name=histname1]\n" +	"\t            [if <filter>]\n\n" +	"\t    When a matching event is hit, an entry is added to a hash\n" +	"\t    table using the key(s) and value(s) named, and the value of a\n" +	"\t    sum called 'hitcount' is incremented.  Keys and values\n" +	"\t    correspond to fields in the event's format description.  Keys\n" +	"\t    can be any field, or the special string 'stacktrace'.\n" +	"\t    Compound keys consisting of up to two fields can be specified\n" +	"\t    by the 'keys' keyword.  Values must correspond to numeric\n" +	"\t    fields.  Sort keys consisting of up to two fields can be\n" +	"\t    specified using the 'sort' keyword.  The sort direction can\n" +	"\t    be modified by appending '.descending' or '.ascending' to a\n" +	"\t    sort field.  The 'size' parameter can be used to specify more\n" +	"\t    or fewer than the default 2048 entries for the hashtable size.\n" +	"\t    If a hist trigger is given a name using the 'name' parameter,\n" +	"\t    its histogram data will be shared with other triggers of the\n" +	"\t    same name, and trigger hits will update this common data.\n\n" +	"\t    Reading the 'hist' file for the event will dump the hash\n" +	"\t    table in its entirety to stdout.  If there are multiple hist\n" +	"\t    triggers attached to an event, there will be a table for each\n" +	"\t    trigger in the output.  The table displayed for a named\n" +	"\t    trigger will be the same as any other instance having the\n" +	"\t    same name.  The default format used to display a given field\n" +	"\t    can be modified by appending any of the following modifiers\n" +	"\t    to the field name, as applicable:\n\n" +	"\t            .hex        display a number as a hex value\n" +	"\t            .sym        display an address as a symbol\n" +	"\t            .sym-offset display an address as a symbol and offset\n" +	"\t            .execname   display a common_pid as a program name\n" +	"\t            .syscall    display a syscall id as a syscall name\n\n" +	"\t            .log2       display log2 value rather than raw number\n\n" +	"\t    The 'pause' parameter can be used to pause an existing hist\n" +	"\t    trigger or to start a hist trigger but not log any events\n" +	"\t    until told to do so.  'continue' can be used to start or\n" +	"\t    restart a paused hist trigger.\n\n" +	"\t    The 'clear' parameter will clear the contents of a running\n" +	"\t    hist trigger and leave its current paused/active state\n" +	"\t    unchanged.\n\n" +	"\t    The enable_hist and disable_hist triggers can be used to\n" +	"\t    have one event conditionally start and stop another event's\n" +	"\t    already-attached hist trigger.  The syntax is analagous to\n" +	"\t    the enable_event and disable_event triggers.\n" +#endif  ;  static ssize_t @@ -4474,7 +4631,7 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf,  	if (cnt > MAX_TRACER_SIZE)  		cnt = MAX_TRACER_SIZE; -	if (copy_from_user(&buf, ubuf, cnt)) +	if (copy_from_user(buf, ubuf, cnt))  		return -EFAULT;  	buf[cnt] = 0; @@ -5264,7 +5421,7 @@ static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,  	if (cnt >= sizeof(buf))  		return -EINVAL; -	if (copy_from_user(&buf, ubuf, cnt)) +	if (copy_from_user(buf, ubuf, cnt))  		return -EFAULT;  	buf[cnt] = 0; @@ -6650,7 +6807,7 @@ static int instance_mkdir(const char *name)  	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))  		goto out_free_tr; -	tr->trace_flags = global_trace.trace_flags; +	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;  	cpumask_copy(tr->tracing_cpumask, cpu_all_mask); @@ -6724,6 +6881,12 @@ static int instance_rmdir(const char *name)  	list_del(&tr->list); +	/* Disable all the flags that were enabled coming in */ +	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) { +		if ((1 << i) & ZEROED_TRACE_FLAGS) +			set_tracer_flag(tr, 1 << i, 0); +	} +  	tracing_set_nop(tr);  	event_trace_del_tracer(tr);  	ftrace_destroy_function_files(tr); | 
