diff options
Diffstat (limited to 'kernel/printk/printk_ringbuffer.c')
| -rw-r--r-- | kernel/printk/printk_ringbuffer.c | 335 | 
1 files changed, 282 insertions, 53 deletions
diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c index fde338606ce8..88e8f3a61922 100644 --- a/kernel/printk/printk_ringbuffer.c +++ b/kernel/printk/printk_ringbuffer.c @@ -6,6 +6,7 @@  #include <linux/errno.h>  #include <linux/bug.h>  #include "printk_ringbuffer.h" +#include "internal.h"  /**   * DOC: printk_ringbuffer overview @@ -303,6 +304,9 @@   *   *   desc_push_tail:B / desc_reserve:D   *     set descriptor reusable (state), then push descriptor tail (id) + * + *   desc_update_last_finalized:A / desc_last_finalized_seq:A + *     store finalized record, then set new highest finalized sequence number   */  #define DATA_SIZE(data_ring)		_DATA_SIZE((data_ring)->size_bits) @@ -1030,9 +1034,13 @@ static char *data_alloc(struct printk_ringbuffer *rb, unsigned int size,  	unsigned long next_lpos;  	if (size == 0) { -		/* Specify a data-less block. */ -		blk_lpos->begin = NO_LPOS; -		blk_lpos->next = NO_LPOS; +		/* +		 * Data blocks are not created for empty lines. Instead, the +		 * reader will recognize these special lpos values and handle +		 * it appropriately. +		 */ +		blk_lpos->begin = EMPTY_LINE_LPOS; +		blk_lpos->next = EMPTY_LINE_LPOS;  		return NULL;  	} @@ -1210,10 +1218,18 @@ static const char *get_data(struct prb_data_ring *data_ring,  	/* Data-less data block description. */  	if (BLK_DATALESS(blk_lpos)) { -		if (blk_lpos->begin == NO_LPOS && blk_lpos->next == NO_LPOS) { +		/* +		 * Records that are just empty lines are also valid, even +		 * though they do not have a data block. For such records +		 * explicitly return empty string data to signify success. +		 */ +		if (blk_lpos->begin == EMPTY_LINE_LPOS && +		    blk_lpos->next == EMPTY_LINE_LPOS) {  			*data_size = 0;  			return "";  		} + +		/* Data lost, invalid, or otherwise unavailable. 
*/  		return NULL;  	} @@ -1442,19 +1458,117 @@ fail_reopen:  }  /* + * @last_finalized_seq value guarantees that all records up to and including + * this sequence number are finalized and can be read. The only exceptions are + * too old records which have already been overwritten. + * + * It is also guaranteed that @last_finalized_seq only increases. + * + * Be aware that finalized records following non-finalized records are not + * reported because they are not yet available to the reader. For example, + * a new record stored via printk() will not be available to a printer if + * it follows a record that has not been finalized yet. However, once that + * non-finalized record becomes finalized, @last_finalized_seq will be + * appropriately updated and the full set of finalized records will be + * available to the printer. And since each printk() caller will either + * directly print or trigger deferred printing of all available unprinted + * records, all printk() messages will get printed. + */ +static u64 desc_last_finalized_seq(struct printk_ringbuffer *rb) +{ +	struct prb_desc_ring *desc_ring = &rb->desc_ring; +	unsigned long ulseq; + +	/* +	 * Guarantee the sequence number is loaded before loading the +	 * associated record in order to guarantee that the record can be +	 * seen by this CPU. This pairs with desc_update_last_finalized:A. +	 */ +	ulseq = atomic_long_read_acquire(&desc_ring->last_finalized_seq +					); /* LMM(desc_last_finalized_seq:A) */ + +	return __ulseq_to_u64seq(rb, ulseq); +} + +static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq, +			    struct printk_record *r, unsigned int *line_count); + +/* + * Check if there are records directly following @last_finalized_seq that are + * finalized. If so, update @last_finalized_seq to the latest of these + * records. It is not allowed to skip over records that are not yet finalized. 
+ */ +static void desc_update_last_finalized(struct printk_ringbuffer *rb) +{ +	struct prb_desc_ring *desc_ring = &rb->desc_ring; +	u64 old_seq = desc_last_finalized_seq(rb); +	unsigned long oldval; +	unsigned long newval; +	u64 finalized_seq; +	u64 try_seq; + +try_again: +	finalized_seq = old_seq; +	try_seq = finalized_seq + 1; + +	/* Try to find later finalized records. */ +	while (_prb_read_valid(rb, &try_seq, NULL, NULL)) { +		finalized_seq = try_seq; +		try_seq++; +	} + +	/* No update needed if no later finalized record was found. */ +	if (finalized_seq == old_seq) +		return; + +	oldval = __u64seq_to_ulseq(old_seq); +	newval = __u64seq_to_ulseq(finalized_seq); + +	/* +	 * Set the sequence number of a later finalized record that has been +	 * seen. +	 * +	 * Guarantee the record data is visible to other CPUs before storing +	 * its sequence number. This pairs with desc_last_finalized_seq:A. +	 * +	 * Memory barrier involvement: +	 * +	 * If desc_last_finalized_seq:A reads from +	 * desc_update_last_finalized:A, then desc_read:A reads from +	 * _prb_commit:B. +	 * +	 * Relies on: +	 * +	 * RELEASE from _prb_commit:B to desc_update_last_finalized:A +	 *    matching +	 * ACQUIRE from desc_last_finalized_seq:A to desc_read:A +	 * +	 * Note: _prb_commit:B and desc_update_last_finalized:A can be +	 *       different CPUs. However, the desc_update_last_finalized:A +	 *       CPU (which performs the release) must have previously seen +	 *       _prb_commit:B. +	 */ +	if (!atomic_long_try_cmpxchg_release(&desc_ring->last_finalized_seq, +				&oldval, newval)) { /* LMM(desc_update_last_finalized:A) */ +		old_seq = __ulseq_to_u64seq(rb, oldval); +		goto try_again; +	} +} + +/*   * Attempt to finalize a specified descriptor. If this fails, the descriptor   * is either already final or it will finalize itself when the writer commits.   
*/ -static void desc_make_final(struct prb_desc_ring *desc_ring, unsigned long id) +static void desc_make_final(struct printk_ringbuffer *rb, unsigned long id)  { +	struct prb_desc_ring *desc_ring = &rb->desc_ring;  	unsigned long prev_state_val = DESC_SV(id, desc_committed);  	struct prb_desc *d = to_desc(desc_ring, id); -	atomic_long_cmpxchg_relaxed(&d->state_var, prev_state_val, -			DESC_SV(id, desc_finalized)); /* LMM(desc_make_final:A) */ - -	/* Best effort to remember the last finalized @id. */ -	atomic_long_set(&desc_ring->last_finalized_id, id); +	if (atomic_long_try_cmpxchg_relaxed(&d->state_var, &prev_state_val, +			DESC_SV(id, desc_finalized))) { /* LMM(desc_make_final:A) */ +		desc_update_last_finalized(rb); +	}  }  /** @@ -1550,7 +1664,7 @@ bool prb_reserve(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,  	 * readers. (For seq==0 there is no previous descriptor.)  	 */  	if (info->seq > 0) -		desc_make_final(desc_ring, DESC_ID(id - 1)); +		desc_make_final(rb, DESC_ID(id - 1));  	r->text_buf = data_alloc(rb, r->text_buf_size, &d->text_blk_lpos, id);  	/* If text data allocation fails, a data-less record is committed. */ @@ -1643,7 +1757,7 @@ void prb_commit(struct prb_reserved_entry *e)  	 */  	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_commit:A) */  	if (head_id != e->id) -		desc_make_final(desc_ring, e->id); +		desc_make_final(e->rb, e->id);  }  /** @@ -1663,12 +1777,9 @@ void prb_commit(struct prb_reserved_entry *e)   */  void prb_final_commit(struct prb_reserved_entry *e)  { -	struct prb_desc_ring *desc_ring = &e->rb->desc_ring; -  	_prb_commit(e, desc_finalized); -	/* Best effort to remember the last finalized @id. */ -	atomic_long_set(&desc_ring->last_finalized_id, e->id); +	desc_update_last_finalized(e->rb);  }  /* @@ -1832,7 +1943,7 @@ static int prb_read(struct printk_ringbuffer *rb, u64 seq,  }  /* Get the sequence number of the tail descriptor. 
*/ -static u64 prb_first_seq(struct printk_ringbuffer *rb) +u64 prb_first_seq(struct printk_ringbuffer *rb)  {  	struct prb_desc_ring *desc_ring = &rb->desc_ring;  	enum desc_state d_state; @@ -1875,12 +1986,123 @@ static u64 prb_first_seq(struct printk_ringbuffer *rb)  	return seq;  } +/** + * prb_next_reserve_seq() - Get the sequence number after the most recently + *                  reserved record. + * + * @rb:  The ringbuffer to get the sequence number from. + * + * This is the public function available to readers to see what sequence + * number will be assigned to the next reserved record. + * + * Note that depending on the situation, this value can be equal to or + * higher than the sequence number returned by prb_next_seq(). + * + * Context: Any context. + * Return: The sequence number that will be assigned to the next record + *         reserved. + */ +u64 prb_next_reserve_seq(struct printk_ringbuffer *rb) +{ +	struct prb_desc_ring *desc_ring = &rb->desc_ring; +	unsigned long last_finalized_id; +	atomic_long_t *state_var; +	u64 last_finalized_seq; +	unsigned long head_id; +	struct prb_desc desc; +	unsigned long diff; +	struct prb_desc *d; +	int err; + +	/* +	 * It may not be possible to read a sequence number for @head_id. +	 * So the ID of @last_finalized_seq is used to calculate what the +	 * sequence number of @head_id will be. +	 */ + +try_again: +	last_finalized_seq = desc_last_finalized_seq(rb); + +	/* +	 * @head_id is loaded after @last_finalized_seq to ensure that +	 * it points to the record with @last_finalized_seq or newer. +	 * +	 * Memory barrier involvement: +	 * +	 * If desc_last_finalized_seq:A reads from +	 * desc_update_last_finalized:A, then +	 * prb_next_reserve_seq:A reads from desc_reserve:D. 
+	 * +	 * Relies on: +	 * +	 * RELEASE from desc_reserve:D to desc_update_last_finalized:A +	 *    matching +	 * ACQUIRE from desc_last_finalized_seq:A to prb_next_reserve_seq:A +	 * +	 * Note: desc_reserve:D and desc_update_last_finalized:A can be +	 *       different CPUs. However, the desc_update_last_finalized:A CPU +	 *       (which performs the release) must have previously seen +	 *       desc_read:C, which implies desc_reserve:D can be seen. +	 */ +	head_id = atomic_long_read(&desc_ring->head_id); /* LMM(prb_next_reserve_seq:A) */ + +	d = to_desc(desc_ring, last_finalized_seq); +	state_var = &d->state_var; + +	/* Extract the ID, used to specify the descriptor to read. */ +	last_finalized_id = DESC_ID(atomic_long_read(state_var)); + +	/* Ensure @last_finalized_id is correct. */ +	err = desc_read_finalized_seq(desc_ring, last_finalized_id, last_finalized_seq, &desc); + +	if (err == -EINVAL) { +		if (last_finalized_seq == 0) { +			/* +			 * No record has been finalized or even reserved yet. +			 * +			 * The @head_id is initialized such that the first +			 * increment will yield the first record (seq=0). +			 * Handle it separately to avoid a negative @diff +			 * below. +			 */ +			if (head_id == DESC0_ID(desc_ring->count_bits)) +				return 0; + +			/* +			 * One or more descriptors are already reserved. Use +			 * the descriptor ID of the first one (@seq=0) for +			 * the @diff below. +			 */ +			last_finalized_id = DESC0_ID(desc_ring->count_bits) + 1; +		} else { +			/* Record must have been overwritten. Try again. */ +			goto try_again; +		} +	} + +	/* Diff of known descriptor IDs to compute related sequence numbers. */ +	diff = head_id - last_finalized_id; + +	/* +	 * @head_id points to the most recently reserved record, but this +	 * function returns the sequence number that will be assigned to the +	 * next (not yet reserved) record. Thus +1 is needed. +	 */ +	return (last_finalized_seq + diff + 1); +} +  /* - * Non-blocking read of a record. 
Updates @seq to the last finalized record - * (which may have no data available). + * Non-blocking read of a record. + * + * On success @seq is updated to the record that was read and (if provided) + * @r and @line_count will contain the read/calculated data. + * + * On failure @seq is updated to a record that is not yet available to the + * reader, but it will be the next record available to the reader.   * - * See the description of prb_read_valid() and prb_read_valid_info() - * for details. + * Note: When the current CPU is in panic, this function will skip over any + *       non-existent/non-finalized records in order to allow the panic CPU + *       to print any and all records that have been finalized.   */  static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,  			    struct printk_record *r, unsigned int *line_count) @@ -1899,12 +2121,32 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,  			*seq = tail_seq;  		} else if (err == -ENOENT) { -			/* Record exists, but no data available. Skip. */ +			/* Record exists, but the data was lost. Skip. */  			(*seq)++;  		} else { -			/* Non-existent/non-finalized record. Must stop. */ -			return false; +			/* +			 * Non-existent/non-finalized record. Must stop. +			 * +			 * For panic situations it cannot be expected that +			 * non-finalized records will become finalized. But +			 * there may be other finalized records beyond that +			 * need to be printed for a panic situation. If this +			 * is the panic CPU, skip this +			 * non-existent/non-finalized record unless it is +			 * at or beyond the head, in which case it is not +			 * possible to continue. +			 * +			 * Note that new messages printed on panic CPU are +			 * finalized when we are here. The only exception +			 * might be the last message without trailing newline. +			 * But it would have the sequence number returned +			 * by "prb_next_reserve_seq() - 1". 
+			 */ +			if (this_cpu_in_panic() && ((*seq + 1) < prb_next_reserve_seq(rb))) +				(*seq)++; +			else +				return false;  		}  	} @@ -1932,7 +2174,7 @@ static bool _prb_read_valid(struct printk_ringbuffer *rb, u64 *seq,   * On success, the reader must check r->info.seq to see which record was   * actually read. This allows the reader to detect dropped records.   * - * Failure means @seq refers to a not yet written record. + * Failure means @seq refers to a record not yet available to the reader.   */  bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,  		    struct printk_record *r) @@ -1962,7 +2204,7 @@ bool prb_read_valid(struct printk_ringbuffer *rb, u64 seq,   * On success, the reader must check info->seq to see which record meta data   * was actually read. This allows the reader to detect dropped records.   * - * Failure means @seq refers to a not yet written record. + * Failure means @seq refers to a record not yet available to the reader.   */  bool prb_read_valid_info(struct printk_ringbuffer *rb, u64 seq,  			 struct printk_info *info, unsigned int *line_count) @@ -2008,7 +2250,9 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)   * newest sequence number available to readers will be.   *   * This provides readers a sequence number to jump to if all currently - * available records should be skipped. + * available records should be skipped. It is guaranteed that all records + * previous to the returned value have been finalized and are (or were) + * available to the reader.   *   * Context: Any context.   * Return: The sequence number of the next newest (not yet available) record @@ -2016,34 +2260,19 @@ u64 prb_first_valid_seq(struct printk_ringbuffer *rb)   */  u64 prb_next_seq(struct printk_ringbuffer *rb)  { -	struct prb_desc_ring *desc_ring = &rb->desc_ring; -	enum desc_state d_state; -	unsigned long id;  	u64 seq; -	/* Check if the cached @id still points to a valid @seq. 
*/ -	id = atomic_long_read(&desc_ring->last_finalized_id); -	d_state = desc_read(desc_ring, id, NULL, &seq, NULL); +	seq = desc_last_finalized_seq(rb); -	if (d_state == desc_finalized || d_state == desc_reusable) { -		/* -		 * Begin searching after the last finalized record. -		 * -		 * On 0, the search must begin at 0 because of hack#2 -		 * of the bootstrapping phase it is not known if a -		 * record at index 0 exists. -		 */ -		if (seq != 0) -			seq++; -	} else { -		/* -		 * The information about the last finalized sequence number -		 * has gone. It should happen only when there is a flood of -		 * new messages and the ringbuffer is rapidly recycled. -		 * Give up and start from the beginning. -		 */ -		seq = 0; -	} +	/* +	 * Begin searching after the last finalized record. +	 * +	 * On 0, the search must begin at 0 because of hack#2 +	 * of the bootstrapping phase it is not known if a +	 * record at index 0 exists. +	 */ +	if (seq != 0) +		seq++;  	/*  	 * The information about the last finalized @seq might be inaccurate. @@ -2085,7 +2314,7 @@ void prb_init(struct printk_ringbuffer *rb,  	rb->desc_ring.infos = infos;  	atomic_long_set(&rb->desc_ring.head_id, DESC0_ID(descbits));  	atomic_long_set(&rb->desc_ring.tail_id, DESC0_ID(descbits)); -	atomic_long_set(&rb->desc_ring.last_finalized_id, DESC0_ID(descbits)); +	atomic_long_set(&rb->desc_ring.last_finalized_seq, 0);  	rb->text_data_ring.size_bits = textbits;  	rb->text_data_ring.data = text_buf;  | 
