diff options
Diffstat (limited to 'include/linux/edac.h')
-rw-r--r-- | include/linux/edac.h | 162 |
1 files changed, 38 insertions, 124 deletions
diff --git a/include/linux/edac.h b/include/linux/edac.h index 9e0d78966552..07c52c0af62d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -18,6 +18,8 @@ #include <linux/workqueue.h> #include <linux/debugfs.h> +#define EDAC_DEVICE_NAME_LEN 31 + struct device; #define EDAC_OPSTATE_INVAL -1 @@ -128,12 +130,21 @@ enum dev_type { * fatal (maybe it is on an unused memory area, * or the memory controller could recover from * it for example, by re-trying the operation). + * @HW_EVENT_ERR_DEFERRED: Deferred Error - Indicates an uncorrectable + * error whose handling is not urgent. This could + * be due to hardware data poisoning where the + * system can continue operation until the poisoned + * data is consumed. Preemptive measures may also + * be taken, e.g. offlining pages, etc. * @HW_EVENT_ERR_FATAL: Fatal Error - Uncorrected error that could not * be recovered. + * @HW_EVENT_ERR_INFO: Informational - The CPER spec defines a forth + * type of error: informational logs. */ enum hw_event_mc_err_type { HW_EVENT_ERR_CORRECTED, HW_EVENT_ERR_UNCORRECTED, + HW_EVENT_ERR_DEFERRED, HW_EVENT_ERR_FATAL, HW_EVENT_ERR_INFO, }; @@ -145,6 +156,8 @@ static inline char *mc_event_error_type(const unsigned int err_type) return "Corrected"; case HW_EVENT_ERR_UNCORRECTED: return "Uncorrected"; + case HW_EVENT_ERR_DEFERRED: + return "Deferred"; case HW_EVENT_ERR_FATAL: return "Fatal"; default: @@ -157,7 +170,7 @@ static inline char *mc_event_error_type(const unsigned int err_type) * enum mem_type - memory types. For a more detailed reference, please see * http://en.wikipedia.org/wiki/DRAM * - * @MEM_EMPTY Empty csrow + * @MEM_EMPTY: Empty csrow * @MEM_RESERVED: Reserved csrow type * @MEM_UNKNOWN: Unknown csrow type * @MEM_FPM: FPM - Fast Page Mode, used on systems up to 1995. @@ -192,10 +205,11 @@ static inline char *mc_event_error_type(const unsigned int err_type) * @MEM_DDR3: DDR3 RAM * @MEM_RDDR3: Registered DDR3 RAM * This is a variant of the DDR3 memories. - * @MEM_LRDDR3 Load-Reduced DDR3 memory. + * @MEM_LRDDR3: Load-Reduced DDR3 memory. * @MEM_DDR4: Unbuffered DDR4 RAM * @MEM_RDDR4: Registered DDR4 RAM * This is a variant of the DDR4 memories. + * @MEM_LRDDR4: Load-Reduced DDR4 memory. */ enum mem_type { MEM_EMPTY = 0, @@ -218,6 +232,7 @@ enum mem_type { MEM_LRDDR3, MEM_DDR4, MEM_RDDR4, + MEM_LRDDR4, }; #define MEM_FLAG_EMPTY BIT(MEM_EMPTY) @@ -239,6 +254,7 @@ enum mem_type { #define MEM_FLAG_RDDR3 BIT(MEM_RDDR3) #define MEM_FLAG_DDR4 BIT(MEM_DDR4) #define MEM_FLAG_RDDR4 BIT(MEM_RDDR4) +#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4) /** * enum edac-type - Error Detection and Correction capabilities and mode @@ -278,7 +294,7 @@ enum edac_type { /** * enum scrub_type - scrubbing capabilities - * @SCRUB_UNKNOWN Unknown if scrubber is available + * @SCRUB_UNKNOWN: Unknown if scrubber is available * @SCRUB_NONE: No scrubber * @SCRUB_SW_PROG: SW progressive (sequential) scrubbing * @SCRUB_SW_SRC: Software scrub only errors @@ -287,7 +303,7 @@ enum edac_type { * @SCRUB_HW_PROG: HW progressive (sequential) scrubbing * @SCRUB_HW_SRC: Hardware scrub only errors * @SCRUB_HW_PROG_SRC: Progressive hardware scrub from an error - * SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable + * @SCRUB_HW_TUNABLE: Hardware scrub frequency is tunable */ enum scrub_type { SCRUB_UNKNOWN = 0, @@ -320,114 +336,6 @@ enum scrub_type { #define OP_RUNNING_POLL_INTR 0x203 #define OP_OFFLINE 0x300 -/* - * Concepts used at the EDAC subsystem - * - * There are several things to be aware of that aren't at all obvious: - * - * SOCKETS, SOCKET SETS, BANKS, ROWS, CHIP-SELECT ROWS, CHANNELS, etc.. - * - * These are some of the many terms that are thrown about that don't always - * mean what people think they mean (Inconceivable!). In the interest of - * creating a common ground for discussion, terms and their definitions - * will be established. - * - * Memory devices: The individual DRAM chips on a memory stick. These - * devices commonly output 4 and 8 bits each (x4, x8). - * Grouping several of these in parallel provides the - * number of bits that the memory controller expects: - * typically 72 bits, in order to provide 64 bits + - * 8 bits of ECC data. - * - * Memory Stick: A printed circuit board that aggregates multiple - * memory devices in parallel. In general, this is the - * Field Replaceable Unit (FRU) which gets replaced, in - * the case of excessive errors. Most often it is also - * called DIMM (Dual Inline Memory Module). - * - * Memory Socket: A physical connector on the motherboard that accepts - * a single memory stick. Also called as "slot" on several - * datasheets. - * - * Channel: A memory controller channel, responsible to communicate - * with a group of DIMMs. Each channel has its own - * independent control (command) and data bus, and can - * be used independently or grouped with other channels. - * - * Branch: It is typically the highest hierarchy on a - * Fully-Buffered DIMM memory controller. - * Typically, it contains two channels. - * Two channels at the same branch can be used in single - * mode or in lockstep mode. - * When lockstep is enabled, the cacheline is doubled, - * but it generally brings some performance penalty. - * Also, it is generally not possible to point to just one - * memory stick when an error occurs, as the error - * correction code is calculated using two DIMMs instead - * of one. Due to that, it is capable of correcting more - * errors than on single mode. - * - * Single-channel: The data accessed by the memory controller is contained - * into one dimm only. E. g. if the data is 64 bits-wide, - * the data flows to the CPU using one 64 bits parallel - * access. - * Typically used with SDR, DDR, DDR2 and DDR3 memories. - * FB-DIMM and RAMBUS use a different concept for channel, - * so this concept doesn't apply there. - * - * Double-channel: The data size accessed by the memory controller is - * interlaced into two dimms, accessed at the same time. - * E. g. if the DIMM is 64 bits-wide (72 bits with ECC), - * the data flows to the CPU using a 128 bits parallel - * access. - * - * Chip-select row: This is the name of the DRAM signal used to select the - * DRAM ranks to be accessed. Common chip-select rows for - * single channel are 64 bits, for dual channel 128 bits. - * It may not be visible by the memory controller, as some - * DIMM types have a memory buffer that can hide direct - * access to it from the Memory Controller. - * - * Single-Ranked stick: A Single-ranked stick has 1 chip-select row of memory. - * Motherboards commonly drive two chip-select pins to - * a memory stick. A single-ranked stick, will occupy - * only one of those rows. The other will be unused. - * - * Double-Ranked stick: A double-ranked stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. - * - * Double-sided stick: DEPRECATED TERM, see Double-Ranked stick. - * A double-sided stick has two chip-select rows which - * access different sets of memory devices. The two - * rows cannot be accessed concurrently. "Double-sided" - * is irrespective of the memory devices being mounted - * on both sides of the memory stick. - * - * Socket set: All of the memory sticks that are required for - * a single memory access or all of the memory sticks - * spanned by a chip-select row. A single socket set - * has two chip-select rows and if double-sided sticks - * are used these will occupy those chip-select rows. - * - * Bank: This term is avoided because it is unclear when - * needing to distinguish between chip-select rows and - * socket sets. - * - * Controller pages: - * - * Physical pages: - * - * Virtual pages: - * - * - * STRUCTURE ORGANIZATION AND CHOICES - * - * - * - * PS - I enjoyed writing all that about as much as you enjoyed reading it. - */ - /** * enum edac_mc_layer - memory controller hierarchy layer * @@ -452,7 +360,7 @@ enum edac_mc_layer_type { /** * struct edac_mc_layer - describes the memory controller hierarchy - * @layer: layer type + * @type: layer type * @size: number of components per layer. For example, * if the channel layer has two channels, size = 2 * @is_virt_csrow: This layer is part of the "csrow" when old API @@ -475,24 +383,28 @@ struct edac_mc_layer { #define EDAC_MAX_LAYERS 3 /** - * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer array - * for the element given by [layer0,layer1,layer2] position + * EDAC_DIMM_OFF - Macro responsible to get a pointer offset inside a pointer + * array for the element given by [layer0,layer1,layer2] + * position * * @layers: a struct edac_mc_layer array, describing how many elements * were allocated for each layer - * @n_layers: Number of layers at the @layers array + * @nlayers: Number of layers at the @layers array * @layer0: layer0 position * @layer1: layer1 position. Unused if n_layers < 2 * @layer2: layer2 position. Unused if n_layers < 3 * - * For 1 layer, this macro returns &var[layer0] - &var + * For 1 layer, this macro returns "var[layer0] - var"; + * * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "&var[layer0][layer1] - &var" + * and to return "var[layer0][layer1] - var"; + * * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "&var[layer0][layer1][layer2] - &var" + * and to return "var[layer0][layer1][layer2] - var". * * A loop could be used here to make it more generic, but, as we only have * 3 layers, this is a little faster. + * * By design, layers can never be 0 or more than 3. If that ever happens, * a NULL is returned, causing an OOPS during the memory allocation routine, * with would point to the developer that he's doing something wrong. @@ -519,16 +431,18 @@ struct edac_mc_layer { * were allocated for each layer * @var: name of the var where we want to get the pointer * (like mci->dimms) - * @n_layers: Number of layers at the @layers array + * @nlayers: Number of layers at the @layers array * @layer0: layer0 position * @layer1: layer1 position. Unused if n_layers < 2 * @layer2: layer2 position. Unused if n_layers < 3 * - * For 1 layer, this macro returns &var[layer0] + * For 1 layer, this macro returns "var[layer0]"; + * * For 2 layers, this macro is similar to allocate a bi-dimensional array - * and to return "&var[layer0][layer1]" + * and to return "var[layer0][layer1]"; + * * For 3 layers, this macro is similar to allocate a tri-dimensional array - * and to return "&var[layer0][layer1][layer2]" + * and to return "var[layer0][layer1][layer2]"; */ #define EDAC_DIMM_PTR(layers, var, nlayers, layer0, layer1, layer2) ({ \ typeof(*var) __p; \ @@ -614,7 +528,7 @@ struct errcount_attribute_data { }; /** - * edac_raw_error_desc - Raw error report structure + * struct edac_raw_error_desc - Raw error report structure * @grain: minimum granularity for an error report, in bytes * @error_count: number of errors of the same type * @top_layer: top layer of the error (layer[0]) |