summaryrefslogtreecommitdiff
path: root/drivers/edac/edac_core.h
diff options
context:
space:
mode:
authorDouglas Thompson <dougthompson@xmission.com>2007-07-19 12:49:36 +0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-07-19 21:04:53 +0400
commite27e3dac651771fe3250f6305dee277bce29fc5d (patch)
tree9c0ac81a0948d8e52a72865ff9fbae4a12031a32 /drivers/edac/edac_core.h
parent7c9281d76c1c0b130f79d5fc021084e9749959d4 (diff)
downloadlinux-e27e3dac651771fe3250f6305dee277bce29fc5d.tar.xz
drivers/edac: add edac_device class
This patch adds the new 'class' of object to be managed, named: 'edac_device'. As a peer of the 'edac_mc' class of object, it provides a non-memory centric view of an ERROR DETECTING device in hardware. It provides a sysfs interface and an abstraction for varioius EDAC type devices. Multiple 'instances' within the class are possible, with each 'instance' able to have multiple 'blocks', and each 'block' having 'attributes'. At the 'block' level there are the 'ce_count' and 'ue_count' fields which the device driver can update and/or call edac_device_handle_XX() functions. At each higher level are additional 'total' count fields, which are a summation of counts below that level. This 'edac_device' has been used to capture and present ECC errors which are found in a a L1 and L2 system on a per CORE/CPU basis. Signed-off-by: Douglas Thompson <dougthompson@xmission.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/edac/edac_core.h')
-rw-r--r--drivers/edac/edac_core.h252
1 files changed, 246 insertions, 6 deletions
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 397f144791ec..a3e4b97fe4fe 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -32,9 +32,14 @@
#include <linux/completion.h>
#include <linux/kobject.h>
#include <linux/platform_device.h>
+#include <linux/sysdev.h>
+#include <linux/workqueue.h>
+#include <linux/version.h>
#define EDAC_MC_LABEL_LEN 31
-#define MC_PROC_NAME_MAX_LEN 7
+#define EDAC_DEVICE_NAME_LEN 31
+#define EDAC_ATTRIB_VALUE_LEN 15
+#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20
#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) )
@@ -51,6 +56,10 @@
#define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
+/* edac_device printk */
+#define edac_device_printk(ctl, level, fmt, arg...) \
+ printk(level "EDAC DEVICE%d: " fmt, ctl->dev_idx, ##arg)
+
/* prefixes for edac_printk() and edac_mc_printk() */
#define EDAC_MC "MC"
#define EDAC_PCI "PCI"
@@ -62,7 +71,7 @@ extern int edac_debug_level;
#define edac_debug_printk(level, fmt, arg...) \
do { \
if (level <= edac_debug_level) \
- edac_printk(KERN_DEBUG, EDAC_DEBUG, fmt, ##arg); \
+ edac_printk(KERN_EMERG, EDAC_DEBUG, fmt, ##arg); \
} while(0)
#define debugf0( ... ) edac_debug_printk(0, __VA_ARGS__ )
@@ -195,6 +204,8 @@ enum scrub_type {
/* FIXME - should have notify capabilities: NMI, LOG, PROC, etc */
+extern char * edac_align_ptr(void *ptr, unsigned size);
+
/*
* There are several things to be aware of that aren't at all obvious:
*
@@ -376,6 +387,231 @@ struct mem_ctl_info {
struct completion kobj_complete;
};
+/*
+ * The following are the structures to provide for a generice
+ * or abstract 'edac_device'. This set of structures and the
+ * code that implements the APIs for the same, provide for
+ * registering EDAC type devices which are NOT standard memory.
+ *
+ * CPU caches (L1 and L2)
+ * DMA engines
+ * Core CPU swithces
+ * Fabric switch units
+ * PCIe interface controllers
+ * other EDAC/ECC type devices that can be monitored for
+ * errors, etc.
+ *
+ * It allows for a 2 level set of hiearchry. For example:
+ *
+ * cache could be composed of L1, L2 and L3 levels of cache.
+ * Each CPU core would have its own L1 cache, while sharing
+ * L2 and maybe L3 caches.
+ *
+ * View them arranged, via the sysfs presentation:
+ * /sys/devices/system/edac/..
+ *
+ * mc/ <existing memory device directory>
+ * cpu/cpu0/.. <L1 and L2 block directory>
+ * /L1-cache/ce_count
+ * /ue_count
+ * /L2-cache/ce_count
+ * /ue_count
+ * cpu/cpu1/.. <L1 and L2 block directory>
+ * /L1-cache/ce_count
+ * /ue_count
+ * /L2-cache/ce_count
+ * /ue_count
+ * ...
+ *
+ * the L1 and L2 directories would be "edac_device_block's"
+ */
+
+struct edac_device_counter {
+ u32 ue_count;
+ u32 ce_count;
+};
+
+#define INC_COUNTER(cnt) (cnt++)
+
+/*
+ * An array of these is passed to the alloc() function
+ * to specify attributes of the edac_block
+ */
+struct edac_attrib_spec {
+ char name[EDAC_DEVICE_NAME_LEN + 1];
+
+ int type;
+#define EDAC_ATTR_INT 0x01
+#define EDAC_ATTR_CHAR 0x02
+};
+
+
+/* Attribute control structure
+ * In this structure is a pointer to the driver's edac_attrib_spec
+ * The life of this pointer is inclusive in the life of the driver's
+ * life cycle.
+ */
+struct edac_attrib {
+ struct edac_device_block *block; /* Up Pointer */
+
+ struct edac_attrib_spec *spec; /* ptr to module spec entry */
+
+ union { /* actual value */
+ int edac_attrib_int_value;
+ char edac_attrib_char_value[EDAC_ATTRIB_VALUE_LEN + 1];
+ } edac_attrib_value;
+};
+
+/* device block control structure */
+struct edac_device_block {
+ struct edac_device_instance *instance; /* Up Pointer */
+ char name[EDAC_DEVICE_NAME_LEN + 1];
+
+ struct edac_device_counter counters; /* basic UE and CE counters */
+
+ int nr_attribs; /* how many attributes */
+ struct edac_attrib *attribs; /* this block's attributes */
+
+ /* edac sysfs device control */
+ struct kobject kobj;
+ struct completion kobj_complete;
+};
+
+/* device instance control structure */
+struct edac_device_instance {
+ struct edac_device_ctl_info *ctl; /* Up pointer */
+ char name[EDAC_DEVICE_NAME_LEN + 4];
+
+ struct edac_device_counter counters; /* instance counters */
+
+ u32 nr_blocks; /* how many blocks */
+ struct edac_device_block *blocks; /* block array */
+
+ /* edac sysfs device control */
+ struct kobject kobj;
+ struct completion kobj_complete;
+};
+
+
+/*
+ * Abstract edac_device control info structure
+ *
+ */
+struct edac_device_ctl_info {
+ /* for global list of edac_device_ctl_info structs */
+ struct list_head link;
+
+ int dev_idx;
+
+ /* Per instance controls for this edac_device */
+ int log_ue; /* boolean for logging UEs */
+ int log_ce; /* boolean for logging CEs */
+ int panic_on_ue; /* boolean for panic'ing on an UE */
+ unsigned poll_msec; /* number of milliseconds to poll interval */
+ unsigned long delay; /* number of jiffies for poll_msec */
+
+ struct sysdev_class *edac_class; /* pointer to class */
+
+ /* the internal state of this controller instance */
+ int op_state;
+#define OP_ALLOC 0x100
+#define OP_RUNNING_POLL 0x201
+#define OP_RUNNING_INTERRUPT 0x202
+#define OP_RUNNING_POLL_INTR 0x203
+#define OP_OFFLINE 0x300
+
+ /* work struct for this instance */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20))
+ struct delayed_work work;
+#else
+ struct work_struct work;
+#endif
+
+ /* pointer to edac polling checking routine:
+ * If NOT NULL: points to polling check routine
+ * If NULL: Then assumes INTERRUPT operation, where
+ * MC driver will receive events
+ */
+ void (*edac_check) (struct edac_device_ctl_info * edac_dev);
+
+ struct device *dev; /* pointer to device structure */
+
+ const char *mod_name; /* module name */
+ const char *ctl_name; /* edac controller name */
+
+ void *pvt_info; /* pointer to 'private driver' info */
+
+ unsigned long start_time;/* edac_device load start time (jiffies)*/
+
+ /* these are for safe removal of mc devices from global list while
+ * NMI handlers may be traversing list
+ */
+ struct rcu_head rcu;
+ struct completion complete;
+
+ /* sysfs top name under 'edac' directory
+ * and instance name:
+ * cpu/cpu0/...
+ * cpu/cpu1/...
+ * cpu/cpu2/...
+ * ...
+ */
+ char name[EDAC_DEVICE_NAME_LEN + 1];
+
+ /* Number of instances supported on this control structure
+ * and the array of those instances
+ */
+ u32 nr_instances;
+ struct edac_device_instance *instances;
+
+ /* Event counters for the this whole EDAC Device */
+ struct edac_device_counter counters;
+
+ /* edac sysfs device control for the 'name'
+ * device this structure controls
+ */
+ struct kobject kobj;
+ struct completion kobj_complete;
+};
+
+/* To get from the instance's wq to the beginning of the ctl structure */
+#define to_edac_device_ctl_work(w) \
+ container_of(w,struct edac_device_ctl_info,work)
+
+/* Function to calc the number of delay jiffies from poll_msec */
+static inline void edac_device_calc_delay(
+ struct edac_device_ctl_info *edac_dev)
+{
+ /* convert from msec to jiffies */
+ edac_dev->delay = edac_dev->poll_msec * HZ / 1000;
+}
+
+/*
+ * The alloc() and free() functions for the 'edac_device' control info
+ * structure. A MC driver will allocate one of these for each edac_device
+ * it is going to control/register with the EDAC CORE.
+ */
+extern struct edac_device_ctl_info *edac_device_alloc_ctl_info(
+ unsigned sizeof_private,
+ char *edac_device_name,
+ unsigned nr_instances,
+ char *edac_block_name,
+ unsigned nr_blocks,
+ unsigned offset_value,
+ struct edac_attrib_spec *attrib_spec,
+ unsigned nr_attribs
+);
+
+/* The offset value can be:
+ * -1 indicating no offset value
+ * 0 for zero-based block numbers
+ * 1 for 1-based block number
+ * other for other-based block number
+ */
+#define BLOCK_OFFSET_VALUE_OFF ((unsigned) -1)
+
+extern void edac_device_free_ctl_info( struct edac_device_ctl_info *ctl_info);
+
#ifdef CONFIG_PCI
/* write all or some bits in a byte-register*/
@@ -466,13 +702,17 @@ extern void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
char *msg);
/*
- * This kmalloc's and initializes all the structures.
- * Can't be used if all structures don't have the same lifetime.
+ * edac_device APIs
*/
extern struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
unsigned nr_chans);
-
-/* Free an mc previously allocated by edac_mc_alloc() */
extern void edac_mc_free(struct mem_ctl_info *mci);
+extern int edac_device_add_device(struct edac_device_ctl_info *edac_dev, int edac_idx);
+extern struct edac_device_ctl_info * edac_device_del_device(struct device *dev);
+extern void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr, const char *msg);
+extern void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
+ int inst_nr, int block_nr, const char *msg);
+
#endif /* _EDAC_CORE_H_ */