summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-05 08:12:47 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-05 08:12:47 +0300
commitac322de6bf5416cb145b58599297b8be73cd86ac (patch)
tree1a1be9f8b9241159fb4cde14a548eba9a4155b28 /include
parentccf21b69a83afaee4d5499e0d03eacf23946e08c (diff)
parent339421def582abb14c2217aa8c8f28bb2e299174 (diff)
downloadlinux-ac322de6bf5416cb145b58599297b8be73cd86ac.tar.xz
Merge tag 'md/4.4' of git://neil.brown.name/md
Pull md updates from Neil Brown: "Two major components to this update. 1) The clustered-raid1 support from SUSE is nearly complete. There are a few outstanding issues being worked on. Maybe half a dozen patches will bring this to a usable state. 2) The first stage of journalled-raid5 support from Facebook makes an appearance. With a journal device configured (typically NVRAM or SSD), the "RAID5 write hole" should be closed - a crash during degraded operations cannot result in data corruption. The next stage will be to use the journal as a write-behind cache so that latency can be reduced and in some cases throughput increased by performing more full-stripe writes. * tag 'md/4.4' of git://neil.brown.name/md: (66 commits) MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW MD: set journal disk ->raid_disk MD: kick out journal disk if it's not fresh raid5-cache: start raid5 readonly if journal is missing MD: add new bit to indicate raid array with journal raid5-cache: IO error handling raid5: journal disk can't be removed raid5-cache: add trim support for log MD: fix info output for journal disk raid5-cache: use bio chaining raid5-cache: small log->seq cleanup raid5-cache: new helper: r5_reserve_log_entry raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta raid5-cache: take rdev->data_offset into account early on raid5-cache: refactor bio allocation raid5-cache: clean up r5l_get_meta raid5-cache: simplify state machine when caches flushes are not needed raid5-cache: factor out a helper to run all stripes for an I/O unit raid5-cache: rename flushed_ios to finished_ios raid5-cache: free I/O units earlier ...
Diffstat (limited to 'include')
-rw-r--r--include/uapi/linux/raid/md_p.h73
1 files changed, 72 insertions, 1 deletions
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index 2ae6131e69a5..c3e654c6d518 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -89,6 +89,12 @@
* read requests will only be sent here in
* dire need
*/
+#define MD_DISK_JOURNAL 18 /* disk is used as the write journal in RAID-5/6 */
+
+#define MD_DISK_ROLE_SPARE 0xffff
+#define MD_DISK_ROLE_FAULTY 0xfffe
+#define MD_DISK_ROLE_JOURNAL 0xfffd
+#define MD_DISK_ROLE_MAX 0xff00 /* max value of regular disk role */
typedef struct mdp_device_descriptor_s {
__u32 number; /* 0 Device number in the entire set */
@@ -252,7 +258,10 @@ struct mdp_superblock_1 {
__le64 data_offset; /* sector start of data, often 0 */
__le64 data_size; /* sectors in this device that can be used for data */
__le64 super_offset; /* sector start of this superblock */
- __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ union {
+ __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */
+ __le64 journal_tail;/* journal tail of journal device (from data_offset) */
+ };
__le32 dev_number; /* permanent identifier of this device - not role in raid */
__le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */
__u8 device_uuid[16]; /* user-space setable, ignored by kernel */
@@ -302,6 +311,8 @@ struct mdp_superblock_1 {
#define MD_FEATURE_RECOVERY_BITMAP 128 /* recovery that is happening
* is guided by bitmap.
*/
+#define MD_FEATURE_CLUSTERED 256 /* clustered MD */
+#define MD_FEATURE_JOURNAL 512 /* support write cache */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
@@ -310,6 +321,66 @@ struct mdp_superblock_1 {
|MD_FEATURE_RESHAPE_BACKWARDS \
|MD_FEATURE_NEW_OFFSET \
|MD_FEATURE_RECOVERY_BITMAP \
+ |MD_FEATURE_CLUSTERED \
+ |MD_FEATURE_JOURNAL \
)
+struct r5l_payload_header {
+ __le16 type;
+ __le16 flags;
+} __attribute__ ((__packed__));
+
+enum r5l_payload_type {
+ R5LOG_PAYLOAD_DATA = 0,
+ R5LOG_PAYLOAD_PARITY = 1,
+ R5LOG_PAYLOAD_FLUSH = 2,
+};
+
+struct r5l_payload_data_parity {
+ struct r5l_payload_header header;
+ __le32 size; /* sector. data/parity size. each 4k
+ * has a checksum */
+ __le64 location; /* sector. For data, it's raid sector. For
+ * parity, it's stripe sector */
+ __le32 checksum[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_data_parity_flag {
+ R5LOG_PAYLOAD_FLAG_DISCARD = 1, /* payload is discard */
+ /*
+ * RESHAPED/RESHAPING is only set when there is reshape activity. Note,
+ * both data/parity of a stripe should have the same flag set
+ *
+ * RESHAPED: reshape is running, and this stripe finished reshape
+ * RESHAPING: reshape is running, and this stripe isn't reshaped
+ */
+ R5LOG_PAYLOAD_FLAG_RESHAPED = 2,
+ R5LOG_PAYLOAD_FLAG_RESHAPING = 3,
+};
+
+struct r5l_payload_flush {
+ struct r5l_payload_header header;
+ __le32 size; /* flush_stripes size, bytes */
+ __le64 flush_stripes[];
+} __attribute__ ((__packed__));
+
+enum r5l_payload_flush_flag {
+ R5LOG_PAYLOAD_FLAG_FLUSH_STRIPE = 1, /* data represents whole stripe */
+};
+
+struct r5l_meta_block {
+ __le32 magic;
+ __le32 checksum;
+ __u8 version;
+ __u8 __zero_pading_1;
+ __le16 __zero_pading_2;
+ __le32 meta_size; /* whole size of the block */
+
+ __le64 seq;
+ __le64 position; /* sector, start from rdev->data_offset, current position */
+ struct r5l_payload_header payloads[];
+} __attribute__ ((__packed__));
+
+#define R5LOG_VERSION 0x1
+#define R5LOG_MAGIC 0x6433c509
#endif