summaryrefslogtreecommitdiff
path: root/include/linux/ceph
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/ceph')
-rw-r--r--include/linux/ceph/ceph_features.h12
-rw-r--r--include/linux/ceph/ceph_fs.h5
-rw-r--r--include/linux/ceph/osd_client.h11
-rw-r--r--include/linux/ceph/osdmap.h50
-rw-r--r--include/linux/ceph/rados.h18
5 files changed, 84 insertions, 12 deletions
diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h
index 138448f766b4..d12659ce550d 100644
--- a/include/linux/ceph/ceph_features.h
+++ b/include/linux/ceph/ceph_features.h
@@ -43,6 +43,13 @@
#define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */
#define CEPH_FEATURE_EXPORT_PEER (1ULL<<37)
#define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38)
+#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */
+/* The process supports new-style OSDMap encoding. Monitors also use
+ this bit to determine if peers support NAK messages. */
+#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39)
+#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40)
+#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41)
+#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */
/*
* The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature
@@ -82,7 +89,10 @@ static inline u64 ceph_sanitize_features(u64 features)
CEPH_FEATURE_OSDHASHPSPOOL | \
CEPH_FEATURE_OSD_CACHEPOOL | \
CEPH_FEATURE_CRUSH_V2 | \
- CEPH_FEATURE_EXPORT_PEER)
+ CEPH_FEATURE_EXPORT_PEER | \
+ CEPH_FEATURE_OSDMAP_ENC | \
+ CEPH_FEATURE_CRUSH_TUNABLES3 | \
+ CEPH_FEATURE_OSD_PRIMARY_AFFINITY)
#define CEPH_FEATURES_REQUIRED_DEFAULT \
(CEPH_FEATURE_NOSRCADDR | \
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index 25bfb0eff772..5f6db18d72e8 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -332,6 +332,7 @@ enum {
CEPH_MDS_OP_LOOKUPHASH = 0x00102,
CEPH_MDS_OP_LOOKUPPARENT = 0x00103,
CEPH_MDS_OP_LOOKUPINO = 0x00104,
+ CEPH_MDS_OP_LOOKUPNAME = 0x00105,
CEPH_MDS_OP_SETXATTR = 0x01105,
CEPH_MDS_OP_RMXATTR = 0x01106,
@@ -420,8 +421,8 @@ union ceph_mds_request_args {
struct {
__u8 rule; /* currently fcntl or flock */
__u8 type; /* shared, exclusive, remove*/
+ __le64 owner; /* owner of the lock */
__le64 pid; /* process id requesting the lock */
- __le64 pid_namespace;
__le64 start; /* initial location to lock */
__le64 length; /* num bytes to lock from start */
__u8 wait; /* will caller wait for lock to become available? */
@@ -532,8 +533,8 @@ struct ceph_filelock {
__le64 start;/* file offset to start lock at */
__le64 length; /* num bytes to lock; 0 for all following start */
__le64 client; /* which client holds the lock */
+ __le64 owner; /* owner the lock */
__le64 pid; /* process id holding the lock on the client */
- __le64 pid_namespace;
__u8 type; /* shared lock, exclusive lock, or unlock */
} __attribute__ ((packed));
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index fd47e872ebcc..94ec69672164 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -43,7 +43,7 @@ struct ceph_osd {
};
-#define CEPH_OSD_MAX_OP 2
+#define CEPH_OSD_MAX_OP 3
enum ceph_osd_data_type {
CEPH_OSD_DATA_TYPE_NONE = 0,
@@ -76,6 +76,7 @@ struct ceph_osd_data {
struct ceph_osd_req_op {
u16 op; /* CEPH_OSD_OP_* */
+ u32 flags; /* CEPH_OSD_OP_FLAG_* */
u32 payload_len;
union {
struct ceph_osd_data raw_data_in;
@@ -102,6 +103,10 @@ struct ceph_osd_req_op {
u32 timeout;
__u8 flag;
} watch;
+ struct {
+ u64 expected_object_size;
+ u64 expected_write_size;
+ } alloc_hint;
};
};
@@ -293,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req,
extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req,
unsigned int which, u16 opcode,
u64 cookie, u64 version, int flag);
+extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
+ unsigned int which,
+ u64 expected_object_size,
+ u64 expected_write_size);
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 49ff69f0746b..561ea896c657 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -41,6 +41,18 @@ struct ceph_pg_pool_info {
char *name;
};
+static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool)
+{
+ switch (pool->type) {
+ case CEPH_POOL_TYPE_REP:
+ return true;
+ case CEPH_POOL_TYPE_EC:
+ return false;
+ default:
+ BUG_ON(1);
+ }
+}
+
struct ceph_object_locator {
s64 pool;
};
@@ -60,8 +72,16 @@ struct ceph_object_id {
struct ceph_pg_mapping {
struct rb_node node;
struct ceph_pg pgid;
- int len;
- int osds[];
+
+ union {
+ struct {
+ int len;
+ int osds[];
+ } pg_temp;
+ struct {
+ int osd;
+ } primary_temp;
+ };
};
struct ceph_osdmap {
@@ -78,12 +98,19 @@ struct ceph_osdmap {
struct ceph_entity_addr *osd_addr;
struct rb_root pg_temp;
+ struct rb_root primary_temp;
+
+ u32 *osd_primary_affinity;
+
struct rb_root pg_pools;
u32 pool_max;
/* the CRUSH map specifies the mapping of placement groups to
* the list of osds that store+replicate them. */
struct crush_map *crush;
+
+ struct mutex crush_scratch_mutex;
+ int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3];
};
static inline void ceph_oid_set_name(struct ceph_object_id *oid,
@@ -110,9 +137,21 @@ static inline void ceph_oid_copy(struct ceph_object_id *dest,
dest->name_len = src->name_len;
}
+static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd)
+{
+ return osd >= 0 && osd < map->max_osd &&
+ (map->osd_state[osd] & CEPH_OSD_EXISTS);
+}
+
static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd)
{
- return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP);
+ return ceph_osd_exists(map, osd) &&
+ (map->osd_state[osd] & CEPH_OSD_UP);
+}
+
+static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd)
+{
+ return !ceph_osd_is_up(map, osd);
}
static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
@@ -121,6 +160,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag)
}
extern char *ceph_osdmap_state_str(char *str, int len, int state);
+extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd);
static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map,
int osd)
@@ -153,7 +193,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid)
return 0;
}
-extern struct ceph_osdmap *osdmap_decode(void **p, void *end);
+extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end);
extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *map,
struct ceph_messenger *msgr);
@@ -172,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap,
extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap,
struct ceph_pg pgid,
- int *acting);
+ int *osds, int *primary);
extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap,
struct ceph_pg pgid);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 96292df4041b..f20e0d8a2155 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -81,8 +81,9 @@ struct ceph_pg_v1 {
*/
#define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */
-#define CEPH_PG_TYPE_REP 1
-#define CEPH_PG_TYPE_RAID4 2
+#define CEPH_POOL_TYPE_REP 1
+#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */
+#define CEPH_POOL_TYPE_EC 3
/*
* stable_mod func is used to control number of placement groups.
@@ -133,6 +134,10 @@ extern const char *ceph_osd_state_name(int s);
#define CEPH_OSD_IN 0x10000
#define CEPH_OSD_OUT 0
+/* osd primary-affinity. fixed point value: 0x10000 == baseline */
+#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000
+#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000
+
/*
* osd map flag bits
@@ -227,6 +232,9 @@ enum {
CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24,
CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25,
+ /* hints */
+ CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35,
+
/** multi **/
CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1,
CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2,
@@ -382,7 +390,7 @@ enum {
*/
struct ceph_osd_op {
__le16 op; /* CEPH_OSD_OP_* */
- __le32 flags; /* CEPH_OSD_FLAG_* */
+ __le32 flags; /* CEPH_OSD_OP_FLAG_* */
union {
struct {
__le64 offset, length;
@@ -416,6 +424,10 @@ struct ceph_osd_op {
__le64 offset, length;
__le64 src_offset;
} __attribute__ ((packed)) clonerange;
+ struct {
+ __le64 expected_object_size;
+ __le64 expected_write_size;
+ } __attribute__ ((packed)) alloc_hint;
};
__le32 payload_len;
} __attribute__ ((packed));