From 6e8575faa8fa680d59404a4d58d12190667be815 Mon Sep 17 00:00:00 2001 From: Sam Lang Date: Fri, 28 Dec 2012 09:56:46 -0800 Subject: ceph: Check for created flag in response from mds The mds now sends back a created inode if the create request performed the create. If the file already existed, no inode is returned in the reply. This allows ceph to set the created flag in atomic_open so that permissions are properly checked in the case that the file wasn't created by the create call to the mds. To ensure compability with previous kernels, a feature for sending back the inode in the create reply was added, so that the mds will only send back the inode if the client indicates it supports the feature. Signed-off-by: Sam Lang Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index dad579b0c0e6..6b7c6acbb3bf 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -14,13 +14,16 @@ #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* bits 8-17 defined by user-space; not supported yet here */ #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) +/* bits 19-25 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* * Features supported. */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES) + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR) -- cgit v1.2.3 From 1604f488ac2dcce33c8218e75a000e8c5fb57e61 Mon Sep 17 00:00:00 2001 From: Jim Schutt Date: Fri, 30 Nov 2012 09:15:25 -0700 Subject: libceph: for chooseleaf rules, retry CRUSH map descent from root if leaf is failed Add libceph support for a new CRUSH tunable recently added to Ceph servers. Consider the CRUSH rule step chooseleaf firstn 0 type This rule means that replicas will be chosen in a manner such that each chosen leaf's branch will contain a unique instance of . When an object is re-replicated after a leaf failure, if the CRUSH map uses a chooseleaf rule the remapped replica ends up under the bucket that held the failed leaf. This causes uneven data distribution across the storage cluster, to the point that when all the leaves but one fail under a particular bucket, that remaining leaf holds all the data from its failed peers. This behavior also limits the number of peers that can participate in the re-replication of the data held by the failed leaf, which increases the time required to re-replicate after a failure. For a chooseleaf CRUSH rule, the tree descent has two steps: call them the inner and outer descents. If the tree descent down to is the outer descent, and the descent from down to a leaf is the inner descent, the issue is that a down leaf is detected on the inner descent, so only the inner descent is retried. In order to disperse re-replicated data as widely as possible across a storage cluster after a failure, we want to retry the outer descent. So, fix up crush_choose() to allow the inner descent to return immediately on choosing a failed leaf. Wire this up as a new CRUSH tunable. Note that after this change, for a chooseleaf rule, if the primary OSD in a placement group has failed, choosing a replacement may result in one of the other OSDs in the PG colliding with the new primary. This requires that OSD's data for that PG to need moving as well. This seems unavoidable but should be relatively rare. This corresponds to ceph.git commit 88f218181a9e6d2292e2697fc93797d0f6d6e5dc. Signed-off-by: Jim Schutt Reviewed-by: Sage Weil --- include/linux/ceph/ceph_features.h | 7 +++++-- include/linux/crush/crush.h | 2 ++ net/ceph/crush/mapper.c | 13 ++++++++++--- net/ceph/osdmap.c | 6 ++++++ 4 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 6b7c6acbb3bf..2160aab482f6 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -14,7 +14,9 @@ #define CEPH_FEATURE_DIRLAYOUTHASH (1<<7) /* bits 8-17 defined by user-space; not supported yet here */ #define CEPH_FEATURE_CRUSH_TUNABLES (1<<18) -/* bits 19-25 defined by user-space; not supported yet here */ +/* bits 19-24 defined by user-space; not supported yet here */ +#define CEPH_FEATURE_CRUSH_TUNABLES2 (1<<25) +/* bit 26 defined by user-space; not supported yet here */ #define CEPH_FEATURE_REPLY_CREATE_INODE (1<<27) /* @@ -22,7 +24,8 @@ */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ - CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES | \ + CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE) #define CEPH_FEATURES_REQUIRED_DEFAULT \ diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 25baa287cff7..6a1101f24cfb 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -162,6 +162,8 @@ struct crush_map { __u32 choose_local_fallback_tries; /* choose attempts before giving up */ __u32 choose_total_tries; + /* attempt chooseleaf inner descent once; on failure retry outer descent */ + __u32 chooseleaf_descend_once; }; diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 35fce755ce10..96c8a58937db 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -287,6 +287,7 @@ static int is_out(const struct crush_map *map, const __u32 *weight, int item, in * @outpos: our position in that vector * @firstn: true if choosing "first n" items, false if choosing "indep" * @recurse_to_leaf: true if we want one device under each item of given type + * @descend_once: true if we should only try one descent before giving up * @out2: second output vector for leaf items (if @recurse_to_leaf) */ static int crush_choose(const struct crush_map *map, @@ -295,7 +296,7 @@ static int crush_choose(const struct crush_map *map, int x, int numrep, int type, int *out, int outpos, int firstn, int recurse_to_leaf, - int *out2) + int descend_once, int *out2) { int rep; unsigned int ftotal, flocal; @@ -399,6 +400,7 @@ static int crush_choose(const struct crush_map *map, x, outpos+1, 0, out2, outpos, firstn, 0, + map->chooseleaf_descend_once, NULL) <= outpos) /* didn't get leaf */ reject = 1; @@ -422,7 +424,10 @@ reject: ftotal++; flocal++; - if (collide && flocal <= map->choose_local_tries) + if (reject && descend_once) + /* let outer call try again */ + skip_rep = 1; + else if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; else if (map->choose_local_fallback_tries > 0 && @@ -485,6 +490,7 @@ int crush_do_rule(const struct crush_map *map, int i, j; int numrep; int firstn; + const int descend_once = 0; if ((__u32)ruleno >= map->max_rules) { dprintk(" bad ruleno %d\n", ruleno); @@ -544,7 +550,8 @@ int crush_do_rule(const struct crush_map *map, curstep->arg2, o+osize, j, firstn, - recurse_to_leaf, c+osize); + recurse_to_leaf, + descend_once, c+osize); } if (recurse_to_leaf) diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index de73214b5d26..ca05871635bc 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -170,6 +170,7 @@ static struct crush_map *crush_decode(void *pbyval, void *end) c->choose_local_tries = 2; c->choose_local_fallback_tries = 5; c->choose_total_tries = 19; + c->chooseleaf_descend_once = 0; ceph_decode_need(p, end, 4*sizeof(u32), bad); magic = ceph_decode_32(p); @@ -336,6 +337,11 @@ static struct crush_map *crush_decode(void *pbyval, void *end) dout("crush decode tunable choose_total_tries = %d", c->choose_total_tries); + ceph_decode_need(p, end, sizeof(u32), done); + c->chooseleaf_descend_once = ceph_decode_32(p); + dout("crush decode tunable chooseleaf_descend_once = %d", + c->chooseleaf_descend_once); + done: dout("crush_decode success\n"); return c; -- cgit v1.2.3 From dd5f049dbdf973d9bceebef1fd73647a5ede6732 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 1 Nov 2012 08:39:27 -0500 Subject: ceph: define ceph_encode_8_safe() It's kind of a silly macro, but ceph_encode_8_safe() is the only one missing from an otherwise pretty complete set. It's not used, but neither are a couple of the others in this set. While in there, insert some whitespace to tidy up the alignment of the line-terminating backslashes in some of the macro definitions. Signed-off-by: Alex Elder Reviewed-by: Dan Mick --- include/linux/ceph/decode.h | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h index 4bbf2db45f46..cd679f2d348b 100644 --- a/include/linux/ceph/decode.h +++ b/include/linux/ceph/decode.h @@ -52,10 +52,10 @@ static inline int ceph_has_room(void **p, void *end, size_t n) return end >= *p && n <= end - *p; } -#define ceph_decode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_decode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_decode_64_safe(p, end, v, bad) \ @@ -99,8 +99,8 @@ static inline int ceph_has_room(void **p, void *end, size_t n) * * There are two possible failures: * - converting the string would require accessing memory at or - * beyond the "end" pointer provided (-E - * - memory could not be allocated for the result + * beyond the "end" pointer provided (-ERANGE) + * - memory could not be allocated for the result (-ENOMEM) */ static inline char *ceph_extract_encoded_string(void **p, void *end, size_t *lenp, gfp_t gfp) @@ -217,10 +217,10 @@ static inline void ceph_encode_string(void **p, void *end, *p += len; } -#define ceph_encode_need(p, end, n, bad) \ - do { \ - if (!likely(ceph_has_room(p, end, n))) \ - goto bad; \ +#define ceph_encode_need(p, end, n, bad) \ + do { \ + if (!likely(ceph_has_room(p, end, n))) \ + goto bad; \ } while (0) #define ceph_encode_64_safe(p, end, v, bad) \ @@ -231,12 +231,17 @@ static inline void ceph_encode_string(void **p, void *end, #define ceph_encode_32_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u32), bad); \ - ceph_encode_32(p, v); \ + ceph_encode_32(p, v); \ } while (0) #define ceph_encode_16_safe(p, end, v, bad) \ do { \ ceph_encode_need(p, end, sizeof(u16), bad); \ - ceph_encode_16(p, v); \ + ceph_encode_16(p, v); \ + } while (0) +#define ceph_encode_8_safe(p, end, v, bad) \ + do { \ + ceph_encode_need(p, end, sizeof(u8), bad); \ + ceph_encode_8(p, v); \ } while (0) #define ceph_encode_copy_safe(p, end, pv, n, bad) \ -- cgit v1.2.3 From af77f26caa35a95af09d1dac5c513b3901de7e37 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 9 Nov 2012 08:43:15 -0600 Subject: rbd: drop oid parameters from ceph_osdc_build_request() The last two parameters to ceph_osd_build_request() describe the object id, but the values passed always come from the osd request structure whose address is also provided. Get rid of those last two parameters. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 6 +----- include/linux/ceph/osd_client.h | 4 +--- net/ceph/osd_client.c | 13 +++++-------- 3 files changed, 7 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d4e93a28fb6a..9a701effa0ef 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1176,11 +1176,7 @@ static int rbd_do_request(struct request *rq, snapid, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, &len, - ops, - snapc, - &mtime, - osd_req->r_oid, osd_req->r_oid_len); + ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d9b880e977e6..f2e5d2cdca06 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -227,9 +227,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 *plen, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len); + struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, struct ceph_file_layout *layout, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index eade41bb7102..7d38327a8e89 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -376,9 +376,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 *plen, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, - struct timespec *mtime, - const char *oid, - int oid_len) + struct timespec *mtime) { struct ceph_msg *msg = req->r_request; struct ceph_osd_request_head *head; @@ -405,9 +403,9 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, /* fill in oid */ - head->object_len = cpu_to_le32(oid_len); - memcpy(p, oid, oid_len); - p += oid_len; + head->object_len = cpu_to_le32(req->r_oid_len); + memcpy(p, req->r_oid, req->r_oid_len); + p += req->r_oid_len; src_op = src_ops; while (src_op->op) { @@ -506,8 +504,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, ceph_osdc_build_request(req, off, plen, ops, snapc, - mtime, - req->r_oid, req->r_oid_len); + mtime); return req; } -- cgit v1.2.3 From c885837f7d4f8c4f5cb2a744cc6929bc078e9dc0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: always allow trail in osd request An osd request structure contains an optional trail portion, which if present will contain data to be passed in the payload portion of the message containing the request. The trail field is a ceph_pagelist pointer, and if null it indicates there is no trail. A ceph_pagelist structure contains a length field, and it can legitimately hold value 0. Make use of this to change the interpretation of the "trail" of an osd request so that every osd request has trailing data, it just might have length 0. This means we change the r_trail field in a ceph_osd_request structure from a pointer to a structure that is always initialized. Note that in ceph_osdc_start_request(), the trail pointer (or now address of that structure) is assigned to a ceph message's trail field. Here's why that's still OK (looking at net/ceph/messenger.c): - What would have resulted in a null pointer previously will now refer to a 0-length page list. That message trail pointer is used in two functions, write_partial_msg_pages() and out_msg_pos_next(). - In write_partial_msg_pages(), a null page list pointer is handled the same as a message with 0-length trail, and both result in a "in_trail" variable set to false. The trail pointer is only used if in_trail is true. - The only other place the message trail pointer is used is out_msg_pos_next(). That function is only called by write_partial_msg_pages() and only touches the trail pointer if the in_trail value it is passed is true. Therefore a null ceph_msg->trail pointer is equivalent to a non-null pointer referring to a 0-length page list structure. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 4 ++-- net/ceph/osd_client.c | 43 ++++++++++++----------------------------- 2 files changed, 14 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index f2e5d2cdca06..61562c792855 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * Maximum object name size @@ -22,7 +23,6 @@ struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; struct ceph_authorizer; -struct ceph_pagelist; /* * completion callback for async writepages @@ -95,7 +95,7 @@ struct ceph_osd_request { struct bio *r_bio; /* instead of pages */ #endif - struct ceph_pagelist *r_trail; /* trailing part of the data */ + struct ceph_pagelist r_trail; /* trailing part of the data */ }; struct ceph_osd_event { diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 7d38327a8e89..2be50d82ccbc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,10 +171,7 @@ void ceph_osdc_release_request(struct kref *kref) bio_put(req->r_bio); #endif ceph_put_snap_context(req->r_snapc); - if (req->r_trail) { - ceph_pagelist_release(req->r_trail); - kfree(req->r_trail); - } + ceph_pagelist_release(&req->r_trail); if (req->r_mempool) mempool_free(req, req->r_osdc->req_mempool); else @@ -208,8 +205,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, { struct ceph_osd_request *req; struct ceph_msg *msg; - int needs_trail; - int num_op = get_num_ops(ops, &needs_trail); + int num_op = get_num_ops(ops, NULL); size_t msg_size = sizeof(struct ceph_osd_request_head); msg_size += num_op*sizeof(struct ceph_osd_op); @@ -252,15 +248,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, } req->r_reply = msg; - /* allocate space for the trailing data */ - if (needs_trail) { - req->r_trail = kmalloc(sizeof(struct ceph_pagelist), gfp_flags); - if (!req->r_trail) { - ceph_osdc_put_request(req); - return NULL; - } - ceph_pagelist_init(req->r_trail); - } + ceph_pagelist_init(&req->r_trail); /* create request message; allow space for oid */ msg_size += MAX_OBJ_NAME_SIZE; @@ -312,29 +300,25 @@ static void osd_req_encode_op(struct ceph_osd_request *req, case CEPH_OSD_OP_GETXATTR: case CEPH_OSD_OP_SETXATTR: case CEPH_OSD_OP_CMPXATTR: - BUG_ON(!req->r_trail); - dst->xattr.name_len = cpu_to_le32(src->xattr.name_len); dst->xattr.value_len = cpu_to_le32(src->xattr.value_len); dst->xattr.cmp_op = src->xattr.cmp_op; dst->xattr.cmp_mode = src->xattr.cmp_mode; - ceph_pagelist_append(req->r_trail, src->xattr.name, + ceph_pagelist_append(&req->r_trail, src->xattr.name, src->xattr.name_len); - ceph_pagelist_append(req->r_trail, src->xattr.val, + ceph_pagelist_append(&req->r_trail, src->xattr.val, src->xattr.value_len); break; case CEPH_OSD_OP_CALL: - BUG_ON(!req->r_trail); - dst->cls.class_len = src->cls.class_len; dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); - ceph_pagelist_append(req->r_trail, src->cls.class_name, + ceph_pagelist_append(&req->r_trail, src->cls.class_name, src->cls.class_len); - ceph_pagelist_append(req->r_trail, src->cls.method_name, + ceph_pagelist_append(&req->r_trail, src->cls.method_name, src->cls.method_len); - ceph_pagelist_append(req->r_trail, src->cls.indata, + ceph_pagelist_append(&req->r_trail, src->cls.indata, src->cls.indata_len); break; case CEPH_OSD_OP_ROLLBACK: @@ -347,11 +331,9 @@ static void osd_req_encode_op(struct ceph_osd_request *req, __le32 prot_ver = cpu_to_le32(src->watch.prot_ver); __le32 timeout = cpu_to_le32(src->watch.timeout); - BUG_ON(!req->r_trail); - - ceph_pagelist_append(req->r_trail, + ceph_pagelist_append(&req->r_trail, &prot_ver, sizeof(prot_ver)); - ceph_pagelist_append(req->r_trail, + ceph_pagelist_append(&req->r_trail, &timeout, sizeof(timeout)); } case CEPH_OSD_OP_NOTIFY_ACK: @@ -414,8 +396,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, op++; } - if (req->r_trail) - data_len += req->r_trail->length; + data_len += req->r_trail.length; if (snapc) { head->snap_seq = cpu_to_le64(snapc->seq); @@ -1715,7 +1696,7 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, #ifdef CONFIG_BLOCK req->r_request->bio = req->r_bio; #endif - req->r_request->trail = req->r_trail; + req->r_request->trail = &req->r_trail; register_request(osdc, req); -- cgit v1.2.3 From 0120be3c60d46d6d55f4bf7a3d654cc705eb0c54 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_osdc_build_request() The len argument to ceph_osdc_build_request() is set up to be passed by address, but that function never updates its value so there's no need to do this. Tighten up the interface by passing the length directly. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 54bd9fc3ef7c..c1b135b6cb97 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1172,7 +1172,7 @@ static int rbd_do_request(struct request *rq, snapid, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, &len, ops, snapc, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 61562c792855..4bfb4582439a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -224,7 +224,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct bio *bio); extern void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, struct timespec *mtime); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 37d43d5b828c..e29a3ed92958 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -336,7 +336,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 *plen, + u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, struct timespec *mtime) @@ -390,7 +390,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, if (flags & CEPH_OSD_FLAG_WRITE) { req->r_request->hdr.data_off = cpu_to_le16(off); - req->r_request->hdr.data_len = cpu_to_le32(*plen + data_len); + req->r_request->hdr.data_len = cpu_to_le32(len + data_len); } else if (data_len) { req->r_request->hdr.data_off = 0; req->r_request->hdr.data_len = cpu_to_le32(data_len); @@ -464,7 +464,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, plen, ops, + ceph_osdc_build_request(req, off, *plen, ops, snapc, mtime); -- cgit v1.2.3 From e8afad656cbcd06d02a7bacd4b318fa0e2907de0 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:19 -0600 Subject: libceph: pass length to ceph_calc_file_object_mapping() ceph_calc_file_object_mapping() takes (among other things) a "file" offset and length, and based on the layout, determines the object number ("bno") backing the affected portion of the file's data and the offset into that object where the desired range begins. It also computes the size that should be used for the request--either the amount requested or something less if that would exceed the end of the object. This patch changes the input length parameter in this function so it is used only for input. That is, the argument will be passed by value rather than by address, so the value provided won't get updated by the function. The value would only get updated if the length would surpass the current object, and in that case the value it got updated to would be exactly that returned in *oxlen. Only one of the two callers is affected by this change. Update ceph_calc_raw_layout() so it records any updated value. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/ioctl.c | 2 +- include/linux/ceph/osdmap.h | 2 +- net/ceph/osd_client.c | 6 ++++-- net/ceph/osdmap.c | 9 ++++----- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 36549a46e311..3b22150d3e19 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -194,7 +194,7 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) return -EFAULT; down_read(&osdc->map_sem); - r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, &len, + r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); if (r < 0) diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 5ea57ba69320..1f653e2ff5cc 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -110,7 +110,7 @@ extern void ceph_osdmap_destroy(struct ceph_osdmap *map); /* calculate mapping of a file extent to an object */ extern int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *bno, u64 *oxoff, u64 *oxlen); /* calculate mapping of object to a placement group */ diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index e29a3ed92958..47e5f5b1f94c 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -53,13 +53,15 @@ int ceph_calc_raw_layout(struct ceph_osd_client *osdc, reqhead->snapid = cpu_to_le64(snapid); /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, plen, bno, + r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, &objoff, &objlen); if (r < 0) return r; - if (*plen < orig_len) + if (objlen < orig_len) { + *plen = objlen; dout(" skipping last %llu, final file extent %llu~%llu\n", orig_len - *plen, off, *plen); + } if (op_has_extent(op->op)) { u32 osize = le32_to_cpu(layout->fl_object_size); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index ca05871635bc..369f03ba9ee5 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1016,7 +1016,7 @@ bad: * pass a stride back to the caller. */ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, - u64 off, u64 *plen, + u64 off, u64 len, u64 *ono, u64 *oxoff, u64 *oxlen) { @@ -1027,7 +1027,7 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, u32 su_per_object; u64 t, su_offset; - dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, + dout("mapping %llu~%llu osize %u fl_su %u\n", off, len, osize, su); if (su == 0 || sc == 0) goto invalid; @@ -1060,11 +1060,10 @@ int ceph_calc_file_object_mapping(struct ceph_file_layout *layout, /* * Calculate the length of the extent being written to the selected - * object. This is the minimum of the full length requested (plen) or + * object. This is the minimum of the full length requested (len) or * the remainder of the current stripe being written to. */ - *oxlen = min_t(u64, *plen, su - su_offset); - *plen = *oxlen; + *oxlen = min_t(u64, len, su - su_offset); dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); return 0; -- cgit v1.2.3 From 4d6b250bf18d44571d69a0f4afec4b6a1969729f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: drop snapid in ceph_calc_raw_layout() A snapshot id must be provided to ceph_calc_raw_layout() even though it is not needed at all for calculating the layout. Where the snapshot id *is* needed is when building the request message for an osd operation. Drop the snapid parameter from ceph_calc_raw_layout() and pass that value instead in ceph_osdc_build_request(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 4 ++-- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 14 ++++---------- 3 files changed, 7 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c1b135b6cb97..fa371868e9b0 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1169,10 +1169,10 @@ static int rbd_do_request(struct request *rq, rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id); ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout, - snapid, ofs, &len, &bno, osd_req, ops); + ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 4bfb4582439a..0e82a0a967ef 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -209,7 +209,6 @@ extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, - u64 snapid, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); @@ -227,6 +226,7 @@ extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, + u64 snap_id, struct timespec *mtime); extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 47e5f5b1f94c..b5a4b2875e8a 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -40,18 +40,14 @@ static int op_has_extent(int op) int ceph_calc_raw_layout(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, - u64 snapid, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base; u64 orig_len = *plen; u64 objoff, objlen; /* extent in object */ int r; - reqhead->snapid = cpu_to_le64(snapid); - /* object extent? */ r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, &objoff, &objlen); @@ -121,8 +117,7 @@ static int calc_layout(struct ceph_osd_client *osdc, u64 bno; int r; - r = ceph_calc_raw_layout(osdc, layout, vino.snap, off, - plen, &bno, req, op); + r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op); if (r < 0) return r; @@ -340,7 +335,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, struct ceph_osd_req_op *src_ops, - struct ceph_snap_context *snapc, + struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime) { struct ceph_msg *msg = req->r_request; @@ -355,6 +350,7 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, int i; head = msg->front.iov_base; + head->snapid = cpu_to_le64(snap_id); op = (void *)(head + 1); p = (void *)(op + num_op); @@ -466,9 +462,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, *plen, ops, - snapc, - mtime); + ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime); return req; } -- cgit v1.2.3 From e75b45cf36565fd8ba206a9d80f670a86e61ba2f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:14 -0600 Subject: libceph: drop osdc from ceph_calc_raw_layout() The osdc parameter to ceph_calc_raw_layout() is not used, so get rid of it. Consequently, the corresponding parameter in calc_layout() becomes unused, so get rid of that as well. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 10 ++++------ 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index fa371868e9b0..ac8fd3856509 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1168,7 +1168,7 @@ static int rbd_do_request(struct request *rq, osd_req->r_oid_len = strlen(osd_req->r_oid); rbd_layout_init(&osd_req->r_file_layout, rbd_dev->spec->pool_id); - ret = ceph_calc_raw_layout(osdc, &osd_req->r_file_layout, + ret = ceph_calc_raw_layout(&osd_req->r_file_layout, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 0e82a0a967ef..fe3a6e8db1f9 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,8 +207,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, +extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index b5a4b2875e8a..cd9c28387de3 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -38,8 +38,7 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_osd_client *osdc, - struct ceph_file_layout *layout, +int ceph_calc_raw_layout(struct ceph_file_layout *layout, u64 off, u64 *plen, u64 *bno, struct ceph_osd_request *req, struct ceph_osd_req_op *op) @@ -107,8 +106,7 @@ EXPORT_SYMBOL(ceph_calc_raw_layout); * * fill osd op in request message. */ -static int calc_layout(struct ceph_osd_client *osdc, - struct ceph_vino vino, +static int calc_layout(struct ceph_vino vino, struct ceph_file_layout *layout, u64 off, u64 *plen, struct ceph_osd_request *req, @@ -117,7 +115,7 @@ static int calc_layout(struct ceph_osd_client *osdc, u64 bno; int r; - r = ceph_calc_raw_layout(osdc, layout, off, plen, &bno, req, op); + r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op); if (r < 0) return r; @@ -452,7 +450,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, return ERR_PTR(-ENOMEM); /* calculate max write size */ - r = calc_layout(osdc, vino, layout, off, plen, req, ops); + r = calc_layout(vino, layout, off, plen, req, ops); if (r < 0) return ERR_PTR(r); req->r_file_layout = *layout; /* keep a copy */ -- cgit v1.2.3 From d178a9e74006e80f568d87e29f2a68f14fc7cbb1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set flags in ceph_osdc_alloc_request() The only thing ceph_osdc_alloc_request() really does with the flags value it is passed is assign it to the newly-created osd request structure. Do that in the caller instead. Both callers subsequently call ceph_osdc_build_request(), so have that function (instead of ceph_osdc_alloc_request()) issue a warning if a request comes through with neither the read nor write flags set. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 3 ++- include/linux/ceph/osd_client.h | 1 - net/ceph/osd_client.c | 11 ++++------- 3 files changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index ac8fd3856509..bdbaa4cfd9d3 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1148,13 +1148,14 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, flags, snapc, ops, + osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO, pages, bio); if (!osd_req) { ret = -ENOMEM; goto done_pages; } + osd_req->r_flags = flags; osd_req->r_callback = rbd_cb; rbd_req->rq = rq; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fe3a6e8db1f9..6ddda5bbd1a6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -213,7 +213,6 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, struct ceph_osd_req_op *op); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cd9c28387de3..77ce1edaa07d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -171,7 +171,6 @@ static int get_num_ops(struct ceph_osd_req_op *ops) } struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, - int flags, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, @@ -208,10 +207,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, INIT_LIST_HEAD(&req->r_req_lru_item); INIT_LIST_HEAD(&req->r_osd_item); - req->r_flags = flags; - - WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); - /* create reply message */ if (use_mempool) msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); @@ -347,6 +342,8 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, u64 data_len = 0; int i; + WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); + head = msg->front.iov_base; head->snapid = cpu_to_le64(snap_id); op = (void *)(head + 1); @@ -442,12 +439,12 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } else ops[1].op = 0; - req = ceph_osdc_alloc_request(osdc, flags, - snapc, ops, + req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS, NULL, NULL); if (!req) return ERR_PTR(-ENOMEM); + req->r_flags = flags; /* calculate max write size */ r = calc_layout(vino, layout, off, plen, req, ops); -- cgit v1.2.3 From 54a5400721da7fa5a16cea151aade5bdfee74111 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: don't set pages or bio in ceph_osdc_alloc_request() Only one of the two callers of ceph_osdc_alloc_request() provides page or bio data for its payload. And essentially all that function was doing with those arguments was assigning them to fields in the osd request structure. Simplify ceph_osdc_alloc_request() by having the caller take care of making those assignments Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 8 ++++++-- include/linux/ceph/osd_client.h | 4 +--- net/ceph/osd_client.c | 15 ++------------- 3 files changed, 9 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index bdbaa4cfd9d3..d1445df6398a 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1148,14 +1148,18 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, - false, GFP_NOIO, pages, bio); + osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO); if (!osd_req) { ret = -ENOMEM; goto done_pages; } osd_req->r_flags = flags; + osd_req->r_pages = pages; + if (bio) { + osd_req->r_bio = bio; + bio_get(osd_req->r_bio); + } osd_req->r_callback = rbd_cb; rbd_req->rq = rq; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6ddda5bbd1a6..75f56d372d44 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -216,9 +216,7 @@ extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client * struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio); + gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 77ce1edaa07d..bdc3bb12bfd5 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -174,9 +174,7 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, struct ceph_osd_req_op *ops, bool use_mempool, - gfp_t gfp_flags, - struct page **pages, - struct bio *bio) + gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; @@ -237,13 +235,6 @@ struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, memset(msg->front.iov_base, 0, msg->front.iov_len); req->r_request = msg; - req->r_pages = pages; -#ifdef CONFIG_BLOCK - if (bio) { - req->r_bio = bio; - bio_get(req->r_bio); - } -#endif return req; } @@ -439,9 +430,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, } else ops[1].op = 0; - req = ceph_osdc_alloc_request(osdc, snapc, ops, - use_mempool, - GFP_NOFS, NULL, NULL); + req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); req->r_flags = flags; -- cgit v1.2.3 From ae7ca4a35b1f5df86e2c32b2cfc01a8d528c7b8c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Tue, 13 Nov 2012 21:11:15 -0600 Subject: libceph: pass num_op with ops Both ceph_osdc_alloc_request() and ceph_osdc_build_request() are provided an array of ceph osd request operations. Rather than just passing the number of operations in the array, the caller is required append an additional zeroed operation structure to signal the end of the array. All callers know the number of operations at the time these functions are called, so drop the silly zero entry and supply that number directly. As a result, get_num_ops() is no longer needed. This also means that ceph_osdc_alloc_request() never uses its ops argument, so that can be dropped. Also rbd_create_rw_ops() no longer needs to add one to reserve room for the additional op. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 9 +++++---- include/linux/ceph/osd_client.h | 3 ++- net/ceph/osd_client.c | 43 ++++++++++++++--------------------------- 3 files changed, 22 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index b6872d3cb04c..88de8ccb29bd 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1026,12 +1026,12 @@ out_err: /* * helpers for osd request op vectors. */ -static struct ceph_osd_req_op *rbd_create_rw_ops(int num_ops, +static struct ceph_osd_req_op *rbd_create_rw_ops(int num_op, int opcode, u32 payload_len) { struct ceph_osd_req_op *ops; - ops = kzalloc(sizeof (*ops) * (num_ops + 1), GFP_NOIO); + ops = kzalloc(num_op * sizeof (*ops), GFP_NOIO); if (!ops) return NULL; @@ -1149,7 +1149,7 @@ static int rbd_do_request(struct request *rq, (unsigned long long) len, coll, coll_index); osdc = &rbd_dev->rbd_client->client->osdc; - osd_req = ceph_osdc_alloc_request(osdc, snapc, ops, false, GFP_NOIO); + osd_req = ceph_osdc_alloc_request(osdc, snapc, num_op, false, GFP_NOIO); if (!osd_req) { ret = -ENOMEM; goto done_pages; @@ -1178,7 +1178,8 @@ static int rbd_do_request(struct request *rq, ofs, &len, &bno, osd_req, ops); rbd_assert(ret == 0); - ceph_osdc_build_request(osd_req, ofs, len, ops, snapc, snapid, &mtime); + ceph_osdc_build_request(osd_req, ofs, len, num_op, ops, + snapc, snapid, &mtime); if (linger_req) { ceph_osdc_set_request_linger(osdc, osd_req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 75f56d372d44..2b04d054e09d 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -214,12 +214,13 @@ extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, gfp_t gfp_flags); extern void ceph_osdc_build_request(struct ceph_osd_request *req, u64 off, u64 len, + unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index bdc3bb12bfd5..500ae8b49321 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -160,25 +160,14 @@ void ceph_osdc_release_request(struct kref *kref) } EXPORT_SYMBOL(ceph_osdc_release_request); -static int get_num_ops(struct ceph_osd_req_op *ops) -{ - int i = 0; - - while (ops[i].op) - i++; - - return i; -} - struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, - struct ceph_osd_req_op *ops, + unsigned int num_op, bool use_mempool, gfp_t gfp_flags) { struct ceph_osd_request *req; struct ceph_msg *msg; - int num_op = get_num_ops(ops); size_t msg_size = sizeof(struct ceph_osd_request_head); msg_size += num_op*sizeof(struct ceph_osd_op); @@ -317,7 +306,7 @@ static void osd_req_encode_op(struct ceph_osd_request *req, * */ void ceph_osdc_build_request(struct ceph_osd_request *req, - u64 off, u64 len, + u64 off, u64 len, unsigned int num_op, struct ceph_osd_req_op *src_ops, struct ceph_snap_context *snapc, u64 snap_id, struct timespec *mtime) @@ -327,7 +316,6 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, struct ceph_osd_req_op *src_op; struct ceph_osd_op *op; void *p; - int num_op = get_num_ops(src_ops); size_t msg_size = sizeof(*head) + num_op*sizeof(*op); int flags = req->r_flags; u64 data_len = 0; @@ -346,20 +334,17 @@ void ceph_osdc_build_request(struct ceph_osd_request *req, head->flags = cpu_to_le32(flags); if (flags & CEPH_OSD_FLAG_WRITE) ceph_encode_timespec(&head->mtime, mtime); + BUG_ON(num_op > (unsigned int) ((u16) -1)); head->num_ops = cpu_to_le16(num_op); - /* fill in oid */ head->object_len = cpu_to_le32(req->r_oid_len); memcpy(p, req->r_oid, req->r_oid_len); p += req->r_oid_len; src_op = src_ops; - while (src_op->op) { - osd_req_encode_op(req, op, src_op); - src_op++; - op++; - } + while (num_op--) + osd_req_encode_op(req, op++, src_op++); data_len += req->r_trail.length; @@ -414,23 +399,24 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, bool use_mempool, int num_reply, int page_align) { - struct ceph_osd_req_op ops[3]; + struct ceph_osd_req_op ops[2]; struct ceph_osd_request *req; + unsigned int num_op = 1; int r; + memset(&ops, 0, sizeof ops); + ops[0].op = opcode; ops[0].extent.truncate_seq = truncate_seq; ops[0].extent.truncate_size = truncate_size; - ops[0].payload_len = 0; if (do_sync) { ops[1].op = CEPH_OSD_OP_STARTSYNC; - ops[1].payload_len = 0; - ops[2].op = 0; - } else - ops[1].op = 0; + num_op++; + } - req = ceph_osdc_alloc_request(osdc, snapc, ops, use_mempool, GFP_NOFS); + req = ceph_osdc_alloc_request(osdc, snapc, num_op, use_mempool, + GFP_NOFS); if (!req) return ERR_PTR(-ENOMEM); req->r_flags = flags; @@ -446,7 +432,8 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, req->r_num_pages = calc_pages_for(page_align, *plen); req->r_page_alignment = page_align; - ceph_osdc_build_request(req, off, *plen, ops, snapc, vino.snap, mtime); + ceph_osdc_build_request(req, off, *plen, num_op, ops, + snapc, vino.snap, mtime); return req; } -- cgit v1.2.3 From 2b5fc648af5eec2f4fe984cb6b926214e02c5cf4 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 14 Nov 2012 09:38:20 -0600 Subject: rbd: kill ceph_osd_req_op->flags The flags field of struct ceph_osd_req_op is never used, so just get rid of it. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 2b04d054e09d..69287ccfe68a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -157,7 +157,6 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ - u32 flags; /* CEPH_OSD_FLAG_* */ union { struct { u64 offset, length; -- cgit v1.2.3 From 3ebc21f7bc2f9c0145bbbf0f12430b766a200f9f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 31 Jan 2013 16:02:01 -0600 Subject: libceph: fix messenger CONFIG_BLOCK dependencies The ceph messenger has a few spots that are only used when bio messages are supported, and that's only when CONFIG_BLOCK is defined. This surrounds a couple of spots with #ifdef's that would cause a problem if CONFIG_BLOCK were not present in the kernel configuration. This resolves: http://tracker.ceph.com/issues/3976 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/messenger.h | 2 ++ net/ceph/messenger.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 14ba5ee738a9..60903e0f665c 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -83,9 +83,11 @@ struct ceph_msg { struct list_head list_head; struct kref kref; +#ifdef CONFIG_BLOCK struct bio *bio; /* instead of pages/pagelist */ struct bio *bio_iter; /* bio iterator */ int bio_seg; /* current bio segment */ +#endif /* CONFIG_BLOCK */ struct ceph_pagelist *trail; /* the trailing part of the data */ bool front_is_vmalloc; bool more_to_follow; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5ccf87ed8d68..8a62a559a2aa 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -9,8 +9,9 @@ #include #include #include +#ifdef CONFIG_BLOCK #include -#include +#endif /* CONFIG_BLOCK */ #include #include @@ -2651,9 +2652,11 @@ struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags, m->page_alignment = 0; m->pages = NULL; m->pagelist = NULL; +#ifdef CONFIG_BLOCK m->bio = NULL; m->bio_iter = NULL; m->bio_seg = 0; +#endif /* CONFIG_BLOCK */ m->trail = NULL; /* front */ -- cgit v1.2.3 From 72fe25e3460c8673984370208e0e6261101372d6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 30 Jan 2013 11:13:33 -0600 Subject: libceph: add a compatibility check interface An upcoming change implements semantic change that could lead to a crash if an old version of the libceph kernel module is used with a new version of the rbd kernel module. In order to preclude that possibility, this adds a compatibilty check interface. If this interface doesn't exist, the modules are obviously not compatible. But if it does exist, this provides a way of letting the caller know whether it will operate properly with this libceph module. Perhaps confusingly, it returns false right now. The semantic change mentioned above will make it return true. This resolves: http://tracker.ceph.com/issues/3800 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 2 ++ net/ceph/ceph_common.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 084d3c622b12..c44275ab375c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -193,6 +193,8 @@ static inline int calc_pages_for(u64 off, u64 len) } /* ceph_common.c */ +extern bool libceph_compatible(void *data); + extern const char *ceph_msg_type_name(int type); extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); extern struct kmem_cache *ceph_inode_cachep; diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index ee71ea26777a..a98c03ff853f 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -26,6 +26,22 @@ #include "crypto.h" +/* + * Module compatibility interface. For now it doesn't do anything, + * but its existence signals a certain level of functionality. + * + * The data buffer is used to pass information both to and from + * libceph. The return value indicates whether libceph determines + * it is compatible with the caller (from another kernel module), + * given the provided data. + * + * The data pointer can be null. + */ +bool libceph_compatible(void *data) +{ + return false; +} +EXPORT_SYMBOL(libceph_compatible); /* * find filename portion of a path (/foo/bar/baz -> baz) -- cgit v1.2.3 From e7e319a9c51409c7effe34333ea26facf2fab9e1 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Thu, 14 Feb 2013 12:16:43 -0600 Subject: libceph: improve packing in struct ceph_osd_req_op The layout of struct ceph_osd_req_op leaves lots of holes. Rearranging things a little for better field alignment reduces the size by a third. This resolves: http://tracker.ceph.com/issues/4163 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 69287ccfe68a..82bf6338d6c1 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -157,6 +157,7 @@ struct ceph_osd_client { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 payload_len; union { struct { u64 offset, length; @@ -165,23 +166,24 @@ struct ceph_osd_req_op { } extent; struct { const char *name; - u32 name_len; const char *val; + u32 name_len; u32 value_len; __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ } xattr; struct { const char *class_name; - __u8 class_len; const char *method_name; - __u8 method_len; - __u8 argc; const char *indata; u32 indata_len; + __u8 class_len; + __u8 method_len; + __u8 argc; } cls; struct { - u64 cookie, count; + u64 cookie; + u64 count; } pgls; struct { u64 snapid; @@ -189,12 +191,11 @@ struct ceph_osd_req_op { struct { u64 cookie; u64 ver; - __u8 flag; u32 prot_ver; u32 timeout; + __u8 flag; } watch; }; - u32 payload_len; }; extern int ceph_osdc_init(struct ceph_osd_client *osdc, -- cgit v1.2.3 From 87f979d390f9ecfa3d0038a9f9a002a62f8a1895 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "nofail" parameter There is only one caller of ceph_osdc_writepages(), and it always passes the value true as its "nofail" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with the constant value true. This and a number of cleanup patches that follow resolve: http://tracker.ceph.com/issues/4126 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 064d1a68d2c1..c7e401c96fc9 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0, true); + &page, 1, 0, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 82bf6338d6c1..afcb255b016a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync, bool nofail); + int flags, int do_sync); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d9d58bbe9f9a..dd01b1340e95 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int num_pages, - int flags, int do_sync, bool nofail) + int flags, int do_sync) { struct ceph_osd_request *req; int rc = 0; @@ -1880,7 +1880,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_WRITE, snapc, do_sync, truncate_seq, truncate_size, mtime, - nofail, 1, page_align); + true, 1, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1889,7 +1889,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, dout("writepages %llu~%llu (%d pages)\n", off, len, req->r_num_pages); - rc = ceph_osdc_start_request(osdc, req, nofail); + rc = ceph_osdc_start_request(osdc, req, true); if (!rc) rc = ceph_osdc_wait_request(osdc, req); -- cgit v1.2.3 From fbf8685fb155e12a9f4d4b966c7b3442ed557687 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "dosync" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "dosync" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 2 +- include/linux/ceph/osd_client.h | 2 +- net/ceph/osd_client.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index c7e401c96fc9..bef552819312 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -493,7 +493,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) page_off, len, ci->i_truncate_seq, ci->i_truncate_size, &inode->i_mtime, - &page, 1, 0, 0); + &page, 1, 0); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index afcb255b016a..7a63100a3e69 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -275,7 +275,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int nr_pages, - int flags, int do_sync); + int flags); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index dd01b1340e95..ac186b7c9986 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1867,7 +1867,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, struct page **pages, int num_pages, - int flags, int do_sync) + int flags) { struct ceph_osd_request *req; int rc = 0; @@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_OP_WRITE, flags | CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, - snapc, do_sync, + snapc, 0, truncate_seq, truncate_size, mtime, true, 1, page_align); if (IS_ERR(req)) -- cgit v1.2.3 From 2480882611e3ab844563dd3d0a822227604ab8fe Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_writepages() "flags" parameter There is only one caller of ceph_osdc_writepages(), and it always passes 0 as its "flags" argument. Get rid of that argument and replace its use in ceph_osdc_writepages() with 0. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 3 +-- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 6 ++---- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index bef552819312..8d3240d6f289 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -492,8 +492,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) &ci->i_layout, snapc, page_off, len, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, - &page, 1, 0); + &inode->i_mtime, &page, 1); if (err < 0) { dout("writepage setting page/mapping error %d %p\n", err, page); SetPageError(page); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 7a63100a3e69..6540e8861998 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -274,8 +274,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int nr_pages, - int flags); + struct page **pages, int nr_pages); /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index ac186b7c9986..d4e3812bcebc 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1866,8 +1866,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, u64 off, u64 len, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - struct page **pages, int num_pages, - int flags) + struct page **pages, int num_pages) { struct ceph_osd_request *req; int rc = 0; @@ -1876,8 +1875,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, BUG_ON(vino.snap != CEPH_NOSNAP); req = ceph_osdc_new_request(osdc, layout, vino, off, &len, CEPH_OSD_OP_WRITE, - flags | CEPH_OSD_FLAG_ONDISK | - CEPH_OSD_FLAG_WRITE, + CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, snapc, 0, truncate_seq, truncate_size, mtime, true, 1, page_align); -- cgit v1.2.3 From a3bea47e8bdd51d921e5b2045720d60140612c7c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: ceph: kill ceph_osdc_new_request() "num_reply" parameter The "num_reply" parameter to ceph_osdc_new_request() is never used inside that function, so get rid of it. Note that ceph_sync_write() passes 2 for that argument, while all other callers pass 1. It doesn't matter, but perhaps someone should verify this doesn't indicate a problem. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- fs/ceph/addr.c | 4 ++-- fs/ceph/file.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8d3240d6f289..fc613715af46 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -315,7 +315,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, ci->i_truncate_seq, ci->i_truncate_size, - NULL, false, 1, 0); + NULL, false, 0); if (IS_ERR(req)) return PTR_ERR(req); @@ -837,7 +837,7 @@ get_more_pages: snapc, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &inode->i_mtime, true, 1, 0); + &inode->i_mtime, true, 0); if (IS_ERR(req)) { rc = PTR_ERR(req); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index a1e5b81e8118..9c4325e654ca 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -541,7 +541,7 @@ more: ci->i_snap_realm->cached_context, do_sync, ci->i_truncate_seq, ci->i_truncate_size, - &mtime, false, 2, page_align); + &mtime, false, page_align); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 6540e8861998..5812802bd8ae 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -234,8 +234,7 @@ extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, int do_sync, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, - int page_align); + bool use_mempool, int page_align); extern void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, struct ceph_osd_request *req); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index d4e3812bcebc..d3e75138506b 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -393,7 +393,7 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, u32 truncate_seq, u64 truncate_size, struct timespec *mtime, - bool use_mempool, int num_reply, + bool use_mempool, int page_align) { struct ceph_osd_req_op ops[2]; @@ -1837,7 +1837,7 @@ int ceph_osdc_readpages(struct ceph_osd_client *osdc, req = ceph_osdc_new_request(osdc, layout, vino, off, plen, CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL, 0, truncate_seq, truncate_size, NULL, - false, 1, page_align); + false, page_align); if (IS_ERR(req)) return PTR_ERR(req); @@ -1878,7 +1878,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE, snapc, 0, truncate_seq, truncate_size, mtime, - true, 1, page_align); + true, page_align); if (IS_ERR(req)) return PTR_ERR(req); -- cgit v1.2.3 From 60e56f138180e72fa8487d4b9c1c916013494f46 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:29 -0600 Subject: libceph: kill ceph_calc_raw_layout() There is no caller of ceph_calc_raw_layout() outside of libceph, so there's no need to export from the module. Furthermore, there is only one caller, in calc_layout(), and it is not much more than a simple wrapper for that function. So get rid of ceph_calc_raw_layout() and embed it instead within calc_layout(). While touching "osd_client.c", get rid of the unnecessary forward declaration of __send_request(). Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 5 --- net/ceph/osd_client.c | 77 +++++++++++++++++------------------------ 2 files changed, 32 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 5812802bd8ae..c39e7ed4b203 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -207,11 +207,6 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); -extern int ceph_calc_raw_layout(struct ceph_file_layout *layout, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op); - extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, unsigned int num_op, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 0d67cd37173b..cd3a489b7438 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -38,49 +38,6 @@ static int op_has_extent(int op) op == CEPH_OSD_OP_WRITE); } -int ceph_calc_raw_layout(struct ceph_file_layout *layout, - u64 off, u64 *plen, u64 *bno, - struct ceph_osd_request *req, - struct ceph_osd_req_op *op) -{ - u64 orig_len = *plen; - u64 objoff, objlen; /* extent in object */ - int r; - - /* object extent? */ - r = ceph_calc_file_object_mapping(layout, off, orig_len, bno, - &objoff, &objlen); - if (r < 0) - return r; - if (objlen < orig_len) { - *plen = objlen; - dout(" skipping last %llu, final file extent %llu~%llu\n", - orig_len - *plen, off, *plen); - } - - if (op_has_extent(op->op)) { - u32 osize = le32_to_cpu(layout->fl_object_size); - op->extent.offset = objoff; - op->extent.length = objlen; - if (op->extent.truncate_size <= off - objoff) { - op->extent.truncate_size = 0; - } else { - op->extent.truncate_size -= off - objoff; - if (op->extent.truncate_size > osize) - op->extent.truncate_size = osize; - } - } - req->r_num_pages = calc_pages_for(off, *plen); - req->r_page_alignment = off & ~PAGE_MASK; - if (op->op == CEPH_OSD_OP_WRITE) - op->payload_len = *plen; - - dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", - *bno, objoff, objlen, req->r_num_pages); - return 0; -} -EXPORT_SYMBOL(ceph_calc_raw_layout); - /* * Implement client access to distributed object storage cluster. * @@ -112,12 +69,42 @@ static int calc_layout(struct ceph_vino vino, struct ceph_osd_request *req, struct ceph_osd_req_op *op) { - u64 bno; + u64 orig_len = *plen; + u64 bno = 0; + u64 objoff = 0; + u64 objlen = 0; int r; - r = ceph_calc_raw_layout(layout, off, plen, &bno, req, op); + /* object extent? */ + r = ceph_calc_file_object_mapping(layout, off, orig_len, &bno, + &objoff, &objlen); if (r < 0) return r; + if (objlen < orig_len) { + *plen = objlen; + dout(" skipping last %llu, final file extent %llu~%llu\n", + orig_len - *plen, off, *plen); + } + + if (op_has_extent(op->op)) { + u32 osize = le32_to_cpu(layout->fl_object_size); + op->extent.offset = objoff; + op->extent.length = objlen; + if (op->extent.truncate_size <= off - objoff) { + op->extent.truncate_size = 0; + } else { + op->extent.truncate_size -= off - objoff; + if (op->extent.truncate_size > osize) + op->extent.truncate_size = osize; + } + } + req->r_num_pages = calc_pages_for(off, *plen); + req->r_page_alignment = off & ~PAGE_MASK; + if (op->op == CEPH_OSD_OP_WRITE) + op->payload_len = *plen; + + dout("calc_layout bno=%llx %llu~%llu (%d pages)\n", + bno, objoff, objlen, req->r_num_pages); snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno); req->r_oid_len = strlen(req->r_oid); -- cgit v1.2.3 From 3c663bbdcdf9296e0fe3362acb9e81f49d7b72c6 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_create_event() "one_shot" parameter There is only one caller of ceph_osdc_create_event(), and it provides 0 as its "one_shot" argument. Get rid of that argument and just use 0 in its place. Replace the code in handle_watch_notify() that executes if one_shot is nonzero in the event with a BUG_ON() call. While modifying "osd_client.c", give handle_watch_notify() static scope. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 2 +- include/linux/ceph/osd_client.h | 3 +-- net/ceph/osd_client.c | 11 +++++------ 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 982963ec2607..13ee789f2328 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1744,7 +1744,7 @@ static int rbd_dev_header_watch_sync(struct rbd_device *rbd_dev, int start) rbd_assert(start ^ !!rbd_dev->watch_request); if (start) { - ret = ceph_osdc_create_event(osdc, rbd_watch_cb, 0, rbd_dev, + ret = ceph_osdc_create_event(osdc, rbd_watch_cb, rbd_dev, &rbd_dev->watch_event); if (ret < 0) return ret; diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c39e7ed4b203..39c55d61e159 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -273,8 +273,7 @@ extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, /* watch/notify events */ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent); + void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); extern int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout); diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index cd3a489b7438..4322faa7c811 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1477,8 +1477,7 @@ static void __remove_event(struct ceph_osd_event *event) int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), - int one_shot, void *data, - struct ceph_osd_event **pevent) + void *data, struct ceph_osd_event **pevent) { struct ceph_osd_event *event; @@ -1488,7 +1487,7 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, dout("create_event %p\n", event); event->cb = event_cb; - event->one_shot = one_shot; + event->one_shot = 0; event->data = data; event->osdc = osdc; INIT_LIST_HEAD(&event->osd_node); @@ -1541,7 +1540,8 @@ static void do_event_work(struct work_struct *work) /* * Process osd watch notifications */ -void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) +static void handle_watch_notify(struct ceph_osd_client *osdc, + struct ceph_msg *msg) { void *p, *end; u8 proto_ver; @@ -1562,9 +1562,8 @@ void handle_watch_notify(struct ceph_osd_client *osdc, struct ceph_msg *msg) spin_lock(&osdc->event_lock); event = __find_event(osdc, cookie); if (event) { + BUG_ON(event->one_shot); get_event(event); - if (event->one_shot) - __remove_event(event); } spin_unlock(&osdc->event_lock); dout("handle_watch_notify cookie %lld ver %lld event %p\n", -- cgit v1.2.3 From 2d2f522699fe8b827087941eb31b9a12cf465f17 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: kill ceph_osdc_wait_event() There are no actual users of ceph_osdc_wait_event(). This would have been one-shot events, but we no longer support those so just get rid of this function. Since this leaves nothing else that waits for the completion of an event, we can get rid of the completion in a struct ceph_osd_event. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/osd_client.h | 3 --- net/ceph/osd_client.c | 18 ------------------ 2 files changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 39c55d61e159..388158ff0cbc 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -107,7 +107,6 @@ struct ceph_osd_event { struct rb_node node; struct list_head osd_node; struct kref kref; - struct completion completion; }; struct ceph_osd_event_work { @@ -275,8 +274,6 @@ extern int ceph_osdc_create_event(struct ceph_osd_client *osdc, void (*event_cb)(u64, u64, u8, void *), void *data, struct ceph_osd_event **pevent); extern void ceph_osdc_cancel_event(struct ceph_osd_event *event); -extern int ceph_osdc_wait_event(struct ceph_osd_event *event, - unsigned long timeout); extern void ceph_osdc_put_event(struct ceph_osd_event *event); #endif diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 4322faa7c811..ad6b8b35f5ca 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1494,7 +1494,6 @@ int ceph_osdc_create_event(struct ceph_osd_client *osdc, RB_CLEAR_NODE(&event->node); kref_init(&event->kref); /* one ref for us */ kref_get(&event->kref); /* one ref for the caller */ - init_completion(&event->completion); spin_lock(&osdc->event_lock); event->cookie = ++osdc->event_count; @@ -1530,7 +1529,6 @@ static void do_event_work(struct work_struct *work) dout("do_event_work completing %p\n", event); event->cb(ver, notify_id, opcode, event->data); - complete(&event->completion); dout("do_event_work completed %p\n", event); ceph_osdc_put_event(event); kfree(event_work); @@ -1588,7 +1586,6 @@ static void handle_watch_notify(struct ceph_osd_client *osdc, return; done_err: - complete(&event->completion); ceph_osdc_put_event(event); return; @@ -1597,21 +1594,6 @@ bad: return; } -int ceph_osdc_wait_event(struct ceph_osd_event *event, unsigned long timeout) -{ - int err; - - dout("wait_event %p\n", event); - err = wait_for_completion_interruptible_timeout(&event->completion, - timeout * HZ); - ceph_osdc_put_event(event); - if (err > 0) - err = 0; - dout("wait_event %p returns %d\n", event, err); - return err; -} -EXPORT_SYMBOL(ceph_osdc_wait_event); - /* * Register request, send initial attempt. */ -- cgit v1.2.3 From 0315a7770983bbe69211efed1aaee08324acd54c Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update rados.h Update most of "include/linux/ceph/rados.h" to match its user space counterpart in "src/include/rados.h" in the ceph tree. Almost everything that has changed is either: - added or revised comments - added definitions (therefore no real effect on existing code) - defining the same value a different way (e.g., "1 << 0" vs "1") The only exceptions are: - The declaration of ceph_osd_state_name() was excluded; that will be inserted in the next patch. - ceph_osd_op_mode_read() and ceph_osd_op_mode_modify() are defined differently, but they were never used in the kernel - CEPH_OSD_FLAG_PEERSTAT is now CEPH_OSD_FLAG_PEERSTAT_OLD, but that was never used in the kernel Anything that was present in this file but not in its user space counterpart was left intact here. I left the definitions of EOLDSNAPC and EBLACKLISTED using numerical values here; I'm not sure the right way to go with those. This and the next two commits resolve: http://tracker.ceph.com/issues/4164 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 91 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2c04afeead1c..9c3b4aaf516b 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -145,8 +145,10 @@ struct ceph_eversion { */ /* status bits */ -#define CEPH_OSD_EXISTS 1 -#define CEPH_OSD_UP 2 +#define CEPH_OSD_EXISTS (1<<0) +#define CEPH_OSD_UP (1<<1) +#define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ +#define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 @@ -161,9 +163,25 @@ struct ceph_eversion { #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ +#define CEPH_OSDMAP_NOUP (1<<5) /* block osd boot */ +#define CEPH_OSDMAP_NODOWN (1<<6) /* block osd mark-down/failure */ +#define CEPH_OSDMAP_NOOUT (1<<7) /* block osd auto mark-out */ +#define CEPH_OSDMAP_NOIN (1<<8) /* block osd auto mark-in */ +#define CEPH_OSDMAP_NOBACKFILL (1<<9) /* block osd backfill */ +#define CEPH_OSDMAP_NORECOVER (1<<10) /* block osd recovery and backfill */ + +/* + * The error code to return when an OSD can't handle a write + * because it is too large. + */ +#define OSD_WRITETOOBIG EMSGSIZE /* * osd ops + * + * WARNING: do not use these op codes directly. Use the helpers + * defined below instead. In certain cases, op code behavior was + * redefined, resulting in special-cases in the helpers. */ #define CEPH_OSD_OP_MODE 0xf000 #define CEPH_OSD_OP_MODE_RD 0x1000 @@ -177,6 +195,7 @@ struct ceph_eversion { #define CEPH_OSD_OP_TYPE_ATTR 0x0300 #define CEPH_OSD_OP_TYPE_EXEC 0x0400 #define CEPH_OSD_OP_TYPE_PG 0x0500 +#define CEPH_OSD_OP_TYPE_MULTI 0x0600 /* multiobject */ enum { /** data **/ @@ -217,6 +236,23 @@ enum { CEPH_OSD_OP_WATCH = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 15, + /* omap */ + CEPH_OSD_OP_OMAPGETKEYS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 17, + CEPH_OSD_OP_OMAPGETVALS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 18, + CEPH_OSD_OP_OMAPGETHEADER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 19, + CEPH_OSD_OP_OMAPGETVALSBYKEYS = + CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 20, + CEPH_OSD_OP_OMAPSETVALS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 21, + CEPH_OSD_OP_OMAPSETHEADER = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 22, + CEPH_OSD_OP_OMAPCLEAR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 23, + CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, + CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + + /** multi **/ + CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, + CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, + CEPH_OSD_OP_SRC_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 3, + /** attrs **/ /* read */ CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, @@ -238,6 +274,7 @@ enum { CEPH_OSD_OP_SCRUB_RESERVE = CEPH_OSD_OP_MODE_SUB | 6, CEPH_OSD_OP_SCRUB_UNRESERVE = CEPH_OSD_OP_MODE_SUB | 7, CEPH_OSD_OP_SCRUB_STOP = CEPH_OSD_OP_MODE_SUB | 8, + CEPH_OSD_OP_SCRUB_MAP = CEPH_OSD_OP_MODE_SUB | 9, /** lock **/ CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, @@ -248,10 +285,12 @@ enum { CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, /** exec **/ + /* note: the RD bit here is wrong; see special-case below in helper */ CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, /** pg **/ CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, + CEPH_OSD_OP_PGLS_FILTER = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 2, }; static inline int ceph_osd_op_type_lock(int op) @@ -274,6 +313,10 @@ static inline int ceph_osd_op_type_pg(int op) { return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG; } +static inline int ceph_osd_op_type_multi(int op) +{ + return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_MULTI; +} static inline int ceph_osd_op_mode_subop(int op) { @@ -281,11 +324,12 @@ static inline int ceph_osd_op_mode_subop(int op) } static inline int ceph_osd_op_mode_read(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD; + return (op & CEPH_OSD_OP_MODE_RD) && + op != CEPH_OSD_OP_CALL; } static inline int ceph_osd_op_mode_modify(int op) { - return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR; + return op & CEPH_OSD_OP_MODE_WR; } /* @@ -294,34 +338,38 @@ static inline int ceph_osd_op_mode_modify(int op) */ #define CEPH_OSD_TMAP_HDR 'h' #define CEPH_OSD_TMAP_SET 's' +#define CEPH_OSD_TMAP_CREATE 'c' /* create key */ #define CEPH_OSD_TMAP_RM 'r' +#define CEPH_OSD_TMAP_RMSLOPPY 'R' extern const char *ceph_osd_op_name(int op); - /* * osd op flags * * An op may be READ, WRITE, or READ|WRITE. */ enum { - CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ - CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ - CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ - CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ - CEPH_OSD_FLAG_READ = 16, /* op may read */ - CEPH_OSD_FLAG_WRITE = 32, /* op may write */ - CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ - CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ - CEPH_OSD_FLAG_BALANCE_READS = 256, - CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ - CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ - CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ - CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ + CEPH_OSD_FLAG_ACK = 0x0001, /* want (or is) "ack" ack */ + CEPH_OSD_FLAG_ONNVRAM = 0x0002, /* want (or is) "onnvram" ack */ + CEPH_OSD_FLAG_ONDISK = 0x0004, /* want (or is) "ondisk" ack */ + CEPH_OSD_FLAG_RETRY = 0x0008, /* resend attempt */ + CEPH_OSD_FLAG_READ = 0x0010, /* op may read */ + CEPH_OSD_FLAG_WRITE = 0x0020, /* op may write */ + CEPH_OSD_FLAG_ORDERSNAP = 0x0040, /* EOLDSNAP if snapc is out of order */ + CEPH_OSD_FLAG_PEERSTAT_OLD = 0x0080, /* DEPRECATED msg includes osd_peer_stat */ + CEPH_OSD_FLAG_BALANCE_READS = 0x0100, + CEPH_OSD_FLAG_PARALLELEXEC = 0x0200, /* execute op in parallel */ + CEPH_OSD_FLAG_PGOP = 0x0400, /* pg op, no object */ + CEPH_OSD_FLAG_EXEC = 0x0800, /* op may exec */ + CEPH_OSD_FLAG_EXEC_PUBLIC = 0x1000, /* DEPRECATED op may exec (public) */ + CEPH_OSD_FLAG_LOCALIZE_READS = 0x2000, /* read from nearby replica, if any */ + CEPH_OSD_FLAG_RWORDERED = 0x4000, /* order wrt concurrent reads */ }; enum { CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ + CEPH_OSD_OP_FLAG_FAILOK = 2, /* continue despite failure */ }; #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ @@ -381,7 +429,11 @@ struct ceph_osd_op { __le64 ver; __u8 flag; /* 0 = unwatch, 1 = watch */ } __attribute__ ((packed)) watch; -}; + struct { + __le64 offset, length; + __le64 src_offset; + } __attribute__ ((packed)) clonerange; + }; __le32 payload_len; } __attribute__ ((packed)); @@ -424,5 +476,4 @@ struct ceph_osd_reply_head { } __attribute__ ((packed)); - #endif -- cgit v1.2.3 From 4b568b1aaf23d0ce64b98d01d5ad1bcc7694440a Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: add ceph_osd_state_name() Add the definition of ceph_osd_state_name(), to match its counterpart in user space. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/rados.h | 2 ++ net/ceph/ceph_strings.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 9c3b4aaf516b..b65182aba6f7 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -150,6 +150,8 @@ struct ceph_eversion { #define CEPH_OSD_AUTOOUT (1<<2) /* osd was automatically marked out */ #define CEPH_OSD_NEW (1<<3) /* osd is new, never marked in */ +extern const char *ceph_osd_state_name(int s); + /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 3fbda04de29c..833075cff4e8 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -68,6 +68,21 @@ const char *ceph_osd_op_name(int op) return "???"; } +const char *ceph_osd_state_name(int s) +{ + switch (s) { + case CEPH_OSD_EXISTS: + return "exists"; + case CEPH_OSD_UP: + return "up"; + case CEPH_OSD_AUTOOUT: + return "autoout"; + case CEPH_OSD_NEW: + return "new"; + default: + return "???"; + } +} const char *ceph_pool_op_name(int op) { -- cgit v1.2.3 From dd6f5e105d85e02bc41db0891eb07152b1746ad9 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 15 Feb 2013 11:42:30 -0600 Subject: libceph: update ceph_fs.h Update most of "include/linux/ceph/ceph_fs.h" to match its user space counterpart in "src/include/ceph_fs.h" in the ceph tree. Everything that has changed is either: - added definitions (therefore no real effect on existing code) - deleting unused symbols - added or revised comments There were some differences between the struct definitions for ceph_mon_subscribe_item and the open field of ceph_mds_request_args; those differences remain. This and the next commit resolve: http://tracker.ceph.com/issues/4165 Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_fs.h | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index cf6f4d998a76..2ad7b860f062 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -21,16 +21,14 @@ * internal cluster protocols separately from the public, * client-facing protocol. */ -#define CEPH_OSD_PROTOCOL 8 /* cluster internal */ -#define CEPH_MDS_PROTOCOL 12 /* cluster internal */ -#define CEPH_MON_PROTOCOL 5 /* cluster internal */ #define CEPH_OSDC_PROTOCOL 24 /* server/client */ #define CEPH_MDSC_PROTOCOL 32 /* server/client */ #define CEPH_MONC_PROTOCOL 15 /* server/client */ -#define CEPH_INO_ROOT 1 -#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_ROOT 1 +#define CEPH_INO_CEPH 2 /* hidden .ceph dir */ +#define CEPH_INO_DOTDOT 3 /* used by ceph fuse for parent (..) */ /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ #define CEPH_MAX_MON 31 @@ -51,7 +49,7 @@ struct ceph_file_layout { __le32 fl_object_stripe_unit; /* UNUSED. for per-object parity, if any */ /* object -> pg layout */ - __le32 fl_unused; /* unused; used to be preferred primary (-1) */ + __le32 fl_unused; /* unused; used to be preferred primary for pg (-1 for none) */ __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ } __attribute__ ((packed)); @@ -101,6 +99,8 @@ struct ceph_dir_layout { #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 #define CEPH_MSG_AUTH 17 #define CEPH_MSG_AUTH_REPLY 18 +#define CEPH_MSG_MON_GET_VERSION 19 +#define CEPH_MSG_MON_GET_VERSION_REPLY 20 /* client <-> mds */ #define CEPH_MSG_MDS_MAP 21 @@ -220,6 +220,11 @@ struct ceph_mon_subscribe_ack { struct ceph_fsid fsid; } __attribute__ ((packed)); +/* + * mdsmap flags + */ +#define CEPH_MDSMAP_DOWN (1<<0) /* cluster deliberately down */ + /* * mds states * > 0 -> in @@ -233,6 +238,7 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ +#define CEPH_MDS_STATE_REPLAYONCE -9 /* up, replaying an active node's journal */ #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed @@ -264,6 +270,7 @@ extern const char *ceph_mds_state_name(int s); #define CEPH_LOCK_IXATTR 2048 #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ +#define CEPH_LOCK_IPOLICY 16384 /* policy lock on dirs. MDS internal */ /* client_session ops */ enum { @@ -338,6 +345,12 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_SETATTR_SIZE 32 #define CEPH_SETATTR_CTIME 64 +/* + * Ceph setxattr request flags. + */ +#define CEPH_XATTR_CREATE 1 +#define CEPH_XATTR_REPLACE 2 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ @@ -522,14 +535,17 @@ int ceph_flags_to_mode(int flags); #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ +#define CEPH_CAP_SIMPLE_BITS 2 +#define CEPH_CAP_FILE_BITS 8 + /* per-lock shift */ #define CEPH_CAP_SAUTH 2 #define CEPH_CAP_SLINK 4 #define CEPH_CAP_SXATTR 6 #define CEPH_CAP_SFILE 8 -#define CEPH_CAP_SFLOCK 20 +#define CEPH_CAP_SFLOCK 20 -#define CEPH_CAP_BITS 22 +#define CEPH_CAP_BITS 22 /* composed values */ #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) -- cgit v1.2.3 From b324814e8436772cb3367b14149ba003a9954525 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: use void pointers in page vector functions The functions used for working with ceph page vectors are defined with char pointers, but they're really intended to operate on untyped data. Change the types of these function parameters to (void *) to reflect this. (Note that the functions now assume void pointer arithmetic works like arithmetic on char pointers.) Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- include/linux/ceph/libceph.h | 10 +++++----- net/ceph/pagevec.c | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index c44275ab375c..2250f8bb2490 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -222,7 +222,7 @@ extern int ceph_open_session(struct ceph_client *client); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); -extern struct page **ceph_get_direct_page_vector(const char __user *data, +extern struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page); extern void ceph_put_page_vector(struct page **pages, int num_pages, @@ -230,15 +230,15 @@ extern void ceph_put_page_vector(struct page **pages, int num_pages, extern void ceph_release_page_vector(struct page **pages, int num_pages); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len); extern int ceph_copy_to_page_vector(struct page **pages, - const char *data, + const void *data, loff_t off, size_t len); extern int ceph_copy_from_page_vector(struct page **pages, - char *data, + void *data, loff_t off, size_t len); -extern int ceph_copy_page_vector_to_user(struct page **pages, char __user *data, +extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, loff_t off, size_t len); extern void ceph_zero_page_vector_range(int off, int len, struct page **pages); diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index cd9c21df87d1..5b20be979c19 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -12,7 +12,7 @@ /* * build a vector of user pages */ -struct page **ceph_get_direct_page_vector(const char __user *data, +struct page **ceph_get_direct_page_vector(const void __user *data, int num_pages, bool write_page) { struct page **pages; @@ -93,7 +93,7 @@ EXPORT_SYMBOL(ceph_alloc_page_vector); * copy user data into a page vector */ int ceph_copy_user_to_page_vector(struct page **pages, - const char __user *data, + const void __user *data, loff_t off, size_t len) { int i = 0; @@ -119,7 +119,7 @@ int ceph_copy_user_to_page_vector(struct page **pages, EXPORT_SYMBOL(ceph_copy_user_to_page_vector); int ceph_copy_to_page_vector(struct page **pages, - const char *data, + const void *data, loff_t off, size_t len) { int i = 0; @@ -143,7 +143,7 @@ int ceph_copy_to_page_vector(struct page **pages, EXPORT_SYMBOL(ceph_copy_to_page_vector); int ceph_copy_from_page_vector(struct page **pages, - char *data, + void *data, loff_t off, size_t len) { int i = 0; @@ -170,7 +170,7 @@ EXPORT_SYMBOL(ceph_copy_from_page_vector); * copy user data from a page vector into a user pointer */ int ceph_copy_page_vector_to_user(struct page **pages, - char __user *data, + void __user *data, loff_t off, size_t len) { int i = 0; -- cgit v1.2.3 From 903bb32e890237ca43ab847e561e5377cfe0fdb3 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 6 Feb 2013 13:11:38 -0600 Subject: libceph: drop return value from page vector copy routines The return values provided for ceph_copy_to_page_vector() and ceph_copy_from_page_vector() serve no purpose, so get rid of them. Signed-off-by: Alex Elder Reviewed-by: Josh Durgin --- drivers/block/rbd.c | 5 ++--- include/linux/ceph/libceph.h | 4 ++-- net/ceph/pagevec.c | 14 ++++++-------- 3 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index c259b4089e95..b0eea3eaee93 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -1890,8 +1890,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev, if (ret < 0) goto out; ret = 0; - (void) ceph_copy_from_page_vector(pages, inbound, 0, - obj_request->xferred); + ceph_copy_from_page_vector(pages, inbound, 0, obj_request->xferred); if (version) *version = obj_request->version; out: @@ -2089,7 +2088,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev, rbd_assert(obj_request->xferred <= (u64) SIZE_MAX); size = (size_t) obj_request->xferred; - (void) ceph_copy_from_page_vector(pages, buf, 0, size); + ceph_copy_from_page_vector(pages, buf, 0, size); rbd_assert(size <= (size_t) INT_MAX); ret = (int) size; if (version) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 2250f8bb2490..29818fc3fa49 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -232,10 +232,10 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_copy_user_to_page_vector(struct page **pages, const void __user *data, loff_t off, size_t len); -extern int ceph_copy_to_page_vector(struct page **pages, +extern void ceph_copy_to_page_vector(struct page **pages, const void *data, loff_t off, size_t len); -extern int ceph_copy_from_page_vector(struct page **pages, +extern void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len); extern int ceph_copy_page_vector_to_user(struct page **pages, void __user *data, diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 5b20be979c19..815a2249cfa9 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -118,17 +118,17 @@ int ceph_copy_user_to_page_vector(struct page **pages, } EXPORT_SYMBOL(ceph_copy_user_to_page_vector); -int ceph_copy_to_page_vector(struct page **pages, +void ceph_copy_to_page_vector(struct page **pages, const void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(page_address(pages[i]) + po, data, l); data += l; left -= l; @@ -138,21 +138,20 @@ int ceph_copy_to_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_to_page_vector); -int ceph_copy_from_page_vector(struct page **pages, +void ceph_copy_from_page_vector(struct page **pages, void *data, loff_t off, size_t len) { int i = 0; size_t po = off & ~PAGE_CACHE_MASK; size_t left = len; - size_t l; while (left > 0) { - l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + size_t l = min_t(size_t, PAGE_CACHE_SIZE-po, left); + memcpy(data, page_address(pages[i]) + po, l); data += l; left -= l; @@ -162,7 +161,6 @@ int ceph_copy_from_page_vector(struct page **pages, i++; } } - return len; } EXPORT_SYMBOL(ceph_copy_from_page_vector); -- cgit v1.2.3