From ca9bc12b90fbc4e2b1f81360f63842c9da54bb3c Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 11 Jan 2011 13:47:24 +0100 Subject: drbd: Get rid of BE_DRBD_MAGIC and BE_DRBD_MAGIC_BIG Converting the constants happens at compile time. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 9e5f5607eba3..d28202811672 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -334,9 +334,7 @@ enum drbd_timeout_flag { #define UUID_JUST_CREATED ((__u64)4) #define DRBD_MAGIC 0x83740267 -#define BE_DRBD_MAGIC __constant_cpu_to_be32(DRBD_MAGIC) #define DRBD_MAGIC_BIG 0x835a -#define BE_DRBD_MAGIC_BIG __constant_cpu_to_be16(DRBD_MAGIC_BIG) /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 -- cgit v1.2.3 From 9749f30f1a387070e6e8351f35aeb829eacc3ab6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 20 Jul 2011 14:59:37 +0200 Subject: idr: idr_for_each_entry() macro Inspired by the list_for_each_entry() macro --- include/linux/idr.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 255491cf522e..52a9da295296 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -152,4 +152,15 @@ void ida_simple_remove(struct ida *ida, unsigned int id); void __init idr_init_cache(void); +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0, entry = (typeof(entry))idr_get_next((idp), &(id)); \ + entry != NULL; \ + ++id, entry = (typeof(entry))idr_get_next((idp), &(id))) + #endif /* __IDR_H__ */ -- cgit v1.2.3 From fd340c12c98b57ec0751ebb317057eee41be0c3d Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 19 Jan 2011 16:57:39 +0100 Subject: drbd: Use new header layout The new header layout will only be used if the peer supports it of course. For the first packet and the handshake packet the old (h80) layout is used for compatibility reasons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 - drivers/block/drbd/drbd_main.c | 82 +++++++++++++++++--------------------- drivers/block/drbd/drbd_receiver.c | 7 +++- include/linux/drbd.h | 2 +- 4 files changed, 42 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index dc669dfe5b0d..4de43481bcb9 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -345,7 +345,6 @@ struct p_header95 { u16 magic; /* use DRBD_MAGIC_BIG here */ u16 command; u32 length; /* Use only 24 bits of that. Ignore the highest 8 bit. */ - u8 payload[0]; } __packed; struct p_header { diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 55ce48e24b8e..f8cb15c84ed8 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1820,12 +1820,36 @@ void drbd_thread_current_set_cpu(struct drbd_conf *mdev) } #endif +static void prepare_header80(struct drbd_conf *mdev, struct p_header80 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be32(DRBD_MAGIC); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be16(size); +} + +static void prepare_header95(struct drbd_conf *mdev, struct p_header95 *h, + enum drbd_packets cmd, int size) +{ + h->magic = cpu_to_be16(DRBD_MAGIC_BIG); + h->command = cpu_to_be16(cmd); + h->length = cpu_to_be32(size); +} + +static void prepare_header(struct drbd_conf *mdev, struct p_header *h, + enum drbd_packets cmd, int size) +{ + if (mdev->tconn->agreed_pro_version >= 100 || size > DRBD_MAX_SIZE_H80_PACKET) + prepare_header95(mdev, &h->h95, cmd, size); + else + prepare_header80(mdev, &h->h80, cmd, size); +} + /* the appropriate socket mutex must be held already */ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, - enum drbd_packets cmd, struct p_header *hg, + enum drbd_packets cmd, struct p_header *h, size_t size, unsigned msg_flags) { - struct p_header80 *h = (struct p_header80 *)hg; int sent, ok; if (!expect(h)) @@ -1833,9 +1857,7 @@ int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock, if (!expect(size)) return false; - h->magic = cpu_to_be32(DRBD_MAGIC); - h->command = cpu_to_be16(cmd); - h->length = cpu_to_be16(size-sizeof(struct p_header80)); + prepare_header(mdev, h, cmd, size - sizeof(struct p_header)); sent = drbd_send(mdev, sock, h, size, msg_flags); @@ -1878,12 +1900,10 @@ int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket, int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data, size_t size) { - struct p_header80 h; + struct p_header h; int ok; - h.magic = cpu_to_be32(DRBD_MAGIC); - h.command = cpu_to_be16(cmd); - h.length = cpu_to_be16(size); + prepare_header(mdev, &h, cmd, size); if (!drbd_get_data_sock(mdev)) return 0; @@ -2456,14 +2476,11 @@ int drbd_send_drequest_csum(struct drbd_conf *mdev, int ok; struct p_block_req p; + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header) + digest_size); p.sector = cpu_to_be64(sector); p.block_id = ID_SYNCER /* unused */; p.blksize = cpu_to_be32(size); - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size); - mutex_lock(&mdev->tconn->data.mutex); ok = (sizeof(p) == drbd_send(mdev, mdev->tconn->data.socket, &p, sizeof(p), 0)); @@ -2663,22 +2680,10 @@ int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req) dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (req->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(P_DATA); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(P_DATA); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); - } - + prepare_header(mdev, &p.head, P_DATA, sizeof(p) - sizeof(struct p_header) + dgs + req->i.size); p.sector = cpu_to_be64(req->i.sector); p.block_id = (unsigned long)req; - p.seq_num = cpu_to_be32(req->seq_num = - atomic_add_return(1, &mdev->packet_seq)); + p.seq_num = cpu_to_be32(req->seq_num = atomic_add_return(1, &mdev->packet_seq)); dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw); @@ -2748,18 +2753,7 @@ int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd, dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_w_tfm) ? crypto_hash_digestsize(mdev->tconn->integrity_w_tfm) : 0; - if (e->i.size <= DRBD_MAX_SIZE_H80_PACKET) { - p.head.h80.magic = cpu_to_be32(DRBD_MAGIC); - p.head.h80.command = cpu_to_be16(cmd); - p.head.h80.length = - cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } else { - p.head.h95.magic = cpu_to_be16(DRBD_MAGIC_BIG); - p.head.h95.command = cpu_to_be16(cmd); - p.head.h95.length = - cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); - } - + prepare_header(mdev, &p.head, cmd, sizeof(p) - sizeof(struct p_header80) + dgs + e->i.size); p.sector = cpu_to_be64(e->i.sector); p.block_id = e->block_id; /* p.seq_num = 0; No sequence numbers here.. */ @@ -3028,7 +3022,7 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) drbd_thread_init(mdev, &mdev->tconn->worker, drbd_worker); drbd_thread_init(mdev, &mdev->tconn->asender, drbd_asender); - mdev->tconn->agreed_pro_version = PRO_VERSION_MAX; + /* mdev->tconn->agreed_pro_version gets initialized in drbd_connect() */ mdev->write_ordering = WO_bdev_flush; mdev->resync_wenr = LC_FREE; mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE; @@ -3506,12 +3500,8 @@ int __init drbd_init(void) { int err; - if (sizeof(struct p_handshake) != 80) { - printk(KERN_ERR - "drbd: never change the size or layout " - "of the HandShake packet.\n"); - return -EINVAL; - } + BUILD_BUG_ON(sizeof(struct p_header80) != sizeof(struct p_header95)); + BUILD_BUG_ON(sizeof(struct p_handshake) != 80); if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { printk(KERN_ERR diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9393fe482efc..8f5a241fe20a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -761,6 +761,9 @@ static int drbd_connect(struct drbd_conf *mdev) return -2; clear_bit(DISCARD_CONCURRENT, &mdev->flags); + mdev->tconn->agreed_pro_version = 99; + /* agreed_pro_version must be smaller than 100 so we send the old + header (h80) in the first packet and in the handshake packet. */ sock = NULL; msock = NULL; @@ -935,12 +938,12 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi return false; } - if (likely(h->h80.magic == cpu_to_be32(DRBD_MAGIC))) { + if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) { *cmd = be16_to_cpu(h->h80.command); *packet_size = be16_to_cpu(h->h80.length); } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) { *cmd = be16_to_cpu(h->h95.command); - *packet_size = be32_to_cpu(h->h95.length); + *packet_size = be32_to_cpu(h->h95.length) & 0x00ffffff; } else { dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", be32_to_cpu(h->h80.magic), diff --git a/include/linux/drbd.h b/include/linux/drbd.h index d28202811672..35fc08a0a552 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -56,7 +56,7 @@ extern const char *drbd_buildtag(void); #define REL_VERSION "8.3.11" #define API_VERSION 88 #define PRO_VERSION_MIN 86 -#define PRO_VERSION_MAX 96 +#define PRO_VERSION_MAX 100 enum drbd_io_error_p { -- cgit v1.2.3 From 4738fa16907a933d72bbcae1b8922dc9330fde92 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:20:55 +0100 Subject: drbd: use clear_bit_unlock() where appropriate Some open-coded clear_bit(); smp_mb__after_clear_bit(); should in fact have been smp_mb__before_clear_bit(); clear_bit(); Instead, use clear_bit_unlock() to annotate the intention, and have it do the right thing. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_bitmap.c | 3 +-- drivers/block/drbd/drbd_main.c | 3 +-- include/linux/lru_cache.h | 3 +-- lib/lru_cache.c | 10 ++++------ 4 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index e8d652f197c3..4be737055718 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c @@ -219,8 +219,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr) { struct drbd_bitmap *b = mdev->bitmap; void *addr = &page_private(b->bm_pages[page_nr]); - clear_bit(BM_PAGE_IO_LOCK, addr); - smp_mb__after_clear_bit(); + clear_bit_unlock(BM_PAGE_IO_LOCK, addr); wake_up(&mdev->bitmap->bm_io_wait); } diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 592f0c949fd0..c77e51a40926 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2818,8 +2818,7 @@ static int w_bitmap_io(struct drbd_work *w, int unused) put_ldev(mdev); } - clear_bit(BITMAP_IO, &mdev->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(BITMAP_IO, &mdev->flags); wake_up(&mdev->misc_wait); if (work->done) diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 7a71ffad037c..4cceafb0732d 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -275,8 +275,7 @@ static inline int lc_try_lock(struct lru_cache *lc) */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); } static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index a07e7268d7ed..9f353f7f41ca 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -44,8 +44,8 @@ MODULE_LICENSE("GPL"); } while (0) #define RETURN(x...) do { \ - clear_bit(__LC_PARANOIA, &lc->flags); \ - smp_mb__after_clear_bit(); return x ; } while (0) + clear_bit_unlock(__LC_PARANOIA, &lc->flags); \ + return x ; } while (0) /* BUG() if e is not one of the elements tracked by lc */ #define PARANOIA_LC_ELEMENT(lc, e) do { \ @@ -438,8 +438,7 @@ void lc_changed(struct lru_cache *lc, struct lc_element *e) hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); lc->changing_element = NULL; lc->new_number = LC_FREE; - clear_bit(__LC_DIRTY, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_DIRTY, &lc->flags); RETURN(); } @@ -463,8 +462,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); lc->used--; - clear_bit(__LC_STARVING, &lc->flags); - smp_mb__after_clear_bit(); + clear_bit_unlock(__LC_STARVING, &lc->flags); } RETURN(e->refcnt); } -- cgit v1.2.3 From 46a15bc3ec425b546d140581c28192ab7877ddc4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:01 +0100 Subject: lru_cache: allow multiple changes per transaction Allow multiple changes to the active set of elements in lru_cache. The only current user of lru_cache, drbd, is driving this generalisation. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 50 ++------ drivers/block/drbd/drbd_nl.c | 4 +- include/linux/lru_cache.h | 68 +++++++---- lib/lru_cache.c | 243 +++++++++++++++++++++++++++------------ 4 files changed, 225 insertions(+), 140 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 1ce3de6eed1b..44097c87fed7 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -175,7 +175,6 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) { struct lc_element *al_ext; struct lc_element *tmp; - unsigned long al_flags = 0; int wake; spin_lock_irq(&mdev->al_lock); @@ -190,19 +189,8 @@ static struct lc_element *_al_get(struct drbd_conf *mdev, unsigned int enr) return NULL; } } - al_ext = lc_get(mdev->act_log, enr); - al_flags = mdev->act_log->flags; + al_ext = lc_get(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - - /* - if (!al_ext) { - if (al_flags & LC_STARVING) - dev_warn(DEV, "Have to wait for LRU element (AL too small?)\n"); - if (al_flags & LC_DIRTY) - dev_warn(DEV, "Ongoing AL update (AL device too slow?)\n"); - } - */ - return al_ext; } @@ -235,7 +223,7 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) mdev->al_writ_cnt++; spin_lock_irq(&mdev->al_lock); - lc_changed(mdev->act_log, al_ext); + lc_committed(mdev->act_log); spin_unlock_irq(&mdev->al_lock); wake_up(&mdev->al_wait); } @@ -601,7 +589,7 @@ void drbd_al_shrink(struct drbd_conf *mdev) struct lc_element *al_ext; int i; - D_ASSERT(test_bit(__LC_DIRTY, &mdev->act_log->flags)); + D_ASSERT(test_bit(__LC_LOCKED, &mdev->act_log->flags)); for (i = 0; i < mdev->act_log->nr_elements; i++) { al_ext = lc_element_by_index(mdev->act_log, i); @@ -708,7 +696,9 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector, } ext->rs_left = rs_left; ext->rs_failed = success ? 0 : count; - lc_changed(mdev->resync, &ext->lce); + /* we don't keep a persistent log of the resync lru, + * we can commit any change right away. */ + lc_committed(mdev->resync); } lc_put(mdev->resync, &ext->lce); /* no race, we are within the al_lock! */ @@ -892,7 +882,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wakeup = 1; } if (bm_ext->lce.refcnt == 1) @@ -908,7 +898,7 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); } return bm_ext; @@ -916,26 +906,12 @@ struct bm_extent *_bme_get(struct drbd_conf *mdev, unsigned int enr) static int _is_in_al(struct drbd_conf *mdev, unsigned int enr) { - struct lc_element *al_ext; - int rv = 0; + int rv; spin_lock_irq(&mdev->al_lock); - if (unlikely(enr == mdev->act_log->new_number)) - rv = 1; - else { - al_ext = lc_find(mdev->act_log, enr); - if (al_ext) { - if (al_ext->refcnt) - rv = 1; - } - } + rv = lc_is_used(mdev->act_log, enr); spin_unlock_irq(&mdev->al_lock); - /* - if (unlikely(rv)) { - dev_info(DEV, "Delaying sync read until app's write is done\n"); - } - */ return rv; } @@ -1065,13 +1041,13 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) if (rs_flags & LC_STARVING) dev_warn(DEV, "Have to wait for element" " (resync LRU too small?)\n"); - BUG_ON(rs_flags & LC_DIRTY); + BUG_ON(rs_flags & LC_LOCKED); goto try_again; } if (bm_ext->lce.lc_number != enr) { bm_ext->rs_left = drbd_bm_e_weight(mdev, enr); bm_ext->rs_failed = 0; - lc_changed(mdev->resync, &bm_ext->lce); + lc_committed(mdev->resync); wake_up(&mdev->al_wait); D_ASSERT(test_bit(BME_LOCKED, &bm_ext->flags) == 0); } @@ -1082,8 +1058,6 @@ int drbd_try_rs_begin_io(struct drbd_conf *mdev, sector_t sector) } check_al: for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { - if (unlikely(al_enr+i == mdev->act_log->new_number)) - goto try_again; if (lc_is_used(mdev->act_log, al_enr+i)) goto try_again; } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index ae8f42e38e4f..0a92f5226c2a 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, + n = lc_create("act_log", drbd_al_ext_cache, 1, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -1016,7 +1016,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp } resync_lru = lc_create("resync", drbd_bm_ext_cache, - 61, sizeof(struct bm_extent), + 1, 61, sizeof(struct bm_extent), offsetof(struct bm_extent, lce)); if (!resync_lru) { retcode = ERR_NOMEM; diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 4cceafb0732d..cbafae40c649 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -166,9 +166,11 @@ struct lc_element { /* if we want to track a larger set of objects, * it needs to become arch independend u64 */ unsigned lc_number; - /* special label when on free list */ #define LC_FREE (~0U) + + /* for pending changes */ + unsigned lc_new_number; }; struct lru_cache { @@ -176,6 +178,7 @@ struct lru_cache { struct list_head lru; struct list_head free; struct list_head in_use; + struct list_head to_be_changed; /* the pre-created kmem cache to allocate the objects from */ struct kmem_cache *lc_cache; @@ -186,7 +189,7 @@ struct lru_cache { size_t element_off; /* number of elements (indices) */ - unsigned int nr_elements; + unsigned int nr_elements; /* Arbitrary limit on maximum tracked objects. Practical limit is much * lower due to allocation failures, probably. For typical use cases, * nr_elements should be a few thousand at most. @@ -194,18 +197,19 @@ struct lru_cache { * 8 high bits of .lc_index to be overloaded with flags in the future. */ #define LC_MAX_ACTIVE (1<<24) + /* allow to accumulate a few (index:label) changes, + * but no more than max_pending_changes */ + unsigned int max_pending_changes; + /* number of elements currently on to_be_changed list */ + unsigned int pending_changes; + /* statistics */ - unsigned used; /* number of lelements currently on in_use list */ - unsigned long hits, misses, starving, dirty, changed; + unsigned used; /* number of elements currently on in_use list */ + unsigned long hits, misses, starving, locked, changed; /* see below: flag-bits for lru_cache */ unsigned long flags; - /* when changing the label of an index element */ - unsigned int new_number; - - /* for paranoia when changing the label of an index element */ - struct lc_element *changing_element; void *lc_private; const char *name; @@ -221,10 +225,15 @@ enum { /* debugging aid, to catch concurrent access early. * user needs to guarantee exclusive access by proper locking! */ __LC_PARANOIA, - /* if we need to change the set, but currently there is a changing - * transaction pending, we are "dirty", and must deferr further - * changing requests */ + + /* annotate that the set is "dirty", possibly accumulating further + * changes, until a transaction is finally triggered */ __LC_DIRTY, + + /* Locked, no further changes allowed. + * Also used to serialize changing transactions. */ + __LC_LOCKED, + /* if we need to change the set, but currently there is no free nor * unused element available, we are "starving", and must not give out * further references, to guarantee that eventually some refcnt will @@ -236,9 +245,11 @@ enum { }; #define LC_PARANOIA (1<<__LC_PARANOIA) #define LC_DIRTY (1<<__LC_DIRTY) +#define LC_LOCKED (1<<__LC_LOCKED) #define LC_STARVING (1<<__LC_STARVING) extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); @@ -249,7 +260,7 @@ extern struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr); extern struct lc_element *lc_get(struct lru_cache *lc, unsigned int enr); extern unsigned int lc_put(struct lru_cache *lc, struct lc_element *e); -extern void lc_changed(struct lru_cache *lc, struct lc_element *e); +extern void lc_committed(struct lru_cache *lc); struct seq_file; extern size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc); @@ -258,31 +269,40 @@ extern void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char void (*detail) (struct seq_file *, struct lc_element *)); /** - * lc_try_lock - can be used to stop lc_get() from changing the tracked set + * lc_try_lock_for_transaction - can be used to stop lc_get() from changing the tracked set * @lc: the lru cache to operate on * - * Note that the reference counts and order on the active and lru lists may - * still change. Returns true if we acquired the lock. + * Allows (expects) the set to be "dirty". Note that the reference counts and + * order on the active and lru lists may still change. Used to serialize + * changing transactions. Returns true if we aquired the lock. */ -static inline int lc_try_lock(struct lru_cache *lc) +static inline int lc_try_lock_for_transaction(struct lru_cache *lc) { - return !test_and_set_bit(__LC_DIRTY, &lc->flags); + return !test_and_set_bit(__LC_LOCKED, &lc->flags); } +/** + * lc_try_lock - variant to stop lc_get() from changing the tracked set + * @lc: the lru cache to operate on + * + * Note that the reference counts and order on the active and lru lists may + * still change. Only works on a "clean" set. Returns true if we aquired the + * lock, which means there are no pending changes, and any further attempt to + * change the set will not succeed until the next lc_unlock(). + */ +extern int lc_try_lock(struct lru_cache *lc); + /** * lc_unlock - unlock @lc, allow lc_get() to change the set again * @lc: the lru cache to operate on */ static inline void lc_unlock(struct lru_cache *lc) { - clear_bit_unlock(__LC_DIRTY, &lc->flags); + clear_bit(__LC_DIRTY, &lc->flags); + clear_bit_unlock(__LC_LOCKED, &lc->flags); } -static inline int lc_is_used(struct lru_cache *lc, unsigned int enr) -{ - struct lc_element *e = lc_find(lc, enr); - return e && e->refcnt; -} +extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); #define lc_entry(ptr, type, member) \ container_of(ptr, type, member) diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 17621684758a..d71d89498943 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -55,9 +55,40 @@ MODULE_LICENSE("GPL"); BUG_ON(i >= lc_->nr_elements); \ BUG_ON(lc_->lc_element[i] != e_); } while (0) + +/* We need to atomically + * - try to grab the lock (set LC_LOCKED) + * - only if there is no pending transaction + * (neither LC_DIRTY nor LC_STARVING is set) + * Because of PARANOIA_ENTRY() above abusing lc->flags as well, + * it is not sufficient to just say + * return 0 == cmpxchg(&lc->flags, 0, LC_LOCKED); + */ +int lc_try_lock(struct lru_cache *lc) +{ + unsigned long val; + do { + val = cmpxchg(&lc->flags, 0, LC_LOCKED); + } while (unlikely (val == LC_PARANOIA)); + /* Spin until no-one is inside a PARANOIA_ENTRY()/RETURN() section. */ + return 0 == val; +#if 0 + /* Alternative approach, spin in case someone enters or leaves a + * PARANOIA_ENTRY()/RETURN() section. */ + unsigned long old, new, val; + do { + old = lc->flags & LC_PARANOIA; + new = old | LC_LOCKED; + val = cmpxchg(&lc->flags, old, new); + } while (unlikely (val == (old ^ LC_PARANOIA))); + return old == val; +#endif +} + /** * lc_create - prepares to track objects in an active set * @name: descriptive name only used in lc_seq_printf_stats and lc_seq_dump_details + * @max_pending_changes: maximum changes to accumulate until a transaction is required * @e_count: number of elements allowed to be active simultaneously * @e_size: size of the tracked objects * @e_off: offset to the &struct lc_element member in a tracked object @@ -66,6 +97,7 @@ MODULE_LICENSE("GPL"); * or NULL on (allocation) failure. */ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, + unsigned max_pending_changes, unsigned e_count, size_t e_size, size_t e_off) { struct hlist_head *slot = NULL; @@ -98,12 +130,13 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->name = name; lc->element_size = e_size; lc->element_off = e_off; lc->nr_elements = e_count; - lc->new_number = LC_FREE; + lc->max_pending_changes = max_pending_changes; lc->lc_cache = cache; lc->lc_element = element; lc->lc_slot = slot; @@ -117,6 +150,7 @@ struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, e = p + e_off; e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); element[i] = e; } @@ -175,15 +209,15 @@ void lc_reset(struct lru_cache *lc) INIT_LIST_HEAD(&lc->in_use); INIT_LIST_HEAD(&lc->lru); INIT_LIST_HEAD(&lc->free); + INIT_LIST_HEAD(&lc->to_be_changed); lc->used = 0; lc->hits = 0; lc->misses = 0; lc->starving = 0; - lc->dirty = 0; + lc->locked = 0; lc->changed = 0; + lc->pending_changes = 0; lc->flags = 0; - lc->changing_element = NULL; - lc->new_number = LC_FREE; memset(lc->lc_slot, 0, sizeof(struct hlist_head) * lc->nr_elements); for (i = 0; i < lc->nr_elements; i++) { @@ -194,6 +228,7 @@ void lc_reset(struct lru_cache *lc) /* re-init it */ e->lc_index = i; e->lc_number = LC_FREE; + e->lc_new_number = LC_FREE; list_add(&e->list, &lc->free); } } @@ -208,14 +243,14 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) /* NOTE: * total calls to lc_get are * (starving + hits + misses) - * misses include "dirty" count (update from an other thread in + * misses include "locked" count (update from an other thread in * progress) and "changed", when this in fact lead to an successful * update of the cache. */ return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu dirty:%lu changed:%lu\n", + "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->dirty, lc->changed); + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) @@ -224,16 +259,8 @@ static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) } -/** - * lc_find - find element by label, if present in the hash table - * @lc: The lru_cache object - * @enr: element number - * - * Returns the pointer to an element, if the element with the requested - * "label" or element number is present in the hash table, - * or NULL if not found. Does not change the refcnt. - */ -struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) +static struct lc_element *__lc_find(struct lru_cache *lc, unsigned int enr, + bool include_changing) { struct hlist_node *n; struct lc_element *e; @@ -241,29 +268,48 @@ struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) BUG_ON(!lc); BUG_ON(!lc->nr_elements); hlist_for_each_entry(e, n, lc_hash_slot(lc, enr), colision) { - if (e->lc_number == enr) + /* "about to be changed" elements, pending transaction commit, + * are hashed by their "new number". "Normal" elements have + * lc_number == lc_new_number. */ + if (e->lc_new_number != enr) + continue; + if (e->lc_new_number == e->lc_number || include_changing) return e; + break; } return NULL; } -/* returned element will be "recycled" immediately */ -static struct lc_element *lc_evict(struct lru_cache *lc) +/** + * lc_find - find element by label, if present in the hash table + * @lc: The lru_cache object + * @enr: element number + * + * Returns the pointer to an element, if the element with the requested + * "label" or element number is present in the hash table, + * or NULL if not found. Does not change the refcnt. + * Ignores elements that are "about to be used", i.e. not yet in the active + * set, but still pending transaction commit. + */ +struct lc_element *lc_find(struct lru_cache *lc, unsigned int enr) { - struct list_head *n; - struct lc_element *e; - - if (list_empty(&lc->lru)) - return NULL; - - n = lc->lru.prev; - e = list_entry(n, struct lc_element, list); - - PARANOIA_LC_ELEMENT(lc, e); + return __lc_find(lc, enr, 0); +} - list_del(&e->list); - hlist_del(&e->colision); - return e; +/** + * lc_is_used - find element by label + * @lc: The lru_cache object + * @enr: element number + * + * Returns true, if the element with the requested "label" or element number is + * present in the hash table, and is used (refcnt > 0). + * Also finds elements that are not _currently_ used but only "about to be + * used", i.e. on the "to_be_changed" list, pending transaction commit. + */ +bool lc_is_used(struct lru_cache *lc, unsigned int enr) +{ + struct lc_element *e = __lc_find(lc, enr, 1); + return e && e->refcnt; } /** @@ -280,22 +326,34 @@ void lc_del(struct lru_cache *lc, struct lc_element *e) PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt); - e->lc_number = LC_FREE; + e->lc_number = e->lc_new_number = LC_FREE; hlist_del_init(&e->colision); list_move(&e->list, &lc->free); RETURN(); } -static struct lc_element *lc_get_unused_element(struct lru_cache *lc) +static struct lc_element *lc_prepare_for_change(struct lru_cache *lc, unsigned new_number) { struct list_head *n; + struct lc_element *e; + + if (!list_empty(&lc->free)) + n = lc->free.next; + else if (!list_empty(&lc->lru)) + n = lc->lru.prev; + else + return NULL; + + e = list_entry(n, struct lc_element, list); + PARANOIA_LC_ELEMENT(lc, e); - if (list_empty(&lc->free)) - return lc_evict(lc); + e->lc_new_number = new_number; + if (!hlist_unhashed(&e->colision)) + __hlist_del(&e->colision); + hlist_add_head(&e->colision, lc_hash_slot(lc, new_number)); + list_move(&e->list, &lc->to_be_changed); - n = lc->free.next; - list_del(n); - return list_entry(n, struct lc_element, list); + return e; } static int lc_unused_element_available(struct lru_cache *lc) @@ -318,8 +376,12 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - e = lc_find(lc, enr); - if (e) { + e = __lc_find(lc, enr, 1); + /* if lc_new_number != lc_number, + * this enr is currently being pulled in already, + * and will be available once the pending transaction + * has been committed. */ + if (e && e->lc_new_number == e->lc_number) { ++lc->hits; if (e->refcnt++ == 0) lc->used++; @@ -331,6 +393,24 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool if (!may_change) RETURN(NULL); + /* It has been found above, but on the "to_be_changed" list, not yet + * committed. Don't pull it in twice, wait for the transaction, then + * try again */ + if (e) + RETURN(NULL); + + /* To avoid races with lc_try_lock(), first, mark us dirty + * (using test_and_set_bit, as it implies memory barriers), ... */ + test_and_set_bit(__LC_DIRTY, &lc->flags); + + /* ... only then check if it is locked anyways. If lc_unlock clears + * the dirty bit again, that's not a problem, we will come here again. + */ + if (test_bit(__LC_LOCKED, &lc->flags)) { + ++lc->locked; + RETURN(NULL); + } + /* In case there is nothing available and we can not kick out * the LRU element, we have to wait ... */ @@ -339,24 +419,19 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool RETURN(NULL); } - /* it was not present in the active set. - * we are going to recycle an unused (or even "free") element. - * user may need to commit a transaction to record that change. - * we serialize on flags & LC_DIRTY */ - if (test_and_set_bit(__LC_DIRTY, &lc->flags)) { - ++lc->dirty; + /* It was not present in the active set. We are going to recycle an + * unused (or even "free") element, but we won't accumulate more than + * max_pending_changes changes. */ + if (lc->pending_changes >= lc->max_pending_changes) RETURN(NULL); - } - e = lc_get_unused_element(lc); + e = lc_prepare_for_change(lc, enr); BUG_ON(!e); clear_bit(__LC_STARVING, &lc->flags); BUG_ON(++e->refcnt != 1); lc->used++; - - lc->changing_element = e; - lc->new_number = enr; + lc->pending_changes++; RETURN(e); } @@ -388,12 +463,15 @@ static struct lc_element *__lc_get(struct lru_cache *lc, unsigned int enr, bool * pointer to an UNUSED element with some different element number, * where that different number may also be %LC_FREE. * - * In this case, the cache is marked %LC_DIRTY (blocking further changes), - * and the returned element pointer is removed from the lru list and - * hash collision chains. The user now should do whatever housekeeping - * is necessary. - * Then he must call lc_changed(lc,element_pointer), to finish - * the change. + * In this case, the cache is marked %LC_DIRTY, + * so lc_try_lock() will no longer succeed. + * The returned element pointer is moved to the "to_be_changed" list, + * and registered with the new element number on the hash collision chains, + * so it is possible to pick it up from lc_is_used(). + * Up to "max_pending_changes" (see lc_create()) can be accumulated. + * The user now should do whatever housekeeping is necessary, + * typically serialize on lc_try_lock_for_transaction(), then call + * lc_committed(lc) and lc_unlock(), to finish the change. * * NOTE: The user needs to check the lc_number on EACH use, so he recognizes * any cache set change. @@ -425,22 +503,25 @@ struct lc_element *lc_try_get(struct lru_cache *lc, unsigned int enr) } /** - * lc_changed - tell @lc that the change has been recorded + * lc_committed - tell @lc that pending changes have been recorded * @lc: the lru cache to operate on - * @e: the element pending label change + * + * User is expected to serialize on explicit lc_try_lock_for_transaction() + * before the transaction is started, and later needs to lc_unlock() explicitly + * as well. */ -void lc_changed(struct lru_cache *lc, struct lc_element *e) +void lc_committed(struct lru_cache *lc) { + struct lc_element *e, *tmp; + PARANOIA_ENTRY(); - BUG_ON(e != lc->changing_element); - PARANOIA_LC_ELEMENT(lc, e); - ++lc->changed; - e->lc_number = lc->new_number; - list_add(&e->list, &lc->in_use); - hlist_add_head(&e->colision, lc_hash_slot(lc, lc->new_number)); - lc->changing_element = NULL; - lc->new_number = LC_FREE; - clear_bit_unlock(__LC_DIRTY, &lc->flags); + list_for_each_entry_safe(e, tmp, &lc->to_be_changed, list) { + /* count number of changes, not number of transactions */ + ++lc->changed; + e->lc_number = e->lc_new_number; + list_move(&e->list, &lc->in_use); + } + lc->pending_changes = 0; RETURN(); } @@ -459,7 +540,7 @@ unsigned int lc_put(struct lru_cache *lc, struct lc_element *e) PARANOIA_ENTRY(); PARANOIA_LC_ELEMENT(lc, e); BUG_ON(e->refcnt == 0); - BUG_ON(e == lc->changing_element); + BUG_ON(e->lc_number != e->lc_new_number); if (--e->refcnt == 0) { /* move it to the front of LRU. */ list_move(&e->list, &lc->lru); @@ -504,16 +585,24 @@ unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e) void lc_set(struct lru_cache *lc, unsigned int enr, int index) { struct lc_element *e; + struct list_head *lh; if (index < 0 || index >= lc->nr_elements) return; e = lc_element_by_index(lc, index); - e->lc_number = enr; + BUG_ON(e->lc_number != e->lc_new_number); + BUG_ON(e->refcnt != 0); + e->lc_number = e->lc_new_number = enr; hlist_del_init(&e->colision); - hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); - list_move(&e->list, e->refcnt ? &lc->in_use : &lc->lru); + if (enr == LC_FREE) + lh = &lc->free; + else { + hlist_add_head(&e->colision, lc_hash_slot(lc, enr)); + lh = &lc->lru; + } + list_move(&e->list, lh); } /** @@ -553,8 +642,10 @@ EXPORT_SYMBOL(lc_try_get); EXPORT_SYMBOL(lc_find); EXPORT_SYMBOL(lc_get); EXPORT_SYMBOL(lc_put); -EXPORT_SYMBOL(lc_changed); +EXPORT_SYMBOL(lc_committed); EXPORT_SYMBOL(lc_element_by_index); EXPORT_SYMBOL(lc_index_of); EXPORT_SYMBOL(lc_seq_printf_stats); EXPORT_SYMBOL(lc_seq_dump_details); +EXPORT_SYMBOL(lc_try_lock); +EXPORT_SYMBOL(lc_is_used); -- cgit v1.2.3 From 7ad651b52218eea3f9280dbb353dfe0c42742d85 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 21 Feb 2011 13:21:03 +0100 Subject: drbd: new on-disk activity log transaction format Use a new on-disk transaction format for the activity log, which allows for multiple changes to the active set per transaction. Using 4k transaction blocks, we can now get rid of the work-around code to deal with devices not supporting 512 byte logical block size. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_actlog.c | 409 ++++++++++++++++++++++++--------------- drivers/block/drbd/drbd_int.h | 44 +++-- drivers/block/drbd/drbd_main.c | 4 - drivers/block/drbd/drbd_nl.c | 42 +--- include/linux/drbd.h | 4 + include/linux/drbd_limits.h | 8 +- 6 files changed, 302 insertions(+), 209 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 44097c87fed7..ea3895de4e6d 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -24,21 +24,67 @@ */ #include +#include #include +#include +#include #include "drbd_int.h" #include "drbd_wrappers.h" -/* We maintain a trivial checksum in our on disk activity log. - * With that we can ensure correct operation even when the storage - * device might do a partial (last) sector write while losing power. - */ -struct __packed al_transaction { - u32 magic; - u32 tr_number; - struct __packed { - u32 pos; - u32 extent; } updates[1 + AL_EXTENTS_PT]; - u32 xor_sum; +/* all fields on disc in big endian */ +struct __packed al_transaction_on_disk { + /* don't we all like magic */ + __be32 magic; + + /* to identify the most recent transaction block + * in the on disk ring buffer */ + __be32 tr_number; + + /* checksum on the full 4k block, with this field set to 0. */ + __be32 crc32c; + + /* type of transaction, special transaction types like: + * purge-all, set-all-idle, set-all-active, ... to-be-defined */ + __be16 transaction_type; + + /* we currently allow only a few thousand extents, + * so 16bit will be enough for the slot number. */ + + /* how many updates in this transaction */ + __be16 n_updates; + + /* maximum slot number, "al-extents" in drbd.conf speak. + * Having this in each transaction should make reconfiguration + * of that parameter easier. */ + __be16 context_size; + + /* slot number the context starts with */ + __be16 context_start_slot_nr; + + /* Some reserved bytes. Expected usage is a 64bit counter of + * sectors-written since device creation, and other data generation tag + * supporting usage */ + __be32 __reserved[4]; + + /* --- 36 byte used --- */ + + /* Reserve space for up to AL_UPDATES_PER_TRANSACTION changes + * in one transaction, then use the remaining byte in the 4k block for + * context information. "Flexible" number of updates per transaction + * does not help, as we have to account for the case when all update + * slots are used anyways, so it would only complicate code without + * additional benefit. + */ + __be16 update_slot_nr[AL_UPDATES_PER_TRANSACTION]; + + /* but the extent number is 32bit, which at an extent size of 4 MiB + * allows to cover device sizes of up to 2**54 Byte (16 PiB) */ + __be32 update_extent_nr[AL_UPDATES_PER_TRANSACTION]; + + /* --- 420 bytes used (36 + 64*6) --- */ + + /* 4096 - 420 = 3676 = 919 * 4 */ + __be32 context[AL_CONTEXT_PER_TRANSACTION]; }; struct update_odbm_work { @@ -48,11 +94,8 @@ struct update_odbm_work { struct update_al_work { struct drbd_work w; - struct lc_element *al_ext; struct completion event; - unsigned int enr; - /* if old_enr != LC_FREE, write corresponding bitmap sector, too */ - unsigned int old_enr; + int err; }; struct drbd_atodb_wait { @@ -107,67 +150,30 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev, int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, sector_t sector, int rw) { - int logical_block_size, mask, ok; - int offset = 0; + int ok; struct page *iop = mdev->md_io_page; D_ASSERT(mutex_is_locked(&mdev->md_io_mutex)); BUG_ON(!bdev->md_bdev); - logical_block_size = bdev_logical_block_size(bdev->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - /* in case logical_block_size != 512 [ s390 only? ] */ - if (logical_block_size != MD_SECTOR_SIZE) { - mask = (logical_block_size / MD_SECTOR_SIZE) - 1; - D_ASSERT(mask == 1 || mask == 3 || mask == 7); - D_ASSERT(logical_block_size == (mask+1) * MD_SECTOR_SIZE); - offset = sector & mask; - sector = sector & ~mask; - iop = mdev->md_io_tmpp; - - if (rw & WRITE) { - /* these are GFP_KERNEL pages, pre-allocated - * on device initialization */ - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, - READ, logical_block_size); - - if (unlikely(!ok)) { - dev_err(DEV, "drbd_md_sync_page_io(,%llus," - "READ [logical_block_size!=512]) failed!\n", - (unsigned long long)sector); - return 0; - } - - memcpy(hp + offset*MD_SECTOR_SIZE, p, MD_SECTOR_SIZE); - } - } + dev_dbg(DEV, "meta_data io: %s [%d]:%s(,%llus,%s)\n", + current->comm, current->pid, __func__, + (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); if (sector < drbd_md_first_sector(bdev) || - sector > drbd_md_last_sector(bdev)) + sector + 7 > drbd_md_last_sector(bdev)) dev_alert(DEV, "%s [%d]:%s(,%llus,%s) out of range md access!\n", current->comm, current->pid, __func__, (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); - ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, logical_block_size); + ok = _drbd_md_sync_page_io(mdev, bdev, iop, sector, rw, MD_BLOCK_SIZE); if (unlikely(!ok)) { dev_err(DEV, "drbd_md_sync_page_io(,%llus,%s) failed!\n", (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); return 0; } - if (logical_block_size != MD_SECTOR_SIZE && !(rw & WRITE)) { - void *p = page_address(mdev->md_io_page); - void *hp = page_address(mdev->md_io_tmpp); - - memcpy(p, hp + offset*MD_SECTOR_SIZE, MD_SECTOR_SIZE); - } - return ok; } @@ -211,20 +217,34 @@ void drbd_al_begin_io(struct drbd_conf *mdev, sector_t sector) * current->bio_tail list now. * we have to delegate updates to the activity log * to the worker thread. */ - init_completion(&al_work.event); - al_work.al_ext = al_ext; - al_work.enr = enr; - al_work.old_enr = al_ext->lc_number; - al_work.w.cb = w_al_write_transaction; - al_work.w.mdev = mdev; - drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); - wait_for_completion(&al_work.event); - mdev->al_writ_cnt++; + /* Serialize multiple transactions. + * This uses test_and_set_bit, memory barrier is implicit. + * Optimization potential: + * first check for transaction number > old transaction number, + * so not all waiters have to lock/unlock. */ + wait_event(mdev->al_wait, lc_try_lock_for_transaction(mdev->act_log)); - spin_lock_irq(&mdev->al_lock); - lc_committed(mdev->act_log); - spin_unlock_irq(&mdev->al_lock); + /* Double check: it may have been committed by someone else, + * while we have been waiting for the lock. */ + if (al_ext->lc_number != enr) { + init_completion(&al_work.event); + al_work.w.cb = w_al_write_transaction; + al_work.w.mdev = mdev; + drbd_queue_work_front(&mdev->tconn->data.work, &al_work.w); + wait_for_completion(&al_work.event); + + mdev->al_writ_cnt++; + + spin_lock_irq(&mdev->al_lock); + /* FIXME + if (al_work.err) + we need an "lc_cancel" here; + */ + lc_committed(mdev->act_log); + spin_unlock_irq(&mdev->al_lock); + } + lc_unlock(mdev->act_log); wake_up(&mdev->al_wait); } } @@ -283,95 +303,118 @@ w_al_write_transaction(struct drbd_work *w, int unused) { struct update_al_work *aw = container_of(w, struct update_al_work, w); struct drbd_conf *mdev = w->mdev; - struct lc_element *updated = aw->al_ext; - const unsigned int new_enr = aw->enr; - const unsigned int evicted = aw->old_enr; - struct al_transaction *buffer; + struct al_transaction_on_disk *buffer; + struct lc_element *e; sector_t sector; - int i, n, mx; - unsigned int extent_nr; - u32 xor_sum = 0; + int i, mx; + unsigned extent_nr; + unsigned crc = 0; if (!get_ldev(mdev)) { - dev_err(DEV, - "disk is %s, cannot start al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + dev_err(DEV, "disk is %s, cannot start al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); return 1; } - /* do we have to do a bitmap write, first? - * TODO reduce maximum latency: - * submit both bios, then wait for both, - * instead of doing two synchronous sector writes. - * For now, we must not write the transaction, - * if we cannot write out the bitmap of the evicted extent. */ - if (mdev->state.conn < C_CONNECTED && evicted != LC_FREE) - drbd_bm_write_page(mdev, al_extent_to_bm_page(evicted)); /* The bitmap write may have failed, causing a state change. */ if (mdev->state.disk < D_INCONSISTENT) { dev_err(DEV, - "disk is %s, cannot write al transaction (-%d +%d)\n", - drbd_disk_str(mdev->state.disk), evicted, new_enr); + "disk is %s, cannot write al transaction\n", + drbd_disk_str(mdev->state.disk)); + aw->err = -EIO; complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */ - buffer = (struct al_transaction *)page_address(mdev->md_io_page); + buffer = page_address(mdev->md_io_page); - buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC); + memset(buffer, 0, sizeof(*buffer)); + buffer->magic = cpu_to_be32(DRBD_AL_MAGIC); buffer->tr_number = cpu_to_be32(mdev->al_tr_number); - n = lc_index_of(mdev->act_log, updated); + i = 0; + + /* Even though no one can start to change this list + * once we set the LC_LOCKED -- from drbd_al_begin_io(), + * lc_try_lock_for_transaction() --, someone may still + * be in the process of changing it. */ + spin_lock_irq(&mdev->al_lock); + list_for_each_entry(e, &mdev->act_log->to_be_changed, list) { + if (i == AL_UPDATES_PER_TRANSACTION) { + i++; + break; + } + buffer->update_slot_nr[i] = cpu_to_be16(e->lc_index); + buffer->update_extent_nr[i] = cpu_to_be32(e->lc_new_number); + if (e->lc_number != LC_FREE) + drbd_bm_mark_for_writeout(mdev, + al_extent_to_bm_page(e->lc_number)); + i++; + } + spin_unlock_irq(&mdev->al_lock); + BUG_ON(i > AL_UPDATES_PER_TRANSACTION); - buffer->updates[0].pos = cpu_to_be32(n); - buffer->updates[0].extent = cpu_to_be32(new_enr); + buffer->n_updates = cpu_to_be16(i); + for ( ; i < AL_UPDATES_PER_TRANSACTION; i++) { + buffer->update_slot_nr[i] = cpu_to_be16(-1); + buffer->update_extent_nr[i] = cpu_to_be32(LC_FREE); + } - xor_sum ^= new_enr; + buffer->context_size = cpu_to_be16(mdev->act_log->nr_elements); + buffer->context_start_slot_nr = cpu_to_be16(mdev->al_tr_cycle); - mx = min_t(int, AL_EXTENTS_PT, + mx = min_t(int, AL_CONTEXT_PER_TRANSACTION, mdev->act_log->nr_elements - mdev->al_tr_cycle); for (i = 0; i < mx; i++) { unsigned idx = mdev->al_tr_cycle + i; extent_nr = lc_element_by_index(mdev->act_log, idx)->lc_number; - buffer->updates[i+1].pos = cpu_to_be32(idx); - buffer->updates[i+1].extent = cpu_to_be32(extent_nr); - xor_sum ^= extent_nr; + buffer->context[i] = cpu_to_be32(extent_nr); } - for (; i < AL_EXTENTS_PT; i++) { - buffer->updates[i+1].pos = __constant_cpu_to_be32(-1); - buffer->updates[i+1].extent = __constant_cpu_to_be32(LC_FREE); - xor_sum ^= LC_FREE; - } - mdev->al_tr_cycle += AL_EXTENTS_PT; + for (; i < AL_CONTEXT_PER_TRANSACTION; i++) + buffer->context[i] = cpu_to_be32(LC_FREE); + + mdev->al_tr_cycle += AL_CONTEXT_PER_TRANSACTION; if (mdev->al_tr_cycle >= mdev->act_log->nr_elements) mdev->al_tr_cycle = 0; - buffer->xor_sum = cpu_to_be32(xor_sum); - sector = mdev->ldev->md.md_offset - + mdev->ldev->md.al_offset + mdev->al_tr_pos; - - if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) - drbd_chk_io_error(mdev, 1, true); + + mdev->ldev->md.al_offset + + mdev->al_tr_pos * (MD_BLOCK_SIZE>>9); - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + crc = crc32c(0, buffer, 4096); + buffer->crc32c = cpu_to_be32(crc); - D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE); - mdev->al_tr_number++; + if (drbd_bm_write_hinted(mdev)) + aw->err = -EIO; + /* drbd_chk_io_error done already */ + else if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) { + aw->err = -EIO; + drbd_chk_io_error(mdev, 1, true); + } else { + /* advance ringbuffer position and transaction counter */ + mdev->al_tr_pos = (mdev->al_tr_pos + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); + mdev->al_tr_number++; + } mutex_unlock(&mdev->md_io_mutex); - complete(&((struct update_al_work *)w)->event); put_ldev(mdev); return 1; } +/* FIXME + * reading of the activity log, + * and potentially dirtying of the affected bitmap regions, + * should be done from userland only. + * DRBD would simply always attach with an empty activity log, + * and refuse to attach to something that looks like a crashed primary. + */ + /** * drbd_al_read_tr() - Read a single transaction from the on disk activity log * @mdev: DRBD device. @@ -383,27 +426,39 @@ w_al_write_transaction(struct drbd_work *w, int unused) */ static int drbd_al_read_tr(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, - struct al_transaction *b, int index) { + struct al_transaction_on_disk *b = page_address(mdev->md_io_page); sector_t sector; - int rv, i; - u32 xor_sum = 0; + u32 crc; - sector = bdev->md.md_offset + bdev->md.al_offset + index; + sector = bdev->md.md_offset + + bdev->md.al_offset + + index * (MD_BLOCK_SIZE>>9); /* Dont process error normally, * as this is done before disk is attached! */ if (!drbd_md_sync_page_io(mdev, bdev, sector, READ)) return -1; - rv = (b->magic == cpu_to_be32(DRBD_MAGIC)); + if (!expect(b->magic == cpu_to_be32(DRBD_AL_MAGIC))) + return 0; + + if (!expect(be16_to_cpu(b->n_updates) <= AL_UPDATES_PER_TRANSACTION)) + return 0; - for (i = 0; i < AL_EXTENTS_PT + 1; i++) - xor_sum ^= be32_to_cpu(b->updates[i].extent); - rv &= (xor_sum == be32_to_cpu(b->xor_sum)); + if (!expect(be16_to_cpu(b->context_size) <= DRBD_AL_EXTENTS_MAX)) + return 0; - return rv; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < DRBD_AL_EXTENTS_MAX)) + return 0; + + crc = be32_to_cpu(b->crc32c); + b->crc32c = 0; + if (!expect(crc == crc32c(0, b, 4096))) + return 0; + + return 1; } /** @@ -415,7 +470,7 @@ static int drbd_al_read_tr(struct drbd_conf *mdev, */ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) { - struct al_transaction *buffer; + struct al_transaction_on_disk *b; int i; int rv; int mx; @@ -428,25 +483,36 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) u32 to_tnr = 0; u32 cnr; - mx = div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT); + /* Note that this is expected to be called with a newly created, + * clean and all unused activity log of the "expected size". + */ /* lock out all other meta data io for now, * and make sure the page is mapped. */ mutex_lock(&mdev->md_io_mutex); - buffer = page_address(mdev->md_io_page); + b = page_address(mdev->md_io_page); + + /* Always use the full ringbuffer space for now. + * possible optimization: read in all of it, + * then scan the in-memory pages. */ + + mx = (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* Find the valid transaction in the log */ - for (i = 0; i <= mx; i++) { - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + for (i = 0; i < mx; i++) { + rv = drbd_al_read_tr(mdev, bdev, i); + /* invalid data in that block */ if (rv == 0) continue; + + /* IO error */ if (rv == -1) { mutex_unlock(&mdev->md_io_mutex); return 0; } - cnr = be32_to_cpu(buffer->tr_number); + cnr = be32_to_cpu(b->tr_number); if (++found_valid == 1) { from = i; to = i; @@ -454,8 +520,11 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) to_tnr = cnr; continue; } + + D_ASSERT(cnr != to_tnr); + D_ASSERT(cnr != from_tnr); if ((int)cnr - (int)from_tnr < 0) { - D_ASSERT(from_tnr - cnr + i - from == mx+1); + D_ASSERT(from_tnr - cnr + i - from == mx); from = i; from_tnr = cnr; } @@ -476,11 +545,10 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) * dev_info(DEV, "Reading from %d to %d.\n",from,to); */ i = from; while (1) { - int j, pos; - unsigned int extent_nr; - unsigned int trn; + struct lc_element *e; + unsigned j, n, slot, extent_nr; - rv = drbd_al_read_tr(mdev, bdev, buffer, i); + rv = drbd_al_read_tr(mdev, bdev, i); if (!expect(rv != 0)) goto cancel; if (rv == -1) { @@ -488,23 +556,55 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) return 0; } - trn = be32_to_cpu(buffer->tr_number); + /* deal with different transaction types. + * not yet implemented */ + if (!expect(b->transaction_type == 0)) + goto cancel; - spin_lock_irq(&mdev->al_lock); + /* on the fly re-create/resize activity log? + * will be a special transaction type flag. */ + if (!expect(be16_to_cpu(b->context_size) == mdev->act_log->nr_elements)) + goto cancel; + if (!expect(be16_to_cpu(b->context_start_slot_nr) < mdev->act_log->nr_elements)) + goto cancel; - /* This loop runs backwards because in the cyclic - elements there might be an old version of the - updated element (in slot 0). So the element in slot 0 - can overwrite old versions. */ - for (j = AL_EXTENTS_PT; j >= 0; j--) { - pos = be32_to_cpu(buffer->updates[j].pos); - extent_nr = be32_to_cpu(buffer->updates[j].extent); + /* We are the only user of the activity log right now, + * don't actually need to take that lock. */ + spin_lock_irq(&mdev->al_lock); - if (extent_nr == LC_FREE) - continue; + /* first, apply the context, ... */ + for (j = 0, slot = be16_to_cpu(b->context_start_slot_nr); + j < AL_CONTEXT_PER_TRANSACTION && + slot < mdev->act_log->nr_elements; j++, slot++) { + extent_nr = be32_to_cpu(b->context[j]); + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); + } - lc_set(mdev->act_log, extent_nr, pos); - active_extents++; + /* ... then apply the updates, + * which override the context information. + * drbd_al_read_tr already did the rangecheck + * on n <= AL_UPDATES_PER_TRANSACTION */ + n = be16_to_cpu(b->n_updates); + for (j = 0; j < n; j++) { + slot = be16_to_cpu(b->update_slot_nr[j]); + extent_nr = be32_to_cpu(b->update_extent_nr[j]); + if (!expect(slot < mdev->act_log->nr_elements)) + break; + e = lc_element_by_index(mdev->act_log, slot); + if (e->lc_number != extent_nr) { + if (extent_nr != LC_FREE) + active_extents++; + else + active_extents--; + } + lc_set(mdev->act_log, extent_nr, slot); } spin_unlock_irq(&mdev->al_lock); @@ -514,15 +614,12 @@ cancel: if (i == to) break; i++; - if (i > mx) + if (i >= mx) i = 0; } mdev->al_tr_number = to_tnr+1; - mdev->al_tr_pos = to; - if (++mdev->al_tr_pos > - div_ceil(mdev->act_log->nr_elements, AL_EXTENTS_PT)) - mdev->al_tr_pos = 0; + mdev->al_tr_pos = (to + 1) % (MD_AL_SECTORS*512/MD_BLOCK_SIZE); /* ok, we are done with it */ mutex_unlock(&mdev->md_io_mutex); diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index edfdeb62c18f..3213808a898a 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1069,7 +1069,6 @@ struct drbd_conf { atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ wait_queue_head_t ee_wait; struct page *md_io_page; /* one page buffer for md_io */ - struct page *md_io_tmpp; /* for logical_block_size != 512 */ struct mutex md_io_mutex; /* protects the md_io_buffer */ spinlock_t al_lock; wait_queue_head_t al_wait; @@ -1259,22 +1258,39 @@ extern void drbd_ldev_destroy(struct drbd_conf *mdev); * either at the end of the backing device * or on a separate meta data device. */ -#define MD_RESERVED_SECT (128LU << 11) /* 128 MB, unit sectors */ /* The following numbers are sectors */ -#define MD_AL_OFFSET 8 /* 8 Sectors after start of meta area */ -#define MD_AL_MAX_SIZE 64 /* = 32 kb LOG ~ 3776 extents ~ 14 GB Storage */ -/* Allows up to about 3.8TB */ -#define MD_BM_OFFSET (MD_AL_OFFSET + MD_AL_MAX_SIZE) - -/* Since the smalles IO unit is usually 512 byte */ -#define MD_SECTOR_SHIFT 9 -#define MD_SECTOR_SIZE (1<ldev); mdev->ldev = NULL;); - if (mdev->md_io_tmpp) { - __free_page(mdev->md_io_tmpp); - mdev->md_io_tmpp = NULL; - } clear_bit(GO_DISKLESS, &mdev->flags); } diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 0a92f5226c2a..90d731723205 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -527,7 +527,7 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, case DRBD_MD_INDEX_FLEX_INT: bdev->md.md_offset = drbd_md_ss__(mdev, bdev); /* al size is still fixed */ - bdev->md.al_offset = -MD_AL_MAX_SIZE; + bdev->md.al_offset = -MD_AL_SECTORS; /* we need (slightly less than) ~ this much bitmap sectors: */ md_size_sect = drbd_get_capacity(bdev->backing_bdev); md_size_sect = ALIGN(md_size_sect, BM_SECT_PER_EXT); @@ -751,8 +751,8 @@ static int drbd_check_al_size(struct drbd_conf *mdev) unsigned int in_use; int i; - if (!expect(mdev->sync_conf.al_extents >= 7)) - mdev->sync_conf.al_extents = 127; + if (!expect(mdev->sync_conf.al_extents >= DRBD_AL_EXTENTS_MIN)) + mdev->sync_conf.al_extents = DRBD_AL_EXTENTS_MIN; if (mdev->act_log && mdev->act_log->nr_elements == mdev->sync_conf.al_extents) @@ -760,7 +760,7 @@ static int drbd_check_al_size(struct drbd_conf *mdev) in_use = 0; t = mdev->act_log; - n = lc_create("act_log", drbd_al_ext_cache, 1, + n = lc_create("act_log", drbd_al_ext_cache, AL_UPDATES_PER_TRANSACTION, mdev->sync_conf.al_extents, sizeof(struct lc_element), 0); if (n == NULL) { @@ -932,7 +932,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp union drbd_state ns, os; enum drbd_state_rv rv; int cp_discovered = 0; - int logical_block_size; drbd_reconfig_start(mdev); @@ -1087,25 +1086,6 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp drbd_md_set_sector_offsets(mdev, nbc); - /* allocate a second IO page if logical_block_size != 512 */ - logical_block_size = bdev_logical_block_size(nbc->md_bdev); - if (logical_block_size == 0) - logical_block_size = MD_SECTOR_SIZE; - - if (logical_block_size != MD_SECTOR_SIZE) { - if (!mdev->md_io_tmpp) { - struct page *page = alloc_page(GFP_NOIO); - if (!page) - goto force_diskless_dec; - - dev_warn(DEV, "Meta data's bdev logical_block_size = %d != %d\n", - logical_block_size, MD_SECTOR_SIZE); - dev_warn(DEV, "Workaround engaged (has performance impact).\n"); - - mdev->md_io_tmpp = page; - } - } - if (!mdev->bitmap) { if (drbd_bm_init(mdev)) { retcode = ERR_NOMEM; @@ -1804,14 +1784,12 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.rate >= 1)) sc.rate = 1; - if (!expect(sc.al_extents >= 7)) - sc.al_extents = 127; /* arbitrary minimum */ -#define AL_MAX ((MD_AL_MAX_SIZE-1) * AL_EXTENTS_PT) - if (sc.al_extents > AL_MAX) { - dev_err(DEV, "sc.al_extents > %d\n", AL_MAX); - sc.al_extents = AL_MAX; - } -#undef AL_MAX + + /* clip to allowed range */ + if (!expect(sc.al_extents >= DRBD_AL_EXTENTS_MIN)) + sc.al_extents = DRBD_AL_EXTENTS_MIN; + if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) + sc.al_extents = DRBD_AL_EXTENTS_MAX; /* to avoid spurious errors when configuring minors before configuring * the minors they depend on: if necessary, first create the minor we diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 35fc08a0a552..70a688b92c1b 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -336,6 +336,10 @@ enum drbd_timeout_flag { #define DRBD_MAGIC 0x83740267 #define DRBD_MAGIC_BIG 0x835a +/* how I came up with this magic? + * base64 decode "actlog==" ;) */ +#define DRBD_AL_MAGIC 0x69cb65a2 + /* these are of type "int" */ #define DRBD_MD_INDEX_INTERNAL -1 #define DRBD_MD_INDEX_FLEX_EXT -2 diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 447c36752385..75f05af33725 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -102,10 +102,12 @@ #define DRBD_RATE_DEF 250 /* kb/second */ /* less than 7 would hit performance unnecessarily. - * 3833 is the largest prime that still does fit - * into 64 sectors of activity log */ + * 919 slots context information per transaction, + * 32k activity log, 4k transaction size, + * one transaction in flight: + * 919 * 7 = 6433 */ #define DRBD_AL_EXTENTS_MIN 7 -#define DRBD_AL_EXTENTS_MAX 3833 +#define DRBD_AL_EXTENTS_MAX 6433 #define DRBD_AL_EXTENTS_DEF 127 #define DRBD_AFTER_MIN -1 -- cgit v1.2.3 From 1aba4d7fcfabe999e0c99683b394aa76d5c42842 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Feb 2011 15:38:08 +0100 Subject: drbd: Preparing the connector interface to operator on connections Up to now it only operated on minor numbers. Now it can work also on named connections. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 1 + drivers/block/drbd/drbd_main.c | 15 +++++++ drivers/block/drbd/drbd_nl.c | 96 +++++++++++++++++++++++++++--------------- include/linux/drbd.h | 19 +++++++-- 4 files changed, 94 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 48367e53a7a5..033af1995867 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1479,6 +1479,7 @@ extern void drbd_free_mdev(struct drbd_conf *mdev); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); +struct drbd_tconn *conn_by_name(const char *name); extern int proc_details; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index cbec5ff2cc74..4761426f9ad7 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -2196,6 +2196,21 @@ static void drbd_init_workqueue(struct drbd_work_queue* wq) INIT_LIST_HEAD(&wq->q); } +struct drbd_tconn *conn_by_name(const char *name) +{ + struct drbd_tconn *tconn; + + write_lock_irq(&global_state_lock); + list_for_each_entry(tconn, &drbd_tconns, all_tconn) { + if (!strcmp(tconn->name, name)) + goto found; + } + tconn = NULL; +found: + write_unlock_irq(&global_state_lock); + return tconn; +} + struct drbd_tconn *drbd_new_tconn(char *name) { struct drbd_tconn *tconn; diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index b141f891f643..27a43d138f6b 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -2184,42 +2184,57 @@ out: return 0; } +enum cn_handler_type { + CHT_MINOR, + CHT_CONN, + CHT_CTOR, + /* CHT_RES, later */ +}; + struct cn_handler_struct { - int (*function)(struct drbd_conf *, - struct drbd_nl_cfg_req *, - struct drbd_nl_cfg_reply *); + enum cn_handler_type type; + union { + int (*minor_based)(struct drbd_conf *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*conn_based)(struct drbd_tconn *, + struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + int (*constructor)(struct drbd_nl_cfg_req *, + struct drbd_nl_cfg_reply *); + }; int reply_body_size; }; static struct cn_handler_struct cnd_table[] = { - [ P_primary ] = { &drbd_nl_primary, 0 }, - [ P_secondary ] = { &drbd_nl_secondary, 0 }, - [ P_disk_conf ] = { &drbd_nl_disk_conf, 0 }, - [ P_detach ] = { &drbd_nl_detach, 0 }, - [ P_net_conf ] = { &drbd_nl_net_conf, 0 }, - [ P_disconnect ] = { &drbd_nl_disconnect, 0 }, - [ P_resize ] = { &drbd_nl_resize, 0 }, - [ P_syncer_conf ] = { &drbd_nl_syncer_conf, 0 }, - [ P_invalidate ] = { &drbd_nl_invalidate, 0 }, - [ P_invalidate_peer ] = { &drbd_nl_invalidate_peer, 0 }, - [ P_pause_sync ] = { &drbd_nl_pause_sync, 0 }, - [ P_resume_sync ] = { &drbd_nl_resume_sync, 0 }, - [ P_suspend_io ] = { &drbd_nl_suspend_io, 0 }, - [ P_resume_io ] = { &drbd_nl_resume_io, 0 }, - [ P_outdate ] = { &drbd_nl_outdate, 0 }, - [ P_get_config ] = { &drbd_nl_get_config, + [ P_primary ] = { CHT_MINOR, { &drbd_nl_primary }, 0 }, + [ P_secondary ] = { CHT_MINOR, { &drbd_nl_secondary }, 0 }, + [ P_disk_conf ] = { CHT_MINOR, { &drbd_nl_disk_conf }, 0 }, + [ P_detach ] = { CHT_MINOR, { &drbd_nl_detach }, 0 }, + [ P_net_conf ] = { CHT_MINOR, { &drbd_nl_net_conf }, 0 }, + [ P_disconnect ] = { CHT_MINOR, { &drbd_nl_disconnect }, 0 }, + [ P_resize ] = { CHT_MINOR, { &drbd_nl_resize }, 0 }, + [ P_syncer_conf ] = { CHT_MINOR, { &drbd_nl_syncer_conf },0 }, + [ P_invalidate ] = { CHT_MINOR, { &drbd_nl_invalidate }, 0 }, + [ P_invalidate_peer ] = { CHT_MINOR, { &drbd_nl_invalidate_peer },0 }, + [ P_pause_sync ] = { CHT_MINOR, { &drbd_nl_pause_sync }, 0 }, + [ P_resume_sync ] = { CHT_MINOR, { &drbd_nl_resume_sync },0 }, + [ P_suspend_io ] = { CHT_MINOR, { &drbd_nl_suspend_io }, 0 }, + [ P_resume_io ] = { CHT_MINOR, { &drbd_nl_resume_io }, 0 }, + [ P_outdate ] = { CHT_MINOR, { &drbd_nl_outdate }, 0 }, + [ P_get_config ] = { CHT_MINOR, { &drbd_nl_get_config }, sizeof(struct syncer_conf_tag_len_struct) + sizeof(struct disk_conf_tag_len_struct) + sizeof(struct net_conf_tag_len_struct) }, - [ P_get_state ] = { &drbd_nl_get_state, + [ P_get_state ] = { CHT_MINOR, { &drbd_nl_get_state }, sizeof(struct get_state_tag_len_struct) + sizeof(struct sync_progress_tag_len_struct) }, - [ P_get_uuids ] = { &drbd_nl_get_uuids, + [ P_get_uuids ] = { CHT_MINOR, { &drbd_nl_get_uuids }, sizeof(struct get_uuids_tag_len_struct) }, - [ P_get_timeout_flag ] = { &drbd_nl_get_timeout_flag, + [ P_get_timeout_flag ] = { CHT_MINOR, { &drbd_nl_get_timeout_flag }, sizeof(struct get_timeout_flag_tag_len_struct)}, - [ P_start_ov ] = { &drbd_nl_start_ov, 0 }, - [ P_new_c_uuid ] = { &drbd_nl_new_c_uuid, 0 }, + [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, + [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) @@ -2229,6 +2244,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms struct cn_msg *cn_reply; struct drbd_nl_cfg_reply *reply; struct drbd_conf *mdev; + struct drbd_tconn *tconn; int retcode, rr; int reply_size = sizeof(struct cn_msg) + sizeof(struct drbd_nl_cfg_reply) @@ -2244,13 +2260,6 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms goto fail; } - mdev = ensure_mdev(nlp->drbd_minor, - (nlp->flags & DRBD_NL_CREATE_DEVICE)); - if (!mdev) { - retcode = ERR_MINOR_INVALID; - goto fail; - } - if (nlp->packet_type >= P_nl_after_last_packet || nlp->packet_type == P_return_code_only) { retcode = ERR_PACKET_NR; @@ -2260,7 +2269,7 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms cm = cnd_table + nlp->packet_type; /* This may happen if packet number is 0: */ - if (cm->function == NULL) { + if (cm->minor_based == NULL) { retcode = ERR_PACKET_NR; goto fail; } @@ -2281,7 +2290,28 @@ static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms reply->ret_code = NO_ERROR; /* Might by modified by cm->function. */ /* reply->tag_list; might be modified by cm->function. */ - rr = cm->function(mdev, nlp, reply); + retcode = ERR_MINOR_INVALID; + rr = 0; + switch (cm->type) { + case CHT_MINOR: + mdev = minor_to_mdev(nlp->drbd_minor); + if (!mdev) + goto fail; + rr = cm->minor_based(mdev, nlp, reply); + break; + case CHT_CONN: + tconn = conn_by_name(nlp->obj_name); + if (!tconn) { + retcode = ERR_CONN_NOT_KNOWN; + goto fail; + } + rr = cm->conn_based(tconn, nlp, reply); + break; + case CHT_CTOR: + rr = cm->constructor(nlp, reply); + break; + /* case CHT_RES: */ + } cn_reply->id = req->id; cn_reply->seq = req->seq; diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 70a688b92c1b..7683b4ab6583 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -155,6 +155,7 @@ enum drbd_ret_code { ERR_CONG_NOT_PROTO_A = 155, ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, + ERR_CONN_NOT_KNOWN = 158, /* insert new ones above this line */ AFTER_LAST_ERR_CODE @@ -347,8 +348,11 @@ enum drbd_timeout_flag { /* Start of the new netlink/connector stuff */ -#define DRBD_NL_CREATE_DEVICE 0x01 -#define DRBD_NL_SET_DEFAULTS 0x02 +enum drbd_ncr_flags { + DRBD_NL_CREATE_DEVICE = 0x01, + DRBD_NL_SET_DEFAULTS = 0x02, +}; +#define DRBD_NL_OBJ_NAME_LEN 32 /* For searching a vacant cn_idx value */ @@ -356,8 +360,15 @@ enum drbd_timeout_flag { struct drbd_nl_cfg_req { int packet_type; - unsigned int drbd_minor; - int flags; + union { + struct { + unsigned int drbd_minor; + enum drbd_ncr_flags flags; + }; + struct { + char obj_name[DRBD_NL_OBJ_NAME_LEN]; + }; + }; unsigned short tag_list[]; }; -- cgit v1.2.3 From 774b305518a68a50df4f479bcf79da2add724e6e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Feb 2011 02:07:03 -0500 Subject: drbd: Implemented new commands to create/delete connections/minors Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 4 +- drivers/block/drbd/drbd_main.c | 68 ++++++++++++++------------ drivers/block/drbd/drbd_nl.c | 106 +++++++++++++++++++++++++---------------- include/linux/drbd.h | 3 ++ include/linux/drbd_nl.h | 12 +++++ 5 files changed, 120 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index a27e2a4e038d..535d503886d8 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1258,7 +1258,6 @@ extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); extern void drbd_go_diskless(struct drbd_conf *mdev); extern void drbd_ldev_destroy(struct drbd_conf *mdev); - /* Meta data layout We reserve a 128MB Block (4k aligned) * either at the end of the backing device @@ -1476,8 +1475,9 @@ extern wait_queue_head_t drbd_pp_wait; extern rwlock_t global_state_lock; extern int conn_lowest_minor(struct drbd_tconn *tconn); -extern struct drbd_conf *drbd_new_device(unsigned int minor); +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr); extern void drbd_free_mdev(struct drbd_conf *mdev); +extern void drbd_delete_device(unsigned int minor); struct drbd_tconn *drbd_new_tconn(char *name); extern void drbd_free_tconn(struct drbd_tconn *tconn); diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 2bfd63058f40..ec7d0d98657c 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -614,13 +614,16 @@ char *drbd_task_to_thread_name(struct drbd_tconn *tconn, struct task_struct *tas return thi ? thi->name : task->comm; } -#ifdef CONFIG_SMP int conn_lowest_minor(struct drbd_tconn *tconn) { int minor = 0; - idr_get_next(&tconn->volumes, &minor); + + if (!idr_get_next(&tconn->volumes, &minor)) + return -1; return minor; } + +#ifdef CONFIG_SMP /** * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs * @mdev: DRBD device. @@ -2078,15 +2081,16 @@ static void drbd_release_ee_lists(struct drbd_conf *mdev) dev_err(DEV, "%d EEs in net list found!\n", rr); } -/* caution. no locking. - * currently only used from module cleanup code. */ -static void drbd_delete_device(unsigned int minor) +/* caution. no locking. */ +void drbd_delete_device(unsigned int minor) { struct drbd_conf *mdev = minor_to_mdev(minor); if (!mdev) return; + idr_remove(&mdev->tconn->volumes, minor); + /* paranoia asserts */ D_ASSERT(mdev->open_cnt == 0); D_ASSERT(list_empty(&mdev->tconn->data.work.q)); @@ -2101,7 +2105,6 @@ static void drbd_delete_device(unsigned int minor) bdput(mdev->this_bdev); drbd_free_resources(mdev); - drbd_free_tconn(mdev->tconn); drbd_release_ee_lists(mdev); @@ -2223,6 +2226,9 @@ struct drbd_tconn *drbd_new_tconn(char *name) if (!tconn->name) goto fail; + if (!zalloc_cpumask_var(&tconn->cpu_mask, GFP_KERNEL)) + goto fail; + if (!tl_init(tconn)) goto fail; @@ -2252,6 +2258,7 @@ struct drbd_tconn *drbd_new_tconn(char *name) fail: tl_cleanup(tconn); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn); @@ -2265,6 +2272,7 @@ void drbd_free_tconn(struct drbd_tconn *tconn) write_unlock_irq(&global_state_lock); idr_destroy(&tconn->volumes); + free_cpumask_var(tconn->cpu_mask); kfree(tconn->name); kfree(tconn->int_dig_out); kfree(tconn->int_dig_in); @@ -2272,32 +2280,31 @@ void drbd_free_tconn(struct drbd_tconn *tconn) kfree(tconn); } -struct drbd_conf *drbd_new_device(unsigned int minor) +enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr) { struct drbd_conf *mdev; struct gendisk *disk; struct request_queue *q; - char conn_name[9]; /* drbd1234N */ - int vnr; + int vnr_got = vnr; + + mdev = minor_to_mdev(minor); + if (mdev) + return ERR_MINOR_EXISTS; /* GFP_KERNEL, we are outside of all write-out paths */ mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL); if (!mdev) - return NULL; - sprintf(conn_name, "drbd%d", minor); - mdev->tconn = drbd_new_tconn(conn_name); - if (!mdev->tconn) - goto out_no_tconn; - if (!idr_pre_get(&mdev->tconn->volumes, GFP_KERNEL)) - goto out_no_cpumask; - if (idr_get_new(&mdev->tconn->volumes, mdev, &vnr)) - goto out_no_cpumask; - if (vnr != 0) { - dev_err(DEV, "vnr = %d\n", vnr); - goto out_no_cpumask; - } - if (!zalloc_cpumask_var(&mdev->tconn->cpu_mask, GFP_KERNEL)) - goto out_no_cpumask; + return ERR_NOMEM; + + mdev->tconn = tconn; + if (!idr_pre_get(&tconn->volumes, GFP_KERNEL)) + goto out_no_idr; + if (idr_get_new(&tconn->volumes, mdev, &vnr_got)) + goto out_no_idr; + if (vnr_got != vnr) { + dev_err(DEV, "vnr_got (%d) != vnr (%d)\n", vnr_got, vnr); + goto out_no_q; + } mdev->minor = minor; @@ -2354,7 +2361,10 @@ struct drbd_conf *drbd_new_device(unsigned int minor) INIT_LIST_HEAD(&mdev->current_epoch->list); mdev->epochs = 1; - return mdev; + minor_table[minor] = mdev; + add_disk(disk); + + return NO_ERROR; /* out_whatever_else: kfree(mdev->current_epoch); */ @@ -2367,12 +2377,10 @@ out_no_io_page: out_no_disk: blk_cleanup_queue(q); out_no_q: - free_cpumask_var(mdev->tconn->cpu_mask); -out_no_cpumask: - drbd_free_tconn(mdev->tconn); -out_no_tconn: + idr_remove(&tconn->volumes, vnr_got); +out_no_idr: kfree(mdev); - return NULL; + return ERR_NOMEM; } /* counterpart of drbd_new_device. diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 455a51dd364d..f2739fd188a0 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -443,40 +443,6 @@ drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, int force) return rv; } -static struct drbd_conf *ensure_mdev(int minor, int create) -{ - struct drbd_conf *mdev; - - if (minor >= minor_count) - return NULL; - - mdev = minor_to_mdev(minor); - - if (!mdev && create) { - struct gendisk *disk = NULL; - mdev = drbd_new_device(minor); - - spin_lock_irq(&drbd_pp_lock); - if (minor_table[minor] == NULL) { - minor_table[minor] = mdev; - disk = mdev->vdisk; - mdev = NULL; - } /* else: we lost the race */ - spin_unlock_irq(&drbd_pp_lock); - - if (disk) /* we won the race above */ - /* in case we ever add a drbd_delete_device(), - * don't forget the del_gendisk! */ - add_disk(disk); - else /* we lost the race above */ - drbd_free_mdev(mdev); - - mdev = minor_to_mdev(minor); - } - - return mdev; -} - static int drbd_nl_primary(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) { @@ -1789,12 +1755,6 @@ static int drbd_nl_syncer_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *n if (!expect(sc.al_extents <= DRBD_AL_EXTENTS_MAX)) sc.al_extents = DRBD_AL_EXTENTS_MAX; - /* to avoid spurious errors when configuring minors before configuring - * the minors they depend on: if necessary, first create the minor we - * depend on */ - if (sc.after >= 0) - ensure_mdev(sc.after, 1); - /* most sanity checks done, try to assign the new sync-after * dependency. need to hold the global lock in there, * to avoid a race in the dependency loop check. */ @@ -2184,13 +2144,73 @@ out: return 0; } +static int drbd_nl_new_conn(struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_connection args; + + if (!new_connection_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = NO_ERROR; + if (!drbd_new_tconn(args.name)) + reply->ret_code = ERR_NOMEM; + + return 0; +} + +static int drbd_nl_new_minor(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + struct new_minor args; + + args.vol_nr = 0; + args.minor = 0; + + if (!new_minor_from_tags(nlp->tag_list, &args)) { + reply->ret_code = ERR_MANDATORY_TAG; + return 0; + } + + reply->ret_code = conn_new_minor(tconn, args.minor, args.vol_nr); + + return 0; +} + +static int drbd_nl_del_minor(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, + struct drbd_nl_cfg_reply *reply) +{ + if (mdev->state.disk == D_DISKLESS && + mdev->state.conn == C_STANDALONE && + mdev->state.role == R_SECONDARY) { + drbd_delete_device(mdev_to_minor(mdev)); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_MINOR_CONFIGURED; + } + return 0; +} + +static int drbd_nl_del_conn(struct drbd_tconn *tconn, + struct drbd_nl_cfg_req *nlp, struct drbd_nl_cfg_reply *reply) +{ + if (conn_lowest_minor(tconn) < 0) { + drbd_free_tconn(tconn); + reply->ret_code = NO_ERROR; + } else { + reply->ret_code = ERR_CONN_IN_USE; + } + + return 0; +} + enum cn_handler_type { CHT_MINOR, CHT_CONN, CHT_CTOR, /* CHT_RES, later */ }; - struct cn_handler_struct { enum cn_handler_type type; union { @@ -2235,6 +2255,10 @@ static struct cn_handler_struct cnd_table[] = { sizeof(struct get_timeout_flag_tag_len_struct)}, [ P_start_ov ] = { CHT_MINOR, { &drbd_nl_start_ov }, 0 }, [ P_new_c_uuid ] = { CHT_MINOR, { &drbd_nl_new_c_uuid }, 0 }, + [ P_new_connection ] = { CHT_CTOR, { .constructor = &drbd_nl_new_conn }, 0 }, + [ P_new_minor ] = { CHT_CONN, { .conn_based = &drbd_nl_new_minor }, 0 }, + [ P_del_minor ] = { CHT_MINOR, { &drbd_nl_del_minor }, 0 }, + [ P_del_connection ] = { CHT_CONN, { .conn_based = &drbd_nl_del_conn }, 0 }, }; static void drbd_connector_callback(struct cn_msg *req, struct netlink_skb_parms *nsp) diff --git a/include/linux/drbd.h b/include/linux/drbd.h index 7683b4ab6583..e192167e6145 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -156,6 +156,9 @@ enum drbd_ret_code { ERR_PIC_AFTER_DEP = 156, ERR_PIC_PEER_DEP = 157, ERR_CONN_NOT_KNOWN = 158, + ERR_CONN_IN_USE = 159, + ERR_MINOR_CONFIGURED = 160, + ERR_MINOR_EXISTS = 161, /* insert new ones above this line */ AFTER_LAST_ERR_CODE diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index ab6159e4fcf0..1216c7a432c5 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -152,6 +152,18 @@ NL_PACKET(new_c_uuid, 26, NL_RESPONSE(return_code_only, 27) #endif +NL_PACKET(new_connection, 28, /* CHT_CTOR */ + NL_STRING( 85, T_MANDATORY, name, DRBD_NL_OBJ_NAME_LEN) +) + +NL_PACKET(new_minor, 29, /* CHT_CONN */ + NL_INTEGER( 86, T_MANDATORY, minor) + NL_INTEGER( 87, T_MANDATORY, vol_nr) +) + +NL_PACKET(del_minor, 30, ) /* CHT_MINOR */ +NL_PACKET(del_connection, 31, ) /* CHT_CONN */ + #undef NL_PACKET #undef NL_INTEGER #undef NL_INT64 -- cgit v1.2.3 From a5df0e199cf6b31400fa86f6c3f73fa6e127e9ed Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 23 Feb 2011 12:51:43 +0100 Subject: drbd: default to detach on-io-error Old default behaviour was "pass-on", which is not useful in production at all. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h index 75f05af33725..22920a8af4e2 100644 --- a/include/linux/drbd_limits.h +++ b/include/linux/drbd_limits.h @@ -125,7 +125,7 @@ #define DRBD_DISK_SIZE_SECT_MAX (1 * (2LLU << 40)) #define DRBD_DISK_SIZE_SECT_DEF 0 /* = disabled = no user size... */ -#define DRBD_ON_IO_ERROR_DEF EP_PASS_ON +#define DRBD_ON_IO_ERROR_DEF EP_DETACH #define DRBD_FENCING_DEF FP_DONT_CARE #define DRBD_AFTER_SB_0P_DEF ASB_DISCONNECT #define DRBD_AFTER_SB_1P_DEF ASB_DISCONNECT -- cgit v1.2.3 From ec2c35ac1ea288f5c931e32452ecea50068e8450 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 7 Mar 2011 10:20:08 +0100 Subject: drbd: prepare the transition from connector to genetlink This adds the new API header and helper files. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd_genl.h | 349 +++++++++++++++++++++++++++++++ include/linux/drbd_genl_api.h | 55 +++++ include/linux/genl_magic_func.h | 417 ++++++++++++++++++++++++++++++++++++++ include/linux/genl_magic_struct.h | 260 ++++++++++++++++++++++++ 4 files changed, 1081 insertions(+) create mode 100644 include/linux/drbd_genl.h create mode 100644 include/linux/drbd_genl_api.h create mode 100644 include/linux/genl_magic_func.h create mode 100644 include/linux/genl_magic_struct.h (limited to 'include/linux') diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h new file mode 100644 index 000000000000..84e16848f7a1 --- /dev/null +++ b/include/linux/drbd_genl.h @@ -0,0 +1,349 @@ +/* + * General overview: + * full generic netlink message: + * |nlmsghdr|genlmsghdr| + * + * payload: + * |optional fixed size family header| + * + * sequence of netlink attributes: + * I chose to have all "top level" attributes NLA_NESTED, + * corresponding to some real struct. + * So we have a sequence of |tla, len| + * + * nested nla sequence: + * may be empty, or contain a sequence of netlink attributes + * representing the struct fields. + * + * The tag number of any field (regardless of containing struct) + * will be available as T_ ## field_name, + * so you cannot have the same field name in two differnt structs. + * + * The tag numbers themselves are per struct, though, + * so should always begin at 1 (not 0, that is the special "NLA_UNSPEC" type, + * which we won't use here). + * The tag numbers are used as index in the respective nla_policy array. + * + * GENL_struct(tag_name, tag_number, struct name, struct fields) - struct and policy + * genl_magic_struct.h + * generates the struct declaration, + * generates an entry in the tla enum, + * genl_magic_func.h + * generates an entry in the static tla policy + * with .type = NLA_NESTED + * generates the static _nl_policy definition, + * and static conversion functions + * + * genl_magic_func.h + * + * GENL_mc_group(group) + * genl_magic_struct.h + * does nothing + * genl_magic_func.h + * defines and registers the mcast group, + * and provides a send helper + * + * GENL_notification(op_name, op_num, mcast_group, tla list) + * These are notifications to userspace. + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * does nothing + * + * mcast group: the name of the mcast group this notification should be + * expected on + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + * + * GENL_op(op_name, op_num, flags and handler, tla list) - "genl operations" + * These are requests from userspace. + * + * _op and _notification share the same "number space", + * op_nr will be assigned to "genlmsghdr->cmd" + * + * genl_magic_struct.h + * generates an entry in the genl_ops enum, + * genl_magic_func.h + * generates an entry in the static genl_ops array, + * and static register/unregister functions to + * genl_register_family_with_ops(). + * + * flags and handler: + * GENL_op_init( .doit = x, .dumpit = y, .flags = something) + * GENL_doit(x) => .dumpit = NULL, .flags = GENL_ADMIN_PERM + * tla list: the list of expected top level attributes, + * for documentation and sanity checking. + */ + +/* + * STRUCTS + */ + +/* this is sent kernel -> userland on various error conditions, and contains + * informational textual info, which is supposedly human readable. + * The computer relevant return code is in the drbd_genlmsghdr. + */ +GENL_struct(DRBD_NLA_CFG_REPLY, 1, drbd_cfg_reply, + /* "arbitrary" size strings, nla_policy.len = 0 */ + __str_field(1, GENLA_F_MANDATORY, info_text, 0) +) + +/* Configuration requests typically need a context to operate on. + * Possible keys are device minor (fits in the drbd_genlmsghdr), + * the replication link (aka connection) name, + * and/or the replication group (aka resource) name, + * and the volume id within the resource. */ +GENL_struct(DRBD_NLA_CFG_CONTEXT, 2, drbd_cfg_context, + /* currently only 256 volumes per group, + * but maybe we still change that */ + __u32_field(1, GENLA_F_MANDATORY, ctx_volume) + __str_field(2, GENLA_F_MANDATORY, ctx_conn_name, 128) +) + +GENL_struct(DRBD_NLA_DISK_CONF, 3, disk_conf, + __u64_field(1, GENLA_F_MANDATORY, disk_size) + __str_field(2, GENLA_F_REQUIRED, backing_dev, 128) + __str_field(3, GENLA_F_REQUIRED, meta_dev, 128) + __u32_field(4, GENLA_F_REQUIRED, meta_dev_idx) + __u32_field(5, GENLA_F_MANDATORY, max_bio_bvecs) + __u32_field(6, GENLA_F_MANDATORY, on_io_error) + __u32_field(7, GENLA_F_MANDATORY, fencing) + __flg_field(8, GENLA_F_MANDATORY, no_disk_barrier) + __flg_field(9, GENLA_F_MANDATORY, no_disk_flush) + __flg_field(10, GENLA_F_MANDATORY, no_disk_drain) + __flg_field(11, GENLA_F_MANDATORY, no_md_flush) + __flg_field(12, GENLA_F_MANDATORY, use_bmbv) +) + +GENL_struct(DRBD_NLA_SYNCER_CONF, 4, syncer_conf, + __u32_field(1, GENLA_F_MANDATORY, rate) + __u32_field(2, GENLA_F_MANDATORY, after) + __u32_field(3, GENLA_F_MANDATORY, al_extents) + __str_field(4, GENLA_F_MANDATORY, cpu_mask, 32) + __str_field(5, GENLA_F_MANDATORY, verify_alg, SHARED_SECRET_MAX) + __str_field(6, GENLA_F_MANDATORY, csums_alg, SHARED_SECRET_MAX) + __flg_field(7, GENLA_F_MANDATORY, use_rle) + __u32_field(8, GENLA_F_MANDATORY, on_no_data) + __u32_field(9, GENLA_F_MANDATORY, c_plan_ahead) + __u32_field(10, GENLA_F_MANDATORY, c_delay_target) + __u32_field(11, GENLA_F_MANDATORY, c_fill_target) + __u32_field(12, GENLA_F_MANDATORY, c_max_rate) + __u32_field(13, GENLA_F_MANDATORY, c_min_rate) +) + +GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf, + __str_field(1, GENLA_F_MANDATORY | GENLA_F_SENSITIVE, + shared_secret, SHARED_SECRET_MAX) + __str_field(2, GENLA_F_MANDATORY, cram_hmac_alg, SHARED_SECRET_MAX) + __str_field(3, GENLA_F_MANDATORY, integrity_alg, SHARED_SECRET_MAX) + __str_field(4, GENLA_F_REQUIRED, my_addr, 128) + __str_field(5, GENLA_F_REQUIRED, peer_addr, 128) + __u32_field(6, GENLA_F_REQUIRED, wire_protocol) + __u32_field(7, GENLA_F_MANDATORY, try_connect_int) + __u32_field(8, GENLA_F_MANDATORY, timeout) + __u32_field(9, GENLA_F_MANDATORY, ping_int) + __u32_field(10, GENLA_F_MANDATORY, ping_timeo) + __u32_field(11, GENLA_F_MANDATORY, sndbuf_size) + __u32_field(12, GENLA_F_MANDATORY, rcvbuf_size) + __u32_field(13, GENLA_F_MANDATORY, ko_count) + __u32_field(14, GENLA_F_MANDATORY, max_buffers) + __u32_field(15, GENLA_F_MANDATORY, max_epoch_size) + __u32_field(16, GENLA_F_MANDATORY, unplug_watermark) + __u32_field(17, GENLA_F_MANDATORY, after_sb_0p) + __u32_field(18, GENLA_F_MANDATORY, after_sb_1p) + __u32_field(19, GENLA_F_MANDATORY, after_sb_2p) + __u32_field(20, GENLA_F_MANDATORY, rr_conflict) + __u32_field(21, GENLA_F_MANDATORY, on_congestion) + __u32_field(22, GENLA_F_MANDATORY, cong_fill) + __u32_field(23, GENLA_F_MANDATORY, cong_extents) + __flg_field(24, GENLA_F_MANDATORY, two_primaries) + __flg_field(25, GENLA_F_MANDATORY, want_lose) + __flg_field(26, GENLA_F_MANDATORY, no_cork) + __flg_field(27, GENLA_F_MANDATORY, always_asbp) + __flg_field(28, GENLA_F_MANDATORY, dry_run) +) + +GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, + __flg_field(1, GENLA_F_MANDATORY, assume_uptodate) +) + +GENL_struct(DRBD_NLA_RESIZE_PARMS, 7, resize_parms, + __u64_field(1, GENLA_F_MANDATORY, resize_size) + __flg_field(2, GENLA_F_MANDATORY, resize_force) + __flg_field(3, GENLA_F_MANDATORY, no_resync) +) + +GENL_struct(DRBD_NLA_STATE_INFO, 8, state_info, + /* the reason of the broadcast, + * if this is an event triggered broadcast. */ + __u32_field(1, GENLA_F_MANDATORY, sib_reason) + __u32_field(2, GENLA_F_REQUIRED, current_state) + __u64_field(3, GENLA_F_MANDATORY, capacity) + __u64_field(4, GENLA_F_MANDATORY, ed_uuid) + + /* These are for broadcast from after state change work. + * prev_state and new_state are from the moment the state change took + * place, new_state is not neccessarily the same as current_state, + * there may have been more state changes since. Which will be + * broadcasted soon, in their respective after state change work. */ + __u32_field(5, GENLA_F_MANDATORY, prev_state) + __u32_field(6, GENLA_F_MANDATORY, new_state) + + /* if we have a local disk: */ + __bin_field(7, GENLA_F_MANDATORY, uuids, (UI_SIZE*sizeof(__u64))) + __u32_field(8, GENLA_F_MANDATORY, disk_flags) + __u64_field(9, GENLA_F_MANDATORY, bits_total) + __u64_field(10, GENLA_F_MANDATORY, bits_oos) + /* and in case resync or online verify is active */ + __u64_field(11, GENLA_F_MANDATORY, bits_rs_total) + __u64_field(12, GENLA_F_MANDATORY, bits_rs_failed) + + /* for pre and post notifications of helper execution */ + __str_field(13, GENLA_F_MANDATORY, helper, 32) + __u32_field(14, GENLA_F_MANDATORY, helper_exit_code) +) + +GENL_struct(DRBD_NLA_START_OV_PARMS, 9, start_ov_parms, + __u64_field(1, GENLA_F_MANDATORY, ov_start_sector) +) + +GENL_struct(DRBD_NLA_NEW_C_UUID_PARMS, 10, new_c_uuid_parms, + __flg_field(1, GENLA_F_MANDATORY, clear_bm) +) + +GENL_struct(DRBD_NLA_TIMEOUT_PARMS, 11, timeout_parms, + __u32_field(1, GENLA_F_REQUIRED, timeout_type) +) + +GENL_struct(DRBD_NLA_DISCONNECT_PARMS, 12, disconnect_parms, + __flg_field(1, GENLA_F_MANDATORY, force_disconnect) +) + +/* + * Notifications and commands (genlmsghdr->cmd) + */ +GENL_mc_group(events) + + /* kernel -> userspace announcement of changes */ +GENL_notification( + DRBD_EVENT, 1, events, + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_STATE_INFO, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_MANDATORY) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + + /* query kernel for specific or all info */ +GENL_op( + DRBD_ADM_GET_STATUS, 2, + GENL_op_init( + .doit = drbd_adm_get_status, + .dumpit = drbd_adm_get_status_all, + /* anyone may ask for the status, + * it is broadcasted anyways */ + ), + /* To select the object .doit. + * Or a subset of objects in .dumpit. */ + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_MANDATORY) +) + +#if 0 + /* TO BE DONE */ + /* create or destroy resources, aka replication groups */ +GENL_op(DRBD_ADM_CREATE_RESOURCE, 3, GENL_doit(drbd_adm_create_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DELETE_RESOURCE, 4, GENL_doit(drbd_adm_delete_resource), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +#endif + + /* add DRBD minor devices as volumes to resources */ +GENL_op(DRBD_ADM_ADD_MINOR, 5, GENL_doit(drbd_adm_add_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_MINOR, 6, GENL_doit(drbd_adm_delete_minor), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* add or delete replication links to resources */ +GENL_op(DRBD_ADM_ADD_LINK, 7, GENL_doit(drbd_adm_create_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_DEL_LINK, 8, GENL_doit(drbd_adm_delete_connection), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on replication links */ +GENL_op(DRBD_ADM_SYNCER, 9, + GENL_doit(drbd_adm_syncer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SYNCER_CONF, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_CONNECT, 10, + GENL_doit(drbd_adm_connect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NET_CONF, GENLA_F_REQUIRED) +) + +GENL_op(DRBD_ADM_DISCONNECT, 11, GENL_doit(drbd_adm_disconnect), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) + + /* operates on minors */ +GENL_op(DRBD_ADM_ATTACH, 12, + GENL_doit(drbd_adm_attach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_DISK_CONF, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_RESIZE, 13, + GENL_doit(drbd_adm_resize), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_RESIZE_PARMS, GENLA_F_MANDATORY) +) + + /* operates on all volumes within a resource */ +GENL_op( + DRBD_ADM_PRIMARY, 14, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_SECONDARY, 15, + GENL_doit(drbd_adm_set_role), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_SET_ROLE_PARMS, GENLA_F_REQUIRED) +) + +GENL_op( + DRBD_ADM_NEW_C_UUID, 16, + GENL_doit(drbd_adm_new_c_uuid), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED) + GENL_tla_expected(DRBD_NLA_NEW_C_UUID_PARMS, GENLA_F_MANDATORY) +) + +GENL_op( + DRBD_ADM_START_OV, 17, + GENL_doit(drbd_adm_start_ov), + GENL_tla_expected(DRBD_NLA_START_OV_PARMS, GENLA_F_MANDATORY) +) + +GENL_op(DRBD_ADM_DETACH, 18, GENL_doit(drbd_adm_detach), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVALIDATE, 19, GENL_doit(drbd_adm_invalidate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_INVAL_PEER, 20, GENL_doit(drbd_adm_invalidate_peer), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_PAUSE_SYNC, 21, GENL_doit(drbd_adm_pause_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_SYNC, 22, GENL_doit(drbd_adm_resume_sync), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_SUSPEND_IO, 23, GENL_doit(drbd_adm_suspend_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_RESUME_IO, 24, GENL_doit(drbd_adm_resume_io), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_OUTDATE, 25, GENL_doit(drbd_adm_outdate), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) +GENL_op(DRBD_ADM_GET_TIMEOUT_TYPE, 26, GENL_doit(drbd_adm_get_timeout_type), + GENL_tla_expected(DRBD_NLA_CFG_CONTEXT, GENLA_F_REQUIRED)) diff --git a/include/linux/drbd_genl_api.h b/include/linux/drbd_genl_api.h new file mode 100644 index 000000000000..9ef50d51e34e --- /dev/null +++ b/include/linux/drbd_genl_api.h @@ -0,0 +1,55 @@ +#ifndef DRBD_GENL_STRUCT_H +#define DRBD_GENL_STRUCT_H + +/** + * struct drbd_genlmsghdr - DRBD specific header used in NETLINK_GENERIC requests + * @minor: + * For admin requests (user -> kernel): which minor device to operate on. + * For (unicast) replies or informational (broadcast) messages + * (kernel -> user): which minor device the information is about. + * If we do not operate on minors, but on connections or resources, + * the minor value shall be (~0), and the attribute DRBD_NLA_CFG_CONTEXT + * is used instead. + * @flags: possible operation modifiers (relevant only for user->kernel): + * DRBD_GENL_F_SET_DEFAULTS + * @volume: + * When creating a new minor (adding it to a resource), the resource needs + * to know which volume number within the resource this is supposed to be. + * The volume number corresponds to the same volume number on the remote side, + * whereas the minor number on the remote side may be different + * (union with flags). + * @ret_code: kernel->userland unicast cfg reply return code (union with flags); + */ +struct drbd_genlmsghdr { + __u32 minor; + union { + __u32 flags; + __s32 ret_code; + }; +}; + +/* To be used in drbd_genlmsghdr.flags */ +enum { + DRBD_GENL_F_SET_DEFAULTS = 1, +}; + +enum drbd_state_info_bcast_reason { + SIB_GET_STATUS_REPLY = 1, + SIB_STATE_CHANGE = 2, + SIB_HELPER_PRE = 3, + SIB_HELPER_POST = 4, + SIB_SYNC_PROGRESS = 5, +}; + +/* hack around predefined gcc/cpp "linux=1", + * we cannot possibly include <1/drbd_genl.h> */ +#undef linux + +#include +#define GENL_MAGIC_VERSION API_VERSION +#define GENL_MAGIC_FAMILY drbd +#define GENL_MAGIC_FAMILY_HDRSZ sizeof(struct drbd_genlmsghdr) +#define GENL_MAGIC_INCLUDE_FILE +#include + +#endif diff --git a/include/linux/genl_magic_func.h b/include/linux/genl_magic_func.h new file mode 100644 index 000000000000..8a86f659d363 --- /dev/null +++ b/include/linux/genl_magic_func.h @@ -0,0 +1,417 @@ +#ifndef GENL_MAGIC_FUNC_H +#define GENL_MAGIC_FUNC_H + +#include + +/* + * Extension of genl attribute validation policies {{{1 + * {{{2 + */ + +/** + * nla_is_required - return true if this attribute is required + * @nla: netlink attribute + */ +static inline int nla_is_required(const struct nlattr *nla) +{ + return nla->nla_type & GENLA_F_REQUIRED; +} + +/** + * nla_is_mandatory - return true if understanding this attribute is mandatory + * @nla: netlink attribute + * Note: REQUIRED attributes are implicitly MANDATORY as well + */ +static inline int nla_is_mandatory(const struct nlattr *nla) +{ + return nla->nla_type & (GENLA_F_MANDATORY | GENLA_F_REQUIRED); +} + +/* Functionality to be integrated into nla_parse(), and validate_nla(), + * respectively. + * + * Enforcing the "mandatory" bit is done here, + * by rejecting unknown mandatory attributes. + * + * Part of enforcing the "required" flag would mean to embed it into + * nla_policy.type, and extending validate_nla(), which currently does + * BUG_ON(pt->type > NLA_TYPE_MAX); we have to work on existing kernels, + * so we cannot do that. Thats why enforcing "required" is done in the + * generated assignment functions below. */ +static int nla_check_unknown(int maxtype, struct nlattr *head, int len) +{ + struct nlattr *nla; + int rem; + nla_for_each_attr(nla, head, len, rem) { + __u16 type = nla_type(nla); + if (type > maxtype && nla_is_mandatory(nla)) + return -EOPNOTSUPP; + } + return 0; +} + +/* + * Magic: declare tla policy {{{1 + * Magic: declare nested policies + * {{{2 + */ +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + [tag_name] = { .type = NLA_NESTED }, + +static struct nla_policy CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy)[] = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static struct nla_policy s_name ## _nl_policy[] __read_mostly = \ +{ s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, _type, __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type }, + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, _type, maxlen, \ + __get, __put) \ + [__nla_type(attr_nr)] = { .type = nla_type, \ + .len = maxlen - (nla_type == NLA_NUL_STRING) }, + +#include GENL_MAGIC_INCLUDE_FILE + +#ifndef __KERNEL__ +#ifndef pr_info +#define pr_info(args...) fprintf(stderr, args); +#endif +#endif + +#if 1 +static void dprint_field(const char *dir, int nla_type, + const char *name, void *valp) +{ + __u64 val = valp ? *(__u32 *)valp : 1; + switch (nla_type) { + case NLA_U8: val = (__u8)val; + case NLA_U16: val = (__u16)val; + case NLA_U32: val = (__u32)val; + pr_info("%s attr %s: %d 0x%08x\n", dir, + name, (int)val, (unsigned)val); + break; + case NLA_U64: + val = *(__u64*)valp; + pr_info("%s attr %s: %lld 0x%08llx\n", dir, + name, (long long)val, (unsigned long long)val); + break; + case NLA_FLAG: + if (val) + pr_info("%s attr %s: set\n", dir, name); + break; + } +} + +static void dprint_array(const char *dir, int nla_type, + const char *name, const char *val, unsigned len) +{ + switch (nla_type) { + case NLA_NUL_STRING: + if (len && val[len-1] == '\0') + len--; + pr_info("%s attr %s: [len:%u] '%s'\n", dir, name, len, val); + break; + default: + /* we can always show 4 byte, + * thats what nlattr are aligned to. */ + pr_info("%s attr %s: [len:%u] %02x%02x%02x%02x ...\n", + dir, name, len, val[0], val[1], val[2], val[3]); + } +} + +#define DPRINT_TLA(a, op, b) pr_info("%s %s %s\n", a, op, b); + +/* Name is a member field name of the struct s. + * If s is NULL (only parsing, no copy requested in *_from_attrs()), + * nla is supposed to point to the attribute containing the information + * corresponding to that struct member. */ +#define DPRINT_FIELD(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_field(dir, nla_type, #name, &s->name); \ + else if (nla) \ + dprint_field(dir, nla_type, #name, \ + (nla_type == NLA_FLAG) ? NULL \ + : nla_data(nla)); \ + } while (0) + +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) \ + do { \ + if (s) \ + dprint_array(dir, nla_type, #name, \ + s->name, s->name ## _len); \ + else if (nla) \ + dprint_array(dir, nla_type, #name, \ + nla_data(nla), nla_len(nla)); \ + } while (0) +#else +#define DPRINT_TLA(a, op, b) do {} while (0) +#define DPRINT_FIELD(dir, nla_type, name, s, nla) do {} while (0) +#define DPRINT_ARRAY(dir, nla_type, name, s, nla) do {} while (0) +#endif + +/* + * Magic: provide conversion functions {{{1 + * populate struct from attribute table: + * {{{2 + */ + +/* processing of generic netlink messages is serialized. + * use one static buffer for parsing of nested attributes */ +static struct nlattr *nested_attr_tb[128]; + +#ifndef BUILD_BUG_ON +/* Force a compilation error if condition is true */ +#define BUILD_BUG_ON(condition) ((void)BUILD_BUG_ON_ZERO(condition)) +/* Force a compilation error if condition is true, but also produce a + result (of value 0 and type size_t), so the expression can be used + e.g. in a structure initializer (or where-ever else comma expressions + aren't permitted). */ +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) +#endif + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + /* static, potentially unused */ \ +int s_name ## _from_attrs(struct s_name *s, struct nlattr *tb[]) \ +{ \ + const int maxtype = ARRAY_SIZE(s_name ## _nl_policy)-1; \ + struct nlattr *tla = tb[tag_number]; \ + struct nlattr **ntb = nested_attr_tb; \ + struct nlattr *nla; \ + int err; \ + BUILD_BUG_ON(ARRAY_SIZE(s_name ## _nl_policy) > ARRAY_SIZE(nested_attr_tb)); \ + if (!tla) \ + return -ENOMSG; \ + DPRINT_TLA(#s_name, "<=-", #tag_name); \ + err = nla_parse_nested(ntb, maxtype, tla, s_name ## _nl_policy); \ + if (err) \ + return err; \ + err = nla_check_unknown(maxtype, nla_data(tla), nla_len(tla)); \ + if (err) \ + return err; \ + \ + s_fields \ + return 0; \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name = __get(nla); \ + DPRINT_FIELD("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } + +/* validate_nla() already checked nla_len <= maxlen appropriately. */ +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + nla = ntb[__nla_type(attr_nr)]; \ + if (nla) { \ + if (s) \ + s->name ## _len = \ + __get(s->name, nla, maxlen); \ + DPRINT_ARRAY("<<", nla_type, name, s, nla); \ + } else if ((attr_flag) & GENLA_F_REQUIRED) { \ + pr_info("<< missing attr: %s\n", #name); \ + return -ENOMSG; \ + } \ + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +/* + * Magic: define op number to op name mapping {{{1 + * {{{2 + */ +const char *CONCAT_(GENL_MAGIC_FAMILY, _genl_cmd_to_str)(__u8 cmd) +{ + switch (cmd) { +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + case op_num: return #op_name; +#include GENL_MAGIC_INCLUDE_FILE + default: + return "unknown"; + } +} + +#ifdef __KERNEL__ +#include +/* + * Magic: define genl_ops {{{1 + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ +{ \ + handler \ + .cmd = op_name, \ + .policy = CONCAT_(GENL_MAGIC_FAMILY, _tla_nl_policy), \ +}, + +#define ZZZ_genl_ops CONCAT_(GENL_MAGIC_FAMILY, _genl_ops) +static struct genl_ops ZZZ_genl_ops[] __read_mostly = { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +/* + * Define the genl_family, multicast groups, {{{1 + * and provide register/unregister functions. + * {{{2 + */ +#define ZZZ_genl_family CONCAT_(GENL_MAGIC_FAMILY, _genl_family) +static struct genl_family ZZZ_genl_family __read_mostly = { + .id = GENL_ID_GENERATE, + .name = __stringify(GENL_MAGIC_FAMILY), + .version = GENL_MAGIC_VERSION, +#ifdef GENL_MAGIC_FAMILY_HDRSZ + .hdrsize = NLA_ALIGN(GENL_MAGIC_FAMILY_HDRSZ), +#endif + .maxattr = ARRAY_SIZE(drbd_tla_nl_policy)-1, +}; + +/* + * Magic: define multicast groups + * Magic: define multicast group registration helper + */ +#undef GENL_mc_group +#define GENL_mc_group(group) \ +static struct genl_multicast_group \ +CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group) __read_mostly = { \ + .name = #group, \ +}; \ +static int CONCAT_(GENL_MAGIC_FAMILY, _genl_multicast_ ## group)( \ + struct sk_buff *skb, gfp_t flags) \ +{ \ + unsigned int group_id = \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id; \ + if (!group_id) \ + return -EINVAL; \ + return genlmsg_multicast(skb, 0, group_id, flags); \ +} + +#include GENL_MAGIC_INCLUDE_FILE + +int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void) +{ + int err = genl_register_family_with_ops(&ZZZ_genl_family, + ZZZ_genl_ops, ARRAY_SIZE(ZZZ_genl_ops)); + if (err) + return err; +#undef GENL_mc_group +#define GENL_mc_group(group) \ + err = genl_register_mc_group(&ZZZ_genl_family, \ + &CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group)); \ + if (err) \ + goto fail; \ + else \ + pr_info("%s: mcg %s: %u\n", #group, \ + __stringify(GENL_MAGIC_FAMILY), \ + CONCAT_(GENL_MAGIC_FAMILY, _mcg_ ## group).id); + +#include GENL_MAGIC_INCLUDE_FILE + +#undef GENL_mc_group +#define GENL_mc_group(group) + return 0; +fail: + genl_unregister_family(&ZZZ_genl_family); + return err; +} + +void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void) +{ + genl_unregister_family(&ZZZ_genl_family); +} + +/* + * Magic: provide conversion functions {{{1 + * populate skb from struct. + * {{{2 + */ + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static int s_name ## _to_skb(struct sk_buff *skb, struct s_name *s, \ + const bool exclude_sensitive) \ +{ \ + struct nlattr *tla = nla_nest_start(skb, tag_number); \ + if (!tla) \ + goto nla_put_failure; \ + DPRINT_TLA(#s_name, "-=>", #tag_name); \ + s_fields \ + nla_nest_end(skb, tla); \ + return 0; \ + \ +nla_put_failure: \ + if (tla) \ + nla_nest_cancel(skb, tla); \ + return -EMSGSIZE; \ +} \ +static inline int s_name ## _to_priv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 0); \ +} \ +static inline int s_name ## _to_unpriv_skb(struct sk_buff *skb, \ + struct s_name *s) \ +{ \ + return s_name ## _to_skb(skb, s, 1); \ +} + + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_FIELD(">>", nla_type, name, s, NULL); \ + __put(skb, attr_nr, s->name); \ + } + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + if (!exclude_sensitive || !((attr_flag) & GENLA_F_SENSITIVE)) { \ + DPRINT_ARRAY(">>",nla_type, name, s, NULL); \ + __put(skb, attr_nr, min_t(int, maxlen, \ + s->name ## _len + (nla_type == NLA_NUL_STRING)),\ + s->name); \ + } + +#include GENL_MAGIC_INCLUDE_FILE + +#endif /* __KERNEL__ */ + +/* }}}1 */ +#endif /* GENL_MAGIC_FUNC_H */ +/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */ diff --git a/include/linux/genl_magic_struct.h b/include/linux/genl_magic_struct.h new file mode 100644 index 000000000000..745ebfd6c7e5 --- /dev/null +++ b/include/linux/genl_magic_struct.h @@ -0,0 +1,260 @@ +#ifndef GENL_MAGIC_STRUCT_H +#define GENL_MAGIC_STRUCT_H + +#ifndef GENL_MAGIC_FAMILY +# error "you need to define GENL_MAGIC_FAMILY before inclusion" +#endif + +#ifndef GENL_MAGIC_VERSION +# error "you need to define GENL_MAGIC_VERSION before inclusion" +#endif + +#ifndef GENL_MAGIC_INCLUDE_FILE +# error "you need to define GENL_MAGIC_INCLUDE_FILE before inclusion" +#endif + +#include +#include + +#define CONCAT__(a,b) a ## b +#define CONCAT_(a,b) CONCAT__(a,b) + +extern int CONCAT_(GENL_MAGIC_FAMILY, _genl_register)(void); +extern void CONCAT_(GENL_MAGIC_FAMILY, _genl_unregister)(void); + +/* + * Extension of genl attribute validation policies {{{2 + */ + +/** + * GENLA_F_FLAGS - policy type flags to ease compatible ABI evolvement + * + * @GENLA_F_REQUIRED: attribute has to be present, or message is considered invalid. + * Adding new REQUIRED attributes breaks ABI compatibility, so don't do that. + * + * @GENLA_F_MANDATORY: if present, receiver _must_ understand it. + * Without this, unknown attributes (> maxtype) are _silently_ ignored + * by validate_nla(). + * + * To be used for API extensions, so older kernel can reject requests for not + * yet implemented features, if newer userland tries to use them even though + * the genl_family version clearly indicates they are not available. + * + * @GENLA_F_MAY_IGNORE: To clearly document the fact, for good measure. + * To be used for API extensions for things that have sane defaults, + * so newer userland can still talk to older kernel, knowing it will + * silently ignore these attributes if not yet known. + * + * NOTE: These flags overload + * NLA_F_NESTED (1 << 15) + * NLA_F_NET_BYTEORDER (1 << 14) + * from linux/netlink.h, which are not useful for validate_nla(): + * NET_BYTEORDER is not used anywhere, and NESTED would be specified by setting + * .type = NLA_NESTED in the appropriate policy. + * + * See also: nla_type() + */ +enum { + GENLA_F_MAY_IGNORE = 0, + GENLA_F_MANDATORY = 1 << 14, + GENLA_F_REQUIRED = 1 << 15, + + /* This will not be present in the __u16 .nla_type, but can be + * triggered on in _to_skb, to exclude "sensitive" + * information from broadcasts, or on unpriviledged get requests. + * This is useful because genetlink multicast groups can be listened in + * on by anyone. */ + GENLA_F_SENSITIVE = 1 << 16, +}; + +#define __nla_type(x) ((__u16)((__u16)(x) & (__u16)NLA_TYPE_MASK)) + +/* }}}1 + * MAGIC + * multi-include macro expansion magic starts here + */ + +/* MAGIC helpers {{{2 */ + +/* possible field types */ +#define __flg_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_FLAG, char, \ + nla_get_flag, __nla_put_flag) +#define __u8_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U8, unsigned char, \ + nla_get_u8, NLA_PUT_U8) +#define __u16_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U16, __u16, \ + nla_get_u16, NLA_PUT_U16) +#define __u32_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U32, __u32, \ + nla_get_u32, NLA_PUT_U32) +#define __u64_field(attr_nr, attr_flag, name) \ + __field(attr_nr, attr_flag, name, NLA_U64, __u64, \ + nla_get_u64, NLA_PUT_U64) +#define __str_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_NUL_STRING, char, maxlen, \ + nla_strlcpy, NLA_PUT) +#define __bin_field(attr_nr, attr_flag, name, maxlen) \ + __array(attr_nr, attr_flag, name, NLA_BINARY, char, maxlen, \ + nla_memcpy, NLA_PUT) + +#define __nla_put_flag(skb, attrtype, value) \ + do { \ + if (value) \ + NLA_PUT_FLAG(skb, attrtype); \ + } while (0) + +#define GENL_op_init(args...) args +#define GENL_doit(handler) \ + .doit = handler, \ + .flags = GENL_ADMIN_PERM, +#define GENL_dumpit(handler) \ + .dumpit = handler, \ + .flags = GENL_ADMIN_PERM, + +/* }}}1 + * Magic: define the enum symbols for genl_ops + * Magic: define the enum symbols for top level attributes + * Magic: define the enum symbols for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_mc_group +#define GENL_mc_group(group) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + op_name = op_num, + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, tla_list) \ + op_name = op_num, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + tag_name = tag_number, + +enum { +#include GENL_MAGIC_INCLUDE_FILE +}; + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +enum { \ + s_fields \ +}; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + T_ ## name = (__u16)(attr_nr | attr_flag), + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: compile time assert unique numbers for operations + * Magic: -"- unique numbers for top level attributes + * Magic: -"- unique numbers for nested attributes + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) \ + case op_name: + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) \ + case op_name: + +static inline void ct_assert_unique_operations(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_op +#define GENL_op(op_name, op_num, handler, attr_list) + +#undef GENL_notification +#define GENL_notification(op_name, op_num, mcast_group, tla_list) + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ + case tag_number: + +static inline void ct_assert_unique_top_level_attributes(void) +{ + switch (0) { +#include GENL_MAGIC_INCLUDE_FILE + ; + } +} + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +static inline void ct_assert_unique_ ## s_name ## _attributes(void) \ +{ \ + switch (0) { \ + s_fields \ + ; \ + } \ +} + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + case attr_nr: + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + case attr_nr: + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 + * Magic: declare structs + * struct { + * fields + * }; + * {{{2 + */ + +#undef GENL_struct +#define GENL_struct(tag_name, tag_number, s_name, s_fields) \ +struct s_name { s_fields }; + +#undef __field +#define __field(attr_nr, attr_flag, name, nla_type, type, __get, __put) \ + type name; + +#undef __array +#define __array(attr_nr, attr_flag, name, nla_type, type, maxlen, __get, __put) \ + type name[maxlen]; \ + __u32 name ## _len; + +#include GENL_MAGIC_INCLUDE_FILE + +/* }}}1 */ +#endif /* GENL_MAGIC_STRUCT_H */ +/* vim: set foldmethod=marker nofoldenable : */ -- cgit v1.2.3 From d4f65b5d2497b2fd9c45f06b71deb4ab084a5b66 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 13 Sep 2012 13:06:29 +0100 Subject: KEYS: Add payload preparsing opportunity prior to key instantiate or update Give the key type the opportunity to preparse the payload prior to the instantiation and update routines being called. This is done with the provision of two new key type operations: int (*preparse)(struct key_preparsed_payload *prep); void (*free_preparse)(struct key_preparsed_payload *prep); If the first operation is present, then it is called before key creation (in the add/update case) or before the key semaphore is taken (in the update and instantiate cases). The second operation is called to clean up if the first was called. preparse() is given the opportunity to fill in the following structure: struct key_preparsed_payload { char *description; void *type_data[2]; void *payload; const void *data; size_t datalen; size_t quotalen; }; Before the preparser is called, the first three fields will have been cleared, the payload pointer and size will be stored in data and datalen and the default quota size from the key_type struct will be stored into quotalen. The preparser may parse the payload in any way it likes and may store data in the type_data[] and payload fields for use by the instantiate() and update() ops. The preparser may also propose a description for the key by attaching it as a string to the description field. This can be used by passing a NULL or "" description to the add_key() system call or the key_create_or_update() function. This cannot work with request_key() as that required the description to tell the upcall about the key to be created. This, for example permits keys that store PGP public keys to generate their own name from the user ID and public key fingerprint in the key. The instantiate() and update() operations are then modified to look like this: int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); int (*update)(struct key *key, struct key_preparsed_payload *prep); and the new payload data is passed in *prep, whether or not it was preparsed. Signed-off-by: David Howells --- Documentation/security/keys.txt | 50 +++++++++++++- fs/cifs/cifs_spnego.c | 6 +- fs/cifs/cifsacl.c | 8 +-- include/keys/user-type.h | 6 +- include/linux/key-type.h | 35 +++++++++- net/ceph/crypto.c | 9 +-- net/dns_resolver/dns_key.c | 6 +- net/rxrpc/ar-key.c | 40 +++++------ security/keys/encrypted-keys/encrypted.c | 16 +++-- security/keys/key.c | 114 ++++++++++++++++++++++--------- security/keys/keyctl.c | 18 +++-- security/keys/keyring.c | 6 +- security/keys/request_key_auth.c | 8 +-- security/keys/trusted.c | 16 +++-- security/keys/user_defined.c | 14 ++-- 15 files changed, 250 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys.txt b/Documentation/security/keys.txt index aa0dbd74b71b..7d9ca92022d8 100644 --- a/Documentation/security/keys.txt +++ b/Documentation/security/keys.txt @@ -412,6 +412,10 @@ The main syscalls are: to the keyring. In this case, an error will be generated if the process does not have permission to write to the keyring. + If the key type supports it, if the description is NULL or an empty + string, the key type will try and generate a description from the content + of the payload. + The payload is optional, and the pointer can be NULL if not required by the type. The payload is plen in size, and plen can be zero for an empty payload. @@ -1114,12 +1118,53 @@ The structure has a number of fields, some of which are mandatory: it should return 0. - (*) int (*instantiate)(struct key *key, const void *data, size_t datalen); + (*) int (*preparse)(struct key_preparsed_payload *prep); + + This optional method permits the key type to attempt to parse payload + before a key is created (add key) or the key semaphore is taken (update or + instantiate key). The structure pointed to by prep looks like: + + struct key_preparsed_payload { + char *description; + void *type_data[2]; + void *payload; + const void *data; + size_t datalen; + size_t quotalen; + }; + + Before calling the method, the caller will fill in data and datalen with + the payload blob parameters; quotalen will be filled in with the default + quota size from the key type and the rest will be cleared. + + If a description can be proposed from the payload contents, that should be + attached as a string to the description field. This will be used for the + key description if the caller of add_key() passes NULL or "". + + The method can attach anything it likes to type_data[] and payload. These + are merely passed along to the instantiate() or update() operations. + + The method should return 0 if success ful or a negative error code + otherwise. + + + (*) void (*free_preparse)(struct key_preparsed_payload *prep); + + This method is only required if the preparse() method is provided, + otherwise it is unused. It cleans up anything attached to the + description, type_data and payload fields of the key_preparsed_payload + struct as filled in by the preparse() method. + + + (*) int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); This method is called to attach a payload to a key during construction. The payload attached need not bear any relation to the data passed to this function. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + If the amount of data attached to the key differs from the size in keytype->def_datalen, then key_payload_reserve() should be called. @@ -1135,6 +1180,9 @@ The structure has a number of fields, some of which are mandatory: If this type of key can be updated, then this method should be provided. It is called to update a key's payload from the blob of data provided. + The prep->data and prep->datalen fields will define the original payload + blob. If preparse() was supplied then other fields may be filled in also. + key_payload_reserve() should be called if the data length might change before any changes are actually made. Note that if this succeeds, the type is committed to changing the key because it's already been altered, so all diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index e622863b292f..086f381d6489 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -31,18 +31,18 @@ /* create a new cifs key */ static int -cifs_spnego_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; int ret; ret = -ENOMEM; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) goto error; /* attach the data */ - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; ret = 0; diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 05f4dc263a23..f3c60e264ca8 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -167,17 +167,17 @@ static struct shrinker cifs_shrinker = { }; static int -cifs_idmap_key_instantiate(struct key *key, const void *data, size_t datalen) +cifs_idmap_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { char *payload; - payload = kmalloc(datalen, GFP_KERNEL); + payload = kmalloc(prep->datalen, GFP_KERNEL); if (!payload) return -ENOMEM; - memcpy(payload, data, datalen); + memcpy(payload, prep->data, prep->datalen); key->payload.data = payload; - key->datalen = datalen; + key->datalen = prep->datalen; return 0; } diff --git a/include/keys/user-type.h b/include/keys/user-type.h index bc9ec1d7698c..5e452c84f1e6 100644 --- a/include/keys/user-type.h +++ b/include/keys/user-type.h @@ -35,8 +35,10 @@ struct user_key_payload { extern struct key_type key_type_user; extern struct key_type key_type_logon; -extern int user_instantiate(struct key *key, const void *data, size_t datalen); -extern int user_update(struct key *key, const void *data, size_t datalen); +struct key_preparsed_payload; + +extern int user_instantiate(struct key *key, struct key_preparsed_payload *prep); +extern int user_update(struct key *key, struct key_preparsed_payload *prep); extern int user_match(const struct key *key, const void *criterion); extern void user_revoke(struct key *key); extern void user_destroy(struct key *key); diff --git a/include/linux/key-type.h b/include/linux/key-type.h index f0c651cda7b0..518a53afb9ea 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -26,6 +26,27 @@ struct key_construction { struct key *authkey;/* authorisation for key being constructed */ }; +/* + * Pre-parsed payload, used by key add, update and instantiate. + * + * This struct will be cleared and data and datalen will be set with the data + * and length parameters from the caller and quotalen will be set from + * def_datalen from the key type. Then if the preparse() op is provided by the + * key type, that will be called. Then the struct will be passed to the + * instantiate() or the update() op. + * + * If the preparse() op is given, the free_preparse() op will be called to + * clear the contents. + */ +struct key_preparsed_payload { + char *description; /* Proposed key description (or NULL) */ + void *type_data[2]; /* Private key-type data */ + void *payload; /* Proposed payload */ + const void *data; /* Raw data */ + size_t datalen; /* Raw datalen */ + size_t quotalen; /* Quota length for proposed payload */ +}; + typedef int (*request_key_actor_t)(struct key_construction *key, const char *op, void *aux); @@ -45,18 +66,28 @@ struct key_type { /* vet a description */ int (*vet_description)(const char *description); + /* Preparse the data blob from userspace that is to be the payload, + * generating a proposed description and payload that will be handed to + * the instantiate() and update() ops. + */ + int (*preparse)(struct key_preparsed_payload *prep); + + /* Free a preparse data structure. + */ + void (*free_preparse)(struct key_preparsed_payload *prep); + /* instantiate a key of this type * - this method should call key_payload_reserve() to determine if the * user's quota will hold the payload */ - int (*instantiate)(struct key *key, const void *data, size_t datalen); + int (*instantiate)(struct key *key, struct key_preparsed_payload *prep); /* update a key of this type (optional) * - this method should call key_payload_reserve() to recalculate the * quota consumption * - the key must be locked against read when modifying */ - int (*update)(struct key *key, const void *data, size_t datalen); + int (*update)(struct key *key, struct key_preparsed_payload *prep); /* match a key against a description */ int (*match)(const struct key *key, const void *desc); diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 9da7fdd3cd8a..af14cb425164 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -423,14 +423,15 @@ int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, } } -int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) +int ceph_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct ceph_crypto_key *ckey; + size_t datalen = prep->datalen; int ret; void *p; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto err; ret = key_payload_reserve(key, datalen); @@ -443,8 +444,8 @@ int ceph_key_instantiate(struct key *key, const void *data, size_t datalen) goto err; /* TODO ceph_crypto_key_decode should really take const input */ - p = (void *)data; - ret = ceph_crypto_key_decode(ckey, &p, (char*)data+datalen); + p = (void *)prep->data; + ret = ceph_crypto_key_decode(ckey, &p, (char*)prep->data+datalen); if (ret < 0) goto err_ckey; diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index d9507dd05818..859ab8b6ec34 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -59,13 +59,13 @@ const struct cred *dns_resolver_cache; * "ip1,ip2,...#foo=bar" */ static int -dns_resolver_instantiate(struct key *key, const void *_data, size_t datalen) +dns_resolver_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; unsigned long derrno; int ret; - size_t result_len = 0; - const char *data = _data, *end, *opt; + size_t datalen = prep->datalen, result_len = 0; + const char *data = prep->data, *end, *opt; kenter("%%%d,%s,'%*.*s',%zu", key->serial, key->description, diff --git a/net/rxrpc/ar-key.c b/net/rxrpc/ar-key.c index 8b1f9f49960f..106c5a6b1ab2 100644 --- a/net/rxrpc/ar-key.c +++ b/net/rxrpc/ar-key.c @@ -26,8 +26,8 @@ #include "ar-internal.h" static int rxrpc_vet_description_s(const char *); -static int rxrpc_instantiate(struct key *, const void *, size_t); -static int rxrpc_instantiate_s(struct key *, const void *, size_t); +static int rxrpc_instantiate(struct key *, struct key_preparsed_payload *); +static int rxrpc_instantiate_s(struct key *, struct key_preparsed_payload *); static void rxrpc_destroy(struct key *); static void rxrpc_destroy_s(struct key *); static void rxrpc_describe(const struct key *, struct seq_file *); @@ -678,7 +678,7 @@ error: * * if no data is provided, then a no-security key is made */ -static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) +static int rxrpc_instantiate(struct key *key, struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; @@ -686,26 +686,26 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) u32 kver; int ret; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); /* handle a no-security key */ - if (!data && datalen == 0) + if (!prep->data && prep->datalen == 0) return 0; /* determine if the XDR payload format is being used */ - if (datalen > 7 * 4) { - ret = rxrpc_instantiate_xdr(key, data, datalen); + if (prep->datalen > 7 * 4) { + ret = rxrpc_instantiate_xdr(key, prep->data, prep->datalen); if (ret != -EPROTO) return ret; } /* get the key interface version number */ ret = -EINVAL; - if (datalen <= 4 || !data) + if (prep->datalen <= 4 || !prep->data) goto error; - memcpy(&kver, data, sizeof(kver)); - data += sizeof(kver); - datalen -= sizeof(kver); + memcpy(&kver, prep->data, sizeof(kver)); + prep->data += sizeof(kver); + prep->datalen -= sizeof(kver); _debug("KEY I/F VERSION: %u", kver); @@ -715,11 +715,11 @@ static int rxrpc_instantiate(struct key *key, const void *data, size_t datalen) /* deal with a version 1 key */ ret = -EINVAL; - if (datalen < sizeof(*v1)) + if (prep->datalen < sizeof(*v1)) goto error; - v1 = data; - if (datalen != sizeof(*v1) + v1->ticket_length) + v1 = prep->data; + if (prep->datalen != sizeof(*v1) + v1->ticket_length) goto error; _debug("SCIX: %u", v1->security_index); @@ -784,17 +784,17 @@ error: * instantiate a server secret key * data should be a pointer to the 8-byte secret key */ -static int rxrpc_instantiate_s(struct key *key, const void *data, - size_t datalen) +static int rxrpc_instantiate_s(struct key *key, + struct key_preparsed_payload *prep) { struct crypto_blkcipher *ci; - _enter("{%x},,%zu", key_serial(key), datalen); + _enter("{%x},,%zu", key_serial(key), prep->datalen); - if (datalen != 8) + if (prep->datalen != 8) return -EINVAL; - memcpy(&key->type_data, data, 8); + memcpy(&key->type_data, prep->data, 8); ci = crypto_alloc_blkcipher("pcbc(des)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(ci)) { @@ -802,7 +802,7 @@ static int rxrpc_instantiate_s(struct key *key, const void *data, return PTR_ERR(ci); } - if (crypto_blkcipher_setkey(ci, data, 8) < 0) + if (crypto_blkcipher_setkey(ci, prep->data, 8) < 0) BUG(); key->payload.data = ci; diff --git a/security/keys/encrypted-keys/encrypted.c b/security/keys/encrypted-keys/encrypted.c index 2d1bb8af7696..9e1e005c7596 100644 --- a/security/keys/encrypted-keys/encrypted.c +++ b/security/keys/encrypted-keys/encrypted.c @@ -773,8 +773,8 @@ static int encrypted_init(struct encrypted_key_payload *epayload, * * On success, return 0. Otherwise return errno. */ -static int encrypted_instantiate(struct key *key, const void *data, - size_t datalen) +static int encrypted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; @@ -782,16 +782,17 @@ static int encrypted_instantiate(struct key *key, const void *data, char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; + size_t datalen = prep->datalen; int ret; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; datablob[datalen] = 0; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, &decrypted_datalen, &hex_encoded_iv); if (ret < 0) @@ -834,16 +835,17 @@ static void encrypted_rcu_free(struct rcu_head *rcu) * * On success, return 0. Otherwise return errno. */ -static int encrypted_update(struct key *key, const void *data, size_t datalen) +static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = key->payload.data; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; const char *format = NULL; + size_t datalen = prep->datalen; int ret = 0; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; buf = kmalloc(datalen + 1, GFP_KERNEL); @@ -851,7 +853,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) return -ENOMEM; buf[datalen] = 0; - memcpy(buf, data, datalen); + memcpy(buf, prep->data, datalen); ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL); if (ret < 0) goto out; diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..1d039af99f50 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -412,8 +412,7 @@ EXPORT_SYMBOL(key_payload_reserve); * key_construction_mutex. */ static int __key_instantiate_and_link(struct key *key, - const void *data, - size_t datalen, + struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, unsigned long *_prealloc) @@ -431,7 +430,7 @@ static int __key_instantiate_and_link(struct key *key, /* can't instantiate twice */ if (!test_bit(KEY_FLAG_INSTANTIATED, &key->flags)) { /* instantiate the key */ - ret = key->type->instantiate(key, data, datalen); + ret = key->type->instantiate(key, prep); if (ret == 0) { /* mark the key as being instantiated */ @@ -482,22 +481,37 @@ int key_instantiate_and_link(struct key *key, struct key *keyring, struct key *authkey) { + struct key_preparsed_payload prep; unsigned long prealloc; int ret; + memset(&prep, 0, sizeof(prep)); + prep.data = data; + prep.datalen = datalen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; + } + if (keyring) { ret = __key_link_begin(keyring, key->type, key->description, &prealloc); if (ret < 0) - return ret; + goto error_free_preparse; } - ret = __key_instantiate_and_link(key, data, datalen, keyring, authkey, + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &prealloc); if (keyring) __key_link_end(keyring, key->type, prealloc); +error_free_preparse: + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } @@ -706,7 +720,7 @@ void key_type_put(struct key_type *ktype) * if we get an error. */ static inline key_ref_t __key_update(key_ref_t key_ref, - const void *payload, size_t plen) + struct key_preparsed_payload *prep) { struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -722,7 +736,7 @@ static inline key_ref_t __key_update(key_ref_t key_ref, down_write(&key->sem); - ret = key->type->update(key, payload, plen); + ret = key->type->update(key, prep); if (ret == 0) /* updating a negative key instantiates it */ clear_bit(KEY_FLAG_NEGATIVE, &key->flags); @@ -774,6 +788,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, unsigned long flags) { unsigned long prealloc; + struct key_preparsed_payload prep; const struct cred *cred = current_cred(); struct key_type *ktype; struct key *keyring, *key = NULL; @@ -789,8 +804,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } key_ref = ERR_PTR(-EINVAL); - if (!ktype->match || !ktype->instantiate) - goto error_2; + if (!ktype->match || !ktype->instantiate || + (!description && !ktype->preparse)) + goto error_put_type; keyring = key_ref_to_ptr(keyring_ref); @@ -798,18 +814,37 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = ERR_PTR(-ENOTDIR); if (keyring->type != &key_type_keyring) - goto error_2; + goto error_put_type; + + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = ktype->def_datalen; + if (ktype->preparse) { + ret = ktype->preparse(&prep); + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_put_type; + } + if (!description) + description = prep.description; + key_ref = ERR_PTR(-EINVAL); + if (!description) + goto error_free_prep; + } ret = __key_link_begin(keyring, ktype, description, &prealloc); - if (ret < 0) - goto error_2; + if (ret < 0) { + key_ref = ERR_PTR(ret); + goto error_free_prep; + } /* if we're going to allocate a new key, we're going to have * to modify the keyring */ ret = key_permission(keyring_ref, KEY_WRITE); if (ret < 0) { key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } /* if it's possible to update this type of key, search for an existing @@ -840,25 +875,27 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, perm, flags); if (IS_ERR(key)) { key_ref = ERR_CAST(key); - goto error_3; + goto error_link_end; } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, payload, plen, keyring, NULL, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); - goto error_3; + goto error_link_end; } key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); - error_3: +error_link_end: __key_link_end(keyring, ktype, prealloc); - error_2: +error_free_prep: + if (ktype->preparse) + ktype->free_preparse(&prep); +error_put_type: key_type_put(ktype); - error: +error: return key_ref; found_matching_key: @@ -866,10 +903,9 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * - we can drop the locks first as we have the key pinned */ __key_link_end(keyring, ktype, prealloc); - key_type_put(ktype); - key_ref = __key_update(key_ref, payload, plen); - goto error; + key_ref = __key_update(key_ref, &prep); + goto error_free_prep; } EXPORT_SYMBOL(key_create_or_update); @@ -888,6 +924,7 @@ EXPORT_SYMBOL(key_create_or_update); */ int key_update(key_ref_t key_ref, const void *payload, size_t plen) { + struct key_preparsed_payload prep; struct key *key = key_ref_to_ptr(key_ref); int ret; @@ -900,18 +937,31 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) /* attempt to update it if supported */ ret = -EOPNOTSUPP; - if (key->type->update) { - down_write(&key->sem); - - ret = key->type->update(key, payload, plen); - if (ret == 0) - /* updating a negative key instantiates it */ - clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + if (!key->type->update) + goto error; - up_write(&key->sem); + memset(&prep, 0, sizeof(prep)); + prep.data = payload; + prep.datalen = plen; + prep.quotalen = key->type->def_datalen; + if (key->type->preparse) { + ret = key->type->preparse(&prep); + if (ret < 0) + goto error; } - error: + down_write(&key->sem); + + ret = key->type->update(key, &prep); + if (ret == 0) + /* updating a negative key instantiates it */ + clear_bit(KEY_FLAG_NEGATIVE, &key->flags); + + up_write(&key->sem); + + if (key->type->preparse) + key->type->free_preparse(&prep); +error: return ret; } EXPORT_SYMBOL(key_update); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 3364fbf46807..505d40be196c 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -46,6 +46,9 @@ static int key_get_type_from_user(char *type, * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * + * If the description is NULL or an empty string, the key type is asked to + * generate one from the payload. + * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error @@ -72,10 +75,17 @@ SYSCALL_DEFINE5(add_key, const char __user *, _type, if (ret < 0) goto error; - description = strndup_user(_description, PAGE_SIZE); - if (IS_ERR(description)) { - ret = PTR_ERR(description); - goto error; + description = NULL; + if (_description) { + description = strndup_user(_description, PAGE_SIZE); + if (IS_ERR(description)) { + ret = PTR_ERR(description); + goto error; + } + if (!*description) { + kfree(description); + description = NULL; + } } /* pull the payload in if one was supplied */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..f04d8cf81f3c 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -66,7 +66,7 @@ static inline unsigned keyring_hash(const char *desc) * operations. */ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen); + struct key_preparsed_payload *prep); static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); @@ -121,12 +121,12 @@ static void keyring_publish_name(struct key *keyring) * Returns 0 on success, -EINVAL if given any data. */ static int keyring_instantiate(struct key *keyring, - const void *data, size_t datalen) + struct key_preparsed_payload *prep) { int ret; ret = -EINVAL; - if (datalen == 0) { + if (prep->datalen == 0) { /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 60d4e3f5e4bb..85730d5a5a59 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -19,7 +19,8 @@ #include #include "internal.h" -static int request_key_auth_instantiate(struct key *, const void *, size_t); +static int request_key_auth_instantiate(struct key *, + struct key_preparsed_payload *); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); @@ -42,10 +43,9 @@ struct key_type key_type_request_key_auth = { * Instantiate a request-key authorisation key. */ static int request_key_auth_instantiate(struct key *key, - const void *data, - size_t datalen) + struct key_preparsed_payload *prep) { - key->payload.data = (struct request_key_auth *) data; + key->payload.data = (struct request_key_auth *)prep->data; return 0; } diff --git a/security/keys/trusted.c b/security/keys/trusted.c index 2d5d041f2049..42036c7a0856 100644 --- a/security/keys/trusted.c +++ b/security/keys/trusted.c @@ -927,22 +927,23 @@ static struct trusted_key_payload *trusted_payload_alloc(struct key *key) * * On success, return 0. Otherwise return errno. */ -static int trusted_instantiate(struct key *key, const void *data, - size_t datalen) +static int trusted_instantiate(struct key *key, + struct key_preparsed_payload *prep) { struct trusted_key_payload *payload = NULL; struct trusted_key_options *options = NULL; + size_t datalen = prep->datalen; char *datablob; int ret = 0; int key_cmd; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; options = trusted_options_alloc(); @@ -1011,17 +1012,18 @@ static void trusted_rcu_free(struct rcu_head *rcu) /* * trusted_update - reseal an existing key with new PCR values */ -static int trusted_update(struct key *key, const void *data, size_t datalen) +static int trusted_update(struct key *key, struct key_preparsed_payload *prep) { struct trusted_key_payload *p = key->payload.data; struct trusted_key_payload *new_p; struct trusted_key_options *new_o; + size_t datalen = prep->datalen; char *datablob; int ret = 0; if (!p->migratable) return -EPERM; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); @@ -1038,7 +1040,7 @@ static int trusted_update(struct key *key, const void *data, size_t datalen) goto out; } - memcpy(datablob, data, datalen); + memcpy(datablob, prep->data, datalen); datablob[datalen] = '\0'; ret = datablob_parse(datablob, new_p, new_o); if (ret != Opt_update) { diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c index c7660a25a3e4..55dc88939185 100644 --- a/security/keys/user_defined.c +++ b/security/keys/user_defined.c @@ -58,13 +58,14 @@ EXPORT_SYMBOL_GPL(key_type_logon); /* * instantiate a user defined key */ -int user_instantiate(struct key *key, const void *data, size_t datalen) +int user_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; ret = key_payload_reserve(key, datalen); @@ -78,7 +79,7 @@ int user_instantiate(struct key *key, const void *data, size_t datalen) /* attach the data */ upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); rcu_assign_keypointer(key, upayload); ret = 0; @@ -92,13 +93,14 @@ EXPORT_SYMBOL_GPL(user_instantiate); * update a user defined key * - the key's semaphore is write-locked */ -int user_update(struct key *key, const void *data, size_t datalen) +int user_update(struct key *key, struct key_preparsed_payload *prep) { struct user_key_payload *upayload, *zap; + size_t datalen = prep->datalen; int ret; ret = -EINVAL; - if (datalen <= 0 || datalen > 32767 || !data) + if (datalen <= 0 || datalen > 32767 || !prep->data) goto error; /* construct a replacement payload */ @@ -108,7 +110,7 @@ int user_update(struct key *key, const void *data, size_t datalen) goto error; upayload->datalen = datalen; - memcpy(upayload->data, data, datalen); + memcpy(upayload->data, prep->data, datalen); /* check the quota and attach the new data */ zap = upayload; -- cgit v1.2.3 From e6459606b04e6385ccd3c2060fc10f78a92c7700 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 1 Oct 2012 00:23:52 +0200 Subject: lib: Add early cpio decoder Add a simple cpio decoder without library dependencies for the purpose of extracting components from the initramfs blob for early kernel uses. Intended consumers so far are microcode and ACPI override. Signed-off-by: H. Peter Anvin Link: http://lkml.kernel.org/r/1349043837-22659-2-git-send-email-trenn@suse.de Cc: Len Brown Cc: Fenghua Yu Signed-off-by: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/earlycpio.h | 17 ++++++ lib/Makefile | 2 +- lib/earlycpio.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 1 deletion(-) create mode 100644 include/linux/earlycpio.h create mode 100644 lib/earlycpio.c (limited to 'include/linux') diff --git a/include/linux/earlycpio.h b/include/linux/earlycpio.h new file mode 100644 index 000000000000..111f46d83d00 --- /dev/null +++ b/include/linux/earlycpio.h @@ -0,0 +1,17 @@ +#ifndef _LINUX_EARLYCPIO_H +#define _LINUX_EARLYCPIO_H + +#include + +#define MAX_CPIO_FILE_NAME 18 + +struct cpio_data { + void *data; + size_t size; + char name[MAX_CPIO_FILE_NAME]; +}; + +struct cpio_data find_cpio_data(const char *path, void *data, size_t len, + long *offset); + +#endif /* _LINUX_EARLYCPIO_H */ diff --git a/lib/Makefile b/lib/Makefile index 42d283edc4d3..0924041b6959 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ idr.o int_sqrt.o extable.o prio_tree.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ - is_single_threaded.o plist.o decompress.o + is_single_threaded.o plist.o decompress.o earlycpio.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/earlycpio.c b/lib/earlycpio.c new file mode 100644 index 000000000000..8078ef49cb79 --- /dev/null +++ b/lib/earlycpio.c @@ -0,0 +1,145 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2012 Intel Corporation; author H. Peter Anvin + * + * This file is part of the Linux kernel, and is made available + * under the terms of the GNU General Public License version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * ----------------------------------------------------------------------- */ + +/* + * earlycpio.c + * + * Find a specific cpio member; must precede any compressed content. + * This is used to locate data items in the initramfs used by the + * kernel itself during early boot (before the main initramfs is + * decompressed.) It is the responsibility of the initramfs creator + * to ensure that these items are uncompressed at the head of the + * blob. Depending on the boot loader or package tool that may be a + * separate file or part of the same file. + */ + +#include +#include +#include + +enum cpio_fields { + C_MAGIC, + C_INO, + C_MODE, + C_UID, + C_GID, + C_NLINK, + C_MTIME, + C_FILESIZE, + C_MAJ, + C_MIN, + C_RMAJ, + C_RMIN, + C_NAMESIZE, + C_CHKSUM, + C_NFIELDS +}; + +/** + * cpio_data find_cpio_data - Search for files in an uncompressed cpio + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @offset: When a matching file is found, this is the offset to the + * beginning of the cpio. It can be used to iterate through + * the cpio to find all files inside of a directory path + * + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. + * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. + */ + +struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, + size_t len, long *offset) +{ + const size_t cpio_header_len = 8*C_NFIELDS - 2; + struct cpio_data cd = { NULL, 0, "" }; + const char *p, *dptr, *nptr; + unsigned int ch[C_NFIELDS], *chp, v; + unsigned char c, x; + size_t mypathsize = strlen(path); + int i, j; + + p = data; + + while (len > cpio_header_len) { + if (!*p) { + /* All cpio headers need to be 4-byte aligned */ + p += 4; + len -= 4; + continue; + } + + j = 6; /* The magic field is only 6 characters */ + chp = ch; + for (i = C_NFIELDS; i; i--) { + v = 0; + while (j--) { + v <<= 4; + c = *p++; + + x = c - '0'; + if (x < 10) { + v += x; + continue; + } + + x = (c | 0x20) - 'a'; + if (x < 6) { + v += x + 10; + continue; + } + + goto quit; /* Invalid hexadecimal */ + } + *chp++ = v; + j = 8; /* All other fields are 8 characters */ + } + + if ((ch[C_MAGIC] - 0x070701) > 1) + goto quit; /* Invalid magic */ + + len -= cpio_header_len; + + dptr = PTR_ALIGN(p + ch[C_NAMESIZE], 4); + nptr = PTR_ALIGN(dptr + ch[C_FILESIZE], 4); + + if (nptr > p + len || dptr < p || nptr < dptr) + goto quit; /* Buffer overrun */ + + if ((ch[C_MODE] & 0170000) == 0100000 && + ch[C_NAMESIZE] >= mypathsize && + !memcmp(p, path, mypathsize)) { + *offset = (long)nptr - (long)data; + if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { + pr_warn( + "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", + p, MAX_CPIO_FILE_NAME); + } + strlcpy(cd.name, p + mypathsize, MAX_CPIO_FILE_NAME); + + cd.data = (void *)dptr; + cd.size = ch[C_FILESIZE]; + return cd; /* Found it! */ + } + len -= (nptr - p); + p = nptr; + } + +quit: + return cd; +} -- cgit v1.2.3 From 8e30524dcc0d0ac1a18a5cee482b9d9cde3cb332 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:53 +0200 Subject: x86, acpi: Introduce x86 arch specific arch_reserve_mem_area() for e820 handling This is needed for ACPI table overriding via initrd. Beside reserving memblocks, X86 also requires to flag the memory area to E820_RESERVED or E820_ACPI in the e820 mappings to be able to io(re)map it later. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-3-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- arch/x86/kernel/acpi/boot.c | 6 ++++++ include/linux/acpi.h | 8 ++++++++ 2 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index b2297e58c6ed..6b75777c0a8d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1700,3 +1700,9 @@ int __acpi_release_global_lock(unsigned int *lock) } while (unlikely (val != old)); return old & 0x1; } + +void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + e820_add_region(addr, size, E820_ACPI); + update_e820(); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4f2a76224509..946fd1ea79ff 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -426,6 +426,14 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); +#if CONFIG_X86 +void arch_reserve_mem_area(acpi_physical_address addr, size_t size); +#else +static inline void arch_reserve_mem_area(acpi_physical_address addr, + size_t size) +{ +} +#endif /* CONFIG_X86 */ #else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif -- cgit v1.2.3 From 53aac44c904abbad9f474f652f099de13b5c3563 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 1 Oct 2012 00:23:54 +0200 Subject: ACPI: Store valid ACPI tables passed via early initrd in reserved memblock areas A later patch will compare them with ACPI tables that get loaded at boot or runtime and if criteria match, a stored one is loaded. Signed-off-by: Thomas Renninger Link: http://lkml.kernel.org/r/1349043837-22659-4-git-send-email-trenn@suse.de Cc: Len Brown Cc: Robert Moore Cc: Yinghai Lu Cc: Eric Piel Signed-off-by: H. Peter Anvin --- arch/x86/kernel/setup.c | 2 + drivers/acpi/Kconfig | 9 ++++ drivers/acpi/osl.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 8 ++++ 4 files changed, 141 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index f4b9b80e1b95..764e543b2297 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -941,6 +941,8 @@ void __init setup_arch(char **cmdline_p) reserve_initrd(); + acpi_initrd_override((void *)initrd_start, initrd_end - initrd_start); + reserve_crashkernel(); vsmp_init(); diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 80998958cf45..8cf719547b51 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -261,6 +261,15 @@ config ACPI_CUSTOM_DSDT bool default ACPI_CUSTOM_DSDT_FILE != "" +config ACPI_INITRD_TABLE_OVERRIDE + bool "ACPI tables can be passed via uncompressed cpio in initrd" + default n + help + This option provides functionality to override arbitrary ACPI tables + via initrd. No functional change if no ACPI tables are passed via + initrd, therefore it's safe to say Y. + See Documentation/acpi/initrd_table_override.txt for details + config ACPI_BLACKLIST_YEAR int "Disable ACPI for systems before Jan 1st this year" if X86_32 default 0 diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 9eaf708f5885..b20b07903218 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -534,6 +534,128 @@ acpi_os_predefined_override(const struct acpi_predefined_names *init_val, return AE_OK; } +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +#include +#include + +static u64 acpi_tables_addr; +static int all_tables_size; + +/* Copied from acpica/tbutils.c:acpi_tb_checksum() */ +u8 __init acpi_table_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + return sum; +} + +/* All but ACPI_SIG_RSDP and ACPI_SIG_FACS: */ +static const char * const table_sigs[] = { + ACPI_SIG_BERT, ACPI_SIG_CPEP, ACPI_SIG_ECDT, ACPI_SIG_EINJ, + ACPI_SIG_ERST, ACPI_SIG_HEST, ACPI_SIG_MADT, ACPI_SIG_MSCT, + ACPI_SIG_SBST, ACPI_SIG_SLIT, ACPI_SIG_SRAT, ACPI_SIG_ASF, + ACPI_SIG_BOOT, ACPI_SIG_DBGP, ACPI_SIG_DMAR, ACPI_SIG_HPET, + ACPI_SIG_IBFT, ACPI_SIG_IVRS, ACPI_SIG_MCFG, ACPI_SIG_MCHI, + ACPI_SIG_SLIC, ACPI_SIG_SPCR, ACPI_SIG_SPMI, ACPI_SIG_TCPA, + ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, + ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, + ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, NULL }; + +/* Non-fatal errors: Affected tables/files are ignored */ +#define INVALID_TABLE(x, path, name) \ + { pr_err("ACPI OVERRIDE: " x " [%s%s]\n", path, name); continue; } + +#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) + +/* Must not increase 10 or needs code modification below */ +#define ACPI_OVERRIDE_TABLES 10 + +void __init acpi_initrd_override(void *data, size_t size) +{ + int sig, no, table_nr = 0, total_offset = 0; + long offset = 0; + struct acpi_table_header *table; + char cpio_path[32] = "kernel/firmware/acpi/"; + struct cpio_data file; + struct cpio_data early_initrd_files[ACPI_OVERRIDE_TABLES]; + char *p; + + if (data == NULL || size == 0) + return; + + for (no = 0; no < ACPI_OVERRIDE_TABLES; no++) { + file = find_cpio_data(cpio_path, data, size, &offset); + if (!file.data) + break; + + data += offset; + size -= offset; + + if (file.size < sizeof(struct acpi_table_header)) + INVALID_TABLE("Table smaller than ACPI header", + cpio_path, file.name); + + table = file.data; + + for (sig = 0; table_sigs[sig]; sig++) + if (!memcmp(table->signature, table_sigs[sig], 4)) + break; + + if (!table_sigs[sig]) + INVALID_TABLE("Unknown signature", + cpio_path, file.name); + if (file.size != table->length) + INVALID_TABLE("File length does not match table length", + cpio_path, file.name); + if (acpi_table_checksum(file.data, table->length)) + INVALID_TABLE("Bad table checksum", + cpio_path, file.name); + + pr_info("%4.4s ACPI table found in initrd [%s%s][0x%x]\n", + table->signature, cpio_path, file.name, table->length); + + all_tables_size += table->length; + early_initrd_files[table_nr].data = file.data; + early_initrd_files[table_nr].size = file.size; + table_nr++; + } + if (table_nr == 0) + return; + + acpi_tables_addr = + memblock_find_in_range(0, max_low_pfn_mapped << PAGE_SHIFT, + all_tables_size, PAGE_SIZE); + if (!acpi_tables_addr) { + WARN_ON(1); + return; + } + /* + * Only calling e820_add_reserve does not work and the + * tables are invalid (memory got used) later. + * memblock_reserve works as expected and the tables won't get modified. + * But it's not enough on X86 because ioremap will + * complain later (used by acpi_os_map_memory) that the pages + * that should get mapped are not marked "reserved". + * Both memblock_reserve and e820_add_region (via arch_reserve_mem_area) + * works fine. + */ + memblock_reserve(acpi_tables_addr, acpi_tables_addr + all_tables_size); + arch_reserve_mem_area(acpi_tables_addr, all_tables_size); + + p = early_ioremap(acpi_tables_addr, all_tables_size); + + for (no = 0; no < table_nr; no++) { + memcpy(p + total_offset, early_initrd_files[no].data, + early_initrd_files[no].size); + total_offset += early_initrd_files[no].size; + } + early_iounmap(p, all_tables_size); +} +#endif /* CONFIG_ACPI_INITRD_TABLE_OVERRIDE */ + acpi_status acpi_os_table_override(struct acpi_table_header * existing_table, struct acpi_table_header ** new_table) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 946fd1ea79ff..d14081c10c17 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -76,6 +76,14 @@ typedef int (*acpi_table_handler) (struct acpi_table_header *table); typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); +#ifdef CONFIG_ACPI_INITRD_TABLE_OVERRIDE +void acpi_initrd_override(void *data, size_t size); +#else +static inline void acpi_initrd_override(void *data, size_t size) +{ +} +#endif + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); -- cgit v1.2.3 From 3a50597de8635cd05133bd12c95681c82fe7b878 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:29 +0100 Subject: KEYS: Make the session and process keyrings per-thread Make the session keyring per-thread rather than per-process, but still inherited from the parent thread to solve a problem with PAM and gdm. The problem is that join_session_keyring() will reject attempts to change the session keyring of a multithreaded program but gdm is now multithreaded before it gets to the point of starting PAM and running pam_keyinit to create the session keyring. See: https://bugs.freedesktop.org/show_bug.cgi?id=49211 The reason that join_session_keyring() will only change the session keyring under a single-threaded environment is that it's hard to alter the other thread's credentials to effect the change in a multi-threaded program. The problems are such as: (1) How to prevent two threads both running join_session_keyring() from racing. (2) Another thread's credentials may not be modified directly by this process. (3) The number of threads is uncertain whilst we're not holding the appropriate spinlock, making preallocation slightly tricky. (4) We could use TIF_NOTIFY_RESUME and key_replace_session_keyring() to get another thread to replace its keyring, but that means preallocating for each thread. A reasonable way around this is to make the session keyring per-thread rather than per-process and just document that if you want a common session keyring, you must get it before you spawn any threads - which is the current situation anyway. Whilst we're at it, we can the process keyring behave in the same way. This means we can clean up some of the ickyness in the creds code. Basically, after this patch, the session, process and thread keyrings are about inheritance rules only and not about sharing changes of keyring. Reported-by: Mantas M. Signed-off-by: David Howells Tested-by: Ray Strode --- include/linux/cred.h | 17 +----- kernel/cred.c | 127 +++++-------------------------------------- security/keys/keyctl.c | 11 ++-- security/keys/process_keys.c | 66 ++++++++-------------- security/keys/request_key.c | 10 ++-- 5 files changed, 50 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index ebbed2ce6637..0142aacb70b7 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -76,21 +76,6 @@ extern int groups_search(const struct group_info *, kgid_t); extern int in_group_p(kgid_t); extern int in_egroup_p(kgid_t); -/* - * The common credentials for a thread group - * - shared by CLONE_THREAD - */ -#ifdef CONFIG_KEYS -struct thread_group_cred { - atomic_t usage; - pid_t tgid; /* thread group process ID */ - spinlock_t lock; - struct key __rcu *session_keyring; /* keyring inherited over fork */ - struct key *process_keyring; /* keyring private to this process */ - struct rcu_head rcu; /* RCU deletion hook */ -}; -#endif - /* * The security context of a task * @@ -139,6 +124,8 @@ struct cred { #ifdef CONFIG_KEYS unsigned char jit_keyring; /* default keyring to attach requested * keys to */ + struct key __rcu *session_keyring; /* keyring inherited over fork */ + struct key *process_keyring; /* keyring private to this process */ struct key *thread_keyring; /* keyring private to this thread */ struct key *request_key_auth; /* assumed request_key authority */ struct thread_group_cred *tgcred; /* thread-group shared credentials */ diff --git a/kernel/cred.c b/kernel/cred.c index de728ac50d82..3f7ad1ec2ae4 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -29,17 +29,6 @@ static struct kmem_cache *cred_jar; -/* - * The common credentials for the initial task's thread group - */ -#ifdef CONFIG_KEYS -static struct thread_group_cred init_tgcred = { - .usage = ATOMIC_INIT(2), - .tgid = 0, - .lock = __SPIN_LOCK_UNLOCKED(init_cred.tgcred.lock), -}; -#endif - /* * The initial credentials for the initial task */ @@ -65,9 +54,6 @@ struct cred init_cred = { .user = INIT_USER, .user_ns = &init_user_ns, .group_info = &init_groups, -#ifdef CONFIG_KEYS - .tgcred = &init_tgcred, -#endif }; static inline void set_cred_subscribers(struct cred *cred, int n) @@ -95,36 +81,6 @@ static inline void alter_cred_subscribers(const struct cred *_cred, int n) #endif } -/* - * Dispose of the shared task group credentials - */ -#ifdef CONFIG_KEYS -static void release_tgcred_rcu(struct rcu_head *rcu) -{ - struct thread_group_cred *tgcred = - container_of(rcu, struct thread_group_cred, rcu); - - BUG_ON(atomic_read(&tgcred->usage) != 0); - - key_put(tgcred->session_keyring); - key_put(tgcred->process_keyring); - kfree(tgcred); -} -#endif - -/* - * Release a set of thread group credentials. - */ -static void release_tgcred(struct cred *cred) -{ -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred = cred->tgcred; - - if (atomic_dec_and_test(&tgcred->usage)) - call_rcu(&tgcred->rcu, release_tgcred_rcu); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -150,9 +106,10 @@ static void put_cred_rcu(struct rcu_head *rcu) #endif security_cred_free(cred); + key_put(cred->session_keyring); + key_put(cred->process_keyring); key_put(cred->thread_keyring); key_put(cred->request_key_auth); - release_tgcred(cred); if (cred->group_info) put_group_info(cred->group_info); free_uid(cred->user); @@ -246,15 +203,6 @@ struct cred *cred_alloc_blank(void) if (!new) return NULL; -#ifdef CONFIG_KEYS - new->tgcred = kzalloc(sizeof(*new->tgcred), GFP_KERNEL); - if (!new->tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } - atomic_set(&new->tgcred->usage, 1); -#endif - atomic_set(&new->usage, 1); #ifdef CONFIG_DEBUG_CREDENTIALS new->magic = CRED_MAGIC; @@ -308,9 +256,10 @@ struct cred *prepare_creds(void) get_user_ns(new->user_ns); #ifdef CONFIG_KEYS + key_get(new->session_keyring); + key_get(new->process_keyring); key_get(new->thread_keyring); key_get(new->request_key_auth); - atomic_inc(&new->tgcred->usage); #endif #ifdef CONFIG_SECURITY @@ -334,39 +283,20 @@ EXPORT_SYMBOL(prepare_creds); */ struct cred *prepare_exec_creds(void) { - struct thread_group_cred *tgcred = NULL; struct cred *new; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) - return NULL; -#endif - new = prepare_creds(); - if (!new) { - kfree(tgcred); + if (!new) return new; - } #ifdef CONFIG_KEYS /* newly exec'd tasks don't get a thread keyring */ key_put(new->thread_keyring); new->thread_keyring = NULL; - /* create a new per-thread-group creds for all this set of threads to - * share */ - memcpy(tgcred, new->tgcred, sizeof(struct thread_group_cred)); - - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - /* inherit the session keyring; new process keyring */ - key_get(tgcred->session_keyring); - tgcred->process_keyring = NULL; - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; #endif return new; @@ -383,9 +313,6 @@ struct cred *prepare_exec_creds(void) */ int copy_creds(struct task_struct *p, unsigned long clone_flags) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif struct cred *new; int ret; @@ -425,22 +352,12 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) install_thread_keyring_to_cred(new); } - /* we share the process and session keyrings between all the threads in - * a process - this is slightly icky as we violate COW credentials a - * bit */ + /* The process keyring is only shared between the threads in a process; + * anything outside of those threads doesn't inherit. + */ if (!(clone_flags & CLONE_THREAD)) { - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - ret = -ENOMEM; - goto error_put; - } - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = key_get(new->tgcred->session_keyring); - - release_tgcred(new); - new->tgcred = tgcred; + key_put(new->process_keyring); + new->process_keyring = NULL; } #endif @@ -643,9 +560,6 @@ void __init cred_init(void) */ struct cred *prepare_kernel_cred(struct task_struct *daemon) { -#ifdef CONFIG_KEYS - struct thread_group_cred *tgcred; -#endif const struct cred *old; struct cred *new; @@ -653,14 +567,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) if (!new) return NULL; -#ifdef CONFIG_KEYS - tgcred = kmalloc(sizeof(*tgcred), GFP_KERNEL); - if (!tgcred) { - kmem_cache_free(cred_jar, new); - return NULL; - } -#endif - kdebug("prepare_kernel_cred() alloc %p", new); if (daemon) @@ -678,13 +584,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) get_group_info(new->group_info); #ifdef CONFIG_KEYS - atomic_set(&tgcred->usage, 1); - spin_lock_init(&tgcred->lock); - tgcred->process_keyring = NULL; - tgcred->session_keyring = NULL; - new->tgcred = tgcred; - new->request_key_auth = NULL; + new->session_keyring = NULL; + new->process_keyring = NULL; new->thread_keyring = NULL; + new->request_key_auth = NULL; new->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; #endif diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index a0d373f76815..65b38417c211 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1475,7 +1475,8 @@ long keyctl_session_to_parent(void) goto error_keyring; newwork = &cred->rcu; - cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); + cred->session_keyring = key_ref_to_ptr(keyring_r); + keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; @@ -1500,7 +1501,7 @@ long keyctl_session_to_parent(void) mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || - mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) { + mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } @@ -1516,9 +1517,9 @@ long keyctl_session_to_parent(void) goto unlock; /* the keyrings must have the same UID */ - if ((pcred->tgcred->session_keyring && - pcred->tgcred->session_keyring->uid != mycred->euid) || - mycred->tgcred->session_keyring->uid != mycred->euid) + if ((pcred->session_keyring && + pcred->session_keyring->uid != mycred->euid) || + mycred->session_keyring->uid != mycred->euid) goto unlock; /* cancel an already pending keyring replacement */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 178b8c3b130a..9de5dc598276 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -169,9 +169,8 @@ static int install_thread_keyring(void) int install_process_keyring_to_cred(struct cred *new) { struct key *keyring; - int ret; - if (new->tgcred->process_keyring) + if (new->process_keyring) return -EEXIST; keyring = keyring_alloc("_pid", new->uid, new->gid, @@ -179,17 +178,8 @@ int install_process_keyring_to_cred(struct cred *new) if (IS_ERR(keyring)) return PTR_ERR(keyring); - spin_lock_irq(&new->tgcred->lock); - if (!new->tgcred->process_keyring) { - new->tgcred->process_keyring = keyring; - keyring = NULL; - ret = 0; - } else { - ret = -EEXIST; - } - spin_unlock_irq(&new->tgcred->lock); - key_put(keyring); - return ret; + new->process_keyring = keyring; + return 0; } /* @@ -230,7 +220,7 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) /* create an empty session keyring */ if (!keyring) { flags = KEY_ALLOC_QUOTA_OVERRUN; - if (cred->tgcred->session_keyring) + if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; keyring = keyring_alloc("_ses", cred->uid, cred->gid, @@ -242,17 +232,11 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) } /* install the keyring */ - spin_lock_irq(&cred->tgcred->lock); - old = cred->tgcred->session_keyring; - rcu_assign_pointer(cred->tgcred->session_keyring, keyring); - spin_unlock_irq(&cred->tgcred->lock); - - /* we're using RCU on the pointer, but there's no point synchronising - * on it if it didn't previously point to anything */ - if (old) { - synchronize_rcu(); + old = cred->session_keyring; + rcu_assign_pointer(cred->session_keyring, keyring); + + if (old) key_put(old); - } return 0; } @@ -367,9 +351,9 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the process keyring second */ - if (cred->tgcred->process_keyring) { + if (cred->process_keyring) { key_ref = keyring_search_aux( - make_key_ref(cred->tgcred->process_keyring, 1), + make_key_ref(cred->process_keyring, 1), cred, type, description, match, no_state_check); if (!IS_ERR(key_ref)) goto found; @@ -388,12 +372,10 @@ key_ref_t search_my_process_keyrings(struct key_type *type, } /* search the session keyring */ - if (cred->tgcred->session_keyring) { + if (cred->session_keyring) { rcu_read_lock(); key_ref = keyring_search_aux( - make_key_ref(rcu_dereference( - cred->tgcred->session_keyring), - 1), + make_key_ref(rcu_dereference(cred->session_keyring), 1), cred, type, description, match, no_state_check); rcu_read_unlock(); @@ -563,7 +545,7 @@ try_again: break; case KEY_SPEC_PROCESS_KEYRING: - if (!cred->tgcred->process_keyring) { + if (!cred->process_keyring) { if (!(lflags & KEY_LOOKUP_CREATE)) goto error; @@ -575,13 +557,13 @@ try_again: goto reget_creds; } - key = cred->tgcred->process_keyring; + key = cred->process_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_SESSION_KEYRING: - if (!cred->tgcred->session_keyring) { + if (!cred->session_keyring) { /* always install a session keyring upon access if one * doesn't exist yet */ ret = install_user_keyrings(); @@ -596,7 +578,7 @@ try_again: if (ret < 0) goto error; goto reget_creds; - } else if (cred->tgcred->session_keyring == + } else if (cred->session_keyring == cred->user->session_keyring && lflags & KEY_LOOKUP_CREATE) { ret = join_session_keyring(NULL); @@ -606,7 +588,7 @@ try_again: } rcu_read_lock(); - key = rcu_dereference(cred->tgcred->session_keyring); + key = rcu_dereference(cred->session_keyring); atomic_inc(&key->usage); rcu_read_unlock(); key_ref = make_key_ref(key, 1); @@ -766,12 +748,6 @@ long join_session_keyring(const char *name) struct key *keyring; long ret, serial; - /* only permit this if there's a single thread in the thread group - - * this avoids us having to adjust the creds on all threads and risking - * ENOMEM */ - if (!current_is_single_threaded()) - return -EMLINK; - new = prepare_creds(); if (!new) return -ENOMEM; @@ -783,7 +759,7 @@ long join_session_keyring(const char *name) if (ret < 0) goto error; - serial = new->tgcred->session_keyring->serial; + serial = new->session_keyring->serial; ret = commit_creds(new); if (ret == 0) ret = serial; @@ -806,6 +782,9 @@ long join_session_keyring(const char *name) } else if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; + } else if (keyring == new->session_keyring) { + ret = 0; + goto error2; } /* we've got a keyring - now to install it */ @@ -862,8 +841,7 @@ void key_change_session_keyring(struct callback_head *twork) new->jit_keyring = old->jit_keyring; new->thread_keyring = key_get(old->thread_keyring); - new->tgcred->tgid = old->tgcred->tgid; - new->tgcred->process_keyring = key_get(old->tgcred->process_keyring); + new->process_keyring = key_get(old->process_keyring); security_transfer_creds(new, old); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 000e75017520..275c4f9e4b8c 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -150,12 +150,12 @@ static int call_sbin_request_key(struct key_construction *cons, cred->thread_keyring ? cred->thread_keyring->serial : 0); prkey = 0; - if (cred->tgcred->process_keyring) - prkey = cred->tgcred->process_keyring->serial; + if (cred->process_keyring) + prkey = cred->process_keyring->serial; sprintf(keyring_str[1], "%d", prkey); rcu_read_lock(); - session = rcu_dereference(cred->tgcred->session_keyring); + session = rcu_dereference(cred->session_keyring); if (!session) session = cred->user->session_keyring; sskey = session->serial; @@ -297,14 +297,14 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_PROCESS_KEYRING: - dest_keyring = key_get(cred->tgcred->process_keyring); + dest_keyring = key_get(cred->process_keyring); if (dest_keyring) break; case KEY_REQKEY_DEFL_SESSION_KEYRING: rcu_read_lock(); dest_keyring = key_get( - rcu_dereference(cred->tgcred->session_keyring)); + rcu_dereference(cred->session_keyring)); rcu_read_unlock(); if (dest_keyring) -- cgit v1.2.3 From 96b5c8fea6c0861621051290d705ec2e971963f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Oct 2012 19:24:56 +0100 Subject: KEYS: Reduce initial permissions on keys Reduce the initial permissions on new keys to grant the possessor everything, view permission only to the user (so the keys can be seen in /proc/keys) and nothing else. This gives the creator a chance to adjust the permissions mask before other processes can access the new key or create a link to it. To aid with this, keyring_alloc() now takes a permission argument rather than setting the permissions itself. The following permissions are now set: (1) The user and user-session keyrings grant the user that owns them full permissions and grant a possessor everything bar SETATTR. (2) The process and thread keyrings grant the possessor full permissions but only grant the user VIEW. This permits the user to see them in /proc/keys, but not to do anything with them. (3) Anonymous session keyrings grant the possessor full permissions, but only grant the user VIEW and READ. This means that the user can see them in /proc/keys and can list them, but nothing else. Possibly READ shouldn't be provided either. (4) Named session keyrings grant everything an anonymous session keyring does, plus they grant the user LINK permission. The whole point of named session keyrings is that others can also subscribe to them. Possibly this should be a separate permission to LINK. (5) The temporary session keyring created by call_sbin_request_key() gets the same permissions as an anonymous session keyring. (6) Keys created by add_key() get VIEW, SEARCH, LINK and SETATTR for the possessor, plus READ and/or WRITE if the key type supports them. The used only gets VIEW now. (7) Keys created by request_key() now get the same as those created by add_key(). Reported-by: Lennart Poettering Reported-by: Stef Walter Signed-off-by: David Howells --- include/linux/key.h | 1 + security/keys/key.c | 6 +++--- security/keys/keyring.c | 9 +++------ security/keys/process_keys.c | 26 +++++++++++++++++--------- security/keys/request_key.c | 11 ++++++++++- 5 files changed, 34 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index cef3b315ba7c..890699815212 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -264,6 +264,7 @@ extern int key_unlink(struct key *keyring, extern struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, const struct cred *cred, + key_perm_t perm, unsigned long flags, struct key *dest); diff --git a/security/keys/key.c b/security/keys/key.c index 50d96d4e06f2..bebeca3a78e4 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -826,13 +826,13 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, /* if the client doesn't provide, decide on the permissions we want */ if (perm == KEY_PERM_UNDEF) { perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; - perm |= KEY_USR_VIEW | KEY_USR_SEARCH | KEY_USR_LINK | KEY_USR_SETATTR; + perm |= KEY_USR_VIEW; if (ktype->read) - perm |= KEY_POS_READ | KEY_USR_READ; + perm |= KEY_POS_READ; if (ktype == &key_type_keyring || ktype->update) - perm |= KEY_USR_WRITE; + perm |= KEY_POS_WRITE; } /* allocate a new key */ diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 81e7852d281d..cf704a92083f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -257,17 +257,14 @@ error: * Allocate a keyring and link into the destination keyring. */ struct key *keyring_alloc(const char *description, uid_t uid, gid_t gid, - const struct cred *cred, unsigned long flags, - struct key *dest) + const struct cred *cred, key_perm_t perm, + unsigned long flags, struct key *dest) { struct key *keyring; int ret; keyring = key_alloc(&key_type_keyring, description, - uid, gid, cred, - (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL, - flags); - + uid, gid, cred, perm, flags); if (!IS_ERR(keyring)) { ret = key_instantiate_and_link(keyring, NULL, 0, dest, NULL); if (ret < 0) { diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 9de5dc598276..b58d93892740 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -46,9 +46,11 @@ int install_user_keyrings(void) struct user_struct *user; const struct cred *cred; struct key *uid_keyring, *session_keyring; + key_perm_t user_keyring_perm; char buf[20]; int ret; + user_keyring_perm = (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_ALL; cred = current_cred(); user = cred->user; @@ -72,8 +74,8 @@ int install_user_keyrings(void) uid_keyring = find_keyring_by_name(buf, true); if (IS_ERR(uid_keyring)) { uid_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, - NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(uid_keyring)) { ret = PTR_ERR(uid_keyring); goto error; @@ -88,7 +90,8 @@ int install_user_keyrings(void) if (IS_ERR(session_keyring)) { session_keyring = keyring_alloc(buf, user->uid, (gid_t) -1, - cred, KEY_ALLOC_IN_QUOTA, NULL); + cred, user_keyring_perm, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(session_keyring)) { ret = PTR_ERR(session_keyring); goto error_release; @@ -129,6 +132,7 @@ int install_thread_keyring_to_cred(struct cred *new) struct key *keyring; keyring = keyring_alloc("_tid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -173,8 +177,9 @@ int install_process_keyring_to_cred(struct cred *new) if (new->process_keyring) return -EEXIST; - keyring = keyring_alloc("_pid", new->uid, new->gid, - new, KEY_ALLOC_QUOTA_OVERRUN, NULL); + keyring = keyring_alloc("_pid", new->uid, new->gid, new, + KEY_POS_ALL | KEY_USR_VIEW, + KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -223,8 +228,9 @@ int install_session_keyring_to_cred(struct cred *cred, struct key *keyring) if (cred->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc("_ses", cred->uid, cred->gid, - cred, flags, NULL); + keyring = keyring_alloc("_ses", cred->uid, cred->gid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, + flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); } else { @@ -773,8 +779,10 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, old->uid, old->gid, old, - KEY_ALLOC_IN_QUOTA, NULL); + keyring = keyring_alloc( + name, old->uid, old->gid, old, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_LINK, + KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); goto error2; diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 275c4f9e4b8c..0ae3a2202771 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -126,6 +126,7 @@ static int call_sbin_request_key(struct key_construction *cons, cred = get_current_cred(); keyring = keyring_alloc(desc, cred->fsuid, cred->fsgid, cred, + KEY_POS_ALL | KEY_USR_VIEW | KEY_USR_READ, KEY_ALLOC_QUOTA_OVERRUN, NULL); put_cred(cred); if (IS_ERR(keyring)) { @@ -347,6 +348,7 @@ static int construct_alloc_key(struct key_type *type, const struct cred *cred = current_cred(); unsigned long prealloc; struct key *key; + key_perm_t perm; key_ref_t key_ref; int ret; @@ -355,8 +357,15 @@ static int construct_alloc_key(struct key_type *type, *_key = NULL; mutex_lock(&user->cons_lock); + perm = KEY_POS_VIEW | KEY_POS_SEARCH | KEY_POS_LINK | KEY_POS_SETATTR; + perm |= KEY_USR_VIEW; + if (type->read) + perm |= KEY_POS_READ; + if (type == &key_type_keyring || type->update) + perm |= KEY_POS_WRITE; + key = key_alloc(type, description, cred->fsuid, cred->fsgid, cred, - KEY_POS_ALL, flags); + perm, flags); if (IS_ERR(key)) goto alloc_failed; -- cgit v1.2.3 From 9202e07636f0c4858ba6c30773a3f160b2b5659a Mon Sep 17 00:00:00 2001 From: Liu Yu-B13201 Date: Tue, 3 Jul 2012 05:48:52 +0000 Subject: KVM: PPC: Add support for ePAPR idle hcall in host kernel And add a new flag definition in kvm_ppc_pvinfo to indicate whether the host supports the EV_IDLE hcall. Signed-off-by: Liu Yu [stuart.yoder@freescale.com: cleanup,fixes for conditions allowing idle] Signed-off-by: Stuart Yoder [agraf: fix typo] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 7 +++++-- arch/powerpc/include/asm/Kbuild | 1 + arch/powerpc/kvm/powerpc.c | 10 ++++++++-- include/linux/kvm.h | 2 ++ 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f6ec3a92e621..170afb1fbc2e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1194,12 +1194,15 @@ struct kvm_ppc_pvinfo { This ioctl fetches PV specific information that need to be passed to the guest using the device tree or other means from vm context. -For now the only implemented piece of information distributed here is an array -of 4 instructions that make up a hypercall. +The hcall array defines 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +The flags bitmap is defined as: + + /* the host supports the ePAPR idle hcall + #define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) 4.48 KVM_ASSIGN_PCI_DEVICE diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7e313f1ed183..13d6b7bf3b69 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -34,5 +34,6 @@ header-y += termios.h header-y += types.h header-y += ucontext.h header-y += unistd.h +header-y += epapr_hcalls.h generic-y += rwsem.h diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index a478e662b2bc..dbf56e173c25 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -38,8 +38,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !(v->arch.shared->msr & MSR_WE) || - !!(v->arch.pending_exceptions) || + return !!(v->arch.pending_exceptions) || v->requests; } @@ -86,6 +85,11 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) /* Second return value is in r4 */ break; + case EV_HCALL_TOKEN(EV_IDLE): + r = EV_SUCCESS; + kvm_vcpu_block(vcpu); + clear_bit(KVM_REQ_UNHALT, &vcpu->requests); + break; default: r = EV_UNIMPLEMENTED; break; @@ -779,6 +783,8 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) pvinfo->hcall[3] = inst_nop; #endif + pvinfo->flags = KVM_PPC_PVINFO_FLAGS_EV_IDLE; + return 0; } diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0a6d6ba44c85..4cb3761bebae 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -477,6 +477,8 @@ struct kvm_ppc_smmu_info { struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ]; }; +#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0) + #define KVMIO 0xAE /* machine type bits, to be used as argument to KVM_CREATE_VM */ -- cgit v1.2.3 From f61c94bb99ca4253ac5dd57750e1af209a4beb7a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 8 Aug 2012 20:38:19 +0000 Subject: KVM: PPC: booke: Add watchdog emulation This patch adds the watchdog emulation in KVM. The watchdog emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl. The kernel timer are used for watchdog emulation and emulates h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how it is configured. Signed-off-by: Liu Yu Signed-off-by: Scott Wood [bharat.bhushan@freescale.com: reworked patch] Signed-off-by: Bharat Bhushan [agraf: adjust to new request framework] Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_host.h | 3 + arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/asm/reg_booke.h | 7 ++ arch/powerpc/kvm/book3s.c | 9 ++ arch/powerpc/kvm/booke.c | 155 +++++++++++++++++++++++++++++++++++ arch/powerpc/kvm/booke_emulate.c | 8 ++ arch/powerpc/kvm/powerpc.c | 14 +++- include/linux/kvm.h | 2 + include/linux/kvm_host.h | 1 + 9 files changed, 199 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a5ec8f573c7..51b0ccd56769 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -471,6 +471,8 @@ struct kvm_vcpu_arch { ulong fault_esr; ulong queued_dear; ulong queued_esr; + spinlock_t wdt_lock; + struct timer_list wdt_timer; u32 tlbcfg[4]; u32 mmucfg; u32 epr; @@ -486,6 +488,7 @@ struct kvm_vcpu_arch { u8 osi_needed; u8 osi_enabled; u8 papr_enabled; + u8 watchdog_enabled; u8 sane; u8 cpu_type; u8 hcall_needed; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 3dfc437fb9d9..c06a64b53362 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); extern void kvmppc_decrementer_func(unsigned long data); extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu); +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu); +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu); /* Core-specific hooks */ diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 2d916c4982c5..e07e6af5e1ff 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -539,6 +539,13 @@ #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */ #define TCR_ARE 0x00400000 /* Auto Reload Enable */ +#ifdef CONFIG_E500 +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \ + (((tcr) & 0x1E0000) >> 15)) +#else +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30) +#endif + /* Bit definitions for the TSR. */ #define TSR_ENW 0x80000000 /* Enable Next Watchdog */ #define TSR_WIS 0x40000000 /* WDT Interrupt Status */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 3f2a8360c857..e94666566fa9 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return 0; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index c36493087dbf..09e8bf33a8c9 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -209,6 +209,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu, clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions); } +static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG); +} + +static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) +{ + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions); +} + static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { #ifdef CONFIG_KVM_BOOKE_HV @@ -328,6 +338,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; break; + case BOOKE_IRQPRIO_WATCHDOG: case BOOKE_IRQPRIO_CRITICAL: case BOOKE_IRQPRIO_DBELL_CRIT: allowed = vcpu->arch.shared->msr & MSR_CE; @@ -407,12 +418,121 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, return allowed; } +/* + * Return the number of jiffies until the next timeout. If the timeout is + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA + * because the larger value can break the timer APIs. + */ +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu) +{ + u64 tb, wdt_tb, wdt_ticks = 0; + u64 nr_jiffies = 0; + u32 period = TCR_GET_WP(vcpu->arch.tcr); + + wdt_tb = 1ULL << (63 - period); + tb = get_tb(); + /* + * The watchdog timeout will hapeen when TB bit corresponding + * to watchdog will toggle from 0 to 1. + */ + if (tb & wdt_tb) + wdt_ticks = wdt_tb; + + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1)); + + /* Convert timebase ticks to jiffies */ + nr_jiffies = wdt_ticks; + + if (do_div(nr_jiffies, tb_ticks_per_jiffy)) + nr_jiffies++; + + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA); +} + +static void arm_next_watchdog(struct kvm_vcpu *vcpu) +{ + unsigned long nr_jiffies; + unsigned long flags; + + /* + * If TSR_ENW and TSR_WIS are not set then no need to exit to + * userspace, so clear the KVM_REQ_WATCHDOG request. + */ + if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS)) + clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests); + + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags); + nr_jiffies = watchdog_next_timeout(vcpu); + /* + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA + * then do not run the watchdog timer as this can break timer APIs. + */ + if (nr_jiffies < NEXT_TIMER_MAX_DELTA) + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies); + else + del_timer(&vcpu->arch.wdt_timer); + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags); +} + +void kvmppc_watchdog_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + u32 tsr, new_tsr; + int final; + + do { + new_tsr = tsr = vcpu->arch.tsr; + final = 0; + + /* Time out event */ + if (tsr & TSR_ENW) { + if (tsr & TSR_WIS) + final = 1; + else + new_tsr = tsr | TSR_WIS; + } else { + new_tsr = tsr | TSR_ENW; + } + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr); + + if (new_tsr & TSR_WIS) { + smp_wmb(); + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * If this is final watchdog expiry and some action is required + * then exit to userspace. + */ + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) && + vcpu->arch.watchdog_enabled) { + smp_wmb(); + kvm_make_request(KVM_REQ_WATCHDOG, vcpu); + kvm_vcpu_kick(vcpu); + } + + /* + * Stop running the watchdog timer after final expiration to + * prevent the host from being flooded with timers if the + * guest sets a short period. + * Timers will resume when TSR/TCR is updated next time. + */ + if (!final) + arm_next_watchdog(vcpu); +} + static void update_timer_ints(struct kvm_vcpu *vcpu) { if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS)) kvmppc_core_queue_dec(vcpu); else kvmppc_core_dequeue_dec(vcpu); + + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS)) + kvmppc_core_queue_watchdog(vcpu); + else + kvmppc_core_dequeue_watchdog(vcpu); } static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu) @@ -466,6 +586,11 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) kvmppc_core_flush_tlb(vcpu); #endif + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_WATCHDOG; + r = 0; + } + return r; } @@ -995,6 +1120,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) return r; } +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu) +{ + /* setup watchdog timer once */ + spin_lock_init(&vcpu->arch.wdt_lock); + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func, + (unsigned long)vcpu); + + return 0; +} + +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + del_timer_sync(&vcpu->arch.wdt_timer); +} + int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) { int i; @@ -1090,7 +1230,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, } if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) { + u32 old_tsr = vcpu->arch.tsr; + vcpu->arch.tsr = sregs->u.e.tsr; + + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } @@ -1251,6 +1397,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr) { vcpu->arch.tcr = new_tcr; + arm_next_watchdog(vcpu); update_timer_ints(vcpu); } @@ -1265,6 +1412,14 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits) { clear_bits(tsr_bits, &vcpu->arch.tsr); + + /* + * We may have stopped the watchdog due to + * being stuck on final expiration. + */ + if (tsr_bits & (TSR_ENW | TSR_WIS)) + arm_next_watchdog(vcpu); + update_timer_ints(vcpu); } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 12834bb608ab..5a66ade7fd17 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) kvmppc_clr_tsr_bits(vcpu, spr_val); break; case SPRN_TCR: + /* + * WRC is a 2-bit field that is supposed to preserve its + * value once written to non-zero. + */ + if (vcpu->arch.tcr & TCR_WRC_MASK) { + spr_val &= ~TCR_WRC_MASK; + spr_val |= vcpu->arch.tcr & TCR_WRC_MASK; + } kvmppc_set_tcr(vcpu, spr_val); break; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 54b12af577d0..0ffd7d17adc7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -300,6 +300,7 @@ int kvm_dev_ioctl_check_extension(long ext) switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: + case KVM_CAP_PPC_BOOKE_WATCHDOG: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -476,6 +477,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct hrtimer *timer) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { + int ret; + hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS); tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu); vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup; @@ -484,13 +487,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) #ifdef CONFIG_KVM_EXIT_TIMING mutex_init(&vcpu->arch.exit_timing_lock); #endif - - return 0; + ret = kvmppc_subarch_vcpu_init(vcpu); + return ret; } void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { kvmppc_mmu_destroy(vcpu); + kvmppc_subarch_vcpu_uninit(vcpu); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -735,6 +739,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; +#ifdef CONFIG_BOOKE + case KVM_CAP_PPC_BOOKE_WATCHDOG: + r = 0; + vcpu->arch.watchdog_enabled = true; + break; +#endif #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC) case KVM_CAP_SW_TLB: { struct kvm_config_tlb cfg; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 4cb3761bebae..1649d4b57e1f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -167,6 +167,7 @@ struct kvm_pit_config { #define KVM_EXIT_OSI 18 #define KVM_EXIT_PAPR_HCALL 19 #define KVM_EXIT_S390_UCONTROL 20 +#define KVM_EXIT_WATCHDOG 21 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -628,6 +629,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_READONLY_MEM 81 #endif #define KVM_CAP_IRQFD_RESAMPLE 82 +#define KVM_CAP_PPC_BOOKE_WATCHDOG 83 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2850656e2e96..0ca3663206f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -118,6 +118,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IMMEDIATE_EXIT 15 #define KVM_REQ_PMU 16 #define KVM_REQ_PMI 17 +#define KVM_REQ_WATCHDOG 18 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From ed7a8d7a3c5dd4adedb271112b6faba45c7037f9 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 13 Sep 2012 21:44:30 +0000 Subject: KVM: Move some PPC ioctl definitions to the correct place This moves the definitions of KVM_CREATE_SPAPR_TCE and KVM_ALLOCATE_RMA in include/linux/kvm.h from the section listing the vcpu ioctls to the section listing VM ioctls, as these are both implemented and documented as VM ioctls. Fortunately there is no actual collision of ioctl numbers at this point. Moving these to the correct section will reduce the probability of a future collision. This does not change the user/kernel ABI at all. Signed-off-by: Paul Mackerras Acked-by: Alexander Graf Signed-off-by: Alexander Graf --- include/linux/kvm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 1649d4b57e1f..65ad5c624c70 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -852,6 +852,9 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info) /* Available with KVM_CAP_PPC_ALLOC_HTAB */ #define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32) +#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) +/* Available with KVM_CAP_RMA */ +#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* * ioctls for vcpu fds @@ -915,9 +918,6 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_XCRS */ #define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs) #define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs) -#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) -/* Available with KVM_CAP_RMA */ -#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) /* Available with KVM_CAP_SW_TLB */ #define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb) /* Available with KVM_CAP_ONE_REG */ -- cgit v1.2.3 From 385ddeac7ed99cf7dc62d76274d55fbd7cae1b5a Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Fri, 5 Oct 2012 15:05:34 -0700 Subject: X86 ACPI: Use #ifdef not #if for CONFIG_X86 check Fix a build warning on ia64: include/linux/acpi.h:437:5: warning: "CONFIG_X86" is not defined Signed-off-by: Tony Luck Link: http://lkml.kernel.org/r/506f59ae9600b36a4@agluck-desktop.sc.intel.com Cc: Len Brown Cc: Thomas Renninger Signed-off-by: H. Peter Anvin --- include/linux/acpi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d14081c10c17..49fe586e3b1e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -434,7 +434,7 @@ void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control); -#if CONFIG_X86 +#ifdef CONFIG_X86 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else static inline void arch_reserve_mem_area(acpi_physical_address addr, -- cgit v1.2.3 From 8b6e4547e0e4b6aac11df6d8d4e71ea2ab159b5e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 20 Sep 2012 07:43:08 +0200 Subject: KVM: x86: Convert kvm_arch_vcpu_reset into private kvm_vcpu_reset There are no external callers of this function as there is no concept of resetting a vcpu from generic code. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 8 +++++--- include/linux/kvm_host.h | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1eefebe5d727..dfed7ca2d27a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -160,6 +160,8 @@ u64 __read_mostly host_xcr0; int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); +static int kvm_vcpu_reset(struct kvm_vcpu *vcpu); + static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; @@ -5426,7 +5428,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) pr_debug("vcpu %d received sipi with vector # %x\n", vcpu->vcpu_id, vcpu->arch.sipi_vector); kvm_lapic_reset(vcpu); - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_vcpu_reset(vcpu); if (r) return r; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -6036,7 +6038,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) r = vcpu_load(vcpu); if (r) return r; - r = kvm_arch_vcpu_reset(vcpu); + r = kvm_vcpu_reset(vcpu); if (r == 0) r = kvm_mmu_setup(vcpu); vcpu_put(vcpu); @@ -6058,7 +6060,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_x86_ops->vcpu_free(vcpu); } -int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) +static int kvm_vcpu_reset(struct kvm_vcpu *vcpu) { atomic_set(&vcpu->arch.nmi_queued, 0); vcpu->arch.nmi_pending = 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..c35b1c08c004 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -582,7 +582,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id); int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu); int kvm_arch_hardware_enable(void *garbage); void kvm_arch_hardware_disable(void *garbage); int kvm_arch_hardware_setup(void); -- cgit v1.2.3 From 90f790d2dc96f5a61855ae65b90e30c40c893a20 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 20 Aug 2012 21:45:05 +0100 Subject: regmap: irq: Allow users to retrieve the irq_domain This is useful for integration with other subsystems, especially MFD, and provides an alternative API for users that request their own IRQs. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap-irq.c | 19 +++++++++++++++++++ include/linux/regmap.h | 2 ++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap-irq.c b/drivers/base/regmap/regmap-irq.c index 5b6b1d8e6cc0..5972ad958544 100644 --- a/drivers/base/regmap/regmap-irq.c +++ b/drivers/base/regmap/regmap-irq.c @@ -458,3 +458,22 @@ int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq) return irq_create_mapping(data->domain, irq); } EXPORT_SYMBOL_GPL(regmap_irq_get_virq); + +/** + * regmap_irq_get_domain(): Retrieve the irq_domain for the chip + * + * Useful for drivers to request their own IRQs and for integration + * with subsystems. For ease of integration NULL is accepted as a + * domain, allowing devices to just call this even if no domain is + * allocated. + * + * @data: regmap_irq controller to operate on. + */ +struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data) +{ + if (data) + return data->domain; + else + return NULL; +} +EXPORT_SYMBOL_GPL(regmap_irq_get_domain); diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e3bcc3f4dcb8..b9e99799965d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -19,6 +19,7 @@ struct module; struct device; struct i2c_client; +struct irq_domain; struct spi_device; struct regmap; struct regmap_range_cfg; @@ -317,6 +318,7 @@ int regmap_add_irq_chip(struct regmap *map, int irq, int irq_flags, void regmap_del_irq_chip(int irq, struct regmap_irq_chip_data *data); int regmap_irq_chip_get_base(struct regmap_irq_chip_data *data); int regmap_irq_get_virq(struct regmap_irq_chip_data *data, int irq); +struct irq_domain *regmap_irq_get_domain(struct regmap_irq_chip_data *data); #else -- cgit v1.2.3 From e3549cd01347ef211d01353bdf2572b086574007 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 2 Oct 2012 20:17:15 +0100 Subject: regmap: Rename n_ranges to num_ranges This makes things consistent with the rest of the API and is actually what the documentation says. We don't currently have any in tree users so low cost. Signed-off-by: Mark Brown --- drivers/base/regmap/regmap.c | 4 ++-- include/linux/regmap.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 52069d29ff12..ea9d6eb826bd 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -519,7 +519,7 @@ struct regmap *regmap_init(struct device *dev, } map->range_tree = RB_ROOT; - for (i = 0; i < config->n_ranges; i++) { + for (i = 0; i < config->num_ranges; i++) { const struct regmap_range_cfg *range_cfg = &config->ranges[i]; struct regmap_range_node *new; @@ -532,7 +532,7 @@ struct regmap *regmap_init(struct device *dev, /* Make sure, that this register range has no selector or data window within its boundary */ - for (j = 0; j < config->n_ranges; j++) { + for (j = 0; j < config->num_ranges; j++) { unsigned sel_reg = config->ranges[j].selector_reg; unsigned win_min = config->ranges[j].window_start; unsigned win_max = win_min + diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e3bcc3f4dcb8..afc8e1a2cd1b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -133,7 +133,7 @@ struct regmap_config { enum regmap_endian val_format_endian; const struct regmap_range_cfg *ranges; - unsigned int n_ranges; + unsigned int num_ranges; }; /** -- cgit v1.2.3 From d058bb49618482f2eff0db57618c9a7352916dd5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 3 Oct 2012 12:40:47 +0100 Subject: regmap: Allow ranges to be named For more useful diagnostics. Signed-off-by: Mark Brown --- drivers/base/regmap/internal.h | 1 + drivers/base/regmap/regmap.c | 1 + include/linux/regmap.h | 4 ++++ 3 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/base/regmap/internal.h b/drivers/base/regmap/internal.h index 80f9ab9c3aa4..27e66c3e7a59 100644 --- a/drivers/base/regmap/internal.h +++ b/drivers/base/regmap/internal.h @@ -120,6 +120,7 @@ int _regmap_write(struct regmap *map, unsigned int reg, struct regmap_range_node { struct rb_node node; + const char *name; unsigned int range_min; unsigned int range_max; diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c index 0544f63ecd31..ce5129df4406 100644 --- a/drivers/base/regmap/regmap.c +++ b/drivers/base/regmap/regmap.c @@ -579,6 +579,7 @@ struct regmap *regmap_init(struct device *dev, goto err_range; } + new->name = range_cfg->name; new->range_min = range_cfg->range_min; new->range_max = range_cfg->range_max; new->selector_reg = range_cfg->selector_reg; diff --git a/include/linux/regmap.h b/include/linux/regmap.h index afc8e1a2cd1b..9f228d7f7ac4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -142,6 +142,8 @@ struct regmap_config { * 1. page selector register update; * 2. access through data window registers. * + * @name: Descriptive name for diagnostics + * * @range_min: Address of the lowest register address in virtual range. * @range_max: Address of the highest register in virtual range. * @@ -153,6 +155,8 @@ struct regmap_config { * @window_len: Number of registers in data window. */ struct regmap_range_cfg { + const char *name; + /* Registers of virtual address range */ unsigned int range_min; unsigned int range_max; -- cgit v1.2.3 From b8a6d9980f75cf5401a5ab23834eace1cb23c4f1 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Fri, 14 Sep 2012 10:35:54 +0800 Subject: dma: ipu: rename mach/ipu.h to include/linux/dma/ipu-dma.h The header ipu.h really belongs to dma subsystem rather than imx platform. Rename it to ipu-dma.h and put it into include/linux/dma/. Signed-off-by: Shawn Guo Acked-by: Guennadi Liakhovetski Acked-by: Sascha Hauer Acked-by: Arnd Bergmann Cc: Vinod Koul Cc: Florian Tobias Schandinat Cc: linux-media@vger.kernel.org Cc: linux-fbdev@vger.kernel.org --- arch/arm/mach-imx/include/mach/ipu.h | 177 ------------------------- drivers/dma/ipu/ipu_idmac.c | 3 +- drivers/dma/ipu/ipu_irq.c | 3 +- drivers/media/platform/soc_camera/mx3_camera.c | 2 +- drivers/video/mx3fb.c | 2 +- include/linux/dma/ipu-dma.h | 177 +++++++++++++++++++++++++ 6 files changed, 181 insertions(+), 183 deletions(-) delete mode 100644 arch/arm/mach-imx/include/mach/ipu.h create mode 100644 include/linux/dma/ipu-dma.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/include/mach/ipu.h b/arch/arm/mach-imx/include/mach/ipu.h deleted file mode 100644 index 539e559d18b2..000000000000 --- a/arch/arm/mach-imx/include/mach/ipu.h +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Copyright (C) 2008 - * Guennadi Liakhovetski, DENX Software Engineering, - * - * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _IPU_H_ -#define _IPU_H_ - -#include -#include - -/* IPU DMA Controller channel definitions. */ -enum ipu_channel { - IDMAC_IC_0 = 0, /* IC (encoding task) to memory */ - IDMAC_IC_1 = 1, /* IC (viewfinder task) to memory */ - IDMAC_ADC_0 = 1, - IDMAC_IC_2 = 2, - IDMAC_ADC_1 = 2, - IDMAC_IC_3 = 3, - IDMAC_IC_4 = 4, - IDMAC_IC_5 = 5, - IDMAC_IC_6 = 6, - IDMAC_IC_7 = 7, /* IC (sensor data) to memory */ - IDMAC_IC_8 = 8, - IDMAC_IC_9 = 9, - IDMAC_IC_10 = 10, - IDMAC_IC_11 = 11, - IDMAC_IC_12 = 12, - IDMAC_IC_13 = 13, - IDMAC_SDC_0 = 14, /* Background synchronous display data */ - IDMAC_SDC_1 = 15, /* Foreground data (overlay) */ - IDMAC_SDC_2 = 16, - IDMAC_SDC_3 = 17, - IDMAC_ADC_2 = 18, - IDMAC_ADC_3 = 19, - IDMAC_ADC_4 = 20, - IDMAC_ADC_5 = 21, - IDMAC_ADC_6 = 22, - IDMAC_ADC_7 = 23, - IDMAC_PF_0 = 24, - IDMAC_PF_1 = 25, - IDMAC_PF_2 = 26, - IDMAC_PF_3 = 27, - IDMAC_PF_4 = 28, - IDMAC_PF_5 = 29, - IDMAC_PF_6 = 30, - IDMAC_PF_7 = 31, -}; - -/* Order significant! */ -enum ipu_channel_status { - IPU_CHANNEL_FREE, - IPU_CHANNEL_INITIALIZED, - IPU_CHANNEL_READY, - IPU_CHANNEL_ENABLED, -}; - -#define IPU_CHANNELS_NUM 32 - -enum pixel_fmt { - /* 1 byte */ - IPU_PIX_FMT_GENERIC, - IPU_PIX_FMT_RGB332, - IPU_PIX_FMT_YUV420P, - IPU_PIX_FMT_YUV422P, - IPU_PIX_FMT_YUV420P2, - IPU_PIX_FMT_YVU422P, - /* 2 bytes */ - IPU_PIX_FMT_RGB565, - IPU_PIX_FMT_RGB666, - IPU_PIX_FMT_BGR666, - IPU_PIX_FMT_YUYV, - IPU_PIX_FMT_UYVY, - /* 3 bytes */ - IPU_PIX_FMT_RGB24, - IPU_PIX_FMT_BGR24, - /* 4 bytes */ - IPU_PIX_FMT_GENERIC_32, - IPU_PIX_FMT_RGB32, - IPU_PIX_FMT_BGR32, - IPU_PIX_FMT_ABGR32, - IPU_PIX_FMT_BGRA32, - IPU_PIX_FMT_RGBA32, -}; - -enum ipu_color_space { - IPU_COLORSPACE_RGB, - IPU_COLORSPACE_YCBCR, - IPU_COLORSPACE_YUV -}; - -/* - * Enumeration of IPU rotation modes - */ -enum ipu_rotate_mode { - /* Note the enum values correspond to BAM value */ - IPU_ROTATE_NONE = 0, - IPU_ROTATE_VERT_FLIP = 1, - IPU_ROTATE_HORIZ_FLIP = 2, - IPU_ROTATE_180 = 3, - IPU_ROTATE_90_RIGHT = 4, - IPU_ROTATE_90_RIGHT_VFLIP = 5, - IPU_ROTATE_90_RIGHT_HFLIP = 6, - IPU_ROTATE_90_LEFT = 7, -}; - -/* - * Enumeration of DI ports for ADC. - */ -enum display_port { - DISP0, - DISP1, - DISP2, - DISP3 -}; - -struct idmac_video_param { - unsigned short in_width; - unsigned short in_height; - uint32_t in_pixel_fmt; - unsigned short out_width; - unsigned short out_height; - uint32_t out_pixel_fmt; - unsigned short out_stride; - bool graphics_combine_en; - bool global_alpha_en; - bool key_color_en; - enum display_port disp; - unsigned short out_left; - unsigned short out_top; -}; - -/* - * Union of initialization parameters for a logical channel. So far only video - * parameters are used. - */ -union ipu_channel_param { - struct idmac_video_param video; -}; - -struct idmac_tx_desc { - struct dma_async_tx_descriptor txd; - struct scatterlist *sg; /* scatterlist for this */ - unsigned int sg_len; /* tx-descriptor. */ - struct list_head list; -}; - -struct idmac_channel { - struct dma_chan dma_chan; - dma_cookie_t completed; /* last completed cookie */ - union ipu_channel_param params; - enum ipu_channel link; /* input channel, linked to the output */ - enum ipu_channel_status status; - void *client; /* Only one client per channel */ - unsigned int n_tx_desc; - struct idmac_tx_desc *desc; /* allocated tx-descriptors */ - struct scatterlist *sg[2]; /* scatterlist elements in buffer-0 and -1 */ - struct list_head free_list; /* free tx-descriptors */ - struct list_head queue; /* queued tx-descriptors */ - spinlock_t lock; /* protects sg[0,1], queue */ - struct mutex chan_mutex; /* protects status, cookie, free_list */ - bool sec_chan_en; - int active_buffer; - unsigned int eof_irq; - char eof_name[16]; /* EOF IRQ name for request_irq() */ -}; - -#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd) -#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan) - -#endif diff --git a/drivers/dma/ipu/ipu_idmac.c b/drivers/dma/ipu/ipu_idmac.c index c7573e50aa14..65855373cee6 100644 --- a/drivers/dma/ipu/ipu_idmac.c +++ b/drivers/dma/ipu/ipu_idmac.c @@ -22,8 +22,7 @@ #include #include #include - -#include +#include #include "../dmaengine.h" #include "ipu_intern.h" diff --git a/drivers/dma/ipu/ipu_irq.c b/drivers/dma/ipu/ipu_irq.c index fa95bcc3de1f..a5ee37d5320f 100644 --- a/drivers/dma/ipu/ipu_irq.c +++ b/drivers/dma/ipu/ipu_irq.c @@ -15,8 +15,7 @@ #include #include #include - -#include +#include #include "ipu_intern.h" diff --git a/drivers/media/platform/soc_camera/mx3_camera.c b/drivers/media/platform/soc_camera/mx3_camera.c index 3557ac97e430..64d39b1b5582 100644 --- a/drivers/media/platform/soc_camera/mx3_camera.c +++ b/drivers/media/platform/soc_camera/mx3_camera.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -24,7 +25,6 @@ #include #include -#include #include #include diff --git a/drivers/video/mx3fb.c b/drivers/video/mx3fb.c index ce1d452464ed..0324c07be895 100644 --- a/drivers/video/mx3fb.c +++ b/drivers/video/mx3fb.c @@ -26,10 +26,10 @@ #include #include #include +#include #include #include -#include #include #include diff --git a/include/linux/dma/ipu-dma.h b/include/linux/dma/ipu-dma.h new file mode 100644 index 000000000000..18031115c668 --- /dev/null +++ b/include/linux/dma/ipu-dma.h @@ -0,0 +1,177 @@ +/* + * Copyright (C) 2008 + * Guennadi Liakhovetski, DENX Software Engineering, + * + * Copyright (C) 2005-2007 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_DMA_IPU_DMA_H +#define __LINUX_DMA_IPU_DMA_H + +#include +#include + +/* IPU DMA Controller channel definitions. */ +enum ipu_channel { + IDMAC_IC_0 = 0, /* IC (encoding task) to memory */ + IDMAC_IC_1 = 1, /* IC (viewfinder task) to memory */ + IDMAC_ADC_0 = 1, + IDMAC_IC_2 = 2, + IDMAC_ADC_1 = 2, + IDMAC_IC_3 = 3, + IDMAC_IC_4 = 4, + IDMAC_IC_5 = 5, + IDMAC_IC_6 = 6, + IDMAC_IC_7 = 7, /* IC (sensor data) to memory */ + IDMAC_IC_8 = 8, + IDMAC_IC_9 = 9, + IDMAC_IC_10 = 10, + IDMAC_IC_11 = 11, + IDMAC_IC_12 = 12, + IDMAC_IC_13 = 13, + IDMAC_SDC_0 = 14, /* Background synchronous display data */ + IDMAC_SDC_1 = 15, /* Foreground data (overlay) */ + IDMAC_SDC_2 = 16, + IDMAC_SDC_3 = 17, + IDMAC_ADC_2 = 18, + IDMAC_ADC_3 = 19, + IDMAC_ADC_4 = 20, + IDMAC_ADC_5 = 21, + IDMAC_ADC_6 = 22, + IDMAC_ADC_7 = 23, + IDMAC_PF_0 = 24, + IDMAC_PF_1 = 25, + IDMAC_PF_2 = 26, + IDMAC_PF_3 = 27, + IDMAC_PF_4 = 28, + IDMAC_PF_5 = 29, + IDMAC_PF_6 = 30, + IDMAC_PF_7 = 31, +}; + +/* Order significant! */ +enum ipu_channel_status { + IPU_CHANNEL_FREE, + IPU_CHANNEL_INITIALIZED, + IPU_CHANNEL_READY, + IPU_CHANNEL_ENABLED, +}; + +#define IPU_CHANNELS_NUM 32 + +enum pixel_fmt { + /* 1 byte */ + IPU_PIX_FMT_GENERIC, + IPU_PIX_FMT_RGB332, + IPU_PIX_FMT_YUV420P, + IPU_PIX_FMT_YUV422P, + IPU_PIX_FMT_YUV420P2, + IPU_PIX_FMT_YVU422P, + /* 2 bytes */ + IPU_PIX_FMT_RGB565, + IPU_PIX_FMT_RGB666, + IPU_PIX_FMT_BGR666, + IPU_PIX_FMT_YUYV, + IPU_PIX_FMT_UYVY, + /* 3 bytes */ + IPU_PIX_FMT_RGB24, + IPU_PIX_FMT_BGR24, + /* 4 bytes */ + IPU_PIX_FMT_GENERIC_32, + IPU_PIX_FMT_RGB32, + IPU_PIX_FMT_BGR32, + IPU_PIX_FMT_ABGR32, + IPU_PIX_FMT_BGRA32, + IPU_PIX_FMT_RGBA32, +}; + +enum ipu_color_space { + IPU_COLORSPACE_RGB, + IPU_COLORSPACE_YCBCR, + IPU_COLORSPACE_YUV +}; + +/* + * Enumeration of IPU rotation modes + */ +enum ipu_rotate_mode { + /* Note the enum values correspond to BAM value */ + IPU_ROTATE_NONE = 0, + IPU_ROTATE_VERT_FLIP = 1, + IPU_ROTATE_HORIZ_FLIP = 2, + IPU_ROTATE_180 = 3, + IPU_ROTATE_90_RIGHT = 4, + IPU_ROTATE_90_RIGHT_VFLIP = 5, + IPU_ROTATE_90_RIGHT_HFLIP = 6, + IPU_ROTATE_90_LEFT = 7, +}; + +/* + * Enumeration of DI ports for ADC. + */ +enum display_port { + DISP0, + DISP1, + DISP2, + DISP3 +}; + +struct idmac_video_param { + unsigned short in_width; + unsigned short in_height; + uint32_t in_pixel_fmt; + unsigned short out_width; + unsigned short out_height; + uint32_t out_pixel_fmt; + unsigned short out_stride; + bool graphics_combine_en; + bool global_alpha_en; + bool key_color_en; + enum display_port disp; + unsigned short out_left; + unsigned short out_top; +}; + +/* + * Union of initialization parameters for a logical channel. So far only video + * parameters are used. + */ +union ipu_channel_param { + struct idmac_video_param video; +}; + +struct idmac_tx_desc { + struct dma_async_tx_descriptor txd; + struct scatterlist *sg; /* scatterlist for this */ + unsigned int sg_len; /* tx-descriptor. */ + struct list_head list; +}; + +struct idmac_channel { + struct dma_chan dma_chan; + dma_cookie_t completed; /* last completed cookie */ + union ipu_channel_param params; + enum ipu_channel link; /* input channel, linked to the output */ + enum ipu_channel_status status; + void *client; /* Only one client per channel */ + unsigned int n_tx_desc; + struct idmac_tx_desc *desc; /* allocated tx-descriptors */ + struct scatterlist *sg[2]; /* scatterlist elements in buffer-0 and -1 */ + struct list_head free_list; /* free tx-descriptors */ + struct list_head queue; /* queued tx-descriptors */ + spinlock_t lock; /* protects sg[0,1], queue */ + struct mutex chan_mutex; /* protects status, cookie, free_list */ + bool sec_chan_en; + int active_buffer; + unsigned int eof_irq; + char eof_name[16]; /* EOF IRQ name for request_irq() */ +}; + +#define to_tx_desc(tx) container_of(tx, struct idmac_tx_desc, txd) +#define to_idmac_chan(c) container_of(c, struct idmac_channel, dma_chan) + +#endif /* __LINUX_DMA_IPU_DMA_H */ -- cgit v1.2.3 From e51d0f0ac4b7f513808743c6a62f0387eebd0144 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sat, 15 Sep 2012 21:11:28 +0800 Subject: dma: imx-dma: remove cpu_is_xxx by using platform_device_id It changes the driver to use platform_device_id rather than cpu_is_xxx to determine the controller type, and updates the platform code accordingly. As the result, mach/hardware.h inclusion gets removed from the driver. Signed-off-by: Shawn Guo Signed-off-by: Sascha Hauer Acked-by: Arnd Bergmann Tested-by: Javier Martin Cc: Vinod Koul --- arch/arm/mach-imx/clk-imx1.c | 3 +- arch/arm/mach-imx/clk-imx21.c | 4 +-- arch/arm/mach-imx/clk-imx27.c | 4 +-- arch/arm/mach-imx/devices/devices-common.h | 2 +- arch/arm/mach-imx/devices/platform-imx-dma.c | 4 +-- arch/arm/mach-imx/mm-imx1.c | 3 +- arch/arm/mach-imx/mm-imx21.c | 3 +- arch/arm/mach-imx/mm-imx27.c | 3 +- drivers/dma/imx-dma.c | 54 ++++++++++++++++++++++++---- include/linux/platform_data/dma-imx.h | 4 ++- 10 files changed, 66 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-imx/clk-imx1.c b/arch/arm/mach-imx/clk-imx1.c index 1cc7a735ed19..49a74440bc48 100644 --- a/arch/arm/mach-imx/clk-imx1.c +++ b/arch/arm/mach-imx/clk-imx1.c @@ -83,7 +83,8 @@ int __init mx1_clocks_init(unsigned long fref) pr_err("imx1 clk %d: register failed with %ld\n", i, PTR_ERR(clk[i])); - clk_register_clkdev(clk[dma_gate], "ahb", "imx-dma"); + clk_register_clkdev(clk[dma_gate], "ahb", "imx1-dma"); + clk_register_clkdev(clk[hclk], "ipg", "imx1-dma"); clk_register_clkdev(clk[csi_gate], NULL, "mx1-camera.0"); clk_register_clkdev(clk[mma_gate], "mma", NULL); clk_register_clkdev(clk[usbd_gate], NULL, "imx_udc.0"); diff --git a/arch/arm/mach-imx/clk-imx21.c b/arch/arm/mach-imx/clk-imx21.c index 96a4788032d1..8aec572f84fe 100644 --- a/arch/arm/mach-imx/clk-imx21.c +++ b/arch/arm/mach-imx/clk-imx21.c @@ -163,8 +163,8 @@ int __init mx21_clocks_init(unsigned long lref, unsigned long href) clk_register_clkdev(clk[usb_gate], "per", "imx21-hcd.0"); clk_register_clkdev(clk[usb_hclk_gate], "ahb", "imx21-hcd.0"); clk_register_clkdev(clk[nfc_gate], NULL, "imx21-nand.0"); - clk_register_clkdev(clk[dma_hclk_gate], "ahb", "imx-dma"); - clk_register_clkdev(clk[dma_gate], "ipg", "imx-dma"); + clk_register_clkdev(clk[dma_hclk_gate], "ahb", "imx21-dma"); + clk_register_clkdev(clk[dma_gate], "ipg", "imx21-dma"); clk_register_clkdev(clk[wdog_gate], NULL, "imx2-wdt.0"); clk_register_clkdev(clk[i2c_gate], NULL, "imx21-i2c.0"); clk_register_clkdev(clk[kpp_gate], NULL, "mxc-keypad"); diff --git a/arch/arm/mach-imx/clk-imx27.c b/arch/arm/mach-imx/clk-imx27.c index 86a0c4262471..a5f0e3b6dec6 100644 --- a/arch/arm/mach-imx/clk-imx27.c +++ b/arch/arm/mach-imx/clk-imx27.c @@ -242,8 +242,8 @@ int __init mx27_clocks_init(unsigned long fref) clk_register_clkdev(clk[nfc_baud_gate], NULL, "imx27-nand.0"); clk_register_clkdev(clk[vpu_baud_gate], "per", "coda-imx27.0"); clk_register_clkdev(clk[vpu_ahb_gate], "ahb", "coda-imx27.0"); - clk_register_clkdev(clk[dma_ahb_gate], "ahb", "imx-dma"); - clk_register_clkdev(clk[dma_ipg_gate], "ipg", "imx-dma"); + clk_register_clkdev(clk[dma_ahb_gate], "ahb", "imx27-dma"); + clk_register_clkdev(clk[dma_ipg_gate], "ipg", "imx27-dma"); clk_register_clkdev(clk[fec_ipg_gate], "ipg", "imx27-fec.0"); clk_register_clkdev(clk[fec_ahb_gate], "ahb", "imx27-fec.0"); clk_register_clkdev(clk[wdog_ipg_gate], NULL, "imx2-wdt.0"); diff --git a/arch/arm/mach-imx/devices/devices-common.h b/arch/arm/mach-imx/devices/devices-common.h index e4368f6c5fb4..36eb3f09f5d7 100644 --- a/arch/arm/mach-imx/devices/devices-common.h +++ b/arch/arm/mach-imx/devices/devices-common.h @@ -329,7 +329,7 @@ struct platform_device *__init imx_add_spi_imx( const struct imx_spi_imx_data *data, const struct spi_imx_master *pdata); -struct platform_device *imx_add_imx_dma(resource_size_t iobase, +struct platform_device *imx_add_imx_dma(char *name, resource_size_t iobase, int irq, int irq_err); struct platform_device *imx_add_imx_sdma(char *name, resource_size_t iobase, int irq, struct sdma_platform_data *pdata); diff --git a/arch/arm/mach-imx/devices/platform-imx-dma.c b/arch/arm/mach-imx/devices/platform-imx-dma.c index f9003e4d0f57..ccdb5dc4ddbd 100644 --- a/arch/arm/mach-imx/devices/platform-imx-dma.c +++ b/arch/arm/mach-imx/devices/platform-imx-dma.c @@ -8,7 +8,7 @@ */ #include "devices-common.h" -struct platform_device __init __maybe_unused *imx_add_imx_dma( +struct platform_device __init __maybe_unused *imx_add_imx_dma(char *name, resource_size_t iobase, int irq, int irq_err) { struct resource res[] = { @@ -28,7 +28,7 @@ struct platform_device __init __maybe_unused *imx_add_imx_dma( }; return platform_device_register_resndata(&mxc_ahb_bus, - "imx-dma", -1, res, ARRAY_SIZE(res), NULL, 0); + name, -1, res, ARRAY_SIZE(res), NULL, 0); } struct platform_device __init __maybe_unused *imx_add_imx_sdma(char *name, diff --git a/arch/arm/mach-imx/mm-imx1.c b/arch/arm/mach-imx/mm-imx1.c index 9de81bf6de96..79f6c0b8f69f 100644 --- a/arch/arm/mach-imx/mm-imx1.c +++ b/arch/arm/mach-imx/mm-imx1.c @@ -60,6 +60,7 @@ void __init imx1_soc_init(void) MX1_GPIO_INT_PORTC, 0); mxc_register_gpio("imx1-gpio", 3, MX1_GPIO4_BASE_ADDR, SZ_256, MX1_GPIO_INT_PORTD, 0); - imx_add_imx_dma(MX1_DMA_BASE_ADDR, MX1_DMA_INT, MX1_DMA_ERR); + imx_add_imx_dma("imx1-dma", MX1_DMA_BASE_ADDR, + MX1_DMA_INT, MX1_DMA_ERR); pinctrl_provide_dummies(); } diff --git a/arch/arm/mach-imx/mm-imx21.c b/arch/arm/mach-imx/mm-imx21.c index 1c295154c296..3b97ea63b5fb 100644 --- a/arch/arm/mach-imx/mm-imx21.c +++ b/arch/arm/mach-imx/mm-imx21.c @@ -90,7 +90,8 @@ void __init imx21_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX21_GPIO6_BASE_ADDR, SZ_256, MX21_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma(MX21_DMA_BASE_ADDR, MX21_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx21-dma", MX21_DMA_BASE_ADDR, + MX21_INT_DMACH0, 0); /* No ERR irq */ platform_device_register_simple("imx21-audmux", 0, imx21_audmux_res, ARRAY_SIZE(imx21_audmux_res)); } diff --git a/arch/arm/mach-imx/mm-imx27.c b/arch/arm/mach-imx/mm-imx27.c index d389f4af33c0..91e8da832810 100644 --- a/arch/arm/mach-imx/mm-imx27.c +++ b/arch/arm/mach-imx/mm-imx27.c @@ -91,7 +91,8 @@ void __init imx27_soc_init(void) mxc_register_gpio("imx21-gpio", 5, MX27_GPIO6_BASE_ADDR, SZ_256, MX27_INT_GPIO, 0); pinctrl_provide_dummies(); - imx_add_imx_dma(MX27_DMA_BASE_ADDR, MX27_INT_DMACH0, 0); /* No ERR irq */ + imx_add_imx_dma("imx27-dma", MX27_DMA_BASE_ADDR, + MX27_INT_DMACH0, 0); /* No ERR irq */ /* imx27 has the imx21 type audmux */ platform_device_register_simple("imx21-audmux", 0, imx27_audmux_res, ARRAY_SIZE(imx27_audmux_res)); diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 88e8a8d89b56..a3a8270e76fb 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -29,7 +29,6 @@ #include #include -#include #include "dmaengine.h" #define IMXDMA_MAX_CHAN_DESCRIPTORS 16 @@ -167,6 +166,12 @@ struct imxdma_channel { int slot_2d; }; +enum imx_dma_type { + IMX1_DMA, + IMX21_DMA, + IMX27_DMA, +}; + struct imxdma_engine { struct device *dev; struct device_dma_parameters dma_parms; @@ -177,8 +182,40 @@ struct imxdma_engine { spinlock_t lock; struct imx_dma_2d_config slots_2d[IMX_DMA_2D_SLOTS]; struct imxdma_channel channel[IMX_DMA_CHANNELS]; + enum imx_dma_type devtype; }; +static struct platform_device_id imx_dma_devtype[] = { + { + .name = "imx1-dma", + .driver_data = IMX1_DMA, + }, { + .name = "imx21-dma", + .driver_data = IMX21_DMA, + }, { + .name = "imx27-dma", + .driver_data = IMX27_DMA, + }, { + /* sentinel */ + } +}; +MODULE_DEVICE_TABLE(platform, imx_dma_devtype); + +static inline int is_imx1_dma(struct imxdma_engine *imxdma) +{ + return imxdma->devtype == IMX1_DMA; +} + +static inline int is_imx21_dma(struct imxdma_engine *imxdma) +{ + return imxdma->devtype == IMX21_DMA; +} + +static inline int is_imx27_dma(struct imxdma_engine *imxdma) +{ + return imxdma->devtype == IMX27_DMA; +} + static struct imxdma_channel *to_imxdma_chan(struct dma_chan *chan) { return container_of(chan, struct imxdma_channel, chan); @@ -212,7 +249,9 @@ static unsigned imx_dmav1_readl(struct imxdma_engine *imxdma, unsigned offset) static int imxdma_hw_chain(struct imxdma_channel *imxdmac) { - if (cpu_is_mx27()) + struct imxdma_engine *imxdma = imxdmac->imxdma; + + if (is_imx27_dma(imxdma)) return imxdmac->hw_chaining; else return 0; @@ -267,7 +306,7 @@ static void imxdma_enable_hw(struct imxdma_desc *d) imx_dmav1_writel(imxdma, imx_dmav1_readl(imxdma, DMA_CCR(channel)) | CCR_CEN | CCR_ACRPT, DMA_CCR(channel)); - if ((cpu_is_mx21() || cpu_is_mx27()) && + if (!is_imx1_dma(imxdma) && d->sg && imxdma_hw_chain(imxdmac)) { d->sg = sg_next(d->sg); if (d->sg) { @@ -436,7 +475,7 @@ static irqreturn_t dma_irq_handler(int irq, void *dev_id) struct imxdma_engine *imxdma = dev_id; int i, disr; - if (cpu_is_mx21() || cpu_is_mx27()) + if (!is_imx1_dma(imxdma)) imxdma_err_handler(irq, dev_id); disr = imx_dmav1_readl(imxdma, DMA_DISR); @@ -967,6 +1006,8 @@ static int __init imxdma_probe(struct platform_device *pdev) if (!imxdma) return -ENOMEM; + imxdma->devtype = pdev->id_entry->driver_data; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); imxdma->base = devm_request_and_ioremap(&pdev->dev, res); if (!imxdma->base) @@ -990,7 +1031,7 @@ static int __init imxdma_probe(struct platform_device *pdev) /* reset DMA module */ imx_dmav1_writel(imxdma, DCR_DRST, DMA_DCR); - if (cpu_is_mx1()) { + if (is_imx1_dma(imxdma)) { ret = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0, "DMA", imxdma); if (ret) { @@ -1038,7 +1079,7 @@ static int __init imxdma_probe(struct platform_device *pdev) for (i = 0; i < IMX_DMA_CHANNELS; i++) { struct imxdma_channel *imxdmac = &imxdma->channel[i]; - if (cpu_is_mx21() || cpu_is_mx27()) { + if (!is_imx1_dma(imxdma)) { ret = devm_request_irq(&pdev->dev, irq + i, dma_irq_handler, 0, "DMA", imxdma); if (ret) { @@ -1118,6 +1159,7 @@ static struct platform_driver imxdma_driver = { .driver = { .name = "imx-dma", }, + .id_table = imx_dma_devtype, .remove = __exit_p(imxdma_remove), }; diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h index 1b9080385b46..f6d30cc1cb77 100644 --- a/include/linux/platform_data/dma-imx.h +++ b/include/linux/platform_data/dma-imx.h @@ -61,7 +61,9 @@ static inline int imx_dma_is_ipu(struct dma_chan *chan) static inline int imx_dma_is_general_purpose(struct dma_chan *chan) { return strstr(dev_name(chan->device->dev), "sdma") || - !strcmp(dev_name(chan->device->dev), "imx-dma"); + !strcmp(dev_name(chan->device->dev), "imx1-dma") || + !strcmp(dev_name(chan->device->dev), "imx21-dma") || + !strcmp(dev_name(chan->device->dev), "imx27-dma"); } #endif -- cgit v1.2.3 From 1e66210a3135b544713a9455e78e36f6f8d1bf77 Mon Sep 17 00:00:00 2001 From: Shawn Guo Date: Sun, 16 Sep 2012 22:16:44 +0800 Subject: ARM: imx: remove header file mach/irqs.h The only mach/irqs.h user outside arch/arm/mach-imx is sound/soc/fsl/imx-pcm-fiq.c, which refers to mxc_set_irq_fiq(). Move the declaration into include/linux/platform_data/asoc-imx-ssi.h, so that we can remove mach/irqs.h includsion from imx-pcm-fiq.c. Inside arch/arm/mach-imx, the only users to mach/irqs.h are avic.c and tzic.c for referring to macro FIQ_START. Let's move the macro into irq-common.h and get rid of mach/irqs.h completely. Signed-off-by: Shawn Guo Acked-by: Sascha Hauer Acked-by: Arnd Bergmann Acked-by: Mark Brown Cc: alsa-devel@alsa-project.org --- arch/arm/mach-imx/avic.c | 1 - arch/arm/mach-imx/include/mach/irqs.h | 21 --------------------- arch/arm/mach-imx/irq-common.h | 3 +++ arch/arm/mach-imx/tzic.c | 2 -- include/linux/platform_data/asoc-imx-ssi.h | 2 ++ sound/soc/fsl/imx-pcm-fiq.c | 1 - 6 files changed, 5 insertions(+), 25 deletions(-) delete mode 100644 arch/arm/mach-imx/include/mach/irqs.h (limited to 'include/linux') diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c index adc64bc5b343..0eff23ed92b9 100644 --- a/arch/arm/mach-imx/avic.c +++ b/arch/arm/mach-imx/avic.c @@ -24,7 +24,6 @@ #include #include #include -#include #include "common.h" #include "hardware.h" diff --git a/arch/arm/mach-imx/include/mach/irqs.h b/arch/arm/mach-imx/include/mach/irqs.h deleted file mode 100644 index d73f5e8ea9cb..000000000000 --- a/arch/arm/mach-imx/include/mach/irqs.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * Copyright 2004-2007 Freescale Semiconductor, Inc. All Rights Reserved. - */ - -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __ASM_ARCH_MXC_IRQS_H__ -#define __ASM_ARCH_MXC_IRQS_H__ - -extern int imx_irq_set_priority(unsigned char irq, unsigned char prio); - -/* all normal IRQs can be FIQs */ -#define FIQ_START 0 -/* switch between IRQ and FIQ */ -extern int mxc_set_irq_fiq(unsigned int irq, unsigned int type); - -#endif /* __ASM_ARCH_MXC_IRQS_H__ */ diff --git a/arch/arm/mach-imx/irq-common.h b/arch/arm/mach-imx/irq-common.h index 6ccb3a14c693..5b2dabba330f 100644 --- a/arch/arm/mach-imx/irq-common.h +++ b/arch/arm/mach-imx/irq-common.h @@ -19,6 +19,9 @@ #ifndef __PLAT_MXC_IRQ_COMMON_H__ #define __PLAT_MXC_IRQ_COMMON_H__ +/* all normal IRQs can be FIQs */ +#define FIQ_START 0 + struct mxc_extra_irq { int (*set_priority)(unsigned char irq, unsigned char prio); diff --git a/arch/arm/mach-imx/tzic.c b/arch/arm/mach-imx/tzic.c index c7625b4a916b..9721161f208f 100644 --- a/arch/arm/mach-imx/tzic.c +++ b/arch/arm/mach-imx/tzic.c @@ -21,8 +21,6 @@ #include #include -#include - #include "common.h" #include "hardware.h" #include "irq-common.h" diff --git a/include/linux/platform_data/asoc-imx-ssi.h b/include/linux/platform_data/asoc-imx-ssi.h index 63f3c2804239..92c7fd72f636 100644 --- a/include/linux/platform_data/asoc-imx-ssi.h +++ b/include/linux/platform_data/asoc-imx-ssi.h @@ -17,5 +17,7 @@ struct imx_ssi_platform_data { void (*ac97_warm_reset)(struct snd_ac97 *ac97); }; +extern int mxc_set_irq_fiq(unsigned int irq, unsigned int type); + #endif /* __MACH_SSI_H */ diff --git a/sound/soc/fsl/imx-pcm-fiq.c b/sound/soc/fsl/imx-pcm-fiq.c index 22c6130957ba..9ffc9e66308f 100644 --- a/sound/soc/fsl/imx-pcm-fiq.c +++ b/sound/soc/fsl/imx-pcm-fiq.c @@ -29,7 +29,6 @@ #include -#include #include #include "imx-ssi.h" -- cgit v1.2.3 From c0cc3f1665256b7cfdc1d581f997dcea1af71405 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 28 Sep 2012 16:50:15 +0100 Subject: ASoC: wm8994: Allow a delay between jack insertion and microphone detect This can be used to provide some additional settling time to ensure that we don't start microphone detection while the microphone pin is connected to one of the headphone pins. Signed-off-by: Mark Brown --- include/linux/mfd/wm8994/pdata.h | 5 +++ sound/soc/codecs/wm8994.c | 74 +++++++++++++++++++++++++++++----------- 2 files changed, 60 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h index fc87be4fdc25..8e21a094836d 100644 --- a/include/linux/mfd/wm8994/pdata.h +++ b/include/linux/mfd/wm8994/pdata.h @@ -176,6 +176,11 @@ struct wm8994_pdata { unsigned int lineout1fb:1; unsigned int lineout2fb:1; + /* Delay between detecting a jack and starting microphone + * detect (specified in ms) + */ + int micdet_delay; + /* IRQ for microphone detection if brought out directly as a * signal. */ diff --git a/sound/soc/codecs/wm8994.c b/sound/soc/codecs/wm8994.c index 2b2dadc54dac..07095a9ca9a6 100644 --- a/sound/soc/codecs/wm8994.c +++ b/sound/soc/codecs/wm8994.c @@ -3470,11 +3470,46 @@ static void wm8958_default_micdet(u16 status, void *data) } } +/* Deferred mic detection to allow for extra settling time */ +static void wm1811_mic_work(struct work_struct *work) +{ + struct wm8994_priv *wm8994 = container_of(work, struct wm8994_priv, + mic_work.work); + struct snd_soc_codec *codec = wm8994->hubs.codec; + + pm_runtime_get_sync(codec->dev); + + /* If required for an external cap force MICBIAS on */ + if (wm8994->pdata->jd_ext_cap) { + snd_soc_dapm_force_enable_pin(&codec->dapm, + "MICBIAS2"); + snd_soc_dapm_sync(&codec->dapm); + } + + mutex_lock(&wm8994->accdet_lock); + + dev_dbg(codec->dev, "Starting mic detection\n"); + + /* + * Start off measument of microphone impedence to find out + * what's actually there. + */ + wm8994->mic_detecting = true; + wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC); + + snd_soc_update_bits(codec, WM8958_MIC_DETECT_1, + WM8958_MICD_ENA, WM8958_MICD_ENA); + + mutex_unlock(&wm8994->accdet_lock); + + pm_runtime_put(codec->dev); +} + static irqreturn_t wm1811_jackdet_irq(int irq, void *data) { struct wm8994_priv *wm8994 = data; struct snd_soc_codec *codec = wm8994->hubs.codec; - int reg; + int reg, delay; bool present; pm_runtime_get_sync(codec->dev); @@ -3505,18 +3540,14 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data) snd_soc_update_bits(codec, WM1811_JACKDET_CTRL, WM1811_JACKDET_DB, 0); - /* - * Start off measument of microphone impedence to find - * out what's actually there. - */ - wm8994->mic_detecting = true; - wm1811_jackdet_set_mode(codec, WM1811_JACKDET_MODE_MIC); - - snd_soc_update_bits(codec, WM8958_MIC_DETECT_1, - WM8958_MICD_ENA, WM8958_MICD_ENA); + delay = wm8994->pdata->micdet_delay; + schedule_delayed_work(&wm8994->mic_work, + msecs_to_jiffies(delay)); } else { dev_dbg(codec->dev, "Jack not detected\n"); + cancel_delayed_work_sync(&wm8994->mic_work); + snd_soc_update_bits(codec, WM8958_MICBIAS2, WM8958_MICB2_DISCH, WM8958_MICB2_DISCH); @@ -3533,14 +3564,9 @@ static irqreturn_t wm1811_jackdet_irq(int irq, void *data) mutex_unlock(&wm8994->accdet_lock); - /* If required for an external cap force MICBIAS on */ - if (wm8994->pdata->jd_ext_cap) { - if (present) - snd_soc_dapm_force_enable_pin(&codec->dapm, - "MICBIAS2"); - else - snd_soc_dapm_disable_pin(&codec->dapm, "MICBIAS2"); - } + /* Turn off MICBIAS if it was on for an external cap */ + if (wm8994->pdata->jd_ext_cap && !present) + snd_soc_dapm_disable_pin(&codec->dapm, "MICBIAS2"); if (present) snd_soc_jack_report(wm8994->micdet[0].jack, @@ -3763,10 +3789,20 @@ static int wm8994_codec_probe(struct snd_soc_codec *codec) snd_soc_codec_set_cache_io(codec, 16, 16, SND_SOC_REGMAP); mutex_init(&wm8994->accdet_lock); - INIT_DELAYED_WORK(&wm8994->mic_work, wm8994_mic_work); INIT_DELAYED_WORK(&wm8994->jackdet_bootstrap, wm1811_jackdet_bootstrap); + switch (control->type) { + case WM8994: + INIT_DELAYED_WORK(&wm8994->mic_work, wm8994_mic_work); + break; + case WM1811: + INIT_DELAYED_WORK(&wm8994->mic_work, wm1811_mic_work); + break; + default: + break; + } + for (i = 0; i < ARRAY_SIZE(wm8994->fll_locked); i++) init_completion(&wm8994->fll_locked[i]); -- cgit v1.2.3 From 0c57067430a2b729bc08c92b17eb4f29d9bbfaae Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Sat, 6 Oct 2012 20:47:46 +0530 Subject: regulator: tps51632: Add tps51632 regulator driver The TPS51632 is a driverless step down controller with serial control. Advanced features such as D-Cap+ architecture with overlapping pulse support and OSR overshoot reduction provide fast transient response, lowest output capacitance and high efficiency. The TPS51632 supports both I2C and DVFS interfaces (through PWM) for dynamic control of the output voltage and current monitor telemetry. Add regulator driver for TPS51632. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 11 + drivers/regulator/Makefile | 1 + drivers/regulator/tps51632-regulator.c | 332 +++++++++++++++++++++++++++ include/linux/regulator/tps51632-regulator.h | 47 ++++ 4 files changed, 391 insertions(+) create mode 100644 drivers/regulator/tps51632-regulator.c create mode 100644 include/linux/regulator/tps51632-regulator.h (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 67d47b59a66d..aa9e8a18262d 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -335,6 +335,17 @@ config REGULATOR_PALMAS on the muxing. This is handled automatically in the driver by reading the mux info from OTP. +config REGULATOR_TPS51632 + tristate "TI TPS51632 Power Regulator" + depends on I2C + select REGMAP_I2C + help + This driver supports TPS51632 voltage regulator chip. + The TPS52632 is 3-2-1 Phase D-Cap+ Step Down Driverless Controller + with Serial VID control and DVFS. + The voltage output can be configure through I2C interface or PWM + interface. + config REGULATOR_TPS6105X tristate "TI TPS6105X Power regulators" depends on TPS6105X diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index e431eed8a878..ec1aec460bf6 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_REGULATOR_MC13783) += mc13783-regulator.o obj-$(CONFIG_REGULATOR_MC13892) += mc13892-regulator.o obj-$(CONFIG_REGULATOR_MC13XXX_CORE) += mc13xxx-regulator-core.o obj-$(CONFIG_REGULATOR_PALMAS) += palmas-regulator.o +obj-$(CONFIG_REGULATOR_TPS51632) += tps51632-regulator.o obj-$(CONFIG_REGULATOR_PCAP) += pcap-regulator.o obj-$(CONFIG_REGULATOR_PCF50633) += pcf50633-regulator.o obj-$(CONFIG_REGULATOR_RC5T583) += rc5t583-regulator.o diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c new file mode 100644 index 000000000000..34603640d6d9 --- /dev/null +++ b/drivers/regulator/tps51632-regulator.c @@ -0,0 +1,332 @@ +/* + * tps51632-regulator.c -- TI TPS51632 + * + * Regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down Driverless + * Controller with serial VID control and DVFS. + * + * Copyright (c) 2012, NVIDIA Corporation. + * + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation version 2. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any kind, + * whether express or implied; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Register definitions */ +#define TPS51632_VOLTAGE_SELECT_REG 0x0 +#define TPS51632_VOLTAGE_BASE_REG 0x1 +#define TPS51632_OFFSET_REG 0x2 +#define TPS51632_IMON_REG 0x3 +#define TPS51632_VMAX_REG 0x4 +#define TPS51632_DVFS_CONTROL_REG 0x5 +#define TPS51632_POWER_STATE_REG 0x6 +#define TPS51632_SLEW_REGS 0x7 +#define TPS51632_FAULT_REG 0x14 + +#define TPS51632_MAX_REG 0x15 + +#define TPS51632_VOUT_MASK 0x7F +#define TPS51632_VOUT_OFFSET_MASK 0x1F +#define TPS51632_VMAX_MASK 0x7F +#define TPS51632_VMAX_LOCK 0x80 + +/* TPS51632_DVFS_CONTROL_REG */ +#define TPS51632_DVFS_PWMEN 0x1 +#define TPS51632_DVFS_STEP_20 0x2 +#define TPS51632_DVFS_VMAX_PG 0x4 +#define TPS51632_DVFS_PWMRST 0x8 +#define TPS51632_DVFS_OCA_EN 0x10 +#define TPS51632_DVFS_FCCM 0x20 + +/* TPS51632_POWER_STATE_REG */ +#define TPS51632_POWER_STATE_MASK 0x03 +#define TPS51632_POWER_STATE_MULTI_PHASE_CCM 0x0 +#define TPS51632_POWER_STATE_SINGLE_PHASE_CCM 0x1 +#define TPS51632_POWER_STATE_SINGLE_PHASE_DCM 0x2 + +#define TPS51632_MIN_VOLATGE 500000 +#define TPS51632_MAX_VOLATGE 1520000 +#define TPS51632_VOLATGE_STEP_10mV 10000 +#define TPS51632_VOLATGE_STEP_20mV 20000 +#define TPS51632_MAX_VSEL 0x7F +#define TPS51632_MIN_VSEL 0x19 +#define TPS51632_DEFAULT_RAMP_DELAY 6000 +#define TPS51632_VOLT_VSEL(uV) \ + (DIV_ROUND_UP(uV - TPS51632_MIN_VOLATGE, \ + TPS51632_VOLATGE_STEP_10mV) + \ + TPS51632_MIN_VSEL) + +/* TPS51632 chip information */ +struct tps51632_chip { + struct device *dev; + struct regulator_desc desc; + struct regulator_dev *rdev; + struct regmap *regmap; + bool enable_pwm_dvfs; +}; + +static int tps51632_dcdc_get_voltage_sel(struct regulator_dev *rdev) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + unsigned int data; + int ret; + unsigned int reg = TPS51632_VOLTAGE_SELECT_REG; + int vsel; + + if (tps->enable_pwm_dvfs) + reg = TPS51632_VOLTAGE_BASE_REG; + + ret = regmap_read(tps->regmap, reg, &data); + if (ret < 0) { + dev_err(tps->dev, "reg read failed, err %d\n", ret); + return ret; + } + + vsel = data & TPS51632_VOUT_MASK; + + if (vsel < TPS51632_MIN_VSEL) + return 0; + else + return vsel - TPS51632_MIN_VSEL; +} + +static int tps51632_dcdc_set_voltage_sel(struct regulator_dev *rdev, + unsigned selector) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + int vsel; + int ret; + unsigned int reg = TPS51632_VOLTAGE_SELECT_REG; + + if (tps->enable_pwm_dvfs) + reg = TPS51632_VOLTAGE_BASE_REG; + + vsel = selector + TPS51632_MIN_VSEL; + if (vsel > TPS51632_MAX_VSEL) + return -EINVAL; + + ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_SELECT_REG, vsel); + if (ret < 0) + dev_err(tps->dev, "reg write failed, err %d\n", ret); + return ret; +} + +static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev, + int ramp_delay) +{ + struct tps51632_chip *tps = rdev_get_drvdata(rdev); + int bit = ramp_delay/6000; + int ret; + + if (bit) + bit--; + ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit)); + if (ret < 0) + dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret); + return ret; +} + +static struct regulator_ops tps51632_dcdc_ops = { + .get_voltage_sel = tps51632_dcdc_get_voltage_sel, + .set_voltage_sel = tps51632_dcdc_set_voltage_sel, + .list_voltage = regulator_list_voltage_linear, + .set_voltage_time_sel = regulator_set_voltage_time_sel, + .set_ramp_delay = tps51632_dcdc_set_ramp_delay, +}; + +static int __devinit tps51632_init_dcdc(struct tps51632_chip *tps, + struct tps51632_regulator_platform_data *pdata) +{ + int ret; + uint8_t control = 0; + int vsel; + + if (!pdata->enable_pwm_dvfs) + goto skip_pwm_config; + + control |= TPS51632_DVFS_PWMEN; + tps->enable_pwm_dvfs = pdata->enable_pwm_dvfs; + vsel = TPS51632_VOLT_VSEL(pdata->base_voltage_uV); + ret = regmap_write(tps->regmap, TPS51632_VOLTAGE_BASE_REG, vsel); + if (ret < 0) { + dev_err(tps->dev, "BASE reg write failed, err %d\n", ret); + return ret; + } + + if (pdata->dvfs_step_20mV) + control |= TPS51632_DVFS_STEP_20; + + if (pdata->max_voltage_uV) { + unsigned int vmax; + /** + * TPS51632 hw behavior: VMAX register can be write only + * once as it get locked after first write. The lock get + * reset only when device is power-reset. + * Write register only when lock bit is not enabled. + */ + ret = regmap_read(tps->regmap, TPS51632_VMAX_REG, &vmax); + if (ret < 0) { + dev_err(tps->dev, "VMAX read failed, err %d\n", ret); + return ret; + } + if (!(vmax & TPS51632_VMAX_LOCK)) { + vsel = TPS51632_VOLT_VSEL(pdata->max_voltage_uV); + ret = regmap_write(tps->regmap, TPS51632_VMAX_REG, + vsel); + if (ret < 0) { + dev_err(tps->dev, + "VMAX write failed, err %d\n", ret); + return ret; + } + } + } + +skip_pwm_config: + ret = regmap_write(tps->regmap, TPS51632_DVFS_CONTROL_REG, control); + if (ret < 0) + dev_err(tps->dev, "DVFS reg write failed, err %d\n", ret); + return ret; +} + +static bool rd_wr_reg(struct device *dev, unsigned int reg) +{ + if ((reg >= 0x8) && (reg <= 0x10)) + return false; + return true; +} + +static const struct regmap_config tps51632_regmap_config = { + .reg_bits = 8, + .val_bits = 8, + .writeable_reg = rd_wr_reg, + .readable_reg = rd_wr_reg, + .max_register = TPS51632_MAX_REG - 1, + .cache_type = REGCACHE_RBTREE, +}; + +static int __devinit tps51632_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct tps51632_regulator_platform_data *pdata; + struct regulator_dev *rdev; + struct tps51632_chip *tps; + int ret; + struct regulator_config config = { }; + + pdata = client->dev.platform_data; + if (!pdata) { + dev_err(&client->dev, "No Platform data\n"); + return -EINVAL; + } + + tps = devm_kzalloc(&client->dev, sizeof(*tps), GFP_KERNEL); + if (!tps) { + dev_err(&client->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + + tps->dev = &client->dev; + tps->desc.name = id->name; + tps->desc.id = 0; + tps->desc.ramp_delay = TPS51632_DEFAULT_RAMP_DELAY; + tps->desc.min_uV = TPS51632_MIN_VOLATGE; + tps->desc.uV_step = TPS51632_VOLATGE_STEP_10mV; + tps->desc.n_voltages = (TPS51632_MAX_VSEL - TPS51632_MIN_VSEL) + 1; + tps->desc.ops = &tps51632_dcdc_ops; + tps->desc.type = REGULATOR_VOLTAGE; + tps->desc.owner = THIS_MODULE; + + tps->regmap = devm_regmap_init_i2c(client, &tps51632_regmap_config); + if (IS_ERR(tps->regmap)) { + ret = PTR_ERR(tps->regmap); + dev_err(&client->dev, "regmap init failed, err %d\n", ret); + return ret; + } + i2c_set_clientdata(client, tps); + + ret = tps51632_init_dcdc(tps, pdata); + if (ret < 0) { + dev_err(tps->dev, "Init failed, err = %d\n", ret); + return ret; + } + + /* Register the regulators */ + config.dev = &client->dev; + config.init_data = pdata->reg_init_data; + config.driver_data = tps; + config.regmap = tps->regmap; + config.of_node = client->dev.of_node; + + rdev = regulator_register(&tps->desc, &config); + if (IS_ERR(rdev)) { + dev_err(tps->dev, "regulator register failed\n"); + return PTR_ERR(rdev); + } + + tps->rdev = rdev; + return 0; +} + +static int __devexit tps51632_remove(struct i2c_client *client) +{ + struct tps51632_chip *tps = i2c_get_clientdata(client); + + regulator_unregister(tps->rdev); + return 0; +} + +static const struct i2c_device_id tps51632_id[] = { + {.name = "tps51632",}, + {}, +}; + +MODULE_DEVICE_TABLE(i2c, tps51632_id); + +static struct i2c_driver tps51632_i2c_driver = { + .driver = { + .name = "tps51632", + .owner = THIS_MODULE, + }, + .probe = tps51632_probe, + .remove = __devexit_p(tps51632_remove), + .id_table = tps51632_id, +}; + +static int __init tps51632_init(void) +{ + return i2c_add_driver(&tps51632_i2c_driver); +} +subsys_initcall(tps51632_init); + +static void __exit tps51632_cleanup(void) +{ + i2c_del_driver(&tps51632_i2c_driver); +} +module_exit(tps51632_cleanup); + +MODULE_AUTHOR("Laxman Dewangan "); +MODULE_DESCRIPTION("TPS51632 voltage regulator driver"); +MODULE_LICENSE("GPL v2"); diff --git a/include/linux/regulator/tps51632-regulator.h b/include/linux/regulator/tps51632-regulator.h new file mode 100644 index 000000000000..d00841e1a75a --- /dev/null +++ b/include/linux/regulator/tps51632-regulator.h @@ -0,0 +1,47 @@ +/* + * tps51632-regulator.h -- TPS51632 regulator + * + * Interface for regulator driver for TPS51632 3-2-1 Phase D-Cap Step Down + * Driverless Controller with serial VID control and DVFS. + * + * Copyright (C) 2012 NVIDIA Corporation + + * Author: Laxman Dewangan + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + */ + +#ifndef __LINUX_REGULATOR_TPS51632_H +#define __LINUX_REGULATOR_TPS51632_H + +/* + * struct tps51632_regulator_platform_data - tps51632 regulator platform data. + * + * @reg_init_data: The regulator init data. + * @enable_pwm_dvfs: Enable PWM DVFS or not. + * @dvfs_step_20mV: Step for DVFS is 20mV or 10mV. + * @max_voltage_uV: Maximum possible voltage in PWM-DVFS mode. + * @base_voltage_uV: Base voltage when PWM-DVFS enabled. + */ +struct tps51632_regulator_platform_data { + struct regulator_init_data *reg_init_data; + bool enable_pwm_dvfs; + bool dvfs_step_20mV; + int max_voltage_uV; + int base_voltage_uV; +}; + +#endif /* __LINUX_REGULATOR_TPS51632_H */ -- cgit v1.2.3 From bd7a2b600ace90c8819495b639a744c8f5c68feb Mon Sep 17 00:00:00 2001 From: Pawel Moll Date: Mon, 24 Sep 2012 18:56:53 +0100 Subject: regulator: core: Support for continuous voltage range Some regulators can set any voltage within the constraints range, not being limited to specified operating points. This patch makes it possible to describe such regulator and makes the regulator_is_supported_voltage() function behave correctly. Signed-off-by: Pawel Moll Signed-off-by: Mark Brown --- drivers/regulator/core.c | 5 +++++ include/linux/regulator/driver.h | 3 +++ 2 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 5c4829cba6a6..f7c74db7465c 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1979,6 +1979,11 @@ int regulator_is_supported_voltage(struct regulator *regulator, return ret; } + /* Any voltage within constrains range is fine? */ + if (rdev->desc->continuous_voltage_range) + return min_uV >= rdev->constraints->min_uV && + max_uV <= rdev->constraints->max_uV; + ret = regulator_count_voltages(regulator); if (ret < 0) return ret; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 7932a3bf21bd..f2b72b230b9b 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -181,6 +181,8 @@ enum regulator_type { * @type: Indicates if the regulator is a voltage or current regulator. * @owner: Module providing the regulator, used for refcounting. * + * @continuous_voltage_range: Indicates if the regulator can set any + * voltage within constrains range. * @n_voltages: Number of selectors available for ops.list_voltage(). * * @min_uV: Voltage given by the lowest selector (if linear mapping) @@ -199,6 +201,7 @@ struct regulator_desc { const char *name; const char *supply_name; int id; + bool continuous_voltage_range; unsigned n_voltages; struct regulator_ops *ops; int irq; -- cgit v1.2.3 From 757ef79188657087f96f6f12c003b682860fede2 Mon Sep 17 00:00:00 2001 From: Jon Hunter Date: Thu, 28 Jun 2012 13:41:29 -0500 Subject: ARM: OMAP2+: GPMC: Remove unused OneNAND get_freq() platform function A platform function pointer for getting the frequency of a OneNAND device was added so that a platform could specify a custom function for returning the frequency and not just rely on the OneNAND version to determine the frequency. However, this platform function pointer is not currently being used and I am not sure if it ever has. OneNAND devices are not so common these days and as far as I know not being used with new devices. Therefore, it is most likely that this get_freq() function pointer will not be used and so remove it. Given that the get_freq() function pointer is not used, neither is the clk_dep variable and so all references to it can also be removed. Signed-off-by: Jon Hunter Signed-off-by: Afzal Mohammed --- arch/arm/mach-omap2/gpmc-onenand.c | 39 ++++--------------------- include/linux/platform_data/mtd-onenand-omap2.h | 8 ----- 2 files changed, 5 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 29d391b273fc..50165aa4205a 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -140,21 +140,10 @@ static void set_onenand_cfg(void __iomem *onenand_base) } static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg, - void __iomem *onenand_base, bool *clk_dep) + void __iomem *onenand_base) { u16 ver = readw(onenand_base + ONENAND_REG_VERSION_ID); - int freq = 0; - - if (cfg->get_freq) { - struct onenand_freq_info fi; - - fi.maf_id = readw(onenand_base + ONENAND_REG_MANUFACTURER_ID); - fi.dev_id = readw(onenand_base + ONENAND_REG_DEVICE_ID); - fi.ver_id = ver; - freq = cfg->get_freq(&fi, clk_dep); - if (freq) - return freq; - } + int freq; switch ((ver >> 4) & 0xf) { case 0: @@ -182,7 +171,7 @@ static int omap2_onenand_get_freq(struct omap_onenand_platform_data *cfg, static struct gpmc_timings omap2_onenand_calc_sync_timings(struct omap_onenand_platform_data *cfg, - int freq, bool clk_dep) + int freq) { struct gpmc_timings t; const int t_cer = 15; @@ -261,22 +250,6 @@ omap2_onenand_calc_sync_timings(struct omap_onenand_platform_data *cfg, else latency = 4; - if (clk_dep) { - if (gpmc_clk_ns < 12) { /* >83Mhz */ - t_ces = 3; - t_avds = 4; - } else if (gpmc_clk_ns < 15) { /* >66Mhz */ - t_ces = 5; - t_avds = 4; - } else if (gpmc_clk_ns < 25) { /* >40Mhz */ - t_ces = 6; - t_avds = 5; - } else { - t_ces = 7; - t_avds = 7; - } - } - /* Set synchronous read timings */ memset(&t, 0, sizeof(t)); @@ -399,16 +372,14 @@ static int omap2_onenand_setup_sync(void __iomem *onenand_base, int *freq_ptr) { int ret, freq = *freq_ptr; struct gpmc_timings t; - bool clk_dep = false; if (!freq) { /* Very first call freq is not known */ - freq = omap2_onenand_get_freq(gpmc_onenand_data, - onenand_base, &clk_dep); + freq = omap2_onenand_get_freq(gpmc_onenand_data, onenand_base); set_onenand_cfg(onenand_base); } - t = omap2_onenand_calc_sync_timings(gpmc_onenand_data, freq, clk_dep); + t = omap2_onenand_calc_sync_timings(gpmc_onenand_data, freq); ret = gpmc_set_sync_mode(gpmc_onenand_data->cs, &t); if (IS_ERR_VALUE(ret)) diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h index 2858667d2e4f..21bb0ff4052e 100644 --- a/include/linux/platform_data/mtd-onenand-omap2.h +++ b/include/linux/platform_data/mtd-onenand-omap2.h @@ -15,20 +15,12 @@ #define ONENAND_SYNC_READ (1 << 0) #define ONENAND_SYNC_READWRITE (1 << 1) -struct onenand_freq_info { - u16 maf_id; - u16 dev_id; - u16 ver_id; -}; - struct omap_onenand_platform_data { int cs; int gpio_irq; struct mtd_partition *parts; int nr_parts; int (*onenand_setup)(void __iomem *, int *freq_ptr); - int (*get_freq)(const struct onenand_freq_info *freq_info, - bool *clk_dep); int dma_channel; u8 flags; u8 regulator_can_sleep; -- cgit v1.2.3 From eb77b6a78ac7f63985a3f592baf80ff33d213c48 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Fri, 5 Oct 2012 11:39:54 +0530 Subject: ARM: OMAP2+: onenand: connected soc info in pdata onenand driver needs to know whether soc is falling under 34xx family to properly handle onenand. But driver is not supposed to do cpu_is_* check, hence educate platform data with this information. Driver can make use of it to avoid cpu_is_* check. Signed-off-by: Afzal Mohammed --- arch/arm/mach-omap2/gpmc-onenand.c | 5 +++++ include/linux/platform_data/mtd-onenand-omap2.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 06fd84657218..4a1c0b7ab432 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -428,6 +428,11 @@ void __init gpmc_onenand_init(struct omap_onenand_platform_data *_onenand_data) gpmc_onenand_data->flags |= ONENAND_SYNC_READ; } + if (cpu_is_omap34xx()) + gpmc_onenand_data->flags |= ONENAND_IN_OMAP34XX; + else + gpmc_onenand_data->flags &= ~ONENAND_IN_OMAP34XX; + err = gpmc_cs_request(gpmc_onenand_data->cs, ONENAND_IO_SIZE, (unsigned long *)&gpmc_onenand_resource.start); if (err < 0) { diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h index 21bb0ff4052e..ef9c60d4b29b 100644 --- a/include/linux/platform_data/mtd-onenand-omap2.h +++ b/include/linux/platform_data/mtd-onenand-omap2.h @@ -14,6 +14,7 @@ #define ONENAND_SYNC_READ (1 << 0) #define ONENAND_SYNC_READWRITE (1 << 1) +#define ONENAND_IN_OMAP34XX (1 << 2) struct omap_onenand_platform_data { int cs; -- cgit v1.2.3 From b6ab13e7d6d2778702baea7433d0bd9f234d5083 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sat, 29 Sep 2012 10:32:42 +0530 Subject: ARM: OMAP2+: onenand: header cleanup For common arm zImage existing onenand header file in platform specific location was moved to generic platform data location, but it contained more than platform data, remove it. New local header has been created for exposing functions. Signed-off-by: Afzal Mohammed --- arch/arm/mach-omap2/board-flash.c | 1 + arch/arm/mach-omap2/board-igep0020.c | 1 + arch/arm/mach-omap2/board-n8x0.c | 1 + arch/arm/mach-omap2/board-rm680.c | 1 + arch/arm/mach-omap2/board-rx51-peripherals.c | 1 + arch/arm/mach-omap2/gpmc-onenand.c | 1 + arch/arm/mach-omap2/gpmc-onenand.h | 24 ++++++++++++++++++++++++ include/linux/platform_data/mtd-onenand-omap2.h | 19 +++---------------- 8 files changed, 33 insertions(+), 16 deletions(-) create mode 100644 arch/arm/mach-omap2/gpmc-onenand.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c index d4ac5352a71c..f438d511ba11 100644 --- a/arch/arm/mach-omap2/board-flash.c +++ b/arch/arm/mach-omap2/board-flash.c @@ -25,6 +25,7 @@ #include "common.h" #include "board-flash.h" +#include "gpmc-onenand.h" #define REG_FPGA_REV 0x10 #define REG_FPGA_DIP_SWITCH_INPUT2 0x60 diff --git a/arch/arm/mach-omap2/board-igep0020.c b/arch/arm/mach-omap2/board-igep0020.c index f6b3ed0e5c94..83c6efaf4226 100644 --- a/arch/arm/mach-omap2/board-igep0020.c +++ b/arch/arm/mach-omap2/board-igep0020.c @@ -43,6 +43,7 @@ #include "common-board-devices.h" #include "board-flash.h" #include "control.h" +#include "gpmc-onenand.h" #define IGEP2_SMSC911X_CS 5 #define IGEP2_SMSC911X_GPIO 176 diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index d95f727ca39a..92b19166aac8 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -32,6 +32,7 @@ #include #include "mux.h" +#include "gpmc-onenand.h" #define TUSB6010_ASYNC_CS 1 #define TUSB6010_SYNC_CS 4 diff --git a/arch/arm/mach-omap2/board-rm680.c b/arch/arm/mach-omap2/board-rm680.c index 45997bfbcbd2..154cf337c0c7 100644 --- a/arch/arm/mach-omap2/board-rm680.c +++ b/arch/arm/mach-omap2/board-rm680.c @@ -33,6 +33,7 @@ #include "hsmmc.h" #include "sdram-nokia.h" #include "common-board-devices.h" +#include "gpmc-onenand.h" static struct regulator_consumer_supply rm680_vemmc_consumers[] = { REGULATOR_SUPPLY("vmmc", "omap_hsmmc.1"), diff --git a/arch/arm/mach-omap2/board-rx51-peripherals.c b/arch/arm/mach-omap2/board-rx51-peripherals.c index 020e03c95bfe..45760044b2bd 100644 --- a/arch/arm/mach-omap2/board-rx51-peripherals.c +++ b/arch/arm/mach-omap2/board-rx51-peripherals.c @@ -54,6 +54,7 @@ #include "mux.h" #include "hsmmc.h" #include "common-board-devices.h" +#include "gpmc-onenand.h" #define SYSTEM_REV_B_USES_VAUX3 0x1699 #define SYSTEM_REV_S_USES_VAUX3 0x8 diff --git a/arch/arm/mach-omap2/gpmc-onenand.c b/arch/arm/mach-omap2/gpmc-onenand.c index 4a1c0b7ab432..f318f11da2fa 100644 --- a/arch/arm/mach-omap2/gpmc-onenand.c +++ b/arch/arm/mach-omap2/gpmc-onenand.c @@ -23,6 +23,7 @@ #include #include "soc.h" +#include "gpmc-onenand.h" #define ONENAND_IO_SIZE SZ_128K diff --git a/arch/arm/mach-omap2/gpmc-onenand.h b/arch/arm/mach-omap2/gpmc-onenand.h new file mode 100644 index 000000000000..216f23a8b45c --- /dev/null +++ b/arch/arm/mach-omap2/gpmc-onenand.h @@ -0,0 +1,24 @@ +/* + * arch/arm/mach-omap2/gpmc-onenand.h + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __OMAP2_GPMC_ONENAND_H +#define __OMAP2_GPMC_ONENAND_H + +#include + +#if IS_ENABLED(CONFIG_MTD_ONENAND_OMAP2) +extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); +#else +#define board_onenand_data NULL +static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) +{ +} +#endif + +#endif diff --git a/include/linux/platform_data/mtd-onenand-omap2.h b/include/linux/platform_data/mtd-onenand-omap2.h index ef9c60d4b29b..685af7e8b120 100644 --- a/include/linux/platform_data/mtd-onenand-omap2.h +++ b/include/linux/platform_data/mtd-onenand-omap2.h @@ -9,6 +9,9 @@ * published by the Free Software Foundation. */ +#ifndef __MTD_ONENAND_OMAP2_H +#define __MTD_ONENAND_OMAP2_H + #include #include @@ -27,20 +30,4 @@ struct omap_onenand_platform_data { u8 regulator_can_sleep; u8 skip_initial_unlocking; }; - -#define ONENAND_MAX_PARTITIONS 8 - -#if defined(CONFIG_MTD_ONENAND_OMAP2) || \ - defined(CONFIG_MTD_ONENAND_OMAP2_MODULE) - -extern void gpmc_onenand_init(struct omap_onenand_platform_data *d); - -#else - -#define board_onenand_data NULL - -static inline void gpmc_onenand_init(struct omap_onenand_platform_data *d) -{ -} - #endif -- cgit v1.2.3 From bc3668ea046be9e841eecfab04bddfa759e765d6 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Sat, 29 Sep 2012 12:26:13 +0530 Subject: ARM: OMAP2+: nand: header cleanup For common arm zImage existing nand header file in platform specific location was moved to generic platform data location, but it contained more than platform data, remove it. New local header has been created for exposing functions. Also move gpmc-nand platform data to platform header meant for nand from gpmc header file Signed-off-by: Afzal Mohammed --- arch/arm/mach-omap2/board-cm-t35.c | 3 +- arch/arm/mach-omap2/board-cm-t3517.c | 3 +- arch/arm/mach-omap2/board-flash.c | 4 +- arch/arm/mach-omap2/board-omap3pandora.c | 3 +- arch/arm/mach-omap2/common-board-devices.c | 1 - arch/arm/mach-omap2/gpmc-nand.c | 58 ++++++++++++++-------------- arch/arm/mach-omap2/gpmc-nand.h | 27 +++++++++++++ arch/arm/mach-omap2/gpmc.c | 2 + arch/arm/plat-omap/include/plat/gpmc.h | 28 +------------- include/linux/platform_data/mtd-nand-omap2.h | 41 ++++++++++++++------ 10 files changed, 98 insertions(+), 72 deletions(-) create mode 100644 arch/arm/mach-omap2/gpmc-nand.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/board-cm-t35.c b/arch/arm/mach-omap2/board-cm-t35.c index 376d26eb601c..fef68de716b5 100644 --- a/arch/arm/mach-omap2/board-cm-t35.c +++ b/arch/arm/mach-omap2/board-cm-t35.c @@ -53,6 +53,7 @@ #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" #include "common-board-devices.h" +#include "gpmc-nand.h" #define CM_T35_GPIO_PENDOWN 57 #define SB_T35_USB_HUB_RESET_GPIO 167 @@ -181,7 +182,7 @@ static struct omap_nand_platform_data cm_t35_nand_data = { static void __init cm_t35_init_nand(void) { - if (gpmc_nand_init(&cm_t35_nand_data) < 0) + if (gpmc_nand_init(&cm_t35_nand_data, NULL) < 0) pr_err("CM-T35: Unable to register NAND device\n"); } #else diff --git a/arch/arm/mach-omap2/board-cm-t3517.c b/arch/arm/mach-omap2/board-cm-t3517.c index 59c0a45f75b0..3a19e80e0005 100644 --- a/arch/arm/mach-omap2/board-cm-t3517.c +++ b/arch/arm/mach-omap2/board-cm-t3517.c @@ -49,6 +49,7 @@ #include "control.h" #include "common-board-devices.h" #include "am35xx-emac.h" +#include "gpmc-nand.h" #if defined(CONFIG_LEDS_GPIO) || defined(CONFIG_LEDS_GPIO_MODULE) static struct gpio_led cm_t3517_leds[] = { @@ -240,7 +241,7 @@ static struct omap_nand_platform_data cm_t3517_nand_data = { static void __init cm_t3517_init_nand(void) { - if (gpmc_nand_init(&cm_t3517_nand_data) < 0) + if (gpmc_nand_init(&cm_t3517_nand_data, NULL) < 0) pr_err("CM-T3517: NAND initialization failed\n"); } #else diff --git a/arch/arm/mach-omap2/board-flash.c b/arch/arm/mach-omap2/board-flash.c index f438d511ba11..ba9fa86a10aa 100644 --- a/arch/arm/mach-omap2/board-flash.c +++ b/arch/arm/mach-omap2/board-flash.c @@ -26,6 +26,7 @@ #include "common.h" #include "board-flash.h" #include "gpmc-onenand.h" +#include "gpmc-nand.h" #define REG_FPGA_REV 0x10 #define REG_FPGA_DIP_SWITCH_INPUT2 0x60 @@ -139,10 +140,9 @@ __init board_nand_init(struct mtd_partition *nand_parts, u8 nr_parts, u8 cs, board_nand_data.parts = nand_parts; board_nand_data.nr_parts = nr_parts; board_nand_data.devsize = nand_type; - board_nand_data.gpmc_t = gpmc_t; board_nand_data.ecc_opt = OMAP_ECC_HAMMING_CODE_DEFAULT; - gpmc_nand_init(&board_nand_data); + gpmc_nand_init(&board_nand_data, gpmc_t); } #endif /* CONFIG_MTD_NAND_OMAP2 || CONFIG_MTD_NAND_OMAP2_MODULE */ diff --git a/arch/arm/mach-omap2/board-omap3pandora.c b/arch/arm/mach-omap2/board-omap3pandora.c index 00a1f4ae6e44..f286b4b4bd5b 100644 --- a/arch/arm/mach-omap2/board-omap3pandora.c +++ b/arch/arm/mach-omap2/board-omap3pandora.c @@ -50,6 +50,7 @@ #include "sdram-micron-mt46h32m32lf-6.h" #include "hsmmc.h" #include "common-board-devices.h" +#include "gpmc-nand.h" #define PANDORA_WIFI_IRQ_GPIO 21 #define PANDORA_WIFI_NRESET_GPIO 23 @@ -602,7 +603,7 @@ static void __init omap3pandora_init(void) omap_ads7846_init(1, OMAP3_PANDORA_TS_GPIO, 0, NULL); usbhs_init(&usbhs_bdata); usb_musb_init(NULL); - gpmc_nand_init(&pandora_nand_data); + gpmc_nand_init(&pandora_nand_data, NULL); /* Ensure SDRC pins are mux'd for self-refresh */ omap_mux_init_signal("sdrc_cke0", OMAP_PIN_OUTPUT); diff --git a/arch/arm/mach-omap2/common-board-devices.c b/arch/arm/mach-omap2/common-board-devices.c index 90e0597667b9..ad856092c06a 100644 --- a/arch/arm/mach-omap2/common-board-devices.c +++ b/arch/arm/mach-omap2/common-board-devices.c @@ -25,7 +25,6 @@ #include #include -#include #include "common.h" #include "common-board-devices.h" diff --git a/arch/arm/mach-omap2/gpmc-nand.c b/arch/arm/mach-omap2/gpmc-nand.c index 4acf497faeb3..abdb78a95a94 100644 --- a/arch/arm/mach-omap2/gpmc-nand.c +++ b/arch/arm/mach-omap2/gpmc-nand.c @@ -20,6 +20,10 @@ #include #include "soc.h" +#include "gpmc-nand.h" + +/* minimum size for IO mapping */ +#define NAND_IO_SIZE 4 static struct resource gpmc_nand_resource[] = { { @@ -40,41 +44,36 @@ static struct platform_device gpmc_nand_device = { .resource = gpmc_nand_resource, }; -static int omap2_nand_gpmc_retime(struct omap_nand_platform_data *gpmc_nand_data) +static int omap2_nand_gpmc_retime( + struct omap_nand_platform_data *gpmc_nand_data, + struct gpmc_timings *gpmc_t) { struct gpmc_timings t; int err; - if (!gpmc_nand_data->gpmc_t) - return 0; - memset(&t, 0, sizeof(t)); - t.sync_clk = gpmc_nand_data->gpmc_t->sync_clk; - t.cs_on = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_on); - t.adv_on = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->adv_on); + t.sync_clk = gpmc_t->sync_clk; + t.cs_on = gpmc_round_ns_to_ticks(gpmc_t->cs_on); + t.adv_on = gpmc_round_ns_to_ticks(gpmc_t->adv_on); /* Read */ - t.adv_rd_off = gpmc_round_ns_to_ticks( - gpmc_nand_data->gpmc_t->adv_rd_off); + t.adv_rd_off = gpmc_round_ns_to_ticks(gpmc_t->adv_rd_off); t.oe_on = t.adv_on; - t.access = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->access); - t.oe_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->oe_off); - t.cs_rd_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_rd_off); - t.rd_cycle = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->rd_cycle); + t.access = gpmc_round_ns_to_ticks(gpmc_t->access); + t.oe_off = gpmc_round_ns_to_ticks(gpmc_t->oe_off); + t.cs_rd_off = gpmc_round_ns_to_ticks(gpmc_t->cs_rd_off); + t.rd_cycle = gpmc_round_ns_to_ticks(gpmc_t->rd_cycle); /* Write */ - t.adv_wr_off = gpmc_round_ns_to_ticks( - gpmc_nand_data->gpmc_t->adv_wr_off); + t.adv_wr_off = gpmc_round_ns_to_ticks(gpmc_t->adv_wr_off); t.we_on = t.oe_on; if (cpu_is_omap34xx()) { - t.wr_data_mux_bus = gpmc_round_ns_to_ticks( - gpmc_nand_data->gpmc_t->wr_data_mux_bus); - t.wr_access = gpmc_round_ns_to_ticks( - gpmc_nand_data->gpmc_t->wr_access); + t.wr_data_mux_bus = gpmc_round_ns_to_ticks(gpmc_t->wr_data_mux_bus); + t.wr_access = gpmc_round_ns_to_ticks(gpmc_t->wr_access); } - t.we_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->we_off); - t.cs_wr_off = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->cs_wr_off); - t.wr_cycle = gpmc_round_ns_to_ticks(gpmc_nand_data->gpmc_t->wr_cycle); + t.we_off = gpmc_round_ns_to_ticks(gpmc_t->we_off); + t.cs_wr_off = gpmc_round_ns_to_ticks(gpmc_t->cs_wr_off); + t.wr_cycle = gpmc_round_ns_to_ticks(gpmc_t->wr_cycle); /* Configure GPMC */ if (gpmc_nand_data->devsize == NAND_BUSWIDTH_16) @@ -91,7 +90,8 @@ static int omap2_nand_gpmc_retime(struct omap_nand_platform_data *gpmc_nand_data return 0; } -int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data) +int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data, + struct gpmc_timings *gpmc_t) { int err = 0; struct device *dev = &gpmc_nand_device.dev; @@ -112,11 +112,13 @@ int __init gpmc_nand_init(struct omap_nand_platform_data *gpmc_nand_data) gpmc_get_client_irq(GPMC_IRQ_FIFOEVENTENABLE); gpmc_nand_resource[2].start = gpmc_get_client_irq(GPMC_IRQ_COUNT_EVENT); - /* Set timings in GPMC */ - err = omap2_nand_gpmc_retime(gpmc_nand_data); - if (err < 0) { - dev_err(dev, "Unable to set gpmc timings: %d\n", err); - return err; + + if (gpmc_t) { + err = omap2_nand_gpmc_retime(gpmc_nand_data, gpmc_t); + if (err < 0) { + dev_err(dev, "Unable to set gpmc timings: %d\n", err); + return err; + } } /* Enable RD PIN Monitoring Reg */ diff --git a/arch/arm/mach-omap2/gpmc-nand.h b/arch/arm/mach-omap2/gpmc-nand.h new file mode 100644 index 000000000000..11a377f0e6f0 --- /dev/null +++ b/arch/arm/mach-omap2/gpmc-nand.h @@ -0,0 +1,27 @@ +/* + * arch/arm/mach-omap2/gpmc-nand.h + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __OMAP2_GPMC_NAND_H +#define __OMAP2_GPMC_NAND_H + +#include +#include + +#if IS_ENABLED(CONFIG_MTD_NAND_OMAP2) +extern int gpmc_nand_init(struct omap_nand_platform_data *d, + struct gpmc_timings *gpmc_t); +#else +static inline int gpmc_nand_init(struct omap_nand_platform_data *d, + struct gpmc_timings *gpmc_t) +{ + return 0; +} +#endif + +#endif diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 91af5aefeaae..34823b3092f3 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -26,6 +26,8 @@ #include #include +#include + #include #include diff --git a/arch/arm/plat-omap/include/plat/gpmc.h b/arch/arm/plat-omap/include/plat/gpmc.h index f7e0990eaf37..4b06bb4531a2 100644 --- a/arch/arm/plat-omap/include/plat/gpmc.h +++ b/arch/arm/plat-omap/include/plat/gpmc.h @@ -11,6 +11,8 @@ #ifndef __OMAP2_GPMC_H #define __OMAP2_GPMC_H +#include + /* Maximum Number of Chip Selects */ #define GPMC_CS_NUM 8 @@ -86,16 +88,6 @@ #define PREFETCH_FIFOTHRESHOLD_MAX 0x40 #define PREFETCH_FIFOTHRESHOLD(val) ((val) << 8) -enum omap_ecc { - /* 1-bit ecc: stored at end of spare area */ - OMAP_ECC_HAMMING_CODE_DEFAULT = 0, /* Default, s/w method */ - OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */ - /* 1-bit ecc: stored at beginning of spare area as romcode */ - OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */ - OMAP_ECC_BCH4_CODE_HW, /* 4-bit BCH ecc code */ - OMAP_ECC_BCH8_CODE_HW, /* 8-bit BCH ecc code */ -}; - /* * Note that all values in this struct are in nanoseconds except sync_clk * (which is in picoseconds), while the register values are in gpmc_fck cycles. @@ -133,22 +125,6 @@ struct gpmc_timings { u16 wr_data_mux_bus; /* WRDATAONADMUXBUS */ }; -struct gpmc_nand_regs { - void __iomem *gpmc_status; - void __iomem *gpmc_nand_command; - void __iomem *gpmc_nand_address; - void __iomem *gpmc_nand_data; - void __iomem *gpmc_prefetch_config1; - void __iomem *gpmc_prefetch_config2; - void __iomem *gpmc_prefetch_control; - void __iomem *gpmc_prefetch_status; - void __iomem *gpmc_ecc_config; - void __iomem *gpmc_ecc_control; - void __iomem *gpmc_ecc_size_config; - void __iomem *gpmc_ecc1_result; - void __iomem *gpmc_bch_result0; -}; - extern void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); extern int gpmc_get_client_irq(unsigned irq_config); diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 1a68c1e5fe53..e1965fe581d1 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -8,7 +8,9 @@ * published by the Free Software Foundation. */ -#include +#ifndef _MTD_NAND_OMAP2_H +#define _MTD_NAND_OMAP2_H + #include enum nand_io { @@ -18,10 +20,35 @@ enum nand_io { NAND_OMAP_PREFETCH_IRQ /* prefetch enabled irq mode */ }; +enum omap_ecc { + /* 1-bit ecc: stored at end of spare area */ + OMAP_ECC_HAMMING_CODE_DEFAULT = 0, /* Default, s/w method */ + OMAP_ECC_HAMMING_CODE_HW, /* gpmc to detect the error */ + /* 1-bit ecc: stored at beginning of spare area as romcode */ + OMAP_ECC_HAMMING_CODE_HW_ROMCODE, /* gpmc method & romcode layout */ + OMAP_ECC_BCH4_CODE_HW, /* 4-bit BCH ecc code */ + OMAP_ECC_BCH8_CODE_HW, /* 8-bit BCH ecc code */ +}; + +struct gpmc_nand_regs { + void __iomem *gpmc_status; + void __iomem *gpmc_nand_command; + void __iomem *gpmc_nand_address; + void __iomem *gpmc_nand_data; + void __iomem *gpmc_prefetch_config1; + void __iomem *gpmc_prefetch_config2; + void __iomem *gpmc_prefetch_control; + void __iomem *gpmc_prefetch_status; + void __iomem *gpmc_ecc_config; + void __iomem *gpmc_ecc_control; + void __iomem *gpmc_ecc_size_config; + void __iomem *gpmc_ecc1_result; + void __iomem *gpmc_bch_result0; +}; + struct omap_nand_platform_data { int cs; struct mtd_partition *parts; - struct gpmc_timings *gpmc_t; int nr_parts; bool dev_ready; enum nand_io xfer_type; @@ -30,14 +57,4 @@ struct omap_nand_platform_data { struct gpmc_nand_regs reg; }; -/* minimum size for IO mapping */ -#define NAND_IO_SIZE 4 - -#if defined(CONFIG_MTD_NAND_OMAP2) || defined(CONFIG_MTD_NAND_OMAP2_MODULE) -extern int gpmc_nand_init(struct omap_nand_platform_data *d); -#else -static inline int gpmc_nand_init(struct omap_nand_platform_data *d) -{ - return 0; -} #endif -- cgit v1.2.3 From 2fdf0c98969fdac8f7b191d4988e2e436717c857 Mon Sep 17 00:00:00 2001 From: Afzal Mohammed Date: Thu, 4 Oct 2012 15:49:04 +0530 Subject: ARM: OMAP2+: gpmc: nand register helper bch update Update helper function that provides gpmc-nand register details for nand driver with bch register information. Using this nand driver can be made self sufficient to handle remaining gpmc-nand operations by itself instead of relying on gpmc exported nand functions. Signed-off-by: Afzal Mohammed --- arch/arm/mach-omap2/gpmc.c | 18 +++++++++++++++++- include/linux/platform_data/mtd-nand-omap2.h | 7 ++++++- 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-omap2/gpmc.c b/arch/arm/mach-omap2/gpmc.c index 34823b3092f3..f5cde49854a5 100644 --- a/arch/arm/mach-omap2/gpmc.c +++ b/arch/arm/mach-omap2/gpmc.c @@ -61,6 +61,9 @@ #define GPMC_ECC_SIZE_CONFIG 0x1fc #define GPMC_ECC1_RESULT 0x200 #define GPMC_ECC_BCH_RESULT_0 0x240 /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_1 0x244 /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_2 0x248 /* not available on OMAP2 */ +#define GPMC_ECC_BCH_RESULT_3 0x24c /* not available on OMAP2 */ /* GPMC ECC control settings */ #define GPMC_ECC_CTRL_ECCCLEAR 0x100 @@ -77,6 +80,7 @@ #define GPMC_CS0_OFFSET 0x60 #define GPMC_CS_SIZE 0x30 +#define GPMC_BCH_SIZE 0x10 #define GPMC_MEM_START 0x00000000 #define GPMC_MEM_END 0x3FFFFFFF @@ -731,6 +735,8 @@ EXPORT_SYMBOL(gpmc_prefetch_reset); void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) { + int i; + reg->gpmc_status = gpmc_base + GPMC_STATUS; reg->gpmc_nand_command = gpmc_base + GPMC_CS0_OFFSET + GPMC_CS_NAND_COMMAND + GPMC_CS_SIZE * cs; @@ -746,7 +752,17 @@ void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) reg->gpmc_ecc_control = gpmc_base + GPMC_ECC_CONTROL; reg->gpmc_ecc_size_config = gpmc_base + GPMC_ECC_SIZE_CONFIG; reg->gpmc_ecc1_result = gpmc_base + GPMC_ECC1_RESULT; - reg->gpmc_bch_result0 = gpmc_base + GPMC_ECC_BCH_RESULT_0; + + for (i = 0; i < GPMC_BCH_NUM_REMAINDER; i++) { + reg->gpmc_bch_result0[i] = gpmc_base + GPMC_ECC_BCH_RESULT_0 + + GPMC_BCH_SIZE * i; + reg->gpmc_bch_result1[i] = gpmc_base + GPMC_ECC_BCH_RESULT_1 + + GPMC_BCH_SIZE * i; + reg->gpmc_bch_result2[i] = gpmc_base + GPMC_ECC_BCH_RESULT_2 + + GPMC_BCH_SIZE * i; + reg->gpmc_bch_result3[i] = gpmc_base + GPMC_ECC_BCH_RESULT_3 + + GPMC_BCH_SIZE * i; + } } int gpmc_get_client_irq(unsigned irq_config) diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index e1965fe581d1..24d32ca34bef 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -13,6 +13,8 @@ #include +#define GPMC_BCH_NUM_REMAINDER 8 + enum nand_io { NAND_OMAP_PREFETCH_POLLED = 0, /* prefetch polled mode, default */ NAND_OMAP_POLLED, /* polled mode, without prefetch */ @@ -43,7 +45,10 @@ struct gpmc_nand_regs { void __iomem *gpmc_ecc_control; void __iomem *gpmc_ecc_size_config; void __iomem *gpmc_ecc1_result; - void __iomem *gpmc_bch_result0; + void __iomem *gpmc_bch_result0[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result1[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result2[GPMC_BCH_NUM_REMAINDER]; + void __iomem *gpmc_bch_result3[GPMC_BCH_NUM_REMAINDER]; }; struct omap_nand_platform_data { -- cgit v1.2.3 From 68f39e74fbc3e58ad52d008072bddacc9eee1c7e Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 15 Oct 2012 12:09:43 -0700 Subject: ARM: OMAP: Split plat/mmc.h into local headers and platform_data We need to remove this from plat for ARM common zImage support. Also remove includes not needed by the omap_hsmmc.c driver. Cc: linux-mmc@vger.kernel.org Acked-by: Chris Ball Acked-by: Venkatraman S [tony@atomide.com: fold in removal of unused driver includes] Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-h2-mmc.c | 5 +- arch/arm/mach-omap1/board-h3-mmc.c | 3 +- arch/arm/mach-omap1/board-htcherald.c | 2 +- arch/arm/mach-omap1/board-innovator.c | 2 +- arch/arm/mach-omap1/board-nokia770.c | 2 +- arch/arm/mach-omap1/board-sx1-mmc.c | 3 +- arch/arm/mach-omap1/devices.c | 2 +- arch/arm/mach-omap1/mmc.h | 18 +++ arch/arm/mach-omap2/board-4430sdp.c | 2 +- arch/arm/mach-omap2/board-n8x0.c | 2 +- arch/arm/mach-omap2/board-omap4panda.c | 2 +- arch/arm/mach-omap2/board-rm680.c | 2 +- arch/arm/mach-omap2/hsmmc.c | 2 +- arch/arm/mach-omap2/mmc.h | 23 ++++ arch/arm/mach-omap2/msdi.c | 2 +- arch/arm/mach-omap2/omap4-common.c | 2 +- arch/arm/mach-omap2/omap_hwmod_2420_data.c | 2 +- arch/arm/mach-omap2/omap_hwmod_2430_data.c | 2 +- arch/arm/mach-omap2/omap_hwmod_33xx_data.c | 2 +- arch/arm/mach-omap2/omap_hwmod_3xxx_data.c | 2 +- arch/arm/mach-omap2/omap_hwmod_44xx_data.c | 2 +- arch/arm/plat-omap/include/plat/mmc.h | 188 ----------------------------- drivers/mmc/host/omap.c | 3 +- drivers/mmc/host/omap_hsmmc.c | 4 +- include/linux/platform_data/mmc-omap.h | 147 ++++++++++++++++++++++ 25 files changed, 212 insertions(+), 214 deletions(-) create mode 100644 arch/arm/mach-omap1/mmc.h create mode 100644 arch/arm/mach-omap2/mmc.h delete mode 100644 arch/arm/plat-omap/include/plat/mmc.h create mode 100644 include/linux/platform_data/mmc-omap.h (limited to 'include/linux') diff --git a/arch/arm/mach-omap1/board-h2-mmc.c b/arch/arm/mach-omap1/board-h2-mmc.c index e1362ce48497..7119ef28e0ad 100644 --- a/arch/arm/mach-omap1/board-h2-mmc.c +++ b/arch/arm/mach-omap1/board-h2-mmc.c @@ -13,12 +13,11 @@ */ #include #include - +#include #include -#include - #include "board-h2.h" +#include "mmc.h" #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) diff --git a/arch/arm/mach-omap1/board-h3-mmc.c b/arch/arm/mach-omap1/board-h3-mmc.c index c74daace8cd6..17d77914d769 100644 --- a/arch/arm/mach-omap1/board-h3-mmc.c +++ b/arch/arm/mach-omap1/board-h3-mmc.c @@ -16,9 +16,8 @@ #include -#include - #include "board-h3.h" +#include "mmc.h" #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) diff --git a/arch/arm/mach-omap1/board-htcherald.c b/arch/arm/mach-omap1/board-htcherald.c index 87ab2086ef96..f23200ceb43d 100644 --- a/arch/arm/mach-omap1/board-htcherald.c +++ b/arch/arm/mach-omap1/board-htcherald.c @@ -43,7 +43,7 @@ #include #include -#include +#include "mmc.h" #include #include diff --git a/arch/arm/mach-omap1/board-innovator.c b/arch/arm/mach-omap1/board-innovator.c index db5f7d2976e7..411cc5b14ce3 100644 --- a/arch/arm/mach-omap1/board-innovator.c +++ b/arch/arm/mach-omap1/board-innovator.c @@ -36,13 +36,13 @@ #include #include #include -#include #include #include #include "iomap.h" #include "common.h" +#include "mmc.h" /* At OMAP1610 Innovator the Ethernet is directly connected to CS1 */ #define INNOVATOR1610_ETHR_START 0x04000300 diff --git a/arch/arm/mach-omap1/board-nokia770.c b/arch/arm/mach-omap1/board-nokia770.c index 7d5c06d6a52a..cb72f2474430 100644 --- a/arch/arm/mach-omap1/board-nokia770.c +++ b/arch/arm/mach-omap1/board-nokia770.c @@ -29,13 +29,13 @@ #include #include -#include #include #include #include #include "common.h" +#include "mmc.h" #define ADS7846_PENDOWN_GPIO 15 diff --git a/arch/arm/mach-omap1/board-sx1-mmc.c b/arch/arm/mach-omap1/board-sx1-mmc.c index 5932d56e17bf..4fcf19c78a08 100644 --- a/arch/arm/mach-omap1/board-sx1-mmc.c +++ b/arch/arm/mach-omap1/board-sx1-mmc.c @@ -16,9 +16,10 @@ #include #include -#include #include +#include "mmc.h" + #if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) static int mmc_set_power(struct device *dev, int slot, int power_on, diff --git a/arch/arm/mach-omap1/devices.c b/arch/arm/mach-omap1/devices.c index d3fec92c54cb..f85836ae6691 100644 --- a/arch/arm/mach-omap1/devices.c +++ b/arch/arm/mach-omap1/devices.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -30,6 +29,7 @@ #include "common.h" #include "clock.h" +#include "mmc.h" #if defined(CONFIG_SND_SOC) || defined(CONFIG_SND_SOC_MODULE) diff --git a/arch/arm/mach-omap1/mmc.h b/arch/arm/mach-omap1/mmc.h new file mode 100644 index 000000000000..39c2b13de884 --- /dev/null +++ b/arch/arm/mach-omap1/mmc.h @@ -0,0 +1,18 @@ +#include +#include + +#define OMAP15XX_NR_MMC 1 +#define OMAP16XX_NR_MMC 2 +#define OMAP1_MMC_SIZE 0x080 +#define OMAP1_MMC1_BASE 0xfffb7800 +#define OMAP1_MMC2_BASE 0xfffb7c00 /* omap16xx only */ + +#if defined(CONFIG_MMC_OMAP) || defined(CONFIG_MMC_OMAP_MODULE) +void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data, + int nr_controllers); +#else +static inline void omap1_init_mmc(struct omap_mmc_platform_data **mmc_data, + int nr_controllers) +{ +} +#endif diff --git a/arch/arm/mach-omap2/board-4430sdp.c b/arch/arm/mach-omap2/board-4430sdp.c index 3669c120c7e8..2ab267ec3b75 100644 --- a/arch/arm/mach-omap2/board-4430sdp.c +++ b/arch/arm/mach-omap2/board-4430sdp.c @@ -35,7 +35,6 @@ #include "common.h" #include -#include #include "omap4-keypad.h" #include