diff options
Diffstat (limited to 'block')
52 files changed, 6616 insertions, 389 deletions
diff --git a/block/Kconfig b/block/Kconfig index e97934eececa..09acf1b39905 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -99,6 +99,12 @@ config BLK_DEV_THROTTLING See Documentation/cgroups/blkio-controller.txt for more information. +menu "Partition Types" + +source "block/partitions/Kconfig" + +endmenu + endif # BLOCK config BLOCK_COMPAT diff --git a/block/Makefile b/block/Makefile index 514c6e4f427a..39b76ba66ffd 100644 --- a/block/Makefile +++ b/block/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ - blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o + blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \ + partition-generic.o partitions/ obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index b596e54ddd71..b8c143d68ee0 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -30,8 +30,10 @@ EXPORT_SYMBOL_GPL(blkio_root_cgroup); static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *, struct cgroup *); -static int blkiocg_can_attach_task(struct cgroup *, struct task_struct *); -static void blkiocg_attach_task(struct cgroup *, struct task_struct *); +static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *, + struct cgroup_taskset *); +static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *, + struct cgroup_taskset *); static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *); static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); @@ -44,8 +46,8 @@ static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *); struct cgroup_subsys blkio_subsys = { .name = "blkio", .create = blkiocg_create, - .can_attach_task = blkiocg_can_attach_task, - .attach_task = blkiocg_attach_task, + .can_attach = blkiocg_can_attach, + .attach = blkiocg_attach, .destroy = 
blkiocg_destroy, .populate = blkiocg_populate, #ifdef CONFIG_BLK_CGROUP @@ -768,25 +770,14 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, return disk_total; } -static int blkio_check_dev_num(dev_t dev) -{ - int part = 0; - struct gendisk *disk; - - disk = get_gendisk(dev, &part); - if (!disk || part) - return -ENODEV; - - return 0; -} - static int blkio_policy_parse_and_set(char *buf, struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid) { + struct gendisk *disk = NULL; char *s[4], *p, *major_s = NULL, *minor_s = NULL; - int ret; unsigned long major, minor; - int i = 0; + int i = 0, ret = -EINVAL; + int part; dev_t dev; u64 temp; @@ -804,37 +795,36 @@ static int blkio_policy_parse_and_set(char *buf, } if (i != 2) - return -EINVAL; + goto out; p = strsep(&s[0], ":"); if (p != NULL) major_s = p; else - return -EINVAL; + goto out; minor_s = s[0]; if (!minor_s) - return -EINVAL; + goto out; - ret = strict_strtoul(major_s, 10, &major); - if (ret) - return -EINVAL; + if (strict_strtoul(major_s, 10, &major)) + goto out; - ret = strict_strtoul(minor_s, 10, &minor); - if (ret) - return -EINVAL; + if (strict_strtoul(minor_s, 10, &minor)) + goto out; dev = MKDEV(major, minor); - ret = strict_strtoull(s[1], 10, &temp); - if (ret) - return -EINVAL; + if (strict_strtoull(s[1], 10, &temp)) + goto out; /* For rule removal, do not check for device presence. 
*/ if (temp) { - ret = blkio_check_dev_num(dev); - if (ret) - return ret; + disk = get_gendisk(dev, &part); + if (!disk || part) { + ret = -ENODEV; + goto out; + } } newpn->dev = dev; @@ -843,7 +833,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_POLICY_PROP: if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || temp > BLKIO_WEIGHT_MAX) - return -EINVAL; + goto out; newpn->plid = plid; newpn->fileid = fileid; @@ -860,7 +850,7 @@ static int blkio_policy_parse_and_set(char *buf, case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: if (temp > THROTL_IOPS_MAX) - return -EINVAL; + goto out; newpn->plid = plid; newpn->fileid = fileid; @@ -871,68 +861,96 @@ static int blkio_policy_parse_and_set(char *buf, default: BUG(); } - - return 0; + ret = 0; +out: + put_disk(disk); + return ret; } unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg, dev_t dev) { struct blkio_policy_node *pn; + unsigned long flags; + unsigned int weight; + + spin_lock_irqsave(&blkcg->lock, flags); pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP, BLKIO_PROP_weight_device); if (pn) - return pn->val.weight; + weight = pn->val.weight; else - return blkcg->weight; + weight = blkcg->weight; + + spin_unlock_irqrestore(&blkcg->lock, flags); + + return weight; } EXPORT_SYMBOL_GPL(blkcg_get_weight); uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev) { struct blkio_policy_node *pn; + unsigned long flags; + uint64_t bps = -1; + spin_lock_irqsave(&blkcg->lock, flags); pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, BLKIO_THROTL_read_bps_device); if (pn) - return pn->val.bps; - else - return -1; + bps = pn->val.bps; + spin_unlock_irqrestore(&blkcg->lock, flags); + + return bps; } uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev) { struct blkio_policy_node *pn; + unsigned long flags; + uint64_t bps = -1; + + spin_lock_irqsave(&blkcg->lock, flags); pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, 
BLKIO_THROTL_write_bps_device); if (pn) - return pn->val.bps; - else - return -1; + bps = pn->val.bps; + spin_unlock_irqrestore(&blkcg->lock, flags); + + return bps; } unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev) { struct blkio_policy_node *pn; + unsigned long flags; + unsigned int iops = -1; + spin_lock_irqsave(&blkcg->lock, flags); pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, BLKIO_THROTL_read_iops_device); if (pn) - return pn->val.iops; - else - return -1; + iops = pn->val.iops; + spin_unlock_irqrestore(&blkcg->lock, flags); + + return iops; } unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev) { struct blkio_policy_node *pn; + unsigned long flags; + unsigned int iops = -1; + + spin_lock_irqsave(&blkcg->lock, flags); pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL, BLKIO_THROTL_write_iops_device); if (pn) - return pn->val.iops; - else - return -1; + iops = pn->val.iops; + spin_unlock_irqrestore(&blkcg->lock, flags); + + return iops; } /* Checks whether user asked for deleting a policy rule */ @@ -1085,6 +1103,7 @@ static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft, if (blkio_delete_rule_command(newpn)) { blkio_policy_delete_node(pn); + kfree(pn); spin_unlock_irq(&blkcg->lock); goto update_io_group; } @@ -1609,30 +1628,39 @@ done: * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. 
*/ -static int blkiocg_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk) +static int blkiocg_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup_taskset *tset) { + struct task_struct *task; struct io_context *ioc; int ret = 0; /* task_lock() is needed to avoid races with exit_io_context() */ - task_lock(tsk); - ioc = tsk->io_context; - if (ioc && atomic_read(&ioc->nr_tasks) > 1) - ret = -EINVAL; - task_unlock(tsk); - + cgroup_taskset_for_each(task, cgrp, tset) { + task_lock(task); + ioc = task->io_context; + if (ioc && atomic_read(&ioc->nr_tasks) > 1) + ret = -EINVAL; + task_unlock(task); + if (ret) + break; + } return ret; } -static void blkiocg_attach_task(struct cgroup *cgrp, struct task_struct *tsk) +static void blkiocg_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, + struct cgroup_taskset *tset) { + struct task_struct *task; struct io_context *ioc; - task_lock(tsk); - ioc = tsk->io_context; - if (ioc) - ioc->cgroup_changed = 1; - task_unlock(tsk); + cgroup_taskset_for_each(task, cgrp, tset) { + task_lock(task); + ioc = task->io_context; + if (ioc) + ioc->cgroup_changed = 1; + task_unlock(task); + } } void blkio_policy_register(struct blkio_policy_type *blkiop) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index a71d2904ffb9..6f3ace7e792f 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -188,7 +188,7 @@ struct blkio_policy_node { union { unsigned int weight; /* - * Rate read/write in terms of byptes per second + * Rate read/write in terms of bytes per second * Whether this rate represents read or write is determined * by file type "fileid". 
*/ diff --git a/block/blk-core.c b/block/blk-core.c index d34433ae7917..15de223c7f93 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,6 +28,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/fault-inject.h> #include <linux/list_sort.h> +#include <linux/delay.h> #define CREATE_TRACE_POINTS #include <trace/events/block.h> @@ -38,8 +39,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); -static int __make_request(struct request_queue *q, struct bio *bio); - /* * For the allocated request tables */ @@ -347,30 +346,87 @@ void blk_put_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_put_queue); -/* - * Note: If a driver supplied the queue lock, it is disconnected - * by this function. The actual state of the lock doesn't matter - * here as the request_queue isn't accessible after this point - * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. +/** + * blk_drain_queue - drain requests from request_queue + * @q: queue to drain + * @drain_all: whether to drain all requests or only the ones w/ ELVPRIV + * + * Drain requests from @q. If @drain_all is set, all requests are drained. + * If not, only ELVPRIV requests are drained. The caller is responsible + * for ensuring that no new requests which need to be drained are queued. + */ +void blk_drain_queue(struct request_queue *q, bool drain_all) +{ + while (true) { + int nr_rqs; + + spin_lock_irq(q->queue_lock); + + elv_drain_elevator(q); + if (drain_all) + blk_throtl_drain(q); + + /* + * This function might be called on a queue which failed + * driver init after queue creation. Some drivers + * (e.g. fd) get unhappy in such cases. Kick queue iff + * dispatch queue has something on it. 
+ */ + if (!list_empty(&q->queue_head)) + __blk_run_queue(q); + + if (drain_all) + nr_rqs = q->rq.count[0] + q->rq.count[1]; + else + nr_rqs = q->rq.elvpriv; + + spin_unlock_irq(q->queue_lock); + + if (!nr_rqs) + break; + msleep(10); + } +} + +/** + * blk_cleanup_queue - shutdown a request queue + * @q: request queue to shutdown + * + * Mark @q DEAD, drain all pending requests, destroy and put it. All + * future requests will be failed immediately with -ENODEV. */ void blk_cleanup_queue(struct request_queue *q) { - /* - * We know we have process context here, so we can be a little - * cautious and ensure that pending block actions on this device - * are done before moving on. Going into this function, we should - * not have processes doing IO to this device. - */ - blk_sync_queue(q); + spinlock_t *lock = q->queue_lock; - del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + /* mark @q DEAD, no new request or merges will be allowed afterwards */ mutex_lock(&q->sysfs_lock); queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); - mutex_unlock(&q->sysfs_lock); + + spin_lock_irq(lock); + queue_flag_set(QUEUE_FLAG_NOMERGES, q); + queue_flag_set(QUEUE_FLAG_NOXMERGES, q); + queue_flag_set(QUEUE_FLAG_DEAD, q); if (q->queue_lock != &q->__queue_lock) q->queue_lock = &q->__queue_lock; + spin_unlock_irq(lock); + mutex_unlock(&q->sysfs_lock); + + /* + * Drain all requests queued before DEAD marking. The caller might + * be trying to tear down @q before its elevator is initialized, in + * which case we don't want to call into draining. 
+ */ + if (q->elevator) + blk_drain_queue(q, true); + + /* @q won't process any more request, flush async actions */ + del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); + blk_sync_queue(q); + + /* @q is and will stay empty, shutdown and put */ blk_put_queue(q); } EXPORT_SYMBOL(blk_cleanup_queue); @@ -418,6 +474,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) q->backing_dev_info.state = 0; q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY; q->backing_dev_info.name = "block"; + q->node = node_id; err = bdi_init(&q->backing_dev_info); if (err) { @@ -502,7 +559,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; - q = blk_init_allocated_queue_node(uninit_q, rfn, lock, node_id); + q = blk_init_allocated_queue(uninit_q, rfn, lock); if (!q) blk_cleanup_queue(uninit_q); @@ -514,18 +571,9 @@ struct request_queue * blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, spinlock_t *lock) { - return blk_init_allocated_queue_node(q, rfn, lock, -1); -} -EXPORT_SYMBOL(blk_init_allocated_queue); - -struct request_queue * -blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, - spinlock_t *lock, int node_id) -{ if (!q) return NULL; - q->node = node_id; if (blk_init_free_list(q)) return NULL; @@ -541,7 +589,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, /* * This also sets hw/phys segments, boundary and size */ - blk_queue_make_request(q, __make_request); + blk_queue_make_request(q, blk_queue_bio); q->sg_reserved_size = INT_MAX; @@ -555,7 +603,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, return NULL; } -EXPORT_SYMBOL(blk_init_allocated_queue_node); +EXPORT_SYMBOL(blk_init_allocated_queue); int blk_get_queue(struct request_queue *q) { @@ -576,7 +624,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * 
-blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, unsigned int flags, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -587,12 +635,10 @@ blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) rq->cmd_flags = flags | REQ_ALLOCED; - if (priv) { - if (unlikely(elv_set_request(q, rq, gfp_mask))) { - mempool_free(rq, q->rq.rq_pool); - return NULL; - } - rq->cmd_flags |= REQ_ELVPRIV; + if ((flags & REQ_ELVPRIV) && + unlikely(elv_set_request(q, rq, gfp_mask))) { + mempool_free(rq, q->rq.rq_pool); + return NULL; } return rq; @@ -651,12 +697,13 @@ static void __freed_request(struct request_queue *q, int sync) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(struct request_queue *q, int sync, int priv) +static void freed_request(struct request_queue *q, unsigned int flags) { struct request_list *rl = &q->rq; + int sync = rw_is_sync(flags); rl->count[sync]--; - if (priv) + if (flags & REQ_ELVPRIV) rl->elvpriv--; __freed_request(q, sync); @@ -684,10 +731,19 @@ static bool blk_rq_should_init_elevator(struct bio *bio) return true; } -/* - * Get a free request, queue_lock must be held. - * Returns NULL on failure, with queue_lock held. - * Returns !NULL on success, with queue_lock *not held*. +/** + * get_request - get a free request + * @q: request_queue to allocate request from + * @rw_flags: RW and SYNC flags + * @bio: bio to allocate request for (can be %NULL) + * @gfp_mask: allocation mask + * + * Get a free request from @q. This function may fail under memory + * pressure or if @q is dead. + * + * Must be callled with @q->queue_lock held and, + * Returns %NULL on failure, with @q->queue_lock held. + * Returns !%NULL on success, with @q->queue_lock *not held*. 
*/ static struct request *get_request(struct request_queue *q, int rw_flags, struct bio *bio, gfp_t gfp_mask) @@ -696,7 +752,10 @@ static struct request *get_request(struct request_queue *q, int rw_flags, struct request_list *rl = &q->rq; struct io_context *ioc = NULL; const bool is_sync = rw_is_sync(rw_flags) != 0; - int may_queue, priv = 0; + int may_queue; + + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) @@ -740,17 +799,17 @@ static struct request *get_request(struct request_queue *q, int rw_flags, rl->count[is_sync]++; rl->starved[is_sync] = 0; - if (blk_rq_should_init_elevator(bio)) { - priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); - if (priv) - rl->elvpriv++; + if (blk_rq_should_init_elevator(bio) && + !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags)) { + rw_flags |= REQ_ELVPRIV; + rl->elvpriv++; } if (blk_queue_io_stat(q)) rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); - rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); + rq = blk_alloc_request(q, rw_flags, gfp_mask); if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything @@ -760,7 +819,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, is_sync, priv); + freed_request(q, rw_flags); /* * in the very unlikely event that allocation failed and no @@ -790,11 +849,18 @@ out: return rq; } -/* - * No available requests for this queue, wait for some requests to become - * available. +/** + * get_request_wait - get a free request with retry + * @q: request_queue to allocate request from + * @rw_flags: RW and SYNC flags + * @bio: bio to allocate request for (can be %NULL) + * + * Get a free request from @q. This function keeps retrying under memory + * pressure and fails iff @q is dead. 
* - * Called with q->queue_lock held, and returns with it unlocked. + * Must be callled with @q->queue_lock held and, + * Returns %NULL on failure, with @q->queue_lock held. + * Returns !%NULL on success, with @q->queue_lock *not held*. */ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct bio *bio) @@ -808,6 +874,9 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct io_context *ioc; struct request_list *rl = &q->rq; + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); @@ -838,19 +907,15 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) { struct request *rq; - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - return NULL; - BUG_ON(rw != READ && rw != WRITE); spin_lock_irq(q->queue_lock); - if (gfp_mask & __GFP_WAIT) { + if (gfp_mask & __GFP_WAIT) rq = get_request_wait(q, rw, NULL); - } else { + else rq = get_request(q, rw, NULL, gfp_mask); - if (!rq) - spin_unlock_irq(q->queue_lock); - } + if (!rq) + spin_unlock_irq(q->queue_lock); /* q->queue_lock is unlocked at this point */ return rq; @@ -1052,14 +1117,13 @@ void __blk_put_request(struct request_queue *q, struct request *req) * it didn't come out of our reserved rq pools */ if (req->cmd_flags & REQ_ALLOCED) { - int is_sync = rq_is_sync(req) != 0; - int priv = req->cmd_flags & REQ_ELVPRIV; + unsigned int flags = req->cmd_flags; BUG_ON(!list_empty(&req->queuelist)); BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); - freed_request(q, is_sync, priv); + freed_request(q, flags); } } EXPORT_SYMBOL_GPL(__blk_put_request); @@ -1161,18 +1225,32 @@ static bool bio_attempt_front_merge(struct request_queue *q, return true; } -/* - * Attempts to merge with the plugged list in the current process. Returns - * true if merge was successful, otherwise false. 
+/** + * attempt_plug_merge - try to merge with %current's plugged list + * @q: request_queue new bio is being queued at + * @bio: new bio being queued + * @request_count: out parameter for number of traversed plugged requests + * + * Determine whether @bio being queued on @q can be merged with a request + * on %current's plugged list. Returns %true if merge was successful, + * otherwise %false. + * + * This function is called without @q->queue_lock; however, elevator is + * accessed iff there already are requests on the plugged list which in + * turn guarantees validity of the elevator. + * + * Note that, on successful merge, elevator operation + * elevator_bio_merged_fn() will be called without queue lock. Elevator + * must be ready for this. */ -static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, - struct bio *bio, unsigned int *request_count) +static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, + unsigned int *request_count) { struct blk_plug *plug; struct request *rq; bool ret = false; - plug = tsk->plug; + plug = current->plug; if (!plug) goto out; *request_count = 0; @@ -1202,7 +1280,6 @@ out: void init_request_from_bio(struct request *req, struct bio *bio) { - req->cpu = bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; @@ -1215,7 +1292,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } -static int __make_request(struct request_queue *q, struct bio *bio) +void blk_queue_bio(struct request_queue *q, struct bio *bio) { const bool sync = !!(bio->bi_rw & REQ_SYNC); struct blk_plug *plug; @@ -1240,8 +1317,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. 
*/ - if (attempt_plug_merge(current, q, bio, &request_count)) - goto out; + if (attempt_plug_merge(q, bio, &request_count)) + return; spin_lock_irq(q->queue_lock); @@ -1275,6 +1352,10 @@ get_rq: * Returns with the queue unlocked. */ req = get_request_wait(q, rw_flags, bio); + if (unlikely(!req)) { + bio_endio(bio, -ENODEV); /* @q is dead */ + goto out_unlock; + } /* * After dropping the lock and possibly sleeping here, our request @@ -1284,8 +1365,7 @@ get_rq: */ init_request_from_bio(req, bio); - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || - bio_flagged(bio, BIO_CPU_AFFINE)) + if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags)) req->cpu = raw_smp_processor_id(); plug = current->plug; @@ -1298,15 +1378,19 @@ get_rq: */ if (list_empty(&plug->list)) trace_block_plug(q); - else if (!plug->should_sort) { - struct request *__rq; + else { + if (!plug->should_sort) { + struct request *__rq; - __rq = list_entry_rq(plug->list.prev); - if (__rq->q != q) - plug->should_sort = 1; + __rq = list_entry_rq(plug->list.prev); + if (__rq->q != q) + plug->should_sort = 1; + } + if (request_count >= BLK_MAX_REQUEST_COUNT) { + blk_flush_plug_list(plug, false); + trace_block_plug(q); + } } - if (request_count >= BLK_MAX_REQUEST_COUNT) - blk_flush_plug_list(plug, false); list_add_tail(&req->queuelist, &plug->list); drive_stat_acct(req, 1); } else { @@ -1316,9 +1400,8 @@ get_rq: out_unlock: spin_unlock_irq(q->queue_lock); } -out: - return 0; } +EXPORT_SYMBOL_GPL(blk_queue_bio); /* for device mapper only */ /* * If bio->bi_dev is a partition, remap the location @@ -1417,165 +1500,135 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) return 0; } -/** - * generic_make_request - hand a buffer to its device driver for I/O - * @bio: The bio describing the location in memory and on the device. - * - * generic_make_request() is used to make I/O requests of block - * devices. It is passed a &struct bio, which describes the I/O that needs - * to be done. 
- * - * generic_make_request() does not return any status. The - * success/failure status of the request, along with notification of - * completion, is delivered asynchronously through the bio->bi_end_io - * function described (one day) else where. - * - * The caller of generic_make_request must make sure that bi_io_vec - * are set to describe the memory buffer, and that bi_dev and bi_sector are - * set to describe the device address, and the - * bi_end_io and optionally bi_private are set to describe how - * completion notification should be signaled. - * - * generic_make_request and the drivers it calls may use bi_next if this - * bio happens to be merged with someone else, and may change bi_dev and - * bi_sector for remaps as it sees fit. So the values of these fields - * should NOT be depended on after the call to generic_make_request. - */ -static inline void __generic_make_request(struct bio *bio) +static noinline_for_stack bool +generic_make_request_checks(struct bio *bio) { struct request_queue *q; - sector_t old_sector; - int ret, nr_sectors = bio_sectors(bio); - dev_t old_dev; + int nr_sectors = bio_sectors(bio); int err = -EIO; + char b[BDEVNAME_SIZE]; + struct hd_struct *part; might_sleep(); if (bio_check_eod(bio, nr_sectors)) goto end_io; - /* - * Resolve the mapping until finished. (drivers are - * still free to implement/resolve their own stacking - * by explicitly returning 0) - * - * NOTE: we don't repeat the blk_size check for each new device. - * Stacking drivers are expected to know what they are doing. 
- */ - old_sector = -1; - old_dev = 0; - do { - char b[BDEVNAME_SIZE]; - struct hd_struct *part; - - q = bdev_get_queue(bio->bi_bdev); - if (unlikely(!q)) { - printk(KERN_ERR - "generic_make_request: Trying to access " - "nonexistent block-device %s (%Lu)\n", - bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); - goto end_io; - } - - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { - printk(KERN_ERR "bio too big device %s (%u > %u)\n", - bdevname(bio->bi_bdev, b), - bio_sectors(bio), - queue_max_hw_sectors(q)); - goto end_io; - } - - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) - goto end_io; - - part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || - should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) - goto end_io; - - /* - * If this device has partitions, remap block n - * of partition p to block n+start(p) of the disk. - */ - blk_partition_remap(bio); + q = bdev_get_queue(bio->bi_bdev); + if (unlikely(!q)) { + printk(KERN_ERR + "generic_make_request: Trying to access " + "nonexistent block-device %s (%Lu)\n", + bdevname(bio->bi_bdev, b), + (long long) bio->bi_sector); + goto end_io; + } - if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) - goto end_io; + if (unlikely(!(bio->bi_rw & REQ_DISCARD) && + nr_sectors > queue_max_hw_sectors(q))) { + printk(KERN_ERR "bio too big device %s (%u > %u)\n", + bdevname(bio->bi_bdev, b), + bio_sectors(bio), + queue_max_hw_sectors(q)); + goto end_io; + } - if (old_sector != -1) - trace_block_bio_remap(q, bio, old_dev, old_sector); + part = bio->bi_bdev->bd_part; + if (should_fail_request(part, bio->bi_size) || + should_fail_request(&part_to_disk(part)->part0, + bio->bi_size)) + goto end_io; - old_sector = bio->bi_sector; - old_dev = bio->bi_bdev->bd_dev; + /* + * If this device has partitions, remap block n + * of partition p to block n+start(p) of the disk. 
+ */ + blk_partition_remap(bio); - if (bio_check_eod(bio, nr_sectors)) - goto end_io; + if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) + goto end_io; - /* - * Filter flush bio's early so that make_request based - * drivers without flush support don't have to worry - * about them. - */ - if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { - bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); - if (!nr_sectors) { - err = 0; - goto end_io; - } - } + if (bio_check_eod(bio, nr_sectors)) + goto end_io; - if ((bio->bi_rw & REQ_DISCARD) && - (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { - err = -EOPNOTSUPP; + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. + */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; goto end_io; } + } - if (blk_throtl_bio(q, &bio)) - goto end_io; - - /* - * If bio = NULL, bio has been throttled and will be submitted - * later. - */ - if (!bio) - break; - - trace_block_bio_queue(q, bio); + if ((bio->bi_rw & REQ_DISCARD) && + (!blk_queue_discard(q) || + ((bio->bi_rw & REQ_SECURE) && + !blk_queue_secdiscard(q)))) { + err = -EOPNOTSUPP; + goto end_io; + } - ret = q->make_request_fn(q, bio); - } while (ret); + if (blk_throtl_bio(q, bio)) + return false; /* throttled, will be resubmitted later */ - return; + trace_block_bio_queue(q, bio); + return true; end_io: bio_endio(bio, err); + return false; } -/* - * We only want one ->make_request_fn to be active at a time, - * else stack usage with stacked devices could be a problem. - * So use current->bio_list to keep a list of requests - * submited by a make_request_fn function. - * current->bio_list is also used as a flag to say if - * generic_make_request is currently active in this task or not. - * If it is NULL, then no make_request is active. 
If it is non-NULL, - * then a make_request is active, and new requests should be added - * at the tail +/** + * generic_make_request - hand a buffer to its device driver for I/O + * @bio: The bio describing the location in memory and on the device. + * + * generic_make_request() is used to make I/O requests of block + * devices. It is passed a &struct bio, which describes the I/O that needs + * to be done. + * + * generic_make_request() does not return any status. The + * success/failure status of the request, along with notification of + * completion, is delivered asynchronously through the bio->bi_end_io + * function described (one day) else where. + * + * The caller of generic_make_request must make sure that bi_io_vec + * are set to describe the memory buffer, and that bi_dev and bi_sector are + * set to describe the device address, and the + * bi_end_io and optionally bi_private are set to describe how + * completion notification should be signaled. + * + * generic_make_request and the drivers it calls may use bi_next if this + * bio happens to be merged with someone else, and may resubmit the bio to + * a lower device by calling into generic_make_request recursively, which + * means the bio should NOT be touched after the call to ->make_request_fn. */ void generic_make_request(struct bio *bio) { struct bio_list bio_list_on_stack; + if (!generic_make_request_checks(bio)) + return; + + /* + * We only want one ->make_request_fn to be active at a time, else + * stack usage with stacked devices could be a problem. So use + * current->bio_list to keep a list of requests submited by a + * make_request_fn function. current->bio_list is also used as a + * flag to say if generic_make_request is currently active in this + * task or not. If it is NULL, then no make_request is active. 
If + * it is non-NULL, then a make_request is active, and new requests + * should be added at the tail + */ if (current->bio_list) { - /* make_request is active */ bio_list_add(current->bio_list, bio); return; } + /* following loop may be a bit non-obvious, and so deserves some * explanation. * Before entering the loop, bio->bi_next is NULL (as all callers @@ -1583,22 +1636,21 @@ void generic_make_request(struct bio *bio) * We pretend that we have just taken it off a longer list, so * we assign bio_list to a pointer to the bio_list_on_stack, * thus initialising the bio_list of new bios to be - * added. __generic_make_request may indeed add some more bios + * added. ->make_request() may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio * of the top of the list (no pretending) and so remove it from - * bio_list, and call into __generic_make_request again. - * - * The loop was structured like this to make only one call to - * __generic_make_request (which is important as it is large and - * inlined) and to keep the structure simple. + * bio_list, and call into ->make_request() again. 
*/ BUG_ON(bio->bi_next); bio_list_init(&bio_list_on_stack); current->bio_list = &bio_list_on_stack; do { - __generic_make_request(bio); + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + + q->make_request_fn(q, bio); + bio = bio_list_pop(current->bio_list); } while (bio); current->bio_list = NULL; /* deactivate */ @@ -1725,6 +1777,8 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) where = ELEVATOR_INSERT_FLUSH; add_acct_request(q, rq, where); + if (where == ELEVATOR_INSERT_FLUSH) + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2628,6 +2682,20 @@ EXPORT_SYMBOL(kblockd_schedule_delayed_work); #define PLUG_MAGIC 0x91827364 +/** + * blk_start_plug - initialize blk_plug and track it inside the task_struct + * @plug: The &struct blk_plug that needs to be initialized + * + * Description: + * Tracking blk_plug inside the task_struct will help with auto-flushing the + * pending I/O should the task end up blocking between blk_start_plug() and + * blk_finish_plug(). This is important from a performance perspective, but + * also ensures that we don't deadlock. For instance, if the task is blocking + * for a memory allocation, memory reclaim could end up wanting to free a + * page belonging to that request that is currently residing in our private + * plug. By flushing the pending I/O when the process goes to sleep, we avoid + * this kind of deadlock. 
+ */ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; diff --git a/block/blk-flush.c b/block/blk-flush.c index 491eb30a242d..720ad607ff91 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -320,7 +320,7 @@ void blk_insert_flush(struct request *rq) return; } - BUG_ON(!rq->bio || rq->bio != rq->biotail); + BUG_ON(rq->bio != rq->biotail); /*assumes zero or single bio rq */ /* * If there's data but flush is not necessary, the request can be @@ -330,7 +330,6 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); - blk_run_queue_async(q); return; } diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 129b9e209a3b..da2a818c3a92 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c @@ -24,6 +24,7 @@ #include <linux/mempool.h> #include <linux/bio.h> #include <linux/scatterlist.h> +#include <linux/export.h> #include <linux/slab.h> #include "blk.h" diff --git a/block/blk-map.c b/block/blk-map.c index e663ac2d8e68..623e1cd4cffe 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -204,10 +204,11 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, if (!iov[i].iov_len) return -EINVAL; - if (uaddr & queue_dma_alignment(q)) { + /* + * Keep going so we check length of all segments + */ + if (uaddr & queue_dma_alignment(q)) unaligned = 1; - break; - } } if (unaligned || (q->dma_pad_mask & len) || map_data) @@ -310,7 +311,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, if (IS_ERR(bio)) return PTR_ERR(bio); - if (rq_data_dir(rq) == WRITE) + if (!reading) bio->bi_rw |= REQ_WRITE; if (do_copy) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 60fda88c57f0..e7f9f657f105 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -457,11 +457,11 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, } /** - * blk_cleanup_queue: - release 
a &struct request_queue when it is no longer needed - * @kobj: the kobj belonging of the request queue to be released + * blk_release_queue: - release a &struct request_queue when it is no longer needed + * @kobj: the kobj belonging to the request queue to be released * * Description: - * blk_cleanup_queue is the pair to blk_init_queue() or + * blk_release_queue is the pair to blk_init_queue() or * blk_queue_make_request(). It should be called when a request queue is * being released; typically when a block device is being de-registered. * Currently, its primary task it to free all the &struct request @@ -490,6 +490,7 @@ static void blk_release_queue(struct kobject *kobj) if (q->queue_tags) __blk_queue_free_tags(q); + blk_throtl_release(q); blk_trace_shutdown(q); bdi_destroy(&q->backing_dev_info); diff --git a/block/blk-tag.c b/block/blk-tag.c index ece65fc4c79b..4af6f5cc1167 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c @@ -282,16 +282,9 @@ EXPORT_SYMBOL(blk_queue_resize_tags); void blk_queue_end_tag(struct request_queue *q, struct request *rq) { struct blk_queue_tag *bqt = q->queue_tags; - int tag = rq->tag; + unsigned tag = rq->tag; /* negative tags invalid */ - BUG_ON(tag == -1); - - if (unlikely(tag >= bqt->real_max_depth)) - /* - * This can happen after tag depth has been reduced. - * FIXME: how about a warning or info message here? 
- */ - return; + BUG_ON(tag >= bqt->real_max_depth); list_del_init(&rq->queuelist); rq->cmd_flags &= ~REQ_QUEUED; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index a19f58c6fc3a..4553245d9317 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -10,6 +10,7 @@ #include <linux/bio.h> #include <linux/blktrace_api.h> #include "blk-cgroup.h" +#include "blk.h" /* Max dispatch from a group in 1 round */ static int throtl_grp_quantum = 8; @@ -302,16 +303,16 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) return tg; } -/* - * This function returns with queue lock unlocked in case of error, like - * request queue is no more - */ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) { struct throtl_grp *tg = NULL, *__tg = NULL; struct blkio_cgroup *blkcg; struct request_queue *q = td->queue; + /* no throttling for dead queue */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) + return NULL; + rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); @@ -323,32 +324,22 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc - * - * Take the request queue reference to make sure queue does not - * go away once we return from allocation. + * we need to drop rcu lock and queue_lock before we call alloc. */ - blk_get_queue(q); rcu_read_unlock(); spin_unlock_irq(q->queue_lock); tg = throtl_alloc_tg(td); - /* - * We might have slept in group allocation. Make sure queue is not - * dead - */ - if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { - blk_put_queue(q); - if (tg) - kfree(tg); - - return ERR_PTR(-ENODEV); - } - blk_put_queue(q); /* Group allocated and queue is still alive. 
take the lock */ spin_lock_irq(q->queue_lock); + /* Make sure @q is still alive */ + if (unlikely(test_bit(QUEUE_FLAG_DEAD, &q->queue_flags))) { + kfree(tg); + return NULL; + } + /* * Initialize the new group. After sleeping, read the blkcg again. */ @@ -1014,11 +1005,6 @@ static void throtl_release_tgs(struct throtl_data *td) } } -static void throtl_td_free(struct throtl_data *td) -{ - kfree(td); -} - /* * Blk cgroup controller notification saying that blkio_group object is being * delinked as associated cgroup object is going away. That also means that @@ -1123,17 +1109,17 @@ static struct blkio_policy_type blkio_policy_throtl = { .plid = BLKIO_POLICY_THROTL, }; -int blk_throtl_bio(struct request_queue *q, struct bio **biop) +bool blk_throtl_bio(struct request_queue *q, struct bio *bio) { struct throtl_data *td = q->td; struct throtl_grp *tg; - struct bio *bio = *biop; bool rw = bio_data_dir(bio), update_disptime = true; struct blkio_cgroup *blkcg; + bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { bio->bi_rw &= ~REQ_THROTTLED; - return 0; + goto out; } /* @@ -1152,7 +1138,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); - return 0; + goto out; } } rcu_read_unlock(); @@ -1161,18 +1147,10 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) * Either group has not been allocated yet or it is not an unlimited * IO group */ - spin_lock_irq(q->queue_lock); tg = throtl_get_tg(td); - - if (IS_ERR(tg)) { - if (PTR_ERR(tg) == -ENODEV) { - /* - * Queue is gone. No queue lock held here. - */ - return -ENODEV; - } - } + if (unlikely(!tg)) + goto out_unlock; if (tg->nr_queued[rw]) { /* @@ -1200,7 +1178,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) * So keep on trimming slice even if bio is not queued. 
*/ throtl_trim_slice(td, tg, rw); - goto out; + goto out_unlock; } queue_bio: @@ -1212,16 +1190,52 @@ queue_bio: tg->nr_queued[READ], tg->nr_queued[WRITE]); throtl_add_bio_tg(q->td, tg, bio); - *biop = NULL; + throttled = true; if (update_disptime) { tg_update_disptime(td, tg); throtl_schedule_next_dispatch(td); } +out_unlock: + spin_unlock_irq(q->queue_lock); out: + return throttled; +} + +/** + * blk_throtl_drain - drain throttled bios + * @q: request_queue to drain throttled bios for + * + * Dispatch all currently throttled bios on @q through ->make_request_fn(). + */ +void blk_throtl_drain(struct request_queue *q) + __releases(q->queue_lock) __acquires(q->queue_lock) +{ + struct throtl_data *td = q->td; + struct throtl_rb_root *st = &td->tg_service_tree; + struct throtl_grp *tg; + struct bio_list bl; + struct bio *bio; + + WARN_ON_ONCE(!queue_is_locked(q)); + + bio_list_init(&bl); + + while ((tg = throtl_rb_first(st))) { + throtl_dequeue_tg(td, tg); + + while ((bio = bio_list_peek(&tg->bio_lists[READ]))) + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); + while ((bio = bio_list_peek(&tg->bio_lists[WRITE]))) + tg_dispatch_one_bio(td, tg, bio_data_dir(bio), &bl); + } spin_unlock_irq(q->queue_lock); - return 0; + + while ((bio = bio_list_pop(&bl))) + generic_make_request(bio); + + spin_lock_irq(q->queue_lock); } int blk_throtl_init(struct request_queue *q) @@ -1296,7 +1310,11 @@ void blk_throtl_exit(struct request_queue *q) * it. 
*/ throtl_shutdown_wq(q); - throtl_td_free(td); +} + +void blk_throtl_release(struct request_queue *q) +{ + kfree(q->td); } static int __init throtl_init(void) diff --git a/block/blk.h b/block/blk.h index 20b900a377c9..3f6551b3c92d 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,6 +15,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio); int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); +void blk_drain_queue(struct request_queue *q, bool drain_all); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); bool __blk_end_bidi_request(struct request *rq, int error, @@ -188,4 +189,21 @@ static inline int blk_do_io_stat(struct request *rq) (rq->cmd_flags & REQ_DISCARD)); } -#endif +#ifdef CONFIG_BLK_DEV_THROTTLING +extern bool blk_throtl_bio(struct request_queue *q, struct bio *bio); +extern void blk_throtl_drain(struct request_queue *q); +extern int blk_throtl_init(struct request_queue *q); +extern void blk_throtl_exit(struct request_queue *q); +extern void blk_throtl_release(struct request_queue *q); +#else /* CONFIG_BLK_DEV_THROTTLING */ +static inline bool blk_throtl_bio(struct request_queue *q, struct bio *bio) +{ + return false; +} +static inline void blk_throtl_drain(struct request_queue *q) { } +static inline int blk_throtl_init(struct request_queue *q) { return 0; } +static inline void blk_throtl_exit(struct request_queue *q) { } +static inline void blk_throtl_release(struct request_queue *q) { } +#endif /* CONFIG_BLK_DEV_THROTTLING */ + +#endif /* BLK_INTERNAL_H */ diff --git a/block/bsg-lib.c b/block/bsg-lib.c index 6690e6e41037..7ad49c88f6b1 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -25,7 +25,7 @@ #include <linux/delay.h> #include <linux/scatterlist.h> #include <linux/bsg-lib.h> -#include <linux/module.h> +#include <linux/export.h> #include <scsi/scsi_cmnd.h> /** diff --git a/block/bsg.c b/block/bsg.c index 702f1316bb8f..9651ec7b87c2 
100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -1070,7 +1070,7 @@ EXPORT_SYMBOL_GPL(bsg_register_queue); static struct cdev bsg_cdev; -static char *bsg_devnode(struct device *dev, mode_t *mode) +static char *bsg_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev)); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 16ace89613bc..3548705b04e4 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1655,6 +1655,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, struct request *next) { struct cfq_queue *cfqq = RQ_CFQQ(rq); + struct cfq_data *cfqd = q->elevator->elevator_data; + /* * reposition in fifo if next is older than rq */ @@ -1669,6 +1671,16 @@ cfq_merged_requests(struct request_queue *q, struct request *rq, cfq_remove_request(next); cfq_blkiocg_update_io_merged_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(next), rq_is_sync(next)); + + cfqq = RQ_CFQQ(next); + /* + * all requests of this queue are merged to other queues, delete it + * from the service tree. 
If it's the active_queue, + * cfq_dispatch_requests() will choose to expire it or do idle + */ + if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list) && + cfqq != cfqd->active_queue) + cfq_del_cfqq_rr(cfqd, cfqq); } static int cfq_allow_merge(struct request_queue *q, struct request *rq, @@ -3184,7 +3196,7 @@ static int cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, } } - if (ret) + if (ret && ret != -EEXIST) printk(KERN_ERR "cfq: cic link failed!\n"); return ret; @@ -3200,6 +3212,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) { struct io_context *ioc = NULL; struct cfq_io_context *cic; + int ret; might_sleep_if(gfp_mask & __GFP_WAIT); @@ -3207,6 +3220,7 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) if (!ioc) return NULL; +retry: cic = cfq_cic_lookup(cfqd, ioc); if (cic) goto out; @@ -3215,7 +3229,12 @@ cfq_get_io_context(struct cfq_data *cfqd, gfp_t gfp_mask) if (cic == NULL) goto err; - if (cfq_cic_link(cfqd, ioc, cic, gfp_mask)) + ret = cfq_cic_link(cfqd, ioc, cic, gfp_mask); + if (ret == -EEXIST) { + /* someone has linked cic to ioc already */ + cfq_cic_free(cic); + goto retry; + } else if (ret) goto err_free; out: @@ -4036,6 +4055,11 @@ static void *cfq_init_queue(struct request_queue *q) if (blkio_alloc_blkg_stats(&cfqg->blkg)) { kfree(cfqg); + + spin_lock(&cic_index_lock); + ida_remove(&cic_index_ida, cfqd->cic_index); + spin_unlock(&cic_index_lock); + kfree(cfqd); return NULL; } diff --git a/block/elevator.c b/block/elevator.c index a3b64bc71d88..66343d6917d0 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -31,7 +31,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/delay.h> #include <linux/blktrace_api.h> #include <linux/hash.h> #include <linux/uaccess.h> @@ -182,7 +181,7 @@ static void elevator_attach(struct request_queue *q, struct elevator_queue *eq, eq->elevator_data = data; } -static char chosen_elevator[16]; +static char 
chosen_elevator[ELV_NAME_MAX]; static int __init elevator_setup(char *str) { @@ -606,43 +605,35 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) void elv_drain_elevator(struct request_queue *q) { static int printed; + + lockdep_assert_held(q->queue_lock); + while (q->elevator->ops->elevator_dispatch_fn(q, 1)) ; - if (q->nr_sorted == 0) - return; - if (printed++ < 10) { + if (q->nr_sorted && printed++ < 10) { printk(KERN_ERR "%s: forced dispatching is broken " "(nr_sorted=%u), please report this\n", q->elevator->elevator_type->elevator_name, q->nr_sorted); } } -/* - * Call with queue lock held, interrupts disabled - */ void elv_quiesce_start(struct request_queue *q) { if (!q->elevator) return; + spin_lock_irq(q->queue_lock); queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); - /* - * make sure we don't have any requests in flight - */ - elv_drain_elevator(q); - while (q->rq.elvpriv) { - __blk_run_queue(q); - spin_unlock_irq(q->queue_lock); - msleep(10); - spin_lock_irq(q->queue_lock); - elv_drain_elevator(q); - } + blk_drain_queue(q, false); } void elv_quiesce_end(struct request_queue *q) { + spin_lock_irq(q->queue_lock); queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + spin_unlock_irq(q->queue_lock); } void __elv_add_request(struct request_queue *q, struct request *rq, int where) @@ -972,7 +963,6 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * Turn on BYPASS and drain all requests w/ elevator private data */ - spin_lock_irq(q->queue_lock); elv_quiesce_start(q); /* @@ -983,8 +973,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) /* * attach and start new elevator */ + spin_lock_irq(q->queue_lock); elevator_attach(q, e, data); - spin_unlock_irq(q->queue_lock); if (old_elevator->registered) { @@ -999,9 +989,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * finally exit old elevator and turn off BYPASS. 
*/ elevator_exit(old_elevator); - spin_lock_irq(q->queue_lock); elv_quiesce_end(q); - spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); @@ -1015,10 +1003,7 @@ fail_register: elevator_exit(e); q->elevator = old_elevator; elv_register_queue(q); - - spin_lock_irq(q->queue_lock); - queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); - spin_unlock_irq(q->queue_lock); + elv_quiesce_end(q); return err; } diff --git a/block/genhd.c b/block/genhd.c index e2f67902dd02..83e7c04015e1 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -15,7 +15,6 @@ #include <linux/slab.h> #include <linux/kmod.h> #include <linux/kobj_map.h> -#include <linux/buffer_head.h> #include <linux/mutex.h> #include <linux/idr.h> #include <linux/log2.h> @@ -507,7 +506,7 @@ static int exact_lock(dev_t devt, void *data) return 0; } -void register_disk(struct gendisk *disk) +static void register_disk(struct gendisk *disk) { struct device *ddev = disk_to_dev(disk); struct block_device *bdev; @@ -536,7 +535,7 @@ void register_disk(struct gendisk *disk) disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); /* No minors to use for partitions */ - if (!disk_partitionable(disk)) + if (!disk_part_scan_enabled(disk)) goto exit; /* No such device (e.g., media were just removed) */ @@ -611,6 +610,12 @@ void add_disk(struct gendisk *disk) register_disk(disk); blk_register_queue(disk); + /* + * Take an extra ref on queue which will be put on disk_release() + * so that it sticks around as long as @disk is there. 
+ */ + WARN_ON_ONCE(blk_get_queue(disk->queue)); + retval = sysfs_create_link(&disk_to_dev(disk)->kobj, &bdi->dev->kobj, "bdi"); WARN_ON(retval); @@ -841,7 +846,7 @@ static int show_partition(struct seq_file *seqf, void *v) char buf[BDEVNAME_SIZE]; /* Don't show non-partitionable removeable devices or empty devices */ - if (!get_capacity(sgp) || (!disk_partitionable(sgp) && + if (!get_capacity(sgp) || (!disk_max_parts(sgp) && (sgp->flags & GENHD_FL_REMOVABLE))) return 0; if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO) @@ -1095,13 +1100,15 @@ static void disk_release(struct device *dev) disk_replace_part_tbl(disk, NULL); free_part_stats(&disk->part0); free_part_info(&disk->part0); + if (disk->queue) + blk_put_queue(disk->queue); kfree(disk); } struct class block_class = { .name = "block", }; -static char *block_devnode(struct device *dev, mode_t *mode) +static char *block_devnode(struct device *dev, umode_t *mode) { struct gendisk *disk = dev_to_disk(dev); diff --git a/block/ioctl.c b/block/ioctl.c index 1124cd297263..4828fa349813 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -1,10 +1,11 @@ #include <linux/capability.h> #include <linux/blkdev.h> +#include <linux/export.h> #include <linux/gfp.h> #include <linux/blkpg.h> #include <linux/hdreg.h> #include <linux/backing-dev.h> -#include <linux/buffer_head.h> +#include <linux/fs.h> #include <linux/blktrace_api.h> #include <asm/uaccess.h> @@ -101,7 +102,7 @@ static int blkdev_reread_part(struct block_device *bdev) struct gendisk *disk = bdev->bd_disk; int res; - if (!disk_partitionable(disk) || bdev != bdev->bd_contains) + if (!disk_part_scan_enabled(disk) || bdev != bdev->bd_contains) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -179,6 +180,26 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); /* + * Is it an unrecognized ioctl? 
The correct returns are either + * ENOTTY (final) or ENOIOCTLCMD ("I don't know this one, try a + * fallback"). ENOIOCTLCMD gets turned into ENOTTY by the ioctl + * code before returning. + * + * Confused drivers sometimes return EINVAL, which is wrong. It + * means "I understood the ioctl command, but the parameters to + * it were wrong". + * + * We should aim to just fix the broken drivers, the EINVAL case + * should go away. + */ +static inline int is_unrecognized_ioctl(int ret) +{ + return ret == -EINVAL || + ret == -ENOTTY || + ret == -ENOIOCTLCMD; +} + +/* * always keep this in sync with compat_blkdev_ioctl() */ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, @@ -195,8 +216,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return -EACCES; ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - /* -EINVAL to handle old uncorrected drivers */ - if (ret != -EINVAL && ret != -ENOTTY) + if (!is_unrecognized_ioctl(ret)) return ret; fsync_bdev(bdev); @@ -205,8 +225,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKROSET: ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); - /* -EINVAL to handle old uncorrected drivers */ - if (ret != -EINVAL && ret != -ENOTTY) + if (!is_unrecognized_ioctl(ret)) return ret; if (!capable(CAP_SYS_ADMIN)) return -EACCES; diff --git a/block/partition-generic.c b/block/partition-generic.c new file mode 100644 index 000000000000..d06ec1c829c2 --- /dev/null +++ b/block/partition-generic.c @@ -0,0 +1,537 @@ +/* + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + * + * We now have independent partition support from the + * block drivers, which allows all the partition code to + * be grouped in one location, and it to be mostly self + * contained. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/kmod.h> +#include <linux/ctype.h> +#include <linux/genhd.h> +#include <linux/blktrace_api.h> + +#include "partitions/check.h" + +#ifdef CONFIG_BLK_DEV_MD +extern void md_autodetect_dev(dev_t dev); +#endif + +/* + * disk_name() is used by partition check code and the genhd driver. + * It formats the devicename of the indicated disk into + * the supplied buffer (of size at least 32), and returns + * a pointer to that same buffer (for convenience). + */ + +char *disk_name(struct gendisk *hd, int partno, char *buf) +{ + if (!partno) + snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); + else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) + snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); + else + snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); + + return buf; +} + +const char *bdevname(struct block_device *bdev, char *buf) +{ + return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); +} + +EXPORT_SYMBOL(bdevname); + +/* + * There's very little reason to use this, you should really + * have a struct block_device just about everywhere and use + * bdevname() instead. 
+ */ +const char *__bdevname(dev_t dev, char *buffer) +{ + scnprintf(buffer, BDEVNAME_SIZE, "unknown-block(%u,%u)", + MAJOR(dev), MINOR(dev)); + return buffer; +} + +EXPORT_SYMBOL(__bdevname); + +static ssize_t part_partition_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%d\n", p->partno); +} + +static ssize_t part_start_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); +} + +ssize_t part_size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); +} + +static ssize_t part_ro_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%d\n", p->policy ? 1 : 0); +} + +static ssize_t part_alignment_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); +} + +static ssize_t part_discard_alignment_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + return sprintf(buf, "%u\n", p->discard_alignment); +} + +ssize_t part_stat_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + int cpu; + + cpu = part_stat_lock(); + part_round_stats(cpu, p); + part_stat_unlock(); + return sprintf(buf, + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8u %8u %8u" + "\n", + part_stat_read(p, ios[READ]), + part_stat_read(p, merges[READ]), + (unsigned long long)part_stat_read(p, sectors[READ]), + jiffies_to_msecs(part_stat_read(p, ticks[READ])), + part_stat_read(p, ios[WRITE]), + part_stat_read(p, 
merges[WRITE]), + (unsigned long long)part_stat_read(p, sectors[WRITE]), + jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), + part_in_flight(p), + jiffies_to_msecs(part_stat_read(p, io_ticks)), + jiffies_to_msecs(part_stat_read(p, time_in_queue))); +} + +ssize_t part_inflight_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]), + atomic_read(&p->in_flight[1])); +} + +#ifdef CONFIG_FAIL_MAKE_REQUEST +ssize_t part_fail_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hd_struct *p = dev_to_part(dev); + + return sprintf(buf, "%d\n", p->make_it_fail); +} + +ssize_t part_fail_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct hd_struct *p = dev_to_part(dev); + int i; + + if (count > 0 && sscanf(buf, "%d", &i) > 0) + p->make_it_fail = (i == 0) ? 0 : 1; + + return count; +} +#endif + +static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); +static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); +static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); +static DEVICE_ATTR(ro, S_IRUGO, part_ro_show, NULL); +static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); +static DEVICE_ATTR(discard_alignment, S_IRUGO, part_discard_alignment_show, + NULL); +static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); +static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL); +#ifdef CONFIG_FAIL_MAKE_REQUEST +static struct device_attribute dev_attr_fail = + __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); +#endif + +static struct attribute *part_attrs[] = { + &dev_attr_partition.attr, + &dev_attr_start.attr, + &dev_attr_size.attr, + &dev_attr_ro.attr, + &dev_attr_alignment_offset.attr, + &dev_attr_discard_alignment.attr, + &dev_attr_stat.attr, + &dev_attr_inflight.attr, +#ifdef CONFIG_FAIL_MAKE_REQUEST + 
&dev_attr_fail.attr, +#endif + NULL +}; + +static struct attribute_group part_attr_group = { + .attrs = part_attrs, +}; + +static const struct attribute_group *part_attr_groups[] = { + &part_attr_group, +#ifdef CONFIG_BLK_DEV_IO_TRACE + &blk_trace_attr_group, +#endif + NULL +}; + +static void part_release(struct device *dev) +{ + struct hd_struct *p = dev_to_part(dev); + free_part_stats(p); + free_part_info(p); + kfree(p); +} + +struct device_type part_type = { + .name = "partition", + .groups = part_attr_groups, + .release = part_release, +}; + +static void delete_partition_rcu_cb(struct rcu_head *head) +{ + struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); + + part->start_sect = 0; + part->nr_sects = 0; + part_stat_set_all(part, 0); + put_device(part_to_dev(part)); +} + +void __delete_partition(struct hd_struct *part) +{ + call_rcu(&part->rcu_head, delete_partition_rcu_cb); +} + +void delete_partition(struct gendisk *disk, int partno) +{ + struct disk_part_tbl *ptbl = disk->part_tbl; + struct hd_struct *part; + + if (partno >= ptbl->len) + return; + + part = ptbl->part[partno]; + if (!part) + return; + + blk_free_devt(part_devt(part)); + rcu_assign_pointer(ptbl->part[partno], NULL); + rcu_assign_pointer(ptbl->last_lookup, NULL); + kobject_put(part->holder_dir); + device_del(part_to_dev(part)); + + hd_struct_put(part); +} + +static ssize_t whole_disk_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return 0; +} +static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, + whole_disk_show, NULL); + +struct hd_struct *add_partition(struct gendisk *disk, int partno, + sector_t start, sector_t len, int flags, + struct partition_meta_info *info) +{ + struct hd_struct *p; + dev_t devt = MKDEV(0, 0); + struct device *ddev = disk_to_dev(disk); + struct device *pdev; + struct disk_part_tbl *ptbl; + const char *dname; + int err; + + err = disk_expand_part_tbl(disk, partno); + if (err) + return ERR_PTR(err); + ptbl = 
disk->part_tbl; + + if (ptbl->part[partno]) + return ERR_PTR(-EBUSY); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return ERR_PTR(-EBUSY); + + if (!init_part_stats(p)) { + err = -ENOMEM; + goto out_free; + } + pdev = part_to_dev(p); + + p->start_sect = start; + p->alignment_offset = + queue_limit_alignment_offset(&disk->queue->limits, start); + p->discard_alignment = + queue_limit_discard_alignment(&disk->queue->limits, start); + p->nr_sects = len; + p->partno = partno; + p->policy = get_disk_ro(disk); + + if (info) { + struct partition_meta_info *pinfo = alloc_part_info(disk); + if (!pinfo) + goto out_free_stats; + memcpy(pinfo, info, sizeof(*info)); + p->info = pinfo; + } + + dname = dev_name(ddev); + if (isdigit(dname[strlen(dname) - 1])) + dev_set_name(pdev, "%sp%d", dname, partno); + else + dev_set_name(pdev, "%s%d", dname, partno); + + device_initialize(pdev); + pdev->class = &block_class; + pdev->type = &part_type; + pdev->parent = ddev; + + err = blk_alloc_devt(p, &devt); + if (err) + goto out_free_info; + pdev->devt = devt; + + /* delay uevent until 'holders' subdir is created */ + dev_set_uevent_suppress(pdev, 1); + err = device_add(pdev); + if (err) + goto out_put; + + err = -ENOMEM; + p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); + if (!p->holder_dir) + goto out_del; + + dev_set_uevent_suppress(pdev, 0); + if (flags & ADDPART_FLAG_WHOLEDISK) { + err = device_create_file(pdev, &dev_attr_whole_disk); + if (err) + goto out_del; + } + + /* everything is up and running, commence */ + rcu_assign_pointer(ptbl->part[partno], p); + + /* suppress uevent if the disk suppresses it */ + if (!dev_get_uevent_suppress(ddev)) + kobject_uevent(&pdev->kobj, KOBJ_ADD); + + hd_ref_init(p); + return p; + +out_free_info: + free_part_info(p); +out_free_stats: + free_part_stats(p); +out_free: + kfree(p); + return ERR_PTR(err); +out_del: + kobject_put(p->holder_dir); + device_del(pdev); +out_put: + put_device(pdev); + blk_free_devt(devt); + return 
ERR_PTR(err); +} + +/* + * Finish the KERN_CONT warning line started by the caller. If the driver + * provides ->unlock_native_capacity and GENHD_FL_NATIVE_CAPACITY is not set + * yet, call the hook, set the flag and return true (callers then rescan); + * otherwise report "truncated" and return false. + */ +static bool disk_unlock_native_capacity(struct gendisk *disk) +{ + const struct block_device_operations *bdops = disk->fops; + + if (bdops->unlock_native_capacity && + !(disk->flags & GENHD_FL_NATIVE_CAPACITY)) { + printk(KERN_CONT "enabling native capacity\n"); + bdops->unlock_native_capacity(disk); + disk->flags |= GENHD_FL_NATIVE_CAPACITY; + return true; + } else { + printk(KERN_CONT "truncated\n"); + return false; + } +} + +/* + * Delete every partition of @disk, revalidate the disk and re-add whatever + * check_partition() finds. Returns -EBUSY while bdev->bd_part_count is + * non-zero, the error from invalidate_partition(), -EIO when the table could + * not be read, and 0 otherwise. Restarts from the top whenever + * disk_unlock_native_capacity() reports that it unlocked the disk. + */ +int rescan_partitions(struct gendisk *disk, struct block_device *bdev) +{ + struct parsed_partitions *state = NULL; + struct disk_part_iter piter; + struct hd_struct *part; + int p, highest, res; +rescan: + /* on a restart, drop the table parsed by the previous pass */ + if (state && !IS_ERR(state)) { + kfree(state); + state = NULL; + } + + if (bdev->bd_part_count) + return -EBUSY; + res = invalidate_partition(disk, 0); + if (res) + return res; + + disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); + while ((part = disk_part_iter_next(&piter))) + delete_partition(disk, part->partno); + disk_part_iter_exit(&piter); + + if (disk->fops->revalidate_disk) + disk->fops->revalidate_disk(disk); + check_disk_size_change(disk, bdev); + bdev->bd_invalidated = 0; + if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) + return 0; + if (IS_ERR(state)) { + /* + * I/O error reading the partition table. If any + * partition code tried to read beyond EOD, retry + * after unlocking native capacity. + */ + if (PTR_ERR(state) == -ENOSPC) { + printk(KERN_WARNING "%s: partition table beyond EOD, ", + disk->disk_name); + if (disk_unlock_native_capacity(disk)) + goto rescan; + } + return -EIO; + } + /* + * If any partition code tried to read beyond EOD, try + * unlocking native capacity even if partition table is + * successfully read as we could be missing some partitions.
+ */ + if (state->access_beyond_eod) { + printk(KERN_WARNING + "%s: partition table partially beyond EOD, ", + disk->disk_name); + if (disk_unlock_native_capacity(disk)) + goto rescan; + } + + /* tell userspace that the media / partition table may have changed */ + kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); + + /* Detect the highest partition number and preallocate + * disk->part_tbl. This is an optimization and not strictly + * necessary. + */ + for (p = 1, highest = 0; p < state->limit; p++) + if (state->parts[p].size) + highest = p; + + disk_expand_part_tbl(disk, highest); + + /* add partitions */ + for (p = 1; p < state->limit; p++) { + sector_t size, from; + struct partition_meta_info *info = NULL; + + size = state->parts[p].size; + if (!size) + continue; + + from = state->parts[p].from; + if (from >= get_capacity(disk)) { + printk(KERN_WARNING + "%s: p%d start %llu is beyond EOD, ", + disk->disk_name, p, (unsigned long long) from); + if (disk_unlock_native_capacity(disk)) + goto rescan; + continue; + } + + if (from + size > get_capacity(disk)) { + printk(KERN_WARNING + "%s: p%d size %llu extends beyond EOD, ", + disk->disk_name, p, (unsigned long long) size); + + if (disk_unlock_native_capacity(disk)) { + /* free state and restart */ + goto rescan; + } else { + /* + * we can not ignore partitions of broken tables + * created by for example camera firmware, but + * we limit them to the end of the disk to avoid + * creating invalid block devices + */ + size = get_capacity(disk) - from; + } + } + + /* + * NOTE(review): info is assigned here but never used; the call + * below passes &state->parts[p].info regardless of has_info. + */ + if (state->parts[p].has_info) + info = &state->parts[p].info; + part = add_partition(disk, p, from, size, + state->parts[p].flags, + &state->parts[p].info); + if (IS_ERR(part)) { + /* a partition that cannot be added is reported and skipped */ + printk(KERN_ERR " %s: p%d could not be added: %ld\n", + disk->disk_name, p, -PTR_ERR(part)); + continue; + } +#ifdef CONFIG_BLK_DEV_MD + if (state->parts[p].flags & ADDPART_FLAG_RAID) + md_autodetect_dev(part_to_dev(part)->devt); +#endif + } + kfree(state); + return 0; +} + 
+/* + * Read the page-cache page of @bdev that holds 512-byte sector @n and return + * a pointer to that sector inside the page. On success the page is stored in + * p->v and is not released here. If read_mapping_page() returns an error + * pointer, or the page has PageError set (the page is then released), p->v is + * set to NULL and NULL is returned. + */ +unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) +{ + struct address_space *mapping = bdev->bd_inode->i_mapping; + struct page *page; + + /* page index = sector number divided by the number of sectors per page */ + page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), + NULL); + if (!IS_ERR(page)) { + if (PageError(page)) + goto fail; + p->v = page; + /* byte offset = (sector index within the page) * 512 */ + return (unsigned char *)page_address(page) + ((n & ((1 << (PAGE_CACHE_SHIFT - 9)) - 1)) << 9); +fail: + page_cache_release(page); + } + p->v = NULL; + return NULL; +} + +EXPORT_SYMBOL(read_dev_sector); diff --git a/block/partitions/Kconfig b/block/partitions/Kconfig new file mode 100644 index 000000000000..cb5f0a3f1b03 --- /dev/null +++ b/block/partitions/Kconfig @@ -0,0 +1,251 @@ +# +# Partition configuration +# +config PARTITION_ADVANCED + bool "Advanced partition selection" + help + Say Y here if you would like to use hard disks under Linux which + were partitioned under an operating system running on a different + architecture than your Linux system. + + Note that the answer to this question won't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about foreign partitioning schemes. + + If unsure, say N. + +config ACORN_PARTITION + bool "Acorn partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + help + Support hard disks partitioned under Acorn operating systems. + +config ACORN_PARTITION_CUMANA + bool "Cumana partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + help + Say Y here if you would like to use hard disks under Linux which + were partitioned using the Cumana interface on Acorn machines. 
+ +config ACORN_PARTITION_EESOX + bool "EESOX partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + +config ACORN_PARTITION_ICS + bool "ICS partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + help + Say Y here if you would like to use hard disks under Linux which + were partitioned using the ICS interface on Acorn machines. + +config ACORN_PARTITION_ADFS + bool "Native filecore partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + help + The Acorn Disc Filing System is the standard file system of the + RiscOS operating system which runs on Acorn's ARM-based Risc PC + systems and the Acorn Archimedes range of machines. If you say + `Y' here, Linux will support disk partitions created under ADFS. + +config ACORN_PARTITION_POWERTEC + bool "PowerTec partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + help + Support reading partition tables created on Acorn machines using + the PowerTec SCSI drive. + +config ACORN_PARTITION_RISCIX + bool "RISCiX partition support" if PARTITION_ADVANCED + default y if ARCH_ACORN + depends on ACORN_PARTITION + help + Once upon a time, there was a native Unix port for the Acorn series + of machines called RISCiX. If you say 'Y' here, Linux will be able + to read disks partitioned under RISCiX. + +config OSF_PARTITION + bool "Alpha OSF partition support" if PARTITION_ADVANCED + default y if ALPHA + help + Say Y here if you would like to use hard disks under Linux which + were partitioned on an Alpha machine. + +config AMIGA_PARTITION + bool "Amiga partition table support" if PARTITION_ADVANCED + default y if (AMIGA || AFFS_FS=y) + help + Say Y here if you would like to use hard disks under Linux which + were partitioned under AmigaOS. 
+ +config ATARI_PARTITION + bool "Atari partition table support" if PARTITION_ADVANCED + default y if ATARI + help + Say Y here if you would like to use hard disks under Linux which + were partitioned under the Atari OS. + +config IBM_PARTITION + bool "IBM disk label and partition support" + depends on PARTITION_ADVANCED && S390 + help + Say Y here if you would like to be able to read the hard disk + partition table format used by IBM DASD disks operating under CMS. + Otherwise, say N. + +config MAC_PARTITION + bool "Macintosh partition map support" if PARTITION_ADVANCED + default y if (MAC || PPC_PMAC) + help + Say Y here if you would like to use hard disks under Linux which + were partitioned on a Macintosh. + +config MSDOS_PARTITION + bool "PC BIOS (MSDOS partition tables) support" if PARTITION_ADVANCED + default y + help + Say Y here. + +config BSD_DISKLABEL + bool "BSD disklabel (FreeBSD partition tables) support" + depends on PARTITION_ADVANCED && MSDOS_PARTITION + help + FreeBSD uses its own hard disk partition scheme on your PC. It + requires only one entry in the primary partition table of your disk + and manages it similarly to DOS extended partitions, putting in its + first sector a new partition table in BSD disklabel format. Saying Y + here allows you to read these disklabels and further mount FreeBSD + partitions from within Linux if you have also said Y to "UFS + file system support", above. If you don't know what all this is + about, say N. + +config MINIX_SUBPARTITION + bool "Minix subpartition support" + depends on PARTITION_ADVANCED && MSDOS_PARTITION + help + Minix 2.0.0/2.0.2 subpartition table support for Linux. + Say Y here if you want to mount and use Minix 2.0.0/2.0.2 + subpartitions. + +config SOLARIS_X86_PARTITION + bool "Solaris (x86) partition table support" + depends on PARTITION_ADVANCED && MSDOS_PARTITION + help + Like most systems, Solaris x86 uses its own hard disk partition + table format, incompatible with all others. 
Saying Y here allows you + to read these partition tables and further mount Solaris x86 + partitions from within Linux if you have also said Y to "UFS + file system support", above. + +config UNIXWARE_DISKLABEL + bool "Unixware slices support" + depends on PARTITION_ADVANCED && MSDOS_PARTITION + ---help--- + Like some systems, UnixWare uses its own slice table inside a + partition (VTOC - Virtual Table of Contents). Its format is + incompatible with all other OSes. Saying Y here allows you to read + VTOC and further mount UnixWare partitions read-only from within + Linux if you have also said Y to "UFS file system support" or + "System V and Coherent file system support", above. + + This is mainly used to carry data from a UnixWare box to your + Linux box via a removable medium like magneto-optical, ZIP or + removable IDE drives. Note, however, that a good portable way to + transport files and directories between unixes (and even other + operating systems) is given by the tar program ("man tar" or + preferably "info tar"). + + If you don't know what all this is about, say N. + +config LDM_PARTITION + bool "Windows Logical Disk Manager (Dynamic Disk) support" + depends on PARTITION_ADVANCED + ---help--- + Say Y here if you would like to use hard disks under Linux which + were partitioned using Windows 2000's/XP's or Vista's Logical Disk + Manager. They are also known as "Dynamic Disks". + + Note this driver only supports Dynamic Disks with a protective MBR + label, i.e. DOS partition table. It does not support GPT labelled + Dynamic Disks yet as can be created with Vista. + + Windows 2000 introduced the concept of Dynamic Disks to get around + the limitations of the PC's partitioning scheme. The Logical Disk + Manager allows the user to repartition a disk and create spanned, + mirrored, striped or RAID volumes, all without the need for + rebooting. + + Normal partitions are now called Basic Disks under Windows 2000, XP, + and Vista. 
+ + For a fuller description read <file:Documentation/ldm.txt>. + + If unsure, say N. + +config LDM_DEBUG + bool "Windows LDM extra logging" + depends on LDM_PARTITION + help + Say Y here if you would like LDM to log verbosely. This could be + helpful if the driver doesn't work as expected and you'd like to + report a bug. + + If unsure, say N. + +config SGI_PARTITION + bool "SGI partition support" if PARTITION_ADVANCED + default y if DEFAULT_SGI_PARTITION + help + Say Y here if you would like to be able to read the hard disk + partition table format used by SGI machines. + +config ULTRIX_PARTITION + bool "Ultrix partition table support" if PARTITION_ADVANCED + default y if MACH_DECSTATION + help + Say Y here if you would like to be able to read the hard disk + partition table format used by DEC (now Compaq) Ultrix machines. + Otherwise, say N. + +config SUN_PARTITION + bool "Sun partition tables support" if PARTITION_ADVANCED + default y if (SPARC || SUN3 || SUN3X) + ---help--- + Like most systems, SunOS uses its own hard disk partition table + format, incompatible with all others. Saying Y here allows you to + read these partition tables and further mount SunOS partitions from + within Linux if you have also said Y to "UFS file system support", + above. This is mainly used to carry data from a SPARC under SunOS to + your Linux box via a removable medium like magneto-optical or ZIP + drives; note however that a good portable way to transport files and + directories between unixes (and even other operating systems) is + given by the tar program ("man tar" or preferably "info tar"). If + you don't know what all this is about, say N. + +config KARMA_PARTITION + bool "Karma Partition support" + depends on PARTITION_ADVANCED + help + Say Y here if you would like to mount the Rio Karma MP3 player, as it + uses a proprietary partition table. 
+ +config EFI_PARTITION + bool "EFI GUID Partition support" + depends on PARTITION_ADVANCED + select CRC32 + help + Say Y here if you would like to use hard disks under Linux which + were partitioned using EFI GPT. + +config SYSV68_PARTITION + bool "SYSV68 partition table support" if PARTITION_ADVANCED + default y if VME + help + Say Y here if you would like to be able to read the hard disk + partition table format used by Motorola Delta machines (using + sysv68). + Otherwise, say N. diff --git a/block/partitions/Makefile b/block/partitions/Makefile new file mode 100644 index 000000000000..03af8eac51da --- /dev/null +++ b/block/partitions/Makefile @@ -0,0 +1,20 @@ +# +# Makefile for the linux kernel. +# + +obj-$(CONFIG_BLOCK) := check.o + +obj-$(CONFIG_ACORN_PARTITION) += acorn.o +obj-$(CONFIG_AMIGA_PARTITION) += amiga.o +obj-$(CONFIG_ATARI_PARTITION) += atari.o +obj-$(CONFIG_MAC_PARTITION) += mac.o +obj-$(CONFIG_LDM_PARTITION) += ldm.o +obj-$(CONFIG_MSDOS_PARTITION) += msdos.o +obj-$(CONFIG_OSF_PARTITION) += osf.o +obj-$(CONFIG_SGI_PARTITION) += sgi.o +obj-$(CONFIG_SUN_PARTITION) += sun.o +obj-$(CONFIG_ULTRIX_PARTITION) += ultrix.o +obj-$(CONFIG_IBM_PARTITION) += ibm.o +obj-$(CONFIG_EFI_PARTITION) += efi.o +obj-$(CONFIG_KARMA_PARTITION) += karma.o +obj-$(CONFIG_SYSV68_PARTITION) += sysv68.o diff --git a/block/partitions/acorn.c b/block/partitions/acorn.c new file mode 100644 index 000000000000..fbeb697374d5 --- /dev/null +++ b/block/partitions/acorn.c @@ -0,0 +1,556 @@ +/* + * linux/fs/partitions/acorn.c + * + * Copyright (c) 1996-2000 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Scan ADFS partitions on hard disk drives. Unfortunately, there + * isn't a standard for partitioning drives on Acorn machines, so + * every single manufacturer of SCSI and IDE cards created their own + * method. 
+ */ +#include <linux/buffer_head.h> +#include <linux/adfs_fs.h> + +#include "check.h" +#include "acorn.h" + +/* + * Partition types. (Oh for reusability) + */ +#define PARTITION_RISCIX_MFM 1 +#define PARTITION_RISCIX_SCSI 2 +#define PARTITION_LINUX 9 + +#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ + defined(CONFIG_ACORN_PARTITION_ADFS) +/* + * Check the ADFS boot block in @data and, if its disc record (at offset + * 0x1c0) reports a non-zero size, register partition @slot starting at + * @first_sector. A non-NULL @name is appended to pp_buf as " [name]". + * Returns the disc record, or NULL if adfs_checkbblk() rejects the block + * or both size fields are zero. + */ +static struct adfs_discrecord * +adfs_partition(struct parsed_partitions *state, char *name, char *data, + unsigned long first_sector, int slot) +{ + struct adfs_discrecord *dr; + unsigned int nr_sects; + + if (adfs_checkbblk(data)) + return NULL; + + dr = (struct adfs_discrecord *)(data + 0x1c0); + + if (dr->disc_size == 0 && dr->disc_size_high == 0) + return NULL; + + /* size in bytes (high:low 32-bit halves) converted to 512-byte sectors */ + nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | + (le32_to_cpu(dr->disc_size) >> 9); + + if (name) { + strlcat(state->pp_buf, " [", PAGE_SIZE); + strlcat(state->pp_buf, name, PAGE_SIZE); + strlcat(state->pp_buf, "]", PAGE_SIZE); + } + put_partition(state, slot, first_sector, nr_sects); + return dr; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_RISCIX + +struct riscix_part { + __le32 start; + __le32 length; + __le32 one; + char name[16]; +}; + +struct riscix_record { + __le32 magic; +#define RISCIX_MAGIC cpu_to_le32(0x4a657320) + __le32 date; + struct riscix_part part[8]; +}; + +#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ + defined(CONFIG_ACORN_PARTITION_ADFS) +/* + * Read the sector at @first_sect and look for a RISCiX partition record. + * Returns the next free slot, or -1 if the sector cannot be read. + */ +static int riscix_partition(struct parsed_partitions *state, + unsigned long first_sect, int slot, + unsigned long nr_sects) +{ + Sector sect; + struct riscix_record *rr; + + /* sect receives the page reference that put_dev_sector() drops later */ + rr = read_part_sector(state, first_sect, &sect); + if (!rr) + return -1; + + strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE); + + + if (rr->magic == RISCIX_MAGIC) { + unsigned long size = nr_sects > 2 ? 
2 : nr_sects; + int part; + + strlcat(state->pp_buf, " <", PAGE_SIZE); + + put_partition(state, slot++, first_sect, size); + for (part = 0; part < 8; part++) { + if (rr->part[part].one && + memcmp(rr->part[part].name, "All\0", 4)) { + put_partition(state, slot++, + le32_to_cpu(rr->part[part].start), + le32_to_cpu(rr->part[part].length)); + strlcat(state->pp_buf, "(", PAGE_SIZE); + strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE); + strlcat(state->pp_buf, ")", PAGE_SIZE); + } + } + + strlcat(state->pp_buf, " >\n", PAGE_SIZE); + } else { + put_partition(state, slot++, first_sect, nr_sects); + } + + put_dev_sector(sect); + return slot; +} +#endif +#endif + +#define LINUX_NATIVE_MAGIC 0xdeafa1de +#define LINUX_SWAP_MAGIC 0xdeafab1e + +struct linux_part { + __le32 magic; + __le32 start_sect; + __le32 nr_sects; +}; + +#if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ + defined(CONFIG_ACORN_PARTITION_ADFS) +static int linux_partition(struct parsed_partitions *state, + unsigned long first_sect, int slot, + unsigned long nr_sects) +{ + Sector sect; + struct linux_part *linuxp; + unsigned long size = nr_sects > 2 ? 
2 : nr_sects; + + strlcat(state->pp_buf, " [Linux]", PAGE_SIZE); + + put_partition(state, slot++, first_sect, size); + + linuxp = read_part_sector(state, first_sect, §); + if (!linuxp) + return -1; + + strlcat(state->pp_buf, " <", PAGE_SIZE); + while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || + linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { + if (slot == state->limit) + break; + put_partition(state, slot++, first_sect + + le32_to_cpu(linuxp->start_sect), + le32_to_cpu(linuxp->nr_sects)); + linuxp ++; + } + strlcat(state->pp_buf, " >", PAGE_SIZE); + + put_dev_sector(sect); + return slot; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_CUMANA +int adfspart_check_CUMANA(struct parsed_partitions *state) +{ + unsigned long first_sector = 0; + unsigned int start_blk = 0; + Sector sect; + unsigned char *data; + char *name = "CUMANA/ADFS"; + int first = 1; + int slot = 1; + + /* + * Try Cumana style partitions - sector 6 contains ADFS boot block + * with pointer to next 'drive'. + * + * There are unknowns in this code - is the 'cylinder number' of the + * next partition relative to the start of this one - I'm assuming + * it is. + * + * Also, which ID did Cumana use? + * + * This is totally unfinished, and will require more work to get it + * going. Hence it is totally untested. + */ + do { + struct adfs_discrecord *dr; + unsigned int nr_sects; + + data = read_part_sector(state, start_blk * 2 + 6, §); + if (!data) + return -1; + + if (slot == state->limit) + break; + + dr = adfs_partition(state, name, data, first_sector, slot++); + if (!dr) + break; + + name = NULL; + + nr_sects = (data[0x1fd] + (data[0x1fe] << 8)) * + (dr->heads + (dr->lowsector & 0x40 ? 1 : 0)) * + dr->secspertrack; + + if (!nr_sects) + break; + + first = 0; + first_sector += nr_sects; + start_blk += nr_sects >> (BLOCK_SIZE_BITS - 9); + nr_sects = 0; /* hmm - should be partition size */ + + switch (data[0x1fc] & 15) { + case 0: /* No partition / ADFS? 
*/ + break; + +#ifdef CONFIG_ACORN_PARTITION_RISCIX + case PARTITION_RISCIX_SCSI: + /* RISCiX - we don't know how to find the next one. */ + slot = riscix_partition(state, first_sector, slot, + nr_sects); + break; +#endif + + case PARTITION_LINUX: + slot = linux_partition(state, first_sector, slot, + nr_sects); + break; + } + put_dev_sector(sect); + if (slot == -1) + return -1; + } while (1); + put_dev_sector(sect); + return first ? 0 : 1; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_ADFS +/* + * Purpose: allocate ADFS partitions. + * + * Params : hd - pointer to gendisk structure to store partition info. + * dev - device number to access. + * + * Returns: -1 on error, 0 for no ADFS boot sector, 1 for ok. + * + * Alloc : hda = whole drive + * hda1 = ADFS partition on first drive. + * hda2 = non-ADFS partition. + */ +int adfspart_check_ADFS(struct parsed_partitions *state) +{ + unsigned long start_sect, nr_sects, sectscyl, heads; + Sector sect; + unsigned char *data; + struct adfs_discrecord *dr; + unsigned char id; + int slot = 1; + + data = read_part_sector(state, 6, §); + if (!data) + return -1; + + dr = adfs_partition(state, "ADFS", data, 0, slot++); + if (!dr) { + put_dev_sector(sect); + return 0; + } + + heads = dr->heads + ((dr->lowsector >> 6) & 1); + sectscyl = dr->secspertrack * heads; + start_sect = ((data[0x1fe] << 8) + data[0x1fd]) * sectscyl; + id = data[0x1fc] & 15; + put_dev_sector(sect); + + /* + * Work out start of non-adfs partition. 
+ */ + nr_sects = (state->bdev->bd_inode->i_size >> 9) - start_sect; + + if (start_sect) { + switch (id) { +#ifdef CONFIG_ACORN_PARTITION_RISCIX + case PARTITION_RISCIX_SCSI: + case PARTITION_RISCIX_MFM: + slot = riscix_partition(state, start_sect, slot, + nr_sects); + break; +#endif + + case PARTITION_LINUX: + slot = linux_partition(state, start_sect, slot, + nr_sects); + break; + } + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_ICS + +struct ics_part { + __le32 start; + __le32 size; +}; + +static int adfspart_check_ICSLinux(struct parsed_partitions *state, + unsigned long block) +{ + Sector sect; + unsigned char *data = read_part_sector(state, block, §); + int result = 0; + + if (data) { + if (memcmp(data, "LinuxPart", 9) == 0) + result = 1; + put_dev_sector(sect); + } + + return result; +} + +/* + * Check for a valid ICS partition using the checksum. + */ +static inline int valid_ics_sector(const unsigned char *data) +{ + unsigned long sum; + int i; + + for (i = 0, sum = 0x50617274; i < 508; i++) + sum += data[i]; + + sum -= le32_to_cpu(*(__le32 *)(&data[508])); + + return sum == 0; +} + +/* + * Purpose: allocate ICS partitions. + * Params : hd - pointer to gendisk structure to store partition info. + * dev - device number to access. + * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok. + * Alloc : hda = whole drive + * hda1 = ADFS partition 0 on first drive. + * hda2 = ADFS partition 1 on first drive. + * ..etc.. + */ +int adfspart_check_ICS(struct parsed_partitions *state) +{ + const unsigned char *data; + const struct ics_part *p; + int slot; + Sector sect; + + /* + * Try ICS style partitions - sector 0 contains partition info. 
+ */ + data = read_part_sector(state, 0, §); + if (!data) + return -1; + + if (!valid_ics_sector(data)) { + put_dev_sector(sect); + return 0; + } + + strlcat(state->pp_buf, " [ICS]", PAGE_SIZE); + + for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { + u32 start = le32_to_cpu(p->start); + s32 size = le32_to_cpu(p->size); /* yes, it's signed. */ + + if (slot == state->limit) + break; + + /* + * Negative sizes tell the RISC OS ICS driver to ignore + * this partition - in effect it says that this does not + * contain an ADFS filesystem. + */ + if (size < 0) { + size = -size; + + /* + * Our own extension - We use the first sector + * of the partition to identify what type this + * partition is. We must not make this visible + * to the filesystem. + */ + if (size > 1 && adfspart_check_ICSLinux(state, start)) { + start += 1; + size -= 1; + } + } + + if (size) + put_partition(state, slot++, start, size); + } + + put_dev_sector(sect); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_POWERTEC +struct ptec_part { + __le32 unused1; + __le32 unused2; + __le32 start; + __le32 size; + __le32 unused5; + char type[8]; +}; + +static inline int valid_ptec_sector(const unsigned char *data) +{ + unsigned char checksum = 0x2a; + int i; + + /* + * If it looks like a PC/BIOS partition, then it + * probably isn't PowerTec. + */ + if (data[510] == 0x55 && data[511] == 0xaa) + return 0; + + for (i = 0; i < 511; i++) + checksum += data[i]; + + return checksum == data[511]; +} + +/* + * Purpose: allocate ICS partitions. + * Params : hd - pointer to gendisk structure to store partition info. + * dev - device number to access. + * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok. + * Alloc : hda = whole drive + * hda1 = ADFS partition 0 on first drive. + * hda2 = ADFS partition 1 on first drive. + * ..etc.. 
+ */ +int adfspart_check_POWERTEC(struct parsed_partitions *state) +{ + Sector sect; + const unsigned char *data; + const struct ptec_part *p; + int slot = 1; + int i; + + data = read_part_sector(state, 0, &sect); + if (!data) + return -1; + + if (!valid_ptec_sector(data)) { + put_dev_sector(sect); + return 0; + } + + strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE); + + /* 12 fixed table entries; entries with a zero size are skipped */ + for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { + u32 start = le32_to_cpu(p->start); + u32 size = le32_to_cpu(p->size); + + if (size) + put_partition(state, slot++, start, size); + } + + put_dev_sector(sect); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} +#endif + +#ifdef CONFIG_ACORN_PARTITION_EESOX +struct eesox_part { + char magic[6]; + char name[10]; + __le32 start; + __le32 unused6; + __le32 unused7; + __le32 unused8; +}; + +/* + * Guess who created this format? + */ +static const char eesox_name[] = { + 'N', 'e', 'i', 'l', ' ', + 'C', 'r', 'i', 't', 'c', 'h', 'e', 'l', 'l', ' ', ' ' +}; + +/* + * EESOX SCSI partition format. + * + * This is a goddamned awful partition format. We don't seem to store + * the size of the partition in this table, only the start addresses. + * + * There are two possibilities where the size comes from: + * 1. The individual ADFS boot block entries that are placed on the disk. + * 2. The start address of the next entry. + */ +int adfspart_check_EESOX(struct parsed_partitions *state) +{ + Sector sect; + const unsigned char *data; + unsigned char buffer[256]; + struct eesox_part *p; + sector_t start = 0; + int i, slot = 1; + + /* the EESOX table is read from sector 7 */ + data = read_part_sector(state, 7, &sect); + if (!data) + return -1; + + /* + * "Decrypt" the partition table. God knows why... 
+ */ + for (i = 0; i < 256; i++) + buffer[i] = data[i] ^ eesox_name[i & 15]; + + put_dev_sector(sect); + + /* + * Each entry only carries a start address, so a partition's size is the + * distance to the next entry's start; the last one runs to the end of + * the disk (see below). + */ + for (i = 0, p = (struct eesox_part *)buffer; i < 8; i++, p++) { + sector_t next; + + if (memcmp(p->magic, "Eesox", 6)) + break; + + next = le32_to_cpu(p->start); + if (i) + put_partition(state, slot++, start, next - start); + start = next; + } + + if (i != 0) { + sector_t size; + + size = get_capacity(state->bdev->bd_disk); + put_partition(state, slot++, start, size - start); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + } + + return i ? 1 : 0; +} +#endif diff --git a/block/partitions/acorn.h b/block/partitions/acorn.h new file mode 100644 index 000000000000..ede828529692 --- /dev/null +++ b/block/partitions/acorn.h @@ -0,0 +1,14 @@ +/* + * linux/fs/partitions/acorn.h + * + * Copyright (C) 1996-2001 Russell King. + * + * I _hate_ this partitioning mess - why can't we have one defined + * format, and everyone stick to it? + */ + +int adfspart_check_CUMANA(struct parsed_partitions *state); +int adfspart_check_ADFS(struct parsed_partitions *state); +int adfspart_check_ICS(struct parsed_partitions *state); +int adfspart_check_POWERTEC(struct parsed_partitions *state); +int adfspart_check_EESOX(struct parsed_partitions *state); diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c new file mode 100644 index 000000000000..70cbf44a1560 --- /dev/null +++ b/block/partitions/amiga.c @@ -0,0 +1,139 @@ +/* + * fs/partitions/amiga.c + * + * Code extracted from drivers/block/genhd.c + * + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + */ + +#include <linux/types.h> +#include <linux/affs_hardblocks.h> + +#include "check.h" +#include "amiga.h" + +/* + * Return the 32-bit wrapping sum of @size big-endian words starting at @m. + * Callers in this file treat a sum of zero as a valid block. + */ +static __inline__ u32 +checksum_block(__be32 *m, int size) +{ + u32 sum = 0; + + while (size--) + sum += be32_to_cpu(*m++); + return sum; +} + +/* + * Look for an Amiga Rigid Disk Block and add the partitions it lists. + * res starts at 0, becomes 1 once a partition has been added and -1 when + * a block cannot be read. + */ +int amiga_partition(struct parsed_partitions *state) +{ + Sector sect; + unsigned char *data; + struct RigidDiskBlock *rdb; + struct 
PartitionBlock *pb; + int start_sect, nr_sects, blk, part, res = 0; + int blksize = 1; /* Multiplier for disk block size */ + int slot = 1; + char b[BDEVNAME_SIZE]; + + /* search blocks 0..RDB_ALLOCATION_LIMIT-1 for an RDB with a good checksum */ + for (blk = 0; ; blk++, put_dev_sector(sect)) { + if (blk == RDB_ALLOCATION_LIMIT) + goto rdb_done; + data = read_part_sector(state, blk, &sect); + if (!data) { + if (warn_no_part) + printk("Dev %s: unable to read RDB block %d\n", + bdevname(state->bdev, b), blk); + res = -1; + goto rdb_done; + } + if (*(__be32 *)data != cpu_to_be32(IDNAME_RIGIDDISK)) + continue; + + rdb = (struct RigidDiskBlock *)data; + if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F) == 0) + break; + /* Try again with 0xdc..0xdf zeroed, Windows might have + * trashed it. + */ + /* NOTE(review): the message below says 0xd0 but the word cleared is at 0xdc */ + *(__be32 *)(data+0xdc) = 0; + if (checksum_block((__be32 *)data, + be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { + printk("Warning: Trashed word at 0xd0 in block %d " + "ignored in checksum calculation\n",blk); + break; + } + + printk("Dev %s: RDB in block %d has bad checksum\n", + bdevname(state->bdev, b), blk); + } + + /* blksize is the number of 512 byte sectors per RDB block */ + blksize = be32_to_cpu( rdb->rdb_BlockBytes ) / 512; + + { + char tmp[7 + 10 + 1 + 1]; + + /* Be more informative */ + snprintf(tmp, sizeof(tmp), " RDSK (%d)", blksize * 512); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + blk = be32_to_cpu(rdb->rdb_PartitionList); + put_dev_sector(sect); + /* follow the pb_Next chain, visiting at most 16 partition blocks */ + for (part = 1; blk>0 && part<=16; part++, put_dev_sector(sect)) { + blk *= blksize; /* Read in terms partition table understands */ + data = read_part_sector(state, blk, &sect); + if (!data) { + if (warn_no_part) + printk("Dev %s: unable to read partition block %d\n", + bdevname(state->bdev, b), blk); + res = -1; + goto rdb_done; + } + pb = (struct PartitionBlock *)data; + blk = be32_to_cpu(pb->pb_Next); + if (pb->pb_ID != cpu_to_be32(IDNAME_PARTITION)) + continue; + if (checksum_block((__be32 *)pb, be32_to_cpu(pb->pb_SummedLongs) & 0x7F) != 0 ) + continue; + + /* Tell Kernel 
about it */ + + nr_sects = (be32_to_cpu(pb->pb_Environment[10]) + 1 - + be32_to_cpu(pb->pb_Environment[9])) * + be32_to_cpu(pb->pb_Environment[3]) * + be32_to_cpu(pb->pb_Environment[5]) * + blksize; + if (!nr_sects) + continue; + start_sect = be32_to_cpu(pb->pb_Environment[9]) * + be32_to_cpu(pb->pb_Environment[3]) * + be32_to_cpu(pb->pb_Environment[5]) * + blksize; + put_partition(state,slot++,start_sect,nr_sects); + { + /* Be even more informative to aid mounting */ + char dostype[4]; + char tmp[42]; + + __be32 *dt = (__be32 *)dostype; + *dt = pb->pb_Environment[16]; + if (dostype[3] < ' ') + snprintf(tmp, sizeof(tmp), " (%c%c%c^%c)", + dostype[0], dostype[1], + dostype[2], dostype[3] + '@' ); + else + snprintf(tmp, sizeof(tmp), " (%c%c%c%c)", + dostype[0], dostype[1], + dostype[2], dostype[3]); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + snprintf(tmp, sizeof(tmp), "(res %d spb %d)", + be32_to_cpu(pb->pb_Environment[6]), + be32_to_cpu(pb->pb_Environment[4])); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + res = 1; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + +rdb_done: + return res; +} diff --git a/block/partitions/amiga.h b/block/partitions/amiga.h new file mode 100644 index 000000000000..d094585cadaa --- /dev/null +++ b/block/partitions/amiga.h @@ -0,0 +1,6 @@ +/* + * fs/partitions/amiga.h + */ + +int amiga_partition(struct parsed_partitions *state); + diff --git a/block/partitions/atari.c b/block/partitions/atari.c new file mode 100644 index 000000000000..9875b05e80a2 --- /dev/null +++ b/block/partitions/atari.c @@ -0,0 +1,149 @@ +/* + * fs/partitions/atari.c + * + * Code extracted from drivers/block/genhd.c + * + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + */ + +#include <linux/ctype.h> +#include "check.h" +#include "atari.h" + +/* ++guenther: this should be settable by the user ("make config")?. 
+ */ +#define ICD_PARTS + +/* check if a partition entry looks valid -- Atari format is assumed if at + least one of the primary entries is ok this way */ +#define VALID_PARTITION(pi,hdsiz) \ + (((pi)->flg & 1) && \ + isalnum((pi)->id[0]) && isalnum((pi)->id[1]) && isalnum((pi)->id[2]) && \ + be32_to_cpu((pi)->st) <= (hdsiz) && \ + be32_to_cpu((pi)->st) + be32_to_cpu((pi)->siz) <= (hdsiz)) + +/* non-zero if the 3-byte id at @s is one of GEM, BGM, LNX, SWP or RAW */ +static inline int OK_id(char *s) +{ + return memcmp (s, "GEM", 3) == 0 || memcmp (s, "BGM", 3) == 0 || + memcmp (s, "LNX", 3) == 0 || memcmp (s, "SWP", 3) == 0 || + memcmp (s, "RAW", 3) == 0 ; +} + +/* + * Parse an Atari root sector (sector 0). Returns -1 if it cannot be read + * and 0 if none of the four primary entries passes VALID_PARTITION(). + */ +int atari_partition(struct parsed_partitions *state) +{ + Sector sect; + struct rootsector *rs; + struct partition_info *pi; + u32 extensect; + u32 hd_size; + int slot; +#ifdef ICD_PARTS + int part_fmt = 0; /* 0:unknown, 1:AHDI, 2:ICD/Supra */ +#endif + + rs = read_part_sector(state, 0, &sect); + if (!rs) + return -1; + + /* Verify this is an Atari rootsector: */ + hd_size = state->bdev->bd_inode->i_size >> 9; + if (!VALID_PARTITION(&rs->part[0], hd_size) && + !VALID_PARTITION(&rs->part[1], hd_size) && + !VALID_PARTITION(&rs->part[2], hd_size) && + !VALID_PARTITION(&rs->part[3], hd_size)) { + /* + * if there's no valid primary partition, assume that no Atari + * format partition table (there's no reliable magic or the like + * :-() + */ + put_dev_sector(sect); + return 0; + } + + pi = &rs->part[0]; + strlcat(state->pp_buf, " AHDI", PAGE_SIZE); + for (slot = 1; pi < &rs->part[4] && slot < state->limit; slot++, pi++) { + struct rootsector *xrs; + Sector sect2; + ulong partsect; + + if ( !(pi->flg & 1) ) + continue; + /* active partition */ + if (memcmp (pi->id, "XGM", 3) != 0) { + /* we don't care about other id's */ + put_partition (state, slot, be32_to_cpu(pi->st), + be32_to_cpu(pi->siz)); + continue; + } + /* extension partition */ +#ifdef ICD_PARTS + part_fmt = 1; +#endif + strlcat(state->pp_buf, " XGM<", PAGE_SIZE); + /* extensect keeps the start of the first extended sector for later links */ + partsect = extensect = be32_to_cpu(pi->st); + while (1) { + xrs = 
read_part_sector(state, partsect, &sect2); + if (!xrs) { + printk (" block %ld read failed\n", partsect); + put_dev_sector(sect); + return -1; + } + + /* ++roman: sanity check: bit 0 of flg field must be set */ + if (!(xrs->part[0].flg & 1)) { + printk( "\nFirst sub-partition in extended partition is not valid!\n" ); + put_dev_sector(sect2); + break; + } + + put_partition(state, slot, + partsect + be32_to_cpu(xrs->part[0].st), + be32_to_cpu(xrs->part[0].siz)); + + if (!(xrs->part[1].flg & 1)) { + /* end of linked partition list */ + put_dev_sector(sect2); + break; + } + if (memcmp( xrs->part[1].id, "XGM", 3 ) != 0) { + printk("\nID of extended partition is not XGM!\n"); + put_dev_sector(sect2); + break; + } + + partsect = be32_to_cpu(xrs->part[1].st) + extensect; + put_dev_sector(sect2); + if (++slot == state->limit) { + printk( "\nMaximum number of partitions reached!\n" ); + break; + } + } + strlcat(state->pp_buf, " >", PAGE_SIZE); + } +#ifdef ICD_PARTS + if ( part_fmt!=1 ) { /* no extended partitions -> test ICD-format */ + pi = &rs->icdpart[0]; + /* sanity check: no ICD format if first partition invalid */ + if (OK_id(pi->id)) { + strlcat(state->pp_buf, " ICD<", PAGE_SIZE); + for (; pi < &rs->icdpart[8] && slot < state->limit; slot++, pi++) { + /* accept only GEM,BGM,RAW,LNX,SWP partitions */ + if (!((pi->flg & 1) && OK_id(pi->id))) + continue; + part_fmt = 2; + put_partition (state, slot, + be32_to_cpu(pi->st), + be32_to_cpu(pi->siz)); + } + strlcat(state->pp_buf, " >", PAGE_SIZE); + } + } +#endif + put_dev_sector(sect); + + strlcat(state->pp_buf, "\n", PAGE_SIZE); + + return 1; +} diff --git a/block/partitions/atari.h b/block/partitions/atari.h new file mode 100644 index 000000000000..fe2d32a89f36 --- /dev/null +++ b/block/partitions/atari.h @@ -0,0 +1,34 @@ +/* + * fs/partitions/atari.h + * Moved by Russell King from: + * + * linux/include/linux/atari_rootsec.h + * definitions for Atari Rootsector layout + * by Andreas Schwab 
(schwab@ls5.informatik.uni-dortmund.de) + * + * modified for ICD/Supra partitioning scheme restricted to at most 12 + * partitions + * by Guenther Kelleter (guenther@pool.informatik.rwth-aachen.de) + */ + +struct partition_info +{ + u8 flg; /* bit 0: active; bit 7: bootable */ + char id[3]; /* "GEM", "BGM", "XGM", or other */ + __be32 st; /* start of partition */ + __be32 siz; /* length of partition */ +}; + +struct rootsector +{ + char unused[0x156]; /* room for boot code */ + struct partition_info icdpart[8]; /* info for ICD-partitions 5..12 */ + char unused2[0xc]; + u32 hd_siz; /* size of disk in blocks */ + struct partition_info part[4]; + u32 bsl_st; /* start of bad sector list */ + u32 bsl_cnt; /* length of bad sector list */ + u16 checksum; /* checksum for bootable disks */ +} __attribute__((__packed__)); + +int atari_partition(struct parsed_partitions *state); diff --git a/block/partitions/check.c b/block/partitions/check.c new file mode 100644 index 000000000000..bc908672c976 --- /dev/null +++ b/block/partitions/check.c @@ -0,0 +1,166 @@ +/* + * fs/partitions/check.c + * + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + * + * We now have independent partition support from the + * block drivers, which allows all the partition code to + * be grouped in one location, and it to be mostly self + * contained. 
+ * + * Added needed MAJORS for new pairs, {hdi,hdj}, {hdk,hdl} + */ + +#include <linux/slab.h> +#include <linux/ctype.h> +#include <linux/genhd.h> + +#include "check.h" + +#include "acorn.h" +#include "amiga.h" +#include "atari.h" +#include "ldm.h" +#include "mac.h" +#include "msdos.h" +#include "osf.h" +#include "sgi.h" +#include "sun.h" +#include "ibm.h" +#include "ultrix.h" +#include "efi.h" +#include "karma.h" +#include "sysv68.h" + +int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/ + +static int (*check_part[])(struct parsed_partitions *) = { + /* + * Probe partition formats with tables at disk address 0 + * that also have an ADFS boot block at 0xdc0. + */ +#ifdef CONFIG_ACORN_PARTITION_ICS + adfspart_check_ICS, +#endif +#ifdef CONFIG_ACORN_PARTITION_POWERTEC + adfspart_check_POWERTEC, +#endif +#ifdef CONFIG_ACORN_PARTITION_EESOX + adfspart_check_EESOX, +#endif + + /* + * Now move on to formats that only have partition info at + * disk address 0xdc0. Since these may also have stale + * PC/BIOS partition tables, they need to come before + * the msdos entry. 
+ */ +#ifdef CONFIG_ACORN_PARTITION_CUMANA + adfspart_check_CUMANA, +#endif +#ifdef CONFIG_ACORN_PARTITION_ADFS + adfspart_check_ADFS, +#endif + +#ifdef CONFIG_EFI_PARTITION + efi_partition, /* this must come before msdos */ +#endif +#ifdef CONFIG_SGI_PARTITION + sgi_partition, +#endif +#ifdef CONFIG_LDM_PARTITION + ldm_partition, /* this must come before msdos */ +#endif +#ifdef CONFIG_MSDOS_PARTITION + msdos_partition, +#endif +#ifdef CONFIG_OSF_PARTITION + osf_partition, +#endif +#ifdef CONFIG_SUN_PARTITION + sun_partition, +#endif +#ifdef CONFIG_AMIGA_PARTITION + amiga_partition, +#endif +#ifdef CONFIG_ATARI_PARTITION + atari_partition, +#endif +#ifdef CONFIG_MAC_PARTITION + mac_partition, +#endif +#ifdef CONFIG_ULTRIX_PARTITION + ultrix_partition, +#endif +#ifdef CONFIG_IBM_PARTITION + ibm_partition, +#endif +#ifdef CONFIG_KARMA_PARTITION + karma_partition, +#endif +#ifdef CONFIG_SYSV68_PARTITION + sysv68_partition, +#endif + NULL +}; + +struct parsed_partitions * +check_partition(struct gendisk *hd, struct block_device *bdev) +{ + struct parsed_partitions *state; + int i, res, err; + + state = kzalloc(sizeof(struct parsed_partitions), GFP_KERNEL); + if (!state) + return NULL; + state->pp_buf = (char *)__get_free_page(GFP_KERNEL); + if (!state->pp_buf) { + kfree(state); + return NULL; + } + state->pp_buf[0] = '\0'; + + state->bdev = bdev; + disk_name(hd, 0, state->name); + snprintf(state->pp_buf, PAGE_SIZE, " %s:", state->name); + if (isdigit(state->name[strlen(state->name)-1])) + sprintf(state->name, "p"); + + state->limit = disk_max_parts(hd); + i = res = err = 0; + while (!res && check_part[i]) { + memset(&state->parts, 0, sizeof(state->parts)); + res = check_part[i++](state); + if (res < 0) { + /* We have hit an I/O error which we don't report now. + * But record it, and let the others do their job. 
+ */ + err = res; + res = 0; + } + + } + if (res > 0) { + printk(KERN_INFO "%s", state->pp_buf); + + free_page((unsigned long)state->pp_buf); + return state; + } + if (state->access_beyond_eod) + err = -ENOSPC; + if (err) + /* The partition is unrecognized. So report I/O errors if there were any */ + res = err; + if (!res) + strlcat(state->pp_buf, " unknown partition table\n", PAGE_SIZE); + else if (warn_no_part) + strlcat(state->pp_buf, " unable to read partition table\n", PAGE_SIZE); + + printk(KERN_INFO "%s", state->pp_buf); + + free_page((unsigned long)state->pp_buf); + kfree(state); + return ERR_PTR(res); +} diff --git a/block/partitions/check.h b/block/partitions/check.h new file mode 100644 index 000000000000..52b100311ec3 --- /dev/null +++ b/block/partitions/check.h @@ -0,0 +1,52 @@ +#include <linux/pagemap.h> +#include <linux/blkdev.h> +#include <linux/genhd.h> + +/* + * add_gd_partition adds a partitions details to the devices partition + * description. + */ +struct parsed_partitions { + struct block_device *bdev; + char name[BDEVNAME_SIZE]; + struct { + sector_t from; + sector_t size; + int flags; + bool has_info; + struct partition_meta_info info; + } parts[DISK_MAX_PARTS]; + int next; + int limit; + bool access_beyond_eod; + char *pp_buf; +}; + +struct parsed_partitions * +check_partition(struct gendisk *, struct block_device *); + +static inline void *read_part_sector(struct parsed_partitions *state, + sector_t n, Sector *p) +{ + if (n >= get_capacity(state->bdev->bd_disk)) { + state->access_beyond_eod = true; + return NULL; + } + return read_dev_sector(state->bdev, n, p); +} + +static inline void +put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) +{ + if (n < p->limit) { + char tmp[1 + BDEVNAME_SIZE + 10 + 1]; + + p->parts[n].from = from; + p->parts[n].size = size; + snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); + strlcat(p->pp_buf, tmp, PAGE_SIZE); + } +} + +extern int warn_no_part; + diff --git 
a/block/partitions/efi.c b/block/partitions/efi.c new file mode 100644 index 000000000000..6296b403c67a --- /dev/null +++ b/block/partitions/efi.c @@ -0,0 +1,675 @@ +/************************************************************ + * EFI GUID Partition Table handling + * + * http://www.uefi.org/specs/ + * http://www.intel.com/technology/efi/ + * + * efi.[ch] by Matt Domsch <Matt_Domsch@dell.com> + * Copyright 2000,2001,2002,2004 Dell Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * TODO: + * + * Changelog: + * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com> + * - test for valid PMBR and valid PGPT before ever reading + * AGPT, allow override with 'gpt' kernel command line option. + * - check for first/last_usable_lba outside of size of disk + * + * Tue Mar 26 2002 Matt Domsch <Matt_Domsch@dell.com> + * - Ported to 2.5.7-pre1 and 2.5.7-dj2 + * - Applied patch to avoid fault in alternate header handling + * - cleaned up find_valid_gpt + * - On-disk structure and copy in memory is *always* LE now - + * swab fields as needed + * - remove print_gpt_header() + * - only use first max_p partition entries, to keep the kernel minor number + * and partition numbers tied. 
+ * + * Mon Feb 04 2002 Matt Domsch <Matt_Domsch@dell.com> + * - Removed __PRIPTR_PREFIX - not being used + * + * Mon Jan 14 2002 Matt Domsch <Matt_Domsch@dell.com> + * - Ported to 2.5.2-pre11 + library crc32 patch Linus applied + * + * Thu Dec 6 2001 Matt Domsch <Matt_Domsch@dell.com> + * - Added compare_gpts(). + * - moved le_efi_guid_to_cpus() back into this file. GPT is the only + * thing that keeps EFI GUIDs on disk. + * - Changed gpt structure names and members to be simpler and more Linux-like. + * + * Wed Oct 17 2001 Matt Domsch <Matt_Domsch@dell.com> + * - Removed CONFIG_DEVFS_VOLUMES_UUID code entirely per Martin Wilck + * + * Wed Oct 10 2001 Matt Domsch <Matt_Domsch@dell.com> + * - Changed function comments to DocBook style per Andreas Dilger suggestion. + * + * Mon Oct 08 2001 Matt Domsch <Matt_Domsch@dell.com> + * - Change read_lba() to use the page cache per Al Viro's work. + * - print u64s properly on all architectures + * - fixed debug_printk(), now Dprintk() + * + * Mon Oct 01 2001 Matt Domsch <Matt_Domsch@dell.com> + * - Style cleanups + * - made most functions static + * - Endianness addition + * - remove test for second alternate header, as it's not per spec, + * and is unnecessary. There's now a method to read/write the last + * sector of an odd-sized disk from user space. No tools have ever + * been released which used this code, so it's effectively dead. + * - Per Asit Mallick of Intel, added a test for a valid PMBR. + * - Added kernel command line option 'gpt' to override valid PMBR test. + * + * Wed Jun 6 2001 Martin Wilck <Martin.Wilck@Fujitsu-Siemens.com> + * - added devfs volume UUID support (/dev/volumes/uuids) for + * mounting file systems by the partition GUID. + * + * Tue Dec 5 2000 Matt Domsch <Matt_Domsch@dell.com> + * - Moved crc32() to linux/lib, added efi_crc32(). + * + * Thu Nov 30 2000 Matt Domsch <Matt_Domsch@dell.com> + * - Replaced Intel's CRC32 function with an equivalent + * non-license-restricted version. 
+ * + * Wed Oct 25 2000 Matt Domsch <Matt_Domsch@dell.com> + * - Fixed the last_lba() call to return the proper last block + * + * Thu Oct 12 2000 Matt Domsch <Matt_Domsch@dell.com> + * - Thanks to Andries Brouwer for his debugging assistance. + * - Code works, detects all the partitions. + * + ************************************************************/ +#include <linux/crc32.h> +#include <linux/ctype.h> +#include <linux/math64.h> +#include <linux/slab.h> +#include "check.h" +#include "efi.h" + +/* This allows a kernel command line option 'gpt' to override + * the test for invalid PMBR. Not __initdata because reloading + * the partition tables happens after init too. + */ +static int force_gpt; +static int __init +force_gpt_fn(char *str) +{ + force_gpt = 1; + return 1; +} +__setup("gpt", force_gpt_fn); + + +/** + * efi_crc32() - EFI version of crc32 function + * @buf: buffer to calculate crc32 of + * @len - length of buf + * + * Description: Returns EFI-style CRC32 value for @buf + * + * This function uses the little endian Ethernet polynomial + * but seeds the function with ~0, and xor's with ~0 at the end. + * Note, the EFI Specification, v1.02, has a reference to + * Dr. Dobbs Journal, May 1994 (actually it's in May 1992). + */ +static inline u32 +efi_crc32(const void *buf, unsigned long len) +{ + return (crc32(~0L, buf, len) ^ ~0L); +} + +/** + * last_lba(): return number of last logical block of device + * @bdev: block device + * + * Description: Returns last LBA value on success, 0 on error. + * This is stored (by sd and ide-geometry) in + * the part[0] entry for this disk, and is the number of + * physical sectors available on the disk. 
+ */ +static u64 last_lba(struct block_device *bdev) +{ + if (!bdev || !bdev->bd_inode) + return 0; + return div_u64(bdev->bd_inode->i_size, + bdev_logical_block_size(bdev)) - 1ULL; +} + +static inline int +pmbr_part_valid(struct partition *part) +{ + if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT && + le32_to_cpu(part->start_sect) == 1UL) + return 1; + return 0; +} + +/** + * is_pmbr_valid(): test Protective MBR for validity + * @mbr: pointer to a legacy mbr structure + * + * Description: Returns 1 if PMBR is valid, 0 otherwise. + * Validity depends on two things: + * 1) MSDOS signature is in the last two bytes of the MBR + * 2) One partition of type 0xEE is found + */ +static int +is_pmbr_valid(legacy_mbr *mbr) +{ + int i; + if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE) + return 0; + for (i = 0; i < 4; i++) + if (pmbr_part_valid(&mbr->partition_record[i])) + return 1; + return 0; +} + +/** + * read_lba(): Read bytes from disk, starting at given LBA + * @state + * @lba + * @buffer + * @size_t + * + * Description: Reads @count bytes from @state->bdev into @buffer. + * Returns number of bytes read on success, 0 on error. + */ +static size_t read_lba(struct parsed_partitions *state, + u64 lba, u8 *buffer, size_t count) +{ + size_t totalreadcount = 0; + struct block_device *bdev = state->bdev; + sector_t n = lba * (bdev_logical_block_size(bdev) / 512); + + if (!buffer || lba > last_lba(bdev)) + return 0; + + while (count) { + int copied = 512; + Sector sect; + unsigned char *data = read_part_sector(state, n++, &sect); + if (!data) + break; + if (copied > count) + copied = count; + memcpy(buffer, data, copied); + put_dev_sector(sect); + buffer += copied; + totalreadcount +=copied; + count -= copied; + } + return totalreadcount; +} + +/** + * alloc_read_gpt_entries(): reads partition entries from disk + * @state + * @gpt - GPT header + * + * Description: Returns ptes on success, NULL on error. + * Allocates space for PTEs based on information found in @gpt. 
+ * Notes: remember to free pte when you're done! + */ +static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, + gpt_header *gpt) +{ + size_t count; + gpt_entry *pte; + + if (!gpt) + return NULL; + + count = le32_to_cpu(gpt->num_partition_entries) * + le32_to_cpu(gpt->sizeof_partition_entry); + if (!count) + return NULL; + pte = kzalloc(count, GFP_KERNEL); + if (!pte) + return NULL; + + if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba), + (u8 *) pte, + count) < count) { + kfree(pte); + pte=NULL; + return NULL; + } + return pte; +} + +/** + * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk + * @state + * @lba is the Logical Block Address of the partition table + * + * Description: returns GPT header on success, NULL on error. Allocates + * and fills a GPT header starting at @ from @state->bdev. + * Note: remember to free gpt when finished with it. + */ +static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, + u64 lba) +{ + gpt_header *gpt; + unsigned ssz = bdev_logical_block_size(state->bdev); + + gpt = kzalloc(ssz, GFP_KERNEL); + if (!gpt) + return NULL; + + if (read_lba(state, lba, (u8 *) gpt, ssz) < ssz) { + kfree(gpt); + gpt=NULL; + return NULL; + } + + return gpt; +} + +/** + * is_gpt_valid() - tests one GPT header and PTEs for validity + * @state + * @lba is the logical block address of the GPT header to test + * @gpt is a GPT header ptr, filled on return. + * @ptes is a PTEs ptr, filled on return. + * + * Description: returns 1 if valid, 0 on error. + * If valid, returns pointers to newly allocated GPT header and PTEs. 
+ */ +static int is_gpt_valid(struct parsed_partitions *state, u64 lba, + gpt_header **gpt, gpt_entry **ptes) +{ + u32 crc, origcrc; + u64 lastlba; + + if (!ptes) + return 0; + if (!(*gpt = alloc_read_gpt_header(state, lba))) + return 0; + + /* Check the GUID Partition Table signature */ + if (le64_to_cpu((*gpt)->signature) != GPT_HEADER_SIGNATURE) { + pr_debug("GUID Partition Table Header signature is wrong:" + "%lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->signature), + (unsigned long long)GPT_HEADER_SIGNATURE); + goto fail; + } + + /* Check the GUID Partition Table header size */ + if (le32_to_cpu((*gpt)->header_size) > + bdev_logical_block_size(state->bdev)) { + pr_debug("GUID Partition Table Header size is wrong: %u > %u\n", + le32_to_cpu((*gpt)->header_size), + bdev_logical_block_size(state->bdev)); + goto fail; + } + + /* Check the GUID Partition Table CRC */ + origcrc = le32_to_cpu((*gpt)->header_crc32); + (*gpt)->header_crc32 = 0; + crc = efi_crc32((const unsigned char *) (*gpt), le32_to_cpu((*gpt)->header_size)); + + if (crc != origcrc) { + pr_debug("GUID Partition Table Header CRC is wrong: %x != %x\n", + crc, origcrc); + goto fail; + } + (*gpt)->header_crc32 = cpu_to_le32(origcrc); + + /* Check that the my_lba entry points to the LBA that contains + * the GUID Partition Table */ + if (le64_to_cpu((*gpt)->my_lba) != lba) { + pr_debug("GPT my_lba incorrect: %lld != %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->my_lba), + (unsigned long long)lba); + goto fail; + } + + /* Check the first_usable_lba and last_usable_lba are + * within the disk. 
+ */ + lastlba = last_lba(state->bdev); + if (le64_to_cpu((*gpt)->first_usable_lba) > lastlba) { + pr_debug("GPT: first_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba), + (unsigned long long)lastlba); + goto fail; + } + if (le64_to_cpu((*gpt)->last_usable_lba) > lastlba) { + pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n", + (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba), + (unsigned long long)lastlba); + goto fail; + } + + /* Check that sizeof_partition_entry has the correct value */ + if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) { + pr_debug("GUID Partitition Entry Size check failed.\n"); + goto fail; + } + + if (!(*ptes = alloc_read_gpt_entries(state, *gpt))) + goto fail; + + /* Check the GUID Partition Entry Array CRC */ + crc = efi_crc32((const unsigned char *) (*ptes), + le32_to_cpu((*gpt)->num_partition_entries) * + le32_to_cpu((*gpt)->sizeof_partition_entry)); + + if (crc != le32_to_cpu((*gpt)->partition_entry_array_crc32)) { + pr_debug("GUID Partitition Entry Array CRC check failed.\n"); + goto fail_ptes; + } + + /* We're done, all's well */ + return 1; + + fail_ptes: + kfree(*ptes); + *ptes = NULL; + fail: + kfree(*gpt); + *gpt = NULL; + return 0; +} + +/** + * is_pte_valid() - tests one PTE for validity + * @pte is the pte to check + * @lastlba is last lba of the disk + * + * Description: returns 1 if valid, 0 on error. + */ +static inline int +is_pte_valid(const gpt_entry *pte, const u64 lastlba) +{ + if ((!efi_guidcmp(pte->partition_type_guid, NULL_GUID)) || + le64_to_cpu(pte->starting_lba) > lastlba || + le64_to_cpu(pte->ending_lba) > lastlba) + return 0; + return 1; +} + +/** + * compare_gpts() - Search disk for valid GPT headers and PTEs + * @pgpt is the primary GPT header + * @agpt is the alternate GPT header + * @lastlba is the last LBA number + * Description: Returns nothing. Sanity checks pgpt and agpt fields + * and prints warnings on discrepancies. 
+ * + */ +static void +compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) +{ + int error_found = 0; + if (!pgpt || !agpt) + return; + if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) { + printk(KERN_WARNING + "GPT:Primary header LBA != Alt. header alternate_lba\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(pgpt->my_lba), + (unsigned long long)le64_to_cpu(agpt->alternate_lba)); + error_found++; + } + if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) { + printk(KERN_WARNING + "GPT:Primary header alternate_lba != Alt. header my_lba\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(pgpt->alternate_lba), + (unsigned long long)le64_to_cpu(agpt->my_lba)); + error_found++; + } + if (le64_to_cpu(pgpt->first_usable_lba) != + le64_to_cpu(agpt->first_usable_lba)) { + printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(pgpt->first_usable_lba), + (unsigned long long)le64_to_cpu(agpt->first_usable_lba)); + error_found++; + } + if (le64_to_cpu(pgpt->last_usable_lba) != + le64_to_cpu(agpt->last_usable_lba)) { + printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(pgpt->last_usable_lba), + (unsigned long long)le64_to_cpu(agpt->last_usable_lba)); + error_found++; + } + if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) { + printk(KERN_WARNING "GPT:disk_guids don't match.\n"); + error_found++; + } + if (le32_to_cpu(pgpt->num_partition_entries) != + le32_to_cpu(agpt->num_partition_entries)) { + printk(KERN_WARNING "GPT:num_partition_entries don't match: " + "0x%x != 0x%x\n", + le32_to_cpu(pgpt->num_partition_entries), + le32_to_cpu(agpt->num_partition_entries)); + error_found++; + } + if (le32_to_cpu(pgpt->sizeof_partition_entry) != + le32_to_cpu(agpt->sizeof_partition_entry)) { + printk(KERN_WARNING + 
"GPT:sizeof_partition_entry values don't match: " + "0x%x != 0x%x\n", + le32_to_cpu(pgpt->sizeof_partition_entry), + le32_to_cpu(agpt->sizeof_partition_entry)); + error_found++; + } + if (le32_to_cpu(pgpt->partition_entry_array_crc32) != + le32_to_cpu(agpt->partition_entry_array_crc32)) { + printk(KERN_WARNING + "GPT:partition_entry_array_crc32 values don't match: " + "0x%x != 0x%x\n", + le32_to_cpu(pgpt->partition_entry_array_crc32), + le32_to_cpu(agpt->partition_entry_array_crc32)); + error_found++; + } + if (le64_to_cpu(pgpt->alternate_lba) != lastlba) { + printk(KERN_WARNING + "GPT:Primary header thinks Alt. header is not at the end of the disk.\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(pgpt->alternate_lba), + (unsigned long long)lastlba); + error_found++; + } + + if (le64_to_cpu(agpt->my_lba) != lastlba) { + printk(KERN_WARNING + "GPT:Alternate GPT header not at the end of the disk.\n"); + printk(KERN_WARNING "GPT:%lld != %lld\n", + (unsigned long long)le64_to_cpu(agpt->my_lba), + (unsigned long long)lastlba); + error_found++; + } + + if (error_found) + printk(KERN_WARNING + "GPT: Use GNU Parted to correct GPT errors.\n"); + return; +} + +/** + * find_valid_gpt() - Search disk for valid GPT headers and PTEs + * @state + * @gpt is a GPT header ptr, filled on return. + * @ptes is a PTEs ptr, filled on return. + * Description: Returns 1 if valid, 0 on error. + * If valid, returns pointers to newly allocated GPT header and PTEs. + * Validity depends on PMBR being valid (or being overridden by the + * 'gpt' kernel command line option) and finding either the Primary + * GPT header and PTEs valid, or the Alternate GPT header and PTEs + * valid. If the Primary GPT header is not valid, the Alternate GPT header + * is not checked unless the 'gpt' kernel command line option is passed. + * This protects against devices which misreport their size, and forces + * the user to decide to use the Alternate GPT. 
+ */ +static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, + gpt_entry **ptes) +{ + int good_pgpt = 0, good_agpt = 0, good_pmbr = 0; + gpt_header *pgpt = NULL, *agpt = NULL; + gpt_entry *pptes = NULL, *aptes = NULL; + legacy_mbr *legacymbr; + u64 lastlba; + + if (!ptes) + return 0; + + lastlba = last_lba(state->bdev); + if (!force_gpt) { + /* This will be added to the EFI Spec. per Intel after v1.02. */ + legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL); + if (legacymbr) { + read_lba(state, 0, (u8 *) legacymbr, + sizeof (*legacymbr)); + good_pmbr = is_pmbr_valid(legacymbr); + kfree(legacymbr); + } + if (!good_pmbr) + goto fail; + } + + good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA, + &pgpt, &pptes); + if (good_pgpt) + good_agpt = is_gpt_valid(state, + le64_to_cpu(pgpt->alternate_lba), + &agpt, &aptes); + if (!good_agpt && force_gpt) + good_agpt = is_gpt_valid(state, lastlba, &agpt, &aptes); + + /* The obviously unsuccessful case */ + if (!good_pgpt && !good_agpt) + goto fail; + + compare_gpts(pgpt, agpt, lastlba); + + /* The good cases */ + if (good_pgpt) { + *gpt = pgpt; + *ptes = pptes; + kfree(agpt); + kfree(aptes); + if (!good_agpt) { + printk(KERN_WARNING + "Alternate GPT is invalid, " + "using primary GPT.\n"); + } + return 1; + } + else if (good_agpt) { + *gpt = agpt; + *ptes = aptes; + kfree(pgpt); + kfree(pptes); + printk(KERN_WARNING + "Primary GPT is invalid, using alternate GPT.\n"); + return 1; + } + + fail: + kfree(pgpt); + kfree(agpt); + kfree(pptes); + kfree(aptes); + *gpt = NULL; + *ptes = NULL; + return 0; +} + +/** + * efi_partition(struct parsed_partitions *state) + * @state + * + * Description: called from check.c, if the disk contains GPT + * partitions, sets up partition entries in the kernel. + * + * If the first block on the disk is a legacy MBR, + * it will get handled by msdos_partition(). + * If it's a Protective MBR, we'll handle it here. 
+ * + * We do not create a Linux partition for GPT, but + * only for the actual data partitions. + * Returns: + * -1 if unable to read the partition table + * 0 if this isn't our partition table + * 1 if successful + * + */ +int efi_partition(struct parsed_partitions *state) +{ + gpt_header *gpt = NULL; + gpt_entry *ptes = NULL; + u32 i; + unsigned ssz = bdev_logical_block_size(state->bdev) / 512; + u8 unparsed_guid[37]; + + if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { + kfree(gpt); + kfree(ptes); + return 0; + } + + pr_debug("GUID Partition Table is valid! Yea!\n"); + + for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { + struct partition_meta_info *info; + unsigned label_count = 0; + unsigned label_max; + u64 start = le64_to_cpu(ptes[i].starting_lba); + u64 size = le64_to_cpu(ptes[i].ending_lba) - + le64_to_cpu(ptes[i].starting_lba) + 1ULL; + + if (!is_pte_valid(&ptes[i], last_lba(state->bdev))) + continue; + + put_partition(state, i+1, start * ssz, size * ssz); + + /* If this is a RAID volume, tell md */ + if (!efi_guidcmp(ptes[i].partition_type_guid, + PARTITION_LINUX_RAID_GUID)) + state->parts[i + 1].flags = ADDPART_FLAG_RAID; + + info = &state->parts[i + 1].info; + /* Instead of doing a manual swap to big endian, reuse the + * common ASCII hex format as the interim. + */ + efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid); + part_pack_uuid(unparsed_guid, info->uuid); + + /* Naively convert UTF16-LE to 7 bits. 
*/ + label_max = min(sizeof(info->volname) - 1, + sizeof(ptes[i].partition_name)); + info->volname[label_max] = 0; + while (label_count < label_max) { + u8 c = ptes[i].partition_name[label_count] & 0xff; + if (c && !isprint(c)) + c = '!'; + info->volname[label_count] = c; + label_count++; + } + state->parts[i + 1].has_info = true; + } + kfree(ptes); + kfree(gpt); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} diff --git a/block/partitions/efi.h b/block/partitions/efi.h new file mode 100644 index 000000000000..b69ab729558f --- /dev/null +++ b/block/partitions/efi.h @@ -0,0 +1,134 @@ +/************************************************************ + * EFI GUID Partition Table + * Per Intel EFI Specification v1.02 + * http://developer.intel.com/technology/efi/efi.htm + * + * By Matt Domsch <Matt_Domsch@dell.com> Fri Sep 22 22:15:56 CDT 2000 + * Copyright 2000,2001 Dell Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + ************************************************************/ + +#ifndef FS_PART_EFI_H_INCLUDED +#define FS_PART_EFI_H_INCLUDED + +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/genhd.h> +#include <linux/kernel.h> +#include <linux/major.h> +#include <linux/string.h> +#include <linux/efi.h> + +#define MSDOS_MBR_SIGNATURE 0xaa55 +#define EFI_PMBR_OSTYPE_EFI 0xEF +#define EFI_PMBR_OSTYPE_EFI_GPT 0xEE + +#define GPT_HEADER_SIGNATURE 0x5452415020494645ULL +#define GPT_HEADER_REVISION_V1 0x00010000 +#define GPT_PRIMARY_PARTITION_TABLE_LBA 1 + +#define PARTITION_SYSTEM_GUID \ + EFI_GUID( 0xC12A7328, 0xF81F, 0x11d2, \ + 0xBA, 0x4B, 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B) +#define LEGACY_MBR_PARTITION_GUID \ + EFI_GUID( 0x024DEE41, 0x33E7, 0x11d3, \ + 0x9D, 0x69, 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F) +#define PARTITION_MSFT_RESERVED_GUID \ + EFI_GUID( 0xE3C9E316, 0x0B5C, 0x4DB8, \ + 0x81, 0x7D, 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE) +#define PARTITION_BASIC_DATA_GUID \ + EFI_GUID( 0xEBD0A0A2, 0xB9E5, 0x4433, \ + 0x87, 0xC0, 0x68, 0xB6, 0xB7, 0x26, 0x99, 0xC7) +#define PARTITION_LINUX_RAID_GUID \ + EFI_GUID( 0xa19d880f, 0x05fc, 0x4d3b, \ + 0xa0, 0x06, 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e) +#define PARTITION_LINUX_SWAP_GUID \ + EFI_GUID( 0x0657fd6d, 0xa4ab, 0x43c4, \ + 0x84, 0xe5, 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f) +#define PARTITION_LINUX_LVM_GUID \ + EFI_GUID( 0xe6d6d379, 0xf507, 0x44c2, \ + 0xa2, 0x3c, 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28) + +typedef struct _gpt_header { + __le64 signature; + __le32 revision; + __le32 header_size; + __le32 header_crc32; + __le32 reserved1; + __le64 my_lba; + __le64 alternate_lba; + __le64 first_usable_lba; + __le64 last_usable_lba; + efi_guid_t disk_guid; + __le64 partition_entry_lba; + __le32 num_partition_entries; + 
__le32 sizeof_partition_entry; + __le32 partition_entry_array_crc32; + + /* The rest of the logical block is reserved by UEFI and must be zero. + * EFI standard handles this by: + * + * uint8_t reserved2[ BlockSize - 92 ]; + */ +} __attribute__ ((packed)) gpt_header; + +typedef struct _gpt_entry_attributes { + u64 required_to_function:1; + u64 reserved:47; + u64 type_guid_specific:16; +} __attribute__ ((packed)) gpt_entry_attributes; + +typedef struct _gpt_entry { + efi_guid_t partition_type_guid; + efi_guid_t unique_partition_guid; + __le64 starting_lba; + __le64 ending_lba; + gpt_entry_attributes attributes; + efi_char16_t partition_name[72 / sizeof (efi_char16_t)]; +} __attribute__ ((packed)) gpt_entry; + +typedef struct _legacy_mbr { + u8 boot_code[440]; + __le32 unique_mbr_signature; + __le16 unknown; + struct partition partition_record[4]; + __le16 signature; +} __attribute__ ((packed)) legacy_mbr; + +/* Functions */ +extern int efi_partition(struct parsed_partitions *state); + +#endif + +/* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically + * adjust the settings for this buffer only. This must remain at the end + * of the file. 
+ * -------------------------------------------------------------------------- + * Local variables: + * c-indent-level: 4 + * c-brace-imaginary-offset: 0 + * c-brace-offset: -4 + * c-argdecl-indent: 4 + * c-label-offset: -4 + * c-continued-statement-offset: 4 + * c-continued-brace-offset: 0 + * indent-tabs-mode: nil + * tab-width: 8 + * End: + */ diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c new file mode 100644 index 000000000000..d513a07f44bb --- /dev/null +++ b/block/partitions/ibm.c @@ -0,0 +1,275 @@ +/* + * File...........: linux/fs/partitions/ibm.c + * Author(s)......: Holger Smolinski <Holger.Smolinski@de.ibm.com> + * Volker Sameske <sameske@de.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation, IBM Deutschland Entwicklung GmbH, 1999,2000 + */ + +#include <linux/buffer_head.h> +#include <linux/hdreg.h> +#include <linux/slab.h> +#include <asm/dasd.h> +#include <asm/ebcdic.h> +#include <asm/uaccess.h> +#include <asm/vtoc.h> + +#include "check.h" +#include "ibm.h" + +/* + * compute the block number from a + * cyl-cyl-head-head structure + */ +static sector_t +cchh2blk (struct vtoc_cchh *ptr, struct hd_geometry *geo) { + + sector_t cyl; + __u16 head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head = ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors; +} + +/* + * compute the block number from a + * cyl-cyl-head-head-block structure + */ +static sector_t +cchhb2blk (struct vtoc_cchhb *ptr, struct hd_geometry *geo) { + + sector_t cyl; + __u16 head; + + /*decode cylinder and heads for large volumes */ + cyl = ptr->hh & 0xFFF0; + cyl <<= 12; + cyl |= ptr->cc; + head = ptr->hh & 0x000F; + return cyl * geo->heads * geo->sectors + + head * geo->sectors + + ptr->b; +} + +/* + */ +int ibm_partition(struct parsed_partitions *state) +{ + struct block_device *bdev = state->bdev; + int blocksize, res; + loff_t i_size, offset, size, 
fmt_size; + dasd_information2_t *info; + struct hd_geometry *geo; + char type[5] = {0,}; + char name[7] = {0,}; + union label_t { + struct vtoc_volume_label_cdl vol; + struct vtoc_volume_label_ldl lnx; + struct vtoc_cms_label cms; + } *label; + unsigned char *data; + Sector sect; + sector_t labelsect; + char tmp[64]; + + res = 0; + blocksize = bdev_logical_block_size(bdev); + if (blocksize <= 0) + goto out_exit; + i_size = i_size_read(bdev->bd_inode); + if (i_size == 0) + goto out_exit; + + info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); + if (info == NULL) + goto out_exit; + geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL); + if (geo == NULL) + goto out_nogeo; + label = kmalloc(sizeof(union label_t), GFP_KERNEL); + if (label == NULL) + goto out_nolab; + + if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0 || + ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) + goto out_freeall; + + /* + * Special case for FBA disks: label sector does not depend on + * blocksize. + */ + if ((info->cu_type == 0x6310 && info->dev_type == 0x9336) || + (info->cu_type == 0x3880 && info->dev_type == 0x3370)) + labelsect = info->label_block; + else + labelsect = info->label_block * (blocksize >> 9); + + /* + * Get volume label, extract name and type. 
+ */ + data = read_part_sector(state, labelsect, §); + if (data == NULL) + goto out_readerr; + + memcpy(label, data, sizeof(union label_t)); + put_dev_sector(sect); + + if ((!info->FBA_layout) && (!strcmp(info->type, "ECKD"))) { + strncpy(type, label->vol.vollbl, 4); + strncpy(name, label->vol.volid, 6); + } else { + strncpy(type, label->lnx.vollbl, 4); + strncpy(name, label->lnx.volid, 6); + } + EBCASC(type, 4); + EBCASC(name, 6); + + res = 1; + + /* + * Three different formats: LDL, CDL and unformated disk + * + * identified by info->format + * + * unformated disks we do not have to care about + */ + if (info->format == DASD_FORMAT_LDL) { + if (strncmp(type, "CMS1", 4) == 0) { + /* + * VM style CMS1 labeled disk + */ + blocksize = label->cms.block_size; + if (label->cms.disk_offset != 0) { + snprintf(tmp, sizeof(tmp), "CMS1/%8s(MDSK):", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* disk is reserved minidisk */ + offset = label->cms.disk_offset; + size = (label->cms.block_count - 1) + * (blocksize >> 9); + } else { + snprintf(tmp, sizeof(tmp), "CMS1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + offset = (info->label_block + 1); + size = label->cms.block_count + * (blocksize >> 9); + } + put_partition(state, 1, offset*(blocksize >> 9), + size-offset*(blocksize >> 9)); + } else { + if (strncmp(type, "LNX1", 4) == 0) { + snprintf(tmp, sizeof(tmp), "LNX1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + if (label->lnx.ldl_version == 0xf2) { + fmt_size = label->lnx.formatted_blocks + * (blocksize >> 9); + } else if (!strcmp(info->type, "ECKD")) { + /* formated w/o large volume support */ + fmt_size = geo->cylinders * geo->heads + * geo->sectors * (blocksize >> 9); + } else { + /* old label and no usable disk geometry + * (e.g. 
DIAG) */ + fmt_size = i_size >> 9; + } + size = i_size >> 9; + if (fmt_size < size) + size = fmt_size; + offset = (info->label_block + 1); + } else { + /* unlabeled disk */ + strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); + size = i_size >> 9; + offset = (info->label_block + 1); + } + put_partition(state, 1, offset*(blocksize >> 9), + size-offset*(blocksize >> 9)); + } + } else if (info->format == DASD_FORMAT_CDL) { + /* + * New style CDL formatted disk + */ + sector_t blk; + int counter; + + /* + * check if VOL1 label is available + * if not, something is wrong, skipping partition detection + */ + if (strncmp(type, "VOL1", 4) == 0) { + snprintf(tmp, sizeof(tmp), "VOL1/%8s:", name); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* + * get block number and read then go through format1 + * labels + */ + blk = cchhb2blk(&label->vol.vtoc, geo) + 1; + counter = 0; + data = read_part_sector(state, blk * (blocksize/512), + §); + while (data != NULL) { + struct vtoc_format1_label f1; + + memcpy(&f1, data, + sizeof(struct vtoc_format1_label)); + put_dev_sector(sect); + + /* skip FMT4 / FMT5 / FMT7 labels */ + if (f1.DS1FMTID == _ascebc['4'] + || f1.DS1FMTID == _ascebc['5'] + || f1.DS1FMTID == _ascebc['7'] + || f1.DS1FMTID == _ascebc['9']) { + blk++; + data = read_part_sector(state, + blk * (blocksize/512), §); + continue; + } + + /* only FMT1 and 8 labels valid at this point */ + if (f1.DS1FMTID != _ascebc['1'] && + f1.DS1FMTID != _ascebc['8']) + break; + + /* OK, we got valid partition data */ + offset = cchh2blk(&f1.DS1EXT1.llimit, geo); + size = cchh2blk(&f1.DS1EXT1.ulimit, geo) - + offset + geo->sectors; + if (counter >= state->limit) + break; + put_partition(state, counter + 1, + offset * (blocksize >> 9), + size * (blocksize >> 9)); + counter++; + blk++; + data = read_part_sector(state, + blk * (blocksize/512), §); + } + + if (!data) + /* Are we not supposed to report this ? 
*/ + goto out_readerr; + } else + printk(KERN_WARNING "Warning, expected Label VOL1 not " + "found, treating as CDL formated Disk"); + + } + + strlcat(state->pp_buf, "\n", PAGE_SIZE); + goto out_freeall; + + +out_readerr: + res = -1; +out_freeall: + kfree(label); +out_nolab: + kfree(geo); +out_nogeo: + kfree(info); +out_exit: + return res; +} diff --git a/block/partitions/ibm.h b/block/partitions/ibm.h new file mode 100644 index 000000000000..08fb0804a812 --- /dev/null +++ b/block/partitions/ibm.h @@ -0,0 +1 @@ +int ibm_partition(struct parsed_partitions *); diff --git a/block/partitions/karma.c b/block/partitions/karma.c new file mode 100644 index 000000000000..0ea19312706b --- /dev/null +++ b/block/partitions/karma.c @@ -0,0 +1,57 @@ +/* + * fs/partitions/karma.c + * Rio Karma partition info. + * + * Copyright (C) 2006 Bob Copeland (me@bobcopeland.com) + * based on osf.c + */ + +#include "check.h" +#include "karma.h" + +int karma_partition(struct parsed_partitions *state) +{ + int i; + int slot = 1; + Sector sect; + unsigned char *data; + struct disklabel { + u8 d_reserved[270]; + struct d_partition { + __le32 p_res; + u8 p_fstype; + u8 p_res2[3]; + __le32 p_offset; + __le32 p_size; + } d_partitions[2]; + u8 d_blank[208]; + __le16 d_magic; + } __attribute__((packed)) *label; + struct d_partition *p; + + data = read_part_sector(state, 0, §); + if (!data) + return -1; + + label = (struct disklabel *)data; + if (le16_to_cpu(label->d_magic) != KARMA_LABEL_MAGIC) { + put_dev_sector(sect); + return 0; + } + + p = label->d_partitions; + for (i = 0 ; i < 2; i++, p++) { + if (slot == state->limit) + break; + + if (p->p_fstype == 0x4d && le32_to_cpu(p->p_size)) { + put_partition(state, slot, le32_to_cpu(p->p_offset), + le32_to_cpu(p->p_size)); + } + slot++; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; +} + diff --git a/block/partitions/karma.h b/block/partitions/karma.h new file mode 100644 index 000000000000..c764b2e9df21 --- 
/dev/null +++ b/block/partitions/karma.h @@ -0,0 +1,8 @@ +/* + * fs/partitions/karma.h + */ + +#define KARMA_LABEL_MAGIC 0xAB56 + +int karma_partition(struct parsed_partitions *state); + diff --git a/block/partitions/ldm.c b/block/partitions/ldm.c new file mode 100644 index 000000000000..bd8ae788f689 --- /dev/null +++ b/block/partitions/ldm.c @@ -0,0 +1,1570 @@ +/** + * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) + * + * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> + * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> + * + * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation; either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program (in the main directory of the source in the file COPYING); if + * not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, + * Boston, MA 02111-1307 USA + */ + +#include <linux/slab.h> +#include <linux/pagemap.h> +#include <linux/stringify.h> +#include <linux/kernel.h> +#include "ldm.h" +#include "check.h" +#include "msdos.h" + +/** + * ldm_debug/info/error/crit - Output an error message + * @f: A printf format string containing the message + * @...: Variables to substitute into @f + * + * ldm_debug() writes a DEBUG level message to the syslog but only if the + * driver was compiled with debug enabled. Otherwise, the call turns into a NOP. 
+ */ +#ifndef CONFIG_LDM_DEBUG +#define ldm_debug(...) do {} while (0) +#else +#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a) +#endif + +#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a) +#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a) +#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a) + +static __printf(3, 4) +void _ldm_printk(const char *level, const char *function, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + va_start (args, fmt); + + vaf.fmt = fmt; + vaf.va = &args; + + printk("%s%s(): %pV\n", level, function, &vaf); + + va_end(args); +} + +/** + * ldm_parse_hexbyte - Convert a ASCII hex number to a byte + * @src: Pointer to at least 2 characters to convert. + * + * Convert a two character ASCII hex string to a number. + * + * Return: 0-255 Success, the byte was parsed correctly + * -1 Error, an invalid character was supplied + */ +static int ldm_parse_hexbyte (const u8 *src) +{ + unsigned int x; /* For correct wrapping */ + int h; + + /* high part */ + x = h = hex_to_bin(src[0]); + if (h < 0) + return -1; + + /* low part */ + h = hex_to_bin(src[1]); + if (h < 0) + return -1; + + return (x << 4) + h; +} + +/** + * ldm_parse_guid - Convert GUID from ASCII to binary + * @src: 36 char string of the form fa50ff2b-f2e8-45de-83fa-65417f2f49ba + * @dest: Memory block to hold binary GUID (16 bytes) + * + * N.B. The GUID need not be NULL terminated. 
+ * + * Return: 'true' @dest contains binary GUID + * 'false' @dest contents are undefined + */ +static bool ldm_parse_guid (const u8 *src, u8 *dest) +{ + static const int size[] = { 4, 2, 2, 2, 6 }; + int i, j, v; + + if (src[8] != '-' || src[13] != '-' || + src[18] != '-' || src[23] != '-') + return false; + + for (j = 0; j < 5; j++, src++) + for (i = 0; i < size[j]; i++, src+=2, *dest++ = v) + if ((v = ldm_parse_hexbyte (src)) < 0) + return false; + + return true; +} + +/** + * ldm_parse_privhead - Read the LDM Database PRIVHEAD structure + * @data: Raw database PRIVHEAD structure loaded from the device + * @ph: In-memory privhead structure in which to return parsed information + * + * This parses the LDM database PRIVHEAD structure supplied in @data and + * sets up the in-memory privhead structure @ph with the obtained information. + * + * Return: 'true' @ph contains the PRIVHEAD data + * 'false' @ph contents are undefined + */ +static bool ldm_parse_privhead(const u8 *data, struct privhead *ph) +{ + bool is_vista = false; + + BUG_ON(!data || !ph); + if (MAGIC_PRIVHEAD != get_unaligned_be64(data)) { + ldm_error("Cannot find PRIVHEAD structure. LDM database is" + " corrupt. Aborting."); + return false; + } + ph->ver_major = get_unaligned_be16(data + 0x000C); + ph->ver_minor = get_unaligned_be16(data + 0x000E); + ph->logical_disk_start = get_unaligned_be64(data + 0x011B); + ph->logical_disk_size = get_unaligned_be64(data + 0x0123); + ph->config_start = get_unaligned_be64(data + 0x012B); + ph->config_size = get_unaligned_be64(data + 0x0133); + /* Version 2.11 is Win2k/XP and version 2.12 is Vista. */ + if (ph->ver_major == 2 && ph->ver_minor == 12) + is_vista = true; + if (!is_vista && (ph->ver_major != 2 || ph->ver_minor != 11)) { + ldm_error("Expected PRIVHEAD version 2.11 or 2.12, got %d.%d." + " Aborting.", ph->ver_major, ph->ver_minor); + return false; + } + ldm_debug("PRIVHEAD version %d.%d (Windows %s).", ph->ver_major, + ph->ver_minor, is_vista ? 
"Vista" : "2000/XP"); + if (ph->config_size != LDM_DB_SIZE) { /* 1 MiB in sectors. */ + /* Warn the user and continue, carefully. */ + ldm_info("Database is normally %u bytes, it claims to " + "be %llu bytes.", LDM_DB_SIZE, + (unsigned long long)ph->config_size); + } + if ((ph->logical_disk_size == 0) || (ph->logical_disk_start + + ph->logical_disk_size > ph->config_start)) { + ldm_error("PRIVHEAD disk size doesn't match real disk size"); + return false; + } + if (!ldm_parse_guid(data + 0x0030, ph->disk_id)) { + ldm_error("PRIVHEAD contains an invalid GUID."); + return false; + } + ldm_debug("Parsed PRIVHEAD successfully."); + return true; +} + +/** + * ldm_parse_tocblock - Read the LDM Database TOCBLOCK structure + * @data: Raw database TOCBLOCK structure loaded from the device + * @toc: In-memory toc structure in which to return parsed information + * + * This parses the LDM Database TOCBLOCK (table of contents) structure supplied + * in @data and sets up the in-memory tocblock structure @toc with the obtained + * information. + * + * N.B. The *_start and *_size values returned in @toc are not range-checked. 
+ * + * Return: 'true' @toc contains the TOCBLOCK data + * 'false' @toc contents are undefined + */ +static bool ldm_parse_tocblock (const u8 *data, struct tocblock *toc) +{ + BUG_ON (!data || !toc); + + if (MAGIC_TOCBLOCK != get_unaligned_be64(data)) { + ldm_crit ("Cannot find TOCBLOCK, database may be corrupt."); + return false; + } + strncpy (toc->bitmap1_name, data + 0x24, sizeof (toc->bitmap1_name)); + toc->bitmap1_name[sizeof (toc->bitmap1_name) - 1] = 0; + toc->bitmap1_start = get_unaligned_be64(data + 0x2E); + toc->bitmap1_size = get_unaligned_be64(data + 0x36); + + if (strncmp (toc->bitmap1_name, TOC_BITMAP1, + sizeof (toc->bitmap1_name)) != 0) { + ldm_crit ("TOCBLOCK's first bitmap is '%s', should be '%s'.", + TOC_BITMAP1, toc->bitmap1_name); + return false; + } + strncpy (toc->bitmap2_name, data + 0x46, sizeof (toc->bitmap2_name)); + toc->bitmap2_name[sizeof (toc->bitmap2_name) - 1] = 0; + toc->bitmap2_start = get_unaligned_be64(data + 0x50); + toc->bitmap2_size = get_unaligned_be64(data + 0x58); + if (strncmp (toc->bitmap2_name, TOC_BITMAP2, + sizeof (toc->bitmap2_name)) != 0) { + ldm_crit ("TOCBLOCK's second bitmap is '%s', should be '%s'.", + TOC_BITMAP2, toc->bitmap2_name); + return false; + } + ldm_debug ("Parsed TOCBLOCK successfully."); + return true; +} + +/** + * ldm_parse_vmdb - Read the LDM Database VMDB structure + * @data: Raw database VMDB structure loaded from the device + * @vm: In-memory vmdb structure in which to return parsed information + * + * This parses the LDM Database VMDB structure supplied in @data and sets up + * the in-memory vmdb structure @vm with the obtained information. + * + * N.B. The *_start, *_size and *_seq values will be range-checked later. 
+ * + * Return: 'true' @vm contains VMDB info + * 'false' @vm contents are undefined + */ +static bool ldm_parse_vmdb (const u8 *data, struct vmdb *vm) +{ + BUG_ON (!data || !vm); + + if (MAGIC_VMDB != get_unaligned_be32(data)) { + ldm_crit ("Cannot find the VMDB, database may be corrupt."); + return false; + } + + vm->ver_major = get_unaligned_be16(data + 0x12); + vm->ver_minor = get_unaligned_be16(data + 0x14); + if ((vm->ver_major != 4) || (vm->ver_minor != 10)) { + ldm_error ("Expected VMDB version %d.%d, got %d.%d. " + "Aborting.", 4, 10, vm->ver_major, vm->ver_minor); + return false; + } + + vm->vblk_size = get_unaligned_be32(data + 0x08); + if (vm->vblk_size == 0) { + ldm_error ("Illegal VBLK size"); + return false; + } + + vm->vblk_offset = get_unaligned_be32(data + 0x0C); + vm->last_vblk_seq = get_unaligned_be32(data + 0x04); + + ldm_debug ("Parsed VMDB successfully."); + return true; +} + +/** + * ldm_compare_privheads - Compare two privhead objects + * @ph1: First privhead + * @ph2: Second privhead + * + * This compares the two privhead structures @ph1 and @ph2. + * + * Return: 'true' Identical + * 'false' Different + */ +static bool ldm_compare_privheads (const struct privhead *ph1, + const struct privhead *ph2) +{ + BUG_ON (!ph1 || !ph2); + + return ((ph1->ver_major == ph2->ver_major) && + (ph1->ver_minor == ph2->ver_minor) && + (ph1->logical_disk_start == ph2->logical_disk_start) && + (ph1->logical_disk_size == ph2->logical_disk_size) && + (ph1->config_start == ph2->config_start) && + (ph1->config_size == ph2->config_size) && + !memcmp (ph1->disk_id, ph2->disk_id, GUID_SIZE)); +} + +/** + * ldm_compare_tocblocks - Compare two tocblock objects + * @toc1: First toc + * @toc2: Second toc + * + * This compares the two tocblock structures @toc1 and @toc2. 
+ * + * Return: 'true' Identical + * 'false' Different + */ +static bool ldm_compare_tocblocks (const struct tocblock *toc1, + const struct tocblock *toc2) +{ + BUG_ON (!toc1 || !toc2); + + return ((toc1->bitmap1_start == toc2->bitmap1_start) && + (toc1->bitmap1_size == toc2->bitmap1_size) && + (toc1->bitmap2_start == toc2->bitmap2_start) && + (toc1->bitmap2_size == toc2->bitmap2_size) && + !strncmp (toc1->bitmap1_name, toc2->bitmap1_name, + sizeof (toc1->bitmap1_name)) && + !strncmp (toc1->bitmap2_name, toc2->bitmap2_name, + sizeof (toc1->bitmap2_name))); +} + +/** + * ldm_validate_privheads - Compare the primary privhead with its backups + * @state: Partition check state including device holding the LDM Database + * @ph1: Memory struct to fill with ph contents + * + * Read and compare all three privheads from disk. + * + * The privheads on disk show the size and location of the main disk area and + * the configuration area (the database). The values are range-checked against + * @hd, which contains the real size of the disk. 
+ * + * Return: 'true' Success + * 'false' Error + */ +static bool ldm_validate_privheads(struct parsed_partitions *state, + struct privhead *ph1) +{ + static const int off[3] = { OFF_PRIV1, OFF_PRIV2, OFF_PRIV3 }; + struct privhead *ph[3] = { ph1 }; + Sector sect; + u8 *data; + bool result = false; + long num_sects; + int i; + + BUG_ON (!state || !ph1); + + ph[1] = kmalloc (sizeof (*ph[1]), GFP_KERNEL); + ph[2] = kmalloc (sizeof (*ph[2]), GFP_KERNEL); + if (!ph[1] || !ph[2]) { + ldm_crit ("Out of memory."); + goto out; + } + + /* off[1 & 2] are relative to ph[0]->config_start */ + ph[0]->config_start = 0; + + /* Read and parse privheads */ + for (i = 0; i < 3; i++) { + data = read_part_sector(state, ph[0]->config_start + off[i], + §); + if (!data) { + ldm_crit ("Disk read failed."); + goto out; + } + result = ldm_parse_privhead (data, ph[i]); + put_dev_sector (sect); + if (!result) { + ldm_error ("Cannot find PRIVHEAD %d.", i+1); /* Log again */ + if (i < 2) + goto out; /* Already logged */ + else + break; /* FIXME ignore for now, 3rd PH can fail on odd-sized disks */ + } + } + + num_sects = state->bdev->bd_inode->i_size >> 9; + + if ((ph[0]->config_start > num_sects) || + ((ph[0]->config_start + ph[0]->config_size) > num_sects)) { + ldm_crit ("Database extends beyond the end of the disk."); + goto out; + } + + if ((ph[0]->logical_disk_start > ph[0]->config_start) || + ((ph[0]->logical_disk_start + ph[0]->logical_disk_size) + > ph[0]->config_start)) { + ldm_crit ("Disk and database overlap."); + goto out; + } + + if (!ldm_compare_privheads (ph[0], ph[1])) { + ldm_crit ("Primary and backup PRIVHEADs don't match."); + goto out; + } + /* FIXME ignore this for now + if (!ldm_compare_privheads (ph[0], ph[2])) { + ldm_crit ("Primary and backup PRIVHEADs don't match."); + goto out; + }*/ + ldm_debug ("Validated PRIVHEADs successfully."); + result = true; +out: + kfree (ph[1]); + kfree (ph[2]); + return result; +} + +/** + * ldm_validate_tocblocks - Validate the table of 
contents and its backups + * @state: Partition check state including device holding the LDM Database + * @base: Offset, into @state->bdev, of the database + * @ldb: Cache of the database structures + * + * Find and compare the four tables of contents of the LDM Database stored on + * @state->bdev and return the parsed information into @toc1. + * + * The offsets and sizes of the configs are range-checked against a privhead. + * + * Return: 'true' @toc1 contains validated TOCBLOCK info + * 'false' @toc1 contents are undefined + */ +static bool ldm_validate_tocblocks(struct parsed_partitions *state, + unsigned long base, struct ldmdb *ldb) +{ + static const int off[4] = { OFF_TOCB1, OFF_TOCB2, OFF_TOCB3, OFF_TOCB4}; + struct tocblock *tb[4]; + struct privhead *ph; + Sector sect; + u8 *data; + int i, nr_tbs; + bool result = false; + + BUG_ON(!state || !ldb); + ph = &ldb->ph; + tb[0] = &ldb->toc; + tb[1] = kmalloc(sizeof(*tb[1]) * 3, GFP_KERNEL); + if (!tb[1]) { + ldm_crit("Out of memory."); + goto err; + } + tb[2] = (struct tocblock*)((u8*)tb[1] + sizeof(*tb[1])); + tb[3] = (struct tocblock*)((u8*)tb[2] + sizeof(*tb[2])); + /* + * Try to read and parse all four TOCBLOCKs. + * + * Windows Vista LDM v2.12 does not always have all four TOCBLOCKs so + * skip any that fail as long as we get at least one valid TOCBLOCK. + */ + for (nr_tbs = i = 0; i < 4; i++) { + data = read_part_sector(state, base + off[i], §); + if (!data) { + ldm_error("Disk read failed for TOCBLOCK %d.", i); + continue; + } + if (ldm_parse_tocblock(data, tb[nr_tbs])) + nr_tbs++; + put_dev_sector(sect); + } + if (!nr_tbs) { + ldm_crit("Failed to find a valid TOCBLOCK."); + goto err; + } + /* Range check the TOCBLOCK against a privhead. */ + if (((tb[0]->bitmap1_start + tb[0]->bitmap1_size) > ph->config_size) || + ((tb[0]->bitmap2_start + tb[0]->bitmap2_size) > + ph->config_size)) { + ldm_crit("The bitmaps are out of range. Giving up."); + goto err; + } + /* Compare all loaded TOCBLOCKs. 
*/ + for (i = 1; i < nr_tbs; i++) { + if (!ldm_compare_tocblocks(tb[0], tb[i])) { + ldm_crit("TOCBLOCKs 0 and %d do not match.", i); + goto err; + } + } + ldm_debug("Validated %d TOCBLOCKs successfully.", nr_tbs); + result = true; +err: + kfree(tb[1]); + return result; +} + +/** + * ldm_validate_vmdb - Read the VMDB and validate it + * @state: Partition check state including device holding the LDM Database + * @base: Offset, into @bdev, of the database + * @ldb: Cache of the database structures + * + * Find the vmdb of the LDM Database stored on @bdev and return the parsed + * information in @ldb. + * + * Return: 'true' @ldb contains validated VBDB info + * 'false' @ldb contents are undefined + */ +static bool ldm_validate_vmdb(struct parsed_partitions *state, + unsigned long base, struct ldmdb *ldb) +{ + Sector sect; + u8 *data; + bool result = false; + struct vmdb *vm; + struct tocblock *toc; + + BUG_ON (!state || !ldb); + + vm = &ldb->vm; + toc = &ldb->toc; + + data = read_part_sector(state, base + OFF_VMDB, §); + if (!data) { + ldm_crit ("Disk read failed."); + return false; + } + + if (!ldm_parse_vmdb (data, vm)) + goto out; /* Already logged */ + + /* Are there uncommitted transactions? */ + if (get_unaligned_be16(data + 0x10) != 0x01) { + ldm_crit ("Database is not in a consistent state. Aborting."); + goto out; + } + + if (vm->vblk_offset != 512) + ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset); + + /* + * The last_vblkd_seq can be before the end of the vmdb, just make sure + * it is not out of bounds. + */ + if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) { + ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK. " + "Database is corrupt. 
Aborting."); + goto out; + } + + result = true; +out: + put_dev_sector (sect); + return result; +} + + +/** + * ldm_validate_partition_table - Determine whether bdev might be a dynamic disk + * @state: Partition check state including device holding the LDM Database + * + * This function provides a weak test to decide whether the device is a dynamic + * disk or not. It looks for an MS-DOS-style partition table containing at + * least one partition of type 0x42 (formerly SFS, now used by Windows for + * dynamic disks). + * + * N.B. The only possible error can come from the read_part_sector and that is + * only likely to happen if the underlying device is strange. If that IS + * the case we should return zero to let someone else try. + * + * Return: 'true' @state->bdev is a dynamic disk + * 'false' @state->bdev is not a dynamic disk, or an error occurred + */ +static bool ldm_validate_partition_table(struct parsed_partitions *state) +{ + Sector sect; + u8 *data; + struct partition *p; + int i; + bool result = false; + + BUG_ON(!state); + + data = read_part_sector(state, 0, §); + if (!data) { + ldm_info ("Disk read failed."); + return false; + } + + if (*(__le16*) (data + 0x01FE) != cpu_to_le16 (MSDOS_LABEL_MAGIC)) + goto out; + + p = (struct partition*)(data + 0x01BE); + for (i = 0; i < 4; i++, p++) + if (SYS_IND (p) == LDM_PARTITION) { + result = true; + break; + } + + if (result) + ldm_debug ("Found W2K dynamic disk partition type."); + +out: + put_dev_sector (sect); + return result; +} + +/** + * ldm_get_disk_objid - Search a linked list of vblk's for a given Disk Id + * @ldb: Cache of the database structures + * + * The LDM Database contains a list of all partitions on all dynamic disks. + * The primary PRIVHEAD, at the beginning of the physical disk, tells us + * the GUID of this disk. This function searches for the GUID in a linked + * list of vblk's. 
+ * + * Return: Pointer, A matching vblk was found + * NULL, No match, or an error + */ +static struct vblk * ldm_get_disk_objid (const struct ldmdb *ldb) +{ + struct list_head *item; + + BUG_ON (!ldb); + + list_for_each (item, &ldb->v_disk) { + struct vblk *v = list_entry (item, struct vblk, list); + if (!memcmp (v->vblk.disk.disk_id, ldb->ph.disk_id, GUID_SIZE)) + return v; + } + + return NULL; +} + +/** + * ldm_create_data_partitions - Create data partitions for this device + * @pp: List of the partitions parsed so far + * @ldb: Cache of the database structures + * + * The database contains ALL the partitions for ALL disk groups, so we need to + * filter out this specific disk. Using the disk's object id, we can find all + * the partitions in the database that belong to this disk. + * + * Add each partition in our database, to the parsed_partitions structure. + * + * N.B. This function creates the partitions in the order it finds partition + * objects in the linked list. + * + * Return: 'true' Partition created + * 'false' Error, probably a range checking problem + */ +static bool ldm_create_data_partitions (struct parsed_partitions *pp, + const struct ldmdb *ldb) +{ + struct list_head *item; + struct vblk *vb; + struct vblk *disk; + struct vblk_part *part; + int part_num = 1; + + BUG_ON (!pp || !ldb); + + disk = ldm_get_disk_objid (ldb); + if (!disk) { + ldm_crit ("Can't find the ID of this disk in the database."); + return false; + } + + strlcat(pp->pp_buf, " [LDM]", PAGE_SIZE); + + /* Create the data partitions */ + list_for_each (item, &ldb->v_part) { + vb = list_entry (item, struct vblk, list); + part = &vb->vblk.part; + + if (part->disk_id != disk->obj_id) + continue; + + put_partition (pp, part_num, ldb->ph.logical_disk_start + + part->start, part->size); + part_num++; + } + + strlcat(pp->pp_buf, "\n", PAGE_SIZE); + return true; +} + + +/** + * ldm_relative - Calculate the next relative offset + * @buffer: Block of data being worked on + * @buflen: Size 
of the block of data + * @base: Size of the previous fixed width fields + * @offset: Cumulative size of the previous variable-width fields + * + * Because many of the VBLK fields are variable-width, it's necessary + * to calculate each offset based on the previous one and the length + * of the field it pointed to. + * + * Return: -1 Error, the calculated offset exceeded the size of the buffer + * n OK, a range-checked offset into buffer + */ +static int ldm_relative(const u8 *buffer, int buflen, int base, int offset) +{ + + base += offset; + if (!buffer || offset < 0 || base > buflen) { + if (!buffer) + ldm_error("!buffer"); + if (offset < 0) + ldm_error("offset (%d) < 0", offset); + if (base > buflen) + ldm_error("base (%d) > buflen (%d)", base, buflen); + return -1; + } + if (base + buffer[base] >= buflen) { + ldm_error("base (%d) + buffer[base] (%d) >= buflen (%d)", base, + buffer[base], buflen); + return -1; + } + return buffer[base] + offset + 1; +} + +/** + * ldm_get_vnum - Convert a variable-width, big endian number, into cpu order + * @block: Pointer to the variable-width number to convert + * + * Large numbers in the LDM Database are often stored in a packed format. Each + * number is prefixed by a one byte width marker. All numbers in the database + * are stored in big-endian byte order. This function reads one of these + * numbers and returns the result + * + * N.B. This function DOES NOT perform any range checking, though the most + * it will read is eight bytes. 
+ * + * Return: n A number + * 0 Zero, or an error occurred + */ +static u64 ldm_get_vnum (const u8 *block) +{ + u64 tmp = 0; + u8 length; + + BUG_ON (!block); + + length = *block++; + + if (length && length <= 8) + while (length--) + tmp = (tmp << 8) | *block++; + else + ldm_error ("Illegal length %d.", length); + + return tmp; +} + +/** + * ldm_get_vstr - Read a length-prefixed string into a buffer + * @block: Pointer to the length marker + * @buffer: Location to copy string to + * @buflen: Size of the output buffer + * + * Many of the strings in the LDM Database are not NULL terminated. Instead + * they are prefixed by a one byte length marker. This function copies one of + * these strings into a buffer. + * + * N.B. This function DOES NOT perform any range checking on the input. + * If the buffer is too small, the output will be truncated. + * + * Return: 0, Error and @buffer contents are undefined + * n, String length in characters (excluding NULL) + * buflen-1, String was truncated. + */ +static int ldm_get_vstr (const u8 *block, u8 *buffer, int buflen) +{ + int length; + + BUG_ON (!block || !buffer); + + length = block[0]; + if (length >= buflen) { + ldm_error ("Truncating string %d -> %d.", length, buflen); + length = buflen - 1; + } + memcpy (buffer, block + 1, length); + buffer[length] = 0; + return length; +} + + +/** + * ldm_parse_cmp3 - Read a raw VBLK Component object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Component object (version 3) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Component VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_cmp3 (const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, r_vstate, r_child, r_parent, r_stripe, r_cols, len; + struct vblk_comp *comp; + + BUG_ON (!buffer || !vb); + + r_objid = ldm_relative (buffer, buflen, 0x18, 0); + r_name = ldm_relative (buffer, buflen, 0x18, r_objid); + r_vstate = ldm_relative (buffer, buflen, 0x18, r_name); + r_child = ldm_relative (buffer, buflen, 0x1D, r_vstate); + r_parent = ldm_relative (buffer, buflen, 0x2D, r_child); + + if (buffer[0x12] & VBLK_FLAG_COMP_STRIPE) { + r_stripe = ldm_relative (buffer, buflen, 0x2E, r_parent); + r_cols = ldm_relative (buffer, buflen, 0x2E, r_stripe); + len = r_cols; + } else { + r_stripe = 0; + r_cols = 0; + len = r_parent; + } + if (len < 0) + return false; + + len += VBLK_SIZE_CMP3; + if (len != get_unaligned_be32(buffer + 0x14)) + return false; + + comp = &vb->vblk.comp; + ldm_get_vstr (buffer + 0x18 + r_name, comp->state, + sizeof (comp->state)); + comp->type = buffer[0x18 + r_vstate]; + comp->children = ldm_get_vnum (buffer + 0x1D + r_vstate); + comp->parent_id = ldm_get_vnum (buffer + 0x2D + r_child); + comp->chunksize = r_stripe ? ldm_get_vnum (buffer+r_parent+0x2E) : 0; + + return true; +} + +/** + * ldm_parse_dgr3 - Read a raw VBLK Disk Group object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Disk Group object (version 3) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Disk Group VBLK + * 'false' @vb contents are not defined + */ +static int ldm_parse_dgr3 (const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, r_diskid, r_id1, r_id2, len; + struct vblk_dgrp *dgrp; + + BUG_ON (!buffer || !vb); + + r_objid = ldm_relative (buffer, buflen, 0x18, 0); + r_name = ldm_relative (buffer, buflen, 0x18, r_objid); + r_diskid = ldm_relative (buffer, buflen, 0x18, r_name); + + if (buffer[0x12] & VBLK_FLAG_DGR3_IDS) { + r_id1 = ldm_relative (buffer, buflen, 0x24, r_diskid); + r_id2 = ldm_relative (buffer, buflen, 0x24, r_id1); + len = r_id2; + } else { + r_id1 = 0; + r_id2 = 0; + len = r_diskid; + } + if (len < 0) + return false; + + len += VBLK_SIZE_DGR3; + if (len != get_unaligned_be32(buffer + 0x14)) + return false; + + dgrp = &vb->vblk.dgrp; + ldm_get_vstr (buffer + 0x18 + r_name, dgrp->disk_id, + sizeof (dgrp->disk_id)); + return true; +} + +/** + * ldm_parse_dgr4 - Read a raw VBLK Disk Group object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Disk Group object (version 4) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Disk Group VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_dgr4 (const u8 *buffer, int buflen, struct vblk *vb) +{ + char buf[64]; + int r_objid, r_name, r_id1, r_id2, len; + struct vblk_dgrp *dgrp; + + BUG_ON (!buffer || !vb); + + r_objid = ldm_relative (buffer, buflen, 0x18, 0); + r_name = ldm_relative (buffer, buflen, 0x18, r_objid); + + if (buffer[0x12] & VBLK_FLAG_DGR4_IDS) { + r_id1 = ldm_relative (buffer, buflen, 0x44, r_name); + r_id2 = ldm_relative (buffer, buflen, 0x44, r_id1); + len = r_id2; + } else { + r_id1 = 0; + r_id2 = 0; + len = r_name; + } + if (len < 0) + return false; + + len += VBLK_SIZE_DGR4; + if (len != get_unaligned_be32(buffer + 0x14)) + return false; + + dgrp = &vb->vblk.dgrp; + + ldm_get_vstr (buffer + 0x18 + r_objid, buf, sizeof (buf)); + return true; +} + +/** + * ldm_parse_dsk3 - Read a raw VBLK Disk object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Disk object (version 3) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Disk VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_dsk3 (const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, r_diskid, r_altname, len; + struct vblk_disk *disk; + + BUG_ON (!buffer || !vb); + + r_objid = ldm_relative (buffer, buflen, 0x18, 0); + r_name = ldm_relative (buffer, buflen, 0x18, r_objid); + r_diskid = ldm_relative (buffer, buflen, 0x18, r_name); + r_altname = ldm_relative (buffer, buflen, 0x18, r_diskid); + len = r_altname; + if (len < 0) + return false; + + len += VBLK_SIZE_DSK3; + if (len != get_unaligned_be32(buffer + 0x14)) + return false; + + disk = &vb->vblk.disk; + ldm_get_vstr (buffer + 0x18 + r_diskid, disk->alt_name, + sizeof (disk->alt_name)); + if (!ldm_parse_guid (buffer + 0x19 + r_name, disk->disk_id)) + return false; + + return true; +} + +/** + * ldm_parse_dsk4 - Read a raw VBLK Disk object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Disk object (version 4) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Disk VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_dsk4 (const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, len; + struct vblk_disk *disk; + + BUG_ON (!buffer || !vb); + + r_objid = ldm_relative (buffer, buflen, 0x18, 0); + r_name = ldm_relative (buffer, buflen, 0x18, r_objid); + len = r_name; + if (len < 0) + return false; + + len += VBLK_SIZE_DSK4; + if (len != get_unaligned_be32(buffer + 0x14)) + return false; + + disk = &vb->vblk.disk; + memcpy (disk->disk_id, buffer + 0x18 + r_name, GUID_SIZE); + return true; +} + +/** + * ldm_parse_prt3 - Read a raw VBLK Partition object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Partition object (version 3) into a vblk structure. + * + * Return: 'true' @vb contains a Partition VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_prt3(const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, r_size, r_parent, r_diskid, r_index, len; + struct vblk_part *part; + + BUG_ON(!buffer || !vb); + r_objid = ldm_relative(buffer, buflen, 0x18, 0); + if (r_objid < 0) { + ldm_error("r_objid %d < 0", r_objid); + return false; + } + r_name = ldm_relative(buffer, buflen, 0x18, r_objid); + if (r_name < 0) { + ldm_error("r_name %d < 0", r_name); + return false; + } + r_size = ldm_relative(buffer, buflen, 0x34, r_name); + if (r_size < 0) { + ldm_error("r_size %d < 0", r_size); + return false; + } + r_parent = ldm_relative(buffer, buflen, 0x34, r_size); + if (r_parent < 0) { + ldm_error("r_parent %d < 0", r_parent); + return false; + } + r_diskid = ldm_relative(buffer, buflen, 0x34, r_parent); + if (r_diskid < 0) { + ldm_error("r_diskid %d < 0", r_diskid); + return false; + } + if (buffer[0x12] & VBLK_FLAG_PART_INDEX) { + r_index = ldm_relative(buffer, buflen, 0x34, r_diskid); + if 
(r_index < 0) { + ldm_error("r_index %d < 0", r_index); + return false; + } + len = r_index; + } else { + r_index = 0; + len = r_diskid; + } + if (len < 0) { + ldm_error("len %d < 0", len); + return false; + } + len += VBLK_SIZE_PRT3; + if (len > get_unaligned_be32(buffer + 0x14)) { + ldm_error("len %d > BE32(buffer + 0x14) %d", len, + get_unaligned_be32(buffer + 0x14)); + return false; + } + part = &vb->vblk.part; + part->start = get_unaligned_be64(buffer + 0x24 + r_name); + part->volume_offset = get_unaligned_be64(buffer + 0x2C + r_name); + part->size = ldm_get_vnum(buffer + 0x34 + r_name); + part->parent_id = ldm_get_vnum(buffer + 0x34 + r_size); + part->disk_id = ldm_get_vnum(buffer + 0x34 + r_parent); + if (vb->flags & VBLK_FLAG_PART_INDEX) + part->partnum = buffer[0x35 + r_diskid]; + else + part->partnum = 0; + return true; +} + +/** + * ldm_parse_vol5 - Read a raw VBLK Volume object into a vblk structure + * @buffer: Block of data being worked on + * @buflen: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK Volume object (version 5) into a vblk structure. 
+ * + * Return: 'true' @vb contains a Volume VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_vol5(const u8 *buffer, int buflen, struct vblk *vb) +{ + int r_objid, r_name, r_vtype, r_disable_drive_letter, r_child, r_size; + int r_id1, r_id2, r_size2, r_drive, len; + struct vblk_volu *volu; + + BUG_ON(!buffer || !vb); + r_objid = ldm_relative(buffer, buflen, 0x18, 0); + if (r_objid < 0) { + ldm_error("r_objid %d < 0", r_objid); + return false; + } + r_name = ldm_relative(buffer, buflen, 0x18, r_objid); + if (r_name < 0) { + ldm_error("r_name %d < 0", r_name); + return false; + } + r_vtype = ldm_relative(buffer, buflen, 0x18, r_name); + if (r_vtype < 0) { + ldm_error("r_vtype %d < 0", r_vtype); + return false; + } + r_disable_drive_letter = ldm_relative(buffer, buflen, 0x18, r_vtype); + if (r_disable_drive_letter < 0) { + ldm_error("r_disable_drive_letter %d < 0", + r_disable_drive_letter); + return false; + } + r_child = ldm_relative(buffer, buflen, 0x2D, r_disable_drive_letter); + if (r_child < 0) { + ldm_error("r_child %d < 0", r_child); + return false; + } + r_size = ldm_relative(buffer, buflen, 0x3D, r_child); + if (r_size < 0) { + ldm_error("r_size %d < 0", r_size); + return false; + } + if (buffer[0x12] & VBLK_FLAG_VOLU_ID1) { + r_id1 = ldm_relative(buffer, buflen, 0x52, r_size); + if (r_id1 < 0) { + ldm_error("r_id1 %d < 0", r_id1); + return false; + } + } else + r_id1 = r_size; + if (buffer[0x12] & VBLK_FLAG_VOLU_ID2) { + r_id2 = ldm_relative(buffer, buflen, 0x52, r_id1); + if (r_id2 < 0) { + ldm_error("r_id2 %d < 0", r_id2); + return false; + } + } else + r_id2 = r_id1; + if (buffer[0x12] & VBLK_FLAG_VOLU_SIZE) { + r_size2 = ldm_relative(buffer, buflen, 0x52, r_id2); + if (r_size2 < 0) { + ldm_error("r_size2 %d < 0", r_size2); + return false; + } + } else + r_size2 = r_id2; + if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { + r_drive = ldm_relative(buffer, buflen, 0x52, r_size2); + if (r_drive < 0) { + ldm_error("r_drive %d < 0", 
r_drive); + return false; + } + } else + r_drive = r_size2; + len = r_drive; + if (len < 0) { + ldm_error("len %d < 0", len); + return false; + } + len += VBLK_SIZE_VOL5; + if (len > get_unaligned_be32(buffer + 0x14)) { + ldm_error("len %d > BE32(buffer + 0x14) %d", len, + get_unaligned_be32(buffer + 0x14)); + return false; + } + volu = &vb->vblk.volu; + ldm_get_vstr(buffer + 0x18 + r_name, volu->volume_type, + sizeof(volu->volume_type)); + memcpy(volu->volume_state, buffer + 0x18 + r_disable_drive_letter, + sizeof(volu->volume_state)); + volu->size = ldm_get_vnum(buffer + 0x3D + r_child); + volu->partition_type = buffer[0x41 + r_size]; + memcpy(volu->guid, buffer + 0x42 + r_size, sizeof(volu->guid)); + if (buffer[0x12] & VBLK_FLAG_VOLU_DRIVE) { + ldm_get_vstr(buffer + 0x52 + r_size, volu->drive_hint, + sizeof(volu->drive_hint)); + } + return true; +} + +/** + * ldm_parse_vblk - Read a raw VBLK object into a vblk structure + * @buf: Block of data being worked on + * @len: Size of the block of data + * @vb: In-memory vblk in which to return information + * + * Read a raw VBLK object into a vblk structure. This function just reads the + * information common to all VBLK types, then delegates the rest of the work to + * helper functions: ldm_parse_*. 
+ * + * Return: 'true' @vb contains a VBLK + * 'false' @vb contents are not defined + */ +static bool ldm_parse_vblk (const u8 *buf, int len, struct vblk *vb) +{ + bool result = false; + int r_objid; + + BUG_ON (!buf || !vb); + + r_objid = ldm_relative (buf, len, 0x18, 0); + if (r_objid < 0) { + ldm_error ("VBLK header is corrupt."); + return false; + } + + vb->flags = buf[0x12]; + vb->type = buf[0x13]; + vb->obj_id = ldm_get_vnum (buf + 0x18); + ldm_get_vstr (buf+0x18+r_objid, vb->name, sizeof (vb->name)); + + switch (vb->type) { + case VBLK_CMP3: result = ldm_parse_cmp3 (buf, len, vb); break; + case VBLK_DSK3: result = ldm_parse_dsk3 (buf, len, vb); break; + case VBLK_DSK4: result = ldm_parse_dsk4 (buf, len, vb); break; + case VBLK_DGR3: result = ldm_parse_dgr3 (buf, len, vb); break; + case VBLK_DGR4: result = ldm_parse_dgr4 (buf, len, vb); break; + case VBLK_PRT3: result = ldm_parse_prt3 (buf, len, vb); break; + case VBLK_VOL5: result = ldm_parse_vol5 (buf, len, vb); break; + } + + if (result) + ldm_debug ("Parsed VBLK 0x%llx (type: 0x%02x) ok.", + (unsigned long long) vb->obj_id, vb->type); + else + ldm_error ("Failed to parse VBLK 0x%llx (type: 0x%02x).", + (unsigned long long) vb->obj_id, vb->type); + + return result; +} + + +/** + * ldm_ldmdb_add - Adds a raw VBLK entry to the ldmdb database + * @data: Raw VBLK to add to the database + * @len: Size of the raw VBLK + * @ldb: Cache of the database structures + * + * The VBLKs are sorted into categories. Partitions are also sorted by offset. + * + * N.B. This function does not check the validity of the VBLKs. 
+ * + * Return: 'true' The VBLK was added + * 'false' An error occurred + */ +static bool ldm_ldmdb_add (u8 *data, int len, struct ldmdb *ldb) +{ + struct vblk *vb; + struct list_head *item; + + BUG_ON (!data || !ldb); + + vb = kmalloc (sizeof (*vb), GFP_KERNEL); + if (!vb) { + ldm_crit ("Out of memory."); + return false; + } + + if (!ldm_parse_vblk (data, len, vb)) { + kfree(vb); + return false; /* Already logged */ + } + + /* Put vblk into the correct list. */ + switch (vb->type) { + case VBLK_DGR3: + case VBLK_DGR4: + list_add (&vb->list, &ldb->v_dgrp); + break; + case VBLK_DSK3: + case VBLK_DSK4: + list_add (&vb->list, &ldb->v_disk); + break; + case VBLK_VOL5: + list_add (&vb->list, &ldb->v_volu); + break; + case VBLK_CMP3: + list_add (&vb->list, &ldb->v_comp); + break; + case VBLK_PRT3: + /* Sort by the partition's start sector. */ + list_for_each (item, &ldb->v_part) { + struct vblk *v = list_entry (item, struct vblk, list); + if ((v->vblk.part.disk_id == vb->vblk.part.disk_id) && + (v->vblk.part.start > vb->vblk.part.start)) { + list_add_tail (&vb->list, &v->list); + return true; + } + } + list_add_tail (&vb->list, &ldb->v_part); + break; + } + return true; +} + +/** + * ldm_frag_add - Add a VBLK fragment to a list + * @data: Raw fragment to be added to the list + * @size: Size of the raw fragment + * @frags: Linked list of VBLK fragments + * + * Fragmented VBLKs may not be consecutive in the database, so they are placed + * in a list so they can be pieced together later. 
+ * + * Return: 'true' Success, the VBLK was added to the list + * 'false' Error, a problem occurred + */ +static bool ldm_frag_add (const u8 *data, int size, struct list_head *frags) +{ + struct frag *f; + struct list_head *item; + int rec, num, group; + + BUG_ON (!data || !frags); + + if (size < 2 * VBLK_SIZE_HEAD) { + ldm_error("Value of size is to small."); + return false; + } + + group = get_unaligned_be32(data + 0x08); + rec = get_unaligned_be16(data + 0x0C); + num = get_unaligned_be16(data + 0x0E); + if ((num < 1) || (num > 4)) { + ldm_error ("A VBLK claims to have %d parts.", num); + return false; + } + if (rec >= num) { + ldm_error("REC value (%d) exceeds NUM value (%d)", rec, num); + return false; + } + + list_for_each (item, frags) { + f = list_entry (item, struct frag, list); + if (f->group == group) + goto found; + } + + f = kmalloc (sizeof (*f) + size*num, GFP_KERNEL); + if (!f) { + ldm_crit ("Out of memory."); + return false; + } + + f->group = group; + f->num = num; + f->rec = rec; + f->map = 0xFF << num; + + list_add_tail (&f->list, frags); +found: + if (rec >= f->num) { + ldm_error("REC value (%d) exceeds NUM value (%d)", rec, f->num); + return false; + } + + if (f->map & (1 << rec)) { + ldm_error ("Duplicate VBLK, part %d.", rec); + f->map &= 0x7F; /* Mark the group as broken */ + return false; + } + + f->map |= (1 << rec); + + data += VBLK_SIZE_HEAD; + size -= VBLK_SIZE_HEAD; + + memcpy (f->data+rec*(size-VBLK_SIZE_HEAD)+VBLK_SIZE_HEAD, data, size); + + return true; +} + +/** + * ldm_frag_free - Free a linked list of VBLK fragments + * @list: Linked list of fragments + * + * Free a linked list of VBLK fragments + * + * Return: none + */ +static void ldm_frag_free (struct list_head *list) +{ + struct list_head *item, *tmp; + + BUG_ON (!list); + + list_for_each_safe (item, tmp, list) + kfree (list_entry (item, struct frag, list)); +} + +/** + * ldm_frag_commit - Validate fragmented VBLKs and add them to the database + * @frags: Linked list of VBLK 
fragments + * @ldb: Cache of the database structures + * + * Now that all the fragmented VBLKs have been collected, they must be added to + * the database for later use. + * + * Return: 'true' All the fragments were added successfully + * 'false' One or more of the fragments were invalid + */ +static bool ldm_frag_commit (struct list_head *frags, struct ldmdb *ldb) +{ + struct frag *f; + struct list_head *item; + + BUG_ON (!frags || !ldb); + + list_for_each (item, frags) { + f = list_entry (item, struct frag, list); + + if (f->map != 0xFF) { + ldm_error ("VBLK group %d is incomplete (0x%02x).", + f->group, f->map); + return false; + } + + if (!ldm_ldmdb_add (f->data, f->num*ldb->vm.vblk_size, ldb)) + return false; /* Already logged */ + } + return true; +} + +/** + * ldm_get_vblks - Read the on-disk database of VBLKs into memory + * @state: Partition check state including device holding the LDM Database + * @base: Offset, into @state->bdev, of the database + * @ldb: Cache of the database structures + * + * To use the information from the VBLKs, they need to be read from the disk, + * unpacked and validated. We cache them in @ldb according to their type. 
+ * + * Return: 'true' All the VBLKs were read successfully + * 'false' An error occurred + */ +static bool ldm_get_vblks(struct parsed_partitions *state, unsigned long base, + struct ldmdb *ldb) +{ + int size, perbuf, skip, finish, s, v, recs; + u8 *data = NULL; + Sector sect; + bool result = false; + LIST_HEAD (frags); + + BUG_ON(!state || !ldb); + + size = ldb->vm.vblk_size; + perbuf = 512 / size; + skip = ldb->vm.vblk_offset >> 9; /* Bytes to sectors */ + finish = (size * ldb->vm.last_vblk_seq) >> 9; + + for (s = skip; s < finish; s++) { /* For each sector */ + data = read_part_sector(state, base + OFF_VMDB + s, &sect); + if (!data) { + ldm_crit ("Disk read failed."); + goto out; + } + + for (v = 0; v < perbuf; v++, data+=size) { /* For each vblk */ + if (MAGIC_VBLK != get_unaligned_be32(data)) { + ldm_error ("Expected to find a VBLK."); + goto out; + } + + recs = get_unaligned_be16(data + 0x0E); /* Number of records */ + if (recs == 1) { + if (!ldm_ldmdb_add (data, size, ldb)) + goto out; /* Already logged */ + } else if (recs > 1) { + if (!ldm_frag_add (data, size, &frags)) + goto out; /* Already logged */ + } + /* else Record is not in use, ignore it. */ + } + put_dev_sector (sect); + data = NULL; + } + + result = ldm_frag_commit (&frags, ldb); /* Failures, already logged */ +out: + if (data) + put_dev_sector (sect); + ldm_frag_free (&frags); + + return result; +} + +/** + * ldm_free_vblks - Free a linked list of vblk's + * @lh: Head of a linked list of struct vblk + * + * Free a list of vblk's and free the memory used to maintain the list. 
+ * + * Return: none + */ +static void ldm_free_vblks (struct list_head *lh) +{ + struct list_head *item, *tmp; + + BUG_ON (!lh); + + list_for_each_safe (item, tmp, lh) + kfree (list_entry (item, struct vblk, list)); +} + + +/** + * ldm_partition - Find out whether a device is a dynamic disk and handle it + * @state: Partition check state including device holding the LDM Database + * + * This determines whether the device @bdev is a dynamic disk and if so creates + * the partitions necessary in the gendisk structure pointed to by @hd. + * + * We create a dummy device 1, which contains the LDM database, and then create + * each partition described by the LDM database in sequence as devices 2+. For + * example, if the device is hda, we would have: hda1: LDM database, hda2, hda3, + * and so on: the actual data containing partitions. + * + * Return: 1 Success, @state->bdev is a dynamic disk and we handled it + * 0 Success, @state->bdev is not a dynamic disk + * -1 An error occurred before enough information had been read + * Or @state->bdev is a dynamic disk, but it may be corrupted + */ +int ldm_partition(struct parsed_partitions *state) +{ + struct ldmdb *ldb; + unsigned long base; + int result = -1; + + BUG_ON(!state); + + /* Look for signs of a Dynamic Disk */ + if (!ldm_validate_partition_table(state)) + return 0; + + ldb = kmalloc (sizeof (*ldb), GFP_KERNEL); + if (!ldb) { + ldm_crit ("Out of memory."); + goto out; + } + + /* Parse and check privheads. */ + if (!ldm_validate_privheads(state, &ldb->ph)) + goto out; /* Already logged */ + + /* All further references are relative to base (database start). */ + base = ldb->ph.config_start; + + /* Parse and check tocs and vmdb. 
*/ + if (!ldm_validate_tocblocks(state, base, ldb) || + !ldm_validate_vmdb(state, base, ldb)) + goto out; /* Already logged */ + + /* Initialize vblk lists in ldmdb struct */ + INIT_LIST_HEAD (&ldb->v_dgrp); + INIT_LIST_HEAD (&ldb->v_disk); + INIT_LIST_HEAD (&ldb->v_volu); + INIT_LIST_HEAD (&ldb->v_comp); + INIT_LIST_HEAD (&ldb->v_part); + + if (!ldm_get_vblks(state, base, ldb)) { + ldm_crit ("Failed to read the VBLKs from the database."); + goto cleanup; + } + + /* Finally, create the data partition devices. */ + if (ldm_create_data_partitions(state, ldb)) { + ldm_debug ("Parsed LDM database successfully."); + result = 1; + } + /* else Already logged */ + +cleanup: + ldm_free_vblks (&ldb->v_dgrp); + ldm_free_vblks (&ldb->v_disk); + ldm_free_vblks (&ldb->v_volu); + ldm_free_vblks (&ldb->v_comp); + ldm_free_vblks (&ldb->v_part); +out: + kfree (ldb); + return result; +} diff --git a/block/partitions/ldm.h b/block/partitions/ldm.h new file mode 100644 index 000000000000..374242c0971a --- /dev/null +++ b/block/partitions/ldm.h @@ -0,0 +1,215 @@ +/** + * ldm - Part of the Linux-NTFS project. + * + * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> + * Copyright (c) 2001-2007 Anton Altaparmakov + * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> + * + * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program (in the main directory of the Linux-NTFS source + * in the file COPYING); if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _FS_PT_LDM_H_ +#define _FS_PT_LDM_H_ + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/genhd.h> +#include <linux/fs.h> +#include <asm/unaligned.h> +#include <asm/byteorder.h> + +struct parsed_partitions; + +/* Magic numbers in CPU format. */ +#define MAGIC_VMDB 0x564D4442 /* VMDB */ +#define MAGIC_VBLK 0x56424C4B /* VBLK */ +#define MAGIC_PRIVHEAD 0x5052495648454144ULL /* PRIVHEAD */ +#define MAGIC_TOCBLOCK 0x544F43424C4F434BULL /* TOCBLOCK */ + +/* The defined vblk types. */ +#define VBLK_VOL5 0x51 /* Volume, version 5 */ +#define VBLK_CMP3 0x32 /* Component, version 3 */ +#define VBLK_PRT3 0x33 /* Partition, version 3 */ +#define VBLK_DSK3 0x34 /* Disk, version 3 */ +#define VBLK_DSK4 0x44 /* Disk, version 4 */ +#define VBLK_DGR3 0x35 /* Disk Group, version 3 */ +#define VBLK_DGR4 0x45 /* Disk Group, version 4 */ + +/* vblk flags indicating extra information will be present */ +#define VBLK_FLAG_COMP_STRIPE 0x10 +#define VBLK_FLAG_PART_INDEX 0x08 +#define VBLK_FLAG_DGR3_IDS 0x08 +#define VBLK_FLAG_DGR4_IDS 0x08 +#define VBLK_FLAG_VOLU_ID1 0x08 +#define VBLK_FLAG_VOLU_ID2 0x20 +#define VBLK_FLAG_VOLU_SIZE 0x80 +#define VBLK_FLAG_VOLU_DRIVE 0x02 + +/* size of a vblk's static parts */ +#define VBLK_SIZE_HEAD 16 +#define VBLK_SIZE_CMP3 22 /* Name and version */ +#define VBLK_SIZE_DGR3 12 +#define VBLK_SIZE_DGR4 44 +#define VBLK_SIZE_DSK3 12 +#define VBLK_SIZE_DSK4 45 +#define VBLK_SIZE_PRT3 28 +#define VBLK_SIZE_VOL5 58 + +/* component types */ +#define COMP_STRIPE 0x01 /* Stripe-set */ +#define COMP_BASIC 0x02 /* Basic disk */ +#define COMP_RAID 0x03 /* Raid-set */ + +/* Other constants. */ +#define LDM_DB_SIZE 2048 /* Size in sectors (= 1MiB). 
*/ + +#define OFF_PRIV1 6 /* Offset of the first privhead + relative to the start of the + device in sectors */ + +/* Offsets to structures within the LDM Database in sectors. */ +#define OFF_PRIV2 1856 /* Backup private headers. */ +#define OFF_PRIV3 2047 + +#define OFF_TOCB1 1 /* Tables of contents. */ +#define OFF_TOCB2 2 +#define OFF_TOCB3 2045 +#define OFF_TOCB4 2046 + +#define OFF_VMDB 17 /* List of partitions. */ + +#define LDM_PARTITION 0x42 /* Formerly SFS (Landis). */ + +#define TOC_BITMAP1 "config" /* Names of the two defined */ +#define TOC_BITMAP2 "log" /* bitmaps in the TOCBLOCK. */ + +/* Borrowed from msdos.c */ +#define SYS_IND(p) (get_unaligned(&(p)->sys_ind)) + +struct frag { /* VBLK Fragment handling */ + struct list_head list; + u32 group; + u8 num; /* Total number of records */ + u8 rec; /* This is record number n */ + u8 map; /* Which portions are in use */ + u8 data[0]; +}; + +/* In memory LDM database structures. */ + +#define GUID_SIZE 16 + +struct privhead { /* Offsets and sizes are in sectors. */ + u16 ver_major; + u16 ver_minor; + u64 logical_disk_start; + u64 logical_disk_size; + u64 config_start; + u64 config_size; + u8 disk_id[GUID_SIZE]; +}; + +struct tocblock { /* We have exactly two bitmaps. 
*/ + u8 bitmap1_name[16]; + u64 bitmap1_start; + u64 bitmap1_size; + u8 bitmap2_name[16]; + u64 bitmap2_start; + u64 bitmap2_size; +}; + +struct vmdb { /* VMDB: The database header */ + u16 ver_major; + u16 ver_minor; + u32 vblk_size; + u32 vblk_offset; + u32 last_vblk_seq; +}; + +struct vblk_comp { /* VBLK Component */ + u8 state[16]; + u64 parent_id; + u8 type; + u8 children; + u16 chunksize; +}; + +struct vblk_dgrp { /* VBLK Disk Group */ + u8 disk_id[64]; +}; + +struct vblk_disk { /* VBLK Disk */ + u8 disk_id[GUID_SIZE]; + u8 alt_name[128]; +}; + +struct vblk_part { /* VBLK Partition */ + u64 start; + u64 size; /* start, size and vol_off in sectors */ + u64 volume_offset; + u64 parent_id; + u64 disk_id; + u8 partnum; +}; + +struct vblk_volu { /* VBLK Volume */ + u8 volume_type[16]; + u8 volume_state[16]; + u8 guid[16]; + u8 drive_hint[4]; + u64 size; + u8 partition_type; +}; + +struct vblk_head { /* VBLK standard header */ + u32 group; + u16 rec; + u16 nrec; +}; + +struct vblk { /* Generalised VBLK */ + u8 name[64]; + u64 obj_id; + u32 sequence; + u8 flags; + u8 type; + union { + struct vblk_comp comp; + struct vblk_dgrp dgrp; + struct vblk_disk disk; + struct vblk_part part; + struct vblk_volu volu; + } vblk; + struct list_head list; +}; + +struct ldmdb { /* Cache of the database */ + struct privhead ph; + struct tocblock toc; + struct vmdb vm; + struct list_head v_dgrp; + struct list_head v_disk; + struct list_head v_volu; + struct list_head v_comp; + struct list_head v_part; +}; + +int ldm_partition(struct parsed_partitions *state); + +#endif /* _FS_PT_LDM_H_ */ + diff --git a/block/partitions/mac.c b/block/partitions/mac.c new file mode 100644 index 000000000000..11f688bd76c5 --- /dev/null +++ b/block/partitions/mac.c @@ -0,0 +1,134 @@ +/* + * fs/partitions/mac.c + * + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + */ + +#include <linux/ctype.h> +#include "check.h" +#include 
"mac.h" + +#ifdef CONFIG_PPC_PMAC +#include <asm/machdep.h> +extern void note_bootable_part(dev_t dev, int part, int goodness); +#endif + +/* + * Code to understand MacOS partition tables. + */ + +static inline void mac_fix_string(char *stg, int len) +{ + int i; + + for (i = len - 1; i >= 0 && stg[i] == ' '; i--) + stg[i] = 0; +} + +int mac_partition(struct parsed_partitions *state) +{ + Sector sect; + unsigned char *data; + int slot, blocks_in_map; + unsigned secsize; +#ifdef CONFIG_PPC_PMAC + int found_root = 0; + int found_root_goodness = 0; +#endif + struct mac_partition *part; + struct mac_driver_desc *md; + + /* Get 0th block and look at the first partition map entry. */ + md = read_part_sector(state, 0, &sect); + if (!md) + return -1; + if (be16_to_cpu(md->signature) != MAC_DRIVER_MAGIC) { + put_dev_sector(sect); + return 0; + } + secsize = be16_to_cpu(md->block_size); + put_dev_sector(sect); + data = read_part_sector(state, secsize/512, &sect); + if (!data) + return -1; + part = (struct mac_partition *) (data + secsize%512); + if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) { + put_dev_sector(sect); + return 0; /* not a MacOS disk */ + } + blocks_in_map = be32_to_cpu(part->map_count); + if (blocks_in_map < 0 || blocks_in_map >= DISK_MAX_PARTS) { + put_dev_sector(sect); + return 0; + } + strlcat(state->pp_buf, " [mac]", PAGE_SIZE); + for (slot = 1; slot <= blocks_in_map; ++slot) { + int pos = slot * secsize; + put_dev_sector(sect); + data = read_part_sector(state, pos/512, &sect); + if (!data) + return -1; + part = (struct mac_partition *) (data + pos%512); + if (be16_to_cpu(part->signature) != MAC_PARTITION_MAGIC) + break; + put_partition(state, slot, + be32_to_cpu(part->start_block) * (secsize/512), + be32_to_cpu(part->block_count) * (secsize/512)); + + if (!strnicmp(part->type, "Linux_RAID", 10)) + state->parts[slot].flags = ADDPART_FLAG_RAID; +#ifdef CONFIG_PPC_PMAC + /* + * If this is the first bootable partition, tell the + * setup code, in case it 
wants 
to make this the root. + */ + if (machine_is(powermac)) { + int goodness = 0; + + mac_fix_string(part->processor, 16); + mac_fix_string(part->name, 32); + mac_fix_string(part->type, 32); + + if ((be32_to_cpu(part->status) & MAC_STATUS_BOOTABLE) + && strcasecmp(part->processor, "powerpc") == 0) + goodness++; + + if (strcasecmp(part->type, "Apple_UNIX_SVR2") == 0 + || (strnicmp(part->type, "Linux", 5) == 0 + && strcasecmp(part->type, "Linux_swap") != 0)) { + int i, l; + + goodness++; + l = strlen(part->name); + if (strcmp(part->name, "/") == 0) + goodness++; + for (i = 0; i <= l - 4; ++i) { + if (strnicmp(part->name + i, "root", + 4) == 0) { + goodness += 2; + break; + } + } + if (strnicmp(part->name, "swap", 4) == 0) + goodness--; + } + + if (goodness > found_root_goodness) { + found_root = slot; + found_root_goodness = goodness; + } + } +#endif /* CONFIG_PPC_PMAC */ + } +#ifdef CONFIG_PPC_PMAC + if (found_root_goodness) + note_bootable_part(state->bdev->bd_dev, found_root, + found_root_goodness); +#endif + + put_dev_sector(sect); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; +} diff --git a/block/partitions/mac.h b/block/partitions/mac.h new file mode 100644 index 000000000000..3c7d98436380 --- /dev/null +++ b/block/partitions/mac.h @@ -0,0 +1,44 @@ +/* + * fs/partitions/mac.h + */ + +#define MAC_PARTITION_MAGIC 0x504d + +/* type field value for A/UX or other Unix partitions */ +#define APPLE_AUX_TYPE "Apple_UNIX_SVR2" + +struct mac_partition { + __be16 signature; /* expected to be MAC_PARTITION_MAGIC */ + __be16 res1; + __be32 map_count; /* # blocks in partition map */ + __be32 start_block; /* absolute starting block # of partition */ + __be32 block_count; /* number of blocks in partition */ + char name[32]; /* partition name */ + char type[32]; /* string type description */ + __be32 data_start; /* rel block # of first data block */ + __be32 data_count; /* number of data blocks */ + __be32 status; /* partition status bits */ + __be32 boot_start; + __be32 
boot_size; + __be32 boot_load; + __be32 boot_load2; + __be32 boot_entry; + __be32 boot_entry2; + __be32 boot_cksum; + char processor[16]; /* identifies ISA of boot */ + /* there is more stuff after this that we don't need */ +}; + +#define MAC_STATUS_BOOTABLE 8 /* partition is bootable */ + +#define MAC_DRIVER_MAGIC 0x4552 + +/* Driver descriptor structure, in block 0 */ +struct mac_driver_desc { + __be16 signature; /* expected to be MAC_DRIVER_MAGIC */ + __be16 block_size; + __be32 block_count; + /* ... more stuff */ +}; + +int mac_partition(struct parsed_partitions *state); diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c new file mode 100644 index 000000000000..5f79a6677c69 --- /dev/null +++ b/block/partitions/msdos.c @@ -0,0 +1,552 @@ +/* + * fs/partitions/msdos.c + * + * Code extracted from drivers/block/genhd.c + * Copyright (C) 1991-1998 Linus Torvalds + * + * Thanks to Branko Lankester, lankeste@fwi.uva.nl, who found a bug + * in the early extended-partition checks and added DM partitions + * + * Support for DiskManager v6.0x added by Mark Lord, + * with information provided by OnTrack. This now works for linux fdisk + * and LILO, as well as loadlin and bootln. Note that disks other than + * /dev/hda *must* have a "DOS" type 0x51 partition in the first slot (hda1). + * + * More flexible handling of extended partitions - aeb, 950831 + * + * Check partition table on IDE disks for common CHS translations + * + * Re-organised Feb 1998 Russell King + */ +#include <linux/msdos_fs.h> + +#include "check.h" +#include "msdos.h" +#include "efi.h" + +/* + * Many architectures don't like unaligned accesses, while + * the nr_sects and start_sect partition table entries are + * at a 2 (mod 4) address. 
+ */ +#include <asm/unaligned.h> + +#define SYS_IND(p) get_unaligned(&p->sys_ind) + +static inline sector_t nr_sects(struct partition *p) +{ + return (sector_t)get_unaligned_le32(&p->nr_sects); +} + +static inline sector_t start_sect(struct partition *p) +{ + return (sector_t)get_unaligned_le32(&p->start_sect); +} + +static inline int is_extended_partition(struct partition *p) +{ + return (SYS_IND(p) == DOS_EXTENDED_PARTITION || + SYS_IND(p) == WIN98_EXTENDED_PARTITION || + SYS_IND(p) == LINUX_EXTENDED_PARTITION); +} + +#define MSDOS_LABEL_MAGIC1 0x55 +#define MSDOS_LABEL_MAGIC2 0xAA + +static inline int +msdos_magic_present(unsigned char *p) +{ + return (p[0] == MSDOS_LABEL_MAGIC1 && p[1] == MSDOS_LABEL_MAGIC2); +} + +/* Value is EBCDIC 'IBMA' */ +#define AIX_LABEL_MAGIC1 0xC9 +#define AIX_LABEL_MAGIC2 0xC2 +#define AIX_LABEL_MAGIC3 0xD4 +#define AIX_LABEL_MAGIC4 0xC1 +static int aix_magic_present(struct parsed_partitions *state, unsigned char *p) +{ + struct partition *pt = (struct partition *) (p + 0x1be); + Sector sect; + unsigned char *d; + int slot, ret = 0; + + if (!(p[0] == AIX_LABEL_MAGIC1 && + p[1] == AIX_LABEL_MAGIC2 && + p[2] == AIX_LABEL_MAGIC3 && + p[3] == AIX_LABEL_MAGIC4)) + return 0; + /* Assume the partition table is valid if Linux partitions exists */ + for (slot = 1; slot <= 4; slot++, pt++) { + if (pt->sys_ind == LINUX_SWAP_PARTITION || + pt->sys_ind == LINUX_RAID_PARTITION || + pt->sys_ind == LINUX_DATA_PARTITION || + pt->sys_ind == LINUX_LVM_PARTITION || + is_extended_partition(pt)) + return 0; + } + d = read_part_sector(state, 7, §); + if (d) { + if (d[0] == '_' && d[1] == 'L' && d[2] == 'V' && d[3] == 'M') + ret = 1; + put_dev_sector(sect); + }; + return ret; +} + +/* + * Create devices for each logical partition in an extended partition. + * The logical partitions form a linked list, with each entry being + * a partition table with two entries. 
The first entry + * is the real data partition (with a start relative to the partition + * table start). The second is a pointer to the next logical partition + * (with a start relative to the entire extended partition). + * We do not create a Linux partition for the partition tables, but + * only for the actual data partitions. + */ + +static void parse_extended(struct parsed_partitions *state, + sector_t first_sector, sector_t first_size) +{ + struct partition *p; + Sector sect; + unsigned char *data; + sector_t this_sector, this_size; + sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; + int loopct = 0; /* number of links followed + without finding a data partition */ + int i; + + this_sector = first_sector; + this_size = first_size; + + while (1) { + if (++loopct > 100) + return; + if (state->next == state->limit) + return; + data = read_part_sector(state, this_sector, §); + if (!data) + return; + + if (!msdos_magic_present(data + 510)) + goto done; + + p = (struct partition *) (data + 0x1be); + + /* + * Usually, the first entry is the real data partition, + * the 2nd entry is the next extended partition, or empty, + * and the 3rd and 4th entries are unused. + * However, DRDOS sometimes has the extended partition as + * the first entry (when the data partition is empty), + * and OS/2 seems to use all four entries. 
+ */ + + /* + * First process the data partition(s) + */ + for (i=0; i<4; i++, p++) { + sector_t offs, size, next; + if (!nr_sects(p) || is_extended_partition(p)) + continue; + + /* Check the 3rd and 4th entries - + these sometimes contain random garbage */ + offs = start_sect(p)*sector_size; + size = nr_sects(p)*sector_size; + next = this_sector + offs; + if (i >= 2) { + if (offs + size > this_size) + continue; + if (next < first_sector) + continue; + if (next + size > first_sector + first_size) + continue; + } + + put_partition(state, state->next, next, size); + if (SYS_IND(p) == LINUX_RAID_PARTITION) + state->parts[state->next].flags = ADDPART_FLAG_RAID; + loopct = 0; + if (++state->next == state->limit) + goto done; + } + /* + * Next, process the (first) extended partition, if present. + * (So far, there seems to be no reason to make + * parse_extended() recursive and allow a tree + * of extended partitions.) + * It should be a link to the next logical partition. + */ + p -= 4; + for (i=0; i<4; i++, p++) + if (nr_sects(p) && is_extended_partition(p)) + break; + if (i == 4) + goto done; /* nothing left to do */ + + this_sector = first_sector + start_sect(p) * sector_size; + this_size = nr_sects(p) * sector_size; + put_dev_sector(sect); + } +done: + put_dev_sector(sect); +} + +/* james@bpgc.com: Solaris has a nasty indicator: 0x82 which also + indicates linux swap. Be careful before believing this is Solaris. 
*/ + +static void parse_solaris_x86(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_SOLARIS_X86_PARTITION + Sector sect; + struct solaris_x86_vtoc *v; + int i; + short max_nparts; + + v = read_part_sector(state, offset + 1, §); + if (!v) + return; + if (le32_to_cpu(v->v_sanity) != SOLARIS_X86_VTOC_SANE) { + put_dev_sector(sect); + return; + } + { + char tmp[1 + BDEVNAME_SIZE + 10 + 11 + 1]; + + snprintf(tmp, sizeof(tmp), " %s%d: <solaris:", state->name, origin); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + if (le32_to_cpu(v->v_version) != 1) { + char tmp[64]; + + snprintf(tmp, sizeof(tmp), " cannot handle version %d vtoc>\n", + le32_to_cpu(v->v_version)); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + put_dev_sector(sect); + return; + } + /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ + max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; + for (i=0; i<max_nparts && state->next<state->limit; i++) { + struct solaris_x86_slice *s = &v->v_slice[i]; + char tmp[3 + 10 + 1 + 1]; + + if (s->s_size == 0) + continue; + snprintf(tmp, sizeof(tmp), " [s%d]", i); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + /* solaris partitions are relative to current MS-DOS + * one; must add the offset of the current partition */ + put_partition(state, state->next++, + le32_to_cpu(s->s_start)+offset, + le32_to_cpu(s->s_size)); + } + put_dev_sector(sect); + strlcat(state->pp_buf, " >\n", PAGE_SIZE); +#endif +} + +#if defined(CONFIG_BSD_DISKLABEL) +/* + * Create devices for BSD partitions listed in a disklabel, under a + * dos-like partition. See parse_extended() for more information. 
+ */ +static void parse_bsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin, char *flavour, + int max_partitions) +{ + Sector sect; + struct bsd_disklabel *l; + struct bsd_partition *p; + char tmp[64]; + + l = read_part_sector(state, offset + 1, §); + if (!l) + return; + if (le32_to_cpu(l->d_magic) != BSD_DISKMAGIC) { + put_dev_sector(sect); + return; + } + + snprintf(tmp, sizeof(tmp), " %s%d: <%s:", state->name, origin, flavour); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + + if (le16_to_cpu(l->d_npartitions) < max_partitions) + max_partitions = le16_to_cpu(l->d_npartitions); + for (p = l->d_partitions; p - l->d_partitions < max_partitions; p++) { + sector_t bsd_start, bsd_size; + + if (state->next == state->limit) + break; + if (p->p_fstype == BSD_FS_UNUSED) + continue; + bsd_start = le32_to_cpu(p->p_offset); + bsd_size = le32_to_cpu(p->p_size); + if (offset == bsd_start && size == bsd_size) + /* full parent partition, we have it already */ + continue; + if (offset > bsd_start || offset+size < bsd_start+bsd_size) { + strlcat(state->pp_buf, "bad subpartition - ignored\n", PAGE_SIZE); + continue; + } + put_partition(state, state->next++, bsd_start, bsd_size); + } + put_dev_sector(sect); + if (le16_to_cpu(l->d_npartitions) > max_partitions) { + snprintf(tmp, sizeof(tmp), " (ignored %d more)", + le16_to_cpu(l->d_npartitions) - max_partitions); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + strlcat(state->pp_buf, " >\n", PAGE_SIZE); +} +#endif + +static void parse_freebsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_BSD_DISKLABEL + parse_bsd(state, offset, size, origin, "bsd", BSD_MAXPARTITIONS); +#endif +} + +static void parse_netbsd(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_BSD_DISKLABEL + parse_bsd(state, offset, size, origin, "netbsd", BSD_MAXPARTITIONS); +#endif +} + +static void parse_openbsd(struct parsed_partitions *state, + 
sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_BSD_DISKLABEL + parse_bsd(state, offset, size, origin, "openbsd", + OPENBSD_MAXPARTITIONS); +#endif +} + +/* + * Create devices for Unixware partitions listed in a disklabel, under a + * dos-like partition. See parse_extended() for more information. + */ +static void parse_unixware(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_UNIXWARE_DISKLABEL + Sector sect; + struct unixware_disklabel *l; + struct unixware_slice *p; + + l = read_part_sector(state, offset + 29, §); + if (!l) + return; + if (le32_to_cpu(l->d_magic) != UNIXWARE_DISKMAGIC || + le32_to_cpu(l->vtoc.v_magic) != UNIXWARE_DISKMAGIC2) { + put_dev_sector(sect); + return; + } + { + char tmp[1 + BDEVNAME_SIZE + 10 + 12 + 1]; + + snprintf(tmp, sizeof(tmp), " %s%d: <unixware:", state->name, origin); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + p = &l->vtoc.v_slice[1]; + /* I omit the 0th slice as it is the same as whole disk. */ + while (p - &l->vtoc.v_slice[0] < UNIXWARE_NUMSLICE) { + if (state->next == state->limit) + break; + + if (p->s_label != UNIXWARE_FS_UNUSED) + put_partition(state, state->next++, + le32_to_cpu(p->start_sect), + le32_to_cpu(p->nr_sects)); + p++; + } + put_dev_sector(sect); + strlcat(state->pp_buf, " >\n", PAGE_SIZE); +#endif +} + +/* + * Minix 2.0.0/2.0.2 subpartition support. + * Anand Krishnamurthy <anandk@wiproge.med.ge.com> + * Rajeev V. Pillai <rajeevvp@yahoo.com> + */ +static void parse_minix(struct parsed_partitions *state, + sector_t offset, sector_t size, int origin) +{ +#ifdef CONFIG_MINIX_SUBPARTITION + Sector sect; + unsigned char *data; + struct partition *p; + int i; + + data = read_part_sector(state, offset, §); + if (!data) + return; + + p = (struct partition *)(data + 0x1be); + + /* The first sector of a Minix partition can have either + * a secondary MBR describing its subpartitions, or + * the normal boot sector. 
*/ + if (msdos_magic_present (data + 510) && + SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ + char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; + + snprintf(tmp, sizeof(tmp), " %s%d: <minix:", state->name, origin); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + for (i = 0; i < MINIX_NR_SUBPARTITIONS; i++, p++) { + if (state->next == state->limit) + break; + /* add each partition in use */ + if (SYS_IND(p) == MINIX_PARTITION) + put_partition(state, state->next++, + start_sect(p), nr_sects(p)); + } + strlcat(state->pp_buf, " >\n", PAGE_SIZE); + } + put_dev_sector(sect); +#endif /* CONFIG_MINIX_SUBPARTITION */ +} + +static struct { + unsigned char id; + void (*parse)(struct parsed_partitions *, sector_t, sector_t, int); +} subtypes[] = { + {FREEBSD_PARTITION, parse_freebsd}, + {NETBSD_PARTITION, parse_netbsd}, + {OPENBSD_PARTITION, parse_openbsd}, + {MINIX_PARTITION, parse_minix}, + {UNIXWARE_PARTITION, parse_unixware}, + {SOLARIS_X86_PARTITION, parse_solaris_x86}, + {NEW_SOLARIS_X86_PARTITION, parse_solaris_x86}, + {0, NULL}, +}; + +int msdos_partition(struct parsed_partitions *state) +{ + sector_t sector_size = bdev_logical_block_size(state->bdev) / 512; + Sector sect; + unsigned char *data; + struct partition *p; + struct fat_boot_sector *fb; + int slot; + + data = read_part_sector(state, 0, §); + if (!data) + return -1; + if (!msdos_magic_present(data + 510)) { + put_dev_sector(sect); + return 0; + } + + if (aix_magic_present(state, data)) { + put_dev_sector(sect); + strlcat(state->pp_buf, " [AIX]", PAGE_SIZE); + return 0; + } + + /* + * Now that the 55aa signature is present, this is probably + * either the boot sector of a FAT filesystem or a DOS-type + * partition table. Reject this in case the boot indicator + * is not 0 or 0x80. 
+ */ + p = (struct partition *) (data + 0x1be); + for (slot = 1; slot <= 4; slot++, p++) { + if (p->boot_ind != 0 && p->boot_ind != 0x80) { + /* + * Even without a valid boot inidicator value + * its still possible this is valid FAT filesystem + * without a partition table. + */ + fb = (struct fat_boot_sector *) data; + if (slot == 1 && fb->reserved && fb->fats + && fat_valid_media(fb->media)) { + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; + } else { + put_dev_sector(sect); + return 0; + } + } + } + +#ifdef CONFIG_EFI_PARTITION + p = (struct partition *) (data + 0x1be); + for (slot = 1 ; slot <= 4 ; slot++, p++) { + /* If this is an EFI GPT disk, msdos should ignore it. */ + if (SYS_IND(p) == EFI_PMBR_OSTYPE_EFI_GPT) { + put_dev_sector(sect); + return 0; + } + } +#endif + p = (struct partition *) (data + 0x1be); + + /* + * Look for partitions in two passes: + * First find the primary and DOS-type extended partitions. + * On the second pass look inside *BSD, Unixware and Solaris partitions. + */ + + state->next = 5; + for (slot = 1 ; slot <= 4 ; slot++, p++) { + sector_t start = start_sect(p)*sector_size; + sector_t size = nr_sects(p)*sector_size; + if (!size) + continue; + if (is_extended_partition(p)) { + /* + * prevent someone doing mkfs or mkswap on an + * extended partition, but leave room for LILO + * FIXME: this uses one logical sector for > 512b + * sector, although it may not be enough/proper. 
+ */ + sector_t n = 2; + n = min(size, max(sector_size, n)); + put_partition(state, slot, start, n); + + strlcat(state->pp_buf, " <", PAGE_SIZE); + parse_extended(state, start, size); + strlcat(state->pp_buf, " >", PAGE_SIZE); + continue; + } + put_partition(state, slot, start, size); + if (SYS_IND(p) == LINUX_RAID_PARTITION) + state->parts[slot].flags = ADDPART_FLAG_RAID; + if (SYS_IND(p) == DM6_PARTITION) + strlcat(state->pp_buf, "[DM]", PAGE_SIZE); + if (SYS_IND(p) == EZD_PARTITION) + strlcat(state->pp_buf, "[EZD]", PAGE_SIZE); + } + + strlcat(state->pp_buf, "\n", PAGE_SIZE); + + /* second pass - output for each on a separate line */ + p = (struct partition *) (0x1be + data); + for (slot = 1 ; slot <= 4 ; slot++, p++) { + unsigned char id = SYS_IND(p); + int n; + + if (!nr_sects(p)) + continue; + + for (n = 0; subtypes[n].parse && id != subtypes[n].id; n++) + ; + + if (!subtypes[n].parse) + continue; + subtypes[n].parse(state, start_sect(p) * sector_size, + nr_sects(p) * sector_size, slot); + } + put_dev_sector(sect); + return 1; +} diff --git a/block/partitions/msdos.h b/block/partitions/msdos.h new file mode 100644 index 000000000000..38c781c490b3 --- /dev/null +++ b/block/partitions/msdos.h @@ -0,0 +1,8 @@ +/* + * fs/partitions/msdos.h + */ + +#define MSDOS_LABEL_MAGIC 0xAA55 + +int msdos_partition(struct parsed_partitions *state); + diff --git a/block/partitions/osf.c b/block/partitions/osf.c new file mode 100644 index 000000000000..764b86a01965 --- /dev/null +++ b/block/partitions/osf.c @@ -0,0 +1,86 @@ +/* + * fs/partitions/osf.c + * + * Code extracted from drivers/block/genhd.c + * + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + */ + +#include "check.h" +#include "osf.h" + +#define MAX_OSF_PARTITIONS 18 + +int osf_partition(struct parsed_partitions *state) +{ + int i; + int slot = 1; + unsigned int npartitions; + Sector sect; + unsigned char *data; + struct disklabel { + __le32 d_magic; + __le16 d_type,d_subtype; + u8 
d_typename[16]; + u8 d_packname[16]; + __le32 d_secsize; + __le32 d_nsectors; + __le32 d_ntracks; + __le32 d_ncylinders; + __le32 d_secpercyl; + __le32 d_secprtunit; + __le16 d_sparespertrack; + __le16 d_sparespercyl; + __le32 d_acylinders; + __le16 d_rpm, d_interleave, d_trackskew, d_cylskew; + __le32 d_headswitch, d_trkseek, d_flags; + __le32 d_drivedata[5]; + __le32 d_spare[5]; + __le32 d_magic2; + __le16 d_checksum; + __le16 d_npartitions; + __le32 d_bbsize, d_sbsize; + struct d_partition { + __le32 p_size; + __le32 p_offset; + __le32 p_fsize; + u8 p_fstype; + u8 p_frag; + __le16 p_cpg; + } d_partitions[MAX_OSF_PARTITIONS]; + } * label; + struct d_partition * partition; + + data = read_part_sector(state, 0, §); + if (!data) + return -1; + + label = (struct disklabel *) (data+64); + partition = label->d_partitions; + if (le32_to_cpu(label->d_magic) != DISKLABELMAGIC) { + put_dev_sector(sect); + return 0; + } + if (le32_to_cpu(label->d_magic2) != DISKLABELMAGIC) { + put_dev_sector(sect); + return 0; + } + npartitions = le16_to_cpu(label->d_npartitions); + if (npartitions > MAX_OSF_PARTITIONS) { + put_dev_sector(sect); + return 0; + } + for (i = 0 ; i < npartitions; i++, partition++) { + if (slot == state->limit) + break; + if (le32_to_cpu(partition->p_size)) + put_partition(state, slot, + le32_to_cpu(partition->p_offset), + le32_to_cpu(partition->p_size)); + slot++; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; +} diff --git a/block/partitions/osf.h b/block/partitions/osf.h new file mode 100644 index 000000000000..20ed2315ec16 --- /dev/null +++ b/block/partitions/osf.h @@ -0,0 +1,7 @@ +/* + * fs/partitions/osf.h + */ + +#define DISKLABELMAGIC (0x82564557UL) + +int osf_partition(struct parsed_partitions *state); diff --git a/block/partitions/sgi.c b/block/partitions/sgi.c new file mode 100644 index 000000000000..ea8a86dceaf4 --- /dev/null +++ b/block/partitions/sgi.c @@ -0,0 +1,82 @@ +/* + * fs/partitions/sgi.c + * + * Code 
extracted from drivers/block/genhd.c + */ + +#include "check.h" +#include "sgi.h" + +struct sgi_disklabel { + __be32 magic_mushroom; /* Big fat spliff... */ + __be16 root_part_num; /* Root partition number */ + __be16 swap_part_num; /* Swap partition number */ + s8 boot_file[16]; /* Name of boot file for ARCS */ + u8 _unused0[48]; /* Device parameter useless crapola.. */ + struct sgi_volume { + s8 name[8]; /* Name of volume */ + __be32 block_num; /* Logical block number */ + __be32 num_bytes; /* How big, in bytes */ + } volume[15]; + struct sgi_partition { + __be32 num_blocks; /* Size in logical blocks */ + __be32 first_block; /* First logical block */ + __be32 type; /* Type of this partition */ + } partitions[16]; + __be32 csum; /* Disk label checksum */ + __be32 _unused1; /* Padding */ +}; + +int sgi_partition(struct parsed_partitions *state) +{ + int i, csum; + __be32 magic; + int slot = 1; + unsigned int start, blocks; + __be32 *ui, cs; + Sector sect; + struct sgi_disklabel *label; + struct sgi_partition *p; + char b[BDEVNAME_SIZE]; + + label = read_part_sector(state, 0, §); + if (!label) + return -1; + p = &label->partitions[0]; + magic = label->magic_mushroom; + if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) { + /*printk("Dev %s SGI disklabel: bad magic %08x\n", + bdevname(bdev, b), be32_to_cpu(magic));*/ + put_dev_sector(sect); + return 0; + } + ui = ((__be32 *) (label + 1)) - 1; + for(csum = 0; ui >= ((__be32 *) label);) { + cs = *ui--; + csum += be32_to_cpu(cs); + } + if(csum) { + printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", + bdevname(state->bdev, b)); + put_dev_sector(sect); + return 0; + } + /* All SGI disk labels have 16 partitions, disks under Linux only + * have 15 minor's. Luckily there are always a few zero length + * partitions which we don't care about so we never overflow the + * current_minor. 
+ */ + for(i = 0; i < 16; i++, p++) { + blocks = be32_to_cpu(p->num_blocks); + start = be32_to_cpu(p->first_block); + if (blocks) { + put_partition(state, slot, start, blocks); + if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION) + state->parts[slot].flags = ADDPART_FLAG_RAID; + } + slot++; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; +} diff --git a/block/partitions/sgi.h b/block/partitions/sgi.h new file mode 100644 index 000000000000..b9553ebdd5a9 --- /dev/null +++ b/block/partitions/sgi.h @@ -0,0 +1,8 @@ +/* + * fs/partitions/sgi.h + */ + +extern int sgi_partition(struct parsed_partitions *state); + +#define SGI_LABEL_MAGIC 0x0be5a941 + diff --git a/block/partitions/sun.c b/block/partitions/sun.c new file mode 100644 index 000000000000..b5b6fcfb3d36 --- /dev/null +++ b/block/partitions/sun.c @@ -0,0 +1,122 @@ +/* + * fs/partitions/sun.c + * + * Code extracted from drivers/block/genhd.c + * + * Copyright (C) 1991-1998 Linus Torvalds + * Re-organised Feb 1998 Russell King + */ + +#include "check.h" +#include "sun.h" + +int sun_partition(struct parsed_partitions *state) +{ + int i; + __be16 csum; + int slot = 1; + __be16 *ush; + Sector sect; + struct sun_disklabel { + unsigned char info[128]; /* Informative text string */ + struct sun_vtoc { + __be32 version; /* Layout version */ + char volume[8]; /* Volume name */ + __be16 nparts; /* Number of partitions */ + struct sun_info { /* Partition hdrs, sec 2 */ + __be16 id; + __be16 flags; + } infos[8]; + __be16 padding; /* Alignment padding */ + __be32 bootinfo[3]; /* Info needed by mboot */ + __be32 sanity; /* To verify vtoc sanity */ + __be32 reserved[10]; /* Free space */ + __be32 timestamp[8]; /* Partition timestamp */ + } vtoc; + __be32 write_reinstruct; /* sectors to skip, writes */ + __be32 read_reinstruct; /* sectors to skip, reads */ + unsigned char spare[148]; /* Padding */ + __be16 rspeed; /* Disk rotational speed */ + __be16 pcylcount; /* Physical cylinder count */ + 
__be16 sparecyl; /* extra sects per cylinder */ + __be16 obs1; /* gap1 */ + __be16 obs2; /* gap2 */ + __be16 ilfact; /* Interleave factor */ + __be16 ncyl; /* Data cylinder count */ + __be16 nacyl; /* Alt. cylinder count */ + __be16 ntrks; /* Tracks per cylinder */ + __be16 nsect; /* Sectors per track */ + __be16 obs3; /* bhead - Label head offset */ + __be16 obs4; /* ppart - Physical Partition */ + struct sun_partition { + __be32 start_cylinder; + __be32 num_sectors; + } partitions[8]; + __be16 magic; /* Magic number */ + __be16 csum; /* Label xor'd checksum */ + } * label; + struct sun_partition *p; + unsigned long spc; + char b[BDEVNAME_SIZE]; + int use_vtoc; + int nparts; + + label = read_part_sector(state, 0, §); + if (!label) + return -1; + + p = label->partitions; + if (be16_to_cpu(label->magic) != SUN_LABEL_MAGIC) { +/* printk(KERN_INFO "Dev %s Sun disklabel: bad magic %04x\n", + bdevname(bdev, b), be16_to_cpu(label->magic)); */ + put_dev_sector(sect); + return 0; + } + /* Look at the checksum */ + ush = ((__be16 *) (label+1)) - 1; + for (csum = 0; ush >= ((__be16 *) label);) + csum ^= *ush--; + if (csum) { + printk("Dev %s Sun disklabel: Csum bad, label corrupted\n", + bdevname(state->bdev, b)); + put_dev_sector(sect); + return 0; + } + + /* Check to see if we can use the VTOC table */ + use_vtoc = ((be32_to_cpu(label->vtoc.sanity) == SUN_VTOC_SANITY) && + (be32_to_cpu(label->vtoc.version) == 1) && + (be16_to_cpu(label->vtoc.nparts) <= 8)); + + /* Use 8 partition entries if not specified in validated VTOC */ + nparts = (use_vtoc) ? be16_to_cpu(label->vtoc.nparts) : 8; + + /* + * So that old Linux-Sun partitions continue to work, + * alow the VTOC to be used under the additional condition ... 
+ */ + use_vtoc = use_vtoc || !(label->vtoc.sanity || + label->vtoc.version || label->vtoc.nparts); + spc = be16_to_cpu(label->ntrks) * be16_to_cpu(label->nsect); + for (i = 0; i < nparts; i++, p++) { + unsigned long st_sector; + unsigned int num_sectors; + + st_sector = be32_to_cpu(p->start_cylinder) * spc; + num_sectors = be32_to_cpu(p->num_sectors); + if (num_sectors) { + put_partition(state, slot, st_sector, num_sectors); + state->parts[slot].flags = 0; + if (use_vtoc) { + if (be16_to_cpu(label->vtoc.infos[i].id) == LINUX_RAID_PARTITION) + state->parts[slot].flags |= ADDPART_FLAG_RAID; + else if (be16_to_cpu(label->vtoc.infos[i].id) == SUN_WHOLE_DISK) + state->parts[slot].flags |= ADDPART_FLAG_WHOLEDISK; + } + } + slot++; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; +} diff --git a/block/partitions/sun.h b/block/partitions/sun.h new file mode 100644 index 000000000000..2424baa8319f --- /dev/null +++ b/block/partitions/sun.h @@ -0,0 +1,8 @@ +/* + * fs/partitions/sun.h + */ + +#define SUN_LABEL_MAGIC 0xDABE +#define SUN_VTOC_SANITY 0x600DDEEE + +int sun_partition(struct parsed_partitions *state); diff --git a/block/partitions/sysv68.c b/block/partitions/sysv68.c new file mode 100644 index 000000000000..9627ccffc1c4 --- /dev/null +++ b/block/partitions/sysv68.c @@ -0,0 +1,95 @@ +/* + * fs/partitions/sysv68.c + * + * Copyright (C) 2007 Philippe De Muyter <phdm@macqel.be> + */ + +#include "check.h" +#include "sysv68.h" + +/* + * Volume ID structure: on first 256-bytes sector of disk + */ + +struct volumeid { + u8 vid_unused[248]; + u8 vid_mac[8]; /* ASCII string "MOTOROLA" */ +}; + +/* + * config block: second 256-bytes sector on disk + */ + +struct dkconfig { + u8 ios_unused0[128]; + __be32 ios_slcblk; /* Slice table block number */ + __be16 ios_slccnt; /* Number of entries in slice table */ + u8 ios_unused1[122]; +}; + +/* + * combined volumeid and dkconfig block + */ + +struct dkblk0 { + struct volumeid dk_vid; + struct 
dkconfig dk_ios; +}; + +/* + * Slice Table Structure + */ + +struct slice { + __be32 nblocks; /* slice size (in blocks) */ + __be32 blkoff; /* block offset of slice */ +}; + + +int sysv68_partition(struct parsed_partitions *state) +{ + int i, slices; + int slot = 1; + Sector sect; + unsigned char *data; + struct dkblk0 *b; + struct slice *slice; + char tmp[64]; + + data = read_part_sector(state, 0, §); + if (!data) + return -1; + + b = (struct dkblk0 *)data; + if (memcmp(b->dk_vid.vid_mac, "MOTOROLA", sizeof(b->dk_vid.vid_mac))) { + put_dev_sector(sect); + return 0; + } + slices = be16_to_cpu(b->dk_ios.ios_slccnt); + i = be32_to_cpu(b->dk_ios.ios_slcblk); + put_dev_sector(sect); + + data = read_part_sector(state, i, §); + if (!data) + return -1; + + slices -= 1; /* last slice is the whole disk */ + snprintf(tmp, sizeof(tmp), "sysV68: %s(s%u)", state->name, slices); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + slice = (struct slice *)data; + for (i = 0; i < slices; i++, slice++) { + if (slot == state->limit) + break; + if (be32_to_cpu(slice->nblocks)) { + put_partition(state, slot, + be32_to_cpu(slice->blkoff), + be32_to_cpu(slice->nblocks)); + snprintf(tmp, sizeof(tmp), "(s%u)", i); + strlcat(state->pp_buf, tmp, PAGE_SIZE); + } + slot++; + } + strlcat(state->pp_buf, "\n", PAGE_SIZE); + put_dev_sector(sect); + return 1; +} diff --git a/block/partitions/sysv68.h b/block/partitions/sysv68.h new file mode 100644 index 000000000000..bf2f5ffa97ac --- /dev/null +++ b/block/partitions/sysv68.h @@ -0,0 +1 @@ +extern int sysv68_partition(struct parsed_partitions *state); diff --git a/block/partitions/ultrix.c b/block/partitions/ultrix.c new file mode 100644 index 000000000000..8dbaf9f77a99 --- /dev/null +++ b/block/partitions/ultrix.c @@ -0,0 +1,48 @@ +/* + * fs/partitions/ultrix.c + * + * Code extracted from drivers/block/genhd.c + * + * Re-organised Jul 1999 Russell King + */ + +#include "check.h" +#include "ultrix.h" + +int ultrix_partition(struct parsed_partitions *state) 
+{ + int i; + Sector sect; + unsigned char *data; + struct ultrix_disklabel { + s32 pt_magic; /* magic no. indicating part. info exits */ + s32 pt_valid; /* set by driver if pt is current */ + struct pt_info { + s32 pi_nblocks; /* no. of sectors */ + u32 pi_blkoff; /* block offset for start */ + } pt_part[8]; + } *label; + +#define PT_MAGIC 0x032957 /* Partition magic number */ +#define PT_VALID 1 /* Indicates if struct is valid */ + + data = read_part_sector(state, (16384 - sizeof(*label))/512, §); + if (!data) + return -1; + + label = (struct ultrix_disklabel *)(data + 512 - sizeof(*label)); + + if (label->pt_magic == PT_MAGIC && label->pt_valid == PT_VALID) { + for (i=0; i<8; i++) + if (label->pt_part[i].pi_nblocks) + put_partition(state, i+1, + label->pt_part[i].pi_blkoff, + label->pt_part[i].pi_nblocks); + put_dev_sector(sect); + strlcat(state->pp_buf, "\n", PAGE_SIZE); + return 1; + } else { + put_dev_sector(sect); + return 0; + } +} diff --git a/block/partitions/ultrix.h b/block/partitions/ultrix.h new file mode 100644 index 000000000000..a3cc00b2bded --- /dev/null +++ b/block/partitions/ultrix.h @@ -0,0 +1,5 @@ +/* + * fs/partitions/ultrix.h + */ + +int ultrix_partition(struct parsed_partitions *state); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 4f4230b79bb6..fbdf0d802ec4 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -565,7 +565,7 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod { int err; - if (!q || blk_get_queue(q)) + if (!q) return -ENXIO; switch (cmd) { @@ -686,7 +686,6 @@ int scsi_cmd_ioctl(struct request_queue *q, struct gendisk *bd_disk, fmode_t mod err = -ENOTTY; } - blk_put_queue(q); return err; } EXPORT_SYMBOL(scsi_cmd_ioctl); |