From 704126ad81b8cb7d3d70adb9ecb143f4d3fb38af Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 4 Jan 2009 16:28:52 +0800 Subject: VT-d: handle Invalidation Queue Error to avoid system hang When hardware detects any error with a descriptor from the invalidation queue, it stops fetching new descriptors from the queue until software clears the Invalidation Queue Error bit in the Fault Status register. Following fix handles the IQE so the kernel won't be trapped in an infinite loop. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..d2e3cbfba14f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -194,6 +194,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* FSTS_REG */ #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) +#define DMA_FSTS_IQE (1 << 4) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ @@ -328,7 +329,7 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); +extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); -- cgit v1.2.3 From 8fed43684174b68f04d01d1210fd00536af790df Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Feb 2009 20:28:24 +0100 Subject: ide: fix refcounting in device drivers During host driver module removal del_gendisk() results in a final put on drive->gendev and freeing the drive by drive_release_dev(). Convert device drivers from using struct kref to use struct device so device driver's object holds reference on ->gendev and prevents drive from prematurely going away. Also fix ->remove methods to not erroneously drop reference on a host driver by using only put_device() instead of ide*_put(). Reported-by: Stanislaw Gruszka Tested-by: Stanislaw Gruszka Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 27 ++++++++++++++++++--------- drivers/ide/ide-cd.h | 2 +- drivers/ide/ide-gd.c | 26 +++++++++++++++++--------- drivers/ide/ide-gd.h | 2 +- drivers/ide/ide-tape.c | 29 +++++++++++++++++++---------- include/linux/ide.h | 2 +- 6 files changed, 57 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 690475b834de..ddfbea41d296 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -55,7 +55,7 @@ static DEFINE_MUTEX(idecd_ref_mutex); -static void ide_cd_release(struct kref *); +static void ide_cd_release(struct device *); static struct cdrom_info *ide_cd_get(struct gendisk *disk) { @@ -67,7 +67,7 @@ static struct cdrom_info *ide_cd_get(struct gendisk *disk) if (ide_device_get(cd->drive)) cd = NULL; else - kref_get(&cd->kref); + get_device(&cd->dev); } mutex_unlock(&idecd_ref_mutex); @@ -79,7 +79,7 @@ static void ide_cd_put(struct cdrom_info *cd) ide_drive_t *drive = cd->drive; mutex_lock(&idecd_ref_mutex); - kref_put(&cd->kref, ide_cd_release); + put_device(&cd->dev); ide_device_put(drive); mutex_unlock(&idecd_ref_mutex); } @@ -1798,15 +1798,17 @@ static void ide_cd_remove(ide_drive_t *drive) ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); ide_proc_unregister_driver(drive, info->driver); - + device_del(&info->dev); del_gendisk(info->disk); - ide_cd_put(info); + mutex_lock(&idecd_ref_mutex); + put_device(&info->dev); + mutex_unlock(&idecd_ref_mutex); } -static void ide_cd_release(struct kref *kref) +static void ide_cd_release(struct device *dev) { - struct cdrom_info *info = to_ide_drv(kref, cdrom_info); + struct cdrom_info *info = to_ide_drv(dev, cdrom_info); struct cdrom_device_info *devinfo = &info->devinfo; ide_drive_t *drive = info->drive; struct gendisk *g = info->disk; @@ -2005,7 +2007,12 @@ static int ide_cd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&info->kref); + info->dev.parent = &drive->gendev; + info->dev.release = ide_cd_release; + dev_set_name(&info->dev, dev_name(&drive->gendev)); + + if (device_register(&info->dev)) + goto out_free_disk; info->drive = drive; info->driver = &ide_cdrom_driver; @@ -2019,7 +2026,7 @@ static int ide_cd_probe(ide_drive_t *drive) g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { - ide_cd_release(&info->kref); + put_device(&info->dev); goto failed; } @@ -2029,6 +2036,8 @@ static int ide_cd_probe(ide_drive_t *drive) add_disk(g); return 0; +out_free_disk: + put_disk(g); out_free_cd: kfree(info); failed: diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index ac40d6cb90a2..c878bfcf1116 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -80,7 +80,7 @@ struct cdrom_info { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* Buffer for table of contents. NULL if we haven't allocated a TOC buffer for this device yet. */ diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 7857b209c6df..047109419902 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -25,7 +25,7 @@ module_param(debug_mask, ulong, 0644); static DEFINE_MUTEX(ide_disk_ref_mutex); -static void ide_disk_release(struct kref *); +static void ide_disk_release(struct device *); static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) { @@ -37,7 +37,7 @@ static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) if (ide_device_get(idkp->drive)) idkp = NULL; else - kref_get(&idkp->kref); + get_device(&idkp->dev); } mutex_unlock(&ide_disk_ref_mutex); return idkp; @@ -48,7 +48,7 @@ static void ide_disk_put(struct ide_disk_obj *idkp) ide_drive_t *drive = idkp->drive; mutex_lock(&ide_disk_ref_mutex); - kref_put(&idkp->kref, ide_disk_release); + put_device(&idkp->dev); ide_device_put(drive); mutex_unlock(&ide_disk_ref_mutex); } @@ -66,17 +66,18 @@ static void ide_gd_remove(ide_drive_t *drive) struct gendisk *g = idkp->disk; ide_proc_unregister_driver(drive, idkp->driver); - + device_del(&idkp->dev); del_gendisk(g); - drive->disk_ops->flush(drive); - ide_disk_put(idkp); + mutex_lock(&ide_disk_ref_mutex); + put_device(&idkp->dev); + mutex_unlock(&ide_disk_ref_mutex); } -static void ide_disk_release(struct kref *kref) +static void ide_disk_release(struct device *dev) { - struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj); + struct ide_disk_obj *idkp = to_ide_drv(dev, ide_disk_obj); ide_drive_t *drive = idkp->drive; struct gendisk *g = idkp->disk; @@ -348,7 +349,12 @@ static int ide_gd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&idkp->kref); + idkp->dev.parent = &drive->gendev; + idkp->dev.release = ide_disk_release; + dev_set_name(&idkp->dev, dev_name(&drive->gendev)); + + if (device_register(&idkp->dev)) + goto out_free_disk; idkp->drive = drive; idkp->driver = &ide_gd_driver; @@ -373,6 +379,8 @@ static int ide_gd_probe(ide_drive_t *drive) add_disk(g); return 0; +out_free_disk: + put_disk(g); out_free_idkp: kfree(idkp); failed: diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h index a86779f0756b..b604bdd318a1 100644 --- a/drivers/ide/ide-gd.h +++ b/drivers/ide/ide-gd.h @@ -17,7 +17,7 @@ struct ide_disk_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; unsigned int openers; /* protected by BKL for now */ /* Last failed packet command */ diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d7ecd3c79757..bb450a7608c2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -169,7 +169,7 @@ typedef struct ide_tape_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* * failed_pc points to the last failed packet command, or contains @@ -267,7 +267,7 @@ static DEFINE_MUTEX(idetape_ref_mutex); static struct class *idetape_sysfs_class; -static void ide_tape_release(struct kref *); +static void ide_tape_release(struct device *); static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) { @@ -279,7 +279,7 @@ static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) if (ide_device_get(tape->drive)) tape = NULL; else - kref_get(&tape->kref); + get_device(&tape->dev); } mutex_unlock(&idetape_ref_mutex); return tape; @@ -290,7 +290,7 @@ static void ide_tape_put(struct ide_tape_obj *tape) ide_drive_t *drive = tape->drive; mutex_lock(&idetape_ref_mutex); - kref_put(&tape->kref, ide_tape_release); + put_device(&tape->dev); ide_device_put(drive); mutex_unlock(&idetape_ref_mutex); } @@ -308,7 +308,7 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) mutex_lock(&idetape_ref_mutex); tape = idetape_devs[i]; if (tape) - kref_get(&tape->kref); + get_device(&tape->dev); mutex_unlock(&idetape_ref_mutex); return tape; } @@ -2256,15 +2256,17 @@ static void ide_tape_remove(ide_drive_t *drive) idetape_tape_t *tape = drive->driver_data; ide_proc_unregister_driver(drive, tape->driver); - + device_del(&tape->dev); ide_unregister_region(tape->disk); - ide_tape_put(tape); + mutex_lock(&idetape_ref_mutex); + put_device(&tape->dev); + mutex_unlock(&idetape_ref_mutex); } -static void ide_tape_release(struct kref *kref) +static void ide_tape_release(struct device *dev) { - struct ide_tape_obj *tape = to_ide_drv(kref, ide_tape_obj); + struct ide_tape_obj *tape = to_ide_drv(dev, ide_tape_obj); ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; @@ -2407,7 +2409,12 @@ static int ide_tape_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&tape->kref); + tape->dev.parent = &drive->gendev; + tape->dev.release = ide_tape_release; + dev_set_name(&tape->dev, dev_name(&drive->gendev)); + + if (device_register(&tape->dev)) + goto out_free_disk; tape->drive = drive; tape->driver = &idetape_driver; @@ -2436,6 +2443,8 @@ static int ide_tape_probe(ide_drive_t *drive) return 0; +out_free_disk: + put_disk(g); out_free_tape: kfree(tape); failed: diff --git a/include/linux/ide.h b/include/linux/ide.h index 194da5a4b0d6..fe235b65207e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -663,7 +663,7 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev) container_of(dev, ide_drive_t, gendev) #define to_ide_drv(obj, cont_type) \ - container_of(obj, struct cont_type, kref) + container_of(obj, struct cont_type, dev) #define ide_drv_g(disk, cont_type) \ container_of((disk)->private_data, struct cont_type, driver) -- cgit v1.2.3 From 1e42807918d17e8c93bf14fbb74be84b141334c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 09:03:10 +0100 Subject: block: reduce stack footprint of blk_recount_segments() blk_recalc_rq_segments() requires a request structure passed in, which we don't have from blk_recount_segments(). So the latter allocates one on the stack, using > 400 bytes of stack for that. This can cause us to spill over one page of stack from ext4 at least: 0) 4560 400 blk_recount_segments+0x43/0x62 1) 4160 32 bio_phys_segments+0x1c/0x24 2) 4128 32 blk_rq_bio_prep+0x2a/0xf9 3) 4096 32 init_request_from_bio+0xf9/0xfe 4) 4064 112 __make_request+0x33c/0x3f6 5) 3952 144 generic_make_request+0x2d1/0x321 6) 3808 64 submit_bio+0xb9/0xc3 7) 3744 48 submit_bh+0xea/0x10e 8) 3696 368 ext4_mb_init_cache+0x257/0xa6a [ext4] 9) 3328 288 ext4_mb_regular_allocator+0x421/0xcd9 [ext4] 10) 3040 160 ext4_mb_new_blocks+0x211/0x4b4 [ext4] 11) 2880 336 ext4_ext_get_blocks+0xb61/0xd45 [ext4] 12) 2544 96 ext4_get_blocks_wrap+0xf2/0x200 [ext4] 13) 2448 80 ext4_da_get_block_write+0x6e/0x16b [ext4] 14) 2368 352 mpage_da_map_blocks+0x7e/0x4b3 [ext4] 15) 2016 352 ext4_da_writepages+0x2ce/0x43c [ext4] 16) 1664 32 do_writepages+0x2d/0x3c 17) 1632 144 __writeback_single_inode+0x162/0x2cd 18) 1488 96 generic_sync_sb_inodes+0x1e3/0x32b 19) 1392 16 sync_sb_inodes+0xe/0x10 20) 1376 48 writeback_inodes+0x69/0xb3 21) 1328 208 balance_dirty_pages_ratelimited_nr+0x187/0x2f9 22) 1120 224 generic_file_buffered_write+0x1d4/0x2c4 23) 896 176 __generic_file_aio_write_nolock+0x35f/0x393 24) 720 80 generic_file_aio_write+0x6c/0xc8 25) 640 80 ext4_file_write+0xa9/0x137 [ext4] 26) 560 320 do_sync_write+0xf0/0x137 27) 240 48 vfs_write+0xb3/0x13c 28) 192 64 sys_write+0x4c/0x74 29) 128 128 system_call_fastpath+0x16/0x1b Split the segment counting out into a __blk_recalc_rq_segments() helper to avoid allocating an onstack request just for checking the physical segment count. Signed-off-by: Jens Axboe --- block/blk-merge.c | 94 ++++++++++++++++++++++++++++---------------------- include/linux/blkdev.h | 2 ++ 2 files changed, 55 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/block/blk-merge.c b/block/blk-merge.c index b92f5b0866b0..a104593e70c3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -38,72 +38,84 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) } } -void blk_recalc_rq_segments(struct request *rq) +static unsigned int __blk_recalc_rq_segments(struct request_queue *q, + struct bio *bio, + unsigned int *seg_size_ptr) { - int nr_phys_segs; unsigned int phys_size; struct bio_vec *bv, *bvprv = NULL; - int seg_size; - int cluster; - struct req_iterator iter; - int high, highprv = 1; - struct request_queue *q = rq->q; + int cluster, i, high, highprv = 1; + unsigned int seg_size, nr_phys_segs; + struct bio *fbio; - if (!rq->bio) - return; + if (!bio) + return 0; + fbio = bio; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); seg_size = 0; phys_size = nr_phys_segs = 0; - rq_for_each_segment(bv, rq, iter) { - /* - * the trick here is making sure that a high page is never - * considered part of another segment, since that might - * change with the bounce page. - */ - high = page_to_pfn(bv->bv_page) > q->bounce_pfn; - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len > q->max_segment_size) - goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + for_each_bio(bio) { + bio_for_each_segment(bv, bio, i) { + /* + * the trick here is making sure that a high page is + * never considered part of another segment, since that + * might change with the bounce page. + */ + high = page_to_pfn(bv->bv_page) > q->bounce_pfn; + if (high || highprv) goto new_segment; + if (cluster) { + if (seg_size + bv->bv_len > q->max_segment_size) + goto new_segment; + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + goto new_segment; + + seg_size += bv->bv_len; + bvprv = bv; + continue; + } +new_segment: + if (nr_phys_segs == 1 && seg_size > + fbio->bi_seg_front_size) + fbio->bi_seg_front_size = seg_size; - seg_size += bv->bv_len; + nr_phys_segs++; bvprv = bv; - continue; + seg_size = bv->bv_len; + highprv = high; } -new_segment: - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) - rq->bio->bi_seg_front_size = seg_size; - - nr_phys_segs++; - bvprv = bv; - seg_size = bv->bv_len; - highprv = high; } - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + if (seg_size_ptr) + *seg_size_ptr = seg_size; + + return nr_phys_segs; +} + +void blk_recalc_rq_segments(struct request *rq) +{ + unsigned int seg_size = 0, phys_segs; + + phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size); + + if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) rq->bio->bi_seg_front_size = seg_size; if (seg_size > rq->biotail->bi_seg_back_size) rq->biotail->bi_seg_back_size = seg_size; - rq->nr_phys_segments = nr_phys_segs; + rq->nr_phys_segments = phys_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - struct request rq; struct bio *nxt = bio->bi_next; - rq.q = q; - rq.bio = rq.biotail = bio; + bio->bi_next = NULL; - blk_recalc_rq_segments(&rq); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL); bio->bi_next = nxt; - bio->bi_phys_segments = rq.nr_phys_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dcaa0fd84b02..465d6babc847 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -708,6 +708,8 @@ struct req_iterator { }; /* This should not be used directly - use rq_for_each_segment */ +#define for_each_bio(_bio) \ + for (; _bio; _bio = _bio->bi_next) #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -- cgit v1.2.3 From 5170836679185357dc1b7660bad13287b39e1e33 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Feb 2009 14:03:03 -0800 Subject: Fix recursive lock in free_uid()/free_user_ns() free_uid() and free_user_ns() are corecursive when CONFIG_USER_SCHED=n, but free_user_ns() is called from free_uid() by way of uid_hash_remove(), which requires uidhash_lock to be held. free_user_ns() then calls free_uid() to complete the destruction. Fix this by deferring the destruction of the user_namespace. Signed-off-by: David Howells Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 1 + kernel/user_namespace.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 315bcd375224..cc4f45361dbb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -13,6 +13,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; struct user_struct *creator; + struct work_struct destroyer; }; extern struct user_namespace init_user_ns; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 79084311ee57..076c7c8215b0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -60,12 +60,25 @@ int create_user_ns(struct cred *new) return 0; } -void free_user_ns(struct kref *kref) +/* + * Deferred destructor for a user namespace. This is required because + * free_user_ns() may be called with uidhash_lock held, but we need to call + * back to free_uid() which will want to take the lock again. + */ +static void free_user_ns_work(struct work_struct *work) { - struct user_namespace *ns; - - ns = container_of(kref, struct user_namespace, kref); + struct user_namespace *ns = + container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); kfree(ns); } + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); +} EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3