From c6c735441207b2ab54e45b0eb47671c508ee9847 Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Tue, 22 Mar 2011 09:32:51 -0300 Subject: [media] v4l2-device: fix a macro definition v4l2_device_unregister_subdev() wrongly uses "arg..." instead of "## arg" in its body. Fix it. Signed-off-by: Guennadi Liakhovetski Signed-off-by: Mauro Carvalho Chehab --- include/media/v4l2-device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/media/v4l2-device.h b/include/media/v4l2-device.h index bd102cf509ac..d61febfb1668 100644 --- a/include/media/v4l2-device.h +++ b/include/media/v4l2-device.h @@ -163,7 +163,7 @@ v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev); ({ \ struct v4l2_subdev *__sd; \ __v4l2_device_call_subdevs_until_err_p(v4l2_dev, __sd, cond, o, \ - f, args...); \ + f , ##args); \ }) /* Call the specified callback for all subdevs matching grp_id (if 0, then -- cgit v1.2.3 From 38a2f37258f9e2ae3f6e4241e01088be8dfaf4e9 Mon Sep 17 00:00:00 2001 From: huajun li Date: Wed, 13 Apr 2011 15:43:32 +0000 Subject: usbnet: Fix up 'FLAG_POINTTOPOINT' and 'FLAG_MULTI_PACKET' overlaps. USB tethering does not work anymore since 2.6.39-rc2, but it's okay in -rc1. The root cause is the new added mask code 'FLAG_POINTTOPOINT' overlaps 'FLAG_MULTI_PACKET' in include/linux/usb/usbnet.h, this causes logic issue in rx_process(). This patch cleans up the overlap. Reported-and-Tested-by: Gottfried Haider Signed-off-by: Huajun Li Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 3c7329b8ea0e..0e1855079fbb 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -103,8 +103,8 @@ struct driver_info { * Indicates to usbnet, that USB driver accumulates multiple IP packets. * Affects statistic (counters) and short packet handling. */ -#define FLAG_MULTI_PACKET 0x1000 -#define FLAG_RX_ASSEMBLE 0x2000 /* rx packets may span >1 frames */ +#define FLAG_MULTI_PACKET 0x2000 +#define FLAG_RX_ASSEMBLE 0x4000 /* rx packets may span >1 frames */ /* init device ... can sleep, or cause probe() failure */ int (*bind)(struct usbnet *, struct usb_interface *); -- cgit v1.2.3 From 1791f881435fab951939ad700e947b66c062e083 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 30 Mar 2011 15:24:21 +0200 Subject: posix clocks: Replace mutex with reader/writer semaphore A dynamic posix clock is protected from asynchronous removal by a mutex. However, using a mutex has the unwanted effect that a long running clock operation in one process will unnecessarily block other processes. For example, one process might call read() to get an external time stamp coming in at one pulse per second. A second process calling clock_gettime would have to wait for almost a whole second. This patch fixes the issue by using a reader/writer semaphore instead of a mutex. Signed-off-by: Richard Cochran Cc: John Stultz Link: http://lkml.kernel.org/r/%3C20110330132421.GA31771%40riccoc20.at.omicron.at%3E Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 5 +++-- kernel/time/posix-clock.c | 24 +++++++++--------------- 2 files changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 369e19d3750b..7f1183dcd119 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -24,6 +24,7 @@ #include #include #include +#include struct posix_clock; @@ -104,7 +105,7 @@ struct posix_clock_operations { * @ops: Functional interface to the clock * @cdev: Character device instance for this clock * @kref: Reference count. - * @mutex: Protects the 'zombie' field from concurrent access. + * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. * @release: A function to free the structure when the reference count reaches * zero. May be NULL if structure is statically allocated. @@ -117,7 +118,7 @@ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; struct kref kref; - struct mutex mutex; + struct rw_semaphore rwsem; bool zombie; void (*release)(struct posix_clock *clk); }; diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index 25028dd4fa18..c340ca658f37 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -19,7 +19,6 @@ */ #include #include -#include #include #include #include @@ -34,19 +33,19 @@ static struct posix_clock *get_posix_clock(struct file *fp) { struct posix_clock *clk = fp->private_data; - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (!clk->zombie) return clk; - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return NULL; } static void put_posix_clock(struct posix_clock *clk) { - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); } static ssize_t posix_clock_read(struct file *fp, char __user *buf, @@ -156,7 +155,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) struct posix_clock *clk = container_of(inode->i_cdev, struct posix_clock, cdev); - mutex_lock(&clk->mutex); + down_read(&clk->rwsem); if (clk->zombie) { err = -ENODEV; @@ -172,7 +171,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) fp->private_data = clk; } out: - mutex_unlock(&clk->mutex); + up_read(&clk->rwsem); return err; } @@ -211,25 +210,20 @@ int posix_clock_register(struct posix_clock *clk, dev_t devid) int err; kref_init(&clk->kref); - mutex_init(&clk->mutex); + init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); clk->cdev.owner = clk->ops.owner; err = cdev_add(&clk->cdev, devid, 1); - if (err) - goto no_cdev; return err; -no_cdev: - mutex_destroy(&clk->mutex); - return err; } EXPORT_SYMBOL_GPL(posix_clock_register); static void delete_clock(struct kref *kref) { struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - mutex_destroy(&clk->mutex); + if (clk->release) clk->release(clk); } @@ -238,9 +232,9 @@ void posix_clock_unregister(struct posix_clock *clk) { cdev_del(&clk->cdev); - mutex_lock(&clk->mutex); + down_write(&clk->rwsem); clk->zombie = true; - mutex_unlock(&clk->mutex); + up_write(&clk->rwsem); kref_put(&clk->kref, delete_clock); } -- cgit v1.2.3 From 468f86134ee515234afe5c5b3f39f266c50e61a5 Mon Sep 17 00:00:00 2001 From: Bryan Schumaker Date: Mon, 18 Apr 2011 15:57:32 -0400 Subject: NFSv4.1: Don't update sequence number if rpc_task is not sent If we fail to contact the gss upcall program, then no message will be sent to the server. The client still updated the sequence number, however, and this lead to NFS4ERR_SEQ_MISMATCH for the next several RPC calls. Signed-off-by: Bryan Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 4 ++-- include/linux/sunrpc/sched.h | 2 ++ net/sunrpc/xprt.c | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index b8e1ac69b743..e7e2077eebd9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -444,8 +444,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * if (res->sr_status == 1) res->sr_status = NFS_OK; - /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ - if (!res->sr_slot) + /* don't increment the sequence number if the task wasn't sent */ + if (!RPC_WAS_SENT(task)) goto out; /* Check the SEQUENCE operation status */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index d81db8012c63..3b94f804b852 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -127,6 +127,7 @@ struct rpc_task_setup { #define RPC_TASK_KILLED 0x0100 /* task was killed */ #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ +#define RPC_TASK_SENT 0x0800 /* message was sent */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) @@ -134,6 +135,7 @@ struct rpc_task_setup { #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) +#define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) #define RPC_TASK_RUNNING 0 #define RPC_TASK_QUEUED 1 diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 9494c3767356..ce5eb68a9664 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -906,6 +906,7 @@ void xprt_transmit(struct rpc_task *task) } dprintk("RPC: %5u xmit complete\n", task->tk_pid); + task->tk_flags |= RPC_TASK_SENT; spin_lock_bh(&xprt->transport_lock); xprt->ops->set_retrans_timeout(task); -- cgit v1.2.3 From c21e6beba8835d09bb80e34961430b13e60381c5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 19 Apr 2011 13:32:46 +0200 Subject: block: get rid of QUEUE_FLAG_REENTER We are currently using this flag to check whether it's safe to call into ->request_fn(). If it is set, we punt to kblockd. But we get a lot of false positives and excessive punts to kblockd, which hurts performance. The only real abuser of this infrastructure is SCSI. So export the async queue run and convert SCSI over to use that. There's room for improvement in that SCSI need not always use the async call, but this fixes our performance issue and they can fix that up in due time. Signed-off-by: Jens Axboe --- block/blk-core.c | 11 ++--------- block/blk.h | 1 - drivers/scsi/scsi_lib.c | 17 +---------------- drivers/scsi/scsi_transport_fc.c | 19 ++++--------------- include/linux/blkdev.h | 26 +++++++++++++------------- 5 files changed, 20 insertions(+), 54 deletions(-) (limited to 'include') diff --git a/block/blk-core.c b/block/blk-core.c index 580eee5743e5..40725b9091f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -303,15 +303,7 @@ void __blk_run_queue(struct request_queue *q) if (unlikely(blk_queue_stopped(q))) return; - /* - * Only recurse once to avoid overrunning the stack, let the unplug - * handling reinvoke the handler shortly if we already got there. - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else - queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); + q->request_fn(q); } EXPORT_SYMBOL(__blk_run_queue); @@ -328,6 +320,7 @@ void blk_run_queue_async(struct request_queue *q) if (likely(!blk_queue_stopped(q))) queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); } +EXPORT_SYMBOL(blk_run_queue_async); /** * blk_run_queue - run a single device queue diff --git a/block/blk.h b/block/blk.h index c9df8fc3c999..61263463e38e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -22,7 +22,6 @@ void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); void blk_add_timer(struct request *); void __generic_unplug_device(struct request_queue *); -void blk_run_queue_async(struct request_queue *q); /* * Internal atomic flags for request handling diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ab55c2fa7ce2..e9901b8f8443 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -411,8 +411,6 @@ static void scsi_run_queue(struct request_queue *q) list_splice_init(&shost->starved_list, &starved_list); while (!list_empty(&starved_list)) { - int flagset; - /* * As long as shost is accepting commands and we have * starved queues, call blk_run_queue. scsi_request_fn @@ -435,20 +433,7 @@ static void scsi_run_queue(struct request_queue *q) continue; } - spin_unlock(shost->host_lock); - - spin_lock(sdev->request_queue->queue_lock); - flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, - &sdev->request_queue->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue); - __blk_run_queue(sdev->request_queue); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue); - spin_unlock(sdev->request_queue->queue_lock); - - spin_lock(shost->host_lock); + blk_run_queue_async(sdev->request_queue); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 28c33506e4ad..815069d13f9b 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -3816,28 +3816,17 @@ fail_host_msg: static void fc_bsg_goose_queue(struct fc_rport *rport) { - int flagset; - unsigned long flags; - if (!rport->rqst_q) return; + /* + * This get/put dance makes no sense + */ get_device(&rport->dev); - - spin_lock_irqsave(rport->rqst_q->queue_lock, flags); - flagset = test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags) && - !test_bit(QUEUE_FLAG_REENTER, &rport->rqst_q->queue_flags); - if (flagset) - queue_flag_set(QUEUE_FLAG_REENTER, rport->rqst_q); - __blk_run_queue(rport->rqst_q); - if (flagset) - queue_flag_clear(QUEUE_FLAG_REENTER, rport->rqst_q); - spin_unlock_irqrestore(rport->rqst_q->queue_lock, flags); - + blk_run_queue_async(rport->rqst_q); put_device(&rport->dev); } - /** * fc_bsg_rport_dispatch - process rport bsg requests and dispatch to LLDD * @q: rport request queue diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cbbfd98ad4a3..2ad95fa1d130 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -388,20 +388,19 @@ struct request_queue #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ -#define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 6 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 9 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 10 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 11 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 12 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ -#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ -#define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ -#define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ -#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ -#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ +#define QUEUE_FLAG_IO_STAT 13 /* do IO stats */ +#define QUEUE_FLAG_DISCARD 14 /* supports DISCARD */ +#define QUEUE_FLAG_NOXMERGES 15 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -699,6 +698,7 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(struct request_queue *q); extern void __blk_run_queue(struct request_queue *q); extern void blk_run_queue(struct request_queue *); +extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); -- cgit v1.2.3 From d924de09cac6e18bdfbe9461a2ab2adeb36e77b0 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:06 -0300 Subject: [media] v4l: add V4L2_PIX_FMT_Y12 format Y12 is a grey-scale format with a depth of 12 bits per pixel stored in 16-bit words. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/media-entities.tmpl | 1 + Documentation/DocBook/v4l/pixfmt-y12.xml | 79 +++++++++++++++++++++++++++++++ Documentation/DocBook/v4l/pixfmt.xml | 1 + include/linux/videodev2.h | 1 + 4 files changed, 82 insertions(+) create mode 100644 Documentation/DocBook/v4l/pixfmt-y12.xml (limited to 'include') diff --git a/Documentation/DocBook/media-entities.tmpl b/Documentation/DocBook/media-entities.tmpl index 5d259c632cdf..fea63b45471a 100644 --- a/Documentation/DocBook/media-entities.tmpl +++ b/Documentation/DocBook/media-entities.tmpl @@ -294,6 +294,7 @@ + diff --git a/Documentation/DocBook/v4l/pixfmt-y12.xml b/Documentation/DocBook/v4l/pixfmt-y12.xml new file mode 100644 index 000000000000..ff417b858cc9 --- /dev/null +++ b/Documentation/DocBook/v4l/pixfmt-y12.xml @@ -0,0 +1,79 @@ + + + V4L2_PIX_FMT_Y12 ('Y12 ') + &manvol; + + + V4L2_PIX_FMT_Y12 + Grey-scale image + + + Description + + This is a grey-scale image with a depth of 12 bits per pixel. Pixels +are stored in 16-bit words with unused high bits padded with 0. The least +significant byte is stored at lower memory addresses (little-endian). + + + <constant>V4L2_PIX_FMT_Y12</constant> 4 × 4 +pixel image + + + Byte Order. + Each cell is one byte. + + + + + + start + 0: + Y'00low + Y'00high + Y'01low + Y'01high + Y'02low + Y'02high + Y'03low + Y'03high + + + start + 8: + Y'10low + Y'10high + Y'11low + Y'11high + Y'12low + Y'12high + Y'13low + Y'13high + + + start + 16: + Y'20low + Y'20high + Y'21low + Y'21high + Y'22low + Y'22high + Y'23low + Y'23high + + + start + 24: + Y'30low + Y'30high + Y'31low + Y'31high + Y'32low + Y'32high + Y'33low + Y'33high + + + + + + + + + diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml index c6fdcbbd1b41..40af4beb48b9 100644 --- a/Documentation/DocBook/v4l/pixfmt.xml +++ b/Documentation/DocBook/v4l/pixfmt.xml @@ -696,6 +696,7 @@ information. &sub-packed-yuv; &sub-grey; &sub-y10; + &sub-y12; &sub-y16; &sub-yuyv; &sub-uyvy; diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index aa6c393b7ae9..be82c8ead1af 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -308,6 +308,7 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_Y4 v4l2_fourcc('Y', '0', '4', ' ') /* 4 Greyscale */ #define V4L2_PIX_FMT_Y6 v4l2_fourcc('Y', '0', '6', ' ') /* 6 Greyscale */ #define V4L2_PIX_FMT_Y10 v4l2_fourcc('Y', '1', '0', ' ') /* 10 Greyscale */ +#define V4L2_PIX_FMT_Y12 v4l2_fourcc('Y', '1', '2', ' ') /* 12 Greyscale */ #define V4L2_PIX_FMT_Y16 v4l2_fourcc('Y', '1', '6', ' ') /* 16 Greyscale */ /* Palette formats */ -- cgit v1.2.3 From cbbc69a4a98081740f0e3d7717fbfa0b584b983d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Tue, 29 Mar 2011 05:19:07 -0300 Subject: [media] media: add missing 8-bit bayer formats and Y12 8-bit SGBRG and SRGGB media bus formats are missing, as well as the 12-bit grey format. Add them. Signed-off-by: Michael Jones Acked-by: Laurent Pinchart Signed-off-by: Mauro Carvalho Chehab --- Documentation/DocBook/v4l/subdev-formats.xml | 59 ++++++++++++++++++++++++++++ include/linux/v4l2-mediabus.h | 7 +++- 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/v4l/subdev-formats.xml b/Documentation/DocBook/v4l/subdev-formats.xml index 7041127d6dfc..d7ccd25edcc1 100644 --- a/Documentation/DocBook/v4l/subdev-formats.xml +++ b/Documentation/DocBook/v4l/subdev-formats.xml @@ -456,6 +456,23 @@ b1 b0 + + V4L2_MBUS_FMT_SGBRG8_1X8 + 0x3013 + + - + - + - + - + g7 + g6 + g5 + g4 + g3 + g2 + g1 + g0 + V4L2_MBUS_FMT_SGRBG8_1X8 0x3002 @@ -473,6 +490,23 @@ g1 g0 + + V4L2_MBUS_FMT_SRGGB8_1X8 + 0x3014 + + - + - + - + - + r7 + r6 + r5 + r4 + r3 + r2 + r1 + r0 + V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 0x300b @@ -2159,6 +2193,31 @@ u1 u0 + + V4L2_MBUS_FMT_Y12_1X12 + 0x2013 + + - + - + - + - + - + - + - + - + y11 + y10 + y9 + y8 + y7 + y6 + y5 + y4 + y3 + y2 + y1 + y0 + V4L2_MBUS_FMT_UYVY8_1X16 0x200f diff --git a/include/linux/v4l2-mediabus.h b/include/linux/v4l2-mediabus.h index 7054a7a8065e..de5c15921025 100644 --- a/include/linux/v4l2-mediabus.h +++ b/include/linux/v4l2-mediabus.h @@ -47,7 +47,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_RGB565_2X8_BE = 0x1007, V4L2_MBUS_FMT_RGB565_2X8_LE = 0x1008, - /* YUV (including grey) - next is 0x2013 */ + /* YUV (including grey) - next is 0x2014 */ V4L2_MBUS_FMT_Y8_1X8 = 0x2001, V4L2_MBUS_FMT_UYVY8_1_5X8 = 0x2002, V4L2_MBUS_FMT_VYUY8_1_5X8 = 0x2003, @@ -60,6 +60,7 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_Y10_1X10 = 0x200a, V4L2_MBUS_FMT_YUYV10_2X10 = 0x200b, V4L2_MBUS_FMT_YVYU10_2X10 = 0x200c, + V4L2_MBUS_FMT_Y12_1X12 = 0x2013, V4L2_MBUS_FMT_UYVY8_1X16 = 0x200f, V4L2_MBUS_FMT_VYUY8_1X16 = 0x2010, V4L2_MBUS_FMT_YUYV8_1X16 = 0x2011, @@ -67,9 +68,11 @@ enum v4l2_mbus_pixelcode { V4L2_MBUS_FMT_YUYV10_1X20 = 0x200d, V4L2_MBUS_FMT_YVYU10_1X20 = 0x200e, - /* Bayer - next is 0x3013 */ + /* Bayer - next is 0x3015 */ V4L2_MBUS_FMT_SBGGR8_1X8 = 0x3001, + V4L2_MBUS_FMT_SGBRG8_1X8 = 0x3013, V4L2_MBUS_FMT_SGRBG8_1X8 = 0x3002, + V4L2_MBUS_FMT_SRGGB8_1X8 = 0x3014, V4L2_MBUS_FMT_SBGGR10_DPCM8_1X8 = 0x300b, V4L2_MBUS_FMT_SGBRG10_DPCM8_1X8 = 0x300c, V4L2_MBUS_FMT_SGRBG10_DPCM8_1X8 = 0x3009, -- cgit v1.2.3 From 8c9e80ed276fc4b9c9fadf29d8bf6b3576112f1a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 21 Apr 2011 17:23:19 -0700 Subject: SECURITY: Move exec_permission RCU checks into security modules Right now all RCU walks fall back to reference walk when CONFIG_SECURITY is enabled, even though just the standard capability module is active. This is because security_inode_exec_permission unconditionally fails RCU walks. Move this decision to the low level security module. This requires passing the RCU flags down the security hook. This way at least the capability module and a few easy cases in selinux/smack work with RCU walks with CONFIG_SECURITY=y Signed-off-by: Andi Kleen Acked-by: Eric Paris Signed-off-by: Linus Torvalds --- include/linux/security.h | 2 +- security/capability.c | 2 +- security/security.c | 6 ++---- security/selinux/hooks.c | 6 +++++- security/smack/smack_lsm.c | 6 +++++- 5 files changed, 14 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index ca02f1716736..8ce59ef3e5af 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1456,7 +1456,7 @@ struct security_operations { struct inode *new_dir, struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); - int (*inode_permission) (struct inode *inode, int mask); + int (*inode_permission) (struct inode *inode, int mask, unsigned flags); int (*inode_setattr) (struct dentry *dentry, struct iattr *attr); int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry); int (*inode_setxattr) (struct dentry *dentry, const char *name, diff --git a/security/capability.c b/security/capability.c index 2984ea4f776f..bbb51156261b 100644 --- a/security/capability.c +++ b/security/capability.c @@ -181,7 +181,7 @@ static int cap_inode_follow_link(struct dentry *dentry, return 0; } -static int cap_inode_permission(struct inode *inode, int mask) +static int cap_inode_permission(struct inode *inode, int mask, unsigned flags) { return 0; } diff --git a/security/security.c b/security/security.c index 101142369db4..4ba6d4cc061f 100644 --- a/security/security.c +++ b/security/security.c @@ -518,16 +518,14 @@ int security_inode_permission(struct inode *inode, int mask) { if (unlikely(IS_PRIVATE(inode))) return 0; - return security_ops->inode_permission(inode, mask); + return security_ops->inode_permission(inode, mask, 0); } int security_inode_exec_permission(struct inode *inode, unsigned int flags) { if (unlikely(IS_PRIVATE(inode))) return 0; - if (flags) - return -ECHILD; - return security_ops->inode_permission(inode, MAY_EXEC); + return security_ops->inode_permission(inode, MAY_EXEC, flags); } int security_inode_setattr(struct dentry *dentry, struct iattr *attr) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index f9c3764e4859..a73f4e463774 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2635,7 +2635,7 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct nameidata *na return dentry_has_perm(cred, NULL, dentry, FILE__READ); } -static int selinux_inode_permission(struct inode *inode, int mask) +static int selinux_inode_permission(struct inode *inode, int mask, unsigned flags) { const struct cred *cred = current_cred(); struct common_audit_data ad; @@ -2649,6 +2649,10 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + COMMON_AUDIT_DATA_INIT(&ad, FS); ad.u.fs.inode = inode; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index c6f8fcadae07..400a5d5cde61 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -686,7 +686,7 @@ static int smack_inode_rename(struct inode *old_inode, * * Returns 0 if access is permitted, -EACCES otherwise */ -static int smack_inode_permission(struct inode *inode, int mask) +static int smack_inode_permission(struct inode *inode, int mask, unsigned flags) { struct smk_audit_info ad; @@ -696,6 +696,10 @@ static int smack_inode_permission(struct inode *inode, int mask) */ if (mask == 0) return 0; + + /* May be droppable after audit */ + if (flags & IPERM_FLAG_RCU) + return -ECHILD; smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_FS); smk_ad_setfield_u_fs_inode(&ad, inode); return smk_curacc(smk_of_inode(inode), mask, &ad); -- cgit v1.2.3 From dea3667bc3c2a0521e8d8855e407a49d9d70028c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Apr 2011 07:58:46 -0700 Subject: vfs: get rid of insane dentry hashing rules The dentry hashing rules have been really quite complicated for a long while, in odd ways. That made functions like __d_drop() very fragile and non-obvious. In particular, whether a dentry was hashed or not was indicated with an explicit DCACHE_UNHASHED bit. That's despite the fact that the hash abstraction that the dentries use actually have a 'is this entry hashed or not' model (which is a simple test of the 'pprev' pointer). The reason that was done is because we used the normal 'is this entry unhashed' model to mark whether the dentry had _ever_ been hashed in the dentry hash tables, and that logic goes back many years (commit b3423415fbc2: "dcache: avoid RCU for never-hashed dentries"). That, in turn, meant that __d_drop had totally different unhashing logic for the dentry hash table case and for the anonymous dcache case, because in order to use the "is this dentry hashed" logic as a flag for whether it had ever been on the RCU hash table, we had to unhash such a dentry differently so that we'd never think that it wasn't 'unhashed' and wouldn't be free'd correctly. That's just insane. It made the logic really hard to follow, when there were two different kinds of "unhashed" states, and one of them (the one that used "list_bl_unhashed()") really had nothing at all to do with being unhashed per se, but with a very subtle lifetime rule instead. So turn all of it around, and make it logical. Instead of having a DENTRY_UNHASHED bit in d_flags to indicate whether the dentry is on the hash chains or not, use the hash chain unhashed logic for that. Suddenly "d_unhashed()" just uses "list_bl_unhashed()", and everything makes sense. And for the lifetime rule, just use an explicit DENTRY_RCUACCEES bit. If we ever insert the dentry into the dentry hash table so that it is visible to RCU lookup, we mark it DENTRY_RCUACCESS to show that it now needs the RCU lifetime rules. Now suddently that test at dentry free time makes sense too. And because unhashing now is sane and doesn't depend on where the dentry got unhashed from (because the dentry hash chain details doesn't have some subtle side effects), we can re-unify the __d_drop() logic and use common code for the unhashing. Also fix one more open-coded hash chain bit_spin_lock() that I missed in the previous chain locking cleanup commit. Signed-off-by: Linus Torvalds --- fs/dcache.c | 42 ++++++++++++++++-------------------------- include/linux/dcache.h | 4 ++-- 2 files changed, 18 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 7108c15685dd..d600a0af3b2e 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -164,8 +164,8 @@ static void d_free(struct dentry *dentry) if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); - /* if dentry was never inserted into hash, immediate free is OK */ - if (hlist_bl_unhashed(&dentry->d_hash)) + /* if dentry was never visible to RCU, immediate free is OK */ + if (!(dentry->d_flags & DCACHE_RCUACCESS)) __d_free(&dentry->d_u.d_rcu); else call_rcu(&dentry->d_u.d_rcu, __d_free); @@ -327,28 +327,19 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) */ void __d_drop(struct dentry *dentry) { - if (!(dentry->d_flags & DCACHE_UNHASHED)) { + if (!d_unhashed(dentry)) { struct hlist_bl_head *b; - if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) { + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) b = &dentry->d_sb->s_anon; - spin_lock_bucket(b); - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_init(&dentry->d_hash); - spin_unlock_bucket(b); - } else { - struct hlist_bl_head *b; + else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); - /* - * We may not actually need to put DCACHE_UNHASHED - * manipulations under the hash lock, but follow - * the principle of least surprise. - */ - dentry->d_flags |= DCACHE_UNHASHED; - hlist_bl_del_rcu(&dentry->d_hash); - spin_unlock_bucket(b); - dentry_rcuwalk_barrier(dentry); - } + + spin_lock_bucket(b); + __hlist_bl_del(&dentry->d_hash); + dentry->d_hash.pprev = NULL; + spin_unlock_bucket(b); + + dentry_rcuwalk_barrier(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1301,7 +1292,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) dname[name->len] = 0; dentry->d_count = 1; - dentry->d_flags = DCACHE_UNHASHED; + dentry->d_flags = 0; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; @@ -1603,10 +1594,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - bit_spin_lock(0, (unsigned long *)&tmp->d_sb->s_anon.first); - tmp->d_flags &= ~DCACHE_UNHASHED; + spin_lock_bucket(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - __bit_spin_unlock(0, (unsigned long *)&tmp->d_sb->s_anon.first); + spin_unlock_bucket(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2087,7 +2077,7 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); spin_lock_bucket(b); - entry->d_flags &= ~DCACHE_UNHASHED; + entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); spin_unlock_bucket(b); } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index f2afed4fa945..19d90a55541d 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -197,7 +197,7 @@ struct dentry_operations { * typically using d_splice_alias. */ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ -#define DCACHE_UNHASHED 0x0010 +#define DCACHE_RCUACCESS 0x0010 /* Entry has ever been RCU-visible */ #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched by inotify */ @@ -384,7 +384,7 @@ extern struct dentry *dget_parent(struct dentry *dentry); static inline int d_unhashed(struct dentry *dentry) { - return (dentry->d_flags & DCACHE_UNHASHED); + return hlist_bl_unhashed(&dentry->d_hash); } static inline int d_unlinked(struct dentry *dentry) -- cgit v1.2.3 From 3f7ac1d6671ebca7a955853f7127c937f7befbd3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:25 +0100 Subject: libata: Kill unused ATA_DFLAG_{H|D}IPM flags ATA_DFLAG_{H|D}IPM flags are no longer used. Kill them. Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 7f675aa81d87..3b4d223a6aff 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -137,8 +137,6 @@ enum { ATA_DFLAG_ACPI_PENDING = (1 << 5), /* ACPI resume action pending */ ATA_DFLAG_ACPI_FAILED = (1 << 6), /* ACPI on devcfg has failed */ ATA_DFLAG_AN = (1 << 7), /* AN configured */ - ATA_DFLAG_HIPM = (1 << 8), /* device supports HIPM */ - ATA_DFLAG_DIPM = (1 << 9), /* device supports DIPM */ ATA_DFLAG_DMADIR = (1 << 10), /* device requires DMADIR */ ATA_DFLAG_CFG_MASK = (1 << 12) - 1, -- cgit v1.2.3 From ae01b2493c3bf03c504c32ac4ebb01d528508db3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Mar 2011 11:14:55 +0100 Subject: libata: Implement ATA_FLAG_NO_DIPM and apply it to mcp65 NVIDIA mcp65 familiy of controllers cause command timeouts when DIPM is used. Implement ATA_FLAG_NO_DIPM and apply it. This problem was reported by Stefan Bader in the following thread. http://thread.gmane.org/gmane.linux.ide/48841 stable: applicable to 2.6.37 and 38. Signed-off-by: Tejun Heo Reported-by: Stefan Bader Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/ahci.c | 2 +- drivers/ata/libata-eh.c | 6 ++++-- include/linux/libata.h | 1 + 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c index 39d829cd82dd..cbd38e130f05 100644 --- a/drivers/ata/ahci.c +++ b/drivers/ata/ahci.c @@ -150,7 +150,7 @@ static const struct ata_port_info ahci_port_info[] = { { AHCI_HFLAGS (AHCI_HFLAG_NO_FPDMA_AA | AHCI_HFLAG_NO_PMP | AHCI_HFLAG_YES_NCQ), - .flags = AHCI_FLAG_COMMON, + .flags = AHCI_FLAG_COMMON | ATA_FLAG_NO_DIPM, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index 88cd22fa65cd..f26f2fe3480a 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3316,6 +3316,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, struct ata_eh_context *ehc = &link->eh_context; struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; enum ata_lpm_policy old_policy = link->lpm_policy; + bool no_dipm = ap->flags & ATA_FLAG_NO_DIPM; unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; unsigned int err_mask; int rc; @@ -3332,7 +3333,7 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, */ ata_for_each_dev(dev, link, ENABLED) { bool hipm = ata_id_has_hipm(dev->id); - bool dipm = ata_id_has_dipm(dev->id); + bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; /* find the first enabled and LPM enabled devices */ if (!link_dev) @@ -3389,7 +3390,8 @@ static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, /* host config updated, enable DIPM if transitioning to MIN_POWER */ ata_for_each_dev(dev, link, ENABLED) { - if (policy == ATA_LPM_MIN_POWER && ata_id_has_dipm(dev->id)) { + if (policy == ATA_LPM_MIN_POWER && !no_dipm && + ata_id_has_dipm(dev->id)) { err_mask = ata_dev_set_feature(dev, SETFEATURES_SATA_ENABLE, SATA_DIPM); if (err_mask && err_mask != AC_ERR_DEV) { diff --git a/include/linux/libata.h b/include/linux/libata.h index 3b4d223a6aff..04f32a3eb26b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -196,6 +196,7 @@ enum { * management */ ATA_FLAG_SW_ACTIVITY = (1 << 22), /* driver supports sw activity * led */ + ATA_FLAG_NO_DIPM = (1 << 23), /* host not happy with DIPM */ /* bits 24:31 of ap->flags are reserved for LLD specific flags */ -- cgit v1.2.3 From fd954ae124e8a866e9cc1bc3de9a07be5492f608 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:18 -0400 Subject: NFSv4.1: Don't loop forever in nfs4_proc_create_session If a server for some reason keeps sending NFS4ERR_DELAY errors, we can end up looping forever inside nfs4_proc_create_session, and so the usual mechanisms for detecting if the nfs_client is dead don't work. Fix this by ensuring that we loop inside the nfs4_state_manager thread instead. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4_fs.h | 1 + fs/nfs/nfs4proc.c | 45 +++++++-------------------------------------- fs/nfs/nfs4state.c | 46 +++++++++++++++++++++++++++++++--------------- include/linux/nfs_fs_sb.h | 1 + 4 files changed, 40 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index e1c261ddd65d..c4a69833dd0d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -47,6 +47,7 @@ enum nfs4_client_state { NFS4CLNT_LAYOUTRECALL, NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, + NFS4CLNT_LEASE_CONFIRM, }; enum nfs4_session_state { diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 628e35f75307..50c814828fcb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3754,18 +3754,17 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); if (status != -NFS4ERR_CLID_INUSE) break; - if (signalled()) + if (loop != 0) { + ++clp->cl_id_uniquifier; break; - if (loop++ & 1) - ssleep(clp->cl_lease_time / HZ + 1); - else - if (++clp->cl_id_uniquifier == 0) - break; + } + ++loop; + ssleep(clp->cl_lease_time / HZ + 1); } return status; } -static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, +int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct nfs4_setclientid_res *arg, struct rpc_cred *cred) { @@ -3790,26 +3789,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, return status; } -int nfs4_proc_setclientid_confirm(struct nfs_client *clp, - struct nfs4_setclientid_res *arg, - struct rpc_cred *cred) -{ - long timeout = 0; - int err; - do { - err = _nfs4_proc_setclientid_confirm(clp, arg, cred); - switch (err) { - case 0: - return err; - case -NFS4ERR_RESOURCE: - /* The IBM lawyers misread another document! */ - case -NFS4ERR_DELAY: - err = nfs4_delay(clp->cl_rpcclient, &timeout); - } - } while (err == 0); - return err; -} - struct nfs4_delegreturndata { struct nfs4_delegreturnargs args; struct nfs4_delegreturnres res; @@ -5222,20 +5201,10 @@ int nfs4_proc_create_session(struct nfs_client *clp) int status; unsigned *ptr; struct nfs4_session *session = clp->cl_session; - long timeout = 0; - int err; dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); - do { - status = _nfs4_proc_create_session(clp); - if (status == -NFS4ERR_DELAY) { - err = nfs4_delay(clp->cl_rpcclient, &timeout); - if (err) - status = err; - } - } while (status == -NFS4ERR_DELAY); - + status = _nfs4_proc_create_session(clp); if (status) goto out; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4dfb34b43ffb..4a810c8b0753 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list); int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { - struct nfs4_setclientid_res clid; + struct nfs4_setclientid_res clid = { + .clientid = clp->cl_clientid, + .confirm = clp->cl_confirm, + }; unsigned short port; int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; port = nfs_callback_tcpport; if (clp->cl_addr.ss_family == AF_INET6) port = nfs_callback_tcpport6; @@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); if (status != 0) goto out; + clp->cl_clientid = clid.clientid; + clp->cl_confirm = clid.confirm; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_setclientid_confirm(clp, &clid, cred); if (status != 0) goto out; - clp->cl_clientid = clid.clientid; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs4_schedule_state_renewal(clp); out: return status; @@ -230,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) { int status; + if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) + goto do_confirm; nfs4_begin_drain_session(clp); status = nfs4_proc_exchange_id(clp, cred); if (status != 0) goto out; + set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); +do_confirm: status = nfs4_proc_create_session(clp); if (status != 0) goto out; + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); nfs41_setup_state_renewal(clp); nfs_mark_client_ready(clp, NFS_CS_READY); out: @@ -1584,20 +1598,22 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; } */ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) { - if (nfs4_has_session(clp)) { - switch (status) { - case -NFS4ERR_DELAY: - case -NFS4ERR_CLID_INUSE: - case -EAGAIN: - break; + switch (status) { + case -NFS4ERR_CLID_INUSE: + case -NFS4ERR_STALE_CLIENTID: + clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); + break; + case -NFS4ERR_DELAY: + case -EAGAIN: + ssleep(1); + break; - case -EKEYEXPIRED: - nfs4_warn_keyexpired(clp->cl_hostname); - case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery - * in nfs4_exchange_id */ - default: - return; - } + case -EKEYEXPIRED: + nfs4_warn_keyexpired(clp->cl_hostname); + case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery + * in nfs4_exchange_id */ + default: + return; } set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); } diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 216cea5db0aa..87694ca86914 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #ifdef CONFIG_NFS_V4 u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; /* Clientid verifier */ unsigned long cl_state; spinlock_t cl_lock; -- cgit v1.2.3 From 7494d00c7b826b6ceb79ec33892bd0ef59be5614 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 24 Apr 2011 14:28:45 -0400 Subject: SUNRPC: Allow RPC calls to return ETIMEDOUT instead of EIO On occasion, it is useful for the NFS layer to distinguish between soft timeouts and other EIO errors due to (say) encoding errors, or authentication errors. The following patch ensures that the default behaviour of the RPC layer remains to return EIO on soft timeouts (until we have audited all the callers). Signed-off-by: Trond Myklebust --- include/linux/sunrpc/sched.h | 3 ++- net/sunrpc/clnt.c | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 3b94f804b852..f73c482ec9c6 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -128,12 +128,13 @@ struct rpc_task_setup { #define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_TASK_SOFTCONN 0x0400 /* Fail if can't connect */ #define RPC_TASK_SENT 0x0800 /* message was sent */ +#define RPC_TASK_TIMEOUT 0x1000 /* fail with ETIMEDOUT on timeout */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) -#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) +#define RPC_IS_SOFT(t) ((t)->tk_flags & (RPC_TASK_SOFT|RPC_TASK_TIMEOUT)) #define RPC_IS_SOFTCONN(t) ((t)->tk_flags & RPC_TASK_SOFTCONN) #define RPC_WAS_SENT(t) ((t)->tk_flags & RPC_TASK_SENT) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index e7a96e478f63..8d83f9d48713 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1508,7 +1508,10 @@ call_timeout(struct rpc_task *task) if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); - rpc_exit(task, -EIO); + if (task->tk_flags & RPC_TASK_TIMEOUT) + rpc_exit(task, -ETIMEDOUT); + else + rpc_exit(task, -EIO); return; } -- cgit v1.2.3 From bf26c018490c2fce7fe9b629083b96ce0e6ad019 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 7 Apr 2011 16:53:20 +0200 Subject: ptrace: Prepare to fix racy accesses on task breakpoints When a task is traced and is in a stopped state, the tracer may execute a ptrace request to examine the tracee state and get its task struct. Right after, the tracee can be killed and thus its breakpoints released. This can happen concurrently when the tracer is in the middle of reading or modifying these breakpoints, leading to dereferencing a freed pointer. Hence, to prepare the fix, create a generic breakpoint reference holding API. When a reference on the breakpoints of a task is held, the breakpoints won't be released until the last reference is dropped. After that, no more ptrace request on the task's breakpoints can be serviced for the tracer. Reported-by: Oleg Nesterov Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Will Deacon Cc: Prasad Cc: Paul Mundt Cc: v2.6.33.. Link: http://lkml.kernel.org/r/1302284067-7860-2-git-send-email-fweisbec@gmail.com --- include/linux/ptrace.h | 13 ++++++++++++- include/linux/sched.h | 3 +++ kernel/exit.c | 2 +- kernel/ptrace.c | 17 +++++++++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a1147e5dd245..9178d5cc0b01 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -189,6 +189,10 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) child->ptrace = current->ptrace; __ptrace_link(child, current->parent); } + +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_set(&child->ptrace_bp_refcnt, 1); +#endif } /** @@ -350,6 +354,13 @@ extern int task_current_syscall(struct task_struct *target, long *callno, unsigned long args[6], unsigned int maxargs, unsigned long *sp, unsigned long *pc); -#endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT +extern int ptrace_get_breakpoints(struct task_struct *tsk); +extern void ptrace_put_breakpoints(struct task_struct *tsk); +#else +static inline void ptrace_put_breakpoints(struct task_struct *tsk) { } +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ + +#endif /* __KERNEL */ #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index 18d63cea2848..781abd137673 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1537,6 +1537,9 @@ struct task_struct { unsigned long memsw_nr_pages; /* uncharged mem+swap usage */ } memcg_batch; #endif +#ifdef CONFIG_HAVE_HW_BREAKPOINT + atomic_t ptrace_bp_refcnt; +#endif }; /* Future-safe accessor for struct task_struct's cpus_allowed. */ diff --git a/kernel/exit.c b/kernel/exit.c index f5d2f63bae0b..8dd874181542 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1016,7 +1016,7 @@ NORET_TYPE void do_exit(long code) /* * FIXME: do that only when needed, using sched_exit tracepoint */ - flush_ptrace_hw_breakpoint(tsk); + ptrace_put_breakpoints(tsk); exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 0fc1eed28d27..dc7ab65f3b36 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include #include +#include /* @@ -879,3 +880,19 @@ asmlinkage long compat_sys_ptrace(compat_long_t request, compat_long_t pid, return ret; } #endif /* CONFIG_COMPAT */ + +#ifdef CONFIG_HAVE_HW_BREAKPOINT +int ptrace_get_breakpoints(struct task_struct *tsk) +{ + if (atomic_inc_not_zero(&tsk->ptrace_bp_refcnt)) + return 0; + + return -1; +} + +void ptrace_put_breakpoints(struct task_struct *tsk) +{ + if (atomic_dec_and_test(&tsk->ptrace_bp_refcnt)) + flush_ptrace_hw_breakpoint(tsk); +} +#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -- cgit v1.2.3 From 3dd2ee4824b668a635d6d2bb6bc73f33708cab9f Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2011 18:10:58 -0700 Subject: bit_spinlock: don't play preemption games inside the busy loop When we are waiting for the bit-lock to be released, and are looping over the 'cpu_relax()' should not be doing anything else - otherwise we miss the point of trying to do the whole 'cpu_relax()'. Do the preemption enable/disable around the loop, rather than inside of it. Noticed when I was looking at the code generation for the dcache __d_drop usage, and the code just looked very odd. Signed-off-by: Linus Torvalds --- include/linux/bit_spinlock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h index e612575a2596..b4326bfa684f 100644 --- a/include/linux/bit_spinlock.h +++ b/include/linux/bit_spinlock.h @@ -23,11 +23,11 @@ static inline void bit_spin_lock(int bitnum, unsigned long *addr) preempt_disable(); #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) while (unlikely(test_and_set_bit_lock(bitnum, addr))) { - while (test_bit(bitnum, addr)) { - preempt_enable(); + preempt_enable(); + do { cpu_relax(); - preempt_disable(); - } + } while (test_bit(bitnum, addr)); + preempt_disable(); } #endif __acquire(bitlock); -- cgit v1.2.3 From 1879fd6a26571fd4e8e1f4bb3e7537bc936b1fe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 25 Apr 2011 14:01:36 -0400 Subject: add hlist_bl_lock/unlock helpers Now that the whole dcache_hash_bucket crap is gone, go all the way and also remove the weird locking layering violations for locking the hash buckets. Add hlist_bl_lock/unlock helpers to move the locking into the list abstraction instead of requiring each caller to open code it. After all allowing for the bit locks is the whole point of these helpers over the plain hlist variant. Signed-off-by: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/dcache.c | 22 ++++++---------------- fs/gfs2/glock.c | 6 ++---- include/linux/list_bl.h | 11 +++++++++++ 3 files changed, 19 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index d600a0af3b2e..22a0ef41bad1 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -109,16 +109,6 @@ static inline struct hlist_bl_head *d_hash(struct dentry *parent, return dentry_hashtable + (hash & D_HASHMASK); } -static inline void spin_lock_bucket(struct hlist_bl_head *b) -{ - bit_spin_lock(0, (unsigned long *)&b->first); -} - -static inline void spin_unlock_bucket(struct hlist_bl_head *b) -{ - __bit_spin_unlock(0, (unsigned long *)&b->first); -} - /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, @@ -334,10 +324,10 @@ void __d_drop(struct dentry *dentry) else b = d_hash(dentry->d_parent, dentry->d_name.hash); - spin_lock_bucket(b); + hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; - spin_unlock_bucket(b); + hlist_bl_unlock(b); dentry_rcuwalk_barrier(dentry); } @@ -1594,9 +1584,9 @@ struct dentry *d_obtain_alias(struct inode *inode) tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; list_add(&tmp->d_alias, &inode->i_dentry); - spin_lock_bucket(&tmp->d_sb->s_anon); + hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); - spin_unlock_bucket(&tmp->d_sb->s_anon); + hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); security_d_instantiate(tmp, inode); @@ -2076,10 +2066,10 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b) { BUG_ON(!d_unhashed(entry)); - spin_lock_bucket(b); + hlist_bl_lock(b); entry->d_flags |= DCACHE_RCUACCESS; hlist_bl_add_head_rcu(&entry->d_hash, b); - spin_unlock_bucket(b); + hlist_bl_unlock(b); } static void _d_rehash(struct dentry * entry) diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index f07643e21bfa..7a4fb630a320 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -93,14 +93,12 @@ static unsigned int gl_hash(const struct gfs2_sbd *sdp, static inline void spin_lock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - bit_spin_lock(0, (unsigned long *)bl); + hlist_bl_lock(&gl_hash_table[hash]); } static inline void spin_unlock_bucket(unsigned int hash) { - struct hlist_bl_head *bl = &gl_hash_table[hash]; - __bit_spin_unlock(0, (unsigned long *)bl); + hlist_bl_unlock(&gl_hash_table[hash]); } static void gfs2_glock_dealloc(struct rcu_head *rcu) diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 5bad17d1acde..31f9d75adc5b 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -2,6 +2,7 @@ #define _LINUX_LIST_BL_H #include +#include /* * Special version of lists, where head of the list has a lock in the lowest @@ -114,6 +115,16 @@ static inline void hlist_bl_del_init(struct hlist_bl_node *n) } } +static inline void hlist_bl_lock(struct hlist_bl_head *b) +{ + bit_spin_lock(0, (unsigned long *)b); +} + +static inline void hlist_bl_unlock(struct hlist_bl_head *b) +{ + __bit_spin_unlock(0, (unsigned long *)b); +} + /** * hlist_bl_for_each_entry - iterate over list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3 From 6565945b60922211c299968ba66a66617af32c9f Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 26 Apr 2011 13:27:43 -0400 Subject: drm/radeon/kms: add info query for tile pipes needed by mesa for htile setup. Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/radeon_kms.c | 13 +++++++++++++ include/drm/radeon_drm.h | 1 + 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index bf7d4c061451..871df0376b1c 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -221,6 +221,19 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } break; + case RADEON_INFO_NUM_TILE_PIPES: + if (rdev->family >= CHIP_CAYMAN) + value = rdev->config.cayman.max_tile_pipes; + else if (rdev->family >= CHIP_CEDAR) + value = rdev->config.evergreen.max_tile_pipes; + else if (rdev->family >= CHIP_RV770) + value = rdev->config.rv770.max_tile_pipes; + else if (rdev->family >= CHIP_R600) + value = rdev->config.r600.max_tile_pipes; + else { + return -EINVAL; + } + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 3bce1a4fc305..7aa5dddb2098 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -909,6 +909,7 @@ struct drm_radeon_cs { #define RADEON_INFO_WANT_CMASK 0x08 /* get access to CMASK on r300 */ #define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x09 /* clock crystal frequency */ #define RADEON_INFO_NUM_BACKENDS 0x0a /* DB/backends for r600+ - need for OQ */ +#define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */ struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From e8e7a2b8ccfdae0d4cb6bd25824bbedcd42da316 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Thu, 21 Apr 2011 22:18:32 +0100 Subject: drm/i915: restore only the mode of this driver on lastclose (v2) i915 calls the panic handler function on last close to reset the modes, however this is a really bad idea for multi-gpu machines, esp shareable gpus machines. So add a new entry point for the driver to just restore its own fbcon mode. v2: move code into fb helper, fix panic code to block mode change on powered off GPUs. [airlied: this hits drm core and I wrote it and it was reviewed on intel-gfx so really I signed it off twice ;-).] Signed-off-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_fb_helper.c | 27 ++++++++++++++++++++------- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/gpu/drm/i915/intel_fb.c | 10 ++++++++++ include/drm/drm_fb_helper.h | 1 + 5 files changed, 33 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 950720473967..11d7a72c22d9 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -342,9 +342,22 @@ int drm_fb_helper_debug_leave(struct fb_info *info) } EXPORT_SYMBOL(drm_fb_helper_debug_leave); +bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper) +{ + bool error = false; + int i, ret; + for (i = 0; i < fb_helper->crtc_count; i++) { + struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set; + ret = drm_crtc_helper_set_config(mode_set); + if (ret) + error = true; + } + return error; +} +EXPORT_SYMBOL(drm_fb_helper_restore_fbdev_mode); + bool drm_fb_helper_force_kernel_mode(void) { - int i = 0; bool ret, error = false; struct drm_fb_helper *helper; @@ -352,12 +365,12 @@ bool drm_fb_helper_force_kernel_mode(void) return false; list_for_each_entry(helper, &kernel_fb_helper_list, kernel_fb_list) { - for (i = 0; i < helper->crtc_count; i++) { - struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set; - ret = drm_crtc_helper_set_config(mode_set); - if (ret) - error = true; - } + if (helper->dev->switch_power_state == DRM_SWITCH_POWER_OFF) + continue; + + ret = drm_fb_helper_restore_fbdev_mode(helper); + if (ret) + error = true; } return error; } diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 72730377a01b..12876f2795d2 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -2207,7 +2207,7 @@ void i915_driver_lastclose(struct drm_device * dev) drm_i915_private_t *dev_priv = dev->dev_private; if (!dev_priv || drm_core_check_feature(dev, DRIVER_MODESET)) { - drm_fb_helper_restore(); + intel_fb_restore_mode(dev); vga_switcheroo_process_delayed_switch(); return; } diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index f5b0d8306d83..1d20712d527f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -338,4 +338,5 @@ extern int intel_overlay_attrs(struct drm_device *dev, void *data, struct drm_file *file_priv); extern void intel_fb_output_poll_changed(struct drm_device *dev); +extern void intel_fb_restore_mode(struct drm_device *dev); #endif /* __INTEL_DRV_H__ */ diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 512782728e51..ec49bae73382 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -264,3 +264,13 @@ void intel_fb_output_poll_changed(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; drm_fb_helper_hotplug_event(&dev_priv->fbdev->helper); } + +void intel_fb_restore_mode(struct drm_device *dev) +{ + int ret; + drm_i915_private_t *dev_priv = dev->dev_private; + + ret = drm_fb_helper_restore_fbdev_mode(&dev_priv->fbdev->helper); + if (ret) + DRM_DEBUG("failed to restore crtc mode\n"); +} diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index f22e7fe4b6db..ade09d7b4271 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -118,6 +118,7 @@ int drm_fb_helper_setcolreg(unsigned regno, unsigned transp, struct fb_info *info); +bool drm_fb_helper_restore_fbdev_mode(struct drm_fb_helper *fb_helper); void drm_fb_helper_restore(void); void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helper, uint32_t fb_width, uint32_t fb_height); -- cgit v1.2.3 From 28331a46d88459788c8fca72dbb0415cd7f514c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 27 Apr 2011 13:47:52 -0400 Subject: NFSv4: Ensure we request the ordinary fileid when doing readdirplus When readdir() returns a directory entry for the root of a mounted filesystem, Linux follows the old convention of returning the inode number of the covered directory (despite newer versions of POSIX declaring that this is a bug). To ensure this continues to work, the NFSv4 readdir implementation requests the 'mounted-on-fileid' from the server. However, readdirplus also needs to instantiate an inode for this entry, and for that, we also need to request the real fileid as per this patch. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 31 ++++++++++++++----------------- include/linux/nfs_xdr.h | 2 ++ 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index ba952bdc4d62..7310d2ec5de8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1452,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { - uint32_t attrs[2] = {0, 0}; + uint32_t attrs[2] = { + FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD1_MOUNTED_ON_FILEID, + }; uint32_t dircount = readdir->count >> 1; __be32 *p; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; + FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; dircount >>= 1; } - attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; - /* Switch to mounted_on_fileid if the server supports it */ - if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) - attrs[0] &= ~FATTR4_WORD0_FILEID; - else - attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; + /* Use mounted_on_fileid only if the server supports it */ + if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) + attrs[0] |= FATTR4_WORD0_FILEID; p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); *p++ = cpu_to_be32(OP_READDIR); @@ -3140,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma goto out_overflow; xdr_decode_hyper(p, fileid); bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; - ret = NFS_ATTR_FATTR_FILEID; + ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID; } dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); return ret; @@ -4002,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, { int status; umode_t fmode = 0; - uint64_t fileid; uint32_t type; status = decode_attr_type(xdr, bitmap, &type); @@ -4101,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, goto xdr_error; fattr->valid |= status; - status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); + status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid); if (status < 0) goto xdr_error; - if (status != 0 && !(fattr->valid & status)) { - fattr->fileid = fileid; - fattr->valid |= status; - } + fattr->valid |= status; xdr_error: dprintk("%s: xdr returned %d\n", __func__, -status); @@ -6411,7 +6406,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, entry->server, 1) < 0) goto out_overflow; - if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) + if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) + entry->ino = entry->fattr->mounted_on_fileid; + else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) entry->ino = entry->fattr->fileid; entry->d_type = DT_UNKNOWN; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 78b101e487ea..890dce242639 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -50,6 +50,7 @@ struct nfs_fattr { } du; struct nfs_fsid fsid; __u64 fileid; + __u64 mounted_on_fileid; struct timespec atime; struct timespec mtime; struct timespec ctime; @@ -83,6 +84,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR_PRECHANGE (1U << 18) #define NFS_ATTR_FATTR_V4_REFERRAL (1U << 19) /* NFSv4 referral */ #define NFS_ATTR_FATTR_MOUNTPOINT (1U << 20) /* Treat as mountpoint */ +#define NFS_ATTR_FATTR_MOUNTED_ON_FILEID (1U << 21) #define NFS_ATTR_FATTR (NFS_ATTR_FATTR_TYPE \ | NFS_ATTR_FATTR_MODE \ -- cgit v1.2.3 From 26fc8775b51484d8c0a671198639c6d5ae60533e Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Fri, 15 Apr 2011 20:08:19 +0200 Subject: mmc: fix a race between card-detect rescan and clock-gate work instances Currently there is a race in the MMC core between a card-detect rescan work and the clock-gating work, scheduled from a command completion. Fix it by removing the dedicated clock-gating mutex and using the MMC standard locking mechanism instead. Signed-off-by: Guennadi Liakhovetski Cc: Simon Horman Cc: Magnus Damm Acked-by: Linus Walleij Cc: Signed-off-by: Chris Ball --- drivers/mmc/core/host.c | 9 ++++----- include/linux/mmc/host.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 461e6a17fb90..2b200c1cfbba 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mutex_lock(&host->clk_gate_mutex); + mmc_claim_host(host); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mutex_unlock(&host->clk_gate_mutex); + mmc_release_host(host); } /** @@ -215,7 +215,6 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); - mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index bcb793ec7374..eb792cb6d745 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,7 +183,6 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3 From acad9853b95df6a3887f52e0ec88e4a77119ee28 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Wed, 27 Apr 2011 23:08:51 -0700 Subject: Input: wm831x-ts - allow IRQ flags to be specified This allows maximum flexibility for configuring the direct GPIO based interrupts. Signed-off-by: Mark Brown Signed-off-by: Dmitry Torokhov --- drivers/input/touchscreen/wm831x-ts.c | 16 +++++++++++++--- include/linux/mfd/wm831x/pdata.h | 2 ++ 2 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/input/touchscreen/wm831x-ts.c b/drivers/input/touchscreen/wm831x-ts.c index b9373012b3e6..78e8705df20d 100644 --- a/drivers/input/touchscreen/wm831x-ts.c +++ b/drivers/input/touchscreen/wm831x-ts.c @@ -241,7 +241,7 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) struct wm831x_pdata *core_pdata = dev_get_platdata(pdev->dev.parent); struct wm831x_touch_pdata *pdata = NULL; struct input_dev *input_dev; - int error; + int error, irqf; if (core_pdata) pdata = core_pdata->touch; @@ -314,9 +314,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) wm831x_set_bits(wm831x, WM831X_TOUCH_CONTROL_1, WM831X_TCH_RATE_MASK, 6); + if (pdata && pdata->data_irqf) + irqf = pdata->data_irqf; + else + irqf = IRQF_TRIGGER_HIGH; + error = request_threaded_irq(wm831x_ts->data_irq, NULL, wm831x_ts_data_irq, - IRQF_ONESHOT, + irqf | IRQF_ONESHOT, "Touchscreen data", wm831x_ts); if (error) { dev_err(&pdev->dev, "Failed to request data IRQ %d: %d\n", @@ -325,9 +330,14 @@ static __devinit int wm831x_ts_probe(struct platform_device *pdev) } disable_irq(wm831x_ts->data_irq); + if (pdata && pdata->pd_irqf) + irqf = pdata->pd_irqf; + else + irqf = IRQF_TRIGGER_HIGH; + error = request_threaded_irq(wm831x_ts->pd_irq, NULL, wm831x_ts_pen_down_irq, - IRQF_ONESHOT, + irqf | IRQF_ONESHOT, "Touchscreen pen down", wm831x_ts); if (error) { dev_err(&pdev->dev, "Failed to request pen down IRQ %d: %d\n", diff --git a/include/linux/mfd/wm831x/pdata.h b/include/linux/mfd/wm831x/pdata.h index 173086d42af4..6b0eb130efb8 100644 --- a/include/linux/mfd/wm831x/pdata.h +++ b/include/linux/mfd/wm831x/pdata.h @@ -81,7 +81,9 @@ struct wm831x_touch_pdata { int rpu; /** Pen down sensitivity resistor divider */ int pressure; /** Report pressure (boolean) */ unsigned int data_irq; /** Touch data ready IRQ */ + int data_irqf; /** IRQ flags for data ready IRQ */ unsigned int pd_irq; /** Touch pendown detect IRQ */ + int pd_irqf; /** IRQ flags for pen down IRQ */ }; enum wm831x_watchdog_action { -- cgit v1.2.3 From 78f11a255749d09025f54d4e2df4fbcb031530e2 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 27 Apr 2011 15:26:45 -0700 Subject: mm: thp: fix /dev/zero MAP_PRIVATE and vm_flags cleanups The huge_memory.c THP page fault was allowed to run if vm_ops was null (which would succeed for /dev/zero MAP_PRIVATE, as the f_op->mmap wouldn't setup a special vma->vm_ops and it would fallback to regular anonymous memory) but other THP logics weren't fully activated for vmas with vm_file not NULL (/dev/zero has a not NULL vma->vm_file). So this removes the vm_file checks so that /dev/zero also can safely use THP (the other albeit safer approach to fix this bug would have been to prevent the THP initial page fault to run if vm_file was set). After removing the vm_file checks, this also makes huge_memory.c stricter in khugepaged for the DEBUG_VM=y case. It doesn't replace the vm_file check with a is_pfn_mapping check (but it keeps checking for VM_PFNMAP under VM_BUG_ON) because for a is_cow_mapping() mapping VM_PFNMAP should only be allowed to exist before the first page fault, and in turn when vma->anon_vma is null (so preventing khugepaged registration). So I tend to think the previous comment saying if vm_file was set, VM_PFNMAP might have been set and we could still be registered in khugepaged (despite anon_vma was not NULL to be registered in khugepaged) was too paranoid. The is_linear_pfn_mapping check is also I think superfluous (as described by comment) but under DEBUG_VM it is safe to stay. Addresses https://bugzilla.kernel.org/show_bug.cgi?id=33682 Signed-off-by: Andrea Arcangeli Reported-by: Caspar Zhang Acked-by: Mel Gorman Acked-by: Rik van Riel Cc: [2.6.38.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- include/linux/mm.h | 3 ++- mm/huge_memory.c | 43 ++++++++++++++++++++++++------------------- 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index df29c8fde36b..8847c8c29791 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -117,7 +117,7 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long end, long adjust_next) { - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) return; __vma_adjust_trans_huge(vma, start, end, adjust_next); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 692dbae6ffa7..2348db26bc3d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -137,7 +137,8 @@ extern unsigned int kobjsize(const void *objp); #define VM_RandomReadHint(v) ((v)->vm_flags & VM_RAND_READ) /* - * special vmas that are non-mergable, non-mlock()able + * Special vmas that are non-mergable, non-mlock()able. + * Note: mm/huge_memory.c VM_NO_THP depends on this definition. */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 470dcda10add..83326ad66d9b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1408,6 +1408,9 @@ out: return ret; } +#define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ + VM_HUGETLB|VM_SHARED|VM_MAYSHARE) + int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { @@ -1416,11 +1419,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_HUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_NOHUGEPAGE; *vm_flags |= VM_HUGEPAGE; @@ -1436,11 +1435,7 @@ int hugepage_madvise(struct vm_area_struct *vma, /* * Be somewhat over-protective like KSM for now! */ - if (*vm_flags & (VM_NOHUGEPAGE | - VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) + if (*vm_flags & (VM_NOHUGEPAGE | VM_NO_THP)) return -EINVAL; *vm_flags &= ~VM_HUGEPAGE; *vm_flags |= VM_NOHUGEPAGE; @@ -1574,10 +1569,14 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) * page fault if needed. */ return 0; - if (vma->vm_file || vma->vm_ops) + if (vma->vm_ops) /* khugepaged not yet working on file or special mappings */ return 0; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; if (hstart < hend) @@ -1828,12 +1827,15 @@ static void collapse_huge_page(struct mm_struct *mm, (vma->vm_flags & VM_NOHUGEPAGE)) goto out; - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto out; if (is_vma_temporary_stack(vma)) goto out; - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() must be + * true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) @@ -2066,13 +2068,16 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, progress++; continue; } - /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ - if (!vma->anon_vma || vma->vm_ops || vma->vm_file) + if (!vma->anon_vma || vma->vm_ops) goto skip; if (is_vma_temporary_stack(vma)) goto skip; - - VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); + /* + * If is_pfn_mapping() is true is_learn_pfn_mapping() + * must be true too, verify it here. + */ + VM_BUG_ON(is_linear_pfn_mapping(vma) || + vma->vm_flags & VM_NO_THP); hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; hend = vma->vm_end & HPAGE_PMD_MASK; -- cgit v1.2.3 From 68972efa657040f891c7eda07c7da8c8dd576788 Mon Sep 17 00:00:00 2001 From: Paul Stewart Date: Thu, 28 Apr 2011 05:43:37 +0000 Subject: usbnet: Resubmit interrupt URB if device is open Resubmit interrupt URB if device is open. Use a flag set in usbnet_open() to determine this state. Also kill and free interrupt URB in usbnet_disconnect(). [Rebased off git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git] Signed-off-by: Paul Stewart Signed-off-by: David S. Miller --- drivers/net/usb/usbnet.c | 8 ++++++++ include/linux/usb/usbnet.h | 1 + 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 069c1cf0fdf7..009bba3d753e 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -736,6 +736,7 @@ int usbnet_open (struct net_device *net) } } + set_bit(EVENT_DEV_OPEN, &dev->flags); netif_start_queue (net); netif_info(dev, ifup, dev->net, "open: enable queueing (rx %d, tx %d) mtu %d %s framing\n", @@ -1259,6 +1260,9 @@ void usbnet_disconnect (struct usb_interface *intf) if (dev->driver_info->unbind) dev->driver_info->unbind (dev, intf); + usb_kill_urb(dev->interrupt); + usb_free_urb(dev->interrupt); + free_netdev(net); usb_put_dev (xdev); } @@ -1498,6 +1502,10 @@ int usbnet_resume (struct usb_interface *intf) int retval; if (!--dev->suspend_count) { + /* resume interrupt URBs */ + if (dev->interrupt && test_bit(EVENT_DEV_OPEN, &dev->flags)) + usb_submit_urb(dev->interrupt, GFP_NOIO); + spin_lock_irq(&dev->txq.lock); while ((res = usb_get_from_anchor(&dev->deferred))) { diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 0e1855079fbb..605b0aa8d852 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -68,6 +68,7 @@ struct usbnet { # define EVENT_RX_PAUSED 5 # define EVENT_DEV_WAKING 6 # define EVENT_DEV_ASLEEP 7 +# define EVENT_DEV_OPEN 8 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From 5d30b10bd68df007e7ae21e77d1e0ce184b53040 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Thu, 28 Apr 2011 15:55:52 -0400 Subject: flex_array: flex_array_prealloc takes a number of elements, not an end Change flex_array_prealloc to take the number of elements for which space should be allocated instead of the last (inclusive) element. Users and documentation are updated accordingly. flex_arrays got introduced before they had users. When folks started using it, they ended up needing a different API than was coded up originally. This swaps over to the API that folks apparently need. Based-on-patch-by: Steffen Klassert Signed-off-by: Eric Paris Tested-by: Chris Richards Acked-by: Dave Hansen Cc: stable@kernel.org [2.6.38+] --- Documentation/flexible-arrays.txt | 4 ++-- include/linux/flex_array.h | 2 +- lib/flex_array.c | 13 ++++++++----- security/selinux/ss/policydb.c | 6 +++--- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/flexible-arrays.txt b/Documentation/flexible-arrays.txt index cb8a3a00cc92..df904aec9904 100644 --- a/Documentation/flexible-arrays.txt +++ b/Documentation/flexible-arrays.txt @@ -66,10 +66,10 @@ trick is to ensure that any needed memory allocations are done before entering atomic context, using: int flex_array_prealloc(struct flex_array *array, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); This function will ensure that memory for the elements indexed in the range -defined by start and end has been allocated. Thereafter, a +defined by start and nr_elements has been allocated. Thereafter, a flex_array_put() call on an element in that range is guaranteed not to block. diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index 70e4efabe0fb..ebeb2f3ad068 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,7 +61,7 @@ struct flex_array { struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags); + unsigned int nr_elements, gfp_t flags); void flex_array_free(struct flex_array *fa); void flex_array_free_parts(struct flex_array *fa); int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, diff --git a/lib/flex_array.c b/lib/flex_array.c index c0ea40ba2082..0c33b24498ba 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -232,10 +232,10 @@ EXPORT_SYMBOL(flex_array_clear); /** * flex_array_prealloc - guarantee that array space exists - * @fa: the flex array for which to preallocate parts - * @start: index of first array element for which space is allocated - * @end: index of last (inclusive) element for which space is allocated - * @flags: page allocation flags + * @fa: the flex array for which to preallocate parts + * @start: index of first array element for which space is allocated + * @nr_elements: number of elements for which space is allocated + * @flags: page allocation flags * * This will guarantee that no future calls to flex_array_put() * will allocate memory. It can be used if you are expecting to @@ -245,13 +245,16 @@ EXPORT_SYMBOL(flex_array_clear); * Locking must be provided by the caller. */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, - unsigned int end, gfp_t flags) + unsigned int nr_elements, gfp_t flags) { int start_part; int end_part; int part_nr; + unsigned int end; struct flex_array_part *part; + end = start + nr_elements - 1; + if (start >= fa->total_nr_elements || end >= fa->total_nr_elements) return -ENOSPC; if (elements_fit_in_base(fa)) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index e7b850ad57ee..e6e7ce0d3d55 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -502,7 +502,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->type_val_to_struct_array, 0, - p->p_types.nprim - 1, GFP_KERNEL | __GFP_ZERO); + p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -519,7 +519,7 @@ static int policydb_index(struct policydb *p) goto out; rc = flex_array_prealloc(p->sym_val_to_name[i], - 0, p->symtab[i].nprim - 1, + 0, p->symtab[i].nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto out; @@ -2375,7 +2375,7 @@ int policydb_read(struct policydb *p, void *fp) goto bad; /* preallocate so we don't have to worry about the put ever failing */ - rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim - 1, + rc = flex_array_prealloc(p->type_attr_map_array, 0, p->p_types.nprim, GFP_KERNEL | __GFP_ZERO); if (rc) goto bad; -- cgit v1.2.3 From a6e5e2be44616c8400f9ec2f635b10f8e579217c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 1 May 2011 18:18:49 +0200 Subject: i2c-i801: Move device ID definitions to driver Move the SMBus device ID definitions of recent devices from pci_ids.h to the i2c-i801.c driver file. They don't have to be shared, as they are clearly identified and only used in this driver. In the future, such IDs will go to i2c-i801 directly. This will make adding support for new devices much faster and easier, as it will avoid cross- subsystem patch sets and merge conflicts. Signed-off-by: Jean Delvare Cc: Seth Heasley Acked-by: Jesse Barnes --- drivers/i2c/busses/i2c-i801.c | 5 +++++ include/linux/pci_ids.h | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 72c0415f6f94..455e909bc768 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -134,10 +134,15 @@ SMBHSTSTS_BUS_ERR | SMBHSTSTS_DEV_ERR | \ SMBHSTSTS_INTR) +/* Older devices have their ID defined in */ +#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 +#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 /* Patsburg also has three 'Integrated Device Function' SMBus controllers */ #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF0 0x1d70 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF1 0x1d71 #define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS_IDF2 0x1d72 +#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 +#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 struct i801_priv { struct i2c_adapter adapter; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 4e2c9150a785..8abe8d78c4bf 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2477,15 +2477,12 @@ #define PCI_DEVICE_ID_INTEL_82840_HB 0x1a21 #define PCI_DEVICE_ID_INTEL_82845_HB 0x1a30 #define PCI_DEVICE_ID_INTEL_IOAT 0x1a38 -#define PCI_DEVICE_ID_INTEL_COUGARPOINT_SMBUS 0x1c22 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN 0x1c41 #define PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX 0x1c5f -#define PCI_DEVICE_ID_INTEL_PATSBURG_SMBUS 0x1d22 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0 0x1d40 #define PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1 0x1d41 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MIN 0x2310 #define PCI_DEVICE_ID_INTEL_DH89XXCC_LPC_MAX 0x231f -#define PCI_DEVICE_ID_INTEL_DH89XXCC_SMBUS 0x2330 #define PCI_DEVICE_ID_INTEL_82801AA_0 0x2410 #define PCI_DEVICE_ID_INTEL_82801AA_1 0x2411 #define PCI_DEVICE_ID_INTEL_82801AA_3 0x2413 @@ -2696,7 +2693,6 @@ #define PCI_DEVICE_ID_INTEL_ICH10_5 0x3a60 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN 0x3b00 #define PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX 0x3b1f -#define PCI_DEVICE_ID_INTEL_5_3400_SERIES_SMBUS 0x3b30 #define PCI_DEVICE_ID_INTEL_IOAT_SNB 0x402f #define PCI_DEVICE_ID_INTEL_5100_16 0x65f0 #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 -- cgit v1.2.3 From e2c85d8e3974c9041ad7b080846b28d2243e771b Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 May 2011 15:15:55 -0400 Subject: drm/radeon/kms: add some new pci ids Signed-off-by: Alex Deucher Cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 816e30cbd968..f04b2a3b0f49 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -155,6 +155,7 @@ {0x1002, 0x6719, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x671c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x671d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x671f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CAYMAN|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6720, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6721, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6722, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ @@ -167,6 +168,7 @@ {0x1002, 0x6729, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6738, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6739, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x673e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_BARTS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6740, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6741, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6742, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TURKS|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ @@ -199,6 +201,7 @@ {0x1002, 0x688D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6898, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x6899, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x689b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_HEMLOCK|RADEON_NEW_MEMMAP}, \ {0x1002, 0x689e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_CYPRESS|RADEON_NEW_MEMMAP}, \ @@ -209,7 +212,9 @@ {0x1002, 0x68b0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68b8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68b9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68ba, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68be, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x68bf, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_JUNIPER|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x68c7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_REDWOOD|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ -- cgit v1.2.3 From 8aeb96f80232e9a701b5c4715504f4c9173978bd Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 3 May 2011 19:28:02 -0400 Subject: drm/radeon/kms: fix gart setup on fusion parts (v2) Out of the entire GART/VM subsystem, the hw designers changed the location of 3 regs. v2: airlied: add parameter for userspace to work from. Signed-off-by: Alex Deucher Signed-off-by: Jerome Glisse Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/radeon/evergreen.c | 17 +++++++++-------- drivers/gpu/drm/radeon/evergreend.h | 5 +++++ drivers/gpu/drm/radeon/radeon_kms.c | 3 +++ include/drm/radeon_drm.h | 1 + 4 files changed, 18 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index e9bc135d9189..c20eac3379e6 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -862,9 +862,15 @@ int evergreen_pcie_gart_enable(struct radeon_device *rdev) SYSTEM_ACCESS_MODE_NOT_IN_SYS | SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU | EFFECTIVE_L1_TLB_SIZE(5) | EFFECTIVE_L1_QUEUE_SIZE(5); - WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); - WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); - WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + if (rdev->flags & RADEON_IS_IGP) { + WREG32(FUS_MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(FUS_MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(FUS_MC_VM_MD_L1_TLB2_CNTL, tmp); + } else { + WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp); + WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp); + } WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp); WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp); @@ -2923,11 +2929,6 @@ static int evergreen_startup(struct radeon_device *rdev) rdev->asic->copy = NULL; dev_warn(rdev->dev, "failed blitter (%d) falling back to memcpy\n", r); } - /* XXX: ontario has problems blitting to gart at the moment */ - if (rdev->family == CHIP_PALM) { - rdev->asic->copy = NULL; - radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); - } /* allocate wb buffer */ r = radeon_wb_init(rdev); diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h index 9aaa3f0c9372..94533849927e 100644 --- a/drivers/gpu/drm/radeon/evergreend.h +++ b/drivers/gpu/drm/radeon/evergreend.h @@ -221,6 +221,11 @@ #define MC_VM_MD_L1_TLB0_CNTL 0x2654 #define MC_VM_MD_L1_TLB1_CNTL 0x2658 #define MC_VM_MD_L1_TLB2_CNTL 0x265C + +#define FUS_MC_VM_MD_L1_TLB0_CNTL 0x265C +#define FUS_MC_VM_MD_L1_TLB1_CNTL 0x2660 +#define FUS_MC_VM_MD_L1_TLB2_CNTL 0x2664 + #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038 #define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034 diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 871df0376b1c..bd58af658581 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -234,6 +234,9 @@ int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return -EINVAL; } break; + case RADEON_INFO_FUSION_GART_WORKING: + value = 1; + break; default: DRM_DEBUG_KMS("Invalid request %d\n", info->request); return -EINVAL; diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h index 7aa5dddb2098..787f7b6fd622 100644 --- a/include/drm/radeon_drm.h +++ b/include/drm/radeon_drm.h @@ -910,6 +910,7 @@ struct drm_radeon_cs { #define RADEON_INFO_CLOCK_CRYSTAL_FREQ 0x09 /* clock crystal frequency */ #define RADEON_INFO_NUM_BACKENDS 0x0a /* DB/backends for r600+ - need for OQ */ #define RADEON_INFO_NUM_TILE_PIPES 0x0b /* tile pipes for r600+ */ +#define RADEON_INFO_FUSION_GART_WORKING 0x0c /* fusion writes to GTT were broken before this */ struct drm_radeon_info { uint32_t request; -- cgit v1.2.3 From 30106b8ce2cc2243514116d6f29086e6deecc754 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 4 May 2011 15:38:19 +0200 Subject: slub: Fix the lockless code on 32-bit platforms with no 64-bit cmpxchg The SLUB allocator use of the cmpxchg_double logic was wrong: it actually needs the irq-safe one. That happens automatically when we use the native unlocked 'cmpxchg8b' instruction, but when compiling the kernel for older x86 CPUs that do not support that instruction, we fall back to the generic emulation code. And if you don't specify that you want the irq-safe version, the generic code ends up just open-coding the cmpxchg8b equivalent without any protection against interrupts or preemption. Which definitely doesn't work for SLUB. This was reported by Werner Landgraf , who saw instability with his distro-kernel that was compiled to support pretty much everything under the sun. Most big Linux distributions tend to compile for PPro and later, and would never have noticed this problem. This also fixes the prototypes for the irqsafe cmpxchg_double functions to use 'bool' like they should. [ Btw, that whole "generic code defaults to no protection" design just sounds stupid - if the code needs no protection, there is no reason to use "cmpxchg_double" to begin with. So we should probably just remove the unprotected version entirely as pointless. - Linus ] Signed-off-by: Thomas Gleixner Reported-and-tested-by: werner Acked-and-tested-by: Ingo Molnar Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: Jens Axboe Cc: Tejun Heo Link: http://lkml.kernel.org/r/alpine.LFD.2.02.1105041539050.3005@ionos Signed-off-by: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 2 +- mm/slub.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 3a5c4449fd36..8b97308e65df 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -948,7 +948,7 @@ do { \ irqsafe_generic_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif # define irqsafe_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_int(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(irqsafe_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ diff --git a/mm/slub.c b/mm/slub.c index 94d2a33a866e..9d2e5e46bf09 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1940,7 +1940,7 @@ redo: * Since this is without lock semantics the protection is only against * code executing on this cpu *not* from access by other cpus. */ - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, object, tid, get_freepointer(s, object), next_tid(tid)))) { @@ -2145,7 +2145,7 @@ redo: set_freepointer(s, object, c->freelist); #ifdef CONFIG_CMPXCHG_LOCAL - if (unlikely(!this_cpu_cmpxchg_double( + if (unlikely(!irqsafe_cpu_cmpxchg_double( s->cpu_slab->freelist, s->cpu_slab->tid, c->freelist, tid, object, next_tid(tid)))) { -- cgit v1.2.3 From a3a4a5acd3bd2f6f1e102e1f1b9d2e2bb320a7fd Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 5 May 2011 23:55:18 -0400 Subject: Regression: partial revert "tracing: Remove lock_depth from event entry" This partially reverts commit e6e1e2593592a8f6f6380496655d8c6f67431266. That commit changed the structure layout of the trace structure, which in turn broke PowerTOP (1.9x generation) quite badly. I appreciate not wanting to expose the variable in question, and PowerTOP was not using it, so I've replaced the variable with just a padding field - that way if in the future a new field is needed it can just use this padding field. Signed-off-by: Arjan van de Ven Signed-off-by: Linus Torvalds --- include/linux/ftrace_event.h | 1 + kernel/trace/trace.c | 1 + kernel/trace/trace_events.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index 22b32af1b5ec..b5a550a39a70 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -37,6 +37,7 @@ struct trace_entry { unsigned char flags; unsigned char preempt_count; int pid; + int padding; }; #define FTRACE_MAX_EVENT \ diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index d38c16a06a6f..1cb49be7c7fb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1110,6 +1110,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, entry->preempt_count = pc & 0xff; entry->pid = (tsk) ? tsk->pid : 0; + entry->padding = 0; entry->flags = #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e88f74fe1d4c..2fe110341359 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -116,6 +116,7 @@ static int trace_define_common_fields(void) __common_field(unsigned char, flags); __common_field(unsigned char, preempt_count); __common_field(int, pid); + __common_field(int, padding); return ret; } -- cgit v1.2.3 From 2bbd4492552867053b5a618a2474297e2b1c355d Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Fri, 6 May 2011 23:47:53 +0200 Subject: drm: mm: fix debug output The looping helper didn't do anything due to a superficial semicolon. Furthermore one of the two dump functions suffered from copy&paste fail. While staring at the code I've also noticed that the replace helper (currently unused) is a bit broken. Signed-off-by: Daniel Vetter Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_mm.c | 6 +++--- include/drm/drm_mm.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 5d00b0fc0d91..959186cbf328 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -431,7 +431,7 @@ EXPORT_SYMBOL(drm_mm_search_free_in_range); void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new) { list_replace(&old->node_list, &new->node_list); - list_replace(&old->node_list, &new->hole_stack); + list_replace(&old->hole_stack, &new->hole_stack); new->hole_follows = old->hole_follows; new->mm = old->mm; new->start = old->start; @@ -699,8 +699,8 @@ int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm) entry->size); total_used += entry->size; if (entry->hole_follows) { - hole_start = drm_mm_hole_node_start(&mm->head_node); - hole_end = drm_mm_hole_node_end(&mm->head_node); + hole_start = drm_mm_hole_node_start(entry); + hole_end = drm_mm_hole_node_end(entry); hole_size = hole_end - hole_start; seq_printf(m, "0x%08lx-0x%08lx: 0x%08lx: free\n", hole_start, hole_end, hole_size); diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index c2f93a8ae2e1..564b14aa7e16 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -86,7 +86,7 @@ static inline bool drm_mm_initialized(struct drm_mm *mm) } #define drm_mm_for_each_node(entry, mm) list_for_each_entry(entry, \ &(mm)->head_node.node_list, \ - node_list); + node_list) #define drm_mm_for_each_scanned_node_reverse(entry, n, mm) \ for (entry = (mm)->prev_scanned_node, \ next = entry ? list_entry(entry->node_list.next, \ -- cgit v1.2.3 From a09a79f66874c905af35d5bb5e5f2fdc7b6b894d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 9 May 2011 13:01:09 +0200 Subject: Don't lock guardpage if the stack is growing up Linux kernel excludes guard page when performing mlock on a VMA with down-growing stack. However, some architectures have up-growing stack and locking the guard page should be excluded in this case too. This patch fixes lvm2 on PA-RISC (and possibly other architectures with up-growing stack). lvm2 calculates number of used pages when locking and when unlocking and reports an internal error if the numbers mismatch. [ Patch changed fairly extensively to also fix /proc//maps for the grows-up case, and to move things around a bit to clean it all up and share the infrstructure with the /proc bits. Tested on ia64 that has both grow-up and grow-down segments - Linus ] Signed-off-by: Mikulas Patocka Tested-by: Tony Luck Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 12 +++++++----- include/linux/mm.h | 24 +++++++++++++++++++++++- mm/memory.c | 16 +++++++--------- 3 files changed, 37 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2e7addfd9803..318d8654989b 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -214,7 +214,7 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) int flags = vma->vm_flags; unsigned long ino = 0; unsigned long long pgoff = 0; - unsigned long start; + unsigned long start, end; dev_t dev = 0; int len; @@ -227,13 +227,15 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; - if (vma->vm_flags & VM_GROWSDOWN) - if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) - start += PAGE_SIZE; + if (stack_guard_page_start(vma, start)) + start += PAGE_SIZE; + end = vma->vm_end; + if (stack_guard_page_end(vma, end)) + end -= PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, - vma->vm_end, + end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', diff --git a/include/linux/mm.h b/include/linux/mm.h index 2348db26bc3d..6507dde38b16 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1011,11 +1011,33 @@ int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); /* Is the vma a continuation of the stack vma above it? */ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) { return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); } +static inline int stack_guard_page_start(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSDOWN) && + (vma->vm_start == addr) && + !vma_growsdown(vma->vm_prev, addr); +} + +/* Is the vma a continuation of the stack vma below it? */ +static inline int vma_growsup(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_start == addr) && (vma->vm_flags & VM_GROWSUP); +} + +static inline int stack_guard_page_end(struct vm_area_struct *vma, + unsigned long addr) +{ + return (vma->vm_flags & VM_GROWSUP) && + (vma->vm_end == addr) && + !vma_growsup(vma->vm_next, addr); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/memory.c b/mm/memory.c index 27f425378112..61e66f026563 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1412,9 +1412,8 @@ no_page_table: static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { - return (vma->vm_flags & VM_GROWSDOWN) && - (vma->vm_start == addr) && - !vma_stack_continue(vma->vm_prev, addr); + return stack_guard_page_start(vma, addr) || + stack_guard_page_end(vma, addr+PAGE_SIZE); } /** @@ -1551,12 +1550,6 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, continue; } - /* - * For mlock, just skip the stack guard page. - */ - if ((gup_flags & FOLL_MLOCK) && stack_guard_page(vma, start)) - goto next_page; - do { struct page *page; unsigned int foll_flags = gup_flags; @@ -1573,6 +1566,11 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, int ret; unsigned int fault_flags = 0; + /* For mlock, just skip the stack guard page. */ + if (foll_flags & FOLL_MLOCK) { + if (stack_guard_page(vma, start)) + goto next_page; + } if (foll_flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; if (nonblocking) -- cgit v1.2.3 From 7a4f0761fce32ff4918a7c23b08db564ad33092d Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 3 May 2011 22:09:31 +0200 Subject: IPVS: init and cleanup restructuring DESCRIPTION This patch tries to restore the initial init and cleanup sequences that was before namspace patch. Netns also requires action when net devices unregister which has never been implemented. I.e this patch also covers when a device moves into a network namespace, and has to be released. IMPLEMENTATION The number of calls to register_pernet_device have been reduced to one for the ip_vs.ko Schedulers still have their own calls. This patch adds a function __ip_vs_service_cleanup() and an enable flag for the netfilter hooks. The nf hooks will be enabled when the first service is loaded and never disabled again, except when a namespace exit starts. Signed-off-by: Hans Schillstrom Acked-by: Julian Anastasov [horms@verge.net.au: minor edit to changelog] Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 ++++++ net/netfilter/ipvs/ip_vs_app.c | 15 +---- net/netfilter/ipvs/ip_vs_conn.c | 12 +--- net/netfilter/ipvs/ip_vs_core.c | 101 +++++++++++++++++++++++++++++--- net/netfilter/ipvs/ip_vs_ctl.c | 120 ++++++++++++++++++++++++++++++++------- net/netfilter/ipvs/ip_vs_est.c | 14 +---- net/netfilter/ipvs/ip_vs_proto.c | 11 +--- net/netfilter/ipvs/ip_vs_sync.c | 13 +---- 8 files changed, 223 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d516f00c8e0f..86aefed6140b 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -791,6 +791,7 @@ struct ip_vs_app { /* IPVS in network namespace */ struct netns_ipvs { int gen; /* Generation */ + int enable; /* enable like nf_hooks do */ /* * Hash table: for real service lookups */ @@ -1089,6 +1090,22 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp) atomic_inc(&ctl_cp->n_control); } +/* + * IPVS netns init & cleanup functions + */ +extern int __ip_vs_estimator_init(struct net *net); +extern int __ip_vs_control_init(struct net *net); +extern int __ip_vs_protocol_init(struct net *net); +extern int __ip_vs_app_init(struct net *net); +extern int __ip_vs_conn_init(struct net *net); +extern int __ip_vs_sync_init(struct net *net); +extern void __ip_vs_conn_cleanup(struct net *net); +extern void __ip_vs_app_cleanup(struct net *net); +extern void __ip_vs_protocol_cleanup(struct net *net); +extern void __ip_vs_control_cleanup(struct net *net); +extern void __ip_vs_estimator_cleanup(struct net *net); +extern void __ip_vs_sync_cleanup(struct net *net); +extern void __ip_vs_service_cleanup(struct net *net); /* * IPVS application functions diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 2dc6de13ac18..51f3af7c4743 100644 --- a/net/netfilter/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c @@ -576,7 +576,7 @@ static const struct file_operations ip_vs_app_fops = { }; #endif -static int __net_init __ip_vs_app_init(struct net *net) +int __net_init __ip_vs_app_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -585,26 +585,17 @@ static int __net_init __ip_vs_app_init(struct net *net) return 0; } -static void __net_exit __ip_vs_app_cleanup(struct net *net) +void __net_exit __ip_vs_app_cleanup(struct net *net) { proc_net_remove(net, "ip_vs_app"); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_app_init, - .exit = __ip_vs_app_cleanup, -}; - int __init ip_vs_app_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_app_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index c97bd45975be..d3fd91bbba49 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -1258,22 +1258,17 @@ int __net_init __ip_vs_conn_init(struct net *net) return 0; } -static void __net_exit __ip_vs_conn_cleanup(struct net *net) +void __net_exit __ip_vs_conn_cleanup(struct net *net) { /* flush all the connection entries first */ ip_vs_conn_flush(net); proc_net_remove(net, "ip_vs_conn"); proc_net_remove(net, "ip_vs_conn_sync"); } -static struct pernet_operations ipvs_conn_ops = { - .init = __ip_vs_conn_init, - .exit = __ip_vs_conn_cleanup, -}; int __init ip_vs_conn_init(void) { int idx; - int retc; /* Compute size and mask */ ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; @@ -1309,17 +1304,14 @@ int __init ip_vs_conn_init(void) rwlock_init(&__ip_vs_conntbl_lock_array[idx].l); } - retc = register_pernet_subsys(&ipvs_conn_ops); - /* calculate the random value for connection hash */ get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return retc; + return 0; } void ip_vs_conn_cleanup(void) { - unregister_pernet_subsys(&ipvs_conn_ops); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); vfree(ip_vs_conn_tab); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a0791dc05a27..a74dae6c5dbc 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1113,6 +1113,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) return NF_ACCEPT; net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1343,6 +1346,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return NF_ACCEPT; /* The packet looks wrong, ignore */ net = skb_net(skb); + pd = ip_vs_proto_data_get(net, cih->protocol); if (!pd) return NF_ACCEPT; @@ -1529,6 +1533,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) IP_VS_DBG_ADDR(af, &iph.daddr), hooknum); return NF_ACCEPT; } + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* Bad... Do not break raw sockets */ @@ -1562,7 +1571,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } - net = skb_net(skb); /* Protocol supported? */ pd = ip_vs_proto_data_get(net, iph.protocol); if (unlikely(!pd)) @@ -1588,7 +1596,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) } IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); - net = skb_net(skb); ipvs = net_ipvs(net); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { @@ -1743,10 +1750,16 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ip_hdr(skb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp(skb, &r, hooknum); } @@ -1757,10 +1770,16 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { int r; + struct net *net; if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) return NF_ACCEPT; + /* ipvs enabled in this netns ? */ + net = skb_net(skb); + if (!net_ipvs(net)->enable) + return NF_ACCEPT; + return ip_vs_in_icmp_v6(skb, &r, hooknum); } #endif @@ -1884,21 +1903,72 @@ static int __net_init __ip_vs_init(struct net *net) pr_err("%s(): no memory.\n", __func__); return -ENOMEM; } + /* Hold the beast until a service is registerd */ + ipvs->enable = 0; ipvs->net = net; /* Counters used for creating unique names */ ipvs->gen = atomic_read(&ipvs_netns_cnt); atomic_inc(&ipvs_netns_cnt); net->ipvs = ipvs; + + if (__ip_vs_estimator_init(net) < 0) + goto estimator_fail; + + if (__ip_vs_control_init(net) < 0) + goto control_fail; + + if (__ip_vs_protocol_init(net) < 0) + goto protocol_fail; + + if (__ip_vs_app_init(net) < 0) + goto app_fail; + + if (__ip_vs_conn_init(net) < 0) + goto conn_fail; + + if (__ip_vs_sync_init(net) < 0) + goto sync_fail; + printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n", sizeof(struct netns_ipvs), ipvs->gen); return 0; +/* + * Error handling + */ + +sync_fail: + __ip_vs_conn_cleanup(net); +conn_fail: + __ip_vs_app_cleanup(net); +app_fail: + __ip_vs_protocol_cleanup(net); +protocol_fail: + __ip_vs_control_cleanup(net); +control_fail: + __ip_vs_estimator_cleanup(net); +estimator_fail: + return -ENOMEM; } static void __net_exit __ip_vs_cleanup(struct net *net) { + __ip_vs_service_cleanup(net); /* ip_vs_flush() with locks */ + __ip_vs_conn_cleanup(net); + __ip_vs_app_cleanup(net); + __ip_vs_protocol_cleanup(net); + __ip_vs_control_cleanup(net); + __ip_vs_estimator_cleanup(net); IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen); } +static void __net_exit __ip_vs_dev_cleanup(struct net *net) +{ + EnterFunction(2); + net_ipvs(net)->enable = 0; /* Disable packet reception */ + __ip_vs_sync_cleanup(net); + LeaveFunction(2); +} + static struct pernet_operations ipvs_core_ops = { .init = __ip_vs_init, .exit = __ip_vs_cleanup, @@ -1906,6 +1976,10 @@ static struct pernet_operations ipvs_core_ops = { .size = sizeof(struct netns_ipvs), }; +static struct pernet_operations ipvs_core_dev_ops = { + .exit = __ip_vs_dev_cleanup, +}; + /* * Initialize IP Virtual Server */ @@ -1913,10 +1987,6 @@ static int __init ip_vs_init(void) { int ret; - ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ - if (ret < 0) - return ret; - ip_vs_estimator_init(); ret = ip_vs_control_init(); if (ret < 0) { @@ -1944,15 +2014,28 @@ static int __init ip_vs_init(void) goto cleanup_conn; } + ret = register_pernet_subsys(&ipvs_core_ops); /* Alloc ip_vs struct */ + if (ret < 0) + goto cleanup_sync; + + ret = register_pernet_device(&ipvs_core_dev_ops); + if (ret < 0) + goto cleanup_sub; + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); if (ret < 0) { pr_err("can't register hooks.\n"); - goto cleanup_sync; + goto cleanup_dev; } pr_info("ipvs loaded.\n"); + return ret; +cleanup_dev: + unregister_pernet_device(&ipvs_core_dev_ops); +cleanup_sub: + unregister_pernet_subsys(&ipvs_core_ops); cleanup_sync: ip_vs_sync_cleanup(); cleanup_conn: @@ -1964,20 +2047,20 @@ cleanup_sync: ip_vs_control_cleanup(); cleanup_estimator: ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ return ret; } static void __exit ip_vs_cleanup(void) { nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops)); + unregister_pernet_device(&ipvs_core_dev_ops); + unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ ip_vs_sync_cleanup(); ip_vs_conn_cleanup(); ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); ip_vs_estimator_cleanup(); - unregister_pernet_subsys(&ipvs_core_ops); /* free ip_vs struct */ pr_info("ipvs unloaded.\n"); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ae47090bf45f..ea722810faf3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -69,6 +69,11 @@ int ip_vs_get_debug_level(void) } #endif + +/* Protos */ +static void __ip_vs_del_service(struct ip_vs_service *svc); + + #ifdef CONFIG_IP_VS_IPV6 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ static int __ip_vs_addr_is_local_v6(struct net *net, @@ -1214,6 +1219,8 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u, write_unlock_bh(&__ip_vs_svc_lock); *svc_p = svc; + /* Now there is a service - full throttle */ + ipvs->enable = 1; return 0; @@ -1472,6 +1479,84 @@ static int ip_vs_flush(struct net *net) return 0; } +/* + * Delete service by {netns} in the service table. + * Called by __ip_vs_cleanup() + */ +void __ip_vs_service_cleanup(struct net *net) +{ + EnterFunction(2); + /* Check for "full" addressed entries */ + mutex_lock(&__ip_vs_mutex); + ip_vs_flush(net); + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); +} +/* + * Release dst hold by dst_cache + */ +static inline void +__ip_vs_dev_reset(struct ip_vs_dest *dest, struct net_device *dev) +{ + spin_lock_bh(&dest->dst_lock); + if (dest->dst_cache && dest->dst_cache->dev == dev) { + IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", + dev->name, + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + ip_vs_dst_reset(dest); + } + spin_unlock_bh(&dest->dst_lock); + +} +/* + * Netdev event receiver + * Currently only NETDEV_UNREGISTER is handled, i.e. if we hold a reference to + * a device that is "unregister" it must be released. + */ +static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct net *net = dev_net(dev); + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + unsigned int idx; + + if (event != NETDEV_UNREGISTER) + return NOTIFY_DONE; + IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); + EnterFunction(2); + mutex_lock(&__ip_vs_mutex); + for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { + list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + } + + list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + if (net_eq(svc->net, net)) { + list_for_each_entry(dest, &svc->destinations, + n_list) { + __ip_vs_dev_reset(dest, dev); + } + } + + } + } + + list_for_each_entry(dest, &net_ipvs(net)->dest_trash, n_list) { + __ip_vs_dev_reset(dest, dev); + } + mutex_unlock(&__ip_vs_mutex); + LeaveFunction(2); + return NOTIFY_DONE; +} /* * Zero counters in a service or all services @@ -3588,6 +3673,10 @@ void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { } #endif +static struct notifier_block ip_vs_dst_notifier = { + .notifier_call = ip_vs_dst_event, +}; + int __net_init __ip_vs_control_init(struct net *net) { int idx; @@ -3626,7 +3715,7 @@ err: return -ENOMEM; } -static void __net_exit __ip_vs_control_cleanup(struct net *net) +void __net_exit __ip_vs_control_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -3639,11 +3728,6 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net) free_percpu(ipvs->tot_stats.cpustats); } -static struct pernet_operations ipvs_control_ops = { - .init = __ip_vs_control_init, - .exit = __ip_vs_control_cleanup, -}; - int __init ip_vs_control_init(void) { int idx; @@ -3657,33 +3741,32 @@ int __init ip_vs_control_init(void) INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); } - ret = register_pernet_subsys(&ipvs_control_ops); - if (ret) { - pr_err("cannot register namespace.\n"); - goto err; - } - smp_wmb(); /* Do we really need it now ? */ ret = nf_register_sockopt(&ip_vs_sockopts); if (ret) { pr_err("cannot register sockopt.\n"); - goto err_net; + goto err_sock; } ret = ip_vs_genl_register(); if (ret) { pr_err("cannot register Generic Netlink interface.\n"); - nf_unregister_sockopt(&ip_vs_sockopts); - goto err_net; + goto err_genl; } + ret = register_netdevice_notifier(&ip_vs_dst_notifier); + if (ret < 0) + goto err_notf; + LeaveFunction(2); return 0; -err_net: - unregister_pernet_subsys(&ipvs_control_ops); -err: +err_notf: + ip_vs_genl_unregister(); +err_genl: + nf_unregister_sockopt(&ip_vs_sockopts); +err_sock: return ret; } @@ -3691,7 +3774,6 @@ err: void ip_vs_control_cleanup(void) { EnterFunction(2); - unregister_pernet_subsys(&ipvs_control_ops); ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 8c8766ca56ad..508cce98777c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -192,7 +192,7 @@ void ip_vs_read_estimator(struct ip_vs_stats_user *dst, dst->outbps = (e->outbps + 0xF) >> 5; } -static int __net_init __ip_vs_estimator_init(struct net *net) +int __net_init __ip_vs_estimator_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -203,24 +203,16 @@ static int __net_init __ip_vs_estimator_init(struct net *net) return 0; } -static void __net_exit __ip_vs_estimator_exit(struct net *net) +void __net_exit __ip_vs_estimator_cleanup(struct net *net) { del_timer_sync(&net_ipvs(net)->est_timer); } -static struct pernet_operations ip_vs_app_ops = { - .init = __ip_vs_estimator_init, - .exit = __ip_vs_estimator_exit, -}; int __init ip_vs_estimator_init(void) { - int rv; - - rv = register_pernet_subsys(&ip_vs_app_ops); - return rv; + return 0; } void ip_vs_estimator_cleanup(void) { - unregister_pernet_subsys(&ip_vs_app_ops); } diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 17484a4416ef..eb86028536fc 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -316,7 +316,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, /* * per network name-space init */ -static int __net_init __ip_vs_protocol_init(struct net *net) +int __net_init __ip_vs_protocol_init(struct net *net) { #ifdef CONFIG_IP_VS_PROTO_TCP register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp); @@ -336,7 +336,7 @@ static int __net_init __ip_vs_protocol_init(struct net *net) return 0; } -static void __net_exit __ip_vs_protocol_cleanup(struct net *net) +void __net_exit __ip_vs_protocol_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_proto_data *pd; @@ -349,11 +349,6 @@ static void __net_exit __ip_vs_protocol_cleanup(struct net *net) } } -static struct pernet_operations ipvs_proto_ops = { - .init = __ip_vs_protocol_init, - .exit = __ip_vs_protocol_cleanup, -}; - int __init ip_vs_protocol_init(void) { char protocols[64]; @@ -382,7 +377,6 @@ int __init ip_vs_protocol_init(void) REGISTER_PROTOCOL(&ip_vs_protocol_esp); #endif pr_info("Registered protocols (%s)\n", &protocols[2]); - return register_pernet_subsys(&ipvs_proto_ops); return 0; } @@ -393,7 +387,6 @@ void ip_vs_protocol_cleanup(void) struct ip_vs_protocol *pp; int i; - unregister_pernet_subsys(&ipvs_proto_ops); /* unregister all the ipvs protocols */ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { while ((pp = ip_vs_proto_table[i]) != NULL) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 0cce95310820..e292e5bddc70 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1663,7 +1663,7 @@ int stop_sync_thread(struct net *net, int state) /* * Initialize data struct for each netns */ -static int __net_init __ip_vs_sync_init(struct net *net) +int __net_init __ip_vs_sync_init(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); @@ -1677,7 +1677,7 @@ static int __net_init __ip_vs_sync_init(struct net *net) return 0; } -static void __ip_vs_sync_cleanup(struct net *net) +void __ip_vs_sync_cleanup(struct net *net) { int retc; @@ -1690,18 +1690,11 @@ static void __ip_vs_sync_cleanup(struct net *net) pr_err("Failed to stop Backup Daemon\n"); } -static struct pernet_operations ipvs_sync_ops = { - .init = __ip_vs_sync_init, - .exit = __ip_vs_sync_cleanup, -}; - - int __init ip_vs_sync_init(void) { - return register_pernet_device(&ipvs_sync_ops); + return 0; } void ip_vs_sync_cleanup(void) { - unregister_pernet_device(&ipvs_sync_ops); } -- cgit v1.2.3 From 43a4dea4c9d44baae38ddc14b9b6d86fde4c8b88 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 9 May 2011 19:36:38 +0000 Subject: xfrm: Assign the inner mode output function to the dst entry As it is, we assign the outer modes output function to the dst entry when we create the xfrm bundle. This leads to two problems on interfamily scenarios. We might insert ipv4 packets into ip6_fragment when called from xfrm6_output. The system crashes if we try to fragment an ipv4 packet with ip6_fragment. This issue was introduced with git commit ad0081e4 (ipv6: Fragment locally generated tunnel-mode IPSec6 packets as needed). The second issue is, that we might insert ipv4 packets in netfilter6 and vice versa on interfamily scenarios. With this patch we assign the inner mode output function to the dst entry when we create the xfrm bundle. So xfrm4_output/xfrm6_output from the inner mode is used and the right fragmentation and netfilter functions are called. We switch then to outer mode with the output_finish functions. Signed-off-by: Steffen Klassert Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_output.c | 8 ++++++-- net/ipv4/xfrm4_state.c | 1 + net/ipv6/xfrm6_output.c | 6 +++--- net/ipv6/xfrm6_state.c | 1 + net/xfrm/xfrm_policy.c | 14 +++++++++++++- 6 files changed, 27 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6ae4bc5ce8a7..20afeaa39395 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -324,6 +324,7 @@ struct xfrm_state_afinfo { int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); int (*output)(struct sk_buff *skb); + int (*output_finish)(struct sk_buff *skb); int (*extract_input)(struct xfrm_state *x, struct sk_buff *skb); int (*extract_output)(struct xfrm_state *x, @@ -1454,6 +1455,7 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi) extern int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); +extern int xfrm4_output_finish(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_extract_header(struct sk_buff *skb); @@ -1470,6 +1472,7 @@ extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_output_finish(struct sk_buff *skb); extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 571aa96a175c..2d51840e53a1 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -static int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER if (!skb_dst(skb)->xfrm) { @@ -86,7 +86,11 @@ static int xfrm4_output_finish(struct sk_buff *skb) int xfrm4_output(struct sk_buff *skb) { + struct dst_entry *dst = skb_dst(skb); + struct xfrm_state *x = dst->xfrm; + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, - NULL, skb_dst(skb)->dev, xfrm4_output_finish, + NULL, dst->dev, + x->outer_mode->afinfo->output_finish, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 1717c64628d1..805d63ef4340 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -78,6 +78,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_tempsel = __xfrm4_init_tempsel, .init_temprop = xfrm4_init_temprop, .output = xfrm4_output, + .output_finish = xfrm4_output_finish, .extract_input = xfrm4_extract_input, .extract_output = xfrm4_extract_output, .transport_finish = xfrm4_transport_finish, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 8e688b3de9ab..49a91c5f5623 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -79,7 +79,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -static int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sk_buff *skb) { #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; @@ -97,9 +97,9 @@ static int __xfrm6_output(struct sk_buff *skb) if ((x && x->props.mode == XFRM_MODE_TUNNEL) && ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, xfrm6_output_finish); + return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); } - return xfrm6_output_finish(skb); + return x->outer_mode->afinfo->output_finish(skb); } int xfrm6_output(struct sk_buff *skb) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index afe941e9415c..248f0b2a7ee9 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -178,6 +178,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, .output = xfrm6_output, + .output_finish = xfrm6_output_finish, .extract_input = xfrm6_extract_input, .extract_output = xfrm6_extract_output, .transport_finish = xfrm6_transport_finish, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 15792d8b6272..b4d745ea8ee1 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1406,6 +1406,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; + struct xfrm_mode *inner_mode; struct dst_entry *dst_prev = NULL; struct dst_entry *dst0 = NULL; int i = 0; @@ -1436,6 +1437,17 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (xfrm[i]->sel.family == AF_UNSPEC) { + inner_mode = xfrm_ip2inner_mode(xfrm[i], + xfrm_af2proto(family)); + if (!inner_mode) { + err = -EAFNOSUPPORT; + dst_release(dst); + goto put_states; + } + } else + inner_mode = xfrm[i]->inner_mode; + if (!dst_prev) dst0 = dst1; else { @@ -1464,7 +1476,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst1->lastuse = now; dst1->input = dst_discard; - dst1->output = xfrm[i]->outer_mode->afinfo->output; + dst1->output = inner_mode->afinfo->output; dst1->next = dst_prev; dst_prev = dst1; -- cgit v1.2.3 From 8f389a99b652aab5b42297280bd94d95933ad12f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 11 May 2011 15:13:32 -0700 Subject: mm: use alloc_bootmem_node_nopanic() on really needed path Stefan found nobootmem does not work on his system that has only 8M of RAM. This causes an early panic: BIOS-provided physical RAM map: BIOS-88: 0000000000000000 - 000000000009f000 (usable) BIOS-88: 0000000000100000 - 0000000000840000 (usable) bootconsole [earlyser0] enabled Notice: NX (Execute Disable) protection missing in CPU or disabled in BIOS! DMI not present or invalid. last_pfn = 0x840 max_arch_pfn = 0x100000 init_memory_mapping: 0000000000000000-0000000000840000 8MB LOWMEM available. mapped low ram: 0 - 00840000 low ram: 0 - 00840000 Zone PFN ranges: DMA 0x00000001 -> 0x00001000 Normal empty Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0x00000001 -> 0x0000009f 0: 0x00000100 -> 0x00000840 BUG: Int 6: CR2 (null) EDI c034663c ESI (null) EBP c0329f38 ESP c0329ef4 EBX c0346380 EDX 00000006 ECX ffffffff EAX fffffff4 err (null) EIP c0353191 CS c0320060 flg 00010082 Stack: (null) c030c533 000007cd (null) c030c533 00000001 (null) (null) 00000003 0000083f 00000018 00000002 00000002 c0329f6c c03534d6 (null) (null) 00000100 00000840 (null) c0329f64 00000001 00001000 (null) Pid: 0, comm: swapper Not tainted 2.6.36 #5 Call Trace: [] ? 0xc02e3707 [] 0xc035e6e5 [] ? 0xc0353191 [] 0xc03534d6 [] 0xc034f1cd [] 0xc034a824 [] ? 0xc03513cb [] 0xc0349432 [] 0xc0349066 It turns out that we should ignore the low limit of 16M. Use alloc_bootmem_node_nopanic() in this case. [akpm@linux-foundation.org: less mess] Signed-off-by: Yinghai LU Reported-by: Stefan Hellermann Tested-by: Stefan Hellermann Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: [2.6.34+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 2 ++ mm/page_alloc.c | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b8613e806aa9..01eca1794e14 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -111,6 +111,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_node_nopanic(pgdat, x) \ + __alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node(pgdat, x) \ __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) #define alloc_bootmem_pages_node_nopanic(pgdat, x) \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9f8a97b9a350..454191a25173 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3564,7 +3564,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages) if (!slab_is_available()) { zone->wait_table = (wait_queue_head_t *) - alloc_bootmem_node(pgdat, alloc_size); + alloc_bootmem_node_nopanic(pgdat, alloc_size); } else { /* * This case means that a zone whose size was 0 gets new memory @@ -4141,7 +4141,8 @@ static void __init setup_usemap(struct pglist_data *pgdat, unsigned long usemapsize = usemap_size(zonesize); zone->pageblock_flags = NULL; if (usemapsize) - zone->pageblock_flags = alloc_bootmem_node(pgdat, usemapsize); + zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat, + usemapsize); } #else static inline void setup_usemap(struct pglist_data *pgdat, @@ -4307,7 +4308,7 @@ static void __init_refok alloc_node_mem_map(struct pglist_data *pgdat) size = (end - start) * sizeof(struct page); map = alloc_remap(pgdat->node_id, size); if (!map) - map = alloc_bootmem_node(pgdat, size); + map = alloc_bootmem_node_nopanic(pgdat, size); pgdat->node_mem_map = map + (pgdat->node_start_pfn - start); } #ifndef CONFIG_NEED_MULTIPLE_NODES -- cgit v1.2.3 From ee85c2e1454603ebb9f8d87223ac79dcdc87fa32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 11 May 2011 15:13:34 -0700 Subject: mm: add alloc_pages_exact_nid() Add a alloc_pages_exact_nid() that allocates on a specific node. The naming is quite broken, but fixing that would need a larger renaming action. [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: tweak comment] Signed-off-by: Andi Kleen Cc: Michal Hocko Cc: Balbir Singh Cc: KOSAKI Motohiro Cc: Dave Hansen Cc: David Rientjes Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 ++ mm/page_alloc.c | 49 +++++++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index bfb8f934521e..56d8fc87fbbc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -353,6 +353,8 @@ extern unsigned long get_zeroed_page(gfp_t gfp_mask); void *alloc_pages_exact(size_t size, gfp_t gfp_mask); void free_pages_exact(void *virt, size_t size); +/* This is different from alloc_pages_exact_node !!! */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); #define __get_free_page(gfp_mask) \ __get_free_pages((gfp_mask), 0) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 454191a25173..570d944daeb5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2317,6 +2317,21 @@ void free_pages(unsigned long addr, unsigned int order) EXPORT_SYMBOL(free_pages); +static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size) +{ + if (addr) { + unsigned long alloc_end = addr + (PAGE_SIZE << order); + unsigned long used = addr + PAGE_ALIGN(size); + + split_page(virt_to_page((void *)addr), order); + while (used < alloc_end) { + free_page(used); + used += PAGE_SIZE; + } + } + return (void *)addr; +} + /** * alloc_pages_exact - allocate an exact number physically-contiguous pages. * @size: the number of bytes to allocate @@ -2336,21 +2351,31 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) unsigned long addr; addr = __get_free_pages(gfp_mask, order); - if (addr) { - unsigned long alloc_end = addr + (PAGE_SIZE << order); - unsigned long used = addr + PAGE_ALIGN(size); - - split_page(virt_to_page((void *)addr), order); - while (used < alloc_end) { - free_page(used); - used += PAGE_SIZE; - } - } - - return (void *)addr; + return make_alloc_exact(addr, order, size); } EXPORT_SYMBOL(alloc_pages_exact); +/** + * alloc_pages_exact_nid - allocate an exact number of physically-contiguous + * pages on a node. + * @size: the number of bytes to allocate + * @gfp_mask: GFP flags for the allocation + * + * Like alloc_pages_exact(), but try to allocate on node nid first before falling + * back. + * Note this is not alloc_pages_exact_node() which allocates on a specific node, + * but is not exact. + */ +void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +{ + unsigned order = get_order(size); + struct page *p = alloc_pages_node(nid, gfp_mask, order); + if (!p) + return NULL; + return make_alloc_exact((unsigned long)page_address(p), order, size); +} +EXPORT_SYMBOL(alloc_pages_exact_nid); + /** * free_pages_exact - release memory allocated via alloc_pages_exact() * @virt: the value returned by alloc_pages_exact. -- cgit v1.2.3 From 1d929b7a84438ad9012c5826f5617d79a3efcef1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 11 May 2011 15:13:39 -0700 Subject: mm: tracing: add missing GFP flags to tracing include/linux/gfp.h and include/trace/events/gfpflags.h are out of sync. When tracing is enabled, certain flags are not recognised and the text output is less useful as a result. Add the missing flags. Signed-off-by: Mel Gorman Cc: Andrea Arcangeli Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/gfpflags.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index e3615c093741..9fe3a36646e9 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -10,6 +10,7 @@ */ #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", \ + {(unsigned long)GFP_TRANSHUGE, "GFP_TRANSHUGE"}, \ {(unsigned long)GFP_HIGHUSER_MOVABLE, "GFP_HIGHUSER_MOVABLE"}, \ {(unsigned long)GFP_HIGHUSER, "GFP_HIGHUSER"}, \ {(unsigned long)GFP_USER, "GFP_USER"}, \ @@ -32,6 +33,9 @@ {(unsigned long)__GFP_HARDWALL, "GFP_HARDWALL"}, \ {(unsigned long)__GFP_THISNODE, "GFP_THISNODE"}, \ {(unsigned long)__GFP_RECLAIMABLE, "GFP_RECLAIMABLE"}, \ - {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"} \ + {(unsigned long)__GFP_MOVABLE, "GFP_MOVABLE"}, \ + {(unsigned long)__GFP_NOTRACK, "GFP_NOTRACK"}, \ + {(unsigned long)__GFP_NO_KSWAPD, "GFP_NO_KSWAPD"}, \ + {(unsigned long)__GFP_OTHER_NODE, "GFP_OTHER_NODE"} \ ) : "GFP_NOWAIT" -- cgit v1.2.3 From a75b9df9d3bfc3cd1083974c045ae31ce5f3434f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 May 2011 18:00:51 -0400 Subject: NFSv4.1: Ensure that layoutget uses the correct gfp modes Currently, writebacks may end up recursing back into the filesystem due to GFP_KERNEL direct reclaims in the pnfs subsystem. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4filelayout.c | 25 ++++++++++++++----------- fs/nfs/nfs4filelayout.h | 2 +- fs/nfs/nfs4filelayoutdev.c | 34 +++++++++++++++++----------------- fs/nfs/pnfs.c | 33 +++++++++++++++++++-------------- fs/nfs/pnfs.h | 6 +++--- fs/nfs/read.c | 4 ++-- fs/nfs/write.c | 4 ++-- include/linux/nfs_xdr.h | 1 + 8 files changed, 59 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index 7841ea603c91..be79dc9f386d 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -418,7 +418,8 @@ static int filelayout_check_layout(struct pnfs_layout_hdr *lo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; @@ -441,7 +442,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, /* find and reference the deviceid */ dsaddr = nfs4_fl_find_get_deviceid(id); if (dsaddr == NULL) { - dsaddr = get_device_info(lo->plh_inode, id); + dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); if (dsaddr == NULL) goto out; } @@ -502,7 +503,8 @@ static int filelayout_decode_layout(struct pnfs_layout_hdr *flo, struct nfs4_filelayout_segment *fl, struct nfs4_layoutget_res *lgr, - struct nfs4_deviceid *id) + struct nfs4_deviceid *id, + gfp_t gfp_flags) { struct xdr_stream stream; struct xdr_buf buf = { @@ -518,7 +520,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, dprintk("%s: set_layout_map Begin\n", __func__); - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) return -ENOMEM; @@ -556,13 +558,13 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo, goto out_err; fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), - GFP_KERNEL); + gfp_flags); if (!fl->fh_array) goto out_err; for (i = 0; i < fl->num_fh; i++) { /* Do we want to use a mempool here? */ - fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); + fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags); if (!fl->fh_array[i]) goto out_err_free; @@ -607,19 +609,20 @@ filelayout_free_lseg(struct pnfs_layout_segment *lseg) static struct pnfs_layout_segment * filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, - struct nfs4_layoutget_res *lgr) + struct nfs4_layoutget_res *lgr, + gfp_t gfp_flags) { struct nfs4_filelayout_segment *fl; int rc; struct nfs4_deviceid id; dprintk("--> %s\n", __func__); - fl = kzalloc(sizeof(*fl), GFP_KERNEL); + fl = kzalloc(sizeof(*fl), gfp_flags); if (!fl) return NULL; - rc = filelayout_decode_layout(layoutid, fl, lgr, &id); - if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { + rc = filelayout_decode_layout(layoutid, fl, lgr, &id, gfp_flags); + if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id, gfp_flags)) { _filelayout_free_lseg(fl); return NULL; } @@ -635,7 +638,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, int size = (fl->stripe_type == STRIPE_SPARSE) ? fl->dsaddr->ds_num : fl->dsaddr->stripe_count; - fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL); + fl->commit_buckets = kcalloc(size, sizeof(struct list_head), gfp_flags); if (!fl->commit_buckets) { filelayout_free_lseg(&fl->generic_hdr); return NULL; diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index 7c44579f5832..2b461d77b43a 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -104,6 +104,6 @@ extern struct nfs4_file_layout_dsaddr * nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id); extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); #endif /* FS_NFS_NFS4FILELAYOUT_H */ diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index de5350f2b249..db07c7af1395 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -225,11 +225,11 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) } static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) +nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) { struct nfs4_pnfs_ds *tmp_ds, *ds; - ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); + ds = kzalloc(sizeof(*tmp_ds), gfp_flags); if (!ds) goto out; @@ -261,7 +261,7 @@ out: * Currently only support ipv4, and one multi-path address. */ static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) +decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) { struct nfs4_pnfs_ds *ds = NULL; char *buf; @@ -303,7 +303,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) rlen); goto out_err; } - buf = kmalloc(rlen + 1, GFP_KERNEL); + buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); goto out_err; @@ -333,7 +333,7 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode) sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port); + ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); dprintk("%s: Decoded address and port %s\n", __func__, buf); out_free: kfree(buf); @@ -343,7 +343,7 @@ out_err: /* Decode opaque device data and return the result */ static struct nfs4_file_layout_dsaddr* -decode_device(struct inode *ino, struct pnfs_device *pdev) +decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) { int i; u32 cnt, num; @@ -362,7 +362,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) struct page *scratch; /* set up xdr stream */ - scratch = alloc_page(GFP_KERNEL); + scratch = alloc_page(gfp_flags); if (!scratch) goto out_err; @@ -384,7 +384,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) } /* read stripe indices */ - stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL); + stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags); if (!stripe_indices) goto out_err_free_scratch; @@ -423,7 +423,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) dsaddr = kzalloc(sizeof(*dsaddr) + (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), - GFP_KERNEL); + gfp_flags); if (!dsaddr) goto out_err_free_stripe_indices; @@ -452,7 +452,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev) for (j = 0; j < mp_count; j++) { if (j == 0) { dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino); + ino, gfp_flags); if (dsaddr->ds_list[i] == NULL) goto out_err_free_deviceid; } else { @@ -503,12 +503,12 @@ out_err: * available devices. */ static struct nfs4_file_layout_dsaddr * -decode_and_add_device(struct inode *inode, struct pnfs_device *dev) +decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_flags) { struct nfs4_file_layout_dsaddr *d, *new; long hash; - new = decode_device(inode, dev); + new = decode_device(inode, dev, gfp_flags); if (!new) { printk(KERN_WARNING "%s: Could not decode or add device\n", __func__); @@ -537,7 +537,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev) * of available devices, and return it. */ struct nfs4_file_layout_dsaddr * -get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) +get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) { struct pnfs_device *pdev = NULL; u32 max_resp_sz; @@ -556,17 +556,17 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); - pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pnfs_device), gfp_flags); if (pdev == NULL) return NULL; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (pages == NULL) { kfree(pdev); return NULL; } for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_free; } @@ -587,7 +587,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) * Found new device, need to decode it and then add it to the * list of known devices for this mountpoint. */ - dsaddr = decode_and_add_device(inode, pdev); + dsaddr = decode_and_add_device(inode, pdev, gfp_flags); out_free: for (i = 0; i < max_pages; i++) __free_page(pages[i]); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 65455f58b109..f57f5281a520 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -467,7 +467,8 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, static struct pnfs_layout_segment * send_layoutget(struct pnfs_layout_hdr *lo, struct nfs_open_context *ctx, - u32 iomode) + u32 iomode, + gfp_t gfp_flags) { struct inode *ino = lo->plh_inode; struct nfs_server *server = NFS_SERVER(ino); @@ -480,7 +481,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); BUG_ON(ctx == NULL); - lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); + lgp = kzalloc(sizeof(*lgp), gfp_flags); if (lgp == NULL) return NULL; @@ -488,12 +489,12 @@ send_layoutget(struct pnfs_layout_hdr *lo, max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; max_pages = max_resp_sz >> PAGE_SHIFT; - pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); + pages = kzalloc(max_pages * sizeof(struct page *), gfp_flags); if (!pages) goto out_err_free; for (i = 0; i < max_pages; i++) { - pages[i] = alloc_page(GFP_KERNEL); + pages[i] = alloc_page(gfp_flags); if (!pages[i]) goto out_err_free; } @@ -509,6 +510,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, lgp->args.layout.pages = pages; lgp->args.layout.pglen = max_pages * PAGE_SIZE; lgp->lsegpp = &lseg; + lgp->gfp_flags = gfp_flags; /* Synchronously retrieve layout information from server and * store in lseg. @@ -666,11 +668,11 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo, } static struct pnfs_layout_hdr * -alloc_init_layout_hdr(struct inode *ino) +alloc_init_layout_hdr(struct inode *ino, gfp_t gfp_flags) { struct pnfs_layout_hdr *lo; - lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); + lo = kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags); if (!lo) return NULL; atomic_set(&lo->plh_refcount, 1); @@ -682,7 +684,7 @@ alloc_init_layout_hdr(struct inode *ino) } static struct pnfs_layout_hdr * -pnfs_find_alloc_layout(struct inode *ino) +pnfs_find_alloc_layout(struct inode *ino, gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct pnfs_layout_hdr *new = NULL; @@ -697,7 +699,7 @@ pnfs_find_alloc_layout(struct inode *ino) return nfsi->layout; } spin_unlock(&ino->i_lock); - new = alloc_init_layout_hdr(ino); + new = alloc_init_layout_hdr(ino, gfp_flags); spin_lock(&ino->i_lock); if (likely(nfsi->layout == NULL)) /* Won the race? */ @@ -757,7 +759,8 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode) struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode iomode) + enum pnfs_iomode iomode, + gfp_t gfp_flags) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; @@ -768,7 +771,7 @@ pnfs_update_layout(struct inode *ino, if (!pnfs_enabled_sb(NFS_SERVER(ino))) return NULL; spin_lock(&ino->i_lock); - lo = pnfs_find_alloc_layout(ino); + lo = pnfs_find_alloc_layout(ino, gfp_flags); if (lo == NULL) { dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); goto out_unlock; @@ -808,7 +811,7 @@ pnfs_update_layout(struct inode *ino, spin_unlock(&clp->cl_lock); } - lseg = send_layoutget(lo, ctx, iomode); + lseg = send_layoutget(lo, ctx, iomode, gfp_flags); if (!lseg && first) { spin_lock(&clp->cl_lock); list_del_init(&lo->plh_layouts); @@ -847,7 +850,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) goto out; } /* Inject layout blob into I/O device driver */ - lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); + lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res, lgp->gfp_flags); if (!lseg || IS_ERR(lseg)) { if (!lseg) status = -ENOMEM; @@ -900,7 +903,8 @@ static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_READ); + IOMODE_READ, + GFP_KERNEL); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } @@ -922,7 +926,8 @@ static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio, /* This is first coelesce call for a series of nfs_pages */ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, prev->wb_context, - IOMODE_RW); + IOMODE_RW, + GFP_NOFS); } return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index bc4827202e7a..0c015bad9e7a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -70,7 +70,7 @@ struct pnfs_layoutdriver_type { const u32 id; const char *name; struct module *owner; - struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); + struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr, gfp_t gfp_flags); void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ @@ -126,7 +126,7 @@ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type); + enum pnfs_iomode access_type, gfp_t gfp_flags); void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, @@ -245,7 +245,7 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) static inline struct pnfs_layout_segment * pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - enum pnfs_iomode access_type) + enum pnfs_iomode access_type, gfp_t gfp_flags) { return NULL; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 7cded2b12a05..2bcf0dc306a1 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -288,7 +288,7 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -351,7 +351,7 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ, GFP_KERNEL); ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 0, lseg); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3bd5d7e80f6c..49c715b4ac92 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -939,7 +939,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) atomic_set(&req->wb_complete, requests); BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); ClearPageError(page); offset = 0; nbytes = desc->pg_count; @@ -1013,7 +1013,7 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) } req = nfs_list_entry(data->pages.next); if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW); + lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 890dce242639..7e371f7df9c4 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -233,6 +233,7 @@ struct nfs4_layoutget { struct nfs4_layoutget_args args; struct nfs4_layoutget_res res; struct pnfs_layout_segment **lsegpp; + gfp_t gfp_flags; }; struct nfs4_getdeviceinfo_args { -- cgit v1.2.3 From 698b368275c3fa98261159253cfc79653f9dffc6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 11 May 2011 14:49:36 -0700 Subject: fbcon: add lifetime refcount to opened frame buffers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This just adds the refcount and the new registration lock logic. It does not (for example) actually change the read/write/ioctl routines to actually use the frame buffer that was opened: those function still end up alway susing whatever the current frame buffer is at the time of the call. Without this, if something holds the frame buffer open over a framebuffer switch, the close() operation after the switch will access a fb_info that has been free'd by the unregistering of the old frame buffer. (The read/write/ioctl operations will normally not cause problems, because they will - illogically - pick up the new fbcon instead. But a switch that happens just as one of those is going on might see problems too, the window is just much smaller: one individual op rather than the whole open-close sequence.) This use-after-free is apparently fairly easily triggered by the Ubuntu 11.04 boot sequence. Acked-by: Tim Gardner Tested-by: Daniel J Blueman Tested-by: Anca Emanuel Cc: Bruno PrĂ©mont Cc: Alan Cox Cc: Paul Mundt Cc: Dave Airlie Cc: Andy Whitcroft Signed-off-by: Linus Torvalds --- drivers/video/fbmem.c | 56 ++++++++++++++++++++++++++++++++++++++++++--------- include/linux/fb.h | 1 + 2 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index e0c2284924b6..eec14d2ca1c7 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -42,9 +42,34 @@ #define FBPIXMAPSIZE (1024 * 8) +static DEFINE_MUTEX(registration_lock); struct fb_info *registered_fb[FB_MAX] __read_mostly; int num_registered_fb __read_mostly; +static struct fb_info *get_fb_info(unsigned int idx) +{ + struct fb_info *fb_info; + + if (idx >= FB_MAX) + return ERR_PTR(-ENODEV); + + mutex_lock(®istration_lock); + fb_info = registered_fb[idx]; + if (fb_info) + atomic_inc(&fb_info->count); + mutex_unlock(®istration_lock); + + return fb_info; +} + +static void put_fb_info(struct fb_info *fb_info) +{ + if (!atomic_dec_and_test(&fb_info->count)) + return; + if (fb_info->fbops->fb_destroy) + fb_info->fbops->fb_destroy(fb_info); +} + int lock_fb_info(struct fb_info *info) { mutex_lock(&info->lock); @@ -647,6 +672,7 @@ int fb_show_logo(struct fb_info *info, int rotate) { return 0; } static void *fb_seq_start(struct seq_file *m, loff_t *pos) { + mutex_lock(®istration_lock); return (*pos < FB_MAX) ? pos : NULL; } @@ -658,6 +684,7 @@ static void *fb_seq_next(struct seq_file *m, void *v, loff_t *pos) static void fb_seq_stop(struct seq_file *m, void *v) { + mutex_unlock(®istration_lock); } static int fb_seq_show(struct seq_file *m, void *v) @@ -1361,14 +1388,16 @@ __releases(&info->lock) struct fb_info *info; int res = 0; - if (fbidx >= FB_MAX) - return -ENODEV; - info = registered_fb[fbidx]; - if (!info) + info = get_fb_info(fbidx); + if (!info) { request_module("fb%d", fbidx); - info = registered_fb[fbidx]; - if (!info) - return -ENODEV; + info = get_fb_info(fbidx); + if (!info) + return -ENODEV; + } + if (IS_ERR(info)) + return PTR_ERR(info); + mutex_lock(&info->lock); if (!try_module_get(info->fbops->owner)) { res = -ENODEV; @@ -1386,6 +1415,8 @@ __releases(&info->lock) #endif out: mutex_unlock(&info->lock); + if (res) + put_fb_info(info); return res; } @@ -1401,6 +1432,7 @@ __releases(&info->lock) info->fbops->fb_release(info,1); module_put(info->fbops->owner); mutex_unlock(&info->lock); + put_fb_info(info); return 0; } @@ -1542,11 +1574,13 @@ register_framebuffer(struct fb_info *fb_info) remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id, fb_is_primary_device(fb_info)); + mutex_lock(®istration_lock); num_registered_fb++; for (i = 0 ; i < FB_MAX; i++) if (!registered_fb[i]) break; fb_info->node = i; + atomic_set(&fb_info->count, 1); mutex_init(&fb_info->lock); mutex_init(&fb_info->mm_lock); @@ -1583,6 +1617,7 @@ register_framebuffer(struct fb_info *fb_info) fb_var_to_videomode(&mode, &fb_info->var); fb_add_videomode(&mode, &fb_info->modelist); registered_fb[i] = fb_info; + mutex_unlock(®istration_lock); event.info = fb_info; if (!lock_fb_info(fb_info)) @@ -1616,6 +1651,7 @@ unregister_framebuffer(struct fb_info *fb_info) struct fb_event event; int i, ret = 0; + mutex_lock(®istration_lock); i = fb_info->node; if (!registered_fb[i]) { ret = -EINVAL; @@ -1638,7 +1674,7 @@ unregister_framebuffer(struct fb_info *fb_info) (fb_info->pixmap.flags & FB_PIXMAP_DEFAULT)) kfree(fb_info->pixmap.addr); fb_destroy_modelist(&fb_info->modelist); - registered_fb[i]=NULL; + registered_fb[i] = NULL; num_registered_fb--; fb_cleanup_device(fb_info); device_destroy(fb_class, MKDEV(FB_MAJOR, i)); @@ -1646,9 +1682,9 @@ unregister_framebuffer(struct fb_info *fb_info) fb_notifier_call_chain(FB_EVENT_FB_UNREGISTERED, &event); /* this may free fb info */ - if (fb_info->fbops->fb_destroy) - fb_info->fbops->fb_destroy(fb_info); + put_fb_info(fb_info); done: + mutex_unlock(®istration_lock); return ret; } diff --git a/include/linux/fb.h b/include/linux/fb.h index df728c1c29ed..6a8274877171 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -832,6 +832,7 @@ struct fb_tile_ops { #define FBINFO_CAN_FORCE_OUTPUT 0x200000 struct fb_info { + atomic_t count; int node; int flags; struct mutex lock; /* Lock for open/release/ioctl funcs */ -- cgit v1.2.3 From ca06707022d6ba4744198a8ebbe4994786b0c613 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Fri, 6 May 2011 23:44:46 +0000 Subject: ipv6: restore correct ECN handling on TCP xmit Since commit e9df2e8fd8fbc9 (Use appropriate sock tclass setting for routing lookup) we lost ability to properly add ECN codemarks to ipv6 TCP frames. It seems like TCP_ECN_send() calls INET_ECN_xmit(), which only sets the ECN bit in the IPv4 ToS field (inet_sk(sk)->tos), but after the patch, what's checked is inet6_sk(sk)->tclass, which is a completely different field. Close bug https://bugzilla.kernel.org/show_bug.cgi?id=34322 [Eric Dumazet] : added the INET_ECN_dontxmit() fix and replace macros by inline functions for clarity. Signed-off-by: Steinar H. Gunderson Signed-off-by: Eric Dumazet Cc: YOSHIFUJI Hideaki Cc: Andrew Morton Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 88bdd010d65d..2fa8d1341a0a 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -38,9 +38,19 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) return outer; } -#define INET_ECN_xmit(sk) do { inet_sk(sk)->tos |= INET_ECN_ECT_0; } while (0) -#define INET_ECN_dontxmit(sk) \ - do { inet_sk(sk)->tos &= ~INET_ECN_MASK; } while (0) +static inline void INET_ECN_xmit(struct sock *sk) +{ + inet_sk(sk)->tos |= INET_ECN_ECT_0; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + +static inline void INET_ECN_dontxmit(struct sock *sk) +{ + inet_sk(sk)->tos &= ~INET_ECN_MASK; + if (inet6_sk(sk) != NULL) + inet6_sk(sk)->tclass &= ~INET_ECN_MASK; +} #define IP6_ECN_flow_init(label) do { \ (label) &= ~htonl(INET_ECN_MASK << 20); \ -- cgit v1.2.3 From 47a150edc2ae734c0f4bf50aa19499e23b9a46f8 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Fri, 13 May 2011 04:27:54 +0100 Subject: Cache user_ns in struct cred If !CONFIG_USERNS, have current_user_ns() defined to (&init_user_ns). Get rid of _current_user_ns. This requires nsown_capable() to be defined in capability.c rather than as static inline in capability.h, so do that. Request_key needs init_user_ns defined at current_user_ns if !CONFIG_USERNS, so forward-declare that in cred.h if !CONFIG_USERNS at current_user_ns() define. Compile-tested with and without CONFIG_USERNS. Signed-off-by: Serge E. Hallyn [ This makes a huge performance difference for acl_permission_check(), up to 30%. And that is one of the hottest kernel functions for loads that are pathname-lookup heavy. ] Signed-off-by: Linus Torvalds --- include/linux/capability.h | 13 +------------ include/linux/cred.h | 10 ++++++++-- kernel/capability.c | 12 ++++++++++++ kernel/cred.c | 12 ++++++------ 4 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/linux/capability.h b/include/linux/capability.h index 16ee8b49a200..d4675af963fa 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -546,18 +546,7 @@ extern bool has_capability_noaudit(struct task_struct *t, int cap); extern bool capable(int cap); extern bool ns_capable(struct user_namespace *ns, int cap); extern bool task_ns_capable(struct task_struct *t, int cap); - -/** - * nsown_capable - Check superior capability to one's own user_ns - * @cap: The capability in question - * - * Return true if the current task has the given superior capability - * targeted at its own user namespace. - */ -static inline bool nsown_capable(int cap) -{ - return ns_capable(current_user_ns(), cap); -} +extern bool nsown_capable(int cap); /* audit system wants to get cap info from files as well */ extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps); diff --git a/include/linux/cred.h b/include/linux/cred.h index 9aeeb0ba2003..be16b61283cc 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -146,6 +146,7 @@ struct cred { void *security; /* subjective LSM security */ #endif struct user_struct *user; /* real user ID subscription */ + struct user_namespace *user_ns; /* cached user->user_ns */ struct group_info *group_info; /* supplementary groups for euid/fsgid */ struct rcu_head rcu; /* RCU deletion hook */ }; @@ -354,10 +355,15 @@ static inline void put_cred(const struct cred *_cred) #define current_fsgid() (current_cred_xxx(fsgid)) #define current_cap() (current_cred_xxx(cap_effective)) #define current_user() (current_cred_xxx(user)) -#define _current_user_ns() (current_cred_xxx(user)->user_ns) #define current_security() (current_cred_xxx(security)) -extern struct user_namespace *current_user_ns(void); +#ifdef CONFIG_USER_NS +#define current_user_ns() (current_cred_xxx(user_ns)) +#else +extern struct user_namespace init_user_ns; +#define current_user_ns() (&init_user_ns) +#endif + #define current_uid_gid(_uid, _gid) \ do { \ diff --git a/kernel/capability.c b/kernel/capability.c index bf0c734d0c12..32a80e08ff4b 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -399,3 +399,15 @@ bool task_ns_capable(struct task_struct *t, int cap) return ns_capable(task_cred_xxx(t, user)->user_ns, cap); } EXPORT_SYMBOL(task_ns_capable); + +/** + * nsown_capable - Check superior capability to one's own user_ns + * @cap: The capability in question + * + * Return true if the current task has the given superior capability + * targeted at its own user namespace. + */ +bool nsown_capable(int cap) +{ + return ns_capable(current_user_ns(), cap); +} diff --git a/kernel/cred.c b/kernel/cred.c index 5557b55048df..8093c16b84b1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -54,6 +54,7 @@ struct cred init_cred = { .cap_effective = CAP_INIT_EFF_SET, .cap_bset = CAP_INIT_BSET, .user = INIT_USER, + .user_ns = &init_user_ns, .group_info = &init_groups, #ifdef CONFIG_KEYS .tgcred = &init_tgcred, @@ -410,6 +411,11 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) goto error_put; } + /* cache user_ns in cred. Doesn't need a refcount because it will + * stay pinned by cred->user + */ + new->user_ns = new->user->user_ns; + #ifdef CONFIG_KEYS /* new threads get their own thread keyrings if their parent already * had one */ @@ -741,12 +747,6 @@ int set_create_files_as(struct cred *new, struct inode *inode) } EXPORT_SYMBOL(set_create_files_as); -struct user_namespace *current_user_ns(void) -{ - return _current_user_ns(); -} -EXPORT_SYMBOL(current_user_ns); - #ifdef CONFIG_DEBUG_CREDENTIALS bool creds_are_invalid(const struct cred *cred) -- cgit v1.2.3 From a10e14667635dde504ed9e7ee851494c2cf2ae8e Mon Sep 17 00:00:00 2001 From: Vitalii Demianets Date: Thu, 12 May 2011 23:04:29 +0000 Subject: bonding,llc: Fix structure sizeof incompatibility for some PDUs With some combinations of arch/compiler (e.g. arm-linux-gcc) the sizeof operator on structure returns value greater than expected. In cases when the structure is used for mapping PDU fields it may lead to unexpected results (such as holes and alignment problems in skb data). __packed prevents this undesired behavior. Signed-off-by: Vitalii Demianets Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.h | 10 +++++----- include/net/llc_pdu.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index b28baff70864..01b8a6af275b 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -39,7 +39,7 @@ typedef struct mac_addr { u8 mac_addr_value[ETH_ALEN]; -} mac_addr_t; +} __packed mac_addr_t; enum { BOND_AD_STABLE = 0, @@ -134,12 +134,12 @@ typedef struct lacpdu { u8 tlv_type_terminator; // = terminator u8 terminator_length; // = 0 u8 reserved_50[50]; // = 0 -} lacpdu_t; +} __packed lacpdu_t; typedef struct lacpdu_header { struct ethhdr hdr; struct lacpdu lacpdu; -} lacpdu_header_t; +} __packed lacpdu_header_t; // Marker Protocol Data Unit(PDU) structure(43.5.3.2 in the 802.3ad standard) typedef struct bond_marker { @@ -155,12 +155,12 @@ typedef struct bond_marker { u8 tlv_type_terminator; // = 0x00 u8 terminator_length; // = 0x00 u8 reserved_90[90]; // = 0 -} bond_marker_t; +} __packed bond_marker_t; typedef struct bond_marker_header { struct ethhdr hdr; struct bond_marker marker; -} bond_marker_header_t; +} __packed bond_marker_header_t; #pragma pack() diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h index 75b8e2968c9b..f57e7d46a453 100644 --- a/include/net/llc_pdu.h +++ b/include/net/llc_pdu.h @@ -199,7 +199,7 @@ struct llc_pdu_sn { u8 ssap; u8 ctrl_1; u8 ctrl_2; -}; +} __packed; static inline struct llc_pdu_sn *llc_pdu_sn_hdr(struct sk_buff *skb) { @@ -211,7 +211,7 @@ struct llc_pdu_un { u8 dsap; u8 ssap; u8 ctrl_1; -}; +} __packed; static inline struct llc_pdu_un *llc_pdu_un_hdr(struct sk_buff *skb) { @@ -359,7 +359,7 @@ struct llc_xid_info { u8 fmt_id; /* always 0x81 for LLC */ u8 type; /* different if NULL/non-NULL LSAP */ u8 rw; /* sender receive window */ -}; +} __packed; /** * llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID @@ -415,7 +415,7 @@ struct llc_frmr_info { u8 curr_ssv; /* current send state variable val */ u8 curr_rsv; /* current receive state variable */ u8 ind_bits; /* indicator bits set with macro */ -}; +} __packed; extern void llc_pdu_set_cmd_rsp(struct sk_buff *skb, u8 type); extern void llc_pdu_set_pf_bit(struct sk_buff *skb, u8 bit_value); -- cgit v1.2.3 From e1e8fb6a1ff3f9487e03a4cbf85b81d1316068ce Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Fri, 15 Apr 2011 03:02:49 +0000 Subject: fs: remove FS_COW_FL FS_COW_FL and FS_NOCOW_FL were newly introduced to control per file COW in btrfs, but FS_NOCOW_FL is sufficient. The fact is we don't have corresponding BTRFS_INODE_COW flag. COW is default, and FS_NOCOW_FL can be used to switch off COW for a single file. If we mount btrfs with nodatacow, a newly created file will be set with the FS_NOCOW_FL flag. So to turn on COW for it, we can just clear the FS_NOCOW_FL flag. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 15 ++++++--------- include/linux/fs.h | 1 - 2 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index f580a3a5d2fc..3240dd90da42 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -144,16 +144,13 @@ static int check_flags(unsigned int flags) if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ FS_NOATIME_FL | FS_NODUMP_FL | \ FS_SYNC_FL | FS_DIRSYNC_FL | \ - FS_NOCOMP_FL | FS_COMPR_FL | \ - FS_NOCOW_FL | FS_COW_FL)) + FS_NOCOMP_FL | FS_COMPR_FL | + FS_NOCOW_FL)) return -EOPNOTSUPP; if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) return -EINVAL; - if ((flags & FS_NOCOW_FL) && (flags & FS_COW_FL)) - return -EINVAL; - return 0; } @@ -218,6 +215,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_DIRSYNC; else ip->flags &= ~BTRFS_INODE_DIRSYNC; + if (flags & FS_NOCOW_FL) + ip->flags |= BTRFS_INODE_NODATACOW; + else + ip->flags &= ~BTRFS_INODE_NODATACOW; /* * The COMPRESS flag can only be changed by users, while the NOCOMPRESS @@ -231,10 +232,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) ip->flags |= BTRFS_INODE_COMPRESS; ip->flags &= ~BTRFS_INODE_NOCOMPRESS; } - if (flags & FS_NOCOW_FL) - ip->flags |= BTRFS_INODE_NODATACOW; - else if (flags & FS_COW_FL) - ip->flags &= ~BTRFS_INODE_NODATACOW; trans = btrfs_join_transaction(root, 1); BUG_ON(IS_ERR(trans)); diff --git a/include/linux/fs.h b/include/linux/fs.h index de9dd8119b71..56a41412903d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -365,7 +365,6 @@ struct inodes_stat_t { #define FS_EXTENT_FL 0x00080000 /* Extents */ #define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ #define FS_NOCOW_FL 0x00800000 /* Do not cow file */ -#define FS_COW_FL 0x02000000 /* Cow file */ #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -- cgit v1.2.3 From 752d2635ebb12b6122ba05775f7d1ccfef14b275 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 Apr 2011 11:03:57 +0100 Subject: drm: Take lock around probes for drm_fb_helper_hotplug_event We need to hold the dev->mode_config.mutex whilst detecting the output status. But we also need to drop it for the call into drm_fb_helper_single_fb_probe(), which indirectly acquires the lock when attaching the fbcon. Failure to do so exposes a race with normal output probing. Detected by adding some warnings that the mutex is held to the backend detect routines: [ 17.772456] WARNING: at drivers/gpu/drm/i915/intel_crt.c:471 intel_crt_detect+0x3e/0x373 [i915]() [ 17.772458] Hardware name: Latitude E6400 [ 17.772460] Modules linked in: .... [ 17.772582] Pid: 11, comm: kworker/0:1 Tainted: G W 2.6.38.4-custom.2 #8 [ 17.772584] Call Trace: [ 17.772591] [] ? warn_slowpath_common+0x78/0x8c [ 17.772603] [] ? intel_crt_detect+0x3e/0x373 [i915] [ 17.772612] [] ? drm_helper_probe_single_connector_modes+0xbf/0x2af [drm_kms_helper] [ 17.772619] [] ? drm_fb_helper_probe_connector_modes+0x39/0x4d [drm_kms_helper] [ 17.772625] [] ? drm_fb_helper_hotplug_event+0xa5/0xc3 [drm_kms_helper] [ 17.772633] [] ? output_poll_execute+0x146/0x17c [drm_kms_helper] [ 17.772638] [] ? cfq_init_queue+0x247/0x345 [ 17.772644] [] ? output_poll_execute+0x0/0x17c [drm_kms_helper] [ 17.772648] [] ? process_one_work+0x193/0x28e [ 17.772652] [] ? worker_thread+0xef/0x172 [ 17.772655] [] ? worker_thread+0x0/0x172 [ 17.772658] [] ? worker_thread+0x0/0x172 [ 17.772663] [] ? kthread+0x7a/0x82 [ 17.772668] [] ? kernel_thread_helper+0x4/0x10 [ 17.772671] [] ? kthread+0x0/0x82 [ 17.772674] [] ? kernel_thread_helper+0x0/0x10 Reported-by: Frederik Himpe References: https://bugs.freedesktop.org/show_bug.cgi?id=36394 Signed-off-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_fb_helper.c | 26 ++++++++++++++++++++++---- include/drm/drm_fb_helper.h | 2 +- 2 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 11d7a72c22d9..140b9525b48a 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1516,17 +1516,33 @@ bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel) } EXPORT_SYMBOL(drm_fb_helper_initial_config); -bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) +/** + * drm_fb_helper_hotplug_event - respond to a hotplug notification by + * probing all the outputs attached to the fb. + * @fb_helper: the drm_fb_helper + * + * LOCKING: + * Called at runtime, must take mode config lock. + * + * Scan the connectors attached to the fb_helper and try to put together a + * setup after *notification of a change in output configuration. + * + * RETURNS: + * 0 on success and a non-zero error code otherwise. + */ +int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) { + struct drm_device *dev = fb_helper->dev; int count = 0; u32 max_width, max_height, bpp_sel; bool bound = false, crtcs_bound = false; struct drm_crtc *crtc; if (!fb_helper->fb) - return false; + return 0; - list_for_each_entry(crtc, &fb_helper->dev->mode_config.crtc_list, head) { + mutex_lock(&dev->mode_config.mutex); + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { if (crtc->fb) crtcs_bound = true; if (crtc->fb == fb_helper->fb) @@ -1535,7 +1551,8 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) if (!bound && crtcs_bound) { fb_helper->delayed_hotplug = true; - return false; + mutex_unlock(&dev->mode_config.mutex); + return 0; } DRM_DEBUG_KMS("\n"); @@ -1546,6 +1563,7 @@ bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper) count = drm_fb_helper_probe_connector_modes(fb_helper, max_width, max_height); drm_setup_crtcs(fb_helper); + mutex_unlock(&dev->mode_config.mutex); return drm_fb_helper_single_fb_probe(fb_helper, bpp_sel); } diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index ade09d7b4271..c99c3d3e7811 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -127,7 +127,7 @@ void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch, int drm_fb_helper_setcmap(struct fb_cmap *cmap, struct fb_info *info); -bool drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); +int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); bool drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper, int bpp_sel); int drm_fb_helper_single_add_all_connectors(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); -- cgit v1.2.3 From 86f315bbb2374f1f077500ad131dd9b71856e697 Mon Sep 17 00:00:00 2001 From: Chris Ball Date: Mon, 16 May 2011 11:32:26 -0400 Subject: Revert "mmc: fix a race between card-detect rescan and clock-gate work instances" This reverts commit 26fc8775b51484d8c0a671198639c6d5ae60533e, which has been reported to cause boot/resume-time crashes for some users: https://bbs.archlinux.org/viewtopic.php?id=118751. Signed-off-by: Chris Ball Cc: --- drivers/mmc/core/host.c | 9 +++++---- include/linux/mmc/host.h | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 2b200c1cfbba..461e6a17fb90 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -94,7 +94,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) spin_unlock_irqrestore(&host->clk_lock, flags); return; } - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (!host->clk_requests) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -104,7 +104,7 @@ static void mmc_host_clk_gate_delayed(struct mmc_host *host) pr_debug("%s: gated MCI clock\n", mmc_hostname(host)); } spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /* @@ -130,7 +130,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) { unsigned long flags; - mmc_claim_host(host); + mutex_lock(&host->clk_gate_mutex); spin_lock_irqsave(&host->clk_lock, flags); if (host->clk_gated) { spin_unlock_irqrestore(&host->clk_lock, flags); @@ -140,7 +140,7 @@ void mmc_host_clk_ungate(struct mmc_host *host) } host->clk_requests++; spin_unlock_irqrestore(&host->clk_lock, flags); - mmc_release_host(host); + mutex_unlock(&host->clk_gate_mutex); } /** @@ -215,6 +215,7 @@ static inline void mmc_host_clk_init(struct mmc_host *host) host->clk_gated = false; INIT_WORK(&host->clk_gate_work, mmc_host_clk_gate_work); spin_lock_init(&host->clk_lock); + mutex_init(&host->clk_gate_mutex); } /** diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index eb792cb6d745..bcb793ec7374 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -183,6 +183,7 @@ struct mmc_host { struct work_struct clk_gate_work; /* delayed clock gate */ unsigned int clk_old; /* old clock value cache */ spinlock_t clk_lock; /* lock for clk fields */ + struct mutex clk_gate_mutex; /* mutex for clock gating */ #endif /* host specific block data */ -- cgit v1.2.3 From 9937a5e2f32892db0dbeefc2b3bc74b3ae3ea9c7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 17 May 2011 11:04:44 +0200 Subject: scsi: remove performance regression due to async queue run Commit c21e6beb removed our queue request_fn re-enter protection, and defaulted to always running the queues from kblockd to be safe. This was a known potential slow down, but should be safe. Unfortunately this is causing big performance regressions for some, so we need to improve this logic. Looking into the details of the re-enter, the real issue is on requeue of requests. Requeue of requests upon seeing a BUSY condition from the device ends up re-running the queue, causing traces like this: scsi_request_fn() scsi_dispatch_cmd() scsi_queue_insert() __scsi_queue_insert() scsi_run_queue() scsi_request_fn() ... potentially causing the issue we want to avoid. So special case the requeue re-run of the queue, but improve it to offload the entire run of local queue and starved queue from a single workqueue callback. This is a lot better than potentially kicking off a workqueue run for each device seen. This also fixes the issue of the local device going into recursion, since the above mentioned commit never moved that queue run out of line. Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 20 ++++++++++++++++---- drivers/scsi/scsi_scan.c | 2 ++ include/scsi/scsi_device.h | 1 + 3 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e9901b8f8443..01e4e51c4b68 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -74,8 +74,6 @@ struct kmem_cache *scsi_sdb_cache; */ #define SCSI_QUEUE_DELAY 3 -static void scsi_run_queue(struct request_queue *q); - /* * Function: scsi_unprep_request() * @@ -161,7 +159,7 @@ static int __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) blk_requeue_request(q, cmd->request); spin_unlock_irqrestore(q->queue_lock, flags); - scsi_run_queue(q); + kblockd_schedule_work(q, &device->requeue_work); return 0; } @@ -433,7 +431,11 @@ static void scsi_run_queue(struct request_queue *q) continue; } - blk_run_queue_async(sdev->request_queue); + spin_unlock(shost->host_lock); + spin_lock(sdev->request_queue->queue_lock); + __blk_run_queue(sdev->request_queue); + spin_unlock(sdev->request_queue->queue_lock); + spin_lock(shost->host_lock); } /* put any unprocessed entries back */ list_splice(&starved_list, &shost->starved_list); @@ -442,6 +444,16 @@ static void scsi_run_queue(struct request_queue *q) blk_run_queue(q); } +void scsi_requeue_run_queue(struct work_struct *work) +{ + struct scsi_device *sdev; + struct request_queue *q; + + sdev = container_of(work, struct scsi_device, requeue_work); + q = sdev->request_queue; + scsi_run_queue(q); +} + /* * Function: scsi_requeue_command() * diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 087821fac8fe..58584dc0724a 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -242,6 +242,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, int display_failure_msg = 1, ret; struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); extern void scsi_evt_thread(struct work_struct *work); + extern void scsi_requeue_run_queue(struct work_struct *work); sdev = kzalloc(sizeof(*sdev) + shost->transportt->device_size, GFP_ATOMIC); @@ -264,6 +265,7 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget, INIT_LIST_HEAD(&sdev->event_list); spin_lock_init(&sdev->list_lock); INIT_WORK(&sdev->event_work, scsi_evt_thread); + INIT_WORK(&sdev->requeue_work, scsi_requeue_run_queue); sdev->sdev_gendev.parent = get_device(&starget->dev); sdev->sdev_target = starget; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 2d3ec5094685..dd82e02ddde3 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -169,6 +169,7 @@ struct scsi_device { sdev_dev; struct execute_work ew; /* used to get process context on put */ + struct work_struct requeue_work; struct scsi_dh_data *scsi_dh_data; enum scsi_device_state sdev_state; -- cgit v1.2.3 From f12a20fc9bfba4218ecbc4e40c8e08dc2a85dc99 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 May 2011 15:44:12 -0700 Subject: procfs: add stub for proc_mkdir_mode() Provide a stub for proc_mkdir_mode() when CONFIG_PROC_FS is not enabled, just like the stub for proc_mkdir(). Fixes this linux-next build error: drivers/net/wireless/airo.c:4504: error: implicit declaration of function 'proc_mkdir_mode' Signed-off-by: Randy Dunlap Cc: Stephen Rothwell Cc: Alexey Dobriyan Cc: "John W. Linville" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 838c1149251a..eaf4350c0f90 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -208,6 +208,8 @@ static inline struct proc_dir_entry *proc_symlink(const char *name, struct proc_dir_entry *parent,const char *dest) {return NULL;} static inline struct proc_dir_entry *proc_mkdir(const char *name, struct proc_dir_entry *parent) {return NULL;} +static inline struct proc_dir_entry *proc_mkdir_mode(const char *name, + mode_t mode, struct proc_dir_entry *parent) { return NULL; } static inline struct proc_dir_entry *create_proc_read_entry(const char *name, mode_t mode, struct proc_dir_entry *base, -- cgit v1.2.3 From 01294d82622d6d9d64bde8e4530c7e2c6dbb6ee6 Mon Sep 17 00:00:00 2001 From: Milton Miller Date: Wed, 18 May 2011 10:27:39 -0500 Subject: of: fix race when matching drivers If two drivers are probing devices at the same time, both will write their match table result to the dev->of_match cache at the same time. Only write the result if the device matches. In a thread titled "SBus devices sometimes detected, sometimes not", Meelis reported his SBus hme was not detected about 50% of the time. From the debug suggested by Grant it was obvious another driver matched some devices between the call to match the hme and the hme discovery failling. Reported-by: Meelis Roos Signed-off-by: Milton Miller [grant.likely: modified to only call of_match_device() once] Signed-off-by: Grant Likely --- include/linux/of_device.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8bfe6c1d4365..b33d68814a73 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,8 +21,12 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - dev->of_match = of_match_device(drv->of_match_table, dev); - return dev->of_match != NULL; + const struct of_device_id *match; + + match = of_match_device(drv->of_match_table, dev); + if (match) + dev->of_match = match; + return match != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); -- cgit v1.2.3 From b1608d69cb804e414d0887140ba08a9398e4e638 Mon Sep 17 00:00:00 2001 From: Grant Likely Date: Wed, 18 May 2011 11:19:24 -0600 Subject: drivercore: revert addition of of_match to struct device Commit b826291c, "drivercore/dt: add a match table pointer to struct device" added an of_match pointer to struct device to cache the of_match_table entry discovered at driver match time. This was unsafe because matching is not an atomic operation with probing a driver. If two or more drivers are attempted to be matched to a driver at the same time, then the cached matching entry pointer could get overwritten. This patch reverts the of_match cache pointer and reworks all users to call of_match_device() directly instead. Signed-off-by: Grant Likely --- arch/powerpc/platforms/83xx/suspend.c | 7 +++++-- arch/powerpc/sysdev/fsl_msi.c | 7 +++++-- arch/sparc/kernel/pci_sabre.c | 5 ++++- arch/sparc/kernel/pci_schizo.c | 8 ++++++-- drivers/atm/fore200e.c | 7 +++++-- drivers/char/hw_random/n2-drv.c | 7 +++++-- drivers/char/ipmi/ipmi_si_intf.c | 7 +++++-- drivers/char/xilinx_hwicap/xilinx_hwicap.c | 14 +++++++++----- drivers/edac/ppc4xx_edac.c | 2 +- drivers/i2c/busses/i2c-mpc.c | 9 ++++++--- drivers/mmc/host/sdhci-of-core.c | 7 +++++-- drivers/mtd/maps/physmap_of.c | 7 +++++-- drivers/net/can/mscan/mpc5xxx_can.c | 7 +++++-- drivers/net/fs_enet/fs_enet-main.c | 9 ++++++--- drivers/net/fs_enet/mii-fec.c | 7 +++++-- drivers/net/sunhme.c | 7 +++++-- drivers/scsi/qlogicpti.c | 7 +++++-- drivers/tty/serial/of_serial.c | 7 +++++-- drivers/usb/gadget/fsl_qe_udc.c | 7 +++++-- drivers/watchdog/mpc8xxx_wdt.c | 7 +++++-- include/linux/device.h | 1 - include/linux/of_device.h | 12 ++++++------ 22 files changed, 108 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 188272934cfb..104faa8aa23c 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -318,17 +318,20 @@ static const struct platform_suspend_ops mpc83xx_suspend_ops = { .end = mpc83xx_suspend_end, }; +static struct of_device_id pmc_match[]; static int pmc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct resource res; struct pmc_type *type; int ret = 0; - if (!ofdev->dev.of_match) + match = of_match_device(pmc_match, &ofdev->dev); + if (!match) return -EINVAL; - type = ofdev->dev.of_match->data; + type = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index d5679dc1e20f..01cd2f089512 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -304,8 +304,10 @@ static int __devinit fsl_msi_setup_hwirq(struct fsl_msi *msi, return 0; } +static const struct of_device_id fsl_of_msi_ids[]; static int __devinit fsl_of_msi_probe(struct platform_device *dev) { + const struct of_device_id *match; struct fsl_msi *msi; struct resource res; int err, i, j, irq_index, count; @@ -316,9 +318,10 @@ static int __devinit fsl_of_msi_probe(struct platform_device *dev) u32 offset; static const u32 all_avail[] = { 0, NR_MSI_IRQS }; - if (!dev->dev.of_match) + match = of_match_device(fsl_of_msi_ids, &dev->dev); + if (!match) return -EINVAL; - features = dev->dev.of_match->data; + features = match->data; printk(KERN_DEBUG "Setting up Freescale MSI support\n"); diff --git a/arch/sparc/kernel/pci_sabre.c b/arch/sparc/kernel/pci_sabre.c index 948068a083fc..d1840dbdaa2f 100644 --- a/arch/sparc/kernel/pci_sabre.c +++ b/arch/sparc/kernel/pci_sabre.c @@ -452,8 +452,10 @@ static void __devinit sabre_pbm_init(struct pci_pbm_info *pbm, sabre_scan_bus(pbm, &op->dev); } +static const struct of_device_id sabre_match[]; static int __devinit sabre_probe(struct platform_device *op) { + const struct of_device_id *match; const struct linux_prom64_registers *pr_regs; struct device_node *dp = op->dev.of_node; struct pci_pbm_info *pbm; @@ -463,7 +465,8 @@ static int __devinit sabre_probe(struct platform_device *op) const u32 *vdma; u64 clear_irq; - hummingbird_p = op->dev.of_match && (op->dev.of_match->data != NULL); + match = of_match_device(sabre_match, &op->dev); + hummingbird_p = match && (match->data != NULL); if (!hummingbird_p) { struct device_node *cpu_dp; diff --git a/arch/sparc/kernel/pci_schizo.c b/arch/sparc/kernel/pci_schizo.c index fecfcb2063c8..283fbc329a43 100644 --- a/arch/sparc/kernel/pci_schizo.c +++ b/arch/sparc/kernel/pci_schizo.c @@ -1458,11 +1458,15 @@ out_err: return err; } +static const struct of_device_id schizo_match[]; static int __devinit schizo_probe(struct platform_device *op) { - if (!op->dev.of_match) + const struct of_device_id *match; + + match = of_match_device(schizo_match, &op->dev); + if (!match) return -EINVAL; - return __schizo_init(op, (unsigned long) op->dev.of_match->data); + return __schizo_init(op, (unsigned long)match->data); } /* The ordering of this table is very important. Some Tomatillo diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index bdd2719f3f68..bc9e702186dd 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2643,16 +2643,19 @@ fore200e_init(struct fore200e* fore200e, struct device *parent) } #ifdef CONFIG_SBUS +static const struct of_device_id fore200e_sba_match[]; static int __devinit fore200e_sba_probe(struct platform_device *op) { + const struct of_device_id *match; const struct fore200e_bus *bus; struct fore200e *fore200e; static int index = 0; int err; - if (!op->dev.of_match) + match = of_match_device(fore200e_sba_match, &op->dev); + if (!match) return -EINVAL; - bus = op->dev.of_match->data; + bus = match->data; fore200e = kzalloc(sizeof(struct fore200e), GFP_KERNEL); if (!fore200e) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index 43ac61978d8b..ac6739e085e3 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -619,15 +619,18 @@ static void __devinit n2rng_driver_version(void) pr_info("%s", version); } +static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { + const struct of_device_id *match; int victoria_falls; int err = -ENOMEM; struct n2rng *np; - if (!op->dev.of_match) + match = of_match_device(n2rng_match, &op->dev); + if (!match) return -EINVAL; - victoria_falls = (op->dev.of_match->data != NULL); + victoria_falls = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index cc6c9b2546a3..64c6b8530615 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2554,9 +2554,11 @@ static struct pci_driver ipmi_pci_driver = { }; #endif /* CONFIG_PCI */ +static struct of_device_id ipmi_match[]; static int __devinit ipmi_probe(struct platform_device *dev) { #ifdef CONFIG_OF + const struct of_device_id *match; struct smi_info *info; struct resource resource; const __be32 *regsize, *regspacing, *regshift; @@ -2566,7 +2568,8 @@ static int __devinit ipmi_probe(struct platform_device *dev) dev_info(&dev->dev, "probing via device tree\n"); - if (!dev->dev.of_match) + match = of_match_device(ipmi_match, &dev->dev); + if (!match) return -EINVAL; ret = of_address_to_resource(np, 0, &resource); @@ -2601,7 +2604,7 @@ static int __devinit ipmi_probe(struct platform_device *dev) return -ENOMEM; } - info->si_type = (enum si_type) dev->dev.of_match->data; + info->si_type = (enum si_type) match->data; info->addr_source = SI_DEVICETREE; info->irq_setup = std_irq_setup; diff --git a/drivers/char/xilinx_hwicap/xilinx_hwicap.c b/drivers/char/xilinx_hwicap/xilinx_hwicap.c index d6412c16385f..39ccdeada791 100644 --- a/drivers/char/xilinx_hwicap/xilinx_hwicap.c +++ b/drivers/char/xilinx_hwicap/xilinx_hwicap.c @@ -715,13 +715,13 @@ static int __devexit hwicap_remove(struct device *dev) } #ifdef CONFIG_OF -static int __devinit hwicap_of_probe(struct platform_device *op) +static int __devinit hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { struct resource res; const unsigned int *id; const char *family; int rc; - const struct hwicap_driver_config *config = op->dev.of_match->data; const struct config_registers *regs; @@ -751,20 +751,24 @@ static int __devinit hwicap_of_probe(struct platform_device *op) regs); } #else -static inline int hwicap_of_probe(struct platform_device *op) +static inline int hwicap_of_probe(struct platform_device *op, + const struct hwicap_driver_config *config) { return -EINVAL; } #endif /* CONFIG_OF */ +static const struct of_device_id __devinitconst hwicap_of_match[]; static int __devinit hwicap_drv_probe(struct platform_device *pdev) { + const struct of_device_id *match; struct resource *res; const struct config_registers *regs; const char *family; - if (pdev->dev.of_match) - return hwicap_of_probe(pdev); + match = of_match_device(hwicap_of_match, &pdev->dev); + if (match) + return hwicap_of_probe(pdev, match->data); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!res) diff --git a/drivers/edac/ppc4xx_edac.c b/drivers/edac/ppc4xx_edac.c index c1f0045ceb8e..af8e7b1aa290 100644 --- a/drivers/edac/ppc4xx_edac.c +++ b/drivers/edac/ppc4xx_edac.c @@ -1019,7 +1019,7 @@ ppc4xx_edac_mc_init(struct mem_ctl_info *mci, struct ppc4xx_edac_pdata *pdata = NULL; const struct device_node *np = op->dev.of_node; - if (op->dev.of_match == NULL) + if (of_match_device(ppc4xx_edac_match, &op->dev) == NULL) return -EINVAL; /* Initial driver pointers and private data */ diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 75b984c519ac..107397a606b4 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -560,15 +560,18 @@ static struct i2c_adapter mpc_ops = { .timeout = HZ, }; +static const struct of_device_id mpc_i2c_of_match[]; static int __devinit fsl_i2c_probe(struct platform_device *op) { + const struct of_device_id *match; struct mpc_i2c *i2c; const u32 *prop; u32 clock = MPC_I2C_CLOCK_LEGACY; int result = 0; int plen; - if (!op->dev.of_match) + match = of_match_device(mpc_i2c_of_match, &op->dev); + if (!match) return -EINVAL; i2c = kzalloc(sizeof(*i2c), GFP_KERNEL); @@ -605,8 +608,8 @@ static int __devinit fsl_i2c_probe(struct platform_device *op) clock = *prop; } - if (op->dev.of_match->data) { - struct mpc_i2c_data *data = op->dev.of_match->data; + if (match->data) { + struct mpc_i2c_data *data = match->data; data->setup(op->dev.of_node, i2c, clock, data->prescaler); } else { /* Backwards compatibility */ diff --git a/drivers/mmc/host/sdhci-of-core.c b/drivers/mmc/host/sdhci-of-core.c index f9b611fc773e..60e4186a4345 100644 --- a/drivers/mmc/host/sdhci-of-core.c +++ b/drivers/mmc/host/sdhci-of-core.c @@ -124,8 +124,10 @@ static bool __devinit sdhci_of_wp_inverted(struct device_node *np) #endif } +static const struct of_device_id sdhci_of_match[]; static int __devinit sdhci_of_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct sdhci_of_data *sdhci_of_data; struct sdhci_host *host; @@ -134,9 +136,10 @@ static int __devinit sdhci_of_probe(struct platform_device *ofdev) int size; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(sdhci_of_match, &ofdev->dev); + if (!match) return -EINVAL; - sdhci_of_data = ofdev->dev.of_match->data; + sdhci_of_data = match->data; if (!of_device_is_available(np)) return -ENODEV; diff --git a/drivers/mtd/maps/physmap_of.c b/drivers/mtd/maps/physmap_of.c index bd483f0c57e1..c1d33464aee8 100644 --- a/drivers/mtd/maps/physmap_of.c +++ b/drivers/mtd/maps/physmap_of.c @@ -214,11 +214,13 @@ static void __devinit of_free_probes(const char **probes) } #endif +static struct of_device_id of_flash_match[]; static int __devinit of_flash_probe(struct platform_device *dev) { #ifdef CONFIG_MTD_PARTITIONS const char **part_probe_types; #endif + const struct of_device_id *match; struct device_node *dp = dev->dev.of_node; struct resource res; struct of_flash *info; @@ -232,9 +234,10 @@ static int __devinit of_flash_probe(struct platform_device *dev) struct mtd_info **mtd_list = NULL; resource_size_t res_size; - if (!dev->dev.of_match) + match = of_match_device(of_flash_match, &dev->dev); + if (!match) return -EINVAL; - probe_type = dev->dev.of_match->data; + probe_type = match->data; reg_tuple_size = (of_n_addr_cells(dp) + of_n_size_cells(dp)) * sizeof(u32); diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c index bd1d811c204f..5fedc3375562 100644 --- a/drivers/net/can/mscan/mpc5xxx_can.c +++ b/drivers/net/can/mscan/mpc5xxx_can.c @@ -247,8 +247,10 @@ static u32 __devinit mpc512x_can_get_clock(struct platform_device *ofdev, } #endif /* CONFIG_PPC_MPC512x */ +static struct of_device_id mpc5xxx_can_table[]; static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct mpc5xxx_can_data *data; struct device_node *np = ofdev->dev.of_node; struct net_device *dev; @@ -258,9 +260,10 @@ static int __devinit mpc5xxx_can_probe(struct platform_device *ofdev) int irq, mscan_clksrc = 0; int err = -ENOMEM; - if (!ofdev->dev.of_match) + match = of_match_device(mpc5xxx_can_table, &ofdev->dev); + if (!match) return -EINVAL; - data = (struct mpc5xxx_can_data *)ofdev->dev.of_match->data; + data = match->data; base = of_iomap(np, 0); if (!base) { diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 24cb953900dd..5131e61c358c 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -998,8 +998,10 @@ static const struct net_device_ops fs_enet_netdev_ops = { #endif }; +static struct of_device_id fs_enet_match[]; static int __devinit fs_enet_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct net_device *ndev; struct fs_enet_private *fep; struct fs_platform_info *fpi; @@ -1007,14 +1009,15 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) const u8 *mac_addr; int privsize, len, ret = -ENODEV; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_match, &ofdev->dev); + if (!match) return -EINVAL; fpi = kzalloc(sizeof(*fpi), GFP_KERNEL); if (!fpi) return -ENOMEM; - if (!IS_FEC(ofdev->dev.of_match)) { + if (!IS_FEC(match)) { data = of_get_property(ofdev->dev.of_node, "fsl,cpm-command", &len); if (!data || len != 4) goto out_free_fpi; @@ -1049,7 +1052,7 @@ static int __devinit fs_enet_probe(struct platform_device *ofdev) fep->dev = &ofdev->dev; fep->ndev = ndev; fep->fpi = fpi; - fep->ops = ofdev->dev.of_match->data; + fep->ops = match->data; ret = fep->ops->setup_data(ndev); if (ret) diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c index 7e840d373ab3..6a2e150e75bb 100644 --- a/drivers/net/fs_enet/mii-fec.c +++ b/drivers/net/fs_enet/mii-fec.c @@ -101,17 +101,20 @@ static int fs_enet_fec_mii_reset(struct mii_bus *bus) return 0; } +static struct of_device_id fs_enet_mdio_fec_match[]; static int __devinit fs_enet_mdio_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct resource res; struct mii_bus *new_bus; struct fec_info *fec; int (*get_bus_freq)(struct device_node *); int ret = -ENOMEM, clock, speed; - if (!ofdev->dev.of_match) + match = of_match_device(fs_enet_mdio_fec_match, &ofdev->dev); + if (!match) return -EINVAL; - get_bus_freq = ofdev->dev.of_match->data; + get_bus_freq = match->data; new_bus = mdiobus_alloc(); if (!new_bus) diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index eb4f59fb01e9..bff2f7999ff0 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -3237,15 +3237,18 @@ static void happy_meal_pci_exit(void) #endif #ifdef CONFIG_SBUS +static const struct of_device_id hme_sbus_match[]; static int __devinit hme_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct device_node *dp = op->dev.of_node; const char *model = of_get_property(dp, "model", NULL); int is_qfe; - if (!op->dev.of_match) + match = of_match_device(hme_sbus_match, &op->dev); + if (!match) return -EINVAL; - is_qfe = (op->dev.of_match->data != NULL); + is_qfe = (match->data != NULL); if (!is_qfe && model && !strcmp(model, "SUNW,sbus-qfe")) is_qfe = 1; diff --git a/drivers/scsi/qlogicpti.c b/drivers/scsi/qlogicpti.c index e2d45c91b8e8..9689d41c7888 100644 --- a/drivers/scsi/qlogicpti.c +++ b/drivers/scsi/qlogicpti.c @@ -1292,8 +1292,10 @@ static struct scsi_host_template qpti_template = { .use_clustering = ENABLE_CLUSTERING, }; +static const struct of_device_id qpti_match[]; static int __devinit qpti_sbus_probe(struct platform_device *op) { + const struct of_device_id *match; struct scsi_host_template *tpnt; struct device_node *dp = op->dev.of_node; struct Scsi_Host *host; @@ -1301,9 +1303,10 @@ static int __devinit qpti_sbus_probe(struct platform_device *op) static int nqptis; const char *fcode; - if (!op->dev.of_match) + match = of_match_device(qpti_match, &op->dev); + if (!match) return -EINVAL; - tpnt = op->dev.of_match->data; + tpnt = match->data; /* Sometimes Antares cards come up not completely * setup, and we get a report of a zero IRQ. diff --git a/drivers/tty/serial/of_serial.c b/drivers/tty/serial/of_serial.c index 0e8eec516df4..c911b2419abb 100644 --- a/drivers/tty/serial/of_serial.c +++ b/drivers/tty/serial/of_serial.c @@ -80,14 +80,17 @@ static int __devinit of_platform_serial_setup(struct platform_device *ofdev, /* * Try to register a serial port */ +static struct of_device_id of_platform_serial_table[]; static int __devinit of_platform_serial_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct of_serial_info *info; struct uart_port port; int port_type; int ret; - if (!ofdev->dev.of_match) + match = of_match_device(of_platform_serial_table, &ofdev->dev); + if (!match) return -EINVAL; if (of_find_property(ofdev->dev.of_node, "used-by-rtas", NULL)) @@ -97,7 +100,7 @@ static int __devinit of_platform_serial_probe(struct platform_device *ofdev) if (info == NULL) return -ENOMEM; - port_type = (unsigned long)ofdev->dev.of_match->data; + port_type = (unsigned long)match->data; ret = of_platform_serial_setup(ofdev, port_type, &port); if (ret) goto out; diff --git a/drivers/usb/gadget/fsl_qe_udc.c b/drivers/usb/gadget/fsl_qe_udc.c index 36613b37c504..3a68e09309f7 100644 --- a/drivers/usb/gadget/fsl_qe_udc.c +++ b/drivers/usb/gadget/fsl_qe_udc.c @@ -2539,15 +2539,18 @@ static void qe_udc_release(struct device *dev) } /* Driver probe functions */ +static const struct of_device_id qe_udc_match[]; static int __devinit qe_udc_probe(struct platform_device *ofdev) { + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct qe_ep *ep; unsigned int ret = 0; unsigned int i; const void *prop; - if (!ofdev->dev.of_match) + match = of_match_device(qe_udc_match, &ofdev->dev); + if (!match) return -EINVAL; prop = of_get_property(np, "mode", NULL); @@ -2561,7 +2564,7 @@ static int __devinit qe_udc_probe(struct platform_device *ofdev) return -ENOMEM; } - udc_controller->soc_type = (unsigned long)ofdev->dev.of_match->data; + udc_controller->soc_type = (unsigned long)match->data; udc_controller->usb_regs = of_iomap(np, 0); if (!udc_controller->usb_regs) { ret = -ENOMEM; diff --git a/drivers/watchdog/mpc8xxx_wdt.c b/drivers/watchdog/mpc8xxx_wdt.c index 528bceb220fd..eed5436ffb51 100644 --- a/drivers/watchdog/mpc8xxx_wdt.c +++ b/drivers/watchdog/mpc8xxx_wdt.c @@ -185,17 +185,20 @@ static struct miscdevice mpc8xxx_wdt_miscdev = { .fops = &mpc8xxx_wdt_fops, }; +static const struct of_device_id mpc8xxx_wdt_match[]; static int __devinit mpc8xxx_wdt_probe(struct platform_device *ofdev) { int ret; + const struct of_device_id *match; struct device_node *np = ofdev->dev.of_node; struct mpc8xxx_wdt_type *wdt_type; u32 freq = fsl_get_sys_freq(); bool enabled; - if (!ofdev->dev.of_match) + match = of_match_device(mpc8xxx_wdt_match, &ofdev->dev); + if (!match) return -EINVAL; - wdt_type = ofdev->dev.of_match->data; + wdt_type = match->data; if (!freq || freq == -1) return -EINVAL; diff --git a/include/linux/device.h b/include/linux/device.h index ab8dfc095709..d08399db6e2c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -442,7 +442,6 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - const struct of_device_id *of_match; /* matching of_device_id from driver */ dev_t devt; /* dev_t, creates the sysfs "dev" */ diff --git a/include/linux/of_device.h b/include/linux/of_device.h index b33d68814a73..ae5638480ef2 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -21,12 +21,7 @@ extern void of_device_make_bus_id(struct device *dev); static inline int of_driver_match_device(struct device *dev, const struct device_driver *drv) { - const struct of_device_id *match; - - match = of_match_device(drv->of_match_table, dev); - if (match) - dev->of_match = match; - return match != NULL; + return of_match_device(drv->of_match_table, dev) != NULL; } extern struct platform_device *of_dev_get(struct platform_device *dev); @@ -62,6 +57,11 @@ static inline int of_device_uevent(struct device *dev, static inline void of_device_node_put(struct device *dev) { } +static inline const struct of_device_id *of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return NULL; +} #endif /* CONFIG_OF_DEVICE */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3