From 5f986c590fcf4284924fcda991cf14ab32bff49f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Oct 2012 01:07:27 +0200 Subject: PM / QoS: Prepare device structure for adding more constraint types Currently struct dev_pm_info contains only one PM QoS constraints pointer reserved for latency requirements. Since one more device constraints type (i.e. flags) will be necessary, introduce a new structure, struct dev_pm_qos, that eventually will contain all of the available device PM QoS constraints and replace the "constraints" pointer in struct dev_pm_info with a pointer to the new structure called "qos". Signed-off-by: Rafael J. Wysocki Reviewed-by: Jean Pihet --- include/linux/pm.h | 2 +- include/linux/pm_qos.h | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 007e687c4f69..0ce6df94221a 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -549,7 +549,7 @@ struct dev_pm_info { struct dev_pm_qos_request *pq_req; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. */ - struct pm_qos_constraints *constraints; + struct dev_pm_qos *qos; }; extern void update_pm_runtime_accounting(struct device *dev); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 9924ea1f22e0..30e9ad72e797 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -57,6 +57,10 @@ struct pm_qos_constraints { struct blocking_notifier_head *notifiers; }; +struct dev_pm_qos { + struct pm_qos_constraints latency; +}; + /* Action requested to pm_qos_update_target */ enum pm_qos_req_action { PM_QOS_ADD_REQ, /* Add a new request */ -- cgit v1.2.3 From 5efbe4279f959a3f5ed26adf5f05cb78dd1ffa7e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Oct 2012 01:07:46 +0200 Subject: PM / QoS: Introduce request and constraint data types for PM QoS flags Introduce struct pm_qos_flags_request and struct pm_qos_flags representing PM QoS flags request type and PM QoS flags constraint type, respectively. With these definitions the data structures will be arranged so that the list member of a struct pm_qos_flags object will contain the head of a list of struct pm_qos_flags_request objects representing all of the "flags" requests present for the given device. Then, the effective_flags member of a struct pm_qos_flags object will contain the bitwise OR of the flags members of all the struct pm_qos_flags_request objects in the list. Additionally, introduce helper function pm_qos_update_flags() allowing the caller to manage the list of struct pm_qos_flags_request pointed to by the list member of struct pm_qos_flags. The flags are of type s32 so that the request's "value" field is always of the same type regardless of what kind of request it is (latency requests already have value fields of type s32). Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Jean Pihet Acked-by: mark gross --- include/linux/pm_qos.h | 17 ++++++++++++-- kernel/power/qos.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 30e9ad72e797..413ada3c7c97 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -33,6 +33,11 @@ struct pm_qos_request { struct delayed_work work; /* for pm_qos_update_request_timeout */ }; +struct pm_qos_flags_request { + struct list_head node; + s32 flags; /* Do not change to 64 bit */ +}; + struct dev_pm_qos_request { struct plist_node node; struct device *dev; @@ -45,8 +50,8 @@ enum pm_qos_type { }; /* - * Note: The lockless read path depends on the CPU accessing - * target_value atomically. Atomic access is only guaranteed on all CPU + * Note: The lockless read path depends on the CPU accessing target_value + * or effective_flags atomically. Atomic access is only guaranteed on all CPU * types linux supports for 32 bit quantites */ struct pm_qos_constraints { @@ -57,6 +62,11 @@ struct pm_qos_constraints { struct blocking_notifier_head *notifiers; }; +struct pm_qos_flags { + struct list_head list; + s32 effective_flags; /* Do not change to 64 bit */ +}; + struct dev_pm_qos { struct pm_qos_constraints latency; }; @@ -75,6 +85,9 @@ static inline int dev_pm_qos_request_active(struct dev_pm_qos_request *req) int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, enum pm_qos_req_action action, int value); +bool pm_qos_update_flags(struct pm_qos_flags *pqf, + struct pm_qos_flags_request *req, + enum pm_qos_req_action action, s32 val); void pm_qos_add_request(struct pm_qos_request *req, int pm_qos_class, s32 value); void pm_qos_update_request(struct pm_qos_request *req, diff --git a/kernel/power/qos.c b/kernel/power/qos.c index 846bd42c7ed1..2ab2819aee65 100644 --- a/kernel/power/qos.c +++ b/kernel/power/qos.c @@ -212,6 +212,69 @@ int pm_qos_update_target(struct pm_qos_constraints *c, struct plist_node *node, } } +/** + * pm_qos_flags_remove_req - Remove device PM QoS flags request. + * @pqf: Device PM QoS flags set to remove the request from. + * @req: Request to remove from the set. + */ +static void pm_qos_flags_remove_req(struct pm_qos_flags *pqf, + struct pm_qos_flags_request *req) +{ + s32 val = 0; + + list_del(&req->node); + list_for_each_entry(req, &pqf->list, node) + val |= req->flags; + + pqf->effective_flags = val; +} + +/** + * pm_qos_update_flags - Update a set of PM QoS flags. + * @pqf: Set of flags to update. + * @req: Request to add to the set, to modify, or to remove from the set. + * @action: Action to take on the set. + * @val: Value of the request to add or modify. + * + * Update the given set of PM QoS flags and call notifiers if the aggregate + * value has changed. Returns 1 if the aggregate constraint value has changed, + * 0 otherwise. + */ +bool pm_qos_update_flags(struct pm_qos_flags *pqf, + struct pm_qos_flags_request *req, + enum pm_qos_req_action action, s32 val) +{ + unsigned long irqflags; + s32 prev_value, curr_value; + + spin_lock_irqsave(&pm_qos_lock, irqflags); + + prev_value = list_empty(&pqf->list) ? 
0 : pqf->effective_flags; + + switch (action) { + case PM_QOS_REMOVE_REQ: + pm_qos_flags_remove_req(pqf, req); + break; + case PM_QOS_UPDATE_REQ: + pm_qos_flags_remove_req(pqf, req); + case PM_QOS_ADD_REQ: + req->flags = val; + INIT_LIST_HEAD(&req->node); + list_add_tail(&req->node, &pqf->list); + pqf->effective_flags |= val; + break; + default: + /* no action */ + ; + } + + curr_value = list_empty(&pqf->list) ? 0 : pqf->effective_flags; + + spin_unlock_irqrestore(&pm_qos_lock, irqflags); + + return prev_value != curr_value; +} + /** * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested -- cgit v1.2.3 From 021c870ba4ab4bc9a23d5db4e324f50f26d8ab24 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 23 Oct 2012 01:09:00 +0200 Subject: PM / QoS: Prepare struct dev_pm_qos_request for more request types The subsequent patches will use struct dev_pm_qos_request for representing both latency requests and flags requests. To make that easier, put the node member of struct dev_pm_qos_request (under the name "pnode") into a union called "data" that will represent the request's value and list node depending on its type. Signed-off-by: Rafael J. Wysocki Reviewed-by: Jean Pihet Reviewed-by: mark gross --- drivers/base/power/qos.c | 6 +++--- drivers/base/power/sysfs.c | 2 +- include/linux/pm_qos.h | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 40ff1b02a7c5..96d27b821bb8 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -90,7 +90,7 @@ static int apply_constraint(struct dev_pm_qos_request *req, int ret, curr_value; ret = pm_qos_update_target(&req->dev->power.qos->latency, - &req->node, action, value); + &req->data.pnode, action, value); if (ret) { /* Call the global callbacks if needed */ @@ -183,7 +183,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) c = &qos->latency; /* Flush the constraints list for the device */ - plist_for_each_entry_safe(req, tmp, &c->list, node) { + plist_for_each_entry_safe(req, tmp, &c->list, data.pnode) { /* * Update constraints list and call the notification * callbacks if needed @@ -293,7 +293,7 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, mutex_lock(&dev_pm_qos_mtx); if (req->dev->power.qos) { - if (new_value != req->node.prio) + if (new_value != req->data.pnode.prio) ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); } else { diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index b91dc6f1e914..54c61ffa2044 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -221,7 +221,7 @@ static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show, static ssize_t pm_qos_latency_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev->power.pq_req->node.prio); + return sprintf(buf, "%d\n", dev->power.pq_req->data.pnode.prio); } static ssize_t pm_qos_latency_store(struct device *dev, diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 413ada3c7c97..3b9d14964d2b 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -39,7 +39,9 @@ struct pm_qos_flags_request { }; struct dev_pm_qos_request { - struct plist_node node; + union { + struct plist_node pnode; + } data; struct device *dev; }; -- cgit v1.2.3 From ae0fb4b72c8db7e6c4ef32bc58a43a759ad414b9 Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Tue, 23 Oct 2012 01:09:12 +0200 Subject: PM / QoS: Introduce PM QoS device flags support Modify the device PM QoS core code to support PM QoS flags requests. First, add a new field of type struct pm_qos_flags called "flags" to struct dev_pm_qos for representing the list of PM QoS flags requests for the given device. Accordingly, add a new "type" field to struct dev_pm_qos_request (along with an enum for representing request types) and a new member called "flr" to its data union for representig flags requests. Second, modify dev_pm_qos_add_request(), dev_pm_qos_update_request(), the internal routine apply_constraint() used by them and their existing callers to cover flags requests as well as latency requests. In particular, dev_pm_qos_add_request() gets a new argument called "type" for specifying the type of a request to be added. Finally, introduce two routines, __dev_pm_qos_flags() and dev_pm_qos_flags(), allowing their callers to check which PM QoS flags have been requested for the given device (the caller is supposed to pass the mask of flags to check as the routine's second argument and examine its return value for the result). Signed-off-by: Rafael J. Wysocki Reviewed-by: Jean Pihet Reviewed-by: mark gross --- Documentation/power/pm_qos_interface.txt | 2 +- drivers/base/power/qos.c | 124 ++++++++++++++++++++++++------- drivers/mtd/nand/sh_flctl.c | 4 +- include/linux/pm_qos.h | 26 ++++++- 4 files changed, 127 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt index 17e130a80347..79a2a58425ee 100644 --- a/Documentation/power/pm_qos_interface.txt +++ b/Documentation/power/pm_qos_interface.txt @@ -99,7 +99,7 @@ reading the aggregated value does not require any locking mechanism. From kernel mode the use of this interface is the following: -int dev_pm_qos_add_request(device, handle, value): +int dev_pm_qos_add_request(device, handle, type, value): Will insert an element into the list for that identified device with the target value. Upon change to this list the new target is recomputed and any registered notifiers are called only if the target value is now different. diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 96d27b821bb8..3c66f75d14b0 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -47,6 +47,50 @@ static DEFINE_MUTEX(dev_pm_qos_mtx); static BLOCKING_NOTIFIER_HEAD(dev_pm_notifiers); +/** + * __dev_pm_qos_flags - Check PM QoS flags for a given device. + * @dev: Device to check the PM QoS flags for. + * @mask: Flags to check against. + * + * This routine must be called with dev->power.lock held. + */ +enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask) +{ + struct dev_pm_qos *qos = dev->power.qos; + struct pm_qos_flags *pqf; + s32 val; + + if (!qos) + return PM_QOS_FLAGS_UNDEFINED; + + pqf = &qos->flags; + if (list_empty(&pqf->list)) + return PM_QOS_FLAGS_UNDEFINED; + + val = pqf->effective_flags & mask; + if (val) + return (val == mask) ? PM_QOS_FLAGS_ALL : PM_QOS_FLAGS_SOME; + + return PM_QOS_FLAGS_NONE; +} + +/** + * dev_pm_qos_flags - Check PM QoS flags for a given device (locked). + * @dev: Device to check the PM QoS flags for. + * @mask: Flags to check against. 
+ */ +enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) +{ + unsigned long irqflags; + enum pm_qos_flags_status ret; + + spin_lock_irqsave(&dev->power.lock, irqflags); + ret = __dev_pm_qos_flags(dev, mask); + spin_unlock_irqrestore(&dev->power.lock, irqflags); + + return ret; +} + /** * __dev_pm_qos_read_value - Get PM QoS constraint for a given device. * @dev: Device to get the PM QoS constraint value for. @@ -74,30 +118,39 @@ s32 dev_pm_qos_read_value(struct device *dev) return ret; } -/* - * apply_constraint - * @req: constraint request to apply - * @action: action to perform add/update/remove, of type enum pm_qos_req_action - * @value: defines the qos request +/** + * apply_constraint - Add/modify/remove device PM QoS request. + * @req: Constraint request to apply + * @action: Action to perform (add/update/remove). + * @value: Value to assign to the QoS request. * * Internal function to update the constraints list using the PM QoS core * code and if needed call the per-device and the global notification * callbacks */ static int apply_constraint(struct dev_pm_qos_request *req, - enum pm_qos_req_action action, int value) + enum pm_qos_req_action action, s32 value) { - int ret, curr_value; - - ret = pm_qos_update_target(&req->dev->power.qos->latency, - &req->data.pnode, action, value); + struct dev_pm_qos *qos = req->dev->power.qos; + int ret; - if (ret) { - /* Call the global callbacks if needed */ - curr_value = pm_qos_read_value(&req->dev->power.qos->latency); - blocking_notifier_call_chain(&dev_pm_notifiers, - (unsigned long)curr_value, - req); + switch(req->type) { + case DEV_PM_QOS_LATENCY: + ret = pm_qos_update_target(&qos->latency, &req->data.pnode, + action, value); + if (ret) { + value = pm_qos_read_value(&qos->latency); + blocking_notifier_call_chain(&dev_pm_notifiers, + (unsigned long)value, + req); + } + break; + case DEV_PM_QOS_FLAGS: + ret = pm_qos_update_flags(&qos->flags, &req->data.flr, + action, value); + break; + default: + ret = -EINVAL; } return ret; @@ -134,6 +187,8 @@ static int dev_pm_qos_constraints_allocate(struct device *dev) c->type = PM_QOS_MIN; c->notifiers = n; + INIT_LIST_HEAD(&qos->flags.list); + spin_lock_irq(&dev->power.lock); dev->power.qos = qos; spin_unlock_irq(&dev->power.lock); @@ -207,6 +262,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) * dev_pm_qos_add_request - inserts new qos request into the list * @dev: target device for the constraint * @req: pointer to a preallocated handle + * @type: type of the request * @value: defines the qos request * * This function inserts a new entry in the device constraints list of @@ -222,7 +278,7 @@ void dev_pm_qos_constraints_destroy(struct device *dev) * from the system. 
*/ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, - s32 value) + enum dev_pm_qos_req_type type, s32 value) { int ret = 0; @@ -253,8 +309,10 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, } } - if (!ret) + if (!ret) { + req->type = type; ret = apply_constraint(req, PM_QOS_ADD_REQ, value); + } out: mutex_unlock(&dev_pm_qos_mtx); @@ -281,6 +339,7 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { + s32 curr_value; int ret = 0; if (!req) /*guard against callers passing in null */ @@ -292,15 +351,27 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, mutex_lock(&dev_pm_qos_mtx); - if (req->dev->power.qos) { - if (new_value != req->data.pnode.prio) - ret = apply_constraint(req, PM_QOS_UPDATE_REQ, - new_value); - } else { - /* Return if the device has been removed */ + if (!req->dev->power.qos) { ret = -ENODEV; + goto out; } + switch(req->type) { + case DEV_PM_QOS_LATENCY: + curr_value = req->data.pnode.prio; + break; + case DEV_PM_QOS_FLAGS: + curr_value = req->data.flr.flags; + break; + default: + ret = -EINVAL; + goto out; + } + + if (curr_value != new_value) + ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); + + out: mutex_unlock(&dev_pm_qos_mtx); return ret; } @@ -451,7 +522,8 @@ int dev_pm_qos_add_ancestor_request(struct device *dev, ancestor = ancestor->parent; if (ancestor) - error = dev_pm_qos_add_request(ancestor, req, value); + error = dev_pm_qos_add_request(ancestor, req, + DEV_PM_QOS_LATENCY, value); if (error) req->dev = NULL; @@ -487,7 +559,7 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) if (!req) return -ENOMEM; - ret = dev_pm_qos_add_request(dev, req, value); + ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_LATENCY, value); if (ret < 0) return ret; diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c index 4fbfe96e37a1..f48ac5d80bbf 100644 --- a/drivers/mtd/nand/sh_flctl.c +++ b/drivers/mtd/nand/sh_flctl.c @@ -727,7 +727,9 @@ static void flctl_select_chip(struct mtd_info *mtd, int chipnr) if (!flctl->qos_request) { ret = dev_pm_qos_add_request(&flctl->pdev->dev, - &flctl->pm_qos, 100); + &flctl->pm_qos, + DEV_PM_QOS_LATENCY, + 100); if (ret < 0) dev_err(&flctl->pdev->dev, "PM QoS request failed: %d\n", ret); diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 3b9d14964d2b..3af7d8573c23 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -20,6 +20,13 @@ enum { PM_QOS_NUM_CLASSES, }; +enum pm_qos_flags_status { + PM_QOS_FLAGS_UNDEFINED = -1, + PM_QOS_FLAGS_NONE, + PM_QOS_FLAGS_SOME, + PM_QOS_FLAGS_ALL, +}; + #define PM_QOS_DEFAULT_VALUE -1 #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) @@ -38,9 +45,16 @@ struct pm_qos_flags_request { s32 flags; /* Do not change to 64 bit */ }; +enum dev_pm_qos_req_type { + DEV_PM_QOS_LATENCY = 1, + DEV_PM_QOS_FLAGS, +}; + struct dev_pm_qos_request { + enum dev_pm_qos_req_type type; union { struct plist_node pnode; + struct pm_qos_flags_request flr; } data; struct device *dev; }; @@ -71,6 +85,7 @@ struct pm_qos_flags { struct dev_pm_qos { struct pm_qos_constraints latency; + struct pm_qos_flags flags; }; /* Action requested to pm_qos_update_target */ @@ -105,10 +120,12 @@ int pm_qos_request_active(struct pm_qos_request *req); s32 pm_qos_read_value(struct pm_qos_constraints *c); #ifdef CONFIG_PM +enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, s32 mask); +enum pm_qos_flags_status 
dev_pm_qos_flags(struct device *dev, s32 mask); s32 __dev_pm_qos_read_value(struct device *dev); s32 dev_pm_qos_read_value(struct device *dev); int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, - s32 value); + enum dev_pm_qos_req_type type, s32 value); int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value); int dev_pm_qos_remove_request(struct dev_pm_qos_request *req); int dev_pm_qos_add_notifier(struct device *dev, @@ -122,12 +139,19 @@ void dev_pm_qos_constraints_destroy(struct device *dev); int dev_pm_qos_add_ancestor_request(struct device *dev, struct dev_pm_qos_request *req, s32 value); #else +static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, + s32 mask) + { return PM_QOS_FLAGS_UNDEFINED; } +static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, + s32 mask) + { return PM_QOS_FLAGS_UNDEFINED; } static inline s32 __dev_pm_qos_read_value(struct device *dev) { return 0; } static inline s32 dev_pm_qos_read_value(struct device *dev) { return 0; } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, + enum dev_pm_qos_req_type type, s32 value) { return 0; } static inline int dev_pm_qos_update_request(struct dev_pm_qos_request *req, -- cgit v1.2.3 From e39473d0b9448e770f49b0b15e514be884264438 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 24 Oct 2012 02:08:18 +0200 Subject: PM / QoS: Make it possible to expose PM QoS device flags to user space Define two device PM QoS flags, PM_QOS_FLAG_NO_POWER_OFF and PM_QOS_FLAG_REMOTE_WAKEUP, and introduce routines dev_pm_qos_expose_flags() and dev_pm_qos_hide_flags() allowing the caller to expose those two flags to user space or to hide them from it, respectively. After the flags have been exposed, user space will see two additional sysfs attributes, pm_qos_no_power_off and pm_qos_remote_wakeup, under the device's /sys/devices/.../power/ directory. Then, writing 1 to one of them will update the PM QoS flags request owned by user space so that the corresponding flag is requested to be set. In turn, writing 0 to one of them will cause the corresponding flag in the user space's request to be cleared (however, the owners of the other PM QoS flags requests for the same device may still request the flag to be set and it may be effectively set even if user space doesn't request that). Signed-off-by: Rafael J. Wysocki Reviewed-by: Jean Pihet Acked-by: mark gross --- Documentation/ABI/testing/sysfs-devices-power | 31 +++++ drivers/base/power/power.h | 6 +- drivers/base/power/qos.c | 168 ++++++++++++++++++++------ drivers/base/power/sysfs.c | 94 ++++++++++++-- include/linux/pm.h | 1 - include/linux/pm_qos.h | 26 ++++ 6 files changed, 278 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-devices-power b/Documentation/ABI/testing/sysfs-devices-power index 45000f0db4d4..7fc2997b23a6 100644 --- a/Documentation/ABI/testing/sysfs-devices-power +++ b/Documentation/ABI/testing/sysfs-devices-power @@ -204,3 +204,34 @@ Description: This attribute has no effect on system-wide suspend/resume and hibernation. + +What: /sys/devices/.../power/pm_qos_no_power_off +Date: September 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power/pm_qos_no_power_off attribute + is used for manipulating the PM QoS "no power off" flag. If + set, this flag indicates to the kernel that power should not + be removed entirely from the device. 
+ + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. + +What: /sys/devices/.../power/pm_qos_remote_wakeup +Date: September 2012 +Contact: Rafael J. Wysocki +Description: + The /sys/devices/.../power/pm_qos_remote_wakeup attribute + is used for manipulating the PM QoS "remote wakeup required" + flag. If set, this flag indicates to the kernel that the + device is a source of user events that have to be signaled from + its low-power states. + + Not all drivers support this attribute. If it isn't supported, + it is not present. + + This attribute has no effect on system-wide suspend/resume and + hibernation. diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h index 0dbfdf4419af..b16686a0a5a2 100644 --- a/drivers/base/power/power.h +++ b/drivers/base/power/power.h @@ -93,8 +93,10 @@ extern void dpm_sysfs_remove(struct device *dev); extern void rpm_sysfs_remove(struct device *dev); extern int wakeup_sysfs_add(struct device *dev); extern void wakeup_sysfs_remove(struct device *dev); -extern int pm_qos_sysfs_add(struct device *dev); -extern void pm_qos_sysfs_remove(struct device *dev); +extern int pm_qos_sysfs_add_latency(struct device *dev); +extern void pm_qos_sysfs_remove_latency(struct device *dev); +extern int pm_qos_sysfs_add_flags(struct device *dev); +extern void pm_qos_sysfs_remove_flags(struct device *dev); #else /* CONFIG_PM */ diff --git a/drivers/base/power/qos.c b/drivers/base/power/qos.c index 3c66f75d14b0..167834dcc82a 100644 --- a/drivers/base/power/qos.c +++ b/drivers/base/power/qos.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "power.h" @@ -321,6 +322,37 @@ int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, } EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); +/** + * __dev_pm_qos_update_request - Modify an existing device PM QoS request. + * @req : PM QoS request to modify. + * @new_value: New value to request. 
+ */ +static int __dev_pm_qos_update_request(struct dev_pm_qos_request *req, + s32 new_value) +{ + s32 curr_value; + int ret = 0; + + if (!req->dev->power.qos) + return -ENODEV; + + switch(req->type) { + case DEV_PM_QOS_LATENCY: + curr_value = req->data.pnode.prio; + break; + case DEV_PM_QOS_FLAGS: + curr_value = req->data.flr.flags; + break; + default: + return -EINVAL; + } + + if (curr_value != new_value) + ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); + + return ret; +} + /** * dev_pm_qos_update_request - modifies an existing qos request * @req : handle to list element holding a dev_pm_qos request to use @@ -336,11 +368,9 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_add_request); * -EINVAL in case of wrong parameters, -ENODEV if the device has been * removed from the system */ -int dev_pm_qos_update_request(struct dev_pm_qos_request *req, - s32 new_value) +int dev_pm_qos_update_request(struct dev_pm_qos_request *req, s32 new_value) { - s32 curr_value; - int ret = 0; + int ret; if (!req) /*guard against callers passing in null */ return -EINVAL; @@ -350,29 +380,9 @@ int dev_pm_qos_update_request(struct dev_pm_qos_request *req, return -EINVAL; mutex_lock(&dev_pm_qos_mtx); - - if (!req->dev->power.qos) { - ret = -ENODEV; - goto out; - } - - switch(req->type) { - case DEV_PM_QOS_LATENCY: - curr_value = req->data.pnode.prio; - break; - case DEV_PM_QOS_FLAGS: - curr_value = req->data.flr.flags; - break; - default: - ret = -EINVAL; - goto out; - } - - if (curr_value != new_value) - ret = apply_constraint(req, PM_QOS_UPDATE_REQ, new_value); - - out: + ret = __dev_pm_qos_update_request(req, new_value); mutex_unlock(&dev_pm_qos_mtx); + return ret; } EXPORT_SYMBOL_GPL(dev_pm_qos_update_request); @@ -533,10 +543,19 @@ int dev_pm_qos_add_ancestor_request(struct device *dev, EXPORT_SYMBOL_GPL(dev_pm_qos_add_ancestor_request); #ifdef CONFIG_PM_RUNTIME -static void __dev_pm_qos_drop_user_request(struct device *dev) +static void __dev_pm_qos_drop_user_request(struct device *dev, + enum dev_pm_qos_req_type type) { - dev_pm_qos_remove_request(dev->power.pq_req); - dev->power.pq_req = NULL; + switch(type) { + case DEV_PM_QOS_LATENCY: + dev_pm_qos_remove_request(dev->power.qos->latency_req); + dev->power.qos->latency_req = NULL; + break; + case DEV_PM_QOS_FLAGS: + dev_pm_qos_remove_request(dev->power.qos->flags_req); + dev->power.qos->flags_req = NULL; + break; + } } /** @@ -552,7 +571,7 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) if (!device_is_registered(dev) || value < 0) return -EINVAL; - if (dev->power.pq_req) + if (dev->power.qos && dev->power.qos->latency_req) return -EEXIST; req = kzalloc(sizeof(*req), GFP_KERNEL); @@ -563,10 +582,10 @@ int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) if (ret < 0) return ret; - dev->power.pq_req = req; - ret = pm_qos_sysfs_add(dev); + dev->power.qos->latency_req = req; + ret = pm_qos_sysfs_add_latency(dev); if (ret) - __dev_pm_qos_drop_user_request(dev); + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); return ret; } @@ -578,10 +597,87 @@ EXPORT_SYMBOL_GPL(dev_pm_qos_expose_latency_limit); */ void dev_pm_qos_hide_latency_limit(struct device *dev) { - if (dev->power.pq_req) { - pm_qos_sysfs_remove(dev); - __dev_pm_qos_drop_user_request(dev); + if (dev->power.qos && dev->power.qos->latency_req) { + pm_qos_sysfs_remove_latency(dev); + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_LATENCY); } } EXPORT_SYMBOL_GPL(dev_pm_qos_hide_latency_limit); + +/** + * dev_pm_qos_expose_flags - Expose PM QoS flags of a device to user
space. + * @dev: Device whose PM QoS flags are to be exposed to user space. + * @val: Initial values of the flags. + */ +int dev_pm_qos_expose_flags(struct device *dev, s32 val) +{ + struct dev_pm_qos_request *req; + int ret; + + if (!device_is_registered(dev)) + return -EINVAL; + + if (dev->power.qos && dev->power.qos->flags_req) + return -EEXIST; + + req = kzalloc(sizeof(*req), GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = dev_pm_qos_add_request(dev, req, DEV_PM_QOS_FLAGS, val); + if (ret < 0) + return ret; + + dev->power.qos->flags_req = req; + ret = pm_qos_sysfs_add_flags(dev); + if (ret) + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_qos_expose_flags); + +/** + * dev_pm_qos_hide_flags - Hide PM QoS flags of a device from user space. + * @dev: Device whose PM QoS flags are to be hidden from user space. + */ +void dev_pm_qos_hide_flags(struct device *dev) +{ + if (dev->power.qos && dev->power.qos->flags_req) { + pm_qos_sysfs_remove_flags(dev); + __dev_pm_qos_drop_user_request(dev, DEV_PM_QOS_FLAGS); + } +} +EXPORT_SYMBOL_GPL(dev_pm_qos_hide_flags); + +/** + * dev_pm_qos_update_flags - Update PM QoS flags request owned by user space. + * @dev: Device to update the PM QoS flags request for. + * @mask: Flags to set/clear. + * @set: Whether to set or clear the flags (true means set). + */ +int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set) +{ + s32 value; + int ret; + + if (!dev->power.qos || !dev->power.qos->flags_req) + return -EINVAL; + + pm_runtime_get_sync(dev); + mutex_lock(&dev_pm_qos_mtx); + + value = dev_pm_qos_requested_flags(dev); + if (set) + value |= mask; + else + value &= ~mask; + + ret = __dev_pm_qos_update_request(dev->power.qos->flags_req, value); + + mutex_unlock(&dev_pm_qos_mtx); + pm_runtime_put(dev); + + return ret; +} #endif /* CONFIG_PM_RUNTIME */ diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index 54c61ffa2044..50d16e3cb0a9 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -221,7 +221,7 @@ static DEVICE_ATTR(autosuspend_delay_ms, 0644, autosuspend_delay_ms_show, static ssize_t pm_qos_latency_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev->power.pq_req->data.pnode.prio); + return sprintf(buf, "%d\n", dev_pm_qos_requested_latency(dev)); } static ssize_t pm_qos_latency_store(struct device *dev, @@ -237,12 +237,66 @@ static ssize_t pm_qos_latency_store(struct device *dev, if (value < 0) return -EINVAL; - ret = dev_pm_qos_update_request(dev->power.pq_req, value); + ret = dev_pm_qos_update_request(dev->power.qos->latency_req, value); return ret < 0 ? ret : n; } static DEVICE_ATTR(pm_qos_resume_latency_us, 0644, pm_qos_latency_show, pm_qos_latency_store); + +static ssize_t pm_qos_no_power_off_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_NO_POWER_OFF)); +} + +static ssize_t pm_qos_no_power_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + int ret; + + if (kstrtoint(buf, 0, &ret)) + return -EINVAL; + + if (ret != 0 && ret != 1) + return -EINVAL; + + ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_NO_POWER_OFF, ret); + return ret < 0 ? 
ret : n; +} + +static DEVICE_ATTR(pm_qos_no_power_off, 0644, + pm_qos_no_power_off_show, pm_qos_no_power_off_store); + +static ssize_t pm_qos_remote_wakeup_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", !!(dev_pm_qos_requested_flags(dev) + & PM_QOS_FLAG_REMOTE_WAKEUP)); +} + +static ssize_t pm_qos_remote_wakeup_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t n) +{ + int ret; + + if (kstrtoint(buf, 0, &ret)) + return -EINVAL; + + if (ret != 0 && ret != 1) + return -EINVAL; + + ret = dev_pm_qos_update_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP, ret); + return ret < 0 ? ret : n; +} + +static DEVICE_ATTR(pm_qos_remote_wakeup, 0644, + pm_qos_remote_wakeup_show, pm_qos_remote_wakeup_store); #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -564,15 +618,27 @@ static struct attribute_group pm_runtime_attr_group = { .attrs = runtime_attrs, }; -static struct attribute *pm_qos_attrs[] = { +static struct attribute *pm_qos_latency_attrs[] = { #ifdef CONFIG_PM_RUNTIME &dev_attr_pm_qos_resume_latency_us.attr, #endif /* CONFIG_PM_RUNTIME */ NULL, }; -static struct attribute_group pm_qos_attr_group = { +static struct attribute_group pm_qos_latency_attr_group = { .name = power_group_name, - .attrs = pm_qos_attrs, + .attrs = pm_qos_latency_attrs, +}; + +static struct attribute *pm_qos_flags_attrs[] = { +#ifdef CONFIG_PM_RUNTIME + &dev_attr_pm_qos_no_power_off.attr, + &dev_attr_pm_qos_remote_wakeup.attr, +#endif /* CONFIG_PM_RUNTIME */ + NULL, +}; +static struct attribute_group pm_qos_flags_attr_group = { + .name = power_group_name, + .attrs = pm_qos_flags_attrs, }; int dpm_sysfs_add(struct device *dev) @@ -615,14 +681,24 @@ void wakeup_sysfs_remove(struct device *dev) sysfs_unmerge_group(&dev->kobj, &pm_wakeup_attr_group); } -int pm_qos_sysfs_add(struct device *dev) +int pm_qos_sysfs_add_latency(struct device *dev) +{ + return sysfs_merge_group(&dev->kobj, &pm_qos_latency_attr_group); +} + +void pm_qos_sysfs_remove_latency(struct device *dev) +{ + sysfs_unmerge_group(&dev->kobj, &pm_qos_latency_attr_group); +} + +int pm_qos_sysfs_add_flags(struct device *dev) { - return sysfs_merge_group(&dev->kobj, &pm_qos_attr_group); + return sysfs_merge_group(&dev->kobj, &pm_qos_flags_attr_group); } -void pm_qos_sysfs_remove(struct device *dev) +void pm_qos_sysfs_remove_flags(struct device *dev) { - sysfs_unmerge_group(&dev->kobj, &pm_qos_attr_group); + sysfs_unmerge_group(&dev->kobj, &pm_qos_flags_attr_group); } void rpm_sysfs_remove(struct device *dev) diff --git a/include/linux/pm.h b/include/linux/pm.h index 0ce6df94221a..03d7bb145311 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -546,7 +546,6 @@ struct dev_pm_info { unsigned long active_jiffies; unsigned long suspended_jiffies; unsigned long accounting_timestamp; - struct dev_pm_qos_request *pq_req; #endif struct pm_subsys_data *subsys_data; /* Owned by the subsystem. 
*/ struct dev_pm_qos *qos; diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 3af7d8573c23..5a95013905c8 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -34,6 +34,9 @@ enum pm_qos_flags_status { #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_DEV_LAT_DEFAULT_VALUE 0 +#define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) +#define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) + struct pm_qos_request { struct plist_node node; int pm_qos_class; @@ -86,6 +89,8 @@ struct pm_qos_flags { struct dev_pm_qos { struct pm_qos_constraints latency; struct pm_qos_flags flags; + struct dev_pm_qos_request *latency_req; + struct dev_pm_qos_request *flags_req; }; /* Action requested to pm_qos_update_target */ @@ -187,10 +192,31 @@ static inline int dev_pm_qos_add_ancestor_request(struct device *dev, #ifdef CONFIG_PM_RUNTIME int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value); void dev_pm_qos_hide_latency_limit(struct device *dev); +int dev_pm_qos_expose_flags(struct device *dev, s32 value); +void dev_pm_qos_hide_flags(struct device *dev); +int dev_pm_qos_update_flags(struct device *dev, s32 mask, bool set); + +static inline s32 dev_pm_qos_requested_latency(struct device *dev) +{ + return dev->power.qos->latency_req->data.pnode.prio; +} + +static inline s32 dev_pm_qos_requested_flags(struct device *dev) +{ + return dev->power.qos->flags_req->data.flr.flags; +} #else static inline int dev_pm_qos_expose_latency_limit(struct device *dev, s32 value) { return 0; } static inline void dev_pm_qos_hide_latency_limit(struct device *dev) {} +static inline int dev_pm_qos_expose_flags(struct device *dev, s32 value) { return 0; } +static inline void dev_pm_qos_hide_flags(struct device *dev) {} +static inline int dev_pm_qos_update_flags(struct device *dev, s32 m, bool set) { return 0; } + +static inline s32 dev_pm_qos_requested_latency(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } #endif #endif -- cgit v1.2.3
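For illustration, a driver could use the device PM QoS flags API introduced by the patches above roughly as follows. This is a minimal sketch rather than code from the series: the my_* names are hypothetical and error handling is abbreviated.

#include <linux/device.h>
#include <linux/pm_qos.h>

static struct dev_pm_qos_request my_flags_req;  /* hypothetical request object */

static int my_driver_probe(struct device *dev)  /* hypothetical probe */
{
        int ret;

        /* Request that power not be removed entirely from the device. */
        ret = dev_pm_qos_add_request(dev, &my_flags_req, DEV_PM_QOS_FLAGS,
                                     PM_QOS_FLAG_NO_POWER_OFF);
        if (ret < 0)
                return ret;

        /* effective_flags is the OR of all requests in the device's set. */
        if (dev_pm_qos_flags(dev, PM_QOS_FLAG_NO_POWER_OFF) == PM_QOS_FLAGS_ALL)
                dev_info(dev, "no_power_off requested\n");

        return 0;
}

Dropping the request again with dev_pm_qos_remove_request(&my_flags_req) makes pm_qos_update_flags() recompute effective_flags without it, as in pm_qos_flags_remove_req() above.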
From e5cc8ef31267317f3e177415c84e3f3602e5bfc9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 2 Nov 2012 01:41:01 +0100 Subject: ACPI / PM: Provide ACPI PM callback routines for subsystems Some bus types don't support power management natively, but generally there may be device nodes in ACPI tables corresponding to the devices whose bus types they are (under ACPI 5 those bus types may be SPI, I2C and platform). If that is the case, standard ACPI power management may be applied to those devices, although currently the kernel has no means for that. For this reason, provide a set of routines that may be used as power management callbacks for such devices. This may be done in three different ways. (1) Device drivers handling the devices in question may run acpi_dev_pm_attach() in their .probe() routines, which (on success) will cause the devices to be added to the general ACPI PM domain and ACPI power management will be used for them going forward. Then, acpi_dev_pm_detach() may be used to remove the devices from the general ACPI PM domain if ACPI power management is not necessary for them any more. (2) The devices' subsystems may use acpi_subsys_runtime_suspend(), acpi_subsys_runtime_resume(), acpi_subsys_prepare(), acpi_subsys_suspend_late(), acpi_subsys_resume_early() as their power management callbacks in the same way as the general ACPI PM domain does that. (3) The devices' drivers may execute acpi_dev_suspend_late(), acpi_dev_resume_early(), acpi_dev_runtime_suspend(), acpi_dev_runtime_resume() from their power management callbacks as appropriate, if that's absolutely necessary, but it is not recommended to do that, because such drivers may not work without ACPI support as a result. Signed-off-by: Rafael J. Wysocki --- drivers/acpi/device_pm.c | 317 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 34 +++++ 2 files changed, 351 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c index 7ddd93463a2e..a8e059f69d50 100644 --- a/drivers/acpi/device_pm.c +++ b/drivers/acpi/device_pm.c @@ -224,6 +224,22 @@ int acpi_pm_device_sleep_state(struct device *dev, int *d_min_p, int d_max_in) EXPORT_SYMBOL(acpi_pm_device_sleep_state); #ifdef CONFIG_PM_RUNTIME +/** + * acpi_wakeup_device - Wakeup notification handler for ACPI devices. + * @handle: ACPI handle of the device the notification is for. + * @event: Type of the signaled event. + * @context: Device corresponding to @handle. + */ +static void acpi_wakeup_device(acpi_handle handle, u32 event, void *context) +{ + struct device *dev = context; + + if (event == ACPI_NOTIFY_DEVICE_WAKE && dev) { + pm_wakeup_event(dev, 0); + pm_runtime_resume(dev); + } +} + /** * __acpi_device_run_wake - Enable/disable runtime remote wakeup for device. * @adev: ACPI device to enable/disable the remote wakeup for. @@ -283,6 +299,9 @@ int acpi_pm_device_run_wake(struct device *phys_dev, bool enable) return __acpi_device_run_wake(adev, enable); } EXPORT_SYMBOL(acpi_pm_device_run_wake); +#else +static inline void acpi_wakeup_device(acpi_handle handle, u32 event, + void *context) {} #endif /* CONFIG_PM_RUNTIME */ #ifdef CONFIG_PM_SLEEP @@ -329,3 +348,301 @@ int acpi_pm_device_sleep_wake(struct device *dev, bool enable) return error; } #endif /* CONFIG_PM_SLEEP */ + +/** + * acpi_dev_pm_get_node - Get ACPI device node for the given physical device. + * @dev: Device to get the ACPI node for. + */ +static struct acpi_device *acpi_dev_pm_get_node(struct device *dev) +{ + acpi_handle handle = DEVICE_ACPI_HANDLE(dev); + struct acpi_device *adev; + + return handle && ACPI_SUCCESS(acpi_bus_get_device(handle, &adev)) ? + adev : NULL; +} + +/** + * acpi_dev_pm_low_power - Put ACPI device into a low-power state. + * @dev: Device to put into a low-power state. + * @adev: ACPI device node corresponding to @dev. + * @system_state: System state to choose the device state for. + */ +static int acpi_dev_pm_low_power(struct device *dev, struct acpi_device *adev, + u32 system_state) +{ + int power_state; + + if (!acpi_device_power_manageable(adev)) + return 0; + + power_state = acpi_device_power_state(dev, adev, system_state, + ACPI_STATE_D3, NULL); + if (power_state < ACPI_STATE_D0 || power_state > ACPI_STATE_D3) + return -EIO; + + return acpi_device_set_power(adev, power_state); } + +/** + * acpi_dev_pm_full_power - Put ACPI device into the full-power state. + * @adev: ACPI device node to put into the full-power state. + */ +static int acpi_dev_pm_full_power(struct acpi_device *adev) +{ + return acpi_device_power_manageable(adev) ? + acpi_device_set_power(adev, ACPI_STATE_D0) : 0; +} + +#ifdef CONFIG_PM_RUNTIME +/** + * acpi_dev_runtime_suspend - Put device into a low-power state using ACPI. + * @dev: Device to put into a low-power state. + * + * Put the given device into a runtime low-power state using the standard ACPI + * mechanism.
Set up remote wakeup if desired, choose the state to put the + * device into (this checks if remote wakeup is expected to work too), and set + * the power state of the device. + */ +int acpi_dev_runtime_suspend(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + bool remote_wakeup; + int error; + + if (!adev) + return 0; + + remote_wakeup = dev_pm_qos_flags(dev, PM_QOS_FLAG_REMOTE_WAKEUP) > + PM_QOS_FLAGS_NONE; + error = __acpi_device_run_wake(adev, remote_wakeup); + if (remote_wakeup && error) + return -EAGAIN; + + error = acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0); + if (error) + __acpi_device_run_wake(adev, false); + + return error; +} +EXPORT_SYMBOL_GPL(acpi_dev_runtime_suspend); + +/** + * acpi_dev_runtime_resume - Put device into the full-power state using ACPI. + * @dev: Device to put into the full-power state. + * + * Put the given device into the full-power state using the standard ACPI + * mechanism at run time. Set the power state of the device to ACPI D0 and + * disable remote wakeup. + */ +int acpi_dev_runtime_resume(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + int error; + + if (!adev) + return 0; + + error = acpi_dev_pm_full_power(adev); + __acpi_device_run_wake(adev, false); + return error; +} +EXPORT_SYMBOL_GPL(acpi_dev_runtime_resume); + +/** + * acpi_subsys_runtime_suspend - Suspend device using ACPI. + * @dev: Device to suspend. + * + * Carry out the generic runtime suspend procedure for @dev and use ACPI to put + * it into a runtime low-power state. + */ +int acpi_subsys_runtime_suspend(struct device *dev) +{ + int ret = pm_generic_runtime_suspend(dev); + return ret ? ret : acpi_dev_runtime_suspend(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_runtime_suspend); + +/** + * acpi_subsys_runtime_resume - Resume device using ACPI. + * @dev: Device to Resume. + * + * Use ACPI to put the given device into the full-power state and carry out the + * generic runtime resume procedure for it. + */ +int acpi_subsys_runtime_resume(struct device *dev) +{ + int ret = acpi_dev_runtime_resume(dev); + return ret ? ret : pm_generic_runtime_resume(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_runtime_resume); +#endif /* CONFIG_PM_RUNTIME */ + +#ifdef CONFIG_PM_SLEEP +/** + * acpi_dev_suspend_late - Put device into a low-power state using ACPI. + * @dev: Device to put into a low-power state. + * + * Put the given device into a low-power state during system transition to a + * sleep state using the standard ACPI mechanism. Set up system wakeup if + * desired, choose the state to put the device into (this checks if system + * wakeup is expected to work too), and set the power state of the device. + */ +int acpi_dev_suspend_late(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + u32 target_state; + bool wakeup; + int error; + + if (!adev) + return 0; + + target_state = acpi_target_system_state(); + wakeup = device_may_wakeup(dev); + error = __acpi_device_sleep_wake(adev, target_state, wakeup); + if (wakeup && error) + return error; + + error = acpi_dev_pm_low_power(dev, adev, target_state); + if (error) + __acpi_device_sleep_wake(adev, ACPI_STATE_UNKNOWN, false); + + return error; +} +EXPORT_SYMBOL_GPL(acpi_dev_suspend_late); + +/** + * acpi_dev_resume_early - Put device into the full-power state using ACPI. + * @dev: Device to put into the full-power state. + * + * Put the given device into the full-power state using the standard ACPI + * mechanism during system transition to the working state. 
Set the power + * state of the device to ACPI D0 and disable remote wakeup. + */ +int acpi_dev_resume_early(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + int error; + + if (!adev) + return 0; + + error = acpi_dev_pm_full_power(adev); + __acpi_device_sleep_wake(adev, ACPI_STATE_UNKNOWN, false); + return error; +} +EXPORT_SYMBOL_GPL(acpi_dev_resume_early); + +/** + * acpi_subsys_prepare - Prepare device for system transition to a sleep state. + * @dev: Device to prepare. + */ +int acpi_subsys_prepare(struct device *dev) +{ + /* + * Follow PCI and resume devices suspended at run time before running + * their system suspend callbacks. + */ + pm_runtime_resume(dev); + return pm_generic_prepare(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_prepare); + +/** + * acpi_subsys_suspend_late - Suspend device using ACPI. + * @dev: Device to suspend. + * + * Carry out the generic late suspend procedure for @dev and use ACPI to put + * it into a low-power state during system transition into a sleep state. + */ +int acpi_subsys_suspend_late(struct device *dev) +{ + int ret = pm_generic_suspend_late(dev); + return ret ? ret : acpi_dev_suspend_late(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_suspend_late); + +/** + * acpi_subsys_resume_early - Resume device using ACPI. + * @dev: Device to Resume. + * + * Use ACPI to put the given device into the full-power state and carry out the + * generic early resume procedure for it during system transition into the + * working state. + */ +int acpi_subsys_resume_early(struct device *dev) +{ + int ret = acpi_dev_resume_early(dev); + return ret ? ret : pm_generic_resume_early(dev); +} +EXPORT_SYMBOL_GPL(acpi_subsys_resume_early); +#endif /* CONFIG_PM_SLEEP */ + +static struct dev_pm_domain acpi_general_pm_domain = { + .ops = { +#ifdef CONFIG_PM_RUNTIME + .runtime_suspend = acpi_subsys_runtime_suspend, + .runtime_resume = acpi_subsys_runtime_resume, + .runtime_idle = pm_generic_runtime_idle, +#endif +#ifdef CONFIG_PM_SLEEP + .prepare = acpi_subsys_prepare, + .suspend_late = acpi_subsys_suspend_late, + .resume_early = acpi_subsys_resume_early, + .poweroff_late = acpi_subsys_suspend_late, + .restore_early = acpi_subsys_resume_early, +#endif + }, +}; + +/** + * acpi_dev_pm_attach - Prepare device for ACPI power management. + * @dev: Device to prepare. + * + * If @dev has a valid ACPI handle that has a valid struct acpi_device object + * attached to it, install a wakeup notification handler for the device and + * add it to the general ACPI PM domain. + * + * This assumes that the @dev's bus type uses generic power management callbacks + * (or doesn't use any power management callbacks at all). + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. + */ +int acpi_dev_pm_attach(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + + if (!adev) + return -ENODEV; + + if (dev->pm_domain) + return -EEXIST; + + acpi_add_pm_notifier(adev, acpi_wakeup_device, dev); + dev->pm_domain = &acpi_general_pm_domain; + return 0; +} +EXPORT_SYMBOL_GPL(acpi_dev_pm_attach); + +/** + * acpi_dev_pm_detach - Remove ACPI power management from the device. + * @dev: Device to take care of. + * + * Remove the device from the general ACPI PM domain and remove its wakeup + * notifier. + * + * Callers must ensure proper synchronization of this function with power + * management callbacks. 
+ */ +void acpi_dev_pm_detach(struct device *dev) +{ + struct acpi_device *adev = acpi_dev_pm_get_node(dev); + + if (adev && dev->pm_domain == &acpi_general_pm_domain) { + dev->pm_domain = NULL; + acpi_remove_pm_notifier(adev, acpi_wakeup_device); + } +} +EXPORT_SYMBOL_GPL(acpi_dev_pm_detach); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 90be98981102..0676b6ac57fa 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -430,4 +430,38 @@ acpi_status acpi_os_prepare_sleep(u8 sleep_state, #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif +#if defined(CONFIG_ACPI) && defined(CONFIG_PM_RUNTIME) +int acpi_dev_runtime_suspend(struct device *dev); +int acpi_dev_runtime_resume(struct device *dev); +int acpi_subsys_runtime_suspend(struct device *dev); +int acpi_subsys_runtime_resume(struct device *dev); +#else +static inline int acpi_dev_runtime_suspend(struct device *dev) { return 0; } +static inline int acpi_dev_runtime_resume(struct device *dev) { return 0; } +static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } +static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } +#endif + +#ifdef CONFIG_ACPI_SLEEP +int acpi_dev_suspend_late(struct device *dev); +int acpi_dev_resume_early(struct device *dev); +int acpi_subsys_prepare(struct device *dev); +int acpi_subsys_suspend_late(struct device *dev); +int acpi_subsys_resume_early(struct device *dev); +#else +static inline int acpi_dev_suspend_late(struct device *dev) { return 0; } +static inline int acpi_dev_resume_early(struct device *dev) { return 0; } +static inline int acpi_subsys_prepare(struct device *dev) { return 0; } +static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } +static inline int acpi_subsys_resume_early(struct device *dev) { return 0; } +#endif + +#if defined(CONFIG_ACPI) && defined(CONFIG_PM) +int acpi_dev_pm_attach(struct device *dev); +void acpi_dev_pm_detach(struct device *dev); +#else +static inline int acpi_dev_pm_attach(struct device *dev) { return -ENODEV; } +static inline void acpi_dev_pm_detach(struct device *dev) {} +#endif + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3
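To illustrate the first of the three ways described in the changelog above, a hypothetical platform driver could opt into the general ACPI PM domain like this. A minimal sketch; the my_* names are not part of the patch and error handling is abbreviated.

#include <linux/acpi.h>
#include <linux/platform_device.h>

static int my_probe(struct platform_device *pdev)
{
        int ret;

        /* Join the general ACPI PM domain if the device has an ACPI node. */
        ret = acpi_dev_pm_attach(&pdev->dev);
        if (ret)
                return ret;     /* -ENODEV: no ACPI node, -EEXIST: PM domain already set */

        /* ... the usual probe work goes here ... */
        return 0;
}

static int my_remove(struct platform_device *pdev)
{
        /* Leave the PM domain and remove the wakeup notifier again. */
        acpi_dev_pm_detach(&pdev->dev);
        return 0;
}

After acpi_dev_pm_attach() succeeds, the acpi_subsys_* callbacks of acpi_general_pm_domain handle the device's runtime PM and system sleep transitions without further driver involvement.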
From 1bad2f19f7f79d1ec9e6c48168fd7ce8dc1c305f Mon Sep 17 00:00:00 2001 From: Kristen Carlson Accardi Date: Fri, 26 Oct 2012 13:39:15 +0200 Subject: ACPI / Sleep: add acpi_sleep=nonvs_s3 parameter The ACPI specification would like us to save NVS at hibernation time, but makes no mention of saving NVS over S3. Not all versions of Windows do this either, and it is clear that not all machines need NVS saved/restored over S3. Allow the user to improve their suspend/resume time by disabling the NVS save/restore at S3 time, but continue to do the NVS save/restore for S4 as specified. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 17 ++++++++++++++++- include/linux/acpi.h | 1 + 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 11676cf65aee..d5e0d717005a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -101,6 +101,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); + if (strncmp(str, "nonvs_s3", 8) == 0) + acpi_nvs_nosave_s3(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); str = strchr(str, ','); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index fdcdbb652915..8640782944cc 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -97,6 +97,21 @@ void __init acpi_nvs_nosave(void) nvs_nosave = true; } +/* + * The ACPI specification wants us to save NVS memory regions during hibernation + * but says nothing about saving NVS during S3. Not all versions of Windows + * save NVS on S3 suspend either, and it is clear that not all systems need + * NVS to be saved at S3 time. To improve suspend/resume time, allow the + * user to disable saving NVS on S3 if their system does not require it, but + * continue to save/restore NVS for S4 as specified. + */ +static bool nvs_nosave_s3; + +void __init acpi_nvs_nosave_s3(void) +{ + nvs_nosave_s3 = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -243,7 +258,7 @@ static int acpi_suspend_begin(suspend_state_t pm_state) u32 acpi_state = acpi_suspend_states[pm_state]; int error = 0; - error = nvs_nosave ? 0 : suspend_nvs_alloc(); + error = (nvs_nosave || nvs_nosave_s3) ? 0 : suspend_nvs_alloc(); if (error) return error; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 90be98981102..3cf93491125c 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -261,6 +261,7 @@ int acpi_resources_are_enforced(void); void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); +void __init acpi_nvs_nosave_s3(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3 From 9743fdea9f9473cd2440741342a5ed8e19eb51bd Mon Sep 17 00:00:00 2001 From: Yuanhan Liu Date: Fri, 26 Oct 2012 13:39:24 +0200 Subject: ACPI: move acpi_no_s4_hw_signature() declaration into #ifdef CONFIG_HIBERNATION acpi_no_s4_hw_signature is defined in the #ifdef CONFIG_HIBERNATION block, but the current code puts the declaration in the #ifdef CONFIG_PM_SLEEP block. I happened to meet this issue when I turned off the PM_SLEEP config manually: arch/x86/kernel/acpi/sleep.c:100:4: error: implicit declaration of function ‘acpi_no_s4_hw_signature’ [-Werror=implicit-function-declaration] Signed-off-by: Yuanhan Liu Reviewed-by: Fengguang Wu Signed-off-by: Rafael J.
Wysocki --- include/linux/acpi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3cf93491125c..87edfcc0ab62 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -257,8 +257,11 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -#ifdef CONFIG_PM_SLEEP +#ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); +#endif + +#ifdef CONFIG_PM_SLEEP void __init acpi_old_suspend_ordering(void); void __init acpi_nvs_nosave(void); void __init acpi_nvs_nosave_s3(void); -- cgit v1.2.3 From 06f64c8f239a47b359c60301914c783b56b32c13 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 31 Oct 2012 22:44:33 +0100 Subject: driver core / ACPI: Move ACPI support to core device and driver types With ACPI 5 we are starting to see devices that don't natively support discovery but can be enumerated with the help of the ACPI namespace. Typically, these devices can be represented in the Linux device driver model as platform devices or some serial bus devices, like SPI or I2C devices. Since we want to re-use existing drivers for those devices, we need a way for drivers to specify the ACPI IDs of supported devices, so that they can be matched against device nodes in the ACPI namespace. To this end, it is sufficient to add a pointer to an array of supported ACPI device IDs, that can be provided by the driver, to struct device_driver. Moreover, things like ACPI power management need to have access to the ACPI handle of each supported device, because that handle is used to invoke AML methods associated with the corresponding ACPI device node. The ACPI handles of devices are now stored in the archdata member structure of struct device whose definition depends on the architecture and includes the ACPI handle only on x86 and ia64. Since the pointer to an array of supported ACPI IDs is added to struct device_driver in an architecture-independent way, it is logical to move the ACPI handle from archdata to struct device itself at the same time. This also makes code more straightforward in some places and follows the example of Device Trees that have a pointer to struct device_node in there too. This changeset is based on Mika Westerberg's work. Signed-off-by: Mika Westerberg Acked-by: Greg Kroah-Hartman Acked-by: H. Peter Anvin Acked-by: Tony Luck Signed-off-by: Rafael J.
Wysocki --- arch/ia64/include/asm/device.h | 3 --- arch/x86/include/asm/device.h | 3 --- drivers/acpi/glue.c | 14 ++++++-------- include/acpi/acpi_bus.h | 2 +- include/linux/device.h | 4 ++++ 5 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h index d05e78f6db94..f69c32ffbe6a 100644 --- a/arch/ia64/include/asm/device.h +++ b/arch/ia64/include/asm/device.h @@ -7,9 +7,6 @@ #define _ASM_IA64_DEVICE_H struct dev_archdata { -#ifdef CONFIG_ACPI - void *acpi_handle; -#endif #ifdef CONFIG_INTEL_IOMMU void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 93e1c55f14ab..03dd72957d2f 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -2,9 +2,6 @@ #define _ASM_X86_DEVICE_H struct dev_archdata { -#ifdef CONFIG_ACPI - void *acpi_handle; -#endif #ifdef CONFIG_X86_DEV_DMA_OPS struct dma_map_ops *dma_ops; #endif diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 08373086cd7e..2f3849aedc97 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -134,7 +134,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2]; int retval = -EINVAL; - if (dev->archdata.acpi_handle) { + if (dev->acpi_handle) { dev_warn(dev, "Drivers changed 'acpi_handle'\n"); return -EINVAL; } @@ -169,7 +169,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) acpi_dev->physical_node_count++; mutex_unlock(&acpi_dev->physical_node_lock); - dev->archdata.acpi_handle = handle; + dev->acpi_handle = handle; if (!physical_node->node_id) strcpy(physical_node_name, PHYSICAL_NODE_STRING); @@ -198,11 +198,10 @@ static int acpi_unbind_one(struct device *dev) acpi_status status; struct list_head *node, *next; - if (!dev->archdata.acpi_handle) + if (!dev->acpi_handle) return 0; - status = acpi_bus_get_device(dev->archdata.acpi_handle, - &acpi_dev); + status = acpi_bus_get_device(dev->acpi_handle, &acpi_dev); if (ACPI_FAILURE(status)) goto err; @@ -228,7 +227,7 @@ static int acpi_unbind_one(struct device *dev) sysfs_remove_link(&acpi_dev->dev.kobj, physical_node_name); sysfs_remove_link(&dev->kobj, "firmware_node"); - dev->archdata.acpi_handle = NULL; + dev->acpi_handle = NULL; /* acpi_bind_one increase refcnt by one */ put_device(dev); kfree(entry); @@ -269,8 +268,7 @@ static int acpi_platform_notify(struct device *dev) if (!ret) { struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; - acpi_get_name(dev->archdata.acpi_handle, - ACPI_FULL_PATHNAME, &buffer); + acpi_get_name(dev->acpi_handle, ACPI_FULL_PATHNAME, &buffer); DBG("Device %s -> %s\n", dev_name(dev), (char *)buffer.pointer); kfree(buffer.pointer); } else diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0daa0fbd8654..bb1537c5e672 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -410,7 +410,7 @@ acpi_handle acpi_get_child(acpi_handle, u64); int acpi_is_root_bridge(acpi_handle); acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); -#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->archdata.acpi_handle)) +#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->acpi_handle)) int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); diff --git a/include/linux/device.h 
b/include/linux/device.h index 86ef6ab553b1..cc3aee57a46e 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -190,6 +190,7 @@ extern struct klist *bus_get_device_klist(struct bus_type *bus); * @mod_name: Used for built-in modules. * @suppress_bind_attrs: Disables bind/unbind via sysfs. * @of_match_table: The open firmware table. + * @acpi_match_table: The ACPI match table. * @probe: Called to query the existence of a specific device, * whether this driver can work with it, and bind the driver * to a specific device. @@ -223,6 +224,7 @@ struct device_driver { bool suppress_bind_attrs; /* disables bind/unbind via sysfs */ const struct of_device_id *of_match_table; + const struct acpi_device_id *acpi_match_table; int (*probe) (struct device *dev); int (*remove) (struct device *dev); @@ -616,6 +618,7 @@ struct device_dma_parameters { * @dma_mem: Internal for coherent mem override. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. + * @acpi_handle: Associated ACPI device node's namespace handle. * @devt: For creating the sysfs "dev". * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. @@ -680,6 +683,7 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ + void *acpi_handle; /* associated ACPI device node */ dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ -- cgit v1.2.3 From cf761af9ee0f2c172710ad2b7ca029016b5d4c45 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Wed, 31 Oct 2012 22:44:41 +0100 Subject: ACPI: Provide generic functions for matching ACPI device nodes Introduce function acpi_match_device() allowing callers to match struct device objects with populated acpi_handle fields against arrays of ACPI device IDs. Also introduce function acpi_driver_match_device() using acpi_match_device() internally and allowing callers to match a struct device object against an array of ACPI device IDs provided by a device driver. Additionally, introduce macro ACPI_PTR() that may be used by device drivers to escape pointers to data structures whose definitions depend on CONFIG_ACPI. Signed-off-by: Mika Westerberg Acked-by: Greg Kroah-Hartman Acked-by: H. Peter Anvin Acked-by: Tony Luck Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 40 +++++++++++++++++++++++++++++++++++----- include/linux/acpi.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 1fcb8678665c..a0dfdffe54d4 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -340,8 +340,8 @@ static void acpi_device_remove_files(struct acpi_device *dev) ACPI Bus operations -------------------------------------------------------------------------- */ -int acpi_match_device_ids(struct acpi_device *device, - const struct acpi_device_id *ids) +static const struct acpi_device_id *__acpi_match_device( + struct acpi_device *device, const struct acpi_device_id *ids) { const struct acpi_device_id *id; struct acpi_hardware_id *hwid; @@ -351,14 +351,44 @@ int acpi_match_device_ids(struct acpi_device *device, * driver for it. 
*/ if (!device->status.present) - return -ENODEV; + return NULL; for (id = ids; id->id[0]; id++) list_for_each_entry(hwid, &device->pnp.ids, list) if (!strcmp((char *) id->id, hwid->id)) - return 0; + return id; + + return NULL; +} - return -ENOENT; +/** + * acpi_match_device - Match a struct device against a given list of ACPI IDs + * @ids: Array of struct acpi_device_id object to match against. + * @dev: The device structure to match. + * + * Check if @dev has a valid ACPI handle and if there is a struct acpi_device + * object for that handle and use that object to match against a given list of + * device IDs. + * + * Return a pointer to the first matching ID on success or %NULL on failure. + */ +const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, + const struct device *dev) +{ + struct acpi_device *adev; + + if (!ids || !dev->acpi_handle + || ACPI_FAILURE(acpi_bus_get_device(dev->acpi_handle, &adev))) + return NULL; + + return __acpi_match_device(adev, ids); +} +EXPORT_SYMBOL_GPL(acpi_match_device); + +int acpi_match_device_ids(struct acpi_device *device, + const struct acpi_device_id *ids) +{ + return __acpi_match_device(device, ids) ? 0 : -ENOENT; } EXPORT_SYMBOL(acpi_match_device_ids); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 90be98981102..48761cb481db 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -26,6 +26,7 @@ #define _LINUX_ACPI_H #include /* for struct resource */ +#include #ifdef CONFIG_ACPI @@ -364,6 +365,17 @@ extern int acpi_nvs_register(__u64 start, __u64 size); extern int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), void *data); +const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, + const struct device *dev); + +static inline bool acpi_driver_match_device(struct device *dev, + const struct device_driver *drv) +{ + return !!acpi_match_device(drv->acpi_match_table, dev); +} + +#define ACPI_PTR(_ptr) (_ptr) + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -418,6 +430,22 @@ static inline int acpi_nvs_for_each_region(int (*func)(__u64, __u64, void *), return 0; } +struct acpi_device_id; + +static inline const struct acpi_device_id *acpi_match_device( + const struct acpi_device_id *ids, const struct device *dev) +{ + return NULL; +} + +static inline bool acpi_driver_match_device(struct device *dev, + const struct device_driver *drv) +{ + return false; +} + +#define ACPI_PTR(_ptr) (NULL) + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 046d9ce6820e99087e81511284045eada94950e8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 Nov 2012 00:30:01 +0100 Subject: ACPI: Move device resources interpretation code from PNP to ACPI core Move some code used for parsing ACPI device resources from the PNP subsystem to the ACPI core, so that other bus types (platform, SPI, I2C) can use the same routines for parsing resources in a consistent way, without duplicating code. Signed-off-by: Rafael J. 
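To show how a bus type can consume the helpers introduced below, here is a sketch of an acpi_walk_resources() callback built on top of them, mirroring the PNP conversion in this patch (the foo_* names are hypothetical):

    static void foo_record_resource(void *data, struct resource *r)
    {
            /* hypothetical consumer of the translated resource */
    }

    static acpi_status foo_parse_resource(struct acpi_resource *ares, void *data)
    {
            struct resource r;
            int i;

            /* fixed resources translate to exactly one struct resource */
            if (acpi_dev_resource_memory(ares, &r)
                || acpi_dev_resource_io(ares, &r)
                || acpi_dev_resource_address_space(ares, &r)
                || acpi_dev_resource_ext_address_space(ares, &r)) {
                    foo_record_resource(data, &r);
                    return AE_OK;
            }
            /* IRQ descriptors may carry several interrupts; walk them all */
            for (i = 0; acpi_dev_resource_interrupt(ares, i, &r); i++)
                    foo_record_resource(data, &r);

            return AE_OK;
    }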
Wysocki Reviewed-by: Mika Westerberg Tested-by: Mika Westerberg --- drivers/acpi/Makefile | 1 + drivers/acpi/resource.c | 393 +++++++++++++++++++++++++++++++++++++++++ drivers/pnp/base.h | 2 + drivers/pnp/pnpacpi/rsparser.c | 296 ++++--------------------------- drivers/pnp/resource.c | 16 ++ include/linux/acpi.h | 10 ++ 6 files changed, 454 insertions(+), 264 deletions(-) create mode 100644 drivers/acpi/resource.c (limited to 'include/linux') diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 227c82bbe9a1..3223edfb23b6 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -32,6 +32,7 @@ acpi-$(CONFIG_ACPI_SLEEP) += proc.o # acpi-y += bus.o glue.o acpi-y += scan.o +acpi-y += resource.o acpi-y += processor_core.o acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c new file mode 100644 index 000000000000..3e7fd349e29d --- /dev/null +++ b/drivers/acpi/resource.c @@ -0,0 +1,393 @@ +/* + * drivers/acpi/resource.c - ACPI device resources interpretation. + * + * Copyright (C) 2012, Intel Corp. + * Author: Rafael J. Wysocki + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + +#include +#include +#include +#include + +#ifdef CONFIG_X86 +#define valid_IRQ(i) (((i) != 0) && ((i) != 2)) +#else +#define valid_IRQ(i) (true) +#endif + +static unsigned long acpi_dev_memresource_flags(u64 len, u8 write_protect, + bool window) +{ + unsigned long flags = IORESOURCE_MEM; + + if (len == 0) + flags |= IORESOURCE_DISABLED; + + if (write_protect == ACPI_READ_WRITE_MEMORY) + flags |= IORESOURCE_MEM_WRITEABLE; + + if (window) + flags |= IORESOURCE_WINDOW; + + return flags; +} + +static void acpi_dev_get_memresource(struct resource *res, u64 start, u64 len, + u8 write_protect) +{ + res->start = start; + res->end = start + len - 1; + res->flags = acpi_dev_memresource_flags(len, write_protect, false); +} + +/** + * acpi_dev_resource_memory - Extract ACPI memory resource information. + * @ares: Input ACPI resource object. + * @res: Output generic resource object. + * + * Check if the given ACPI resource object represents a memory resource and + * if that's the case, use the information in it to populate the generic + * resource object pointed to by @res. 
+ */ +bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res) +{ + struct acpi_resource_memory24 *memory24; + struct acpi_resource_memory32 *memory32; + struct acpi_resource_fixed_memory32 *fixed_memory32; + + switch (ares->type) { + case ACPI_RESOURCE_TYPE_MEMORY24: + memory24 = &ares->data.memory24; + acpi_dev_get_memresource(res, memory24->minimum, + memory24->address_length, + memory24->write_protect); + break; + case ACPI_RESOURCE_TYPE_MEMORY32: + memory32 = &ares->data.memory32; + acpi_dev_get_memresource(res, memory32->minimum, + memory32->address_length, + memory32->write_protect); + break; + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + fixed_memory32 = &ares->data.fixed_memory32; + acpi_dev_get_memresource(res, fixed_memory32->address, + fixed_memory32->address_length, + fixed_memory32->write_protect); + break; + default: + return false; + } + return true; +} +EXPORT_SYMBOL_GPL(acpi_dev_resource_memory); + +static unsigned int acpi_dev_ioresource_flags(u64 start, u64 end, u8 io_decode, + bool window) +{ + int flags = IORESOURCE_IO; + + if (io_decode == ACPI_DECODE_16) + flags |= IORESOURCE_IO_16BIT_ADDR; + + if (start > end || end >= 0x10003) + flags |= IORESOURCE_DISABLED; + + if (window) + flags |= IORESOURCE_WINDOW; + + return flags; +} + +static void acpi_dev_get_ioresource(struct resource *res, u64 start, u64 len, + u8 io_decode) +{ + u64 end = start + len - 1; + + res->start = start; + res->end = end; + res->flags = acpi_dev_ioresource_flags(start, end, io_decode, false); +} + +/** + * acpi_dev_resource_io - Extract ACPI I/O resource information. + * @ares: Input ACPI resource object. + * @res: Output generic resource object. + * + * Check if the given ACPI resource object represents an I/O resource and + * if that's the case, use the information in it to populate the generic + * resource object pointed to by @res. + */ +bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res) +{ + struct acpi_resource_io *io; + struct acpi_resource_fixed_io *fixed_io; + + switch (ares->type) { + case ACPI_RESOURCE_TYPE_IO: + io = &ares->data.io; + acpi_dev_get_ioresource(res, io->minimum, + io->address_length, + io->io_decode); + break; + case ACPI_RESOURCE_TYPE_FIXED_IO: + fixed_io = &ares->data.fixed_io; + acpi_dev_get_ioresource(res, fixed_io->address, + fixed_io->address_length, + ACPI_DECODE_10); + break; + default: + return false; + } + return true; +} +EXPORT_SYMBOL_GPL(acpi_dev_resource_io); + +/** + * acpi_dev_resource_address_space - Extract ACPI address space information. + * @ares: Input ACPI resource object. + * @res: Output generic resource object. + * + * Check if the given ACPI resource object represents an address space resource + * and if that's the case, use the information in it to populate the generic + * resource object pointed to by @res. 
+ */ +bool acpi_dev_resource_address_space(struct acpi_resource *ares, + struct resource *res) +{ + acpi_status status; + struct acpi_resource_address64 addr; + bool window; + u64 len; + u8 io_decode; + + switch (ares->type) { + case ACPI_RESOURCE_TYPE_ADDRESS16: + case ACPI_RESOURCE_TYPE_ADDRESS32: + case ACPI_RESOURCE_TYPE_ADDRESS64: + break; + default: + return false; + } + + status = acpi_resource_to_address64(ares, &addr); + if (ACPI_FAILURE(status)) + return true; + + res->start = addr.minimum; + res->end = addr.maximum; + window = addr.producer_consumer == ACPI_PRODUCER; + + switch(addr.resource_type) { + case ACPI_MEMORY_RANGE: + len = addr.maximum - addr.minimum + 1; + res->flags = acpi_dev_memresource_flags(len, + addr.info.mem.write_protect, + window); + break; + case ACPI_IO_RANGE: + io_decode = addr.granularity == 0xfff ? + ACPI_DECODE_10 : ACPI_DECODE_16; + res->flags = acpi_dev_ioresource_flags(addr.minimum, + addr.maximum, + io_decode, window); + break; + case ACPI_BUS_NUMBER_RANGE: + res->flags = IORESOURCE_BUS; + break; + default: + res->flags = 0; + } + + return true; +} +EXPORT_SYMBOL_GPL(acpi_dev_resource_address_space); + +/** + * acpi_dev_resource_ext_address_space - Extract ACPI address space information. + * @ares: Input ACPI resource object. + * @res: Output generic resource object. + * + * Check if the given ACPI resource object represents an extended address space + * resource and if that's the case, use the information in it to populate the + * generic resource object pointed to by @res. + */ +bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, + struct resource *res) +{ + struct acpi_resource_extended_address64 *ext_addr; + bool window; + u64 len; + u8 io_decode; + + if (ares->type != ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64) + return false; + + ext_addr = &ares->data.ext_address64; + + res->start = ext_addr->minimum; + res->end = ext_addr->maximum; + window = ext_addr->producer_consumer == ACPI_PRODUCER; + + switch(ext_addr->resource_type) { + case ACPI_MEMORY_RANGE: + len = ext_addr->maximum - ext_addr->minimum + 1; + res->flags = acpi_dev_memresource_flags(len, + ext_addr->info.mem.write_protect, + window); + break; + case ACPI_IO_RANGE: + io_decode = ext_addr->granularity == 0xfff ? + ACPI_DECODE_10 : ACPI_DECODE_16; + res->flags = acpi_dev_ioresource_flags(ext_addr->minimum, + ext_addr->maximum, + io_decode, window); + break; + case ACPI_BUS_NUMBER_RANGE: + res->flags = IORESOURCE_BUS; + break; + default: + res->flags = 0; + } + + return true; +} +EXPORT_SYMBOL_GPL(acpi_dev_resource_ext_address_space); + +/** + * acpi_dev_irq_flags - Determine IRQ resource flags. + * @triggering: Triggering type as provided by ACPI. + * @polarity: Interrupt polarity as provided by ACPI. + * @shareable: Whether or not the interrupt is shareable. + */ +unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable) +{ + unsigned long flags; + + if (triggering == ACPI_LEVEL_SENSITIVE) + flags = polarity == ACPI_ACTIVE_LOW ? + IORESOURCE_IRQ_LOWLEVEL : IORESOURCE_IRQ_HIGHLEVEL; + else + flags = polarity == ACPI_ACTIVE_LOW ? 
+ IORESOURCE_IRQ_LOWEDGE : IORESOURCE_IRQ_HIGHEDGE; + + if (shareable == ACPI_SHARED) + flags |= IORESOURCE_IRQ_SHAREABLE; + + return flags | IORESOURCE_IRQ; +} +EXPORT_SYMBOL_GPL(acpi_dev_irq_flags); + +static void acpi_dev_irqresource_disabled(struct resource *res, u32 gsi) +{ + res->start = gsi; + res->end = gsi; + res->flags = IORESOURCE_IRQ | IORESOURCE_DISABLED; +} + +static void acpi_dev_get_irqresource(struct resource *res, u32 gsi, + u8 triggering, u8 polarity, u8 shareable) +{ + int irq, p, t; + + if (!valid_IRQ(gsi)) { + acpi_dev_irqresource_disabled(res, gsi); + return; + } + + /* + * In IO-APIC mode, use the overridden attributes. Two reasons: + * 1. BIOS bug in DSDT + * 2. BIOS uses IO-APIC mode Interrupt Source Override + */ + if (!acpi_get_override_irq(gsi, &t, &p)) { + u8 trig = t ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; + u8 pol = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; + + if (triggering != trig || polarity != pol) { + pr_warning("ACPI: IRQ %d override to %s, %s\n", gsi, + t ? "edge" : "level", p ? "low" : "high"); + triggering = trig; + polarity = pol; + } + } + + res->flags = acpi_dev_irq_flags(triggering, polarity, shareable); + irq = acpi_register_gsi(NULL, gsi, triggering, polarity); + if (irq >= 0) { + res->start = irq; + res->end = irq; + } else { + acpi_dev_irqresource_disabled(res, gsi); + } +} + +/** + * acpi_dev_resource_interrupt - Extract ACPI interrupt resource information. + * @ares: Input ACPI resource object. + * @index: Index into the array of GSIs represented by the resource. + * @res: Output generic resource object. + * + * Check if the given ACPI resource object represents an interrupt resource + * and @index does not exceed the resource's interrupt count (true is returned + * in that case regardless of the results of the other checks). If that's the + * case, register the GSI corresponding to @index from the array of interrupts + * represented by the resource and populate the generic resource object pointed + * to by @res accordingly. If the registration of the GSI is not successful, + * IORESOURCE_DISABLED will be set in that object's flags. + */ +bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, + struct resource *res) +{ + struct acpi_resource_irq *irq; + struct acpi_resource_extended_irq *ext_irq; + + switch (ares->type) { + case ACPI_RESOURCE_TYPE_IRQ: + /* + * Per spec, only one interrupt per descriptor is allowed in + * _CRS, but some firmware violates this, so parse them all.
+ */ + irq = &ares->data.irq; + if (index >= irq->interrupt_count) { + acpi_dev_irqresource_disabled(res, 0); + return false; + } + acpi_dev_get_irqresource(res, irq->interrupts[index], + irq->triggering, irq->polarity, + irq->sharable); + break; + case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: + ext_irq = &ares->data.extended_irq; + if (index >= ext_irq->interrupt_count) { + acpi_dev_irqresource_disabled(res, 0); + return false; + } + acpi_dev_get_irqresource(res, ext_irq->interrupts[index], + ext_irq->triggering, ext_irq->polarity, + ext_irq->sharable); + break; + default: + return false; + } + + return true; +} +EXPORT_SYMBOL_GPL(acpi_dev_resource_interrupt); diff --git a/drivers/pnp/base.h b/drivers/pnp/base.h index fa4e0a5db3f8..ffd53e3eb92f 100644 --- a/drivers/pnp/base.h +++ b/drivers/pnp/base.h @@ -159,6 +159,8 @@ struct pnp_resource { void pnp_free_resource(struct pnp_resource *pnp_res); +struct pnp_resource *pnp_add_resource(struct pnp_dev *dev, + struct resource *res); struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, int flags); struct pnp_resource *pnp_add_dma_resource(struct pnp_dev *dev, int dma, diff --git a/drivers/pnp/pnpacpi/rsparser.c b/drivers/pnp/pnpacpi/rsparser.c index 5be4a392a3ae..b8f4ea7b27fc 100644 --- a/drivers/pnp/pnpacpi/rsparser.c +++ b/drivers/pnp/pnpacpi/rsparser.c @@ -28,37 +28,6 @@ #include "../base.h" #include "pnpacpi.h" -#ifdef CONFIG_IA64 -#define valid_IRQ(i) (1) -#else -#define valid_IRQ(i) (((i) != 0) && ((i) != 2)) -#endif - -/* - * Allocated Resources - */ -static int irq_flags(int triggering, int polarity, int shareable) -{ - int flags; - - if (triggering == ACPI_LEVEL_SENSITIVE) { - if (polarity == ACPI_ACTIVE_LOW) - flags = IORESOURCE_IRQ_LOWLEVEL; - else - flags = IORESOURCE_IRQ_HIGHLEVEL; - } else { - if (polarity == ACPI_ACTIVE_LOW) - flags = IORESOURCE_IRQ_LOWEDGE; - else - flags = IORESOURCE_IRQ_HIGHEDGE; - } - - if (shareable == ACPI_SHARED) - flags |= IORESOURCE_IRQ_SHAREABLE; - - return flags; -} - static void decode_irq_flags(struct pnp_dev *dev, int flags, int *triggering, int *polarity, int *shareable) { @@ -94,45 +63,6 @@ static void decode_irq_flags(struct pnp_dev *dev, int flags, int *triggering, *shareable = ACPI_EXCLUSIVE; } -static void pnpacpi_parse_allocated_irqresource(struct pnp_dev *dev, - u32 gsi, int triggering, - int polarity, int shareable) -{ - int irq, flags; - int p, t; - - if (!valid_IRQ(gsi)) { - pnp_add_irq_resource(dev, gsi, IORESOURCE_DISABLED); - return; - } - - /* - * in IO-APIC mode, use overrided attribute. Two reasons: - * 1. BIOS bug in DSDT - * 2. BIOS uses IO-APIC mode Interrupt Source Override - */ - if (!acpi_get_override_irq(gsi, &t, &p)) { - t = t ? ACPI_LEVEL_SENSITIVE : ACPI_EDGE_SENSITIVE; - p = p ? ACPI_ACTIVE_LOW : ACPI_ACTIVE_HIGH; - - if (triggering != t || polarity != p) { - dev_warn(&dev->dev, "IRQ %d override to %s, %s\n", - gsi, t ? "edge":"level", p ? 
"low":"high"); - triggering = t; - polarity = p; - } - } - - flags = irq_flags(triggering, polarity, shareable); - irq = acpi_register_gsi(&dev->dev, gsi, triggering, polarity); - if (irq >= 0) - pcibios_penalize_isa_irq(irq, 1); - else - flags |= IORESOURCE_DISABLED; - - pnp_add_irq_resource(dev, irq, flags); -} - static int dma_flags(struct pnp_dev *dev, int type, int bus_master, int transfer) { @@ -177,21 +107,16 @@ static int dma_flags(struct pnp_dev *dev, int type, int bus_master, return flags; } -static void pnpacpi_parse_allocated_ioresource(struct pnp_dev *dev, u64 start, - u64 len, int io_decode, - int window) -{ - int flags = 0; - u64 end = start + len - 1; +/* + * Allocated Resources + */ - if (io_decode == ACPI_DECODE_16) - flags |= IORESOURCE_IO_16BIT_ADDR; - if (len == 0 || end >= 0x10003) - flags |= IORESOURCE_DISABLED; - if (window) - flags |= IORESOURCE_WINDOW; +static void pnpacpi_add_irqresource(struct pnp_dev *dev, struct resource *r) +{ + if (!(r->flags & IORESOURCE_DISABLED)) + pcibios_penalize_isa_irq(r->start, 1); - pnp_add_io_resource(dev, start, end, flags); + pnp_add_resource(dev, r); } /* @@ -249,130 +174,49 @@ static void pnpacpi_parse_allocated_vendor(struct pnp_dev *dev, } } -static void pnpacpi_parse_allocated_memresource(struct pnp_dev *dev, - u64 start, u64 len, - int write_protect, int window) -{ - int flags = 0; - u64 end = start + len - 1; - - if (len == 0) - flags |= IORESOURCE_DISABLED; - if (write_protect == ACPI_READ_WRITE_MEMORY) - flags |= IORESOURCE_MEM_WRITEABLE; - if (window) - flags |= IORESOURCE_WINDOW; - - pnp_add_mem_resource(dev, start, end, flags); -} - -static void pnpacpi_parse_allocated_busresource(struct pnp_dev *dev, - u64 start, u64 len) -{ - u64 end = start + len - 1; - - pnp_add_bus_resource(dev, start, end); -} - -static void pnpacpi_parse_allocated_address_space(struct pnp_dev *dev, - struct acpi_resource *res) -{ - struct acpi_resource_address64 addr, *p = &addr; - acpi_status status; - int window; - u64 len; - - status = acpi_resource_to_address64(res, p); - if (!ACPI_SUCCESS(status)) { - dev_warn(&dev->dev, "failed to convert resource type %d\n", - res->type); - return; - } - - /* Windows apparently computes length rather than using _LEN */ - len = p->maximum - p->minimum + 1; - window = (p->producer_consumer == ACPI_PRODUCER) ? 1 : 0; - - if (p->resource_type == ACPI_MEMORY_RANGE) - pnpacpi_parse_allocated_memresource(dev, p->minimum, len, - p->info.mem.write_protect, window); - else if (p->resource_type == ACPI_IO_RANGE) - pnpacpi_parse_allocated_ioresource(dev, p->minimum, len, - p->granularity == 0xfff ? ACPI_DECODE_10 : - ACPI_DECODE_16, window); - else if (p->resource_type == ACPI_BUS_NUMBER_RANGE) - pnpacpi_parse_allocated_busresource(dev, p->minimum, len); -} - -static void pnpacpi_parse_allocated_ext_address_space(struct pnp_dev *dev, - struct acpi_resource *res) -{ - struct acpi_resource_extended_address64 *p = &res->data.ext_address64; - int window; - u64 len; - - /* Windows apparently computes length rather than using _LEN */ - len = p->maximum - p->minimum + 1; - window = (p->producer_consumer == ACPI_PRODUCER) ? 1 : 0; - - if (p->resource_type == ACPI_MEMORY_RANGE) - pnpacpi_parse_allocated_memresource(dev, p->minimum, len, - p->info.mem.write_protect, window); - else if (p->resource_type == ACPI_IO_RANGE) - pnpacpi_parse_allocated_ioresource(dev, p->minimum, len, - p->granularity == 0xfff ? 
ACPI_DECODE_10 : - ACPI_DECODE_16, window); - else if (p->resource_type == ACPI_BUS_NUMBER_RANGE) - pnpacpi_parse_allocated_busresource(dev, p->minimum, len); -} - static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, void *data) { struct pnp_dev *dev = data; - struct acpi_resource_irq *irq; struct acpi_resource_dma *dma; - struct acpi_resource_io *io; - struct acpi_resource_fixed_io *fixed_io; struct acpi_resource_vendor_typed *vendor_typed; - struct acpi_resource_memory24 *memory24; - struct acpi_resource_memory32 *memory32; - struct acpi_resource_fixed_memory32 *fixed_memory32; - struct acpi_resource_extended_irq *extended_irq; + struct resource r; int i, flags; - switch (res->type) { - case ACPI_RESOURCE_TYPE_IRQ: - /* - * Per spec, only one interrupt per descriptor is allowed in - * _CRS, but some firmware violates this, so parse them all. - */ - irq = &res->data.irq; - if (irq->interrupt_count == 0) - pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); - else { - for (i = 0; i < irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - irq->interrupts[i], - irq->triggering, - irq->polarity, - irq->sharable); - } + if (acpi_dev_resource_memory(res, &r) + || acpi_dev_resource_io(res, &r) + || acpi_dev_resource_address_space(res, &r) + || acpi_dev_resource_ext_address_space(res, &r)) { + pnp_add_resource(dev, &r); + return AE_OK; + } + r.flags = 0; + if (acpi_dev_resource_interrupt(res, 0, &r)) { + pnpacpi_add_irqresource(dev, &r); + for (i = 1; acpi_dev_resource_interrupt(res, i, &r); i++) + pnpacpi_add_irqresource(dev, &r); + + if (i > 1) { /* * The IRQ encoder puts a single interrupt in each * descriptor, so if a _CRS descriptor has more than * one interrupt, we won't be able to re-encode it. */ - if (pnp_can_write(dev) && irq->interrupt_count > 1) { + if (pnp_can_write(dev)) { dev_warn(&dev->dev, "multiple interrupts in " "_CRS descriptor; configuration can't " "be changed\n"); dev->capabilities &= ~PNP_WRITE; } } - break; + return AE_OK; + } else if (r.flags & IORESOURCE_DISABLED) { + pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); + return AE_OK; + } + switch (res->type) { case ACPI_RESOURCE_TYPE_DMA: dma = &res->data.dma; if (dma->channel_count > 0 && dma->channels[0] != (u8) -1) @@ -383,26 +227,10 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, pnp_add_dma_resource(dev, dma->channels[0], flags); break; - case ACPI_RESOURCE_TYPE_IO: - io = &res->data.io; - pnpacpi_parse_allocated_ioresource(dev, - io->minimum, - io->address_length, - io->io_decode, 0); - break; - case ACPI_RESOURCE_TYPE_START_DEPENDENT: case ACPI_RESOURCE_TYPE_END_DEPENDENT: break; - case ACPI_RESOURCE_TYPE_FIXED_IO: - fixed_io = &res->data.fixed_io; - pnpacpi_parse_allocated_ioresource(dev, - fixed_io->address, - fixed_io->address_length, - ACPI_DECODE_10, 0); - break; - case ACPI_RESOURCE_TYPE_VENDOR: vendor_typed = &res->data.vendor_typed; pnpacpi_parse_allocated_vendor(dev, vendor_typed); @@ -411,66 +239,6 @@ static acpi_status pnpacpi_allocated_resource(struct acpi_resource *res, case ACPI_RESOURCE_TYPE_END_TAG: break; - case ACPI_RESOURCE_TYPE_MEMORY24: - memory24 = &res->data.memory24; - pnpacpi_parse_allocated_memresource(dev, - memory24->minimum, - memory24->address_length, - memory24->write_protect, 0); - break; - case ACPI_RESOURCE_TYPE_MEMORY32: - memory32 = &res->data.memory32; - pnpacpi_parse_allocated_memresource(dev, - memory32->minimum, - memory32->address_length, - memory32->write_protect, 0); - break; - case 
ACPI_RESOURCE_TYPE_FIXED_MEMORY32: - fixed_memory32 = &res->data.fixed_memory32; - pnpacpi_parse_allocated_memresource(dev, - fixed_memory32->address, - fixed_memory32->address_length, - fixed_memory32->write_protect, 0); - break; - case ACPI_RESOURCE_TYPE_ADDRESS16: - case ACPI_RESOURCE_TYPE_ADDRESS32: - case ACPI_RESOURCE_TYPE_ADDRESS64: - pnpacpi_parse_allocated_address_space(dev, res); - break; - - case ACPI_RESOURCE_TYPE_EXTENDED_ADDRESS64: - pnpacpi_parse_allocated_ext_address_space(dev, res); - break; - - case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - extended_irq = &res->data.extended_irq; - - if (extended_irq->interrupt_count == 0) - pnp_add_irq_resource(dev, 0, IORESOURCE_DISABLED); - else { - for (i = 0; i < extended_irq->interrupt_count; i++) { - pnpacpi_parse_allocated_irqresource(dev, - extended_irq->interrupts[i], - extended_irq->triggering, - extended_irq->polarity, - extended_irq->sharable); - } - - /* - * The IRQ encoder puts a single interrupt in each - * descriptor, so if a _CRS descriptor has more than - * one interrupt, we won't be able to re-encode it. - */ - if (pnp_can_write(dev) && - extended_irq->interrupt_count > 1) { - dev_warn(&dev->dev, "multiple interrupts in " - "_CRS descriptor; configuration can't " - "be changed\n"); - dev->capabilities &= ~PNP_WRITE; - } - } - break; - case ACPI_RESOURCE_TYPE_GENERIC_REGISTER: break; @@ -531,7 +299,7 @@ static __init void pnpacpi_parse_irq_option(struct pnp_dev *dev, if (p->interrupts[i]) __set_bit(p->interrupts[i], map.bits); - flags = irq_flags(p->triggering, p->polarity, p->sharable); + flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option_flags, &map, flags); } @@ -555,7 +323,7 @@ static __init void pnpacpi_parse_ext_irq_option(struct pnp_dev *dev, } } - flags = irq_flags(p->triggering, p->polarity, p->sharable); + flags = acpi_dev_irq_flags(p->triggering, p->polarity, p->sharable); pnp_register_irq_resource(dev, option_flags, &map, flags); } diff --git a/drivers/pnp/resource.c b/drivers/pnp/resource.c index b0ecacbe53b1..3e6db1c1dc29 100644 --- a/drivers/pnp/resource.c +++ b/drivers/pnp/resource.c @@ -503,6 +503,22 @@ static struct pnp_resource *pnp_new_resource(struct pnp_dev *dev) return pnp_res; } +struct pnp_resource *pnp_add_resource(struct pnp_dev *dev, + struct resource *res) +{ + struct pnp_resource *pnp_res; + + pnp_res = pnp_new_resource(dev); + if (!pnp_res) { + dev_err(&dev->dev, "can't add resource %pR\n", res); + return NULL; + } + + pnp_res->res = *res; + dev_dbg(&dev->dev, "%pR\n", res); + return pnp_res; +} + struct pnp_resource *pnp_add_irq_resource(struct pnp_dev *dev, int irq, int flags) { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 48761cb481db..16fcaf8dad3d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,16 @@ extern int pnpacpi_disabled; #define PXM_INVAL (-1) +bool acpi_dev_resource_memory(struct acpi_resource *ares, struct resource *res); +bool acpi_dev_resource_io(struct acpi_resource *ares, struct resource *res); +bool acpi_dev_resource_address_space(struct acpi_resource *ares, + struct resource *res); +bool acpi_dev_resource_ext_address_space(struct acpi_resource *ares, + struct resource *res); +unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); +bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, + struct resource *res); + int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, -- 
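As a quick reference for the flag helper declared above, here is a sketch of what it yields for a level-triggered, active-low, shared interrupt (the wrapper function is illustrative, not part of the patch):

    static unsigned long foo_shared_level_low_flags(void)
    {
            /* per the implementation above this evaluates to
             * IORESOURCE_IRQ | IORESOURCE_IRQ_LOWLEVEL |
             * IORESOURCE_IRQ_SHAREABLE */
            return acpi_dev_irq_flags(ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW,
                                      ACPI_SHARED);
    }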
cgit v1.2.3 From 8e345c991c8c7a3c081199ef77deada79e37618a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 15 Nov 2012 00:30:21 +0100 Subject: ACPI: Centralized processing of ACPI device resources Currently, whoever wants to use ACPI device resources has to call acpi_walk_resources() to browse the buffer returned by the _CRS method for the given device and create filters passed to that routine to apply to the individual resource items. This generally is cumbersome, time-consuming and inefficient. Moreover, it may be problematic if resource conflicts need to be resolved, because the different users of _CRS will need to do that in a consistent way. However, if there are resource conflicts, the ACPI core should be able to resolve them centrally instead of relying on various users of acpi_walk_resources() to handle them correctly together. For this reason, introduce a new function, acpi_dev_get_resources(), that can be used by subsystems to obtain a list of struct resource objects corresponding to the ACPI device resources returned by _CRS and, if necessary, to apply additional preprocessing routine to the ACPI resources before converting them to the struct resource format. Make the ACPI code that creates platform device objects use acpi_dev_get_resources() for resource processing instead of executing acpi_walk_resources() twice by itself, which causes it to be much more straightforward and easier to follow. In the future, acpi_dev_get_resources() can be extended to meet the needs of the ACPI PNP subsystem and other users of _CRS in the kernel. Signed-off-by: Rafael J. Wysocki Reviewed-by: Mika Westerberg Tested-by: Mika Westerberg --- drivers/acpi/acpi_platform.c | 94 ++++++------------------------ drivers/acpi/resource.c | 134 +++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 10 ++++ 3 files changed, 161 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index bcbb00ce6d99..7ac20d8b8f07 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -19,61 +19,6 @@ ACPI_MODULE_NAME("platform"); -struct resource_info { - struct device *dev; - struct resource *res; - size_t n, cur; -}; - -static acpi_status acpi_platform_count_resources(struct acpi_resource *res, - void *data) -{ - struct acpi_resource_extended_irq *acpi_xirq; - struct acpi_resource_irq *acpi_irq; - struct resource_info *ri = data; - - switch (res->type) { - case ACPI_RESOURCE_TYPE_IRQ: - acpi_irq = &res->data.irq; - ri->n += acpi_irq->interrupt_count; - break; - case ACPI_RESOURCE_TYPE_EXTENDED_IRQ: - acpi_xirq = &res->data.extended_irq; - ri->n += acpi_xirq->interrupt_count; - break; - default: - ri->n++; - } - - return AE_OK; -} - -static acpi_status acpi_platform_add_resources(struct acpi_resource *res, - void *data) -{ - struct resource_info *ri = data; - struct resource *r; - - r = ri->res + ri->cur; - if (acpi_dev_resource_memory(res, r) - || acpi_dev_resource_io(res, r) - || acpi_dev_resource_address_space(res, r) - || acpi_dev_resource_ext_address_space(res, r)) { - ri->cur++; - return AE_OK; - } - if (acpi_dev_resource_interrupt(res, 0, r)) { - int i; - - r++; - for (i = 1; acpi_dev_resource_interrupt(res, i, r); i++) - r++; - - ri->cur += i; - } - return AE_OK; -} - /** * acpi_create_platform_device - Create platform device for ACPI device node * @adev: ACPI device node to create a platform device for. 
@@ -89,35 +34,31 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) struct platform_device *pdev = NULL; struct acpi_device *acpi_parent; struct device *parent = NULL; - struct resource_info ri; - acpi_status status; + struct resource_list_entry *rentry; + struct list_head resource_list; + struct resource *resources; + int count; /* If the ACPI node already has a physical device attached, skip it. */ if (adev->physical_node_count) return NULL; - memset(&ri, 0, sizeof(ri)); - /* First, count the resources. */ - status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, - acpi_platform_count_resources, &ri); - if (ACPI_FAILURE(status) || !ri.n) + INIT_LIST_HEAD(&resource_list); + count = acpi_dev_get_resources(adev, &resource_list, NULL, NULL); + if (count <= 0) return NULL; - /* Next, allocate memory for all the resources and populate it. */ - ri.dev = &adev->dev; - ri.res = kzalloc(ri.n * sizeof(struct resource), GFP_KERNEL); - if (!ri.res) { - dev_err(&adev->dev, - "failed to allocate memory for resources\n"); + resources = kmalloc(count * sizeof(struct resource), GFP_KERNEL); + if (!resources) { + dev_err(&adev->dev, "No memory for resources\n"); + acpi_dev_free_resource_list(&resource_list); return NULL; } + count = 0; + list_for_each_entry(rentry, &resource_list, node) + resources[count++] = rentry->res; - status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, - acpi_platform_add_resources, &ri); - if (ACPI_FAILURE(status)) { - dev_err(&adev->dev, "failed to walk resources\n"); - goto out; - } + acpi_dev_free_resource_list(&resource_list); /* * If the ACPI node has a parent and that parent has a physical device @@ -140,7 +81,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) mutex_unlock(&acpi_parent->physical_node_lock); } pdev = platform_device_register_resndata(parent, dev_name(&adev->dev), - -1, ri.res, ri.cur, NULL, 0); + -1, resources, count, NULL, 0); if (IS_ERR(pdev)) { dev_err(&adev->dev, "platform device creation failed: %ld\n", PTR_ERR(pdev)); @@ -150,8 +91,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) dev_name(&pdev->dev)); } - out: - kfree(ri.res); + kfree(resources); return pdev; } diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 3e7fd349e29d..2bafc25482b3 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_X86 #define valid_IRQ(i) (((i) != 0) && ((i) != 2)) @@ -391,3 +392,136 @@ bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, return true; } EXPORT_SYMBOL_GPL(acpi_dev_resource_interrupt); + +/** + * acpi_dev_free_resource_list - Free resource from %acpi_dev_get_resources(). + * @list: The head of the resource list to free. 
+ */ +void acpi_dev_free_resource_list(struct list_head *list) +{ + struct resource_list_entry *rentry, *re; + + list_for_each_entry_safe(rentry, re, list, node) { + list_del(&rentry->node); + kfree(rentry); + } +} +EXPORT_SYMBOL_GPL(acpi_dev_free_resource_list); + +struct res_proc_context { + struct list_head *list; + int (*preproc)(struct acpi_resource *, void *); + void *preproc_data; + int count; + int error; +}; + +static acpi_status acpi_dev_new_resource_entry(struct resource *r, + struct res_proc_context *c) +{ + struct resource_list_entry *rentry; + + rentry = kmalloc(sizeof(*rentry), GFP_KERNEL); + if (!rentry) { + c->error = -ENOMEM; + return AE_NO_MEMORY; + } + INIT_LIST_HEAD(&rentry->node); + rentry->res = *r; + list_add_tail(&rentry->node, c->list); + c->count++; + return AE_OK; +} + +static acpi_status acpi_dev_process_resource(struct acpi_resource *ares, + void *context) +{ + struct res_proc_context *c = context; + struct resource r; + int i; + + if (c->preproc) { + int ret; + + ret = c->preproc(ares, c->preproc_data); + if (ret < 0) { + c->error = ret; + return AE_ABORT_METHOD; + } else if (ret > 0) { + return AE_OK; + } + } + + memset(&r, 0, sizeof(r)); + + if (acpi_dev_resource_memory(ares, &r) + || acpi_dev_resource_io(ares, &r) + || acpi_dev_resource_address_space(ares, &r) + || acpi_dev_resource_ext_address_space(ares, &r)) + return acpi_dev_new_resource_entry(&r, c); + + for (i = 0; acpi_dev_resource_interrupt(ares, i, &r); i++) { + acpi_status status; + + status = acpi_dev_new_resource_entry(&r, c); + if (ACPI_FAILURE(status)) + return status; + } + + return AE_OK; +} + +/** + * acpi_dev_get_resources - Get current resources of a device. + * @adev: ACPI device node to get the resources for. + * @list: Head of the resultant list of resources (must be empty). + * @preproc: The caller's preprocessing routine. + * @preproc_data: Pointer passed to the caller's preprocessing routine. + * + * Evaluate the _CRS method for the given device node and process its output by + * (1) executing the @preproc() routine provided by the caller, passing the + * resource pointer and @preproc_data to it as arguments, for each ACPI resource + * returned and (2) converting all of the returned ACPI resources into struct + * resource objects if possible. If the return value of @preproc() in step (1) + * is different from 0, step (2) is not applied to the given ACPI resource and + * if that value is negative, the whole processing is aborted and that value is + * returned as the final error code. + * + * The resultant struct resource objects are put on the list pointed to by + * @list, which must be empty initially, as members of struct resource_list_entry + * objects. Callers of this routine should use %acpi_dev_free_resource_list() to + * free that list. + * + * The number of resources in the output list is returned on success, an error + * code reflecting the error condition is returned otherwise.
+ */ +int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, + int (*preproc)(struct acpi_resource *, void *), + void *preproc_data) +{ + struct res_proc_context c; + acpi_handle not_used; + acpi_status status; + + if (!adev || !adev->handle || !list_empty(list)) + return -EINVAL; + + status = acpi_get_handle(adev->handle, METHOD_NAME__CRS, &not_used); + if (ACPI_FAILURE(status)) + return 0; + + c.list = list; + c.preproc = preproc; + c.preproc_data = preproc_data; + c.count = 0; + c.error = 0; + status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS, + acpi_dev_process_resource, &c); + if (ACPI_FAILURE(status)) { + acpi_dev_free_resource_list(list); + return c.error ? c.error : -EIO; + } + + return c.count; +} +EXPORT_SYMBOL_GPL(acpi_dev_get_resources); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 16fcaf8dad3d..32fbc4e73a56 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -261,6 +261,16 @@ unsigned long acpi_dev_irq_flags(u8 triggering, u8 polarity, u8 shareable); bool acpi_dev_resource_interrupt(struct acpi_resource *ares, int index, struct resource *res); +struct resource_list_entry { + struct list_head node; + struct resource res; +}; + +void acpi_dev_free_resource_list(struct list_head *list); +int acpi_dev_get_resources(struct acpi_device *adev, struct list_head *list, + int (*preproc)(struct acpi_resource *, void *), + void *preproc_data); + int acpi_check_resource_conflict(const struct resource *res); int acpi_check_region(resource_size_t start, resource_size_t n, -- cgit v1.2.3 From 4b972f0b04eaae645b22d99479b9aea43c3d64e7 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Oct 2012 01:23:43 +0200 Subject: cpufreq / core: Fix printing of governor and driver name Arrays for governor and driver name are of size CPUFREQ_NAME_LEN or 16, i.e. 15 bytes for the name and 1 for the trailing '\0'. When the cpufreq driver prints these names (for sysfs), it includes '\n' or ' ' in the fmt string but still passes the length as CPUFREQ_NAME_LEN. If a driver or governor name uses all 15 characters allocated to it, the trailing '\n' or ' ' will never be printed. So commands like: root@linaro-developer# cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_driver will print something like: cpufreq_foodrvroot@linaro-developer# Fix this by increasing the print length by one character. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki
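To make the off-by-one concrete, a small sketch (the 15-character name is made up; scnprintf() stores at most size - 1 characters plus the terminating NUL):

    char buf[32];

    /* 15 name characters plus '\n' need 16 bytes plus NUL, but the size
     * argument is 16, so the trailing '\n' is dropped */
    scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", "cpufreq_foodrvx");

    /* CPUFREQ_NAME_PLEN (17) leaves room, so the '\n' survives */
    scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", "cpufreq_foodrvx");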
--- drivers/cpufreq/cpufreq.c | 6 +++--- include/linux/cpufreq.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 021973b7dc56..db6e337ad337 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -445,7 +445,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) return sprintf(buf, "performance\n"); else if (policy->governor) - return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", + return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", policy->governor->name); return -EINVAL; } @@ -491,7 +491,7 @@ static ssize_t store_scaling_governor(struct cpufreq_policy *policy, */ static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) { - return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); + return scnprintf(buf, CPUFREQ_NAME_PLEN, "%s\n", cpufreq_driver->name); } /** @@ -512,7 +512,7 @@ static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) goto out; - i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); + i += scnprintf(&buf[i], CPUFREQ_NAME_PLEN, "%s ", t->name); } out: i += sprintf(&buf[i], "\n"); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index b60f6ba01d0c..fc4b78510151 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -22,6 +22,8 @@ #include #define CPUFREQ_NAME_LEN 16 +/* Print length for names. Extra 1 space for accommodating '\n' in prints */ +#define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) /********************************************************************* -- cgit v1.2.3 From 2aacdfff9c6958723aa5076003247933cefc32ea Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 23 Oct 2012 01:28:05 +0200 Subject: cpufreq: Move common part from governors to separate file, v2 Multiple cpufreq governors have defined similar get_cpu_idle_time_***() routines. These routines should live in a common place so that all governors can use them, so move them to cpufreq_governor.c, which is a better place for keeping them. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J.
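For illustration, a governor-style sketch of the now-shared helper, deriving a busy percentage from two successive samples (the foo_* bookkeeping is hypothetical):

    static unsigned int foo_cpu_busy_pct(unsigned int cpu,
                                         cputime64_t *prev_idle,
                                         cputime64_t *prev_wall)
    {
            cputime64_t wall;
            cputime64_t idle = get_cpu_idle_time(cpu, &wall);
            unsigned int idle_delta = (unsigned int)(idle - *prev_idle);
            unsigned int wall_delta = (unsigned int)(wall - *prev_wall);

            *prev_idle = idle;
            *prev_wall = wall;
            if (!wall_delta)
                    return 0;
            return 100 * (wall_delta - idle_delta) / wall_delta;
    }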
Wysocki --- drivers/cpufreq/Makefile | 4 +-- drivers/cpufreq/cpufreq_conservative.c | 34 ---------------------- drivers/cpufreq/cpufreq_governor.c | 52 ++++++++++++++++++++++++++++++++++ drivers/cpufreq/cpufreq_ondemand.c | 34 ---------------------- include/linux/cpufreq.h | 5 ++++ 5 files changed, 59 insertions(+), 70 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_governor.c (limited to 'include/linux') diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 1bc90e1306d8..5b1413e47216 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -7,8 +7,8 @@ obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o -obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o -obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o +obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o cpufreq_governor.o +obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o cpufreq_governor.o # CPUfreq cross-arch helpers obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index a152af7e1991..181abad07266 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -95,40 +95,6 @@ static struct dbs_tuners { .freq_step = 5, }; -static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) -{ - u64 idle_time; - u64 cur_wall_time; - u64 busy_time; - - cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); - - busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; - - idle_time = cur_wall_time - busy_time; - if (wall) - *wall = jiffies_to_usecs(cur_wall_time); - - return jiffies_to_usecs(idle_time); -} - -static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) -{ - u64 idle_time = get_cpu_idle_time_us(cpu, NULL); - - if (idle_time == -1ULL) - return get_cpu_idle_time_jiffy(cpu, wall); - else - idle_time += get_cpu_iowait_time_us(cpu, wall); - - return idle_time; -} - /* keep track of frequency transitions */ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c new file mode 100644 index 000000000000..0001071cdcdb --- /dev/null +++ b/drivers/cpufreq/cpufreq_governor.c @@ -0,0 +1,52 @@ +/* + * drivers/cpufreq/cpufreq_governor.c + * + * CPUFREQ governors common code + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +/* + * Code picked from earlier governor implementations + */ +static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) +{ + u64 idle_time; + u64 cur_wall_time; + u64 busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + + busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; + busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; + + idle_time = cur_wall_time - busy_time; + if (wall) + *wall = jiffies_to_usecs(cur_wall_time); + + return jiffies_to_usecs(idle_time); +} + +cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, NULL); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + else + idle_time += get_cpu_iowait_time_us(cpu, wall); + + return idle_time; +} +EXPORT_SYMBOL_GPL(get_cpu_idle_time); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 396322f2a83f..d7f774bb49dd 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -119,40 +119,6 @@ static struct dbs_tuners { .powersave_bias = 0, }; -static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) -{ - u64 idle_time; - u64 cur_wall_time; - u64 busy_time; - - cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); - - busy_time = kcpustat_cpu(cpu).cpustat[CPUTIME_USER]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL]; - busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE]; - - idle_time = cur_wall_time - busy_time; - if (wall) - *wall = jiffies_to_usecs(cur_wall_time); - - return jiffies_to_usecs(idle_time); -} - -static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) -{ - u64 idle_time = get_cpu_idle_time_us(cpu, NULL); - - if (idle_time == -1ULL) - return get_cpu_idle_time_jiffy(cpu, wall); - else - idle_time += get_cpu_iowait_time_us(cpu, wall); - - return idle_time; -} - static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) { u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index fc4b78510151..d03c21993052 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -11,6 +11,7 @@ #ifndef _LINUX_CPUFREQ_H #define _LINUX_CPUFREQ_H +#include #include #include #include @@ -407,5 +408,9 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, void cpufreq_frequency_table_put_attr(unsigned int cpu); +/********************************************************************* + * Governor Helpers * + *********************************************************************/ +cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall); #endif /* _LINUX_CPUFREQ_H */ -- cgit v1.2.3 From 4471a34f9a1f2da220272e823bdb8e8fa83a7661 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 26 Oct 2012 00:47:42 +0200 Subject: cpufreq: governors: remove redundant code The ondemand governor was written first, and the conservative governor was then written by reusing much of its code.
However, the shared code was duplicated into each governor rather than factored into common routines, which increased code redundancy and is difficult to manage. This patch moves the common parts of both governors into the cpufreq_governor.c file to address those issues. It shouldn't change anything from a functionality point of view. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/cpufreq_conservative.c | 548 ++++++++------------------ drivers/cpufreq/cpufreq_governor.c | 276 ++++++++++++- drivers/cpufreq/cpufreq_governor.h | 177 +++++++++ drivers/cpufreq/cpufreq_ondemand.c | 698 +++++++++++---------------------- include/linux/cpufreq.h | 6 - 5 files changed, 832 insertions(+), 873 deletions(-) create mode 100644 drivers/cpufreq/cpufreq_governor.h (limited to 'include/linux') diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index 181abad07266..64ef737e7e72 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -11,83 +11,30 @@ * published by the Free Software Foundation. */ -#include -#include -#include #include -#include -#include +#include +#include #include +#include +#include #include -#include -#include -#include -#include +#include +#include +#include +#include -/* - * dbs is used in this file as a shortform for demandbased switching - * It helps to keep variable names smaller, simpler - */ +#include "cpufreq_governor.h" +/* Conservative governor macros */ #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_FREQUENCY_DOWN_THRESHOLD (20) - -/* - * The polling frequency of this governor depends on the capability of - * the processor. Default polling frequency is 1000 times the transition - * latency of the processor. The governor will work on any processor with - * transition latency <= 10mS, using appropriate sampling - * rate. - * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) - * this governor will not work. - * All times here are in uS. - */ -#define MIN_SAMPLING_RATE_RATIO (2) - -static unsigned int min_sampling_rate; - -#define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (100) #define DEF_SAMPLING_DOWN_FACTOR (1) #define MAX_SAMPLING_DOWN_FACTOR (10) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) - -static void do_dbs_timer(struct work_struct *work); - -struct cpu_dbs_info_s { - cputime64_t prev_cpu_idle; - cputime64_t prev_cpu_wall; - cputime64_t prev_cpu_nice; - struct cpufreq_policy *cur_policy; - struct delayed_work work; - unsigned int down_skip; - unsigned int requested_freq; - int cpu; - unsigned int enable:1; - /* - * percpu mutex that serializes governor limit change with - * do_dbs_timer invocation. We do not want do_dbs_timer to run - * when user is changing the governor or limits. - */ - struct mutex timer_mutex; -}; -static DEFINE_PER_CPU(struct cpu_dbs_info_s, cs_cpu_dbs_info); -static unsigned int dbs_enable; /* number of CPUs using this policy */ +static struct dbs_data cs_dbs_data; +static DEFINE_PER_CPU(struct cs_cpu_dbs_info_s, cs_cpu_dbs_info); -/* - * dbs_mutex protects dbs_enable in governor start/stop.
- */ -static DEFINE_MUTEX(dbs_mutex); - -static struct dbs_tuners { - unsigned int sampling_rate; - unsigned int sampling_down_factor; - unsigned int up_threshold; - unsigned int down_threshold; - unsigned int ignore_nice; - unsigned int freq_step; -} dbs_tuners_ins = { +static struct cs_dbs_tuners cs_tuners = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, @@ -95,61 +42,121 @@ static struct dbs_tuners { .freq_step = 5, }; -/* keep track of frequency transitions */ -static int -dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, - void *data) +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency Every sampling_rate * + * sampling_down_factor, we check, if current idle time is more than 80%, then + * we try to decrease frequency + * + * Any frequency increase takes it to the maximum frequency. Frequency reduction + * happens at minimum steps of 5% (default) of maximum frequency + */ +static void cs_check_cpu(int cpu, unsigned int load) { - struct cpufreq_freqs *freq = data; - struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cs_cpu_dbs_info, - freq->cpu); + struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + unsigned int freq_target; + + /* + * break out if we 'cannot' reduce the speed as the user might + * want freq_step to be zero + */ + if (cs_tuners.freq_step == 0) + return; + + /* Check for frequency increase */ + if (load > cs_tuners.up_threshold) { + dbs_info->down_skip = 0; + + /* if we are already at full speed then break out early */ + if (dbs_info->requested_freq == policy->max) + return; + + freq_target = (cs_tuners.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... */ + if (unlikely(freq_target == 0)) + freq_target = 5; + + dbs_info->requested_freq += freq_target; + if (dbs_info->requested_freq > policy->max) + dbs_info->requested_freq = policy->max; + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } + + /* + * The optimal frequency is the frequency that is the lowest that can + * support the current CPU usage without triggering the up policy. To be + * safe, we focus 10 points under the threshold. 
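The increase path shown above reduces to plain integer arithmetic and can be exercised on its own. A minimal user-space sketch with illustrative values only (cs_check_cpu() in the hunk above is the authoritative logic):

    #include <stdio.h>

    /* Mirror of the conservative increase path: raise the requested frequency
     * by freq_step percent of the policy maximum and clamp to that maximum. */
    static unsigned int cs_bump_freq(unsigned int requested, unsigned int max,
                                     unsigned int freq_step)
    {
            unsigned int freq_target = (freq_step * max) / 100;

            /* max freq cannot be less than 100, but guard the degenerate
             * case the way the governor does */
            if (freq_target == 0)
                    freq_target = 5;

            requested += freq_target;
            if (requested > max)
                    requested = max;
            return requested;
    }

    int main(void)
    {
            /* hypothetical 2 GHz (2000000 kHz) policy maximum, 5% steps */
            printf("%u\n", cs_bump_freq(1800000, 2000000, 5)); /* 1900000 */
            printf("%u\n", cs_bump_freq(1950000, 2000000, 5)); /* 2000000, clamped */
            return 0;
    }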
+ */ + if (load < (cs_tuners.down_threshold - 10)) { + freq_target = (cs_tuners.freq_step * policy->max) / 100; + + dbs_info->requested_freq -= freq_target; + if (dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = policy->min; + + /* + * if we cannot reduce the frequency anymore, break out early + */ + if (policy->cur == policy->min) + return; + + __cpufreq_driver_target(policy, dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } +} + +static void cs_dbs_timer(struct work_struct *work) +{ + struct cs_cpu_dbs_info_s *dbs_info = container_of(work, + struct cs_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cpu; + int delay = delay_for_sampling_rate(cs_tuners.sampling_rate); + + mutex_lock(&dbs_info->cdbs.timer_mutex); + + dbs_check_cpu(&cs_dbs_data, cpu); + + schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); + mutex_unlock(&dbs_info->cdbs.timer_mutex); +} + +static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cs_cpu_dbs_info_s *dbs_info = + &per_cpu(cs_cpu_dbs_info, freq->cpu); struct cpufreq_policy *policy; - if (!this_dbs_info->enable) + if (!dbs_info->enable) return 0; - policy = this_dbs_info->cur_policy; + policy = dbs_info->cdbs.cur_policy; /* - * we only care if our internally tracked freq moves outside - * the 'valid' ranges of freqency available to us otherwise - * we do not change it + * we only care if our internally tracked freq moves outside the 'valid' + * ranges of freqency available to us otherwise we do not change it */ - if (this_dbs_info->requested_freq > policy->max - || this_dbs_info->requested_freq < policy->min) - this_dbs_info->requested_freq = freq->new; + if (dbs_info->requested_freq > policy->max + || dbs_info->requested_freq < policy->min) + dbs_info->requested_freq = freq->new; return 0; } -static struct notifier_block dbs_cpufreq_notifier_block = { - .notifier_call = dbs_cpufreq_notifier -}; - /************************** sysfs interface ************************/ static ssize_t show_sampling_rate_min(struct kobject *kobj, struct attribute *attr, char *buf) { - return sprintf(buf, "%u\n", min_sampling_rate); + return sprintf(buf, "%u\n", cs_dbs_data.min_sampling_rate); } -define_one_global_ro(sampling_rate_min); - -/* cpufreq_conservative Governor Tunables */ -#define show_one(file_name, object) \ -static ssize_t show_##file_name \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ -} -show_one(sampling_rate, sampling_rate); -show_one(sampling_down_factor, sampling_down_factor); -show_one(up_threshold, up_threshold); -show_one(down_threshold, down_threshold); -show_one(ignore_nice_load, ignore_nice); -show_one(freq_step, freq_step); - static ssize_t store_sampling_down_factor(struct kobject *a, struct attribute *b, const char *buf, size_t count) @@ -161,7 +168,7 @@ static ssize_t store_sampling_down_factor(struct kobject *a, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - dbs_tuners_ins.sampling_down_factor = input; + cs_tuners.sampling_down_factor = input; return count; } @@ -175,7 +182,7 @@ static ssize_t store_sampling_rate(struct kobject *a, struct attribute *b, if (ret != 1) return -EINVAL; - dbs_tuners_ins.sampling_rate = max(input, min_sampling_rate); + cs_tuners.sampling_rate = max(input, cs_dbs_data.min_sampling_rate); return count; } @@ -186,11 +193,10 @@ static ssize_t store_up_threshold(struct kobject 
*a, struct attribute *b, int ret; ret = sscanf(buf, "%u", &input); - if (ret != 1 || input > 100 || - input <= dbs_tuners_ins.down_threshold) + if (ret != 1 || input > 100 || input <= cs_tuners.down_threshold) return -EINVAL; - dbs_tuners_ins.up_threshold = input; + cs_tuners.up_threshold = input; return count; } @@ -203,21 +209,19 @@ static ssize_t store_down_threshold(struct kobject *a, struct attribute *b, /* cannot be lower than 11 otherwise freq will not fall */ if (ret != 1 || input < 11 || input > 100 || - input >= dbs_tuners_ins.up_threshold) + input >= cs_tuners.up_threshold) return -EINVAL; - dbs_tuners_ins.down_threshold = input; + cs_tuners.down_threshold = input; return count; } static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, const char *buf, size_t count) { - unsigned int input; + unsigned int input, j; int ret; - unsigned int j; - ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; @@ -225,19 +229,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, if (input > 1) input = 1; - if (input == dbs_tuners_ins.ignore_nice) /* nothing to do */ + if (input == cs_tuners.ignore_nice) /* nothing to do */ return count; - dbs_tuners_ins.ignore_nice = input; + cs_tuners.ignore_nice = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { - struct cpu_dbs_info_s *dbs_info; + struct cs_cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(cs_cpu_dbs_info, j); - dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->prev_cpu_wall); - if (dbs_tuners_ins.ignore_nice) - dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall); + if (cs_tuners.ignore_nice) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } return count; } @@ -255,18 +260,28 @@ static ssize_t store_freq_step(struct kobject *a, struct attribute *b, if (input > 100) input = 100; - /* no need to test here if freq_step is zero as the user might actually - * want this, they would be crazy though :) */ - dbs_tuners_ins.freq_step = input; + /* + * no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) + */ + cs_tuners.freq_step = input; return count; } +show_one(cs, sampling_rate, sampling_rate); +show_one(cs, sampling_down_factor, sampling_down_factor); +show_one(cs, up_threshold, up_threshold); +show_one(cs, down_threshold, down_threshold); +show_one(cs, ignore_nice_load, ignore_nice); +show_one(cs, freq_step, freq_step); + define_one_global_rw(sampling_rate); define_one_global_rw(sampling_down_factor); define_one_global_rw(up_threshold); define_one_global_rw(down_threshold); define_one_global_rw(ignore_nice_load); define_one_global_rw(freq_step); +define_one_global_ro(sampling_rate_min); static struct attribute *dbs_attributes[] = { &sampling_rate_min.attr, @@ -279,283 +294,38 @@ static struct attribute *dbs_attributes[] = { NULL }; -static struct attribute_group dbs_attr_group = { +static struct attribute_group cs_attr_group = { .attrs = dbs_attributes, .name = "conservative", }; /************************** sysfs end ************************/ -static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) -{ - unsigned int load = 0; - unsigned int max_load = 0; - unsigned int freq_target; - - struct cpufreq_policy *policy; - unsigned int j; - - policy = this_dbs_info->cur_policy; - - /* - * Every sampling_rate, we check, if current idle time is less - * than 20% 
(default), then we try to increase frequency - * Every sampling_rate*sampling_down_factor, we check, if current - * idle time is more than 80%, then we try to decrease frequency - * - * Any frequency increase takes it to the maximum frequency. - * Frequency reduction happens at minimum steps of - * 5% (default) of maximum frequency - */ - - /* Get Absolute Load */ - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info_s *j_dbs_info; - cputime64_t cur_wall_time, cur_idle_time; - unsigned int idle_time, wall_time; - - j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); - - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); - - wall_time = (unsigned int) - (cur_wall_time - j_dbs_info->prev_cpu_wall); - j_dbs_info->prev_cpu_wall = cur_wall_time; - - idle_time = (unsigned int) - (cur_idle_time - j_dbs_info->prev_cpu_idle); - j_dbs_info->prev_cpu_idle = cur_idle_time; - - if (dbs_tuners_ins.ignore_nice) { - u64 cur_nice; - unsigned long cur_nice_jiffies; - - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - - j_dbs_info->prev_cpu_nice; - /* - * Assumption: nice time between sampling periods will - * be less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - - j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - idle_time += jiffies_to_usecs(cur_nice_jiffies); - } - - if (unlikely(!wall_time || wall_time < idle_time)) - continue; - - load = 100 * (wall_time - idle_time) / wall_time; - - if (load > max_load) - max_load = load; - } +define_get_cpu_dbs_routines(cs_cpu_dbs_info); - /* - * break out if we 'cannot' reduce the speed as the user might - * want freq_step to be zero - */ - if (dbs_tuners_ins.freq_step == 0) - return; - - /* Check for frequency increase */ - if (max_load > dbs_tuners_ins.up_threshold) { - this_dbs_info->down_skip = 0; - - /* if we are already at full speed then break out early */ - if (this_dbs_info->requested_freq == policy->max) - return; - - freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; - - /* max freq cannot be less than 100. But who knows.... */ - if (unlikely(freq_target == 0)) - freq_target = 5; - - this_dbs_info->requested_freq += freq_target; - if (this_dbs_info->requested_freq > policy->max) - this_dbs_info->requested_freq = policy->max; - - __cpufreq_driver_target(policy, this_dbs_info->requested_freq, - CPUFREQ_RELATION_H); - return; - } - - /* - * The optimal frequency is the frequency that is the lowest that - * can support the current CPU usage without triggering the up - * policy. To be safe, we focus 10 points under the threshold. 
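The per-CPU load this loop derives reduces to a small pure function of the two deltas. A standalone sketch with hypothetical sample values, in microseconds as the governors use:

    #include <stdio.h>

    /* wall_time and idle_time are deltas since the previous sample; samples
     * where wall_time is zero or smaller than idle_time are skipped by the
     * governor, so treat them as zero load here */
    static unsigned int cpu_load(unsigned int wall_time, unsigned int idle_time)
    {
            if (!wall_time || wall_time < idle_time)
                    return 0;
            return 100 * (wall_time - idle_time) / wall_time;
    }

    int main(void)
    {
            /* e.g. 20000 us idle out of a 100000 us window -> 80% load */
            printf("%u%%\n", cpu_load(100000, 20000));
            return 0;
    }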
- */ - if (max_load < (dbs_tuners_ins.down_threshold - 10)) { - freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; - - this_dbs_info->requested_freq -= freq_target; - if (this_dbs_info->requested_freq < policy->min) - this_dbs_info->requested_freq = policy->min; - - /* - * if we cannot reduce the frequency anymore, break out early - */ - if (policy->cur == policy->min) - return; - - __cpufreq_driver_target(policy, this_dbs_info->requested_freq, - CPUFREQ_RELATION_H); - return; - } -} - -static void do_dbs_timer(struct work_struct *work) -{ - struct cpu_dbs_info_s *dbs_info = - container_of(work, struct cpu_dbs_info_s, work.work); - unsigned int cpu = dbs_info->cpu; - - /* We want all CPUs to do sampling nearly on same jiffy */ - int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); - - delay -= jiffies % delay; - - mutex_lock(&dbs_info->timer_mutex); - - dbs_check_cpu(dbs_info); - - schedule_delayed_work_on(cpu, &dbs_info->work, delay); - mutex_unlock(&dbs_info->timer_mutex); -} - -static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) -{ - /* We want all CPUs to do sampling nearly on same jiffy */ - int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); - delay -= jiffies % delay; +static struct notifier_block cs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier, +}; - dbs_info->enable = 1; - INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); - schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); -} +static struct cs_ops cs_ops = { + .notifier_block = &cs_cpufreq_notifier_block, +}; -static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) -{ - dbs_info->enable = 0; - cancel_delayed_work_sync(&dbs_info->work); -} +static struct dbs_data cs_dbs_data = { + .governor = GOV_CONSERVATIVE, + .attr_group = &cs_attr_group, + .tuners = &cs_tuners, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = cs_dbs_timer, + .gov_check_cpu = cs_check_cpu, + .gov_ops = &cs_ops, +}; -static int cpufreq_governor_dbs(struct cpufreq_policy *policy, +static int cs_cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event) { - unsigned int cpu = policy->cpu; - struct cpu_dbs_info_s *this_dbs_info; - unsigned int j; - int rc; - - this_dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || (!policy->cur)) - return -EINVAL; - - mutex_lock(&dbs_mutex); - - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(cs_cpu_dbs_info, j); - j_dbs_info->cur_policy = policy; - - j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &j_dbs_info->prev_cpu_wall); - if (dbs_tuners_ins.ignore_nice) - j_dbs_info->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - } - this_dbs_info->cpu = cpu; - this_dbs_info->down_skip = 0; - this_dbs_info->requested_freq = policy->cur; - - mutex_init(&this_dbs_info->timer_mutex); - dbs_enable++; - /* - * Start the timerschedule work, when this governor - * is used for first time - */ - if (dbs_enable == 1) { - unsigned int latency; - /* policy latency is in nS. 
Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - - rc = sysfs_create_group(cpufreq_global_kobject, - &dbs_attr_group); - if (rc) { - mutex_unlock(&dbs_mutex); - return rc; - } - - /* - * conservative does not implement micro like ondemand - * governor, thus we are bound to jiffes/HZ - */ - min_sampling_rate = - MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); - /* Bring kernel and HW constraints together */ - min_sampling_rate = max(min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - dbs_tuners_ins.sampling_rate = - max(min_sampling_rate, - latency * LATENCY_MULTIPLIER); - - cpufreq_register_notifier( - &dbs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - } - mutex_unlock(&dbs_mutex); - - dbs_timer_init(this_dbs_info); - - break; - - case CPUFREQ_GOV_STOP: - dbs_timer_exit(this_dbs_info); - - mutex_lock(&dbs_mutex); - dbs_enable--; - mutex_destroy(&this_dbs_info->timer_mutex); - - /* - * Stop the timerschedule work, when this governor - * is used for first time - */ - if (dbs_enable == 0) - cpufreq_unregister_notifier( - &dbs_cpufreq_notifier_block, - CPUFREQ_TRANSITION_NOTIFIER); - - mutex_unlock(&dbs_mutex); - if (!dbs_enable) - sysfs_remove_group(cpufreq_global_kobject, - &dbs_attr_group); - - break; - - case CPUFREQ_GOV_LIMITS: - mutex_lock(&this_dbs_info->timer_mutex); - if (policy->max < this_dbs_info->cur_policy->cur) - __cpufreq_driver_target( - this_dbs_info->cur_policy, - policy->max, CPUFREQ_RELATION_H); - else if (policy->min > this_dbs_info->cur_policy->cur) - __cpufreq_driver_target( - this_dbs_info->cur_policy, - policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(this_dbs_info); - mutex_unlock(&this_dbs_info->timer_mutex); - - break; - } - return 0; + return cpufreq_governor_dbs(&cs_dbs_data, policy, event); } #ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE @@ -563,13 +333,14 @@ static #endif struct cpufreq_governor cpufreq_gov_conservative = { .name = "conservative", - .governor = cpufreq_governor_dbs, + .governor = cs_cpufreq_governor_dbs, .max_transition_latency = TRANSITION_LATENCY_LIMIT, .owner = THIS_MODULE, }; static int __init cpufreq_gov_dbs_init(void) { + mutex_init(&cs_dbs_data.mutex); return cpufreq_register_governor(&cpufreq_gov_conservative); } @@ -578,7 +349,6 @@ static void __exit cpufreq_gov_dbs_exit(void) cpufreq_unregister_governor(&cpufreq_gov_conservative); } - MODULE_AUTHOR("Alexander Clouter "); MODULE_DESCRIPTION("'cpufreq_conservative' - A dynamic cpufreq governor for " "Low Latency Frequency Transition capable processors " diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 679842a8d34a..5ea2c829a796 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -3,19 +3,31 @@ * * CPUFREQ governors common code * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * (C) 2003 Jun Nakajima + * (C) 2009 Alexander Clouter + * (c) 2012 Viresh Kumar + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
 */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include +#include +#include #include #include +#include #include #include -/* - * Code picked from earlier governor implementations - */ +#include + +#include "cpufreq_governor.h" + static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) { u64 idle_time; @@ -33,9 +45,9 @@ static inline u64 get_cpu_idle_time_jiffy(unsigned int cpu, u64 *wall) idle_time = cur_wall_time - busy_time; if (wall) - *wall = cputime_to_usecs(cur_wall_time); + *wall = jiffies_to_usecs(cur_wall_time); - return cputime_to_usecs(idle_time); + return jiffies_to_usecs(idle_time); } cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) @@ -50,3 +62,257 @@ cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) return idle_time; } EXPORT_SYMBOL_GPL(get_cpu_idle_time); + +void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) +{ + struct cpu_dbs_common_info *cdbs = dbs_data->get_cpu_cdbs(cpu); + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + struct cpufreq_policy *policy; + unsigned int max_load = 0; + unsigned int ignore_nice; + unsigned int j; + + if (dbs_data->governor == GOV_ONDEMAND) + ignore_nice = od_tuners->ignore_nice; + else + ignore_nice = cs_tuners->ignore_nice; + + policy = cdbs->cur_policy; + + /* Get Absolute Load (in terms of freq for ondemand gov) */ + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_common_info *j_cdbs; + cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; + unsigned int idle_time, wall_time, iowait_time; + unsigned int load; + + j_cdbs = dbs_data->get_cpu_cdbs(j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + + wall_time = (unsigned int) + (cur_wall_time - j_cdbs->prev_cpu_wall); + j_cdbs->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) + (cur_idle_time - j_cdbs->prev_cpu_idle); + j_cdbs->prev_cpu_idle = cur_idle_time; + + if (ignore_nice) { + u64 cur_nice; + unsigned long cur_nice_jiffies; + + cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - + j_cdbs->prev_cpu_nice; + /* + * Assumption: nice time between sampling periods will + * be less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + + j_cdbs->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + idle_time += jiffies_to_usecs(cur_nice_jiffies); + } + + if (dbs_data->governor == GOV_ONDEMAND) { + struct od_cpu_dbs_info_s *od_j_dbs_info = + dbs_data->get_cpu_dbs_info_s(j); + + cur_iowait_time = get_cpu_iowait_time_us(j, + &cur_wall_time); + if (cur_iowait_time == -1ULL) + cur_iowait_time = 0; + + iowait_time = (unsigned int) (cur_iowait_time - + od_j_dbs_info->prev_cpu_iowait); + od_j_dbs_info->prev_cpu_iowait = cur_iowait_time; + + /* + * For the purpose of ondemand, waiting for disk IO is + * an indication that you're performance critical, and + * not that the system is actually idle. So subtract the + * iowait time from the cpu idle time.
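As a hedged aside, the io_is_busy adjustment described in that comment boils down to the following standalone sketch (not the kernel code itself):

    #include <stdio.h>

    /* When IO wait should count as busy time, subtract it from the idle
     * delta, guarding against underflow exactly as the governor does. */
    static unsigned int effective_idle(unsigned int idle_time,
                                       unsigned int iowait_time, int io_is_busy)
    {
            if (io_is_busy && idle_time >= iowait_time)
                    idle_time -= iowait_time;
            return idle_time;
    }

    int main(void)
    {
            /* 30000 us nominal idle, 12000 us of it spent waiting for IO */
            printf("%u\n", effective_idle(30000, 12000, 1)); /* 18000 */
            printf("%u\n", effective_idle(30000, 12000, 0)); /* 30000 */
            return 0;
    }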
+ */ + if (od_tuners->io_is_busy && idle_time >= iowait_time) + idle_time -= iowait_time; + } + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + if (dbs_data->governor == GOV_ONDEMAND) { + int freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load *= freq_avg; + } + + if (load > max_load) + max_load = load; + } + + dbs_data->gov_check_cpu(cpu, max_load); +} +EXPORT_SYMBOL_GPL(dbs_check_cpu); + +static inline void dbs_timer_init(struct dbs_data *dbs_data, + struct cpu_dbs_common_info *cdbs, unsigned int sampling_rate) +{ + int delay = delay_for_sampling_rate(sampling_rate); + + INIT_DEFERRABLE_WORK(&cdbs->work, dbs_data->gov_dbs_timer); + schedule_delayed_work_on(cdbs->cpu, &cdbs->work, delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_common_info *cdbs) +{ + cancel_delayed_work_sync(&cdbs->work); +} + +int cpufreq_governor_dbs(struct dbs_data *dbs_data, + struct cpufreq_policy *policy, unsigned int event) +{ + struct od_cpu_dbs_info_s *od_dbs_info = NULL; + struct cs_cpu_dbs_info_s *cs_dbs_info = NULL; + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + struct cpu_dbs_common_info *cpu_cdbs; + unsigned int *sampling_rate, latency, ignore_nice, j, cpu = policy->cpu; + int rc; + + cpu_cdbs = dbs_data->get_cpu_cdbs(cpu); + + if (dbs_data->governor == GOV_CONSERVATIVE) { + cs_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); + sampling_rate = &cs_tuners->sampling_rate; + ignore_nice = cs_tuners->ignore_nice; + } else { + od_dbs_info = dbs_data->get_cpu_dbs_info_s(cpu); + sampling_rate = &od_tuners->sampling_rate; + ignore_nice = od_tuners->ignore_nice; + } + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + mutex_lock(&dbs_data->mutex); + + dbs_data->enable++; + cpu_cdbs->cpu = cpu; + for_each_cpu(j, policy->cpus) { + struct cpu_dbs_common_info *j_cdbs; + j_cdbs = dbs_data->get_cpu_cdbs(j); + + j_cdbs->cur_policy = policy; + j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, + &j_cdbs->prev_cpu_wall); + if (ignore_nice) + j_cdbs->prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + } + + /* + * Start the timerschedule work, when this governor is used for + * first time + */ + if (dbs_data->enable != 1) + goto second_time; + + rc = sysfs_create_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (rc) { + mutex_unlock(&dbs_data->mutex); + return rc; + } + + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + /* + * conservative does not implement micro like ondemand + * governor, thus we are bound to jiffes/HZ + */ + if (dbs_data->governor == GOV_CONSERVATIVE) { + struct cs_ops *ops = dbs_data->gov_ops; + + cpufreq_register_notifier(ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + + dbs_data->min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); + } else { + struct od_ops *ops = dbs_data->gov_ops; + + od_tuners->io_is_busy = ops->io_busy(); + } + + /* Bring kernel and HW constraints together */ + dbs_data->min_sampling_rate = max(dbs_data->min_sampling_rate, + MIN_LATENCY_MULTIPLIER * latency); + *sampling_rate = max(dbs_data->min_sampling_rate, latency * + LATENCY_MULTIPLIER); + +second_time: + if (dbs_data->governor == GOV_CONSERVATIVE) { + cs_dbs_info->down_skip = 0; + cs_dbs_info->enable = 1; + cs_dbs_info->requested_freq = policy->cur; + } else { + struct od_ops *ops = dbs_data->gov_ops; + od_dbs_info->rate_mult = 1; + od_dbs_info->sample_type = OD_NORMAL_SAMPLE; + ops->powersave_bias_init_cpu(cpu); + } + mutex_unlock(&dbs_data->mutex); + + mutex_init(&cpu_cdbs->timer_mutex); + dbs_timer_init(dbs_data, cpu_cdbs, *sampling_rate); + break; + + case CPUFREQ_GOV_STOP: + if (dbs_data->governor == GOV_CONSERVATIVE) + cs_dbs_info->enable = 0; + + dbs_timer_exit(cpu_cdbs); + + mutex_lock(&dbs_data->mutex); + mutex_destroy(&cpu_cdbs->timer_mutex); + dbs_data->enable--; + if (!dbs_data->enable) { + struct cs_ops *ops = dbs_data->gov_ops; + + sysfs_remove_group(cpufreq_global_kobject, + dbs_data->attr_group); + if (dbs_data->governor == GOV_CONSERVATIVE) + cpufreq_unregister_notifier(ops->notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + mutex_unlock(&dbs_data->mutex); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&cpu_cdbs->timer_mutex); + if (policy->max < cpu_cdbs->cur_policy->cur) + __cpufreq_driver_target(cpu_cdbs->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > cpu_cdbs->cur_policy->cur) + __cpufreq_driver_target(cpu_cdbs->cur_policy, + policy->min, CPUFREQ_RELATION_L); + dbs_check_cpu(dbs_data, cpu); + mutex_unlock(&cpu_cdbs->timer_mutex); + break; + } + return 0; +} +EXPORT_SYMBOL_GPL(cpufreq_governor_dbs); diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h new file mode 100644 index 000000000000..34e14adfc3f9 --- /dev/null +++ b/drivers/cpufreq/cpufreq_governor.h @@ -0,0 +1,177 @@ +/* + * drivers/cpufreq/cpufreq_governor.h + * + * Header file for CPUFreq governors common code + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi . + * (C) 2003 Jun Nakajima + * (C) 2009 Alexander Clouter + * (c) 2012 Viresh Kumar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _CPUFREQ_GOVERNER_H +#define _CPUFREQ_GOVERNER_H + +#include +#include +#include +#include +#include +#include + +/* + * The polling frequency depends on the capability of the processor. Default + * polling frequency is 1000 times the transition latency of the processor. The + * governor will work on any processor with transition latency <= 10mS, using + * appropriate sampling rate. + * + * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. All times here are in uS. 
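A short sketch of the arithmetic this comment describes, using the constants defined just below; jiffies_to_usecs() is modeled for a HZ=100 build, which is an assumption, and the driver latency value is hypothetical:

    #include <stdio.h>

    #define MIN_SAMPLING_RATE_RATIO (2)
    #define LATENCY_MULTIPLIER (1000)
    #define MIN_LATENCY_MULTIPLIER (100)

    int main(void)
    {
            unsigned int transition_latency_ns = 10000;  /* hypothetical driver value */
            unsigned int latency = transition_latency_ns / 1000;  /* 10 uS */
            /* jiffies_to_usecs(10) == 100000 on a HZ=100 kernel (assumption) */
            unsigned int min_rate = MIN_SAMPLING_RATE_RATIO * 100000;
            unsigned int sampling_rate;

            if (latency == 0)
                    latency = 1;
            /* bring kernel and hardware constraints together */
            if (min_rate < MIN_LATENCY_MULTIPLIER * latency)
                    min_rate = MIN_LATENCY_MULTIPLIER * latency;
            sampling_rate = latency * LATENCY_MULTIPLIER;  /* 1000x latency */
            if (sampling_rate < min_rate)
                    sampling_rate = min_rate;

            printf("min %u uS, default %u uS\n", min_rate, sampling_rate);
            return 0;
    }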
+ */ +#define MIN_SAMPLING_RATE_RATIO (2) +#define LATENCY_MULTIPLIER (1000) +#define MIN_LATENCY_MULTIPLIER (100) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +/* Ondemand Sampling types */ +enum {OD_NORMAL_SAMPLE, OD_SUB_SAMPLE}; + +/* Macro creating sysfs show routines */ +#define show_one(_gov, file_name, object) \ +static ssize_t show_##file_name \ +(struct kobject *kobj, struct attribute *attr, char *buf) \ +{ \ + return sprintf(buf, "%u\n", _gov##_tuners.object); \ +} + +#define define_get_cpu_dbs_routines(_dbs_info) \ +static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu).cdbs; \ +} \ + \ +static void *get_cpu_dbs_info_s(int cpu) \ +{ \ + return &per_cpu(_dbs_info, cpu); \ +} + +/* + * Abbreviations: + * dbs: used as a shortform for demand based switching It helps to keep variable + * names smaller, simpler + * cdbs: common dbs + * on_*: On-demand governor + * cs_*: Conservative governor + */ + +/* Per cpu structures */ +struct cpu_dbs_common_info { + int cpu; + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + /* + * percpu mutex that serializes governor limit change with gov_dbs_timer + * invocation. We do not want gov_dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; +}; + +struct od_cpu_dbs_info_s { + struct cpu_dbs_common_info cdbs; + cputime64_t prev_cpu_iowait; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + unsigned int rate_mult; + unsigned int sample_type:1; +}; + +struct cs_cpu_dbs_info_s { + struct cpu_dbs_common_info cdbs; + unsigned int down_skip; + unsigned int requested_freq; + unsigned int enable:1; +}; + +/* Governers sysfs tunables */ +struct od_dbs_tuners { + unsigned int ignore_nice; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int powersave_bias; + unsigned int io_is_busy; +}; + +struct cs_dbs_tuners { + unsigned int ignore_nice; + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int freq_step; +}; + +/* Per Governer data */ +struct dbs_data { + /* Common across governors */ + #define GOV_ONDEMAND 0 + #define GOV_CONSERVATIVE 1 + int governor; + unsigned int min_sampling_rate; + unsigned int enable; /* number of CPUs using this policy */ + struct attribute_group *attr_group; + void *tuners; + + /* dbs_mutex protects dbs_enable in governor start/stop */ + struct mutex mutex; + + struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); + void *(*get_cpu_dbs_info_s)(int cpu); + void (*gov_dbs_timer)(struct work_struct *work); + void (*gov_check_cpu)(int cpu, unsigned int load); + + /* Governor specific ops, see below */ + void *gov_ops; +}; + +/* Governor specific ops, will be passed to dbs_data->gov_ops */ +struct od_ops { + int (*io_busy)(void); + void (*powersave_bias_init_cpu)(int cpu); + unsigned int (*powersave_bias_target)(struct cpufreq_policy *policy, + unsigned int freq_next, unsigned int relation); + void (*freq_increase)(struct cpufreq_policy *p, unsigned int freq); +}; + +struct cs_ops { + struct notifier_block *notifier_block; +}; + +static inline int delay_for_sampling_rate(unsigned int sampling_rate) +{ + int delay = usecs_to_jiffies(sampling_rate); + + /* We want all CPUs to do 
sampling nearly on same jiffy */ + if (num_online_cpus() > 1) + delay -= jiffies % delay; + + return delay; +} + +cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall); +void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); +int cpufreq_governor_dbs(struct dbs_data *dbs_data, + struct cpufreq_policy *policy, unsigned int event); +#endif /* _CPUFREQ_GOVERNER_H */ diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index d7f774bb49dd..bdaab9206303 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -10,24 +10,23 @@ * published by the Free Software Foundation. */ -#include -#include -#include +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include -#include -#include +#include +#include #include +#include +#include #include -#include +#include +#include #include -#include -#include +#include -/* - * dbs is used in this file as a shortform for demandbased switching - * It helps to keep variable names smaller, simpler - */ +#include "cpufreq_governor.h" +/* On-demand governor macors */ #define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) #define DEF_FREQUENCY_UP_THRESHOLD (80) #define DEF_SAMPLING_DOWN_FACTOR (1) @@ -38,80 +37,10 @@ #define MIN_FREQUENCY_UP_THRESHOLD (11) #define MAX_FREQUENCY_UP_THRESHOLD (100) -/* - * The polling frequency of this governor depends on the capability of - * the processor. Default polling frequency is 1000 times the transition - * latency of the processor. The governor will work on any processor with - * transition latency <= 10mS, using appropriate sampling - * rate. - * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) - * this governor will not work. - * All times here are in uS. - */ -#define MIN_SAMPLING_RATE_RATIO (2) - -static unsigned int min_sampling_rate; - -#define LATENCY_MULTIPLIER (1000) -#define MIN_LATENCY_MULTIPLIER (100) -#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) - -static void do_dbs_timer(struct work_struct *work); -static int cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event); - -#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND -static -#endif -struct cpufreq_governor cpufreq_gov_ondemand = { - .name = "ondemand", - .governor = cpufreq_governor_dbs, - .max_transition_latency = TRANSITION_LATENCY_LIMIT, - .owner = THIS_MODULE, -}; - -/* Sampling types */ -enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; - -struct cpu_dbs_info_s { - cputime64_t prev_cpu_idle; - cputime64_t prev_cpu_iowait; - cputime64_t prev_cpu_wall; - cputime64_t prev_cpu_nice; - struct cpufreq_policy *cur_policy; - struct delayed_work work; - struct cpufreq_frequency_table *freq_table; - unsigned int freq_lo; - unsigned int freq_lo_jiffies; - unsigned int freq_hi_jiffies; - unsigned int rate_mult; - int cpu; - unsigned int sample_type:1; - /* - * percpu mutex that serializes governor limit change with - * do_dbs_timer invocation. We do not want do_dbs_timer to run - * when user is changing the governor or limits. - */ - struct mutex timer_mutex; -}; -static DEFINE_PER_CPU(struct cpu_dbs_info_s, od_cpu_dbs_info); - -static unsigned int dbs_enable; /* number of CPUs using this policy */ +static struct dbs_data od_dbs_data; +static DEFINE_PER_CPU(struct od_cpu_dbs_info_s, od_cpu_dbs_info); -/* - * dbs_mutex protects dbs_enable in governor start/stop. 
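Referring back to delay_for_sampling_rate() in the header above: a user-space model of the jiffy alignment, with usecs_to_jiffies() stubbed for HZ=100 (an assumption):

    #include <stdio.h>

    static int delay_for_sampling_rate(unsigned long jiffies_now,
                                       unsigned int sampling_rate_us,
                                       int online_cpus)
    {
            int delay = sampling_rate_us / 10000;  /* usecs_to_jiffies() stand-in */

            /* all CPUs should sample on nearly the same jiffy */
            if (online_cpus > 1)
                    delay -= jiffies_now % delay;
            return delay;
    }

    int main(void)
    {
            /* jiffies == 1003 and a 100 ms sampling rate: delay is 7, so
             * every CPU's next sample lands on jiffy 1010 */
            printf("%d\n", delay_for_sampling_rate(1003, 100000, 4));
            return 0;
    }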
 - */ -static DEFINE_MUTEX(dbs_mutex); - -static struct dbs_tuners { - unsigned int sampling_rate; - unsigned int up_threshold; - unsigned int down_differential; - unsigned int ignore_nice; - unsigned int sampling_down_factor; - unsigned int powersave_bias; - unsigned int io_is_busy; -} dbs_tuners_ins = { +static struct od_dbs_tuners od_tuners = { .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, @@ -119,14 +48,35 @@ static struct dbs_tuners { .powersave_bias = 0, }; -static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wall) +static void ondemand_powersave_bias_init_cpu(int cpu) { - u64 iowait_time = get_cpu_iowait_time_us(cpu, wall); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - if (iowait_time == -1ULL) - return 0; + dbs_info->freq_table = cpufreq_frequency_get_table(cpu); + dbs_info->freq_lo = 0; +} - return iowait_time; +/* + * Not all CPUs want IO time to be accounted as busy; this depends on how + * efficient idling at a higher frequency/voltage is. + * Pavel Machek says this is not so for various generations of AMD and old + * Intel systems. + * Mike Chan (androidlcom) claims this is also not true for ARM. + * Because of this, whitelist specific known (series) of CPUs by default, and + * leave all others up to the user. + */ +static int should_io_be_busy(void) +{ +#if defined(CONFIG_X86) + /* + * For Intel, Core 2 (model 15) and later have an efficient idle. + */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6 && + boot_cpu_data.x86_model >= 15) + return 1; +#endif + return 0; } /* @@ -135,14 +85,13 @@ static inline cputime64_t get_cpu_iowait_time(unsigned int cpu, cputime64_t *wal * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs.
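The averaging referred to here (the function itself follows in the next hunk) is a proportional time split between the two neighbouring table frequencies. A sketch with made-up numbers:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical table neighbours around the biased target (kHz) */
            unsigned int freq_hi = 2000000, freq_lo = 1600000;
            unsigned int freq_req = 2000000;
            unsigned int powersave_bias = 100;  /* 10.0%, in units of 0.1% */
            unsigned int jiffies_total = 10;    /* one sampling period */
            unsigned int freq_reduc, freq_avg, jiffies_hi, jiffies_lo;

            freq_reduc = freq_req * powersave_bias / 1000;
            freq_avg = freq_req - freq_reduc;   /* 1800000 kHz */

            /* split the period so the average frequency equals freq_avg */
            jiffies_hi = (freq_avg - freq_lo) * jiffies_total;
            jiffies_hi += (freq_hi - freq_lo) / 2;  /* round to nearest */
            jiffies_hi /= (freq_hi - freq_lo);
            jiffies_lo = jiffies_total - jiffies_hi;

            printf("%u jiffies at %u kHz, %u jiffies at %u kHz\n",
                   jiffies_hi, freq_hi, jiffies_lo, freq_lo);
            return 0;
    }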
*/ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, - unsigned int freq_next, - unsigned int relation) + unsigned int freq_next, unsigned int relation) { unsigned int freq_req, freq_reduc, freq_avg; unsigned int freq_hi, freq_lo; unsigned int index = 0; unsigned int jiffies_total, jiffies_hi, jiffies_lo; - struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); if (!dbs_info->freq_table) { @@ -154,7 +103,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, relation, &index); freq_req = dbs_info->freq_table[index].frequency; - freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_reduc = freq_req * od_tuners.powersave_bias / 1000; freq_avg = freq_req - freq_reduc; /* Find freq bounds for freq_avg in freq_table */ @@ -173,7 +122,7 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, dbs_info->freq_lo_jiffies = 0; return freq_lo; } - jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_total = usecs_to_jiffies(od_tuners.sampling_rate); jiffies_hi = (freq_avg - freq_lo) * jiffies_total; jiffies_hi += ((freq_hi - freq_lo) / 2); jiffies_hi /= (freq_hi - freq_lo); @@ -184,13 +133,6 @@ static unsigned int powersave_bias_target(struct cpufreq_policy *policy, return freq_hi; } -static void ondemand_powersave_bias_init_cpu(int cpu) -{ - struct cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - dbs_info->freq_table = cpufreq_frequency_get_table(cpu); - dbs_info->freq_lo = 0; -} - static void ondemand_powersave_bias_init(void) { int i; @@ -199,53 +141,138 @@ static void ondemand_powersave_bias_init(void) } } -/************************** sysfs interface ************************/ +static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) +{ + if (od_tuners.powersave_bias) + freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); + else if (p->cur == p->max) + return; -static ssize_t show_sampling_rate_min(struct kobject *kobj, - struct attribute *attr, char *buf) + __cpufreq_driver_target(p, freq, od_tuners.powersave_bias ? + CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); +} + +/* + * Every sampling_rate, we check, if current idle time is less than 20% + * (default), then we try to increase frequency Every sampling_rate, we look for + * a the lowest frequency which can sustain the load while keeping idle time + * over 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. 
Frequency reduction + * happens at minimum steps of 5% (default) of current frequency + */ +static void od_check_cpu(int cpu, unsigned int load_freq) { - return sprintf(buf, "%u\n", min_sampling_rate); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); + struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + + dbs_info->freq_lo = 0; + + /* Check for frequency increase */ + if (load_freq > od_tuners.up_threshold * policy->cur) { + /* If switching to max speed, apply sampling_down_factor */ + if (policy->cur < policy->max) + dbs_info->rate_mult = + od_tuners.sampling_down_factor; + dbs_freq_increase(policy, policy->max); + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out early */ + if (policy->cur == policy->min) + return; + + /* + * The optimal frequency is the frequency that is the lowest that can + * support the current CPU usage without triggering the up policy. To be + * safe, we focus 10 points under the threshold. + */ + if (load_freq < (od_tuners.up_threshold - od_tuners.down_differential) * + policy->cur) { + unsigned int freq_next; + freq_next = load_freq / (od_tuners.up_threshold - + od_tuners.down_differential); + + /* No longer fully busy, reset rate_mult */ + dbs_info->rate_mult = 1; + + if (freq_next < policy->min) + freq_next = policy->min; + + if (!od_tuners.powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + } } -define_one_global_ro(sampling_rate_min); +static void od_dbs_timer(struct work_struct *work) +{ + struct od_cpu_dbs_info_s *dbs_info = + container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); + unsigned int cpu = dbs_info->cdbs.cpu; + int delay, sample_type = dbs_info->sample_type; -/* cpufreq_ondemand Governor Tunables */ -#define show_one(file_name, object) \ -static ssize_t show_##file_name \ -(struct kobject *kobj, struct attribute *attr, char *buf) \ -{ \ - return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ + mutex_lock(&dbs_info->cdbs.timer_mutex); + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = OD_NORMAL_SAMPLE; + if (sample_type == OD_SUB_SAMPLE) { + delay = dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(dbs_info->cdbs.cur_policy, + dbs_info->freq_lo, CPUFREQ_RELATION_H); + } else { + dbs_check_cpu(&od_dbs_data, cpu); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = OD_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } else { + delay = delay_for_sampling_rate(dbs_info->rate_mult); + } + } + + schedule_delayed_work_on(cpu, &dbs_info->cdbs.work, delay); + mutex_unlock(&dbs_info->cdbs.timer_mutex); +} + +/************************** sysfs interface ************************/ + +static ssize_t show_sampling_rate_min(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + return sprintf(buf, "%u\n", od_dbs_data.min_sampling_rate); } -show_one(sampling_rate, sampling_rate); -show_one(io_is_busy, io_is_busy); -show_one(up_threshold, up_threshold); -show_one(sampling_down_factor, sampling_down_factor); -show_one(ignore_nice_load, ignore_nice); -show_one(powersave_bias, powersave_bias); /** * update_sampling_rate - update sampling rate effective immediately if needed. * @new_rate: new sampling rate * * If new rate is smaller than the old, simply updaing - * dbs_tuners_int.sampling_rate might not be appropriate. 
For example, - * if the original sampling_rate was 1 second and the requested new sampling - * rate is 10 ms because the user needs immediate reaction from ondemand - * governor, but not sure if higher frequency will be required or not, - * then, the governor may change the sampling rate too late; up to 1 second - * later. Thus, if we are reducing the sampling rate, we need to make the - * new value effective immediately. + * dbs_tuners_int.sampling_rate might not be appropriate. For example, if the + * original sampling_rate was 1 second and the requested new sampling rate is 10 + * ms because the user needs immediate reaction from ondemand governor, but not + * sure if higher frequency will be required or not, then, the governor may + * change the sampling rate too late; up to 1 second later. Thus, if we are + * reducing the sampling rate, we need to make the new value effective + * immediately. */ static void update_sampling_rate(unsigned int new_rate) { int cpu; - dbs_tuners_ins.sampling_rate = new_rate - = max(new_rate, min_sampling_rate); + od_tuners.sampling_rate = new_rate = max(new_rate, + od_dbs_data.min_sampling_rate); for_each_online_cpu(cpu) { struct cpufreq_policy *policy; - struct cpu_dbs_info_s *dbs_info; + struct od_cpu_dbs_info_s *dbs_info; unsigned long next_sampling, appointed_at; policy = cpufreq_cpu_get(cpu); @@ -254,28 +281,28 @@ static void update_sampling_rate(unsigned int new_rate) dbs_info = &per_cpu(od_cpu_dbs_info, policy->cpu); cpufreq_cpu_put(policy); - mutex_lock(&dbs_info->timer_mutex); + mutex_lock(&dbs_info->cdbs.timer_mutex); - if (!delayed_work_pending(&dbs_info->work)) { - mutex_unlock(&dbs_info->timer_mutex); + if (!delayed_work_pending(&dbs_info->cdbs.work)) { + mutex_unlock(&dbs_info->cdbs.timer_mutex); continue; } - next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->work.timer.expires; - + next_sampling = jiffies + usecs_to_jiffies(new_rate); + appointed_at = dbs_info->cdbs.work.timer.expires; if (time_before(next_sampling, appointed_at)) { - mutex_unlock(&dbs_info->timer_mutex); - cancel_delayed_work_sync(&dbs_info->work); - mutex_lock(&dbs_info->timer_mutex); + mutex_unlock(&dbs_info->cdbs.timer_mutex); + cancel_delayed_work_sync(&dbs_info->cdbs.work); + mutex_lock(&dbs_info->cdbs.timer_mutex); - schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, - usecs_to_jiffies(new_rate)); + schedule_delayed_work_on(dbs_info->cdbs.cpu, + &dbs_info->cdbs.work, + usecs_to_jiffies(new_rate)); } - mutex_unlock(&dbs_info->timer_mutex); + mutex_unlock(&dbs_info->cdbs.timer_mutex); } } @@ -300,7 +327,7 @@ static ssize_t store_io_is_busy(struct kobject *a, struct attribute *b, ret = sscanf(buf, "%u", &input); if (ret != 1) return -EINVAL; - dbs_tuners_ins.io_is_busy = !!input; + od_tuners.io_is_busy = !!input; return count; } @@ -315,7 +342,7 @@ static ssize_t store_up_threshold(struct kobject *a, struct attribute *b, input < MIN_FREQUENCY_UP_THRESHOLD) { return -EINVAL; } - dbs_tuners_ins.up_threshold = input; + od_tuners.up_threshold = input; return count; } @@ -328,12 +355,12 @@ static ssize_t store_sampling_down_factor(struct kobject *a, if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) return -EINVAL; - dbs_tuners_ins.sampling_down_factor = input; + od_tuners.sampling_down_factor = input; /* Reset down sampling multiplier in case it was active */ for_each_online_cpu(j) { - struct cpu_dbs_info_s *dbs_info; - dbs_info = &per_cpu(od_cpu_dbs_info, j); + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, + 
j); dbs_info->rate_mult = 1; } return count; @@ -354,19 +381,20 @@ static ssize_t store_ignore_nice_load(struct kobject *a, struct attribute *b, if (input > 1) input = 1; - if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + if (input == od_tuners.ignore_nice) { /* nothing to do */ return count; } - dbs_tuners_ins.ignore_nice = input; + od_tuners.ignore_nice = input; /* we need to re-evaluate prev_cpu_idle */ for_each_online_cpu(j) { - struct cpu_dbs_info_s *dbs_info; + struct od_cpu_dbs_info_s *dbs_info; dbs_info = &per_cpu(od_cpu_dbs_info, j); - dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &dbs_info->prev_cpu_wall); - if (dbs_tuners_ins.ignore_nice) - dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; + dbs_info->cdbs.prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->cdbs.prev_cpu_wall); + if (od_tuners.ignore_nice) + dbs_info->cdbs.prev_cpu_nice = + kcpustat_cpu(j).cpustat[CPUTIME_NICE]; } return count; @@ -385,17 +413,25 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b, if (input > 1000) input = 1000; - dbs_tuners_ins.powersave_bias = input; + od_tuners.powersave_bias = input; ondemand_powersave_bias_init(); return count; } +show_one(od, sampling_rate, sampling_rate); +show_one(od, io_is_busy, io_is_busy); +show_one(od, up_threshold, up_threshold); +show_one(od, sampling_down_factor, sampling_down_factor); +show_one(od, ignore_nice_load, ignore_nice); +show_one(od, powersave_bias, powersave_bias); + define_one_global_rw(sampling_rate); define_one_global_rw(io_is_busy); define_one_global_rw(up_threshold); define_one_global_rw(sampling_down_factor); define_one_global_rw(ignore_nice_load); define_one_global_rw(powersave_bias); +define_one_global_ro(sampling_rate_min); static struct attribute *dbs_attributes[] = { &sampling_rate_min.attr, @@ -408,354 +444,71 @@ static struct attribute *dbs_attributes[] = { NULL }; -static struct attribute_group dbs_attr_group = { +static struct attribute_group od_attr_group = { .attrs = dbs_attributes, .name = "ondemand", }; /************************** sysfs end ************************/ -static void dbs_freq_increase(struct cpufreq_policy *p, unsigned int freq) -{ - if (dbs_tuners_ins.powersave_bias) - freq = powersave_bias_target(p, freq, CPUFREQ_RELATION_H); - else if (p->cur == p->max) - return; - - __cpufreq_driver_target(p, freq, dbs_tuners_ins.powersave_bias ? - CPUFREQ_RELATION_L : CPUFREQ_RELATION_H); -} - -static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) -{ - unsigned int max_load_freq; - - struct cpufreq_policy *policy; - unsigned int j; - - this_dbs_info->freq_lo = 0; - policy = this_dbs_info->cur_policy; - - /* - * Every sampling_rate, we check, if current idle time is less - * than 20% (default), then we try to increase frequency - * Every sampling_rate, we look for a the lowest - * frequency which can sustain the load while keeping idle time over - * 30%. If such a frequency exist, we try to decrease to this frequency. - * - * Any frequency increase takes it to the maximum frequency. 
- * Frequency reduction happens at minimum steps of - * 5% (default) of current frequency - */ - - /* Get Absolute Load - in terms of freq */ - max_load_freq = 0; - - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info_s *j_dbs_info; - cputime64_t cur_wall_time, cur_idle_time, cur_iowait_time; - unsigned int idle_time, wall_time, iowait_time; - unsigned int load, load_freq; - int freq_avg; - - j_dbs_info = &per_cpu(od_cpu_dbs_info, j); - - cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); - cur_iowait_time = get_cpu_iowait_time(j, &cur_wall_time); - - wall_time = (unsigned int) - (cur_wall_time - j_dbs_info->prev_cpu_wall); - j_dbs_info->prev_cpu_wall = cur_wall_time; - - idle_time = (unsigned int) - (cur_idle_time - j_dbs_info->prev_cpu_idle); - j_dbs_info->prev_cpu_idle = cur_idle_time; - - iowait_time = (unsigned int) - (cur_iowait_time - j_dbs_info->prev_cpu_iowait); - j_dbs_info->prev_cpu_iowait = cur_iowait_time; - - if (dbs_tuners_ins.ignore_nice) { - u64 cur_nice; - unsigned long cur_nice_jiffies; - - cur_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE] - - j_dbs_info->prev_cpu_nice; - /* - * Assumption: nice time between sampling periods will - * be less than 2^32 jiffies for 32 bit sys - */ - cur_nice_jiffies = (unsigned long) - cputime64_to_jiffies64(cur_nice); - - j_dbs_info->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - idle_time += jiffies_to_usecs(cur_nice_jiffies); - } - - /* - * For the purpose of ondemand, waiting for disk IO is an - * indication that you're performance critical, and not that - * the system is actually idle. So subtract the iowait time - * from the cpu idle time. - */ - - if (dbs_tuners_ins.io_is_busy && idle_time >= iowait_time) - idle_time -= iowait_time; +define_get_cpu_dbs_routines(od_cpu_dbs_info); - if (unlikely(!wall_time || wall_time < idle_time)) - continue; - - load = 100 * (wall_time - idle_time) / wall_time; - - freq_avg = __cpufreq_driver_getavg(policy, j); - if (freq_avg <= 0) - freq_avg = policy->cur; - - load_freq = load * freq_avg; - if (load_freq > max_load_freq) - max_load_freq = load_freq; - } - - /* Check for frequency increase */ - if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { - /* If switching to max speed, apply sampling_down_factor */ - if (policy->cur < policy->max) - this_dbs_info->rate_mult = - dbs_tuners_ins.sampling_down_factor; - dbs_freq_increase(policy, policy->max); - return; - } - - /* Check for frequency decrease */ - /* if we cannot reduce the frequency anymore, break out early */ - if (policy->cur == policy->min) - return; - - /* - * The optimal frequency is the frequency that is the lowest that - * can support the current CPU usage without triggering the up - * policy. To be safe, we focus 10 points under the threshold. 
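A worked example of the decrease path being consolidated here, with illustrative numbers:

    #include <stdio.h>

    int main(void)
    {
            /* hypothetical sample: 40% load at the current 2 GHz frequency */
            unsigned int up_threshold = 80, down_differential = 10;
            unsigned int cur = 2000000, min = 800000; /* kHz */
            unsigned int load = 40;
            unsigned int load_freq = load * cur; /* "load in terms of freq" */

            if (load_freq < (up_threshold - down_differential) * cur) {
                    unsigned int freq_next =
                            load_freq / (up_threshold - down_differential);

                    if (freq_next < min)
                            freq_next = min;
                    /* 40 * 2000000 / 70 ~= 1142857 kHz, the lowest frequency
                     * expected to carry this load below the up threshold */
                    printf("freq_next = %u kHz\n", freq_next);
            }
            return 0;
    }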
- */ - if (max_load_freq < - (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * - policy->cur) { - unsigned int freq_next; - freq_next = max_load_freq / - (dbs_tuners_ins.up_threshold - - dbs_tuners_ins.down_differential); - - /* No longer fully busy, reset rate_mult */ - this_dbs_info->rate_mult = 1; - - if (freq_next < policy->min) - freq_next = policy->min; - - if (!dbs_tuners_ins.powersave_bias) { - __cpufreq_driver_target(policy, freq_next, - CPUFREQ_RELATION_L); - } else { - int freq = powersave_bias_target(policy, freq_next, - CPUFREQ_RELATION_L); - __cpufreq_driver_target(policy, freq, - CPUFREQ_RELATION_L); - } - } -} - -static void do_dbs_timer(struct work_struct *work) -{ - struct cpu_dbs_info_s *dbs_info = - container_of(work, struct cpu_dbs_info_s, work.work); - unsigned int cpu = dbs_info->cpu; - int sample_type = dbs_info->sample_type; - - int delay; - - mutex_lock(&dbs_info->timer_mutex); - - /* Common NORMAL_SAMPLE setup */ - dbs_info->sample_type = DBS_NORMAL_SAMPLE; - if (!dbs_tuners_ins.powersave_bias || - sample_type == DBS_NORMAL_SAMPLE) { - dbs_check_cpu(dbs_info); - if (dbs_info->freq_lo) { - /* Setup timer for SUB_SAMPLE */ - dbs_info->sample_type = DBS_SUB_SAMPLE; - delay = dbs_info->freq_hi_jiffies; - } else { - /* We want all CPUs to do sampling nearly on - * same jiffy - */ - delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate - * dbs_info->rate_mult); - - if (num_online_cpus() > 1) - delay -= jiffies % delay; - } - } else { - __cpufreq_driver_target(dbs_info->cur_policy, - dbs_info->freq_lo, CPUFREQ_RELATION_H); - delay = dbs_info->freq_lo_jiffies; - } - schedule_delayed_work_on(cpu, &dbs_info->work, delay); - mutex_unlock(&dbs_info->timer_mutex); -} - -static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) -{ - /* We want all CPUs to do sampling nearly on same jiffy */ - int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); - - if (num_online_cpus() > 1) - delay -= jiffies % delay; +static struct od_ops od_ops = { + .io_busy = should_io_be_busy, + .powersave_bias_init_cpu = ondemand_powersave_bias_init_cpu, + .powersave_bias_target = powersave_bias_target, + .freq_increase = dbs_freq_increase, +}; - dbs_info->sample_type = DBS_NORMAL_SAMPLE; - INIT_DEFERRABLE_WORK(&dbs_info->work, do_dbs_timer); - schedule_delayed_work_on(dbs_info->cpu, &dbs_info->work, delay); -} +static struct dbs_data od_dbs_data = { + .governor = GOV_ONDEMAND, + .attr_group = &od_attr_group, + .tuners = &od_tuners, + .get_cpu_cdbs = get_cpu_cdbs, + .get_cpu_dbs_info_s = get_cpu_dbs_info_s, + .gov_dbs_timer = od_dbs_timer, + .gov_check_cpu = od_check_cpu, + .gov_ops = &od_ops, +}; -static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +static int od_cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) { - cancel_delayed_work_sync(&dbs_info->work); + return cpufreq_governor_dbs(&od_dbs_data, policy, event); } -/* - * Not all CPUs want IO time to be accounted as busy; this dependson how - * efficient idling at a higher frequency/voltage is. - * Pavel Machek says this is not so for various generations of AMD and old - * Intel systems. - * Mike Chan (androidlcom) calis this is also not true for ARM. - * Because of this, whitelist specific known (series) of CPUs by default, and - * leave all others up to the user. - */ -static int should_io_be_busy(void) -{ -#if defined(CONFIG_X86) - /* - * For Intel, Core 2 (model 15) andl later have an efficient idle. 
- */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && - boot_cpu_data.x86 == 6 && - boot_cpu_data.x86_model >= 15) - return 1; +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static #endif - return 0; -} - -static int cpufreq_governor_dbs(struct cpufreq_policy *policy, - unsigned int event) -{ - unsigned int cpu = policy->cpu; - struct cpu_dbs_info_s *this_dbs_info; - unsigned int j; - int rc; - - this_dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - - switch (event) { - case CPUFREQ_GOV_START: - if ((!cpu_online(cpu)) || (!policy->cur)) - return -EINVAL; - - mutex_lock(&dbs_mutex); - - dbs_enable++; - for_each_cpu(j, policy->cpus) { - struct cpu_dbs_info_s *j_dbs_info; - j_dbs_info = &per_cpu(od_cpu_dbs_info, j); - j_dbs_info->cur_policy = policy; - - j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, - &j_dbs_info->prev_cpu_wall); - if (dbs_tuners_ins.ignore_nice) - j_dbs_info->prev_cpu_nice = - kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - } - this_dbs_info->cpu = cpu; - this_dbs_info->rate_mult = 1; - ondemand_powersave_bias_init_cpu(cpu); - /* - * Start the timerschedule work, when this governor - * is used for first time - */ - if (dbs_enable == 1) { - unsigned int latency; - - rc = sysfs_create_group(cpufreq_global_kobject, - &dbs_attr_group); - if (rc) { - mutex_unlock(&dbs_mutex); - return rc; - } - - /* policy latency is in nS. Convert it to uS first */ - latency = policy->cpuinfo.transition_latency / 1000; - if (latency == 0) - latency = 1; - /* Bring kernel and HW constraints together */ - min_sampling_rate = max(min_sampling_rate, - MIN_LATENCY_MULTIPLIER * latency); - dbs_tuners_ins.sampling_rate = - max(min_sampling_rate, - latency * LATENCY_MULTIPLIER); - dbs_tuners_ins.io_is_busy = should_io_be_busy(); - } - mutex_unlock(&dbs_mutex); - - mutex_init(&this_dbs_info->timer_mutex); - dbs_timer_init(this_dbs_info); - break; - - case CPUFREQ_GOV_STOP: - dbs_timer_exit(this_dbs_info); - - mutex_lock(&dbs_mutex); - mutex_destroy(&this_dbs_info->timer_mutex); - dbs_enable--; - mutex_unlock(&dbs_mutex); - if (!dbs_enable) - sysfs_remove_group(cpufreq_global_kobject, - &dbs_attr_group); - - break; - - case CPUFREQ_GOV_LIMITS: - mutex_lock(&this_dbs_info->timer_mutex); - if (policy->max < this_dbs_info->cur_policy->cur) - __cpufreq_driver_target(this_dbs_info->cur_policy, - policy->max, CPUFREQ_RELATION_H); - else if (policy->min > this_dbs_info->cur_policy->cur) - __cpufreq_driver_target(this_dbs_info->cur_policy, - policy->min, CPUFREQ_RELATION_L); - dbs_check_cpu(this_dbs_info); - mutex_unlock(&this_dbs_info->timer_mutex); - break; - } - return 0; -} +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = od_cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; static int __init cpufreq_gov_dbs_init(void) { u64 idle_time; int cpu = get_cpu(); + mutex_init(&od_dbs_data.mutex); idle_time = get_cpu_idle_time_us(cpu, NULL); put_cpu(); if (idle_time != -1ULL) { /* Idle micro accounting is supported. Use finer thresholds */ - dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; - dbs_tuners_ins.down_differential = - MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + od_tuners.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + od_tuners.down_differential = MICRO_FREQUENCY_DOWN_DIFFERENTIAL; /* * In nohz/micro accounting case we set the minimum frequency * not depending on HZ, but fixed (very low). The deferred * timer might skip some samples if idle/sleeping as needed. 
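The governor-init logic around this comment keys off get_cpu_idle_time_us() returning -1ULL when idle micro accounting is unavailable. A user-space model with a stubbed helper (the stub and the HZ=100 figure are assumptions):

    #include <stdio.h>

    #define MICRO_FREQUENCY_MIN_SAMPLE_RATE (10000) /* uS */
    #define MIN_SAMPLING_RATE_RATIO (2)

    /* stand-in for the kernel's get_cpu_idle_time_us(), which returns -1ULL
     * when NO_HZ idle micro accounting is not available */
    static unsigned long long stub_idle_time_us(int micro_accounting)
    {
            return micro_accounting ? 123456ULL : -1ULL;
    }

    int main(void)
    {
            unsigned long long idle_time = stub_idle_time_us(1);
            unsigned int min_sampling_rate;

            if (idle_time != -1ULL) /* finer thresholds are safe */
                    min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE;
            else /* need 10 ticks per measure; 100000 uS assumes HZ=100 */
                    min_sampling_rate = MIN_SAMPLING_RATE_RATIO * 100000;

            printf("min sampling rate: %u uS\n", min_sampling_rate);
            return 0;
    }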
*/ - min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; + od_dbs_data.min_sampling_rate = MICRO_FREQUENCY_MIN_SAMPLE_RATE; } else { /* For correct statistics, we need 10 ticks for each measure */ - min_sampling_rate = - MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10); + od_dbs_data.min_sampling_rate = MIN_SAMPLING_RATE_RATIO * + jiffies_to_usecs(10); } return cpufreq_register_governor(&cpufreq_gov_ondemand); @@ -766,7 +519,6 @@ static void __exit cpufreq_gov_dbs_exit(void) cpufreq_unregister_governor(&cpufreq_gov_ondemand); } - MODULE_AUTHOR("Venkatesh Pallipadi "); MODULE_AUTHOR("Alexey Starikovskiy "); MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index d03c21993052..a55b88eaf96a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -407,10 +407,4 @@ void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, unsigned int cpu); void cpufreq_frequency_table_put_attr(unsigned int cpu); - -/********************************************************************* - * Governor Helpers * - *********************************************************************/ -cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall); - #endif /* _LINUX_CPUFREQ_H */ -- cgit v1.2.3

From 69a37beabf1f0a6705c08e879bdd5d82ff6486c4 Mon Sep 17 00:00:00 2001
From: Youquan Song
Date: Fri, 26 Oct 2012 12:26:41 +0200
Subject: cpuidle: Quickly notice prediction failure for repeat mode

Predicting the future is difficult, and when the cpuidle governor's prediction fails it may choose a shallower C-state than it should, so quickly noticing such a failure is important for power saving.

The cpuidle menu governor has a method to predict a repeating pattern: if the last 8 C-state residencies are continuous and the same or very close, it predicts that the next residency will be the same. A real case is the turbostat utility (tools/power/x86/turbostat) in kernel 3.3 and earlier. On Sandy Bridge, turbostat reads 10 registers one by one, generating 10 IPIs that wake up idle CPUs, so the menu governor predicts repeat mode and expects another IPI to arrive soon, and it keeps the idle CPU in the C1 state even though the CPU is totally idle. However, after those 10 register reads turbostat sleeps for 5 seconds by default, so the idle CPU stays in C1 for a long time until a break event occurs. On an idle Sandy Bridge system, running "./turbostat -v" shows the deep C-state residency dangling between 70% and 99%; with the patched kernel it stays above 99.98%.

In this patch, a timer is armed when the menu governor detects repeat mode and chooses a shallow C-state. The timeout is set to a value greater than the predicted time, and we conclude that the repeat mode prediction failed if the timer fires. When repeat mode happens as expected, the CPU wakes from the C-state before the timeout and cancels the timer itself. When repeat mode does not happen, the timer fires, and the menu governor quickly notices the prediction failure and re-evaluates deeper C-states.
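For intuition, the repeat-mode test and the guard timer described above can be sketched in plain C as follows. This is an illustrative sketch only: the INTERVALS, STDDEV_THRESH and MAX_DEVIATION constants match the patch, but the function names and the user-space framing are invented for this example and are not the kernel's code.

#include <stdint.h>
#include <stdbool.h>

#define INTERVALS	8
#define STDDEV_THRESH	400	/* compared against the squared deviation */
#define MAX_DEVIATION	60

/*
 * Repeat-mode test: average the last 8 wakeup intervals and accept the
 * average as the prediction when the spread around it is small enough.
 */
static bool detect_repeat(const uint64_t intervals[INTERVALS],
			  uint64_t expected_us, uint64_t *predicted_us)
{
	uint64_t avg = 0, stddev = 0;
	int i;

	for (i = 0; i < INTERVALS; i++)
		avg += intervals[i];
	avg /= INTERVALS;

	/* if the avg is beyond the known next tick, it's worthless */
	if (!avg || avg > expected_us)
		return false;

	for (i = 0; i < INTERVALS; i++) {
		int64_t diff = (int64_t)(intervals[i] - avg);
		stddev += (uint64_t)(diff * diff);
	}
	stddev /= INTERVALS;

	if (stddev >= STDDEV_THRESH)
		return false;

	*predicted_us = avg;
	return true;
}

/*
 * Guard timer: time out well after the predicted residency, and only
 * arm it when the next tick is still far away (4 * timeout < expected),
 * which is the condition the patch checks before starting the hrtimer.
 */
static bool arm_guard_timer(uint64_t predicted_us, uint64_t expected_us,
			    uint64_t *timeout_us)
{
	*timeout_us = 2 * (predicted_us + MAX_DEVIATION);
	return 4 * (*timeout_us) < expected_us;
}

If the guard timer fires, the repeat prediction was wrong and deeper C-states are re-evaluated; if the CPU wakes first, the timer is simply cancelled.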
Below is another case which clearly shows the benefit of the patch:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <signal.h>
#include <pthread.h>

volatile int *shutdown;
volatile long *count;
int delay = 20;
int loop = 8;

void usage(void)
{
	fprintf(stderr,
		"Usage: idle_predict [options]\n"
		"  --help   -h  Print this help\n"
		"  --thread -n  Thread number\n"
		"  --loop   -l  Loop times in shallow Cstate\n"
		"  --delay  -t  Sleep time (uS) in shallow Cstate\n");
}

void *simple_loop(void *arg)
{
	int idle_num = 1;

	while (!(*shutdown)) {
		*count = *count + 1;
		if (idle_num % loop)
			usleep(delay);
		else {
			/* sleep 1 second */
			usleep(1000000);
			idle_num = 0;
		}
		idle_num++;
	}
	return NULL;
}

static void sighand(int sig)
{
	*shutdown = 1;
}

int main(int argc, char *argv[])
{
	sigset_t sigset;
	int signum = SIGALRM;
	int i, c, thread_num = 8;
	pthread_t pt[1024];
	static char optstr[] = "n:l:t:h";

	while ((c = getopt(argc, argv, optstr)) != EOF)
		switch (c) {
		case 'n':
			thread_num = atoi(optarg);
			break;
		case 'l':
			loop = atoi(optarg);
			break;
		case 't':
			delay = atoi(optarg);
			break;
		case 'h':
		default:
			usage();
			exit(1);
		}

	printf("thread=%d,loop=%d,delay=%d\n", thread_num, loop, delay);
	count = malloc(sizeof(long));
	shutdown = malloc(sizeof(int));
	*count = 0;
	*shutdown = 0;

	sigemptyset(&sigset);
	sigaddset(&sigset, signum);
	sigprocmask(SIG_BLOCK, &sigset, NULL);
	signal(SIGINT, sighand);
	signal(SIGTERM, sighand);

	for (i = 0; i < thread_num; i++)
		pthread_create(&pt[i], NULL, simple_loop, NULL);
	for (i = 0; i < thread_num; i++)
		pthread_join(pt[i], NULL);
	exit(0);
}

Get powertop v2 from git://github.com/fenrus75/powertop and build it. After building the test application above, run it. The test platform can be Intel Sandy Bridge or another recent platform.

#./idle_predict -l 10 &
#./powertop

We will find that the deep C-state dangles between 40%~100% and that much time is spent in the C1 state. This is because the menu governor wrongly predicts that repeat mode continues, so it chooses the shallow C1 state even though the CPU has the chance to sleep for 1 second in a deep C-state. With the patched kernel, the deep C-state residency stays above 99.6%.

Signed-off-by: Rik van Riel
Signed-off-by: Youquan Song
Signed-off-by: Rafael J.
Wysocki --- drivers/cpuidle/governors/menu.c | 75 +++++++++++++++++++++++++++++++++++++--- include/linux/tick.h | 6 ++++ kernel/time/tick-sched.c | 4 +++ 3 files changed, 80 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 5b1f2c372c1f..37c0ff6c805c 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -28,6 +28,13 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 +/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ +#define MAX_DEVIATION 60 + +static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); +static DEFINE_PER_CPU(int, hrtimer_status); +/* menu hrtimer mode */ +enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT}; /* * Concepts and ideas behind the menu governor @@ -191,17 +198,42 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } +/* Cancel the hrtimer if it is not triggered yet */ +void menu_hrtimer_cancel(void) +{ + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); + + /* The timer is still not time out*/ + if (per_cpu(hrtimer_status, cpu)) { + hrtimer_cancel(hrtmr); + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + } +} +EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); + +/* Call back for hrtimer is triggered */ +static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) +{ + int cpu = smp_processor_id(); + + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; + + return HRTIMER_NORESTART; +} + /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... then use the * average of these 8 points as the estimated value. */ -static void detect_repeating_patterns(struct menu_device *data) +static int detect_repeating_patterns(struct menu_device *data) { int i; uint64_t avg = 0; uint64_t stddev = 0; /* contains the square of the std deviation */ + int ret = 0; /* first calculate average and standard deviation of the past */ for (i = 0; i < INTERVALS; i++) @@ -210,7 +242,7 @@ static void detect_repeating_patterns(struct menu_device *data) /* if the avg is beyond the known next tick, it's worthless */ if (avg > data->expected_us) - return; + return 0; for (i = 0; i < INTERVALS; i++) stddev += (data->intervals[i] - avg) * @@ -223,8 +255,12 @@ static void detect_repeating_patterns(struct menu_device *data) * repeating pattern and predict we keep doing this. 
*/ - if (avg && stddev < STDDEV_THRESH) + if (avg && stddev < STDDEV_THRESH) { data->predicted_us = avg; + ret = 1; + } + + return ret; } /** @@ -240,6 +276,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; + int repeat = 0, low_predicted = 0; + int cpu = smp_processor_id(); + struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -274,7 +313,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], RESOLUTION * DECAY); - detect_repeating_patterns(data); + repeat = detect_repeating_patterns(data); /* * We want to default to C1 (hlt), not to busy polling @@ -295,8 +334,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) + if (s->target_residency > data->predicted_us) { + low_predicted = 1; continue; + } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -309,6 +350,27 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) } } + /* not deepest C-state chosen for low predicted residency */ + if (low_predicted) { + unsigned int timer_us = 0; + + /* + * Set a timer to detect whether this sleep is much + * longer than repeat mode predicted. If the timer + * triggers, the code will evaluate whether to put + * the CPU into a deeper C-state. + * The timer is cancelled on CPU wakeup. + */ + timer_us = 2 * (data->predicted_us + MAX_DEVIATION); + + if (repeat && (4 * timer_us < data->expected_us)) { + hrtimer_start(hrtmr, ns_to_ktime(1000 * timer_us), + HRTIMER_MODE_REL_PINNED); + /* In repeat case, menu hrtimer is started */ + per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; + } + } + return data->last_state_idx; } @@ -399,6 +461,9 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); + struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); + hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); diff --git a/include/linux/tick.h b/include/linux/tick.h index f37fceb69b73..1a6567b48492 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -142,4 +142,10 @@ static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; } static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } # endif /* !NO_HZ */ +# ifdef CONFIG_CPU_IDLE_GOV_MENU +extern void menu_hrtimer_cancel(void); +# else +static inline void menu_hrtimer_cancel(void) {} +# endif /* CONFIG_CPU_IDLE_GOV_MENU */ + #endif diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index a40260885265..6f337068dc4c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -526,6 +526,8 @@ void tick_nohz_irq_exit(void) if (!ts->inidle) return; + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); __tick_nohz_idle_enter(ts); } @@ -621,6 +623,8 @@ void tick_nohz_idle_exit(void) ts->inidle = 0; + /* Cancel the timer because CPU already waken up from the C-states*/ + menu_hrtimer_cancel(); if (ts->idle_active || ts->tick_stopped) now = ktime_get(); -- cgit v1.2.3 From 349631e0e411fefa2fed7e0a30b97704562dbd6b Mon Sep 17 00:00:00 2001 From: 
Daniel Lezcano Date: Wed, 31 Oct 2012 01:05:16 +0100 Subject: cpuidle / sysfs: move structure declaration into the sysfs.c file The structure cpuidle_state_kobj is not used anywhere except in the sysfs.c file. The definition of this structure is not needed in the cpuidle header file. This patch moves it to the sysfs.c file in order to encapsulate the code a bit more. Signed-off-by: Daniel Lezcano Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/sysfs.c | 7 +++++++ include/linux/cpuidle.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index ed87399bb02b..f15c1e56e16f 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -297,6 +297,13 @@ static struct attribute *cpuidle_state_default_attrs[] = { NULL }; +struct cpuidle_state_kobj { + struct cpuidle_state *state; + struct cpuidle_state_usage *state_usage; + struct completion kobj_unregister; + struct kobject kobj; +}; + #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 279b1eaa8b73..7daf0e3b93bd 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -82,13 +82,6 @@ cpuidle_set_statedata(struct cpuidle_state_usage *st_usage, void *data) st_usage->driver_data = data; } -struct cpuidle_state_kobj { - struct cpuidle_state *state; - struct cpuidle_state_usage *state_usage; - struct completion kobj_unregister; - struct kobject kobj; -}; - struct cpuidle_device { unsigned int registered:1; unsigned int enabled:1; -- cgit v1.2.3 From 42f67f2acab2b7179c0d1ab234869e391448dfa6 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 31 Oct 2012 16:44:45 +0000 Subject: cpuidle: move driver's refcount to cpuidle We want to support different cpuidle drivers co-existing together. In this case we should move the refcount to the cpuidle_driver structure to handle several drivers at a time. Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. 
Wysocki --- drivers/cpuidle/driver.c | 13 ++++++++----- include/linux/cpuidle.h | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 87db3877fead..39ba8e181e96 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -16,7 +16,6 @@ static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); -int cpuidle_driver_refcount; static void set_power_states(struct cpuidle_driver *drv) { @@ -61,6 +60,8 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) if (!drv->power_specified) set_power_states(drv); + drv->refcnt = 0; + cpuidle_curr_driver = drv; spin_unlock(&cpuidle_driver_lock); @@ -92,7 +93,7 @@ void cpuidle_unregister_driver(struct cpuidle_driver *drv) spin_lock(&cpuidle_driver_lock); - if (!WARN_ON(cpuidle_driver_refcount > 0)) + if (!WARN_ON(drv->refcnt > 0)) cpuidle_curr_driver = NULL; spin_unlock(&cpuidle_driver_lock); @@ -106,7 +107,7 @@ struct cpuidle_driver *cpuidle_driver_ref(void) spin_lock(&cpuidle_driver_lock); drv = cpuidle_curr_driver; - cpuidle_driver_refcount++; + drv->refcnt++; spin_unlock(&cpuidle_driver_lock); return drv; @@ -114,10 +115,12 @@ struct cpuidle_driver *cpuidle_driver_ref(void) void cpuidle_driver_unref(void) { + struct cpuidle_driver *drv = cpuidle_curr_driver; + spin_lock(&cpuidle_driver_lock); - if (!WARN_ON(cpuidle_driver_refcount <= 0)) - cpuidle_driver_refcount--; + if (drv && !WARN_ON(drv->refcnt <= 0)) + drv->refcnt--; spin_unlock(&cpuidle_driver_lock); } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 7daf0e3b93bd..d08e1afa4919 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -124,6 +124,7 @@ static inline int cpuidle_get_last_residency(struct cpuidle_device *dev) struct cpuidle_driver { const char *name; struct module *owner; + int refcnt; unsigned int power_specified:1; /* set to 1 to use the core cpuidle time keeping (for all states). */ -- cgit v1.2.3

From bf4d1b5ddb78f86078ac6ae0415802d5f0c68f92 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano
Date: Wed, 31 Oct 2012 16:44:48 +0000
Subject: cpuidle: support multiple drivers

With the new tegra3 and big.LITTLE [1] architectures, several cpus with different characteristics (latencies and states) can co-exist in one system. The cpuidle framework has the limitation of handling only identical cpus. This patch removes that limitation by introducing multiple driver support for cpuidle.

The option is configurable at compile time and should be enabled for the architectures mentioned above, so there is no impact on the other platforms when the option is disabled. The option defaults to 'n'. Note that multiple driver support is also compatible with the existing drivers: even if just one driver is needed, all the cpus will be tied to this driver, using an extra small chunk of processor memory.

The multiple driver support uses a per-cpu driver pointer instead of a global variable, and accesses to this variable are done from a cpu context. In order to keep compatibility with the existing drivers, the functions 'cpuidle_register_driver' and 'cpuidle_unregister_driver' register the specified driver for all the cpus.

The semantics of the output of /sys/devices/system/cpu/cpuidle/current_driver remain the same, except that the driver name is now related to the current cpu. The /sys/devices/system/cpu/cpu[0-9]/cpuidle/driver/name files are added, allowing the per-cpu driver name to be read.
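As a usage sketch only (hypothetical platform code, not part of this patch): with the per-cpu API introduced here, a big.LITTLE-style platform could tie a different driver to each cluster roughly as follows, assuming little_idle_driver, big_idle_driver and a cpu_is_little() helper are provided elsewhere by the platform:

static int __init platform_cpuidle_init(void)
{
	int cpu, fail_cpu, ret;

	for_each_present_cpu(cpu) {
		/* pick the driver matching this cpu's cluster */
		struct cpuidle_driver *drv = cpu_is_little(cpu) ?
				&little_idle_driver : &big_idle_driver;

		ret = cpuidle_register_cpu_driver(drv, cpu);
		if (ret)
			goto unwind;
	}
	return 0;

unwind:
	/* unregister only the cpus that were registered before the failure */
	fail_cpu = cpu;
	for_each_present_cpu(cpu) {
		if (cpu == fail_cpu)
			break;
		cpuidle_unregister_cpu_driver(cpu_is_little(cpu) ?
				&little_idle_driver : &big_idle_driver, cpu);
	}
	return ret;
}

This mirrors the register-then-unwind pattern that __cpuidle_register_all_cpu_driver() itself uses in the patch when one per-cpu registration fails.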
[1] http://lwn.net/Articles/481055/ Signed-off-by: Daniel Lezcano Acked-by: Peter De Schrijver Signed-off-by: Rafael J. Wysocki --- drivers/cpuidle/Kconfig | 9 +++ drivers/cpuidle/cpuidle.c | 36 ++++++---- drivers/cpuidle/cpuidle.h | 4 +- drivers/cpuidle/driver.c | 166 ++++++++++++++++++++++++++++++++++++++----- drivers/cpuidle/sysfs.c | 174 ++++++++++++++++++++++++++++++++++++++++++++-- include/linux/cpuidle.h | 7 +- 6 files changed, 356 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index a76b689e553b..234ae651b38f 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -9,6 +9,15 @@ config CPU_IDLE If you're using an ACPI-enabled platform, you should say Y here. +config CPU_IDLE_MULTIPLE_DRIVERS + bool "Support multiple cpuidle drivers" + depends on CPU_IDLE + default n + help + Allows the cpuidle framework to use different drivers for each CPU. + This is useful if you have a system with different CPU latencies and + states. If unsure say N. + config CPU_IDLE_GOV_LADDER bool depends on CPU_IDLE diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index ce4cac706dd1..711dd83fd3ba 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -68,7 +68,7 @@ static cpuidle_enter_t cpuidle_enter_ops; int cpuidle_play_dead(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int i, dead_state = -1; int power_usage = -1; @@ -128,7 +128,7 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv, int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; int next_state, entered_state; if (off) @@ -141,6 +141,8 @@ int cpuidle_idle_call(void) if (!dev || !dev->enabled) return -EBUSY; + drv = cpuidle_get_cpu_driver(dev); + /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(drv, dev); if (need_resched()) { @@ -312,15 +314,19 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} int cpuidle_enable_device(struct cpuidle_device *dev) { int ret, i; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv; if (!dev) return -EINVAL; if (dev->enabled) return 0; + + drv = cpuidle_get_cpu_driver(dev); + if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->state_count) dev->state_count = drv->state_count; @@ -335,7 +341,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) poll_idle_init(drv); - if ((ret = cpuidle_add_state_sysfs(dev))) + ret = cpuidle_add_device_sysfs(dev); + if (ret) return ret; if (cpuidle_curr_governor->enable && @@ -356,7 +363,7 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return 0; fail_sysfs: - cpuidle_remove_state_sysfs(dev); + cpuidle_remove_device_sysfs(dev); return ret; } @@ -372,17 +379,20 @@ EXPORT_SYMBOL_GPL(cpuidle_enable_device); */ void cpuidle_disable_device(struct cpuidle_device *dev) { + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + if (!dev || !dev->enabled) return; - if (!cpuidle_get_driver() || !cpuidle_curr_governor) + + if (!drv || !cpuidle_curr_governor) return; dev->enabled = 0; if (cpuidle_curr_governor->disable) - cpuidle_curr_governor->disable(cpuidle_get_driver(), dev); + cpuidle_curr_governor->disable(drv, dev); - cpuidle_remove_state_sysfs(dev); + 
cpuidle_remove_device_sysfs(dev); enabled_devices--; } @@ -398,9 +408,9 @@ EXPORT_SYMBOL_GPL(cpuidle_disable_device); static int __cpuidle_register_device(struct cpuidle_device *dev) { int ret; - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (!try_module_get(cpuidle_driver->owner)) + if (!try_module_get(drv->owner)) return -EINVAL; per_cpu(cpuidle_devices, dev->cpu) = dev; @@ -421,7 +431,7 @@ err_coupled: err_sysfs: list_del(&dev->device_list); per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(cpuidle_driver->owner); + module_put(drv->owner); return ret; } @@ -460,7 +470,7 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *cpuidle_driver = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); if (dev->registered == 0) return; @@ -477,7 +487,7 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_resume_and_unlock(); - module_put(cpuidle_driver->owner); + module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index f6b0923c2253..ee97e9672ecf 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -28,8 +28,8 @@ struct device; extern int cpuidle_add_interface(struct device *dev); extern void cpuidle_remove_interface(struct device *dev); -extern int cpuidle_add_state_sysfs(struct cpuidle_device *device); -extern void cpuidle_remove_state_sysfs(struct cpuidle_device *device); +extern int cpuidle_add_device_sysfs(struct cpuidle_device *device); +extern void cpuidle_remove_device_sysfs(struct cpuidle_device *device); extern int cpuidle_add_sysfs(struct cpuidle_device *dev); extern void cpuidle_remove_sysfs(struct cpuidle_device *dev); diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 8246662f594a..3af841fb397a 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -14,9 +14,11 @@ #include "cpuidle.h" -static struct cpuidle_driver *cpuidle_curr_driver; DEFINE_SPINLOCK(cpuidle_driver_lock); +static void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu); +static struct cpuidle_driver * __cpuidle_get_cpu_driver(int cpu); + static void set_power_states(struct cpuidle_driver *drv) { int i; @@ -47,12 +49,7 @@ static void __cpuidle_driver_init(struct cpuidle_driver *drv) set_power_states(drv); } -static void cpuidle_set_driver(struct cpuidle_driver *drv) -{ - cpuidle_curr_driver = drv; -} - -static int __cpuidle_register_driver(struct cpuidle_driver *drv) +static int __cpuidle_register_driver(struct cpuidle_driver *drv, int cpu) { if (!drv || !drv->state_count) return -EINVAL; @@ -60,23 +57,84 @@ static int __cpuidle_register_driver(struct cpuidle_driver *drv) if (cpuidle_disabled()) return -ENODEV; - if (cpuidle_get_driver()) + if (__cpuidle_get_cpu_driver(cpu)) return -EBUSY; __cpuidle_driver_init(drv); - cpuidle_set_driver(drv); + __cpuidle_set_cpu_driver(drv, cpu); return 0; } -static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) +static void __cpuidle_unregister_driver(struct cpuidle_driver *drv, int cpu) { - if (drv != cpuidle_get_driver()) + if (drv != __cpuidle_get_cpu_driver(cpu)) return; if (!WARN_ON(drv->refcnt > 0)) - cpuidle_set_driver(NULL); + __cpuidle_set_cpu_driver(NULL, cpu); +} + +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS + +static DEFINE_PER_CPU(struct cpuidle_driver *, cpuidle_drivers); + +static void 
__cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + per_cpu(cpuidle_drivers, cpu) = drv; +} + +static struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) +{ + return per_cpu(cpuidle_drivers, cpu); +} + +static void __cpuidle_unregister_all_cpu_driver(struct cpuidle_driver *drv) +{ + int cpu; + for_each_present_cpu(cpu) + __cpuidle_unregister_driver(drv, cpu); +} + +static int __cpuidle_register_all_cpu_driver(struct cpuidle_driver *drv) +{ + int ret = 0; + int i, cpu; + + for_each_present_cpu(cpu) { + ret = __cpuidle_register_driver(drv, cpu); + if (ret) + break; + } + + if (ret) + for_each_present_cpu(i) { + if (i == cpu) + break; + __cpuidle_unregister_driver(drv, i); + } + + + return ret; +} + +int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + int ret; + + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); } /** @@ -88,7 +146,7 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) int ret; spin_lock(&cpuidle_driver_lock); - ret = __cpuidle_register_driver(drv); + ret = __cpuidle_register_all_cpu_driver(drv); spin_unlock(&cpuidle_driver_lock); return ret; @@ -96,13 +154,48 @@ int cpuidle_register_driver(struct cpuidle_driver *drv) EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** - * cpuidle_get_driver - return the current driver + * cpuidle_unregister_driver - unregisters a driver + * @drv: the driver */ -struct cpuidle_driver *cpuidle_get_driver(void) +void cpuidle_unregister_driver(struct cpuidle_driver *drv) +{ + spin_lock(&cpuidle_driver_lock); + __cpuidle_unregister_all_cpu_driver(drv); + spin_unlock(&cpuidle_driver_lock); +} +EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); + +#else + +static struct cpuidle_driver *cpuidle_curr_driver; + +static inline void __cpuidle_set_cpu_driver(struct cpuidle_driver *drv, int cpu) +{ + cpuidle_curr_driver = drv; +} + +static inline struct cpuidle_driver *__cpuidle_get_cpu_driver(int cpu) { return cpuidle_curr_driver; } -EXPORT_SYMBOL_GPL(cpuidle_get_driver); + +/** + * cpuidle_register_driver - registers a driver + * @drv: the driver + */ +int cpuidle_register_driver(struct cpuidle_driver *drv) +{ + int ret, cpu; + + cpu = get_cpu(); + spin_lock(&cpuidle_driver_lock); + ret = __cpuidle_register_driver(drv, cpu); + spin_unlock(&cpuidle_driver_lock); + put_cpu(); + + return ret; +} +EXPORT_SYMBOL_GPL(cpuidle_register_driver); /** * cpuidle_unregister_driver - unregisters a driver @@ -110,11 +203,50 @@ EXPORT_SYMBOL_GPL(cpuidle_get_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { + int cpu; + + cpu = get_cpu(); spin_lock(&cpuidle_driver_lock); - __cpuidle_unregister_driver(drv); + __cpuidle_unregister_driver(drv, cpu); spin_unlock(&cpuidle_driver_lock); + put_cpu(); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver); +#endif + +/** + * cpuidle_get_driver - return the current driver + */ +struct cpuidle_driver *cpuidle_get_driver(void) +{ + struct cpuidle_driver *drv; + int cpu; + + cpu = get_cpu(); + drv = __cpuidle_get_cpu_driver(cpu); + put_cpu(); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_driver); + +/** + * cpuidle_get_cpu_driver - return the driver tied with a cpu + */ +struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv; + + if (!dev) + return NULL; 
+ + spin_lock(&cpuidle_driver_lock); + drv = __cpuidle_get_cpu_driver(dev->cpu); + spin_unlock(&cpuidle_driver_lock); + + return drv; +} +EXPORT_SYMBOL_GPL(cpuidle_get_cpu_driver); struct cpuidle_driver *cpuidle_driver_ref(void) { diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 49b1f4bcc1b3..340942946106 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -364,17 +364,17 @@ static inline void cpuidle_free_state_kobj(struct cpuidle_device *device, int i) } /** - * cpuidle_add_driver_sysfs - adds driver-specific sysfs attributes + * cpuidle_add_state_sysfs - adds cpuidle states sysfs attributes * @device: the target device */ -int cpuidle_add_state_sysfs(struct cpuidle_device *device) +static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; - struct cpuidle_driver *drv = cpuidle_get_driver(); + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ - for (i = 0; i < device->state_count; i++) { + for (i = 0; i < drv->state_count; i++) { kobj = kzalloc(sizeof(struct cpuidle_state_kobj), GFP_KERNEL); if (!kobj) goto error_state; @@ -401,10 +401,10 @@ error_state: } /** - * cpuidle_remove_driver_sysfs - removes driver-specific sysfs attributes + * cpuidle_remove_driver_sysfs - removes the cpuidle states sysfs attributes * @device: the target device */ -void cpuidle_remove_state_sysfs(struct cpuidle_device *device) +static void cpuidle_remove_state_sysfs(struct cpuidle_device *device) { int i; @@ -412,6 +412,168 @@ void cpuidle_remove_state_sysfs(struct cpuidle_device *device) cpuidle_free_state_kobj(device, i); } +#ifdef CONFIG_CPU_IDLE_MULTIPLE_DRIVERS +#define kobj_to_driver_kobj(k) container_of(k, struct cpuidle_driver_kobj, kobj) +#define attr_to_driver_attr(a) container_of(a, struct cpuidle_driver_attr, attr) + +#define define_one_driver_ro(_name, show) \ + static struct cpuidle_driver_attr attr_driver_##_name = \ + __ATTR(_name, 0644, show, NULL) + +struct cpuidle_driver_kobj { + struct cpuidle_driver *drv; + struct completion kobj_unregister; + struct kobject kobj; +}; + +struct cpuidle_driver_attr { + struct attribute attr; + ssize_t (*show)(struct cpuidle_driver *, char *); + ssize_t (*store)(struct cpuidle_driver *, const char *, size_t); +}; + +static ssize_t show_driver_name(struct cpuidle_driver *drv, char *buf) +{ + ssize_t ret; + + spin_lock(&cpuidle_driver_lock); + ret = sprintf(buf, "%s\n", drv ? 
drv->name : "none"); + spin_unlock(&cpuidle_driver_lock); + + return ret; +} + +static void cpuidle_driver_sysfs_release(struct kobject *kobj) +{ + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + complete(&driver_kobj->kobj_unregister); +} + +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, + char * buf) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->show) + ret = dattr->show(driver_kobj->drv, buf); + + return ret; +} + +static ssize_t cpuidle_driver_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) +{ + int ret = -EIO; + struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); + struct cpuidle_driver_attr *dattr = attr_to_driver_attr(attr); + + if (dattr->store) + ret = dattr->store(driver_kobj->drv, buf, size); + + return ret; +} + +define_one_driver_ro(name, show_driver_name); + +static const struct sysfs_ops cpuidle_driver_sysfs_ops = { + .show = cpuidle_driver_show, + .store = cpuidle_driver_store, +}; + +static struct attribute *cpuidle_driver_default_attrs[] = { + &attr_driver_name.attr, + NULL +}; + +static struct kobj_type ktype_driver_cpuidle = { + .sysfs_ops = &cpuidle_driver_sysfs_ops, + .default_attrs = cpuidle_driver_default_attrs, + .release = cpuidle_driver_sysfs_release, +}; + +/** + * cpuidle_add_driver_sysfs - adds the driver name sysfs attribute + * @device: the target device + */ +static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) +{ + struct cpuidle_driver_kobj *kdrv; + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + int ret; + + kdrv = kzalloc(sizeof(*kdrv), GFP_KERNEL); + if (!kdrv) + return -ENOMEM; + + kdrv->drv = drv; + init_completion(&kdrv->kobj_unregister); + + ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, + &dev->kobj, "driver"); + if (ret) { + kfree(kdrv); + return ret; + } + + kobject_uevent(&kdrv->kobj, KOBJ_ADD); + dev->kobj_driver = kdrv; + + return ret; +} + +/** + * cpuidle_remove_driver_sysfs - removes the driver name sysfs attribute + * @device: the target device + */ +static void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + struct cpuidle_driver_kobj *kdrv = dev->kobj_driver; + kobject_put(&kdrv->kobj); + wait_for_completion(&kdrv->kobj_unregister); + kfree(kdrv); +} +#else +static inline int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) +{ + return 0; +} + +static inline void cpuidle_remove_driver_sysfs(struct cpuidle_device *dev) +{ + ; +} +#endif + +/** + * cpuidle_add_device_sysfs - adds device specific sysfs attributes + * @device: the target device + */ +int cpuidle_add_device_sysfs(struct cpuidle_device *device) +{ + int ret; + + ret = cpuidle_add_state_sysfs(device); + if (ret) + return ret; + + ret = cpuidle_add_driver_sysfs(device); + if (ret) + cpuidle_remove_state_sysfs(device); + return ret; +} + +/** + * cpuidle_remove_device_sysfs : removes device specific sysfs attributes + * @device : the target device + */ +void cpuidle_remove_device_sysfs(struct cpuidle_device *device) +{ + cpuidle_remove_driver_sysfs(device); + cpuidle_remove_state_sysfs(device); +} + /** * cpuidle_add_sysfs - creates a sysfs instance for the target device * @dev: the target device diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index d08e1afa4919..3711b34dc4f9 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -91,7 +91,7 @@ struct 
cpuidle_device { int state_count; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; - + struct cpuidle_driver_kobj *kobj_driver; struct list_head device_list; struct kobject kobj; struct completion kobj_unregister; @@ -157,6 +157,10 @@ extern int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)); extern int cpuidle_play_dead(void); +extern struct cpuidle_driver *cpuidle_get_cpu_driver(struct cpuidle_device *dev); +extern int cpuidle_register_cpu_driver(struct cpuidle_driver *drv, int cpu); +extern void cpuidle_unregister_cpu_driver(struct cpuidle_driver *drv, int cpu); + #else static inline void disable_cpuidle(void) { } static inline int cpuidle_idle_call(void) { return -ENODEV; } @@ -183,7 +187,6 @@ static inline int cpuidle_wrap_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index)) { return -ENODEV; } static inline int cpuidle_play_dead(void) {return -ENODEV; } - #endif #ifdef CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED -- cgit v1.2.3

From 7e6fdd4bad033fa2d73716377b184fa975b0d985 Mon Sep 17 00:00:00 2001
From: Rajagopal Venkat
Date: Fri, 26 Oct 2012 01:50:09 +0200
Subject: PM / devfreq: Core updates to support devices which can idle

Prepare the devfreq core framework to support devices which can idle. When device idleness is detected, perhaps through runtime PM, some mechanism is needed to suspend devfreq load monitoring and to resume it when the device comes back online. The present code continues monitoring until the device is removed from the devfreq core. This patch introduces the following design changes:

- Use per-device work instead of global work to monitor device load. This enables suspend/resume of a device's devfreq and reduces the complexity of the monitoring code.

- Decouple the delayed-work-based load monitoring logic from the core by introducing helper functions to be used by governors. This gives governors the flexibility either to use the delayed-work-based monitoring functions or to implement their own mechanism.

- Make the devfreq core interact with governors via events that request specific actions. These events include starting and stopping devfreq, which sets the ground for adding suspend/resume events.

The devfreq APIs are not modified and are kept intact.

Signed-off-by: Rajagopal Venkat
Acked-by: MyungJoo Ham
Signed-off-by: Rafael J. Wysocki
--- Documentation/ABI/testing/sysfs-class-devfreq | 8 - drivers/devfreq/devfreq.c | 442 +++++++++++--------------- drivers/devfreq/governor.h | 11 + drivers/devfreq/governor_performance.c | 16 +- drivers/devfreq/governor_powersave.c | 16 +- drivers/devfreq/governor_simpleondemand.c | 24 ++ drivers/devfreq/governor_userspace.c | 23 +- include/linux/devfreq.h | 34 +- 8 files changed, 278 insertions(+), 296 deletions(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 23d78b5aab11..89283b1b0240 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -21,14 +21,6 @@ Description: The /sys/class/devfreq/.../cur_freq shows the current frequency of the corresponding devfreq object. -What: /sys/class/devfreq/.../central_polling -Date: September 2011 -Contact: MyungJoo Ham -Description: - The /sys/class/devfreq/.../central_polling shows whether - the devfreq ojbect is using devfreq-provided central - polling mechanism or not.
- What: /sys/class/devfreq/.../polling_interval Date: September 2011 Contact: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index b146d76f04cf..1aaf1aeb1f1d 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -30,17 +30,11 @@ struct class *devfreq_class; /* - * devfreq_work periodically monitors every registered device. - * The minimum polling interval is one jiffy. The polling interval is - * determined by the minimum polling period among all polling devfreq - * devices. The resolution of polling interval is one jiffy. + * devfreq core provides delayed work based load monitoring helper + * functions. Governors can use these or can implement their own + * monitoring mechanism. */ -static bool polling; static struct workqueue_struct *devfreq_wq; -static struct delayed_work devfreq_work; - -/* wait removing if this is to be removed */ -static struct devfreq *wait_remove_device; /* The list of all device-devfreq */ static LIST_HEAD(devfreq_list); @@ -72,6 +66,8 @@ static struct devfreq *find_device_devfreq(struct device *dev) return ERR_PTR(-ENODEV); } +/* Load monitoring helper functions for governors use */ + /** * update_devfreq() - Reevaluate the device and configure frequency. * @devfreq: the devfreq instance. @@ -120,6 +116,152 @@ int update_devfreq(struct devfreq *devfreq) return err; } +/** + * devfreq_monitor() - Periodically poll devfreq objects. + * @work: the work struct used to run devfreq_monitor periodically. + * + */ +static void devfreq_monitor(struct work_struct *work) +{ + int err; + struct devfreq *devfreq = container_of(work, + struct devfreq, work.work); + + mutex_lock(&devfreq->lock); + err = update_devfreq(devfreq); + if (err) + dev_err(&devfreq->dev, "dvfs failed with (%d) error\n", err); + + queue_delayed_work(devfreq_wq, &devfreq->work, + msecs_to_jiffies(devfreq->profile->polling_ms)); + mutex_unlock(&devfreq->lock); +} + +/** + * devfreq_monitor_start() - Start load monitoring of devfreq instance + * @devfreq: the devfreq instance. + * + * Helper function for starting devfreq device load monitoing. By + * default delayed work based monitoring is supported. Function + * to be called from governor in response to DEVFREQ_GOV_START + * event when device is added to devfreq framework. + */ +void devfreq_monitor_start(struct devfreq *devfreq) +{ + INIT_DEFERRABLE_WORK(&devfreq->work, devfreq_monitor); + if (devfreq->profile->polling_ms) + queue_delayed_work(devfreq_wq, &devfreq->work, + msecs_to_jiffies(devfreq->profile->polling_ms)); +} + +/** + * devfreq_monitor_stop() - Stop load monitoring of a devfreq instance + * @devfreq: the devfreq instance. + * + * Helper function to stop devfreq device load monitoing. Function + * to be called from governor in response to DEVFREQ_GOV_STOP + * event when device is removed from devfreq framework. + */ +void devfreq_monitor_stop(struct devfreq *devfreq) +{ + cancel_delayed_work_sync(&devfreq->work); +} + +/** + * devfreq_monitor_suspend() - Suspend load monitoring of a devfreq instance + * @devfreq: the devfreq instance. + * + * Helper function to suspend devfreq device load monitoing. Function + * to be called from governor in response to DEVFREQ_GOV_SUSPEND + * event or when polling interval is set to zero. + * + * Note: Though this function is same as devfreq_monitor_stop(), + * intentionally kept separate to provide hooks for collecting + * transition statistics. 
+ */ +void devfreq_monitor_suspend(struct devfreq *devfreq) +{ + mutex_lock(&devfreq->lock); + if (devfreq->stop_polling) { + mutex_unlock(&devfreq->lock); + return; + } + + devfreq->stop_polling = true; + mutex_unlock(&devfreq->lock); + cancel_delayed_work_sync(&devfreq->work); +} + +/** + * devfreq_monitor_resume() - Resume load monitoring of a devfreq instance + * @devfreq: the devfreq instance. + * + * Helper function to resume devfreq device load monitoing. Function + * to be called from governor in response to DEVFREQ_GOV_RESUME + * event or when polling interval is set to non-zero. + */ +void devfreq_monitor_resume(struct devfreq *devfreq) +{ + mutex_lock(&devfreq->lock); + if (!devfreq->stop_polling) + goto out; + + if (!delayed_work_pending(&devfreq->work) && + devfreq->profile->polling_ms) + queue_delayed_work(devfreq_wq, &devfreq->work, + msecs_to_jiffies(devfreq->profile->polling_ms)); + devfreq->stop_polling = false; + +out: + mutex_unlock(&devfreq->lock); +} + +/** + * devfreq_interval_update() - Update device devfreq monitoring interval + * @devfreq: the devfreq instance. + * @delay: new polling interval to be set. + * + * Helper function to set new load monitoring polling interval. Function + * to be called from governor in response to DEVFREQ_GOV_INTERVAL event. + */ +void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay) +{ + unsigned int cur_delay = devfreq->profile->polling_ms; + unsigned int new_delay = *delay; + + mutex_lock(&devfreq->lock); + devfreq->profile->polling_ms = new_delay; + + if (devfreq->stop_polling) + goto out; + + /* if new delay is zero, stop polling */ + if (!new_delay) { + mutex_unlock(&devfreq->lock); + cancel_delayed_work_sync(&devfreq->work); + return; + } + + /* if current delay is zero, start polling with new delay */ + if (!cur_delay) { + queue_delayed_work(devfreq_wq, &devfreq->work, + msecs_to_jiffies(devfreq->profile->polling_ms)); + goto out; + } + + /* if current delay is greater than new delay, restart polling */ + if (cur_delay > new_delay) { + mutex_unlock(&devfreq->lock); + cancel_delayed_work_sync(&devfreq->work); + mutex_lock(&devfreq->lock); + if (!devfreq->stop_polling) + queue_delayed_work(devfreq_wq, &devfreq->work, + msecs_to_jiffies(devfreq->profile->polling_ms)); + } +out: + mutex_unlock(&devfreq->lock); +} + /** * devfreq_notifier_call() - Notify that the device frequency requirements * has been changed out of devfreq framework. @@ -143,59 +285,32 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type, } /** - * _remove_devfreq() - Remove devfreq from the device. + * _remove_devfreq() - Remove devfreq from the list and release its resources. * @devfreq: the devfreq struct * @skip: skip calling device_unregister(). - * - * Note that the caller should lock devfreq->lock before calling - * this. _remove_devfreq() will unlock it and free devfreq - * internally. devfreq_list_lock should be locked by the caller - * as well (not relased at return) - * - * Lock usage: - * devfreq->lock: locked before call. - * unlocked at return (and freed) - * devfreq_list_lock: locked before call. - * kept locked at return. - * if devfreq is centrally polled. 
- * - * Freed memory: - * devfreq */ static void _remove_devfreq(struct devfreq *devfreq, bool skip) { - if (!mutex_is_locked(&devfreq->lock)) { - WARN(true, "devfreq->lock must be locked by the caller.\n"); - return; - } - if (!devfreq->governor->no_central_polling && - !mutex_is_locked(&devfreq_list_lock)) { - WARN(true, "devfreq_list_lock must be locked by the caller.\n"); + mutex_lock(&devfreq_list_lock); + if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) { + mutex_unlock(&devfreq_list_lock); + dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n"); return; } + list_del(&devfreq->node); + mutex_unlock(&devfreq_list_lock); - if (devfreq->being_removed) - return; - - devfreq->being_removed = true; + devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL); if (devfreq->profile->exit) devfreq->profile->exit(devfreq->dev.parent); - if (devfreq->governor->exit) - devfreq->governor->exit(devfreq); - if (!skip && get_device(&devfreq->dev)) { device_unregister(&devfreq->dev); put_device(&devfreq->dev); } - if (!devfreq->governor->no_central_polling) - list_del(&devfreq->node); - - mutex_unlock(&devfreq->lock); mutex_destroy(&devfreq->lock); - kfree(devfreq); } @@ -210,130 +325,8 @@ static void _remove_devfreq(struct devfreq *devfreq, bool skip) static void devfreq_dev_release(struct device *dev) { struct devfreq *devfreq = to_devfreq(dev); - bool central_polling = !devfreq->governor->no_central_polling; - - /* - * If devfreq_dev_release() was called by device_unregister() of - * _remove_devfreq(), we cannot mutex_lock(&devfreq->lock) and - * being_removed is already set. This also partially checks the case - * where devfreq_dev_release() is called from a thread other than - * the one called _remove_devfreq(); however, this case is - * dealt completely with another following being_removed check. - * - * Because being_removed is never being - * unset, we do not need to worry about race conditions on - * being_removed. - */ - if (devfreq->being_removed) - return; - if (central_polling) - mutex_lock(&devfreq_list_lock); - - mutex_lock(&devfreq->lock); - - /* - * Check being_removed flag again for the case where - * devfreq_dev_release() was called in a thread other than the one - * possibly called _remove_devfreq(). - */ - if (devfreq->being_removed) { - mutex_unlock(&devfreq->lock); - goto out; - } - - /* devfreq->lock is unlocked and removed in _removed_devfreq() */ _remove_devfreq(devfreq, true); - -out: - if (central_polling) - mutex_unlock(&devfreq_list_lock); -} - -/** - * devfreq_monitor() - Periodically poll devfreq objects. - * @work: the work struct used to run devfreq_monitor periodically. 
- * - */ -static void devfreq_monitor(struct work_struct *work) -{ - static unsigned long last_polled_at; - struct devfreq *devfreq, *tmp; - int error; - unsigned long jiffies_passed; - unsigned long next_jiffies = ULONG_MAX, now = jiffies; - struct device *dev; - - /* Initially last_polled_at = 0, polling every device at bootup */ - jiffies_passed = now - last_polled_at; - last_polled_at = now; - if (jiffies_passed == 0) - jiffies_passed = 1; - - mutex_lock(&devfreq_list_lock); - list_for_each_entry_safe(devfreq, tmp, &devfreq_list, node) { - mutex_lock(&devfreq->lock); - dev = devfreq->dev.parent; - - /* Do not remove tmp for a while */ - wait_remove_device = tmp; - - if (devfreq->governor->no_central_polling || - devfreq->next_polling == 0) { - mutex_unlock(&devfreq->lock); - continue; - } - mutex_unlock(&devfreq_list_lock); - - /* - * Reduce more next_polling if devfreq_wq took an extra - * delay. (i.e., CPU has been idled.) - */ - if (devfreq->next_polling <= jiffies_passed) { - error = update_devfreq(devfreq); - - /* Remove a devfreq with an error. */ - if (error && error != -EAGAIN) { - - dev_err(dev, "Due to update_devfreq error(%d), devfreq(%s) is removed from the device\n", - error, devfreq->governor->name); - - /* - * Unlock devfreq before locking the list - * in order to avoid deadlock with - * find_device_devfreq or others - */ - mutex_unlock(&devfreq->lock); - mutex_lock(&devfreq_list_lock); - /* Check if devfreq is already removed */ - if (IS_ERR(find_device_devfreq(dev))) - continue; - mutex_lock(&devfreq->lock); - /* This unlocks devfreq->lock and free it */ - _remove_devfreq(devfreq, false); - continue; - } - devfreq->next_polling = devfreq->polling_jiffies; - } else { - devfreq->next_polling -= jiffies_passed; - } - - if (devfreq->next_polling) - next_jiffies = (next_jiffies > devfreq->next_polling) ? - devfreq->next_polling : next_jiffies; - - mutex_unlock(&devfreq->lock); - mutex_lock(&devfreq_list_lock); - } - wait_remove_device = NULL; - mutex_unlock(&devfreq_list_lock); - - if (next_jiffies > 0 && next_jiffies < ULONG_MAX) { - polling = true; - queue_delayed_work(devfreq_wq, &devfreq_work, next_jiffies); - } else { - polling = false; - } } /** @@ -357,16 +350,13 @@ struct devfreq *devfreq_add_device(struct device *dev, return ERR_PTR(-EINVAL); } - - if (!governor->no_central_polling) { - mutex_lock(&devfreq_list_lock); - devfreq = find_device_devfreq(dev); - mutex_unlock(&devfreq_list_lock); - if (!IS_ERR(devfreq)) { - dev_err(dev, "%s: Unable to create devfreq for the device. It already has one.\n", __func__); - err = -EINVAL; - goto err_out; - } + mutex_lock(&devfreq_list_lock); + devfreq = find_device_devfreq(dev); + mutex_unlock(&devfreq_list_lock); + if (!IS_ERR(devfreq)) { + dev_err(dev, "%s: Unable to create devfreq for the device. 
It already has one.\n", __func__); + err = -EINVAL; + goto err_out; } devfreq = kzalloc(sizeof(struct devfreq), GFP_KERNEL); @@ -386,48 +376,41 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->governor = governor; devfreq->previous_freq = profile->initial_freq; devfreq->data = data; - devfreq->next_polling = devfreq->polling_jiffies - = msecs_to_jiffies(devfreq->profile->polling_ms); devfreq->nb.notifier_call = devfreq_notifier_call; dev_set_name(&devfreq->dev, dev_name(dev)); err = device_register(&devfreq->dev); if (err) { put_device(&devfreq->dev); + mutex_unlock(&devfreq->lock); goto err_dev; } - if (governor->init) - err = governor->init(devfreq); - if (err) - goto err_init; - mutex_unlock(&devfreq->lock); - if (governor->no_central_polling) - goto out; - mutex_lock(&devfreq_list_lock); - list_add(&devfreq->node, &devfreq_list); + mutex_unlock(&devfreq_list_lock); - if (devfreq_wq && devfreq->next_polling && !polling) { - polling = true; - queue_delayed_work(devfreq_wq, &devfreq_work, - devfreq->next_polling); + err = devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_START, NULL); + if (err) { + dev_err(dev, "%s: Unable to start governor for the device\n", + __func__); + goto err_init; } - mutex_unlock(&devfreq_list_lock); -out: + return devfreq; err_init: + list_del(&devfreq->node); device_unregister(&devfreq->dev); err_dev: - mutex_unlock(&devfreq->lock); kfree(devfreq); err_out: return ERR_PTR(err); } +EXPORT_SYMBOL(devfreq_add_device); /** * devfreq_remove_device() - Remove devfreq feature from a device. @@ -435,30 +418,14 @@ err_out: */ int devfreq_remove_device(struct devfreq *devfreq) { - bool central_polling; - if (!devfreq) return -EINVAL; - central_polling = !devfreq->governor->no_central_polling; - - if (central_polling) { - mutex_lock(&devfreq_list_lock); - while (wait_remove_device == devfreq) { - mutex_unlock(&devfreq_list_lock); - schedule(); - mutex_lock(&devfreq_list_lock); - } - } - - mutex_lock(&devfreq->lock); - _remove_devfreq(devfreq, false); /* it unlocks devfreq->lock */ - - if (central_polling) - mutex_unlock(&devfreq_list_lock); + _remove_devfreq(devfreq, false); return 0; } +EXPORT_SYMBOL(devfreq_remove_device); static ssize_t show_governor(struct device *dev, struct device_attribute *attr, char *buf) @@ -490,35 +457,13 @@ static ssize_t store_polling_interval(struct device *dev, if (ret != 1) goto out; - mutex_lock(&df->lock); - df->profile->polling_ms = value; - df->next_polling = df->polling_jiffies - = msecs_to_jiffies(value); - mutex_unlock(&df->lock); - + df->governor->event_handler(df, DEVFREQ_GOV_INTERVAL, &value); ret = count; - if (df->governor->no_central_polling) - goto out; - - mutex_lock(&devfreq_list_lock); - if (df->next_polling > 0 && !polling) { - polling = true; - queue_delayed_work(devfreq_wq, &devfreq_work, - df->next_polling); - } - mutex_unlock(&devfreq_list_lock); out: return ret; } -static ssize_t show_central_polling(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "%d\n", - !to_devfreq(dev)->governor->no_central_polling); -} - static ssize_t store_min_freq(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -590,7 +535,6 @@ static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr, static struct device_attribute devfreq_attrs[] = { __ATTR(governor, S_IRUGO, show_governor, NULL), __ATTR(cur_freq, S_IRUGO, show_freq, NULL), - __ATTR(central_polling, S_IRUGO, show_central_polling, NULL), __ATTR(polling_interval, 
S_IRUGO | S_IWUSR, show_polling_interval, store_polling_interval), __ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq), @@ -598,23 +542,6 @@ static struct device_attribute devfreq_attrs[] = { { }, }; -/** - * devfreq_start_polling() - Initialize data structure for devfreq framework and - * start polling registered devfreq devices. - */ -static int __init devfreq_start_polling(void) -{ - mutex_lock(&devfreq_list_lock); - polling = false; - devfreq_wq = create_freezable_workqueue("devfreq_wq"); - INIT_DEFERRABLE_WORK(&devfreq_work, devfreq_monitor); - mutex_unlock(&devfreq_list_lock); - - devfreq_monitor(&devfreq_work.work); - return 0; -} -late_initcall(devfreq_start_polling); - static int __init devfreq_init(void) { devfreq_class = class_create(THIS_MODULE, "devfreq"); @@ -622,7 +549,15 @@ static int __init devfreq_init(void) pr_err("%s: couldn't create class\n", __FILE__); return PTR_ERR(devfreq_class); } + + devfreq_wq = create_freezable_workqueue("devfreq_wq"); + if (IS_ERR(devfreq_wq)) { + class_destroy(devfreq_class); + pr_err("%s: couldn't create workqueue\n", __FILE__); + return PTR_ERR(devfreq_wq); + } devfreq_class->dev_attrs = devfreq_attrs; + return 0; } subsys_initcall(devfreq_init); @@ -630,6 +565,7 @@ subsys_initcall(devfreq_init); static void __exit devfreq_exit(void) { class_destroy(devfreq_class); + destroy_workqueue(devfreq_wq); } module_exit(devfreq_exit); diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index ea7f13c58ded..bb3aff32d627 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -18,7 +18,18 @@ #define to_devfreq(DEV) container_of((DEV), struct devfreq, dev) +/* Devfreq events */ +#define DEVFREQ_GOV_START 0x1 +#define DEVFREQ_GOV_STOP 0x2 +#define DEVFREQ_GOV_INTERVAL 0x3 + /* Caution: devfreq->lock must be locked before calling update_devfreq */ extern int update_devfreq(struct devfreq *devfreq); +extern void devfreq_monitor_start(struct devfreq *devfreq); +extern void devfreq_monitor_stop(struct devfreq *devfreq); +extern void devfreq_monitor_suspend(struct devfreq *devfreq); +extern void devfreq_monitor_resume(struct devfreq *devfreq); +extern void devfreq_interval_update(struct devfreq *devfreq, + unsigned int *delay); #endif /* _GOVERNOR_H */ diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index af75ddd4f158..eea3f9bd7894 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -26,14 +26,22 @@ static int devfreq_performance_func(struct devfreq *df, return 0; } -static int performance_init(struct devfreq *devfreq) +static int devfreq_performance_handler(struct devfreq *devfreq, + unsigned int event, void *data) { - return update_devfreq(devfreq); + int ret = 0; + + if (event == DEVFREQ_GOV_START) { + mutex_lock(&devfreq->lock); + ret = update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + } + + return ret; } const struct devfreq_governor devfreq_performance = { .name = "performance", - .init = performance_init, .get_target_freq = devfreq_performance_func, - .no_central_polling = true, + .event_handler = devfreq_performance_handler, }; diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index fec0cdbd2477..2868d98ed3e2 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -23,14 +23,22 @@ static int devfreq_powersave_func(struct devfreq *df, return 0; } -static int powersave_init(struct devfreq *devfreq) +static int 
devfreq_powersave_handler(struct devfreq *devfreq, + unsigned int event, void *data) { - return update_devfreq(devfreq); + int ret = 0; + + if (event == DEVFREQ_GOV_START) { + mutex_lock(&devfreq->lock); + ret = update_devfreq(devfreq); + mutex_unlock(&devfreq->lock); + } + + return ret; } const struct devfreq_governor devfreq_powersave = { .name = "powersave", - .init = powersave_init, .get_target_freq = devfreq_powersave_func, - .no_central_polling = true, + .event_handler = devfreq_powersave_handler, }; diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index a2e3eae79011..3716a659122b 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -12,6 +12,7 @@ #include #include #include +#include "governor.h" /* Default constants for DevFreq-Simple-Ondemand (DFSO) */ #define DFSO_UPTHRESHOLD (90) @@ -88,7 +89,30 @@ static int devfreq_simple_ondemand_func(struct devfreq *df, return 0; } +static int devfreq_simple_ondemand_handler(struct devfreq *devfreq, + unsigned int event, void *data) +{ + switch (event) { + case DEVFREQ_GOV_START: + devfreq_monitor_start(devfreq); + break; + + case DEVFREQ_GOV_STOP: + devfreq_monitor_stop(devfreq); + break; + + case DEVFREQ_GOV_INTERVAL: + devfreq_interval_update(devfreq, (unsigned int *)data); + break; + default: + break; + } + + return 0; +} + const struct devfreq_governor devfreq_simple_ondemand = { .name = "simple_ondemand", .get_target_freq = devfreq_simple_ondemand_func, + .event_handler = devfreq_simple_ondemand_handler, }; diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index 0681246fc89d..7067555bd444 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -116,10 +116,27 @@ static void userspace_exit(struct devfreq *devfreq) devfreq->data = NULL; } +static int devfreq_userspace_handler(struct devfreq *devfreq, + unsigned int event, void *data) +{ + int ret = 0; + + switch (event) { + case DEVFREQ_GOV_START: + ret = userspace_init(devfreq); + break; + case DEVFREQ_GOV_STOP: + userspace_exit(devfreq); + break; + default: + break; + } + + return ret; +} + const struct devfreq_governor devfreq_userspace = { .name = "userspace", .get_target_freq = devfreq_userspace_func, - .init = userspace_init, - .exit = userspace_exit, - .no_central_polling = true, + .event_handler = devfreq_userspace_handler, }; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 281c72a3b9d5..9cdffde74bb5 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -91,25 +91,18 @@ struct devfreq_dev_profile { * status of the device (load = busy_time / total_time). * If no_central_polling is set, this callback is called * only with update_devfreq() notified by OPP. - * @init Called when the devfreq is being attached to a device - * @exit Called when the devfreq is being removed from a - * device. Governor should stop any internal routines - * before return because related data may be - * freed after exit(). - * @no_central_polling Do not use devfreq's central polling mechanism. - * When this is set, devfreq will not call - * get_target_freq with devfreq_monitor(). However, - * devfreq will call get_target_freq with - * devfreq_update() notified by OPP framework. + * @event_handler Callback for devfreq core framework to notify events + * to governors. 
Events include per device governor + * init and exit, opp changes out of devfreq, suspend + * and resume of per device devfreq during device idle. * * Note that the callbacks are called with devfreq->lock locked by devfreq. */ struct devfreq_governor { const char name[DEVFREQ_NAME_LEN]; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); - int (*init)(struct devfreq *this); - void (*exit)(struct devfreq *this); - const bool no_central_polling; + int (*event_handler)(struct devfreq *devfreq, + unsigned int event, void *data); }; /** @@ -124,18 +117,13 @@ struct devfreq_governor { * @nb notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. - * @polling_jiffies interval in jiffies. + * @work delayed work for load monitoring. * @previous_freq previously configured frequency value. - * @next_polling the number of remaining jiffies to poll with - * "devfreq_monitor" executions to reevaluate - * frequency/voltage of the device. Set by - * profile's polling_ms interval. * @data Private data of the governor. The devfreq framework does not * touch this. - * @being_removed a flag to mark that this object is being removed in - * order to prevent trying to remove the object multiple times. * @min_freq Limit minimum frequency requested by user (0: none) * @max_freq Limit maximum frequency requested by user (0: none) + * @stop_polling devfreq polling status of a device. * * This structure stores the devfreq information for a give device. * @@ -153,17 +141,15 @@ struct devfreq { struct devfreq_dev_profile *profile; const struct devfreq_governor *governor; struct notifier_block nb; + struct delayed_work work; - unsigned long polling_jiffies; unsigned long previous_freq; - unsigned int next_polling; void *data; /* private data for governors */ - bool being_removed; - unsigned long min_freq; unsigned long max_freq; + bool stop_polling; }; #if defined(CONFIG_PM_DEVFREQ) -- cgit v1.2.3 From 206c30cfeb7c05dfb9fdfd81b1deb933627e43c1 Mon Sep 17 00:00:00 2001 From: Rajagopal Venkat Date: Fri, 26 Oct 2012 01:50:18 +0200 Subject: PM / devfreq: Add suspend and resume apis Add devfreq suspend/resume apis for devfreq users. This patch supports suspend and resume of devfreq load monitoring, required for devices which can idle. Signed-off-by: Rajagopal Venkat Acked-by: MyungJoo Ham Signed-off-by: Rafael J. Wysocki --- drivers/devfreq/devfreq.c | 28 ++++++++++++++++++++++++++++ drivers/devfreq/governor.h | 2 ++ drivers/devfreq/governor_simpleondemand.c | 9 +++++++++ include/linux/devfreq.h | 12 ++++++++++++ 4 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 1aaf1aeb1f1d..999600da21c7 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -427,6 +427,34 @@ int devfreq_remove_device(struct devfreq *devfreq) } EXPORT_SYMBOL(devfreq_remove_device); +/** + * devfreq_suspend_device() - Suspend devfreq of a device. + * @devfreq: the devfreq instance to be suspended + */ +int devfreq_suspend_device(struct devfreq *devfreq) +{ + if (!devfreq) + return -EINVAL; + + return devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_SUSPEND, NULL); +} +EXPORT_SYMBOL(devfreq_suspend_device); + +/** + * devfreq_resume_device() - Resume devfreq of a device. 
+ * @devfreq: the devfreq instance to be resumed + */ +int devfreq_resume_device(struct devfreq *devfreq) +{ + if (!devfreq) + return -EINVAL; + + return devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_RESUME, NULL); +} +EXPORT_SYMBOL(devfreq_resume_device); + static ssize_t show_governor(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index bb3aff32d627..26432ac0a398 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -22,6 +22,8 @@ #define DEVFREQ_GOV_START 0x1 #define DEVFREQ_GOV_STOP 0x2 #define DEVFREQ_GOV_INTERVAL 0x3 +#define DEVFREQ_GOV_SUSPEND 0x4 +#define DEVFREQ_GOV_RESUME 0x5 /* Caution: devfreq->lock must be locked before calling update_devfreq */ extern int update_devfreq(struct devfreq *devfreq); diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index 3716a659122b..b5cf0fb24efe 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -104,6 +104,15 @@ static int devfreq_simple_ondemand_handler(struct devfreq *devfreq, case DEVFREQ_GOV_INTERVAL: devfreq_interval_update(devfreq, (unsigned int *)data); break; + + case DEVFREQ_GOV_SUSPEND: + devfreq_monitor_suspend(devfreq); + break; + + case DEVFREQ_GOV_RESUME: + devfreq_monitor_resume(devfreq); + break; + default: break; } diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 9cdffde74bb5..ee243a3229b8 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -158,6 +158,8 @@ extern struct devfreq *devfreq_add_device(struct device *dev, const struct devfreq_governor *governor, void *data); extern int devfreq_remove_device(struct devfreq *devfreq); +extern int devfreq_suspend_device(struct devfreq *devfreq); +extern int devfreq_resume_device(struct devfreq *devfreq); /* Helper functions for devfreq user device driver with OPP. */ extern struct opp *devfreq_recommended_opp(struct device *dev, @@ -211,6 +213,16 @@ static int devfreq_remove_device(struct devfreq *devfreq) return 0; } +static int devfreq_suspend_device(struct devfreq *devfreq) +{ + return 0; +} + +static int devfreq_resume_device(struct devfreq *devfreq) +{ + return 0; +} + static struct opp *devfreq_recommended_opp(struct device *dev, unsigned long *freq, u32 flags) { -- cgit v1.2.3 From 7f98a905dca6e4f144cdd4462edeac00c2bdc379 Mon Sep 17 00:00:00 2001 From: Rajagopal Venkat Date: Fri, 26 Oct 2012 01:50:26 +0200 Subject: PM / devfreq: Add current freq callback in device profile Devfreq returns governor predicted frequency as current frequency via sysfs interface. But device may not support all frequencies that governor predicts. So add a callback in device profile to get current freq from driver. Also add a new sysfs node to expose governor predicted next target frequency. Signed-off-by: Rajagopal Venkat Acked-by: MyungJoo Ham Signed-off-by: Rafael J. 
Wysocki --- Documentation/ABI/testing/sysfs-class-devfreq | 11 ++++++++++- drivers/devfreq/devfreq.c | 14 ++++++++++++++ include/linux/devfreq.h | 3 +++ 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index 89283b1b0240..e6cf08e6734d 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -19,7 +19,16 @@ Date: September 2011 Contact: MyungJoo Ham Description: The /sys/class/devfreq/.../cur_freq shows the current - frequency of the corresponding devfreq object. + frequency of the corresponding devfreq object. Same as + target_freq when get_cur_freq() is not implemented by + devfreq driver. + +What: /sys/class/devfreq/.../target_freq +Date: September 2012 +Contact: Rajagopal Venkat +Description: + The /sys/class/devfreq/.../target_freq shows the next governor + predicted target frequency of the corresponding devfreq object. What: /sys/class/devfreq/.../polling_interval Date: September 2011 diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 999600da21c7..f2f8a976c465 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -463,6 +463,19 @@ static ssize_t show_governor(struct device *dev, static ssize_t show_freq(struct device *dev, struct device_attribute *attr, char *buf) +{ + unsigned long freq; + struct devfreq *devfreq = to_devfreq(dev); + + if (devfreq->profile->get_cur_freq && + !devfreq->profile->get_cur_freq(devfreq->dev.parent, &freq)) + return sprintf(buf, "%lu\n", freq); + + return sprintf(buf, "%lu\n", devfreq->previous_freq); +} + +static ssize_t show_target_freq(struct device *dev, + struct device_attribute *attr, char *buf) { return sprintf(buf, "%lu\n", to_devfreq(dev)->previous_freq); } @@ -563,6 +576,7 @@ static ssize_t show_max_freq(struct device *dev, struct device_attribute *attr, static struct device_attribute devfreq_attrs[] = { __ATTR(governor, S_IRUGO, show_governor, NULL), __ATTR(cur_freq, S_IRUGO, show_freq, NULL), + __ATTR(target_freq, S_IRUGO, show_target_freq, NULL), __ATTR(polling_interval, S_IRUGO | S_IWUSR, show_polling_interval, store_polling_interval), __ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq), diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index ee243a3229b8..7e2e2ea4a70f 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -66,6 +66,8 @@ struct devfreq_dev_status { * explained above with "DEVFREQ_FLAG_*" macros. * @get_dev_status The device should provide the current performance * status to devfreq, which is used by governors. + * @get_cur_freq The device should provide the current frequency + * at which it is operating. * @exit An optional callback that is called when devfreq * is removing the devfreq object due to error or * from devfreq_remove_device() call. If the user @@ -79,6 +81,7 @@ struct devfreq_dev_profile { int (*target)(struct device *dev, unsigned long *freq, u32 flags); int (*get_dev_status)(struct device *dev, struct devfreq_dev_status *stat); + int (*get_cur_freq)(struct device *dev, unsigned long *freq); void (*exit)(struct device *dev); }; -- cgit v1.2.3 From 5133375bb46a0d6c3fba07097caed7aa5e629ccd Mon Sep 17 00:00:00 2001 From: "Rafael J. 
Wysocki" Date: Thu, 15 Nov 2012 13:15:37 +0100 Subject: ACPI / PM: Fix build problem when CONFIG_ACPI or CONFIG_PM is not set Commit e5cc8ef (ACPI / PM: Provide ACPI PM callback routines for subsystems) introduced a build problem occurring if CONFIG_ACPI is unset or CONFIG_PM is unset and errno.h is not included before acpi.h, because in that case ENODEV used in acpi.h is undefined. Fix the issue by making acpi.h include errno.h. Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0676b6ac57fa..5fdd87271518 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -25,6 +25,7 @@ #ifndef _LINUX_ACPI_H #define _LINUX_ACPI_H +#include <linux/errno.h> #include <linux/ioport.h> /* for struct resource */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From e09651fcc295a7dc802f38d9494f5b860dd90bca Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 29 Oct 2012 08:02:23 -0500 Subject: PM / devfreq: documentation cleanups for devfreq header struct parameters need to have ':' in documentation for scripts/kernel-doc to parse appropriately. Fix the errors reported by: ./scripts/kernel-doc include/linux/devfreq.h >/dev/null Cc: Rajagopal Venkat Cc: MyungJoo Ham Cc: Kyungmin Park Cc: "Rafael J. Wysocki" Cc: Kevin Hilman Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Acked-by: Randy Dunlap Acked-by: MyungJoo Ham Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 54 ++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 7e2e2ea4a70f..1461fb2355ad 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -25,12 +25,12 @@ struct devfreq; * struct devfreq_dev_status - Data given from devfreq user device to * governors. Represents the performance * statistics. - * @total_time The total time represented by this instance of + * @total_time: The total time represented by this instance of * devfreq_dev_status - * @busy_time The time that the device was working among the + * @busy_time: The time that the device was working among the * total_time. - * @current_frequency The operating frequency. - * @private_data An entry not specified by the devfreq framework. + * @current_frequency: The operating frequency. + * @private_data: An entry not specified by the devfreq framework. * A device and a specific governor may have their * own protocol with private_data. However, because * this is governor-specific, a governor using this @@ -54,21 +54,21 @@ struct devfreq_dev_status { /** * struct devfreq_dev_profile - Devfreq's user device profile - * @initial_freq The operating frequency when devfreq_add_device() is + * @initial_freq: The operating frequency when devfreq_add_device() is * called. - * @polling_ms The polling interval in ms. 0 disables polling. - * @target The device should set its operating frequency at + * @polling_ms: The polling interval in ms. 0 disables polling. + * @target: The device should set its operating frequency at * freq or lowest-upper-than-freq value. If freq is * higher than any operable frequency, set maximum. * Before returning, target function should set * freq at the current frequency. * The "flags" parameter's possible values are * explained above with "DEVFREQ_FLAG_*" macros.
- * @get_dev_status The device should provide the current performance + * @get_dev_status: The device should provide the current performance * status to devfreq, which is used by governors. - * @get_cur_freq The device should provide the current frequency + * @get_cur_freq: The device should provide the current frequency * at which it is operating. - * @exit An optional callback that is called when devfreq + * @exit: An optional callback that is called when devfreq * is removing the devfreq object due to error or * from devfreq_remove_device() call. If the user * has registered devfreq->nb at a notifier-head, @@ -87,14 +87,14 @@ struct devfreq_dev_profile { /** * struct devfreq_governor - Devfreq policy governor - * @name Governor's name - * @get_target_freq Returns desired operating frequency for the device. + * @name: Governor's name + * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run * devfreq_dev_profile.get_dev_status() to get the * status of the device (load = busy_time / total_time). * If no_central_polling is set, this callback is called * only with update_devfreq() notified by OPP. - * @event_handler Callback for devfreq core framework to notify events + * @event_handler: Callback for devfreq core framework to notify events * to governors. Events include per device governor * init and exit, opp changes out of devfreq, suspend * and resume of per device devfreq during device idle. @@ -110,23 +110,23 @@ struct devfreq_governor { /** * struct devfreq - Device devfreq structure - * @node list node - contains the devices with devfreq that have been + * @node: list node - contains the devices with devfreq that have been * registered. - * @lock a mutex to protect accessing devfreq. - * @dev device registered by devfreq class. dev.parent is the device + * @lock: a mutex to protect accessing devfreq. + * @dev: device registered by devfreq class. dev.parent is the device * using devfreq. - * @profile device-specific devfreq profile - * @governor method how to choose frequency based on the usage. - * @nb notifier block used to notify devfreq object that it should + * @profile: device-specific devfreq profile + * @governor: method how to choose frequency based on the usage. + * @nb: notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. - * @work delayed work for load monitoring. - * @previous_freq previously configured frequency value. - * @data Private data of the governor. The devfreq framework does not + * @work: delayed work for load monitoring. + * @previous_freq: previously configured frequency value. + * @data: Private data of the governor. The devfreq framework does not * touch this. - * @min_freq Limit minimum frequency requested by user (0: none) - * @max_freq Limit maximum frequency requested by user (0: none) - * @stop_polling devfreq polling status of a device. + * @min_freq: Limit minimum frequency requested by user (0: none) + * @max_freq: Limit maximum frequency requested by user (0: none) + * @stop_polling: devfreq polling status of a device. * * This structure stores the devfreq information for a give device. * @@ -186,9 +186,9 @@ extern const struct devfreq_governor devfreq_simple_ondemand; /** * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq * and devfreq_add_device - * @ upthreshold If the load is over this value, the frequency jumps. 
+ * @upthreshold: If the load is over this value, the frequency jumps. * Specify 0 to use the default. Valid value = 0 to 100. - * @ downdifferential If the load is under upthreshold - downdifferential, + * @downdifferential: If the load is under upthreshold - downdifferential, * the governor may consider slowing the frequency down. * Specify 0 to use the default. Valid value = 0 to 100. * downdifferential < upthreshold must hold. -- cgit v1.2.3 From e552bbaf5b987f57c43e6981a452b8a3c700b1ae Mon Sep 17 00:00:00 2001 From: Jonghwa Lee Date: Thu, 23 Aug 2012 20:00:46 +0900 Subject: PM / devfreq: Add sysfs node for representing frequency transition information. This patch adds a sysfs node which can be used to get information about frequency transitions. It presents a transition table containing the total number of transitions between frequency states and the time spent in each state. It is inspired by CPUFREQ's stats driver. Signed-off-by: Jonghwa Lee [Added Documentation/ABI entry, updated kernel-doc, and resolved merge conflict] Signed-off-by: MyungJoo Ham --- Documentation/ABI/testing/sysfs-class-devfreq | 11 +++ drivers/devfreq/devfreq.c | 101 ++++++++++++++++++++++++++ include/linux/devfreq.h | 15 ++++ 3 files changed, 127 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-devfreq b/Documentation/ABI/testing/sysfs-class-devfreq index e672ccb02e7f..40f98a9428dc 100644 --- a/Documentation/ABI/testing/sysfs-class-devfreq +++ b/Documentation/ABI/testing/sysfs-class-devfreq @@ -44,6 +44,17 @@ Description: (/sys/class/devfreq/.../central_polling is 0), this value may be useless. +What: /sys/class/devfreq/.../trans_stat +Date: October 2012 +Contact: MyungJoo Ham +Description: + This ABI shows the statistics of devfreq behavior on a + specific device. It shows the time spent in each state and + the number of transitions between states. + In order to activate this ABI, the devfreq target device + driver should provide the list of available frequencies + with its profile.
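[Editor's note: an illustration, not part of the patch above. To have trans_stat populated, a devfreq driver hands its frequency list to the core through the freq_table/max_state fields this commit adds to struct devfreq_dev_profile. A minimal sketch, assuming hypothetical example_target() and example_get_dev_status() callbacks:

	static unsigned int example_freq_table[] = {
		100000, 200000, 400000,	/* kHz; a hypothetical OPP list */
	};

	static struct devfreq_dev_profile example_profile = {
		.initial_freq	= 400000,
		.polling_ms	= 100,	/* reevaluate load every 100 ms */
		.target		= example_target,
		.get_dev_status	= example_get_dev_status,
		.freq_table	= example_freq_table,
		.max_state	= ARRAY_SIZE(example_freq_table),
	};

With freq_table in place, devfreq_update_status() in the code below can map each frequency to a state level and account the time spent in, and transitions between, states.]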
+ What: /sys/class/devfreq/.../userspace/set_freq Date: September 2011 Contact: MyungJoo Ham diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index c44e562bdfe0..bf6de38190cf 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -66,6 +66,51 @@ static struct devfreq *find_device_devfreq(struct device *dev) return ERR_PTR(-ENODEV); } +/** + * devfreq_get_freq_level() - Lookup freq_table for the frequency + * @devfreq: the devfreq instance + * @freq: the target frequency + */ +static int devfreq_get_freq_level(struct devfreq *devfreq, unsigned long freq) +{ + int lev; + + for (lev = 0; lev < devfreq->profile->max_state; lev++) + if (freq == devfreq->profile->freq_table[lev]) + return lev; + + return -EINVAL; +} + +/** + * devfreq_update_status() - Update statistics of devfreq behavior + * @devfreq: the devfreq instance + * @freq: the update target frequency + */ +static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) +{ + int lev, prev_lev; + unsigned long cur_time; + + lev = devfreq_get_freq_level(devfreq, freq); + if (lev < 0) + return lev; + + cur_time = jiffies; + devfreq->time_in_state[lev] += + cur_time - devfreq->last_stat_updated; + if (freq != devfreq->previous_freq) { + prev_lev = devfreq_get_freq_level(devfreq, + devfreq->previous_freq); + devfreq->trans_table[(prev_lev * + devfreq->profile->max_state) + lev]++; + devfreq->total_trans++; + } + devfreq->last_stat_updated = cur_time; + + return 0; +} + /* Load monitoring helper functions for governors use */ /** @@ -112,6 +157,11 @@ int update_devfreq(struct devfreq *devfreq) if (err) return err; + if (devfreq->profile->freq_table) + if (devfreq_update_status(devfreq, freq)) + dev_err(&devfreq->dev, + "Couldn't update frequency transition information.\n"); + devfreq->previous_freq = freq; return err; } @@ -378,6 +428,15 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; + devfreq->trans_table = devm_kzalloc(dev, sizeof(unsigned int) * + devfreq->profile->max_state * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned int) * + devfreq->profile->max_state, + GFP_KERNEL); + devfreq->last_stat_updated = jiffies; + dev_set_name(&devfreq->dev, dev_name(dev)); err = device_register(&devfreq->dev); if (err) { @@ -601,6 +660,47 @@ static ssize_t show_available_freqs(struct device *d, return count; } +static ssize_t show_trans_table(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct devfreq *devfreq = to_devfreq(dev); + ssize_t len; + int i, j, err; + unsigned int max_state = devfreq->profile->max_state; + + err = devfreq_update_status(devfreq, devfreq->previous_freq); + if (err) + return 0; + + len = sprintf(buf, " From : To\n"); + len += sprintf(buf + len, " :"); + for (i = 0; i < max_state; i++) + len += sprintf(buf + len, "%8u", + devfreq->profile->freq_table[i]); + + len += sprintf(buf + len, " time(ms)\n"); + + for (i = 0; i < max_state; i++) { + if (devfreq->profile->freq_table[i] + == devfreq->previous_freq) { + len += sprintf(buf + len, "*"); + } else { + len += sprintf(buf + len, " "); + } + len += sprintf(buf + len, "%8u:", + devfreq->profile->freq_table[i]); + for (j = 0; j < max_state; j++) + len += sprintf(buf + len, "%8u", + devfreq->trans_table[(i * max_state) + j]); + len += sprintf(buf + len, "%10u\n", + jiffies_to_msecs(devfreq->time_in_state[i])); + } + + len += sprintf(buf + len, "Total transition : 
%u\n", + devfreq->total_trans); + return len; +} + static struct device_attribute devfreq_attrs[] = { __ATTR(governor, S_IRUGO, show_governor, NULL), __ATTR(cur_freq, S_IRUGO, show_freq, NULL), @@ -610,6 +710,7 @@ static struct device_attribute devfreq_attrs[] = { store_polling_interval), __ATTR(min_freq, S_IRUGO | S_IWUSR, show_min_freq, store_min_freq), __ATTR(max_freq, S_IRUGO | S_IWUSR, show_max_freq, store_max_freq), + __ATTR(trans_stat, S_IRUGO, show_trans_table, NULL), { }, }; diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 1461fb2355ad..bc35c4aee6a3 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -73,6 +73,8 @@ struct devfreq_dev_status { * from devfreq_remove_device() call. If the user * has registered devfreq->nb at a notifier-head, * this is the time to unregister it. + * @freq_table: Optional list of frequencies to support statistics. + * @max_state: The size of freq_table. */ struct devfreq_dev_profile { unsigned long initial_freq; @@ -83,6 +85,9 @@ struct devfreq_dev_profile { struct devfreq_dev_status *stat); int (*get_cur_freq)(struct device *dev, unsigned long *freq); void (*exit)(struct device *dev); + + unsigned int *freq_table; + unsigned int max_state; }; /** @@ -127,6 +132,10 @@ struct devfreq_governor { * @min_freq: Limit minimum frequency requested by user (0: none) * @max_freq: Limit maximum frequency requested by user (0: none) * @stop_polling: devfreq polling status of a device. + * @total_trans: Number of devfreq transitions + * @trans_table: Statistics of devfreq transitions + * @time_in_state: Statistics of devfreq states + * @last_stat_updated: The last time stat updated * * This structure stores the devfreq information for a given device. * @@ -153,6 +162,12 @@ struct devfreq { unsigned long min_freq; unsigned long max_freq; bool stop_polling; + + /* information for device frequency transition */ + unsigned int total_trans; + unsigned int *trans_table; + unsigned long *time_in_state; + unsigned long last_stat_updated; }; #if defined(CONFIG_PM_DEVFREQ) -- cgit v1.2.3 From 3aa173b8db200bb96354481acc0a5b9e123119fe Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 29 Oct 2012 15:01:43 -0500 Subject: PM / devfreq: provide hooks for governors to be registered Add devfreq_add_governor and devfreq_remove_governor which can be invoked by governors to register with devfreq. This sets up the stage to dynamically switch governors and allow governors to be dynamically loaded as well. Cc: Rajagopal Venkat Cc: MyungJoo Ham Cc: Kyungmin Park Cc: "Rafael J.
Wysocki" Cc: Kevin Hilman Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon Acked-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++ drivers/devfreq/governor.h | 4 ++ include/linux/devfreq.h | 3 ++ 3 files changed, 98 insertions(+) (limited to 'include/linux') diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index e0002c5cbadc..679ac424472f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -36,6 +36,8 @@ static struct class *devfreq_class; */ static struct workqueue_struct *devfreq_wq; +/* The list of all device-devfreq governors */ +static LIST_HEAD(devfreq_governor_list); /* The list of all device-devfreq */ static LIST_HEAD(devfreq_list); static DEFINE_MUTEX(devfreq_list_lock); @@ -111,6 +113,32 @@ static int devfreq_update_status(struct devfreq *devfreq, unsigned long freq) return 0; } +/** + * find_devfreq_governor() - find devfreq governor from name + * @name: name of the governor + * + * Search the list of devfreq governors and return the matched + * governor's pointer. devfreq_list_lock should be held by the caller. + */ +static struct devfreq_governor *find_devfreq_governor(const char *name) +{ + struct devfreq_governor *tmp_governor; + + if (unlikely(IS_ERR_OR_NULL(name))) { + pr_err("DEVFREQ: %s: Invalid parameters\n", __func__); + return ERR_PTR(-EINVAL); + } + WARN(!mutex_is_locked(&devfreq_list_lock), + "devfreq_list_lock must be locked."); + + list_for_each_entry(tmp_governor, &devfreq_governor_list, node) { + if (!strncmp(tmp_governor->name, name, DEVFREQ_NAME_LEN)) + return tmp_governor; + } + + return ERR_PTR(-ENODEV); + } + /* Load monitoring helper functions for governors use */ /** @@ -515,6 +543,69 @@ int devfreq_resume_device(struct devfreq *devfreq) } EXPORT_SYMBOL(devfreq_resume_device); +/** + * devfreq_add_governor() - Add devfreq governor + * @governor: the devfreq governor to be added + */ +int devfreq_add_governor(struct devfreq_governor *governor) +{ + struct devfreq_governor *g; + int err = 0; + + if (!governor) { + pr_err("%s: Invalid parameters.\n", __func__); + return -EINVAL; + } + + mutex_lock(&devfreq_list_lock); + g = find_devfreq_governor(governor->name); + if (!IS_ERR(g)) { + pr_err("%s: governor %s already registered\n", __func__, + g->name); + err = -EINVAL; + goto err_out; + } + + list_add(&governor->node, &devfreq_governor_list); + +err_out: + mutex_unlock(&devfreq_list_lock); + + return err; +} +EXPORT_SYMBOL(devfreq_add_governor); + +/** + * devfreq_remove_governor() - Remove devfreq governor.
+ * @governor: the devfreq governor to be removed + */ +int devfreq_remove_governor(struct devfreq_governor *governor) +{ + struct devfreq_governor *g; + int err = 0; + + if (!governor) { + pr_err("%s: Invalid parameters.\n", __func__); + return -EINVAL; + } + + mutex_lock(&devfreq_list_lock); + g = find_devfreq_governor(governor->name); + if (IS_ERR(g)) { + pr_err("%s: governor %s not registered\n", __func__, + g->name); + err = -EINVAL; + goto err_out; + } + list_del(&governor->node); +err_out: + mutex_unlock(&devfreq_list_lock); + + return err; +} +EXPORT_SYMBOL(devfreq_remove_governor); + static ssize_t show_governor(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h index 26432ac0a398..fad7d6321978 100644 --- a/drivers/devfreq/governor.h +++ b/drivers/devfreq/governor.h @@ -34,4 +34,8 @@ extern void devfreq_monitor_suspend(struct devfreq *devfreq); extern void devfreq_monitor_resume(struct devfreq *devfreq); extern void devfreq_interval_update(struct devfreq *devfreq, unsigned int *delay); + +extern int devfreq_add_governor(struct devfreq_governor *governor); +extern int devfreq_remove_governor(struct devfreq_governor *governor); + #endif /* _GOVERNOR_H */ diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index bc35c4aee6a3..6484a3f8dda8 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -92,6 +92,7 @@ struct devfreq_dev_profile { /** * struct devfreq_governor - Devfreq policy governor + * @node: list node - contains registered devfreq governors * @name: Governor's name * @get_target_freq: Returns desired operating frequency for the device. * Basically, get_target_freq will run @@ -107,6 +108,8 @@ struct devfreq_dev_profile { * Note that the callbacks are called with devfreq->lock locked by devfreq. */ struct devfreq_governor { + struct list_head node; + const char name[DEVFREQ_NAME_LEN]; int (*get_target_freq)(struct devfreq *this, unsigned long *freq); int (*event_handler)(struct devfreq *devfreq, -- cgit v1.2.3 From 1b5c1be2c88e8445a20fa1929e26c37e7ca8c926 Mon Sep 17 00:00:00 2001 From: Nishanth Menon Date: Mon, 29 Oct 2012 15:01:45 -0500 Subject: PM / devfreq: map devfreq drivers to governor using name Allow devfreq drivers to register a preferred governor by name; when that governor is loaded at a later point, the drivers that requested it are managed appropriately, and when the governor is unloaded, those drivers stop being managed as well. Since the governor structures do not need to be exposed anymore, remove the definitions and make them static. NOTE: devfreq_list_lock is now used to protect governor start and stop - as this allows us to protect governors and devfreq with the proper dependencies as needed. As part of this change, change the registration of the exynos bus driver to request the ondemand governor by name. Cc: Rajagopal Venkat Cc: MyungJoo Ham Cc: Kyungmin Park Cc: "Rafael J.
Wysocki" Cc: Kevin Hilman Cc: linux-pm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Nishanth Menon [Merge conflict resolved by MyungJoo Ham] Signed-off-by: MyungJoo Ham --- drivers/devfreq/devfreq.c | 95 ++++++++++++++++++++++++++++--- drivers/devfreq/exynos4_bus.c | 2 +- drivers/devfreq/governor_performance.c | 2 +- drivers/devfreq/governor_powersave.c | 2 +- drivers/devfreq/governor_simpleondemand.c | 2 +- drivers/devfreq/governor_userspace.c | 2 +- include/linux/devfreq.h | 21 ++----- 7 files changed, 96 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 679ac424472f..0d7be03d561f 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -159,6 +159,9 @@ int update_devfreq(struct devfreq *devfreq) return -EINVAL; } + if (!devfreq->governor) + return -EINVAL; + /* Reevaluate the proper frequency */ err = devfreq->governor->get_target_freq(devfreq, &freq); if (err) @@ -379,7 +382,9 @@ static void _remove_devfreq(struct devfreq *devfreq, bool skip) list_del(&devfreq->node); mutex_unlock(&devfreq_list_lock); - devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_STOP, NULL); + if (devfreq->governor) + devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_STOP, NULL); if (devfreq->profile->exit) devfreq->profile->exit(devfreq->dev.parent); @@ -412,19 +417,20 @@ static void devfreq_dev_release(struct device *dev) * devfreq_add_device() - Add devfreq feature to the device * @dev: the device to add devfreq feature. * @profile: device-specific profile to run devfreq. - * @governor: the policy to choose frequency. + * @governor_name: name of the policy to choose frequency. * @data: private data for the governor. The devfreq framework does not * touch this value. 
*/ struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, - const struct devfreq_governor *governor, + const char *governor_name, void *data) { struct devfreq *devfreq; + struct devfreq_governor *governor; int err = 0; - if (!dev || !profile || !governor) { + if (!dev || !profile || !governor_name) { dev_err(dev, "%s: Invalid parameters.\n", __func__); return ERR_PTR(-EINVAL); } @@ -452,7 +458,7 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->dev.class = devfreq_class; devfreq->dev.release = devfreq_dev_release; devfreq->profile = profile; - devfreq->governor = governor; + strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN); devfreq->previous_freq = profile->initial_freq; devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; @@ -478,10 +484,14 @@ struct devfreq *devfreq_add_device(struct device *dev, mutex_lock(&devfreq_list_lock); list_add(&devfreq->node, &devfreq_list); - mutex_unlock(&devfreq_list_lock); - err = devfreq->governor->event_handler(devfreq, - DEVFREQ_GOV_START, NULL); + governor = find_devfreq_governor(devfreq->governor_name); + if (!IS_ERR(governor)) + devfreq->governor = governor; + if (devfreq->governor) + err = devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_START, NULL); + mutex_unlock(&devfreq_list_lock); if (err) { dev_err(dev, "%s: Unable to start governor for the device\n", __func__); @@ -524,6 +534,9 @@ int devfreq_suspend_device(struct devfreq *devfreq) if (!devfreq) return -EINVAL; + if (!devfreq->governor) + return 0; + return devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_SUSPEND, NULL); } @@ -538,6 +551,9 @@ int devfreq_resume_device(struct devfreq *devfreq) if (!devfreq) return -EINVAL; + if (!devfreq->governor) + return 0; + return devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_RESUME, NULL); } @@ -550,6 +566,7 @@ EXPORT_SYMBOL(devfreq_resume_device); int devfreq_add_governor(struct devfreq_governor *governor) { struct devfreq_governor *g; + struct devfreq *devfreq; int err = 0; if (!governor) { @@ -568,6 +585,38 @@ int devfreq_add_governor(struct devfreq_governor *governor) list_add(&governor->node, &devfreq_governor_list); + list_for_each_entry(devfreq, &devfreq_list, node) { + int ret = 0; + struct device *dev = devfreq->dev.parent; + + if (!strncmp(devfreq->governor_name, governor->name, + DEVFREQ_NAME_LEN)) { + /* The following should never occur */ + if (devfreq->governor) { + dev_warn(dev, + "%s: Governor %s already present\n", + __func__, devfreq->governor->name); + ret = devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_STOP, NULL); + if (ret) { + dev_warn(dev, + "%s: Governor %s stop = %d\n", + __func__, + devfreq->governor->name, ret); + } + /* Fall through */ + } + devfreq->governor = governor; + ret = devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_START, NULL); + if (ret) { + dev_warn(dev, "%s: Governor %s start=%d\n", + __func__, devfreq->governor->name, + ret); + } + } + } + err_out: mutex_unlock(&devfreq_list_lock); @@ -582,6 +631,7 @@ EXPORT_SYMBOL(devfreq_add_governor); int devfreq_remove_governor(struct devfreq_governor *governor) { struct devfreq_governor *g; + struct devfreq *devfreq; int err = 0; if (!governor) { @@ -597,6 +647,29 @@ int devfreq_remove_governor(struct devfreq_governor *governor) err = -EINVAL; goto err_out; } + list_for_each_entry(devfreq, &devfreq_list, node) { + int ret; + struct device *dev = devfreq->dev.parent; + + if (!strncmp(devfreq->governor_name, governor->name, + DEVFREQ_NAME_LEN)) { + 
/* we should have a devfreq governor! */ + if (!devfreq->governor) { + dev_warn(dev, "%s: Governor %s NOT present\n", + __func__, governor->name); + continue; + /* Fall through */ + } + ret = devfreq->governor->event_handler(devfreq, + DEVFREQ_GOV_STOP, NULL); + if (ret) { + dev_warn(dev, "%s: Governor %s stop=%d\n", + __func__, devfreq->governor->name, + ret); + } + devfreq->governor = NULL; + } + } list_del(&governor->node); err_out: @@ -609,6 +682,9 @@ EXPORT_SYMBOL(devfreq_remove_governor); static ssize_t show_governor(struct device *dev, struct device_attribute *attr, char *buf) { + if (!to_devfreq(dev)->governor) + return -EINVAL; + return sprintf(buf, "%s\n", to_devfreq(dev)->governor->name); } @@ -645,6 +721,9 @@ static ssize_t store_polling_interval(struct device *dev, unsigned int value; int ret; + if (!df->governor) + return -EINVAL; + ret = sscanf(buf, "%u", &value); if (ret != 1) return -EINVAL; diff --git a/drivers/devfreq/exynos4_bus.c b/drivers/devfreq/exynos4_bus.c index 68145316c49c..b8ac28497b37 100644 --- a/drivers/devfreq/exynos4_bus.c +++ b/drivers/devfreq/exynos4_bus.c @@ -1040,7 +1040,7 @@ static __devinit int exynos4_busfreq_probe(struct platform_device *pdev) busfreq_mon_reset(data); data->devfreq = devfreq_add_device(dev, &exynos4_devfreq_profile, - &devfreq_simple_ondemand, NULL); + "simple_ondemand", NULL); if (IS_ERR(data->devfreq)) return PTR_ERR(data->devfreq); diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c index db8ff77dbed2..865a36956917 100644 --- a/drivers/devfreq/governor_performance.c +++ b/drivers/devfreq/governor_performance.c @@ -40,7 +40,7 @@ static int devfreq_performance_handler(struct devfreq *devfreq, return ret; } -const struct devfreq_governor devfreq_performance = { +static struct devfreq_governor devfreq_performance = { .name = "performance", .get_target_freq = devfreq_performance_func, .event_handler = devfreq_performance_handler, diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c index 30f0fca8d635..8612c0f96b79 100644 --- a/drivers/devfreq/governor_powersave.c +++ b/drivers/devfreq/governor_powersave.c @@ -37,7 +37,7 @@ static int devfreq_powersave_handler(struct devfreq *devfreq, return ret; } -const struct devfreq_governor devfreq_powersave = { +static struct devfreq_governor devfreq_powersave = { .name = "powersave", .get_target_freq = devfreq_powersave_func, .event_handler = devfreq_powersave_handler, diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c index 85f9ed531b1e..a870a24bb56b 100644 --- a/drivers/devfreq/governor_simpleondemand.c +++ b/drivers/devfreq/governor_simpleondemand.c @@ -120,7 +120,7 @@ static int devfreq_simple_ondemand_handler(struct devfreq *devfreq, return 0; } -const struct devfreq_governor devfreq_simple_ondemand = { +static struct devfreq_governor devfreq_simple_ondemand = { .name = "simple_ondemand", .get_target_freq = devfreq_simple_ondemand_func, .event_handler = devfreq_simple_ondemand_handler, diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c index 110f178fec04..34fb80f50cf6 100644 --- a/drivers/devfreq/governor_userspace.c +++ b/drivers/devfreq/governor_userspace.c @@ -135,7 +135,7 @@ static int devfreq_userspace_handler(struct devfreq *devfreq, return ret; } -const struct devfreq_governor devfreq_userspace = { +static struct devfreq_governor devfreq_userspace = { .name = "userspace", .get_target_freq = devfreq_userspace_func, 
.event_handler = devfreq_userspace_handler, diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 6484a3f8dda8..235248cb2c93 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -125,6 +125,7 @@ struct devfreq_governor { * using devfreq. * @profile: device-specific devfreq profile * @governor: method how to choose frequency based on the usage. + * @governor_name: devfreq governor name for use with this devfreq * @nb: notifier block used to notify devfreq object that it should * reevaluate operable frequencies. Devfreq users may use * devfreq.nb to the corresponding register notifier call chain. @@ -155,6 +156,7 @@ struct devfreq { struct device dev; struct devfreq_dev_profile *profile; const struct devfreq_governor *governor; + char governor_name[DEVFREQ_NAME_LEN]; struct notifier_block nb; struct delayed_work work; @@ -176,7 +178,7 @@ struct devfreq { #if defined(CONFIG_PM_DEVFREQ) extern struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, - const struct devfreq_governor *governor, + const char *governor_name, void *data); extern int devfreq_remove_device(struct devfreq *devfreq); extern int devfreq_suspend_device(struct devfreq *devfreq); @@ -190,17 +192,7 @@ extern int devfreq_register_opp_notifier(struct device *dev, extern int devfreq_unregister_opp_notifier(struct device *dev, struct devfreq *devfreq); -#ifdef CONFIG_DEVFREQ_GOV_POWERSAVE -extern const struct devfreq_governor devfreq_powersave; -#endif -#ifdef CONFIG_DEVFREQ_GOV_PERFORMANCE -extern const struct devfreq_governor devfreq_performance; -#endif -#ifdef CONFIG_DEVFREQ_GOV_USERSPACE -extern const struct devfreq_governor devfreq_userspace; -#endif #ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND -extern const struct devfreq_governor devfreq_simple_ondemand; /** * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq * and devfreq_add_device @@ -223,7 +215,7 @@ struct devfreq_simple_ondemand_data { #else /* !CONFIG_PM_DEVFREQ */ static struct devfreq *devfreq_add_device(struct device *dev, struct devfreq_dev_profile *profile, - struct devfreq_governor *governor, + const char *governor_name, void *data) { return NULL; @@ -262,11 +254,6 @@ static int devfreq_unregister_opp_notifier(struct device *dev, return -EINVAL; } -#define devfreq_powersave NULL -#define devfreq_performance NULL -#define devfreq_userspace NULL -#define devfreq_simple_ondemand NULL - #endif /* CONFIG_PM_DEVFREQ */ #endif /* __LINUX_DEVFREQ_H__ */ -- cgit v1.2.3 From 95f8a082b9b1ead0c2859f2a7b1ac91ff63d8765 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Nov 2012 00:21:50 +0100 Subject: ACPI / driver core: Introduce struct acpi_dev_node and related macros To avoid adding an ACPI handle pointer to struct device on architectures that don't use ACPI, or generally when CONFIG_ACPI is not set, in which cases that pointer is useless, define struct acpi_dev_node that will contain the handle pointer if CONFIG_ACPI is set and will be empty otherwise and use it to represent the ACPI device node field in struct device. In addition to that define macros for reading and setting the ACPI handle of a device that don't generate code when CONFIG_ACPI is unset. Modify the ACPI subsystem to use those macros instead of referring to the given device's ACPI handle directly. Signed-off-by: Rafael J. 
Wysocki Reviewed-by: Mika Westerberg Acked-by: Greg Kroah-Hartman --- drivers/acpi/glue.c | 16 ++++++++-------- drivers/acpi/scan.c | 4 ++-- include/acpi/acpi_bus.h | 2 +- include/linux/device.h | 18 ++++++++++++++++-- 4 files changed, 27 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index 3e75d6e5a469..01551840d236 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -134,12 +134,12 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) char physical_node_name[sizeof(PHYSICAL_NODE_STRING) + 2]; int retval = -EINVAL; - if (dev->acpi_handle) { + if (ACPI_HANDLE(dev)) { if (handle) { dev_warn(dev, "ACPI handle is already set\n"); return -EINVAL; } else { - handle = dev->acpi_handle; + handle = ACPI_HANDLE(dev); } } if (!handle) @@ -181,8 +181,8 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) mutex_unlock(&acpi_dev->physical_node_lock); - if (!dev->acpi_handle) - dev->acpi_handle = handle; + if (!ACPI_HANDLE(dev)) + ACPI_HANDLE_SET(dev, acpi_dev->handle); if (!physical_node->node_id) strcpy(physical_node_name, PHYSICAL_NODE_STRING); @@ -200,7 +200,7 @@ static int acpi_bind_one(struct device *dev, acpi_handle handle) return 0; err: - dev->acpi_handle = NULL; + ACPI_HANDLE_SET(dev, NULL); put_device(dev); return retval; @@ -217,10 +217,10 @@ static int acpi_unbind_one(struct device *dev) acpi_status status; struct list_head *node, *next; - if (!dev->acpi_handle) + if (!ACPI_HANDLE(dev)) return 0; - status = acpi_bus_get_device(dev->acpi_handle, &acpi_dev); + status = acpi_bus_get_device(ACPI_HANDLE(dev), &acpi_dev); if (ACPI_FAILURE(status)) goto err; @@ -246,7 +246,7 @@ static int acpi_unbind_one(struct device *dev) sysfs_remove_link(&acpi_dev->dev.kobj, physical_node_name); sysfs_remove_link(&dev->kobj, "firmware_node"); - dev->acpi_handle = NULL; + ACPI_HANDLE_SET(dev, NULL); /* acpi_bind_one increase refcnt by one */ put_device(dev); kfree(entry); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index d842569395a9..e92ca67d0e46 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -386,8 +386,8 @@ const struct acpi_device_id *acpi_match_device(const struct acpi_device_id *ids, { struct acpi_device *adev; - if (!ids || !dev->acpi_handle - || ACPI_FAILURE(acpi_bus_get_device(dev->acpi_handle, &adev))) + if (!ids || !ACPI_HANDLE(dev) + || ACPI_FAILURE(acpi_bus_get_device(ACPI_HANDLE(dev), &adev))) return NULL; return __acpi_match_device(adev, ids); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index bb1537c5e672..d1659904f2a0 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -410,7 +410,7 @@ acpi_handle acpi_get_child(acpi_handle, u64); int acpi_is_root_bridge(acpi_handle); acpi_handle acpi_get_pci_rootbridge_handle(unsigned int, unsigned int); struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle); -#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)((dev)->acpi_handle)) +#define DEVICE_ACPI_HANDLE(dev) ((acpi_handle)ACPI_HANDLE(dev)) int acpi_enable_wakeup_device_power(struct acpi_device *dev, int state); int acpi_disable_wakeup_device_power(struct acpi_device *dev); diff --git a/include/linux/device.h b/include/linux/device.h index cc3aee57a46e..05292e488346 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -578,6 +578,12 @@ struct device_dma_parameters { unsigned long segment_boundary_mask; }; +struct acpi_dev_node { +#ifdef CONFIG_ACPI + void *handle; +#endif +}; + /** * struct device - The basic device 
 structure * @parent: The device's "parent" device, the device to which it is attached. @@ -618,7 +624,7 @@ struct device_dma_parameters { * @dma_mem: Internal for coherent mem override. * @archdata: For arch-specific additions. * @of_node: Associated device tree node. - * @acpi_handle: Associated ACPI device node's namespace handle. + * @acpi_node: Associated ACPI device node. * @devt: For creating the sysfs "dev". * @id: device instance * @devres_lock: Spinlock to protect the resource of the device. @@ -683,7 +689,7 @@ struct device { struct dev_archdata archdata; struct device_node *of_node; /* associated device tree node */ - void *acpi_handle; /* associated ACPI device node */ + struct acpi_dev_node acpi_node; /* associated ACPI device node */ dev_t devt; /* dev_t, creates the sysfs "dev" */ u32 id; /* device instance */ @@ -704,6 +710,14 @@ static inline struct device *kobj_to_dev(struct kobject *kobj) { return container_of(kobj, struct device, kobj); } +#ifdef CONFIG_ACPI +#define ACPI_HANDLE(dev) ((dev)->acpi_node.handle) +#define ACPI_HANDLE_SET(dev, _handle_) (dev)->acpi_node.handle = (_handle_) +#else +#define ACPI_HANDLE(dev) (NULL) +#define ACPI_HANDLE_SET(dev, _handle_) do { } while (0) +#endif + /* Get the wakeup routines, which depend on struct device */ #include <linux/pm_wakeup.h> -- cgit v1.2.3 From 863f9f30e6c1e30cb19a0cd17c5cf8879257dfd7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 21 Nov 2012 00:21:59 +0100 Subject: ACPI / platform: Initialize ACPI handles of platform devices in advance The current platform device creation and registration code in acpi_create_platform_device() is quite convoluted. This function takes an ACPI device node as an argument and eventually calls platform_device_register_resndata() to create and register a platform device object on the basis of the information contained in that node. However, it doesn't associate the new platform device with the ACPI node directly, but instead it relies on acpi_platform_notify(), called from within device_add(), to find that ACPI node again with the help of acpi_platform_find_device() and acpi_platform_match() and then attach the new platform device to it. This causes an additional ACPI namespace walk to happen and is clearly suboptimal. Use the observation that it is now possible to initialize the ACPI handle of a device before calling device_add() for it to make this code more straightforward. Namely, add a new field to struct platform_device_info allowing us to pass the ACPI handle of interest to platform_device_register_full(), which will then use it to initialize the new device's ACPI handle before registering it. This will cause acpi_platform_notify() to use the ACPI handle from the device structure directly instead of using the .find_device() routine provided by the device's bus type. In consequence, acpi_platform_bus, acpi_platform_find_device(), and acpi_platform_match() are not necessary any more, so remove them. Signed-off-by: Rafael J.
Wysocki Reviewed-by: Mika Westerberg Acked-by: Greg Kroah-Hartman --- drivers/acpi/acpi_platform.c | 76 ++++++----------------------------------- drivers/base/platform.c | 2 ++ include/linux/platform_device.h | 1 + 3 files changed, 13 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_platform.c b/drivers/acpi/acpi_platform.c index 7ac20d8b8f07..b7df9b197bcf 100644 --- a/drivers/acpi/acpi_platform.c +++ b/drivers/acpi/acpi_platform.c @@ -33,7 +33,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) { struct platform_device *pdev = NULL; struct acpi_device *acpi_parent; - struct device *parent = NULL; + struct platform_device_info pdevinfo; struct resource_list_entry *rentry; struct list_head resource_list; struct resource *resources; @@ -60,11 +60,13 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) acpi_dev_free_resource_list(&resource_list); + memset(&pdevinfo, 0, sizeof(pdevinfo)); /* * If the ACPI node has a parent and that parent has a physical device * attached to it, that physical device should be the parent of the * platform device we are about to create. */ + pdevinfo.parent = NULL; acpi_parent = adev->parent; if (acpi_parent) { struct acpi_device_physical_node *entry; @@ -76,12 +78,16 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) entry = list_first_entry(list, struct acpi_device_physical_node, node); - parent = entry->dev; + pdevinfo.parent = entry->dev; } mutex_unlock(&acpi_parent->physical_node_lock); } - pdev = platform_device_register_resndata(parent, dev_name(&adev->dev), - -1, resources, count, NULL, 0); + pdevinfo.name = dev_name(&adev->dev); + pdevinfo.id = -1; + pdevinfo.res = resources; + pdevinfo.num_res = count; + pdevinfo.acpi_node.handle = adev->handle; + pdev = platform_device_register_full(&pdevinfo); if (IS_ERR(pdev)) { dev_err(&adev->dev, "platform device creation failed: %ld\n", PTR_ERR(pdev)); @@ -94,65 +100,3 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *adev) kfree(resources); return pdev; } - -static acpi_status acpi_platform_match(acpi_handle handle, u32 depth, - void *data, void **return_value) -{ - struct platform_device *pdev = data; - struct acpi_device *adev; - acpi_status status; - - status = acpi_bus_get_device(handle, &adev); - if (ACPI_FAILURE(status)) - return status; - - /* Skip ACPI devices that have physical device attached */ - if (adev->physical_node_count) - return AE_OK; - - if (!strcmp(dev_name(&pdev->dev), dev_name(&adev->dev))) { - *(acpi_handle *)return_value = handle; - return AE_CTRL_TERMINATE; - } - - return AE_OK; -} - -static int acpi_platform_find_device(struct device *dev, acpi_handle *handle) -{ - struct platform_device *pdev = to_platform_device(dev); - char *name, *tmp, *hid; - - /* - * The platform device is named using the ACPI device name - * _HID:INSTANCE so we strip the INSTANCE out in order to find the - * correct device using its _HID. - */ - name = kstrdup(dev_name(dev), GFP_KERNEL); - if (!name) - return -ENOMEM; - - tmp = name; - hid = strsep(&tmp, ":"); - if (!hid) { - kfree(name); - return -ENODEV; - } - - *handle = NULL; - acpi_get_devices(hid, acpi_platform_match, pdev, handle); - - kfree(name); - return *handle ? 
0 : -ENODEV; -} - -static struct acpi_bus_type acpi_platform_bus = { - .bus = &platform_bus_type, - .find_device = acpi_platform_find_device, -}; - -static int __init acpi_platform_init(void) -{ - return register_acpi_bus_type(&acpi_platform_bus); -} -arch_initcall(acpi_platform_init); diff --git a/drivers/base/platform.c b/drivers/base/platform.c index 7de29ebfce7f..49fd96e23460 100644 --- a/drivers/base/platform.c +++ b/drivers/base/platform.c @@ -437,6 +437,7 @@ struct platform_device *platform_device_register_full( goto err_alloc; pdev->dev.parent = pdevinfo->parent; + ACPI_HANDLE_SET(&pdev->dev, pdevinfo->acpi_node.handle); if (pdevinfo->dma_mask) { /* @@ -467,6 +468,7 @@ struct platform_device *platform_device_register_full( ret = platform_device_add(pdev); if (ret) { err: + ACPI_HANDLE_SET(&pdev->dev, NULL); kfree(pdev->dev.dma_mask); err_alloc: diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index 5711e9525a2a..a9ded9a3c175 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -55,6 +55,7 @@ extern int platform_add_devices(struct platform_device **, int); struct platform_device_info { struct device *parent; + struct acpi_dev_node acpi_node; const char *name; int id; -- cgit v1.2.3 From fbfddae696572e57a441252abbd65f7220e06030 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 21 Nov 2012 01:36:28 +0000 Subject: ACPI: Add acpi_handle_() interfaces This patch introduces acpi_handle_(), where is a kernel message level such as err/warn/info, to support improved logging messages for ACPI, esp. hot-plug operations. acpi_handle_() appends "ACPI" prefix and ACPI object path to the messages. This improves diagnosis of hotplug operations since an error message in a log file identifies an object that caused an issue. This interface acquires the global namespace mutex to obtain an object path. In interrupt context, it shows the object path as . acpi_handle_() takes acpi_handle as an argument, which is passed to ACPI hotplug notify handlers from the ACPICA. Therefore, it is always available unlike other kernel objects, such as device. For example: acpi_handle_err(handle, "Device don't exist, dropping EJECT\n"); logs an error message like this at KERN_ERR. ACPI: \_SB_.SCK4.CPU4: Device don't exist, dropping EJECT ACPI hot-plug drivers can use acpi_handle_() when they need to identify a target ACPI object path in their messages, such as error cases. The usage model is similar to dev_(). acpi_handle_() can be used when a device is not created or is invalid during hot-plug operations. ACPI object path is also consistent on the platform, unlike device name that gets incremented over hotplug operations. ACPI drivers should use dev_() when a device object is valid. Device name provides more user friendly information, and avoids acquiring the global ACPI namespace mutex. ACPI drivers also continue to use pr_() when they do not need to specify device information, such as boot-up messages. Note: ACPI_[WARNING|INFO|ERROR]() are intended for the ACPICA and are not associated with the kernel message level. Signed-off-by: Toshi Kani Tested-by: Vijay Mohan Pandarathil Signed-off-by: Rafael J. 
Wysocki --- drivers/acpi/utils.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) (limited to 'include/linux') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 462f7e300363..744371304313 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -28,6 +28,8 @@ #include #include #include +#include +#include #include #include @@ -457,3 +459,39 @@ acpi_evaluate_hotplug_ost(acpi_handle handle, u32 source_event, #endif } EXPORT_SYMBOL(acpi_evaluate_hotplug_ost); + +/** + * acpi_handle_printk: Print message with ACPI prefix and object path + * + * This function is called through acpi_handle_<level> macros and prints + * a message with ACPI prefix and object path. This function acquires + * the global namespace mutex to obtain an object path. In interrupt + * context, it shows the object path as <n/a>. + */ +void +acpi_handle_printk(const char *level, acpi_handle handle, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + struct acpi_buffer buffer = { + .length = ACPI_ALLOCATE_BUFFER, + .pointer = NULL + }; + const char *path; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + if (in_interrupt() || + acpi_get_name(handle, ACPI_FULL_PATHNAME, &buffer) != AE_OK) + path = "<n/a>"; + else + path = buffer.pointer; + + printk("%sACPI: %s: %pV", level, path, &vaf); + + va_end(args); + kfree(buffer.pointer); +} +EXPORT_SYMBOL(acpi_handle_printk); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 87edfcc0ab62..9201ac1f0511 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -434,4 +434,47 @@ acpi_status acpi_os_prepare_sleep(u8 sleep_state, #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif +#ifdef CONFIG_ACPI +__printf(3, 4) +void acpi_handle_printk(const char *level, acpi_handle handle, + const char *fmt, ...); +#else /* !CONFIG_ACPI */ +static inline __printf(3, 4) void +acpi_handle_printk(const char *level, void *handle, const char *fmt, ...) {} +#endif /* !CONFIG_ACPI */ + +/* + * acpi_handle_<level>: Print message with ACPI prefix and object path + * + * These interfaces acquire the global namespace mutex to obtain an object + * path. In interrupt context, it shows the object path as <n/a>. + */ +#define acpi_handle_emerg(handle, fmt, ...) \ + acpi_handle_printk(KERN_EMERG, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_alert(handle, fmt, ...) \ + acpi_handle_printk(KERN_ALERT, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_crit(handle, fmt, ...) \ + acpi_handle_printk(KERN_CRIT, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_err(handle, fmt, ...) \ + acpi_handle_printk(KERN_ERR, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_warn(handle, fmt, ...) \ + acpi_handle_printk(KERN_WARNING, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_notice(handle, fmt, ...) \ + acpi_handle_printk(KERN_NOTICE, handle, fmt, ##__VA_ARGS__) +#define acpi_handle_info(handle, fmt, ...) \ + acpi_handle_printk(KERN_INFO, handle, fmt, ##__VA_ARGS__) + +/* REVISIT: Support CONFIG_DYNAMIC_DEBUG when necessary */ +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) +#define acpi_handle_debug(handle, fmt, ...) \ + acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__) +#else +#define acpi_handle_debug(handle, fmt, ...)
+#define acpi_handle_debug(handle, fmt, ...)				\
+({									\
+	if (0)								\
+		acpi_handle_printk(KERN_DEBUG, handle, fmt, ##__VA_ARGS__); \
+	0;								\
+})
+#endif
+
 #endif	/*_LINUX_ACPI_H*/
-- cgit v1.2.3
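
As a usage illustration of the interfaces above, here is a minimal sketch of
an ACPI hotplug notify handler.  The handler itself and its event handling
are hypothetical; only acpi_handle_err()/acpi_handle_info() (from the patch
above) and acpi_bus_get_device() are real interfaces:

/*
 * Sketch only: a hypothetical hotplug notify handler.  When no struct
 * acpi_device (and hence no struct device) is available, dev_err()
 * cannot be used; acpi_handle_err() still identifies the object by its
 * ACPI namespace path.
 */
#include <linux/acpi.h>
#include <acpi/acpi_bus.h>

static void example_hotplug_notify(acpi_handle handle, u32 event, void *data)
{
	struct acpi_device *adev;

	if (acpi_bus_get_device(handle, &adev)) {
		/* No device object yet: identify the target by handle. */
		acpi_handle_err(handle, "Device don't exist, dropping EJECT\n");
		return;
	}

	/* A device object exists, but the handle-based form still works. */
	acpi_handle_info(handle, "Processing event 0x%x\n", event);
}
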
From 907ddf89d0bb7f57e1e21485900e6564a1ab512a Mon Sep 17 00:00:00 2001
From: Mika Westerberg
Date: Fri, 23 Nov 2012 12:23:40 +0100
Subject: i2c / ACPI: add ACPI enumeration support

ACPI 5 introduced the I2cSerialBus resource, which makes it possible to
enumerate and configure the I2C slave devices behind an I2C controller.
This patch adds helper functions to support I2C slave enumeration.

An ACPI-enabled I2C controller driver only needs to call
acpi_i2c_register_devices() in order to get its slave devices
enumerated, created, and bound to the corresponding ACPI handles.

Signed-off-by: Mika Westerberg
Signed-off-by: Rafael J. Wysocki
---
 drivers/acpi/Kconfig | 6 +++
 drivers/acpi/Makefile | 1 +
 drivers/acpi/acpi_i2c.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/i2c/i2c-core.c | 6 +++
 include/linux/i2c.h | 9 +++++
 5 files changed, 125 insertions(+)
 create mode 100644 drivers/acpi/acpi_i2c.c
(limited to 'include/linux')

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 119d58db8342..0300bf612946 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -181,6 +181,12 @@ config ACPI_DOCK
 	  This driver supports ACPI-controlled docking stations and removable
 	  drive bays such as the IBM Ultrabay and the Dell Module Bay.
 
+config ACPI_I2C
+	def_tristate I2C
+	depends on I2C
+	help
+	  ACPI I2C enumeration support.
+
 config ACPI_PROCESSOR
 	tristate "Processor"
 	select THERMAL
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 3223edfb23b6..7198b6d6b763 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -69,6 +69,7 @@ obj-$(CONFIG_ACPI_HED) += hed.o
 obj-$(CONFIG_ACPI_EC_DEBUGFS) += ec_sys.o
 obj-$(CONFIG_ACPI_CUSTOM_METHOD)+= custom_method.o
 obj-$(CONFIG_ACPI_BGRT) += bgrt.o
+obj-$(CONFIG_ACPI_I2C) += acpi_i2c.o
 
 # processor has its own "processor." module_param namespace
 processor-y := processor_driver.o processor_throttling.o
diff --git a/drivers/acpi/acpi_i2c.c b/drivers/acpi/acpi_i2c.c
new file mode 100644
index 000000000000..82045e3f5cac
--- /dev/null
+++ b/drivers/acpi/acpi_i2c.c
@@ -0,0 +1,103 @@
+/*
+ * ACPI I2C enumeration support
+ *
+ * Copyright (C) 2012, Intel Corporation
+ * Author: Mika Westerberg
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/export.h>
+#include <linux/i2c.h>
+#include <linux/ioport.h>
+
+ACPI_MODULE_NAME("i2c");
+
+static int acpi_i2c_add_resource(struct acpi_resource *ares, void *data)
+{
+	struct i2c_board_info *info = data;
+
+	if (ares->type == ACPI_RESOURCE_TYPE_SERIAL_BUS) {
+		struct acpi_resource_i2c_serialbus *sb;
+
+		sb = &ares->data.i2c_serial_bus;
+		if (sb->type == ACPI_RESOURCE_SERIAL_TYPE_I2C) {
+			info->addr = sb->slave_address;
+			if (sb->access_mode == ACPI_I2C_10BIT_MODE)
+				info->flags |= I2C_CLIENT_TEN;
+		}
+	} else if (info->irq < 0) {
+		struct resource r;
+
+		if (acpi_dev_resource_interrupt(ares, 0, &r))
+			info->irq = r.start;
+	}
+
+	/* Tell the ACPI core to skip this resource */
+	return 1;
+}
+
+static acpi_status acpi_i2c_add_device(acpi_handle handle, u32 level,
+				       void *data, void **return_value)
+{
+	struct i2c_adapter *adapter = data;
+	struct list_head resource_list;
+	struct i2c_board_info info;
+	struct acpi_device *adev;
+	int ret;
+
+	if (acpi_bus_get_device(handle, &adev))
+		return AE_OK;
+	if (acpi_bus_get_status(adev) || !adev->status.present)
+		return AE_OK;
+
+	memset(&info, 0, sizeof(info));
+	info.acpi_node.handle = handle;
+	info.irq = -1;
+
+	INIT_LIST_HEAD(&resource_list);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_i2c_add_resource, &info);
+	acpi_dev_free_resource_list(&resource_list);
+
+	if (ret < 0 || !info.addr)
+		return AE_OK;
+
+	strlcpy(info.type, dev_name(&adev->dev), sizeof(info.type));
+	if (!i2c_new_device(adapter, &info)) {
+		dev_err(&adapter->dev,
+			"failed to add I2C device %s from ACPI\n",
+			dev_name(&adev->dev));
+	}
+
+	return AE_OK;
+}
+
+/**
+ * acpi_i2c_register_devices - enumerate I2C slave devices behind adapter
+ * @adapter: pointer to adapter
+ *
+ * Enumerate all I2C slave devices behind this adapter by walking the ACPI
+ * namespace. When a device is found it will be added to the Linux device
+ * model and bound to the corresponding ACPI handle.
+ */
+void acpi_i2c_register_devices(struct i2c_adapter *adapter)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	handle = ACPI_HANDLE(&adapter->dev);
+	if (!handle)
+		return;
+
+	status = acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
+				     acpi_i2c_add_device, NULL,
+				     adapter, NULL);
+	if (ACPI_FAILURE(status))
+		dev_warn(&adapter->dev, "failed to enumerate I2C slaves\n");
+}
+EXPORT_SYMBOL_GPL(acpi_i2c_register_devices);
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a7edf987a339..e388590b44ab 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -39,6 +39,7 @@
 #include <linux/irqflags.h>
 #include <linux/rwsem.h>
 #include <linux/pm_runtime.h>
+#include <linux/acpi.h>
 #include <asm/uaccess.h>
 
 #include "i2c-core.h"
@@ -78,6 +79,10 @@ static int i2c_device_match(struct device *dev, struct device_driver *drv)
 	if (of_driver_match_device(dev, drv))
 		return 1;
 
+	/* Then ACPI style match */
+	if (acpi_driver_match_device(dev, drv))
+		return 1;
+
 	driver = to_i2c_driver(drv);
 
 	/* match on an id table if there is one */
 	if (driver->id_table)
@@ -539,6 +544,7 @@ i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info)
 	client->dev.bus = &i2c_bus_type;
 	client->dev.type = &i2c_client_type;
 	client->dev.of_node = info->of_node;
+	ACPI_HANDLE_SET(&client->dev, info->acpi_node.handle);
 
 	/* For 10-bit clients, add an arbitrary offset to avoid collisions */
 	dev_set_name(&client->dev, "%d-%04x", i2c_adapter_id(adap),
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 800de224336b..d0c4db7b4872 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -259,6 +259,7 @@ static inline void i2c_set_clientdata(struct i2c_client *dev, void *data)
 * @platform_data: stored in i2c_client.dev.platform_data
 * @archdata: copied into i2c_client.dev.archdata
 * @of_node: pointer to OpenFirmware device node
+ * @acpi_node: ACPI device node
 * @irq: stored in i2c_client.irq
 *
 * I2C doesn't actually support hardware probing, although controllers and
@@ -279,6 +280,7 @@ struct i2c_board_info {
 	void *platform_data;
 	struct dev_archdata *archdata;
 	struct device_node *of_node;
+	struct acpi_dev_node acpi_node;
 	int irq;
 };
@@ -501,4 +503,11 @@ static inline int i2c_adapter_id(struct i2c_adapter *adap)
 			       i2c_del_driver)
 #endif /* I2C */
 
+
+#if IS_ENABLED(CONFIG_ACPI_I2C)
+extern void acpi_i2c_register_devices(struct i2c_adapter *adap);
+#else
+static inline void acpi_i2c_register_devices(struct i2c_adapter *adap) {}
+#endif
+
 #endif /* _LINUX_I2C_H */
-- cgit v1.2.3
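
As a usage illustration, an ACPI-aware I2C controller driver might call
acpi_i2c_register_devices() right after registering its adapter.  The sketch
below is hedged: the driver, its empty algorithm, and the handle propagation
are assumptions made for the example; only i2c_add_adapter() and
acpi_i2c_register_devices() come from the patches above:

/*
 * Sketch only: a hypothetical controller driver wiring up ACPI slave
 * enumeration.  A real driver would supply a working i2c_algorithm.
 */
#include <linux/acpi.h>
#include <linux/i2c.h>
#include <linux/platform_device.h>
#include <linux/slab.h>

static const struct i2c_algorithm example_algo; /* controller-specific in reality */

static int example_i2c_probe(struct platform_device *pdev)
{
	struct i2c_adapter *adap;
	int ret;

	adap = devm_kzalloc(&pdev->dev, sizeof(*adap), GFP_KERNEL);
	if (!adap)
		return -ENOMEM;

	strlcpy(adap->name, "example-i2c", sizeof(adap->name));
	adap->algo = &example_algo;
	adap->dev.parent = &pdev->dev;
	/* Assumption: propagate the ACPI handle from the parent platform
	 * device so that ACPI_HANDLE(&adap->dev) is non-NULL. */
	ACPI_HANDLE_SET(&adap->dev, ACPI_HANDLE(&pdev->dev));

	ret = i2c_add_adapter(adap);
	if (ret)
		return ret;

	/* Walk the ACPI namespace below the controller and create clients
	 * for I2cSerialBus slaves (no-op without CONFIG_ACPI_I2C). */
	acpi_i2c_register_devices(adap);
	return 0;
}
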
From e5f5762177be52fde50ccba88938d66b5103c8e0 Mon Sep 17 00:00:00 2001
From: Li Haifeng
Date: Fri, 23 Nov 2012 21:55:19 +0100
Subject: PM / Freezer: Fix compile error involving try_to_freeze_nowarn()

If FREEZER is not defined, the following error is thrown at compile
time:

  arch/arm/kernel/signal.c:645: error: implicit declaration of function 'try_to_freeze_nowarn'

Signed-off-by: Haifeng Li
Signed-off-by: Rafael J. Wysocki
---
 include/linux/freezer.h | 1 +
 1 file changed, 1 insertion(+)
(limited to 'include/linux')

diff --git a/include/linux/freezer.h b/include/linux/freezer.h
index d09af4b67cf1..b90091af5798 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
@@ -177,6 +177,7 @@ static inline int freeze_kernel_threads(void) { return -ENOSYS; }
 static inline void thaw_processes(void) {}
 static inline void thaw_kernel_threads(void) {}
 
+static inline bool try_to_freeze_nowarn(void) { return false; }
 static inline bool try_to_freeze(void) { return false; }
 
 static inline void freezer_do_not_count(void) {}
-- cgit v1.2.3

From b88ce2a41562d1a9554f209e0f31a32d9f473794 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Mon, 26 Nov 2012 10:03:06 +0100
Subject: ACPI / PM: Allow attach/detach routines to change device power states

Make it possible to ask the routines used for adding/removing devices
to/from the general ACPI PM domain, acpi_dev_pm_attach() and
acpi_dev_pm_detach(), respectively, to change the power states of
devices, so that they are put into the full-power state automatically
by acpi_dev_pm_attach() and into the lowest-power state available
automatically by acpi_dev_pm_detach().

Signed-off-by: Rafael J. Wysocki
Reviewed-by: Mika Westerberg
Tested-by: Mika Westerberg
---
 drivers/acpi/device_pm.c | 28 ++++++++++++++++++++++++----
 include/linux/acpi.h | 11 +++++++----
 2 files changed, 31 insertions(+), 8 deletions(-)
(limited to 'include/linux')

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index a8e059f69d50..f09dc987cf17 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -599,10 +599,12 @@ static struct dev_pm_domain acpi_general_pm_domain = {
 /**
  * acpi_dev_pm_attach - Prepare device for ACPI power management.
  * @dev: Device to prepare.
+ * @power_on: Whether or not to power on the device.
  *
  * If @dev has a valid ACPI handle that has a valid struct acpi_device object
  * attached to it, install a wakeup notification handler for the device and
- * add it to the general ACPI PM domain.
+ * add it to the general ACPI PM domain.  If @power_on is set, the device will
+ * be put into the ACPI D0 state before the function returns.
 *
 * This assumes that the @dev's bus type uses generic power management
 * callbacks (or doesn't use any power management callbacks at all).
@@ -610,7 +612,7 @@ static struct dev_pm_domain acpi_general_pm_domain = {
 * Callers must ensure proper synchronization of this function with power
 * management callbacks.
 */
-int acpi_dev_pm_attach(struct device *dev)
+int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
 	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
@@ -622,6 +624,10 @@ int acpi_dev_pm_attach(struct device *dev)
 	acpi_add_pm_notifier(adev, acpi_wakeup_device, dev);
 	dev->pm_domain = &acpi_general_pm_domain;
+	if (power_on) {
+		acpi_dev_pm_full_power(adev);
+		__acpi_device_run_wake(adev, false);
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
@@ -629,20 +635,34 @@ EXPORT_SYMBOL_GPL(acpi_dev_pm_attach);
 /**
 * acpi_dev_pm_detach - Remove ACPI power management from the device.
 * @dev: Device to take care of.
+ * @power_off: Whether or not to try to remove power from the device.
 *
 * Remove the device from the general ACPI PM domain and remove its wakeup
- * notifier.
+ * notifier.  If @power_off is set, additionally remove power from the device
+ * if possible.
 *
 * Callers must ensure proper synchronization of this function with power
 * management callbacks.
 */
-void acpi_dev_pm_detach(struct device *dev)
+void acpi_dev_pm_detach(struct device *dev, bool power_off)
 {
 	struct acpi_device *adev = acpi_dev_pm_get_node(dev);
 
 	if (adev && dev->pm_domain == &acpi_general_pm_domain) {
 		dev->pm_domain = NULL;
 		acpi_remove_pm_notifier(adev, acpi_wakeup_device);
+		if (power_off) {
+			/*
+			 * If the device's PM QoS resume latency limit or flags
+			 * have been exposed to user space, they have to be
+			 * hidden at this point, so that they don't affect the
+			 * choice of the low-power state to put the device into.
+			 */
+			dev_pm_qos_hide_latency_limit(dev);
+			dev_pm_qos_hide_flags(dev);
+			__acpi_device_run_wake(adev, false);
+			acpi_dev_pm_low_power(dev, adev, ACPI_STATE_S0);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(acpi_dev_pm_detach);
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 5fdd87271518..28ba643c92c1 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -458,11 +458,14 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 #endif
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
-int acpi_dev_pm_attach(struct device *dev);
-int acpi_dev_pm_detach(struct device *dev);
+int acpi_dev_pm_attach(struct device *dev, bool power_on);
+int acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
-static inline int acpi_dev_pm_attach(struct device *dev) { return -ENODEV; }
-static inline void acpi_dev_pm_detach(struct device *dev) {}
+static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
+{
+	return -ENODEV;
+}
+static inline void acpi_dev_pm_detach(struct device *dev, bool power_off) {}
 #endif
 
 #endif	/*_LINUX_ACPI_H*/
-- cgit v1.2.3
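
As a usage illustration, a driver relying on the general ACPI PM domain can
now power its device up in probe and down in remove via the new flags; a
minimal sketch (the driver itself is hypothetical, the two calls are the
interfaces changed above):

/*
 * Sketch only: attach to the general ACPI PM domain with power-on in
 * probe, detach with power-off in remove.
 */
#include <linux/acpi.h>
#include <linux/platform_device.h>

static int example_probe(struct platform_device *pdev)
{
	int ret;

	/* Put the device into D0 while attaching it to the PM domain. */
	ret = acpi_dev_pm_attach(&pdev->dev, true);
	if (ret)
		return ret;

	/* ... normal driver initialization ... */
	return 0;
}

static int example_remove(struct platform_device *pdev)
{
	/* ... driver teardown ... */

	/* Detach and let ACPI choose a low-power state for the device. */
	acpi_dev_pm_detach(&pdev->dev, true);
	return 0;
}
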
From 883d588e556347c4b3221ac492a8acd8a75e730a Mon Sep 17 00:00:00 2001
From: MyungJoo Ham
Date: Wed, 21 Nov 2012 17:17:11 +0900
Subject: PM / devfreq: remove compiler error when a governor is a module

With the introduction of commit eff607fdb1f787da1fedf46ab6e64adc2afd1c5a,
it became possible to build a governor as a module.  Thus, the #ifdef
statement for a governor should become #if IS_ENABLED.

Signed-off-by: MyungJoo Ham
---
 include/linux/devfreq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include/linux')

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 235248cb2c93..e83ef39b3bea 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -192,7 +192,7 @@ extern int devfreq_register_opp_notifier(struct device *dev,
 extern int devfreq_unregister_opp_notifier(struct device *dev,
 					   struct devfreq *devfreq);
 
-#ifdef CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND
+#if IS_ENABLED(CONFIG_DEVFREQ_GOV_SIMPLE_ONDEMAND)
 /**
  * struct devfreq_simple_ondemand_data - void *data fed to struct devfreq
  *	and devfreq_add_device
-- cgit v1.2.3

From e29482e8487954c87dc7b4fdbc53574bf1d4cce2 Mon Sep 17 00:00:00 2001
From: Mathias Nyman
Date: Fri, 30 Nov 2012 12:37:36 +0100
Subject: gpio / ACPI: add ACPI support

Add support for translating ACPI GPIO pin numbers to Linux GPIO API pins.
This needs a GPIO controller driver with the ACPI handle set for its
device.  Drivers can use acpi_get_gpio() to translate ACPI 5 GpioIo and
GpioInt resources to Linux GPIOs.

Signed-off-by: Mathias Nyman
Signed-off-by: Mika Westerberg
Acked-by: Grant Likely
Signed-off-by: Rafael J. Wysocki
---
 drivers/gpio/Kconfig | 4 ++++
 drivers/gpio/Makefile | 1 +
 drivers/gpio/gpiolib-acpi.c | 54 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi_gpio.h | 19 ++++++++++++++++
 4 files changed, 78 insertions(+)
 create mode 100644 drivers/gpio/gpiolib-acpi.c
 create mode 100644 include/linux/acpi_gpio.h
(limited to 'include/linux')

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index f11d8e3b4041..5c9b384119b8 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -49,6 +49,10 @@ config OF_GPIO
 	def_bool y
 	depends on OF
 
+config GPIO_ACPI
+	def_bool y
+	depends on ACPI
+
 config DEBUG_GPIO
 	bool "Debug GPIO calls"
 	depends on DEBUG_KERNEL
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 9aeed6707326..420dbaca05f1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -4,6 +4,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG
 
 obj-$(CONFIG_GPIOLIB) += gpiolib.o devres.o
 obj-$(CONFIG_OF_GPIO) += gpiolib-of.o
+obj-$(CONFIG_GPIO_ACPI) += gpiolib-acpi.o
 
 # Device drivers. Generally keep list sorted alphabetically
 obj-$(CONFIG_GPIO_GENERIC) += gpio-generic.o
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
new file mode 100644
index 000000000000..cbad6e908d30
--- /dev/null
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -0,0 +1,54 @@
+/*
+ * ACPI helpers for GPIO API
+ *
+ * Copyright (C) 2012, Intel Corporation
+ * Authors: Mathias Nyman
+ *          Mika Westerberg
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/errno.h>
+#include <linux/gpio.h>
+#include <linux/export.h>
+#include <linux/acpi_gpio.h>
+#include <linux/acpi.h>
+
+static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
+{
+	if (!gc->dev)
+		return false;
+
+	return ACPI_HANDLE(gc->dev) == data;
+}
+
+/**
+ * acpi_get_gpio() - Translate ACPI GPIO pin to GPIO number usable with GPIO API
+ * @path: ACPI GPIO controller full path name, (e.g. "\\_SB.GPO1")
+ * @pin: ACPI GPIO pin number (0-based, controller-relative)
+ *
+ * Returns GPIO number to use with Linux generic GPIO API, or errno error value
+ */
+
+int acpi_get_gpio(char *path, int pin)
+{
+	struct gpio_chip *chip;
+	acpi_handle handle;
+	acpi_status status;
+
+	status = acpi_get_handle(NULL, path, &handle);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	chip = gpiochip_find(handle, acpi_gpiochip_find);
+	if (!chip)
+		return -ENODEV;
+
+	if (!gpio_is_valid(chip->base + pin))
+		return -EINVAL;
+
+	return chip->base + pin;
+}
+EXPORT_SYMBOL_GPL(acpi_get_gpio);
diff --git a/include/linux/acpi_gpio.h b/include/linux/acpi_gpio.h
new file mode 100644
index 000000000000..91615a389b65
--- /dev/null
+++ b/include/linux/acpi_gpio.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_ACPI_GPIO_H_
+#define _LINUX_ACPI_GPIO_H_
+
+#include <linux/errno.h>
+
+#ifdef CONFIG_GPIO_ACPI
+
+int acpi_get_gpio(char *path, int pin);
+
+#else /* CONFIG_GPIO_ACPI */
+
+static inline int acpi_get_gpio(char *path, int pin)
+{
+	return -ENODEV;
+}
+
+#endif /* CONFIG_GPIO_ACPI */
+
+#endif /* _LINUX_ACPI_GPIO_H_ */
-- cgit v1.2.3
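
As a usage illustration, a driver that knows its GPIO controller's ACPI path
can translate and request a pin as follows; a minimal sketch (the path
"\\_SB.GPO1", pin 4, and the label are made-up values):

/*
 * Sketch only: translate an ACPI (controller path, pin) pair into a
 * Linux GPIO number, then request it through the generic GPIO API.
 */
#include <linux/acpi_gpio.h>
#include <linux/gpio.h>

static int example_request_acpi_gpio(void)
{
	int gpio;
	int ret;

	gpio = acpi_get_gpio("\\_SB.GPO1", 4);	/* hypothetical path/pin */
	if (gpio < 0)
		return gpio;	/* -ENODEV or -EINVAL from the helper */

	ret = gpio_request_one(gpio, GPIOF_IN, "example");
	if (ret)
		return ret;

	return gpio;
}
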
From d71f2f88825b31553881944959962b1871099e1f Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki"
Date: Wed, 5 Dec 2012 11:59:22 +0100
Subject: ACPI / PM: Fix header of acpi_dev_pm_detach() in acpi.h

The header of acpi_dev_pm_detach() in include/linux/acpi.h has an
incorrect return type, which should be void.  Fix that.

Signed-off-by: Rafael J. Wysocki
---
 include/linux/acpi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'include/linux')

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 28ba643c92c1..79345cd5ac20 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -459,7 +459,7 @@ static inline int acpi_subsys_resume_early(struct device *dev) { return 0; }
 
 #if defined(CONFIG_ACPI) && defined(CONFIG_PM)
 int acpi_dev_pm_attach(struct device *dev, bool power_on);
-int acpi_dev_pm_detach(struct device *dev, bool power_off);
+void acpi_dev_pm_detach(struct device *dev, bool power_off);
 #else
 static inline int acpi_dev_pm_attach(struct device *dev, bool power_on)
 {
-- cgit v1.2.3