From 9a30523066cde73c1442b76224bb540de9f9b0b0 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:25 -0800 Subject: hugetlb: add per node hstate attributes Add the per huge page size control/query attributes to the per node sysdevs: /sys/devices/system/node/node/hugepages/hugepages-/ nr_hugepages - r/w free_huge_pages - r/o surplus_huge_pages - r/o The patch attempts to re-use/share as much of the existing global hstate attribute initialization and handling, and the "nodes_allowed" constraint processing as possible. Calling set_max_huge_pages() with no node indicates a change to global hstate parameters. In this case, any non-default task mempolicy will be used to generate the nodes_allowed mask. A valid node id indicates an update to that node's hstate parameters, and the count argument specifies the target count for the specified node. From this info, we compute the target global count for the hstate and construct a nodes_allowed node mask contain only the specified node. Setting the node specific nr_hugepages via the per node attribute effectively ignores any task mempolicy or cpuset constraints. With this patch: (me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB ./ ../ free_hugepages nr_hugepages surplus_hugepages Starting from: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 0 Node 2 HugePages_Free: 0 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 0 Allocate 16 persistent huge pages on node 2: (me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages [Note that this is equivalent to: numactl -m 2 hugeadmin --pool-pages-min 2M:+16 ] Yields: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 16 Node 2 HugePages_Free: 16 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 16 Global controls work as expected--reduce pool to 8 persistent huge pages: (me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 8 Node 2 HugePages_Free: 8 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 Signed-off-by: Lee Schermerhorn Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/node.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/node.h') diff --git a/include/linux/node.h b/include/linux/node.h index 681a697b9a86..dae1521e1f05 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -28,6 +28,7 @@ struct node { struct memory_block; extern struct node node_devices[]; +typedef void (*node_registration_func_t)(struct node *); extern int register_node(struct node *, int, struct node *); extern void unregister_node(struct node *node); @@ -39,6 +40,11 @@ extern int unregister_cpu_under_node(unsigned int cpu, unsigned int nid); extern int register_mem_sect_under_node(struct memory_block *mem_blk, int nid); extern int unregister_mem_sect_under_nodes(struct memory_block *mem_blk); + +#ifdef CONFIG_HUGETLBFS +extern void register_hugetlbfs_with_node(node_registration_func_t doregister, + node_registration_func_t unregister); +#endif #else static inline int register_one_node(int nid) { @@ -65,6 +71,11 @@ static inline int unregister_mem_sect_under_nodes(struct memory_block *mem_blk) { return 0; } + +static inline void register_hugetlbfs_with_node(node_registration_func_t reg, + node_registration_func_t unreg) +{ +} #endif #define to_node(sys_device) container_of(sys_device, struct node, sysdev) -- cgit v1.2.3 From 39da08cb074cf19cb249832a2a955dfb28837e65 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Mon, 14 Dec 2009 17:58:36 -0800 Subject: hugetlb: offload per node attribute registrations Offload the registration and unregistration of per node hstate sysfs attributes to a worker thread rather than attempt the allocation/attachment or detachment/freeing of the attributes in the context of the memory hotplug handler. I don't know that this is absolutely required, but the registration can sleep in allocations and other mem hot plug handlers do it this way. If it turns out this is NOT required, we can drop this patch. N.B., Only tested build, boot, libhugetlbfs regression. i.e., no memory hotplug testing. Signed-off-by: Lee Schermerhorn Reviewed-by: Andi Kleen Cc: KAMEZAWA Hiroyuki Cc: Lee Schermerhorn Cc: Mel Gorman Cc: Randy Dunlap Cc: Nishanth Aravamudan Cc: David Rientjes Cc: Adam Litke Cc: Andy Whitcroft Cc: Eric Whitney Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 57 +++++++++++++++++++++++++++++++++++++++++++--------- include/linux/node.h | 5 +++++ 2 files changed, 52 insertions(+), 10 deletions(-) (limited to 'include/linux/node.h') diff --git a/drivers/base/node.c b/drivers/base/node.c index 9e218a6d4a5b..54e5d8eaf70e 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -186,11 +186,14 @@ static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL); static node_registration_func_t __hugetlb_register_node; static node_registration_func_t __hugetlb_unregister_node; -static inline void hugetlb_register_node(struct node *node) +static inline bool hugetlb_register_node(struct node *node) { if (__hugetlb_register_node && - node_state(node->sysdev.id, N_HIGH_MEMORY)) + node_state(node->sysdev.id, N_HIGH_MEMORY)) { __hugetlb_register_node(node); + return true; + } + return false; } static inline void hugetlb_unregister_node(struct node *node) @@ -387,10 +390,31 @@ static int link_mem_sections(int nid) return err; } +#ifdef CONFIG_HUGETLBFS /* * Handle per node hstate attribute [un]registration on transistions * to/from memoryless state. */ +static void node_hugetlb_work(struct work_struct *work) +{ + struct node *node = container_of(work, struct node, node_work); + + /* + * We only get here when a node transitions to/from memoryless state. + * We can detect which transition occurred by examining whether the + * node has memory now. hugetlb_register_node() already check this + * so we try to register the attributes. If that fails, then the + * node has transitioned to memoryless, try to unregister the + * attributes. + */ + if (!hugetlb_register_node(node)) + hugetlb_unregister_node(node); +} + +static void init_node_hugetlb_work(int nid) +{ + INIT_WORK(&node_devices[nid].node_work, node_hugetlb_work); +} static int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) @@ -399,14 +423,16 @@ static int node_memory_callback(struct notifier_block *self, int nid = mnb->status_change_nid; switch (action) { - case MEM_ONLINE: /* memory successfully brought online */ + case MEM_ONLINE: + case MEM_OFFLINE: + /* + * offload per node hstate [un]registration to a work thread + * when transitioning to/from memoryless state. + */ if (nid != NUMA_NO_NODE) - hugetlb_register_node(&node_devices[nid]); - break; - case MEM_OFFLINE: /* or offline */ - if (nid != NUMA_NO_NODE) - hugetlb_unregister_node(&node_devices[nid]); + schedule_work(&node_devices[nid].node_work); break; + case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: case MEM_CANCEL_ONLINE: @@ -417,15 +443,23 @@ static int node_memory_callback(struct notifier_block *self, return NOTIFY_OK; } -#else +#endif /* CONFIG_HUGETLBFS */ +#else /* !CONFIG_MEMORY_HOTPLUG_SPARSE */ + static int link_mem_sections(int nid) { return 0; } +#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ +#if !defined(CONFIG_MEMORY_HOTPLUG_SPARSE) || \ + !defined(CONFIG_HUGETLBFS) static inline int node_memory_callback(struct notifier_block *self, unsigned long action, void *arg) { return NOTIFY_OK; } -#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */ + +static void init_node_hugetlb_work(int nid) { } + +#endif int register_one_node(int nid) { @@ -449,6 +483,9 @@ int register_one_node(int nid) /* link memory sections under this node */ error = link_mem_sections(nid); + + /* initialize work queue for memory hot plug */ + init_node_hugetlb_work(nid); } return error; diff --git a/include/linux/node.h b/include/linux/node.h index dae1521e1f05..06292dac3eab 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -21,9 +21,14 @@ #include #include +#include struct node { struct sys_device sysdev; + +#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HUGETLBFS) + struct work_struct node_work; +#endif }; struct memory_block; -- cgit v1.2.3 From b17cd8d69a75f921d9d444cc3ac9b5b1d0b66ca0 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Nov 2010 01:28:24 -0500 Subject: driver core: prune docs about device_interface drivers/base/intf.c was removed before the beginning of (git) time but its Documentation stuck around. Remove it. Signed-off-by: Brandon Philips Signed-off-by: Greg Kroah-Hartman --- Documentation/driver-model/interface.txt | 129 ------------------------------- include/linux/cpu.h | 5 -- include/linux/node.h | 5 -- 3 files changed, 139 deletions(-) delete mode 100644 Documentation/driver-model/interface.txt (limited to 'include/linux/node.h') diff --git a/Documentation/driver-model/interface.txt b/Documentation/driver-model/interface.txt deleted file mode 100644 index c66912bfe866..000000000000 --- a/Documentation/driver-model/interface.txt +++ /dev/null @@ -1,129 +0,0 @@ - -Device Interfaces - -Introduction -~~~~~~~~~~~~ - -Device interfaces are the logical interfaces of device classes that correlate -directly to userspace interfaces, like device nodes. - -Each device class may have multiple interfaces through which you can -access the same device. An input device may support the mouse interface, -the 'evdev' interface, and the touchscreen interface. A SCSI disk would -support the disk interface, the SCSI generic interface, and possibly a raw -device interface. - -Device interfaces are registered with the class they belong to. As devices -are added to the class, they are added to each interface registered with -the class. The interface is responsible for determining whether the device -supports the interface or not. - - -Programming Interface -~~~~~~~~~~~~~~~~~~~~~ - -struct device_interface { - char * name; - rwlock_t lock; - u32 devnum; - struct device_class * devclass; - - struct list_head node; - struct driver_dir_entry dir; - - int (*add_device)(struct device *); - int (*add_device)(struct intf_data *); -}; - -int interface_register(struct device_interface *); -void interface_unregister(struct device_interface *); - - -An interface must specify the device class it belongs to. It is added -to that class's list of interfaces on registration. - - -Interfaces can be added to a device class at any time. Whenever it is -added, each device in the class is passed to the interface's -add_device callback. When an interface is removed, each device is -removed from the interface. - - -Devices -~~~~~~~ -Once a device is added to a device class, it is added to each -interface that is registered with the device class. The class -is expected to place a class-specific data structure in -struct device::class_data. The interface can use that (along with -other fields of struct device) to determine whether or not the driver -and/or device support that particular interface. - - -Data -~~~~ - -struct intf_data { - struct list_head node; - struct device_interface * intf; - struct device * dev; - u32 intf_num; -}; - -int interface_add_data(struct interface_data *); - -The interface is responsible for allocating and initializing a struct -intf_data and calling interface_add_data() to add it to the device's list -of interfaces it belongs to. This list will be iterated over when the device -is removed from the class (instead of all possible interfaces for a class). -This structure should probably be embedded in whatever per-device data -structure the interface is allocating anyway. - -Devices are enumerated within the interface. This happens in interface_add_data() -and the enumerated value is stored in the struct intf_data for that device. - -sysfs -~~~~~ -Each interface is given a directory in the directory of the device -class it belongs to: - -Interfaces get a directory in the class's directory as well: - - class/ - `-- input - |-- devices - |-- drivers - |-- mouse - `-- evdev - -When a device is added to the interface, a symlink is created that points -to the device's directory in the physical hierarchy: - - class/ - `-- input - |-- devices - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - |-- drivers - | `-- usb:usb_mouse -> ../../../bus/drivers/usb_mouse/ - |-- mouse - | `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - `-- evdev - `-- 1 -> ../../../root/pci0/00:1f.0/usb_bus/00:1f.2-1:0/ - - -Future Plans -~~~~~~~~~~~~ -A device interface is correlated directly with a userspace interface -for a device, specifically a device node. For instance, a SCSI disk -exposes at least two interfaces to userspace: the standard SCSI disk -interface and the SCSI generic interface. It might also export a raw -device interface. - -Many interfaces have a major number associated with them and each -device gets a minor number. Or, multiple interfaces might share one -major number, and each will receive a range of minor numbers (like in -the case of input devices). - -These major and minor numbers could be stored in the interface -structure. Major and minor allocations could happen when the interface -is registered with the class, or via a helper function. - diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 4823af64e9db..5f09323ee880 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -10,11 +10,6 @@ * * CPUs are exported via sysfs in the class/cpu/devices/ * directory. - * - * Per-cpu interfaces can be implemented using a struct device_interface. - * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_CPU_H_ #define _LINUX_CPU_H_ diff --git a/include/linux/node.h b/include/linux/node.h index 06292dac3eab..1466945cc9ef 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -10,11 +10,6 @@ * * Nodes are exported via driverfs in the class/node/devices/ * directory. - * - * Per-node interfaces can be implemented using a struct device_interface. - * See the following for how to do this: - * - drivers/base/intf.c - * - Documentation/driver-model/interface.txt */ #ifndef _LINUX_NODE_H_ #define _LINUX_NODE_H_ -- cgit v1.2.3