From fc5a40a2301aa241eedb16caf9169ca5763707c1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Oct 2012 09:49:02 +0100 Subject: UAPI: (Scripted) Disintegrate include/linux/raid Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones --- include/linux/raid/Kbuild | 2 - include/linux/raid/md_p.h | 301 ----------------------------------------- include/linux/raid/md_u.h | 141 +------------------ include/uapi/linux/raid/Kbuild | 2 + include/uapi/linux/raid/md_p.h | 301 +++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/raid/md_u.h | 155 +++++++++++++++++++++ 6 files changed, 459 insertions(+), 443 deletions(-) delete mode 100644 include/linux/raid/md_p.h create mode 100644 include/uapi/linux/raid/md_p.h create mode 100644 include/uapi/linux/raid/md_u.h (limited to 'include') diff --git a/include/linux/raid/Kbuild b/include/linux/raid/Kbuild index 2415a64c5e51..e69de29bb2d1 100644 --- a/include/linux/raid/Kbuild +++ b/include/linux/raid/Kbuild @@ -1,2 +0,0 @@ -header-y += md_p.h -header-y += md_u.h diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h deleted file mode 100644 index ee753536ab70..000000000000 --- a/include/linux/raid/md_p.h +++ /dev/null @@ -1,301 +0,0 @@ -/* - md_p.h : physical layout of Linux RAID devices - Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - You should have received a copy of the GNU General Public License - (for example /usr/src/linux/COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _MD_P_H -#define _MD_P_H - -#include - -/* - * RAID superblock. - * - * The RAID superblock maintains some statistics on each RAID configuration. - * Each real device in the RAID set contains it near the end of the device. - * Some of the ideas are copied from the ext2fs implementation. - * - * We currently use 4096 bytes as follows: - * - * word offset function - * - * 0 - 31 Constant generic RAID device information. - * 32 - 63 Generic state information. - * 64 - 127 Personality specific information. - * 128 - 511 12 32-words descriptors of the disks in the raid set. - * 512 - 911 Reserved. - * 912 - 1023 Disk specific descriptor. - */ - -/* - * If x is the real device size in bytes, we return an apparent size of: - * - * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES - * - * and place the 4kB superblock at offset y. 
- */ -#define MD_RESERVED_BYTES (64 * 1024) -#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) - -#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) - -#define MD_SB_BYTES 4096 -#define MD_SB_WORDS (MD_SB_BYTES / 4) -#define MD_SB_SECTORS (MD_SB_BYTES / 512) - -/* - * The following are counted in 32-bit words - */ -#define MD_SB_GENERIC_OFFSET 0 -#define MD_SB_PERSONALITY_OFFSET 64 -#define MD_SB_DISKS_OFFSET 128 -#define MD_SB_DESCRIPTOR_OFFSET 992 - -#define MD_SB_GENERIC_CONSTANT_WORDS 32 -#define MD_SB_GENERIC_STATE_WORDS 32 -#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) -#define MD_SB_PERSONALITY_WORDS 64 -#define MD_SB_DESCRIPTOR_WORDS 32 -#define MD_SB_DISKS 27 -#define MD_SB_DISKS_WORDS (MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS) -#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) -#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) - -/* - * Device "operational" state bits - */ -#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ -#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ -#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ -#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ - -#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. - * read requests will only be sent here in - * dire need - */ - -typedef struct mdp_device_descriptor_s { - __u32 number; /* 0 Device number in the entire set */ - __u32 major; /* 1 Device major number */ - __u32 minor; /* 2 Device minor number */ - __u32 raid_disk; /* 3 The role of the device in the raid set */ - __u32 state; /* 4 Operational state */ - __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; -} mdp_disk_t; - -#define MD_SB_MAGIC 0xa92b4efc - -/* - * Superblock state bits - */ -#define MD_SB_CLEAN 0 -#define MD_SB_ERRORS 1 - -#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ - -/* - * Notes: - * - if an array is being reshaped (restriped) in order to change the - * the number of active devices in the array, 'raid_disks' will be - * the larger of the old and new numbers. 'delta_disks' will - * be the "new - old". So if +ve, raid_disks is the new value, and - * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the - * old value and "raid_disks+delta_disks" is the new (smaller) value. - */ - - -typedef struct mdp_superblock_s { - /* - * Constant generic information - */ - __u32 md_magic; /* 0 MD identifier */ - __u32 major_version; /* 1 major version to which the set conforms */ - __u32 minor_version; /* 2 minor version ... */ - __u32 patch_version; /* 3 patchlevel version ... 
*/ - __u32 gvalid_words; /* 4 Number of used words in this section */ - __u32 set_uuid0; /* 5 Raid set identifier */ - __u32 ctime; /* 6 Creation time */ - __u32 level; /* 7 Raid personality */ - __u32 size; /* 8 Apparent size of each individual disk */ - __u32 nr_disks; /* 9 total disks in the raid set */ - __u32 raid_disks; /* 10 disks in a fully functional raid set */ - __u32 md_minor; /* 11 preferred MD minor device number */ - __u32 not_persistent; /* 12 does it have a persistent superblock */ - __u32 set_uuid1; /* 13 Raid set identifier #2 */ - __u32 set_uuid2; /* 14 Raid set identifier #3 */ - __u32 set_uuid3; /* 15 Raid set identifier #4 */ - __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; - - /* - * Generic state information - */ - __u32 utime; /* 0 Superblock update time */ - __u32 state; /* 1 State bits (clean, ...) */ - __u32 active_disks; /* 2 Number of currently active disks */ - __u32 working_disks; /* 3 Number of working disks */ - __u32 failed_disks; /* 4 Number of failed disks */ - __u32 spare_disks; /* 5 Number of spare disks */ - __u32 sb_csum; /* 6 checksum of the whole superblock */ -#ifdef __BIG_ENDIAN - __u32 events_hi; /* 7 high-order of superblock update count */ - __u32 events_lo; /* 8 low-order of superblock update count */ - __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ - __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ -#else - __u32 events_lo; /* 7 low-order of superblock update count */ - __u32 events_hi; /* 8 high-order of superblock update count */ - __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ - __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ -#endif - __u32 recovery_cp; /* 11 recovery checkpoint sector count */ - /* There are only valid for minor_version > 90 */ - __u64 reshape_position; /* 12,13 next address in array-space for reshape */ - __u32 new_level; /* 14 new level we are reshaping to */ - __u32 delta_disks; /* 15 change in number of raid_disks */ - __u32 new_layout; /* 16 new layout */ - __u32 new_chunk; /* 17 new chunk size (bytes) */ - __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; - - /* - * Personality information - */ - __u32 layout; /* 0 the array's physical layout */ - __u32 chunk_size; /* 1 chunk size in bytes */ - __u32 root_pv; /* 2 LV root PV */ - __u32 root_block; /* 3 LV root block */ - __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; - - /* - * Disks information - */ - mdp_disk_t disks[MD_SB_DISKS]; - - /* - * Reserved - */ - __u32 reserved[MD_SB_RESERVED_WORDS]; - - /* - * Active descriptor - */ - mdp_disk_t this_disk; - -} mdp_super_t; - -static inline __u64 md_event(mdp_super_t *sb) { - __u64 ev = sb->events_hi; - return (ev<<32)| sb->events_lo; -} - -#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) - -/* - * The version-1 superblock : - * All numeric fields are little-endian. - * - * total size: 256 bytes plus 2 per device. - * 1K allows 384 devices. - */ -struct mdp_superblock_1 { - /* constant array information - 128 bytes */ - __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ - __le32 major_version; /* 1 */ - __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ - __le32 pad0; /* always set to 0 when writing */ - - __u8 set_uuid[16]; /* user-space generated. 
*/ - char set_name[32]; /* set and interpreted by user-space */ - - __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ - __le32 layout; /* only for raid5 and raid10 currently */ - __le64 size; /* used size of component devices, in 512byte sectors */ - - __le32 chunksize; /* in 512byte sectors */ - __le32 raid_disks; - __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts - * NOTE: signed, so bitmap can be before superblock - * only meaningful of feature_map[0] is set. - */ - - /* These are only valid with feature bit '4' */ - __le32 new_level; /* new level we are reshaping to */ - __le64 reshape_position; /* next address in array-space for reshape */ - __le32 delta_disks; /* change in number of raid_disks */ - __le32 new_layout; /* new layout */ - __le32 new_chunk; /* new chunk size (512byte sectors) */ - __le32 new_offset; /* signed number to add to data_offset in new - * layout. 0 == no-change. This can be - * different on each device in the array. - */ - - /* constant this-device information - 64 bytes */ - __le64 data_offset; /* sector start of data, often 0 */ - __le64 data_size; /* sectors in this device that can be used for data */ - __le64 super_offset; /* sector start of this superblock */ - __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ - __le32 dev_number; /* permanent identifier of this device - not role in raid */ - __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ - __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ - __u8 devflags; /* per-device flags. Only one defined...*/ -#define WriteMostly1 1 /* mask for writemostly flag in above */ - /* Bad block log. If there are any bad blocks the feature flag is set. - * If offset and size are non-zero, that space is reserved and available - */ - __u8 bblog_shift; /* shift from sectors to block size */ - __le16 bblog_size; /* number of sectors reserved for list */ - __le32 bblog_offset; /* sector offset from superblock to bblog, - * signed - not unsigned */ - - /* array state information - 64 bytes */ - __le64 utime; /* 40 bits second, 24 bits microseconds */ - __le64 events; /* incremented when superblock updated */ - __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ - __le32 sb_csum; /* checksum up to devs[max_dev] */ - __le32 max_dev; /* size of devs[] array to consider */ - __u8 pad3[64-32]; /* set to 0 when writing */ - - /* device state information. Indexed by dev_number. - * 2 bytes per device - * Note there are no per-device state flags. State information is rolled - * into the 'roles' value. If a device is spare or faulty, then it doesn't - * have a meaningful role. - */ - __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ -}; - -/* feature_map bits */ -#define MD_FEATURE_BITMAP_OFFSET 1 -#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and - * must be honoured - */ -#define MD_FEATURE_RESHAPE_ACTIVE 4 -#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ -#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an - * active device with same 'role'. - * 'recovery_offset' is also set. - */ -#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number - * of devices, but is going - * backwards anyway. 
- */ -#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ -#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ - |MD_FEATURE_RECOVERY_OFFSET \ - |MD_FEATURE_RESHAPE_ACTIVE \ - |MD_FEATURE_BAD_BLOCKS \ - |MD_FEATURE_REPLACEMENT \ - |MD_FEATURE_RESHAPE_BACKWARDS \ - |MD_FEATURE_NEW_OFFSET \ - ) - -#endif diff --git a/include/linux/raid/md_u.h b/include/linux/raid/md_u.h index fb1abb3367e9..358c04bfbe2a 100644 --- a/include/linux/raid/md_u.h +++ b/include/linux/raid/md_u.h @@ -11,149 +11,10 @@ (for example /usr/src/linux/COPYING); if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - #ifndef _MD_U_H #define _MD_U_H -/* - * Different major versions are not compatible. - * Different minor versions are only downward compatible. - * Different patchlevel versions are downward and upward compatible. - */ -#define MD_MAJOR_VERSION 0 -#define MD_MINOR_VERSION 90 -/* - * MD_PATCHLEVEL_VERSION indicates kernel functionality. - * >=1 means different superblock formats are selectable using SET_ARRAY_INFO - * and major_version/minor_version accordingly - * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT - * in the super status byte - * >=3 means that bitmap superblock version 4 is supported, which uses - * little-ending representation rather than host-endian - */ -#define MD_PATCHLEVEL_VERSION 3 - -/* ioctls */ - -/* status */ -#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) -#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) -#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) -#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) -#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) -#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) - -/* configuration */ -#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) -#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) -#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) -#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) -#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) -#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) -#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) -#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) -#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) -#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) -#define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a) -#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) +#include -/* usage */ -#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) -/* 0x31 was START_ARRAY */ -#define STOP_ARRAY _IO (MD_MAJOR, 0x32) -#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) -#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) - -/* 63 partitions with the alternate major number (mdp) */ -#define MdpMinorShift 6 -#ifdef __KERNEL__ extern int mdp_major; -#endif - -typedef struct mdu_version_s { - int major; - int minor; - int patchlevel; -} mdu_version_t; - -typedef struct mdu_array_info_s { - /* - * Generic constant information - */ - int major_version; - int minor_version; - int patch_version; - int ctime; - int level; - int size; - int nr_disks; - int raid_disks; - int md_minor; - int not_persistent; - - /* - * Generic state information - */ - int utime; /* 0 Superblock update time */ - int state; /* 1 State bits (clean, ...) 
*/ - int active_disks; /* 2 Number of currently active disks */ - int working_disks; /* 3 Number of working disks */ - int failed_disks; /* 4 Number of failed disks */ - int spare_disks; /* 5 Number of spare disks */ - - /* - * Personality information - */ - int layout; /* 0 the array's physical layout */ - int chunk_size; /* 1 chunk size in bytes */ - -} mdu_array_info_t; - -/* non-obvious values for 'level' */ -#define LEVEL_MULTIPATH (-4) -#define LEVEL_LINEAR (-1) -#define LEVEL_FAULTY (-5) - -/* we need a value for 'no level specified' and 0 - * means 'raid0', so we need something else. This is - * for internal use only - */ -#define LEVEL_NONE (-1000000) - -typedef struct mdu_disk_info_s { - /* - * configuration/status of one particular disk - */ - int number; - int major; - int minor; - int raid_disk; - int state; - -} mdu_disk_info_t; - -typedef struct mdu_start_info_s { - /* - * configuration/status of one particular disk - */ - int major; - int minor; - int raid_disk; - int state; - -} mdu_start_info_t; - -typedef struct mdu_bitmap_file_s -{ - char pathname[4096]; -} mdu_bitmap_file_t; - -typedef struct mdu_param_s -{ - int personality; /* 1,2,3,4 */ - int chunk_size; /* in bytes */ - int max_fault; /* unused for now */ -} mdu_param_t; - #endif - diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild index aafaa5aa54d4..e2c3d25405d7 100644 --- a/include/uapi/linux/raid/Kbuild +++ b/include/uapi/linux/raid/Kbuild @@ -1 +1,3 @@ # UAPI Header export list +header-y += md_p.h +header-y += md_u.h diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h new file mode 100644 index 000000000000..ee753536ab70 --- /dev/null +++ b/include/uapi/linux/raid/md_p.h @@ -0,0 +1,301 @@ +/* + md_p.h : physical layout of Linux RAID devices + Copyright (C) 1996-98 Ingo Molnar, Gadi Oxman + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + You should have received a copy of the GNU General Public License + (for example /usr/src/linux/COPYING); if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef _MD_P_H +#define _MD_P_H + +#include + +/* + * RAID superblock. + * + * The RAID superblock maintains some statistics on each RAID configuration. + * Each real device in the RAID set contains it near the end of the device. + * Some of the ideas are copied from the ext2fs implementation. + * + * We currently use 4096 bytes as follows: + * + * word offset function + * + * 0 - 31 Constant generic RAID device information. + * 32 - 63 Generic state information. + * 64 - 127 Personality specific information. + * 128 - 511 12 32-words descriptors of the disks in the raid set. + * 512 - 911 Reserved. + * 912 - 1023 Disk specific descriptor. + */ + +/* + * If x is the real device size in bytes, we return an apparent size of: + * + * y = (x & ~(MD_RESERVED_BYTES - 1)) - MD_RESERVED_BYTES + * + * and place the 4kB superblock at offset y. 
+ */ +#define MD_RESERVED_BYTES (64 * 1024) +#define MD_RESERVED_SECTORS (MD_RESERVED_BYTES / 512) + +#define MD_NEW_SIZE_SECTORS(x) ((x & ~(MD_RESERVED_SECTORS - 1)) - MD_RESERVED_SECTORS) + +#define MD_SB_BYTES 4096 +#define MD_SB_WORDS (MD_SB_BYTES / 4) +#define MD_SB_SECTORS (MD_SB_BYTES / 512) + +/* + * The following are counted in 32-bit words + */ +#define MD_SB_GENERIC_OFFSET 0 +#define MD_SB_PERSONALITY_OFFSET 64 +#define MD_SB_DISKS_OFFSET 128 +#define MD_SB_DESCRIPTOR_OFFSET 992 + +#define MD_SB_GENERIC_CONSTANT_WORDS 32 +#define MD_SB_GENERIC_STATE_WORDS 32 +#define MD_SB_GENERIC_WORDS (MD_SB_GENERIC_CONSTANT_WORDS + MD_SB_GENERIC_STATE_WORDS) +#define MD_SB_PERSONALITY_WORDS 64 +#define MD_SB_DESCRIPTOR_WORDS 32 +#define MD_SB_DISKS 27 +#define MD_SB_DISKS_WORDS (MD_SB_DISKS*MD_SB_DESCRIPTOR_WORDS) +#define MD_SB_RESERVED_WORDS (1024 - MD_SB_GENERIC_WORDS - MD_SB_PERSONALITY_WORDS - MD_SB_DISKS_WORDS - MD_SB_DESCRIPTOR_WORDS) +#define MD_SB_EQUAL_WORDS (MD_SB_GENERIC_WORDS + MD_SB_PERSONALITY_WORDS + MD_SB_DISKS_WORDS) + +/* + * Device "operational" state bits + */ +#define MD_DISK_FAULTY 0 /* disk is faulty / operational */ +#define MD_DISK_ACTIVE 1 /* disk is running or spare disk */ +#define MD_DISK_SYNC 2 /* disk is in sync with the raid set */ +#define MD_DISK_REMOVED 3 /* disk is in sync with the raid set */ + +#define MD_DISK_WRITEMOSTLY 9 /* disk is "write-mostly" is RAID1 config. + * read requests will only be sent here in + * dire need + */ + +typedef struct mdp_device_descriptor_s { + __u32 number; /* 0 Device number in the entire set */ + __u32 major; /* 1 Device major number */ + __u32 minor; /* 2 Device minor number */ + __u32 raid_disk; /* 3 The role of the device in the raid set */ + __u32 state; /* 4 Operational state */ + __u32 reserved[MD_SB_DESCRIPTOR_WORDS - 5]; +} mdp_disk_t; + +#define MD_SB_MAGIC 0xa92b4efc + +/* + * Superblock state bits + */ +#define MD_SB_CLEAN 0 +#define MD_SB_ERRORS 1 + +#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ + +/* + * Notes: + * - if an array is being reshaped (restriped) in order to change the + * the number of active devices in the array, 'raid_disks' will be + * the larger of the old and new numbers. 'delta_disks' will + * be the "new - old". So if +ve, raid_disks is the new value, and + * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the + * old value and "raid_disks+delta_disks" is the new (smaller) value. + */ + + +typedef struct mdp_superblock_s { + /* + * Constant generic information + */ + __u32 md_magic; /* 0 MD identifier */ + __u32 major_version; /* 1 major version to which the set conforms */ + __u32 minor_version; /* 2 minor version ... */ + __u32 patch_version; /* 3 patchlevel version ... 
*/ + __u32 gvalid_words; /* 4 Number of used words in this section */ + __u32 set_uuid0; /* 5 Raid set identifier */ + __u32 ctime; /* 6 Creation time */ + __u32 level; /* 7 Raid personality */ + __u32 size; /* 8 Apparent size of each individual disk */ + __u32 nr_disks; /* 9 total disks in the raid set */ + __u32 raid_disks; /* 10 disks in a fully functional raid set */ + __u32 md_minor; /* 11 preferred MD minor device number */ + __u32 not_persistent; /* 12 does it have a persistent superblock */ + __u32 set_uuid1; /* 13 Raid set identifier #2 */ + __u32 set_uuid2; /* 14 Raid set identifier #3 */ + __u32 set_uuid3; /* 15 Raid set identifier #4 */ + __u32 gstate_creserved[MD_SB_GENERIC_CONSTANT_WORDS - 16]; + + /* + * Generic state information + */ + __u32 utime; /* 0 Superblock update time */ + __u32 state; /* 1 State bits (clean, ...) */ + __u32 active_disks; /* 2 Number of currently active disks */ + __u32 working_disks; /* 3 Number of working disks */ + __u32 failed_disks; /* 4 Number of failed disks */ + __u32 spare_disks; /* 5 Number of spare disks */ + __u32 sb_csum; /* 6 checksum of the whole superblock */ +#ifdef __BIG_ENDIAN + __u32 events_hi; /* 7 high-order of superblock update count */ + __u32 events_lo; /* 8 low-order of superblock update count */ + __u32 cp_events_hi; /* 9 high-order of checkpoint update count */ + __u32 cp_events_lo; /* 10 low-order of checkpoint update count */ +#else + __u32 events_lo; /* 7 low-order of superblock update count */ + __u32 events_hi; /* 8 high-order of superblock update count */ + __u32 cp_events_lo; /* 9 low-order of checkpoint update count */ + __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ +#endif + __u32 recovery_cp; /* 11 recovery checkpoint sector count */ + /* There are only valid for minor_version > 90 */ + __u64 reshape_position; /* 12,13 next address in array-space for reshape */ + __u32 new_level; /* 14 new level we are reshaping to */ + __u32 delta_disks; /* 15 change in number of raid_disks */ + __u32 new_layout; /* 16 new layout */ + __u32 new_chunk; /* 17 new chunk size (bytes) */ + __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; + + /* + * Personality information + */ + __u32 layout; /* 0 the array's physical layout */ + __u32 chunk_size; /* 1 chunk size in bytes */ + __u32 root_pv; /* 2 LV root PV */ + __u32 root_block; /* 3 LV root block */ + __u32 pstate_reserved[MD_SB_PERSONALITY_WORDS - 4]; + + /* + * Disks information + */ + mdp_disk_t disks[MD_SB_DISKS]; + + /* + * Reserved + */ + __u32 reserved[MD_SB_RESERVED_WORDS]; + + /* + * Active descriptor + */ + mdp_disk_t this_disk; + +} mdp_super_t; + +static inline __u64 md_event(mdp_super_t *sb) { + __u64 ev = sb->events_hi; + return (ev<<32)| sb->events_lo; +} + +#define MD_SUPERBLOCK_1_TIME_SEC_MASK ((1ULL<<40) - 1) + +/* + * The version-1 superblock : + * All numeric fields are little-endian. + * + * total size: 256 bytes plus 2 per device. + * 1K allows 384 devices. + */ +struct mdp_superblock_1 { + /* constant array information - 128 bytes */ + __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ + __le32 major_version; /* 1 */ + __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ + __le32 pad0; /* always set to 0 when writing */ + + __u8 set_uuid[16]; /* user-space generated. 
*/ + char set_name[32]; /* set and interpreted by user-space */ + + __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ + __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 layout; /* only for raid5 and raid10 currently */ + __le64 size; /* used size of component devices, in 512byte sectors */ + + __le32 chunksize; /* in 512byte sectors */ + __le32 raid_disks; + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ + + /* These are only valid with feature bit '4' */ + __le32 new_level; /* new level we are reshaping to */ + __le64 reshape_position; /* next address in array-space for reshape */ + __le32 delta_disks; /* change in number of raid_disks */ + __le32 new_layout; /* new layout */ + __le32 new_chunk; /* new chunk size (512byte sectors) */ + __le32 new_offset; /* signed number to add to data_offset in new + * layout. 0 == no-change. This can be + * different on each device in the array. + */ + + /* constant this-device information - 64 bytes */ + __le64 data_offset; /* sector start of data, often 0 */ + __le64 data_size; /* sectors in this device that can be used for data */ + __le64 super_offset; /* sector start of this superblock */ + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le32 dev_number; /* permanent identifier of this device - not role in raid */ + __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ + __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ + __u8 devflags; /* per-device flags. Only one defined...*/ +#define WriteMostly1 1 /* mask for writemostly flag in above */ + /* Bad block log. If there are any bad blocks the feature flag is set. + * If offset and size are non-zero, that space is reserved and available + */ + __u8 bblog_shift; /* shift from sectors to block size */ + __le16 bblog_size; /* number of sectors reserved for list */ + __le32 bblog_offset; /* sector offset from superblock to bblog, + * signed - not unsigned */ + + /* array state information - 64 bytes */ + __le64 utime; /* 40 bits second, 24 bits microseconds */ + __le64 events; /* incremented when superblock updated */ + __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ + __le32 sb_csum; /* checksum up to devs[max_dev] */ + __le32 max_dev; /* size of devs[] array to consider */ + __u8 pad3[64-32]; /* set to 0 when writing */ + + /* device state information. Indexed by dev_number. + * 2 bytes per device + * Note there are no per-device state flags. State information is rolled + * into the 'roles' value. If a device is spare or faulty, then it doesn't + * have a meaningful role. + */ + __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ +}; + +/* feature_map bits */ +#define MD_FEATURE_BITMAP_OFFSET 1 +#define MD_FEATURE_RECOVERY_OFFSET 2 /* recovery_offset is present and + * must be honoured + */ +#define MD_FEATURE_RESHAPE_ACTIVE 4 +#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ +#define MD_FEATURE_REPLACEMENT 16 /* This device is replacing an + * active device with same 'role'. + * 'recovery_offset' is also set. + */ +#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* Reshape doesn't change number + * of devices, but is going + * backwards anyway. 
+ */ +#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ +#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ + |MD_FEATURE_RECOVERY_OFFSET \ + |MD_FEATURE_RESHAPE_ACTIVE \ + |MD_FEATURE_BAD_BLOCKS \ + |MD_FEATURE_REPLACEMENT \ + |MD_FEATURE_RESHAPE_BACKWARDS \ + |MD_FEATURE_NEW_OFFSET \ + ) + +#endif diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h new file mode 100644 index 000000000000..4133e744e4e6 --- /dev/null +++ b/include/uapi/linux/raid/md_u.h @@ -0,0 +1,155 @@ +/* + md_u.h : user <=> kernel API between Linux raidtools and RAID drivers + Copyright (C) 1998 Ingo Molnar + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + You should have received a copy of the GNU General Public License + (for example /usr/src/linux/COPYING); if not, write to the Free + Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifndef _UAPI_MD_U_H +#define _UAPI_MD_U_H + +/* + * Different major versions are not compatible. + * Different minor versions are only downward compatible. + * Different patchlevel versions are downward and upward compatible. + */ +#define MD_MAJOR_VERSION 0 +#define MD_MINOR_VERSION 90 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + * >=3 means that bitmap superblock version 4 is supported, which uses + * little-ending representation rather than host-endian + */ +#define MD_PATCHLEVEL_VERSION 3 + +/* ioctls */ + +/* status */ +#define RAID_VERSION _IOR (MD_MAJOR, 0x10, mdu_version_t) +#define GET_ARRAY_INFO _IOR (MD_MAJOR, 0x11, mdu_array_info_t) +#define GET_DISK_INFO _IOR (MD_MAJOR, 0x12, mdu_disk_info_t) +#define PRINT_RAID_DEBUG _IO (MD_MAJOR, 0x13) +#define RAID_AUTORUN _IO (MD_MAJOR, 0x14) +#define GET_BITMAP_FILE _IOR (MD_MAJOR, 0x15, mdu_bitmap_file_t) + +/* configuration */ +#define CLEAR_ARRAY _IO (MD_MAJOR, 0x20) +#define ADD_NEW_DISK _IOW (MD_MAJOR, 0x21, mdu_disk_info_t) +#define HOT_REMOVE_DISK _IO (MD_MAJOR, 0x22) +#define SET_ARRAY_INFO _IOW (MD_MAJOR, 0x23, mdu_array_info_t) +#define SET_DISK_INFO _IO (MD_MAJOR, 0x24) +#define WRITE_RAID_INFO _IO (MD_MAJOR, 0x25) +#define UNPROTECT_ARRAY _IO (MD_MAJOR, 0x26) +#define PROTECT_ARRAY _IO (MD_MAJOR, 0x27) +#define HOT_ADD_DISK _IO (MD_MAJOR, 0x28) +#define SET_DISK_FAULTY _IO (MD_MAJOR, 0x29) +#define HOT_GENERATE_ERROR _IO (MD_MAJOR, 0x2a) +#define SET_BITMAP_FILE _IOW (MD_MAJOR, 0x2b, int) + +/* usage */ +#define RUN_ARRAY _IOW (MD_MAJOR, 0x30, mdu_param_t) +/* 0x31 was START_ARRAY */ +#define STOP_ARRAY _IO (MD_MAJOR, 0x32) +#define STOP_ARRAY_RO _IO (MD_MAJOR, 0x33) +#define RESTART_ARRAY_RW _IO (MD_MAJOR, 0x34) + +/* 63 partitions with the alternate major number (mdp) */ +#define MdpMinorShift 6 + +typedef struct mdu_version_s { + int major; + int minor; + int patchlevel; +} mdu_version_t; + +typedef struct mdu_array_info_s { + /* + * Generic constant information + */ + int major_version; + int minor_version; + int patch_version; + int ctime; + int level; + int size; + int nr_disks; + int raid_disks; + int md_minor; + int not_persistent; + + /* + * Generic state information + */ + int utime; /* 0 Superblock update 
time */ + int state; /* 1 State bits (clean, ...) */ + int active_disks; /* 2 Number of currently active disks */ + int working_disks; /* 3 Number of working disks */ + int failed_disks; /* 4 Number of failed disks */ + int spare_disks; /* 5 Number of spare disks */ + + /* + * Personality information + */ + int layout; /* 0 the array's physical layout */ + int chunk_size; /* 1 chunk size in bytes */ + +} mdu_array_info_t; + +/* non-obvious values for 'level' */ +#define LEVEL_MULTIPATH (-4) +#define LEVEL_LINEAR (-1) +#define LEVEL_FAULTY (-5) + +/* we need a value for 'no level specified' and 0 + * means 'raid0', so we need something else. This is + * for internal use only + */ +#define LEVEL_NONE (-1000000) + +typedef struct mdu_disk_info_s { + /* + * configuration/status of one particular disk + */ + int number; + int major; + int minor; + int raid_disk; + int state; + +} mdu_disk_info_t; + +typedef struct mdu_start_info_s { + /* + * configuration/status of one particular disk + */ + int major; + int minor; + int raid_disk; + int state; + +} mdu_start_info_t; + +typedef struct mdu_bitmap_file_s +{ + char pathname[4096]; +} mdu_bitmap_file_t; + +typedef struct mdu_param_s +{ + int personality; /* 1,2,3,4 */ + int chunk_size; /* in bytes */ + int max_fault; /* unused for now */ +} mdu_param_t; + +#endif /* _UAPI_MD_U_H */ -- cgit v1.2.3 From 9b395bc3be1cebf0144a127c7e67d56dbdac0930 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 26 Oct 2012 00:36:40 +0200 Subject: mac80211: verify that skb data is present A number of places in the mesh code don't check that the frame data is present and in the skb header when trying to access. Add those checks and the necessary pskb_may_pull() calls. This prevents accessing data that doesn't actually exist. To do this, export ieee80211_get_mesh_hdrlen() to be able to use it in mac80211. Cc: stable@vger.kernel.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 9 +++++++++ net/mac80211/rx.c | 32 +++++++++++++++++++++++++++++++- net/wireless/util.c | 3 ++- 3 files changed, 42 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f8cd4cf3fad8..7d5b6000378b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2651,6 +2651,15 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * ieee80211_get_mesh_hdrlen - get mesh extension header length + * @meshhdr: the mesh extension header, only the flags field + * (first byte) will be accessed + * Returns the length of the extension header, which is always at + * least 6 bytes and at most 18 if address 5 and 6 are present. 
+ */ +unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); + /** * DOC: Data path helpers * diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 99cdee16e31b..265a032dec49 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -531,6 +531,11 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (ieee80211_is_action(hdr->frame_control)) { u8 category; + + /* make sure category field is present */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + return RX_DROP_MONITOR; + mgmt = (struct ieee80211_mgmt *)hdr; category = mgmt->u.action.category; if (category != WLAN_CATEGORY_MESH_ACTION && @@ -1892,6 +1897,20 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* make sure fixed part of mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, hdrlen + 6)) + return RX_DROP_MONITOR; + + mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); + + /* make sure full mesh header is there, also checks skb len */ + if (!pskb_may_pull(rx->skb, + hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr))) + return RX_DROP_MONITOR; + + /* reload pointers */ + hdr = (struct ieee80211_hdr *) skb->data; mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen); /* frame is in RMC, don't forward */ @@ -1915,9 +1934,12 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (is_multicast_ether_addr(hdr->addr1)) { mpp_addr = hdr->addr3; proxied_addr = mesh_hdr->eaddr1; - } else { + } else if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6) { + /* has_a4 already checked in ieee80211_rx_mesh_check */ mpp_addr = hdr->addr4; proxied_addr = mesh_hdr->eaddr2; + } else { + return RX_DROP_MONITOR; } rcu_read_lock(); @@ -2354,6 +2376,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_SELF_PROTECTED: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.self_prot.action_code))) + break; + switch (mgmt->u.action.u.self_prot.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: @@ -2372,6 +2398,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_MESH_ACTION: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.mesh_action.action_code))) + break; + if (!ieee80211_vif_is_mesh(&sdata->vif)) break; if (mesh_action_is_path_sel(mgmt) && diff --git a/net/wireless/util.c b/net/wireless/util.c index 45a09de1ffe3..2762e8329986 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -309,7 +309,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb) } EXPORT_SYMBOL(ieee80211_get_hdrlen_from_skb); -static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) +unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) { int ae = meshhdr->flags & MESH_FLAGS_AE; /* 802.11-2012, 8.2.4.7.3 */ @@ -323,6 +323,7 @@ static int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) return 18; } } +EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen); int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) -- cgit v1.2.3 From a0830dbd4e42b38aefdf3fb61ba5019a1a99ea85 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 16 Oct 2012 13:05:59 +0200 Subject: ALSA: Add a reference counter to card instance For more strict protection for wild disconnections, a refcount is introduced to the card instance, and let it up/down when an object is referred via snd_lookup_*() in the open ops. 
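
(Illustrative sketch, not part of the patch: the open-handler pattern this change establishes. snd_lookup_minor_data() and snd_card_unref() are the helpers touched by the diff below; example_snd_open() and the trimmed error handling are hypothetical.)

#include <linux/fs.h>
#include <sound/core.h>
#include <sound/minors.h>

/* Hypothetical open handler showing the reference discipline the patch
 * applies to the real handlers (snd_ctl_open(), snd_pcm_*_open(),
 * snd_rawmidi_open(), the OSS emulation opens, ...). */
static int example_snd_open(struct inode *inode, struct file *file)
{
	struct snd_card *card;

	/* the lookup now takes a reference on the associated card */
	card = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_CONTROL);
	if (!card)
		return -ENODEV;

	/* ... device-specific open work runs while the reference is held ... */

	/* every exit path, success or error, drops the lookup reference */
	snd_card_unref(card);
	return 0;
}
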
The free-after-last-close check is also changed to check this refcount instead of the empty list, too. Reported-by: Matthieu CASTET Cc: Signed-off-by: Takashi Iwai --- include/sound/core.h | 3 +++ sound/core/compress_offload.c | 9 ++++++-- sound/core/control.c | 3 +++ sound/core/hwdep.c | 5 ++++- sound/core/init.c | 50 ++++++++++++++++++++++++++----------------- sound/core/oss/mixer_oss.c | 10 +++++++-- sound/core/oss/pcm_oss.c | 2 ++ sound/core/pcm_native.c | 9 ++++++-- sound/core/rawmidi.c | 6 +++++- sound/core/sound.c | 11 ++++++++-- sound/core/sound_oss.c | 10 +++++++-- 11 files changed, 86 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/sound/core.h b/include/sound/core.h index bc056687f647..93896ad1fcdd 100644 --- a/include/sound/core.h +++ b/include/sound/core.h @@ -132,6 +132,7 @@ struct snd_card { int shutdown; /* this card is going down */ int free_on_last_close; /* free in context of file_release */ wait_queue_head_t shutdown_sleep; + atomic_t refcount; /* refcount for disconnection */ struct device *dev; /* device assigned to this card */ struct device *card_dev; /* cardX object for sysfs */ @@ -189,6 +190,7 @@ struct snd_minor { const struct file_operations *f_ops; /* file operations */ void *private_data; /* private data for f_ops->open */ struct device *dev; /* device for sysfs */ + struct snd_card *card_ptr; /* assigned card instance */ }; /* return a device pointer linked to each sound device as a parent */ @@ -295,6 +297,7 @@ int snd_card_info_done(void); int snd_component_add(struct snd_card *card, const char *component); int snd_card_file_add(struct snd_card *card, struct file *file); int snd_card_file_remove(struct snd_card *card, struct file *file); +void snd_card_unref(struct snd_card *card); #define snd_card_set_dev(card, devptr) ((card)->dev = (devptr)) diff --git a/sound/core/compress_offload.c b/sound/core/compress_offload.c index c40ae573346d..ad11dc994792 100644 --- a/sound/core/compress_offload.c +++ b/sound/core/compress_offload.c @@ -100,12 +100,15 @@ static int snd_compr_open(struct inode *inode, struct file *f) if (dirn != compr->direction) { pr_err("this device doesn't support this direction\n"); + snd_card_unref(compr->card); return -EINVAL; } data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) + if (!data) { + snd_card_unref(compr->card); return -ENOMEM; + } data->stream.ops = compr->ops; data->stream.direction = dirn; data->stream.private_data = compr->private_data; @@ -113,6 +116,7 @@ static int snd_compr_open(struct inode *inode, struct file *f) runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); if (!runtime) { kfree(data); + snd_card_unref(compr->card); return -ENOMEM; } runtime->state = SNDRV_PCM_STATE_OPEN; @@ -126,7 +130,8 @@ static int snd_compr_open(struct inode *inode, struct file *f) kfree(runtime); kfree(data); } - return ret; + snd_card_unref(compr->card); + return 0; } static int snd_compr_free(struct inode *inode, struct file *f) diff --git a/sound/core/control.c b/sound/core/control.c index 7e86a5b9f3b5..9768a3963c8f 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -86,6 +86,7 @@ static int snd_ctl_open(struct inode *inode, struct file *file) write_lock_irqsave(&card->ctl_files_rwlock, flags); list_add_tail(&ctl->list, &card->ctl_files); write_unlock_irqrestore(&card->ctl_files_rwlock, flags); + snd_card_unref(card); return 0; __error: @@ -93,6 +94,8 @@ static int snd_ctl_open(struct inode *inode, struct file *file) __error2: snd_card_file_remove(card, file); __error1: + if (card) + 
snd_card_unref(card); return err; } diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 75ea16f35b1a..53a6ba5ad615 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -100,8 +100,10 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) if (hw == NULL) return -ENODEV; - if (!try_module_get(hw->card->module)) + if (!try_module_get(hw->card->module)) { + snd_card_unref(hw->card); return -EFAULT; + } init_waitqueue_entry(&wait, current); add_wait_queue(&hw->open_wait, &wait); @@ -148,6 +150,7 @@ static int snd_hwdep_open(struct inode *inode, struct file * file) mutex_unlock(&hw->open_mutex); if (err < 0) module_put(hw->card->module); + snd_card_unref(hw->card); return err; } diff --git a/sound/core/init.c b/sound/core/init.c index d8ec849af128..7b012d15c2cf 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -213,6 +213,7 @@ int snd_card_create(int idx, const char *xid, spin_lock_init(&card->files_lock); INIT_LIST_HEAD(&card->files_list); init_waitqueue_head(&card->shutdown_sleep); + atomic_set(&card->refcount, 0); #ifdef CONFIG_PM mutex_init(&card->power_lock); init_waitqueue_head(&card->power_sleep); @@ -446,21 +447,36 @@ static int snd_card_do_free(struct snd_card *card) return 0; } +/** + * snd_card_unref - release the reference counter + * @card: the card instance + * + * Decrements the reference counter. When it reaches to zero, wake up + * the sleeper and call the destructor if needed. + */ +void snd_card_unref(struct snd_card *card) +{ + if (atomic_dec_and_test(&card->refcount)) { + wake_up(&card->shutdown_sleep); + if (card->free_on_last_close) + snd_card_do_free(card); + } +} +EXPORT_SYMBOL(snd_card_unref); + int snd_card_free_when_closed(struct snd_card *card) { - int free_now = 0; - int ret = snd_card_disconnect(card); - if (ret) - return ret; + int ret; - spin_lock(&card->files_lock); - if (list_empty(&card->files_list)) - free_now = 1; - else - card->free_on_last_close = 1; - spin_unlock(&card->files_lock); + atomic_inc(&card->refcount); + ret = snd_card_disconnect(card); + if (ret) { + atomic_dec(&card->refcount); + return ret; + } - if (free_now) + card->free_on_last_close = 1; + if (atomic_dec_and_test(&card->refcount)) snd_card_do_free(card); return 0; } @@ -474,7 +490,7 @@ int snd_card_free(struct snd_card *card) return ret; /* wait, until all devices are ready for the free operation */ - wait_event(card->shutdown_sleep, list_empty(&card->files_list)); + wait_event(card->shutdown_sleep, !atomic_read(&card->refcount)); snd_card_do_free(card); return 0; } @@ -886,6 +902,7 @@ int snd_card_file_add(struct snd_card *card, struct file *file) return -ENODEV; } list_add(&mfile->list, &card->files_list); + atomic_inc(&card->refcount); spin_unlock(&card->files_lock); return 0; } @@ -908,7 +925,6 @@ EXPORT_SYMBOL(snd_card_file_add); int snd_card_file_remove(struct snd_card *card, struct file *file) { struct snd_monitor_file *mfile, *found = NULL; - int last_close = 0; spin_lock(&card->files_lock); list_for_each_entry(mfile, &card->files_list, list) { @@ -923,19 +939,13 @@ int snd_card_file_remove(struct snd_card *card, struct file *file) break; } } - if (list_empty(&card->files_list)) - last_close = 1; spin_unlock(&card->files_lock); - if (last_close) { - wake_up(&card->shutdown_sleep); - if (card->free_on_last_close) - snd_card_do_free(card); - } if (!found) { snd_printk(KERN_ERR "ALSA card file remove problem (%p)\n", file); return -ENOENT; } kfree(found); + snd_card_unref(card); return 0; } diff --git a/sound/core/oss/mixer_oss.c 
b/sound/core/oss/mixer_oss.c index 29f6ded02555..a9a2e63c0222 100644 --- a/sound/core/oss/mixer_oss.c +++ b/sound/core/oss/mixer_oss.c @@ -52,14 +52,19 @@ static int snd_mixer_oss_open(struct inode *inode, struct file *file) SNDRV_OSS_DEVICE_TYPE_MIXER); if (card == NULL) return -ENODEV; - if (card->mixer_oss == NULL) + if (card->mixer_oss == NULL) { + snd_card_unref(card); return -ENODEV; + } err = snd_card_file_add(card, file); - if (err < 0) + if (err < 0) { + snd_card_unref(card); return err; + } fmixer = kzalloc(sizeof(*fmixer), GFP_KERNEL); if (fmixer == NULL) { snd_card_file_remove(card, file); + snd_card_unref(card); return -ENOMEM; } fmixer->card = card; @@ -68,6 +73,7 @@ static int snd_mixer_oss_open(struct inode *inode, struct file *file) if (!try_module_get(card->module)) { kfree(fmixer); snd_card_file_remove(card, file); + snd_card_unref(card); return -EFAULT; } return 0; diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 08fde0060fd9..2529e01538e9 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -2457,6 +2457,8 @@ static int snd_pcm_oss_open(struct inode *inode, struct file *file) __error2: snd_card_file_remove(pcm->card, file); __error1: + if (pcm) + snd_card_unref(pcm->card); return err; } diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index 8753c89f3290..48c6a70ad69e 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1642,6 +1642,7 @@ static int snd_pcm_link(struct snd_pcm_substream *substream, int fd) write_unlock_irq(&snd_pcm_link_rwlock); up_write(&snd_pcm_link_rwsem); _nolock: + snd_card_unref(substream1->pcm->card); fput_light(file, fput_needed); if (res < 0) kfree(group); @@ -2116,7 +2117,9 @@ static int snd_pcm_playback_open(struct inode *inode, struct file *file) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_PLAYBACK); - return snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); + err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_PLAYBACK); + snd_card_unref(pcm->card); + return err; } static int snd_pcm_capture_open(struct inode *inode, struct file *file) @@ -2127,7 +2130,9 @@ static int snd_pcm_capture_open(struct inode *inode, struct file *file) return err; pcm = snd_lookup_minor_data(iminor(inode), SNDRV_DEVICE_TYPE_PCM_CAPTURE); - return snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); + err = snd_pcm_open(file, pcm, SNDRV_PCM_STREAM_CAPTURE); + snd_card_unref(pcm->card); + return err; } static int snd_pcm_open(struct file *file, struct snd_pcm *pcm, int stream) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index ebf6e49ad3d4..7d4f62ab6711 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -379,8 +379,10 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) if (rmidi == NULL) return -ENODEV; - if (!try_module_get(rmidi->card->module)) + if (!try_module_get(rmidi->card->module)) { + snd_card_unref(rmidi->card); return -ENXIO; + } mutex_lock(&rmidi->open_mutex); card = rmidi->card; @@ -440,6 +442,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) #endif file->private_data = rawmidi_file; mutex_unlock(&rmidi->open_mutex); + snd_card_unref(rmidi->card); return 0; __error: @@ -447,6 +450,7 @@ static int snd_rawmidi_open(struct inode *inode, struct file *file) __error_card: mutex_unlock(&rmidi->open_mutex); module_put(rmidi->card->module); + snd_card_unref(rmidi->card); return err; } diff --git a/sound/core/sound.c b/sound/core/sound.c index 643976000ce8..89780c323f19 100644 --- 
a/sound/core/sound.c +++ b/sound/core/sound.c @@ -98,6 +98,10 @@ static void snd_request_other(int minor) * * Checks that a minor device with the specified type is registered, and returns * its user data pointer. + * + * This function increments the reference counter of the card instance + * if an associated instance with the given minor number and type is found. + * The caller must call snd_card_unref() appropriately later. */ void *snd_lookup_minor_data(unsigned int minor, int type) { @@ -108,9 +112,11 @@ void *snd_lookup_minor_data(unsigned int minor, int type) return NULL; mutex_lock(&sound_mutex); mreg = snd_minors[minor]; - if (mreg && mreg->type == type) + if (mreg && mreg->type == type) { private_data = mreg->private_data; - else + if (mreg->card_ptr) + atomic_inc(&mreg->card_ptr->refcount); + } else private_data = NULL; mutex_unlock(&sound_mutex); return private_data; @@ -275,6 +281,7 @@ int snd_register_device_for_dev(int type, struct snd_card *card, int dev, preg->device = dev; preg->f_ops = f_ops; preg->private_data = private_data; + preg->card_ptr = card; mutex_lock(&sound_mutex); #ifdef CONFIG_SND_DYNAMIC_MINORS minor = snd_find_free_minor(type); diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index e9528333e36d..e1d79ee35906 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -40,6 +40,9 @@ static struct snd_minor *snd_oss_minors[SNDRV_OSS_MINORS]; static DEFINE_MUTEX(sound_oss_mutex); +/* NOTE: This function increments the refcount of the associated card like + * snd_lookup_minor_data(); the caller must call snd_card_unref() appropriately + */ void *snd_lookup_oss_minor_data(unsigned int minor, int type) { struct snd_minor *mreg; @@ -49,9 +52,11 @@ void *snd_lookup_oss_minor_data(unsigned int minor, int type) return NULL; mutex_lock(&sound_oss_mutex); mreg = snd_oss_minors[minor]; - if (mreg && mreg->type == type) + if (mreg && mreg->type == type) { private_data = mreg->private_data; - else + if (mreg->card_ptr) + atomic_inc(&mreg->card_ptr->refcount); + } else private_data = NULL; mutex_unlock(&sound_oss_mutex); return private_data; @@ -123,6 +128,7 @@ int snd_register_oss_device(int type, struct snd_card *card, int dev, preg->device = dev; preg->f_ops = f_ops; preg->private_data = private_data; + preg->card_ptr = card; mutex_lock(&sound_oss_mutex); snd_oss_minors[minor] = preg; minor_unit = SNDRV_MINOR_OSS_DEVICE(minor); -- cgit v1.2.3 From 95a7d76897c1e7243d4137037c66d15cbf2cce76 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 31 Oct 2012 12:38:31 -0400 Subject: xen/mmu: Use Xen specific TLB flush instead of the generic one. As Mukesh explained it, the MMUEXT_TLB_FLUSH_ALL allows the hypervisor to do a TLB flush on all active vCPUs. If instead we were using the generic one (which ends up being xen_flush_tlb) we end up making the MMUEXT_TLB_FLUSH_LOCAL hypercall. But before we make that hypercall the kernel will IPI all of the vCPUs (even those that were asleep from the hypervisor perspective). The end result is that we needlessly wake them up and do a TLB flush when we can just let the hypervisor do it correctly. This patch gives around 50% speed improvement when migrating idle guest's from one host to another. 
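
(For reference, a condensed view of the xen_flush_tlb_all() helper added by the arch/x86/xen/mmu.c hunk below, with explanatory comments; the tracepoint call is omitted here.)

void xen_flush_tlb_all(void)
{
	struct mmuext_op *op;
	struct multicall_space mcs;

	preempt_disable();

	/* reserve a slot in the per-CPU multicall batch */
	mcs = xen_mc_entry(sizeof(*op));
	op = mcs.args;

	/* one MMUEXT_TLB_FLUSH_ALL request: the hypervisor flushes every
	 * active vCPU itself, so no IPIs are sent to sleeping vCPUs */
	op->cmd = MMUEXT_TLB_FLUSH_ALL;
	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);

	/* issue the batched hypercall (or defer it in lazy MMU mode) */
	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}
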
Oracle-bug: 14630170 CC: stable@vger.kernel.org Tested-by: Jingjie Jiang Suggested-by: Mukesh Rathor Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 21 ++++++++++++++++++++- include/trace/events/xen.h | 8 ++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6226c99729b9..dcf5f2dd91ec 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1288,6 +1288,25 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } +void xen_flush_tlb_all(void) +{ + struct mmuext_op *op; + struct multicall_space mcs; + + trace_xen_mmu_flush_tlb_all(0); + + preempt_disable(); + + mcs = xen_mc_entry(sizeof(*op)); + + op = mcs.args; + op->cmd = MMUEXT_TLB_FLUSH_ALL; + MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); + + xen_mc_issue(PARAVIRT_LAZY_MMU); + + preempt_enable(); +} static void xen_flush_tlb(void) { struct mmuext_op *op; @@ -2518,7 +2537,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma, err = 0; out: - flush_tlb_all(); + xen_flush_tlb_all(); return err; } diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 15ba03bdd7c6..d06b6da5c1e3 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -377,6 +377,14 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); +TRACE_EVENT(xen_mmu_flush_tlb_all, + TP_PROTO(int x), + TP_ARGS(x), + TP_STRUCT__entry(__array(char, x, 0)), + TP_fast_assign((void)x), + TP_printk("%s", "") + ); + TRACE_EVENT(xen_mmu_flush_tlb, TP_PROTO(int x), TP_ARGS(x), -- cgit v1.2.3 From 87da7e66a40532b743cd50972fcf85a1f15b14ea Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 24 Oct 2012 14:07:59 +0800 Subject: KVM: x86: fix vcpu->mmio_fragments overflow After commit b3356bf0dbb349 (KVM: emulator: optimize "rep ins" handling), the pieces of io data can be collected and write them to the guest memory or MMIO together Unfortunately, kvm splits the mmio access into 8 bytes and store them to vcpu->mmio_fragments. If the guest uses "rep ins" to move large data, it will cause vcpu->mmio_fragments overflow The bug can be exposed by isapc (-M isapc): [23154.818733] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC [ ......] [23154.858083] Call Trace: [23154.859874] [] kvm_get_cr8+0x1d/0x28 [kvm] [23154.861677] [] kvm_arch_vcpu_ioctl_run+0xcda/0xe45 [kvm] [23154.863604] [] ? kvm_arch_vcpu_load+0x17b/0x180 [kvm] Actually, we can use one mmio_fragment to store a large mmio access then split it when we pass the mmio-exit-info to userspace. 
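
(Standalone sketch, not kernel code: how a single large fragment is exposed to userspace in at-most-8-byte pieces, mirroring the min(8u, frag->len) stepping that complete_emulated_mmio() gains in the diff below. The struct and the 20-byte example are made up for illustration.)

#include <stdio.h>

struct frag { unsigned long gpa; unsigned int len; };

int main(void)
{
	/* e.g. a "rep ins" that produced one 20-byte MMIO fragment */
	struct frag f = { .gpa = 0x1000, .len = 20 };

	for (;;) {
		unsigned int piece = f.len < 8u ? f.len : 8u;	/* min(8u, frag->len) */

		printf("KVM_EXIT_MMIO: gpa=0x%lx len=%u\n", f.gpa, piece);

		if (f.len <= 8)
			break;		/* fragment consumed; the next exit moves to the next fragment */

		f.gpa += piece;		/* otherwise step forward within the same fragment */
		f.len -= piece;
	}
	return 0;
}

/* prints three exits: gpa=0x1000 len=8, gpa=0x1008 len=8, gpa=0x1010 len=4 */
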
After that, we only need two entries to store mmio info for the cross-mmio pages access Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 60 +++++++++++++++++++++++++++--------------------- include/linux/kvm_host.h | 15 ++---------- 2 files changed, 36 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1eefebe5d727..224a7e78cb6c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3779,7 +3779,7 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, { struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0]; - memcpy(vcpu->run->mmio.data, frag->data, frag->len); + memcpy(vcpu->run->mmio.data, frag->data, min(8u, frag->len)); return X86EMUL_CONTINUE; } @@ -3832,18 +3832,11 @@ mmio: bytes -= handled; val += handled; - while (bytes) { - unsigned now = min(bytes, 8U); - - frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; - frag->gpa = gpa; - frag->data = val; - frag->len = now; - - gpa += now; - val += now; - bytes -= now; - } + WARN_ON(vcpu->mmio_nr_fragments >= KVM_MAX_MMIO_FRAGMENTS); + frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++]; + frag->gpa = gpa; + frag->data = val; + frag->len = bytes; return X86EMUL_CONTINUE; } @@ -3890,7 +3883,7 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, vcpu->mmio_needed = 1; vcpu->mmio_cur_fragment = 0; - vcpu->run->mmio.len = vcpu->mmio_fragments[0].len; + vcpu->run->mmio.len = min(8u, vcpu->mmio_fragments[0].len); vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write; vcpu->run->exit_reason = KVM_EXIT_MMIO; vcpu->run->mmio.phys_addr = gpa; @@ -5522,28 +5515,44 @@ static int complete_emulated_pio(struct kvm_vcpu *vcpu) * * read: * for each fragment - * write gpa, len - * exit - * copy data + * for each mmio piece in the fragment + * write gpa, len + * exit + * copy data * execute insn * * write: * for each fragment - * write gpa, len - * copy data - * exit + * for each mmio piece in the fragment + * write gpa, len + * copy data + * exit */ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; struct kvm_mmio_fragment *frag; + unsigned len; BUG_ON(!vcpu->mmio_needed); /* Complete previous fragment */ - frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++]; + frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment]; + len = min(8u, frag->len); if (!vcpu->mmio_is_write) - memcpy(frag->data, run->mmio.data, frag->len); + memcpy(frag->data, run->mmio.data, len); + + if (frag->len <= 8) { + /* Switch to the next fragment. */ + frag++; + vcpu->mmio_cur_fragment++; + } else { + /* Go forward to the next mmio piece. 
*/ + frag->data += len; + frag->gpa += len; + frag->len -= len; + } + if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { vcpu->mmio_needed = 0; if (vcpu->mmio_is_write) @@ -5551,13 +5560,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) vcpu->mmio_read_completed = 1; return complete_emulated_io(vcpu); } - /* Initiate next fragment */ - ++frag; + run->exit_reason = KVM_EXIT_MMIO; run->mmio.phys_addr = frag->gpa; if (vcpu->mmio_is_write) - memcpy(run->mmio.data, frag->data, frag->len); - run->mmio.len = frag->len; + memcpy(run->mmio.data, frag->data, min(8u, frag->len)); + run->mmio.len = min(8u, frag->len); run->mmio.is_write = vcpu->mmio_is_write; vcpu->arch.complete_userspace_io = complete_emulated_mmio; return 0; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93bfc9f9815c..ecc554374e44 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -42,19 +42,8 @@ */ #define KVM_MEMSLOT_INVALID (1UL << 16) -/* - * If we support unaligned MMIO, at most one fragment will be split into two: - */ -#ifdef KVM_UNALIGNED_MMIO -# define KVM_EXTRA_MMIO_FRAGMENTS 1 -#else -# define KVM_EXTRA_MMIO_FRAGMENTS 0 -#endif - -#define KVM_USER_MMIO_SIZE 8 - -#define KVM_MAX_MMIO_FRAGMENTS \ - (KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS) +/* Two fragments for cross MMIO pages. */ +#define KVM_MAX_MMIO_FRAGMENTS 2 /* * For the normal pfn, the highest 12 bits should be zero, -- cgit v1.2.3 From d9b482c8ba1973a189f2d4c8175d405b87fbf2d7 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Tue, 30 Oct 2012 14:45:57 -0400 Subject: hashtable: introduce a small and naive hashtable This hashtable implementation is using hlist buckets to provide a simple hashtable to prevent it from getting reimplemented all over the kernel. Signed-off-by: Sasha Levin [ Merging this now, so that subsystems can start applying Sasha's patches that use this - Linus ] Signed-off-by: Linus Torvalds --- include/linux/hashtable.h | 192 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 include/linux/hashtable.h (limited to 'include') diff --git a/include/linux/hashtable.h b/include/linux/hashtable.h new file mode 100644 index 000000000000..227c62424f3c --- /dev/null +++ b/include/linux/hashtable.h @@ -0,0 +1,192 @@ +/* + * Statically sized hash table implementation + * (C) 2012 Sasha Levin + */ + +#ifndef _LINUX_HASHTABLE_H +#define _LINUX_HASHTABLE_H + +#include +#include +#include +#include +#include + +#define DEFINE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] = \ + { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT } + +#define DECLARE_HASHTABLE(name, bits) \ + struct hlist_head name[1 << (bits)] + +#define HASH_SIZE(name) (ARRAY_SIZE(name)) +#define HASH_BITS(name) ilog2(HASH_SIZE(name)) + +/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */ +#define hash_min(val, bits) \ + (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits)) + +static inline void __hash_init(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); +} + +/** + * hash_init - initialize a hash table + * @hashtable: hashtable to be initialized + * + * Calculates the size of the hashtable from the given parameter, otherwise + * same as hash_init_size. + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. 
+ */ +#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_add - add an object to a hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add(hashtable, node, key) \ + hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_add_rcu - add an object to a rcu enabled hashtable + * @hashtable: hashtable to add to + * @node: the &struct hlist_node of the object to be added + * @key: the key of the object to be added + */ +#define hash_add_rcu(hashtable, node, key) \ + hlist_add_head_rcu(node, &hashtable[hash_min(key, HASH_BITS(hashtable))]) + +/** + * hash_hashed - check whether an object is in any hashtable + * @node: the &struct hlist_node of the object to be checked + */ +static inline bool hash_hashed(struct hlist_node *node) +{ + return !hlist_unhashed(node); +} + +static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz) +{ + unsigned int i; + + for (i = 0; i < sz; i++) + if (!hlist_empty(&ht[i])) + return false; + + return true; +} + +/** + * hash_empty - check whether a hashtable is empty + * @hashtable: hashtable to check + * + * This has to be a macro since HASH_BITS() will not work on pointers since + * it calculates the size during preprocessing. + */ +#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable)) + +/** + * hash_del - remove an object from a hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del(struct hlist_node *node) +{ + hlist_del_init(node); +} + +/** + * hash_del_rcu - remove an object from a rcu enabled hashtable + * @node: &struct hlist_node of the object to remove + */ +static inline void hash_del_rcu(struct hlist_node *node) +{ + hlist_del_init_rcu(node); +} + +/** + * hash_for_each - iterate over a hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each(name, bkt, node, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry(obj, node, &name[bkt], member) + +/** + * hash_for_each_rcu - iterate over a rcu enabled hashtable + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_rcu(name, bkt, node, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry_rcu(obj, node, &name[bkt], member) + +/** + * hash_for_each_safe - iterate over a hashtable safe against removal of + * hash entry + * @name: hashtable to iterate + * @bkt: integer to use as bucket loop cursor + * @node: the &struct list_head to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @obj: the type * to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + */ +#define hash_for_each_safe(name, bkt, node, tmp, obj, member) \ + for ((bkt) = 0, node = NULL; node == NULL && (bkt) < HASH_SIZE(name); (bkt)++)\ + hlist_for_each_entry_safe(obj, node, tmp, &name[bkt], 
member) + +/** + * hash_for_each_possible - iterate over all possible objects hashing to the + * same bucket + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct list_head to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible(name, obj, node, member, key) \ + hlist_for_each_entry(obj, node, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_rcu - iterate over all possible objects hashing to the + * same bucket in an rcu enabled hashtable + * in a rcu enabled hashtable + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct list_head to use as a loop cursor for each entry + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_rcu(name, obj, node, member, key) \ + hlist_for_each_entry_rcu(obj, node, &name[hash_min(key, HASH_BITS(name))], member) + +/** + * hash_for_each_possible_safe - iterate over all possible objects hashing to the + * same bucket safe against removals + * @name: hashtable to iterate + * @obj: the type * to use as a loop cursor for each entry + * @node: the &struct list_head to use as a loop cursor for each entry + * @tmp: a &struct used for temporary storage + * @member: the name of the hlist_node within the struct + * @key: the key of the objects to iterate over + */ +#define hash_for_each_possible_safe(name, obj, node, tmp, member, key) \ + hlist_for_each_entry_safe(obj, node, tmp, \ + &name[hash_min(key, HASH_BITS(name))], member) + + +#endif -- cgit v1.2.3 From 87f4d7c1d36f44b0822053b7e5dedc31fdd0ab99 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Thu, 1 Nov 2012 12:30:16 +0000 Subject: ptp: update adjfreq callback description This patch updates the adjfreq callback description to include a note that the delta in ppb is always relative to the base frequency, and not to the current frequency of the hardware clock. Signed-off-by: Jacob Keller CC: stable@vger.kernel.org [v3.5+] CC: Richard Cochran CC: John Stultz Signed-off-by: Jeff Kirsher Signed-off-by: David S. Miller --- include/linux/ptp_clock_kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index f2dc6d8fc680..38a993508327 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -54,7 +54,8 @@ struct ptp_clock_request { * clock operations * * @adjfreq: Adjusts the frequency of the hardware clock. - * parameter delta: Desired period change in parts per billion. + * parameter delta: Desired frequency offset from nominal frequency + * in parts per billion * * @adjtime: Shifts the time of the hardware clock. * parameter delta: Desired change in nanoseconds. -- cgit v1.2.3 From 6d877e6b85691e0b2b22e90aeb9b86c3dafcfc6b Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 19 Oct 2012 15:01:46 -0400 Subject: xen/hvm: If we fail to fetch an HVM parameter print out which flag it is. Makes it easier to troubleshoot in the field. 
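As an aside, stepping back to the hashtable API introduced a few commits above: a minimal usage sketch against this 3.7-era interface, in which the iteration macros still take an explicit struct hlist_node cursor. The struct, function, and table names below are illustrative only, not taken from any in-tree user.

/*
 * Hypothetical cache of "struct object" keyed by an integer id, built on
 * the new <linux/hashtable.h> helpers.
 */
#include <linux/hashtable.h>

struct object {
	int id;
	struct hlist_node node;		/* links the object into one bucket */
};

/* 2^4 = 16 statically initialised buckets. */
static DEFINE_HASHTABLE(object_table, 4);

static void add_object(struct object *obj)
{
	hash_add(object_table, &obj->node, obj->id);
}

static struct object *find_object(int id)
{
	struct hlist_node *cur;
	struct object *obj;

	/* Walks only the bucket that 'id' hashes to. */
	hash_for_each_possible(object_table, obj, cur, node, id)
		if (obj->id == id)
			return obj;
	return NULL;
}

static void remove_object(struct object *obj)
{
	hash_del(&obj->node);
}

Deleting entries while walking the whole table would use hash_for_each_safe() instead, for the same reason hlist_for_each_entry_safe() exists.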
Acked-by: Ian Campbell [v1: Use macro per Ian's suggestion] Signed-off-by: Konrad Rzeszutek Wilk --- include/xen/hvm.h | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/xen/hvm.h b/include/xen/hvm.h index b193fa2f9fdd..13e43e41637d 100644 --- a/include/xen/hvm.h +++ b/include/xen/hvm.h @@ -5,6 +5,36 @@ #include #include +static const char *param_name(int op) +{ +#define PARAM(x) [HVM_PARAM_##x] = #x + static const char *const names[] = { + PARAM(CALLBACK_IRQ), + PARAM(STORE_PFN), + PARAM(STORE_EVTCHN), + PARAM(PAE_ENABLED), + PARAM(IOREQ_PFN), + PARAM(BUFIOREQ_PFN), + PARAM(TIMER_MODE), + PARAM(HPET_ENABLED), + PARAM(IDENT_PT), + PARAM(DM_DOMAIN), + PARAM(ACPI_S_STATE), + PARAM(VM86_TSS), + PARAM(VPT_ALIGN), + PARAM(CONSOLE_PFN), + PARAM(CONSOLE_EVTCHN), + }; +#undef PARAM + + if (op >= ARRAY_SIZE(names)) + return "unknown"; + + if (!names[op]) + return "reserved"; + + return names[op]; +} static inline int hvm_get_parameter(int idx, uint64_t *value) { struct xen_hvm_param xhv; @@ -14,8 +44,8 @@ static inline int hvm_get_parameter(int idx, uint64_t *value) xhv.index = idx; r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (r < 0) { - printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n", - idx, r); + printk(KERN_ERR "Cannot get hvm parameter %s (%d): %d!\n", + param_name(idx), idx, r); return r; } *value = xhv.value; -- cgit v1.2.3 From d676188e44680c2f2eb114a24b3b32e56165f079 Mon Sep 17 00:00:00 2001 From: Seungwon Jeon Date: Fri, 28 Sep 2012 14:21:59 +0900 Subject: mmc: dw_mmc: convert the variable type of irq Even though platform_get_irq returns error, 'host->irq' always has an unsigned value. Less-than-zero comparison of an unsigned value is never true. Type of 'unsigned int' will be changed for 'int'. Signed-off-by: Seungwon Jeon Acked-by: Will Newton Signed-off-by: Chris Ball --- include/linux/mmc/dw_mmc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 7c6a1139d8fa..31416760723c 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -186,7 +186,7 @@ struct dw_mci { struct regulator *vmmc; /* Power regulator */ unsigned long irq_flags; /* IRQ flags */ - unsigned int irq; + int irq; }; /* DMA ops for Internal/External DMAC interface */ -- cgit v1.2.3 From 63ef5d8c28b2a944f104d854254941e7375c85a3 Mon Sep 17 00:00:00 2001 From: Jerry Huang Date: Thu, 25 Oct 2012 13:47:19 +0800 Subject: mmc: sdhci-of-esdhc: disable CMD23 for some Freescale SoCs CMD23 causes lots of errors in kernel on some freescale SoCs (P1020, P1021, P1022, P1024, P1025 and P4080) when MMC card used, which is because these controllers does not support CMD23, even on the SoCs which declares CMD23 is supported. Therefore, we'll not use CMD23. 
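An aside on the dw_mmc irq-type change just above: a standalone sketch (hypothetical helper names, not driver code) of why a less-than-zero check on an unsigned field is dead code, which is exactly the bug class that commit fixes.

#include <linux/errno.h>

static int check_irq_broken(unsigned int irq)
{
	/*
	 * A negative errno such as -ENXIO stored in 'irq' wraps to a huge
	 * positive value, so this branch can never be taken (most compilers
	 * warn that the comparison is always false).
	 */
	if (irq < 0)
		return -ENXIO;
	return 0;
}

static int check_irq_fixed(int irq)
{
	/* With a signed type the negative errno survives and the check
	 * behaves as intended. */
	if (irq < 0)
		return irq;
	return 0;
}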
Signed-off-by: Jerry Huang Signed-off-by: Shaohui Xie Acked-by: Anton Vorontsov Signed-off-by: Chris Ball --- drivers/mmc/host/sdhci-of-esdhc.c | 11 +++++++++++ drivers/mmc/host/sdhci-pltfm.c | 7 +++++++ drivers/mmc/host/sdhci.c | 3 +++ drivers/mmc/host/sdhci.h | 1 + include/linux/mmc/sdhci.h | 1 + 5 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index ae5fcbfa1eef..63d219f57cae 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -169,6 +169,16 @@ static void esdhc_of_resume(struct sdhci_host *host) } #endif +static void esdhc_of_platform_init(struct sdhci_host *host) +{ + u32 vvn; + + vvn = in_be32(host->ioaddr + SDHCI_SLOT_INT_STATUS); + vvn = (vvn & SDHCI_VENDOR_VER_MASK) >> SDHCI_VENDOR_VER_SHIFT; + if (vvn == VENDOR_V_22) + host->quirks2 |= SDHCI_QUIRK2_HOST_NO_CMD23; +} + static struct sdhci_ops sdhci_esdhc_ops = { .read_l = esdhc_readl, .read_w = esdhc_readw, @@ -180,6 +190,7 @@ static struct sdhci_ops sdhci_esdhc_ops = { .enable_dma = esdhc_of_enable_dma, .get_max_clock = esdhc_of_get_max_clock, .get_min_clock = esdhc_of_get_min_clock, + .platform_init = esdhc_of_platform_init, #ifdef CONFIG_PM .platform_suspend = esdhc_of_suspend, .platform_resume = esdhc_of_resume, diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c index 65551a9709cc..27164457f861 100644 --- a/drivers/mmc/host/sdhci-pltfm.c +++ b/drivers/mmc/host/sdhci-pltfm.c @@ -150,6 +150,13 @@ struct sdhci_host *sdhci_pltfm_init(struct platform_device *pdev, goto err_remap; } + /* + * Some platforms need to probe the controller to be able to + * determine which caps should be used. + */ + if (host->ops && host->ops->platform_init) + host->ops->platform_init(host); + platform_set_drvdata(pdev, host); return host; diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 7922adb42386..f05a37747b3d 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -2837,6 +2837,9 @@ int sdhci_add_host(struct sdhci_host *host) if (!(host->quirks & SDHCI_QUIRK_FORCE_1_BIT_DATA)) mmc->caps |= MMC_CAP_4_BIT_DATA; + if (host->quirks2 & SDHCI_QUIRK2_HOST_NO_CMD23) + mmc->caps &= ~MMC_CAP_CMD23; + if (caps[0] & SDHCI_CAN_DO_HISPD) mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 97653ea8942b..71a4a7ed46c5 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -278,6 +278,7 @@ struct sdhci_ops { void (*hw_reset)(struct sdhci_host *host); void (*platform_suspend)(struct sdhci_host *host); void (*platform_resume)(struct sdhci_host *host); + void (*platform_init)(struct sdhci_host *host); }; #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index fa8529a859b8..1edcb4dad8c4 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -91,6 +91,7 @@ struct sdhci_host { unsigned int quirks2; /* More deviations from spec. */ #define SDHCI_QUIRK2_HOST_OFF_CARD_ON (1<<0) +#define SDHCI_QUIRK2_HOST_NO_CMD23 (1<<1) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From 8e2b36ea6e3abc613cbbdb41692fbd2f9ee18996 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 6 Nov 2012 22:55:31 +0100 Subject: mmc: dw_mmc: constify dw_mci_idmac_ops in exynos back-end The of_device_id match data is now marked as const and must not be modified. 
This changes the dw_mmc to mark all pointers passing the dw_mci_drv_data or dw_mci_dma_ops structures as const, and also marks the static definitions as const. drivers/mmc/host/dw_mmc-exynos.c: In function 'dw_mci_exynos_probe': drivers/mmc/host/dw_mmc-exynos.c:234:11: warning: assignment discards 'const' qualifier from pointer target type [enabled by default] Signed-off-by: Arnd Bergmann Cc: Thomas Abraham Cc: Will Newton Signed-off-by: Chris Ball --- drivers/mmc/host/dw_mmc-exynos.c | 6 +++--- drivers/mmc/host/dw_mmc-pltfm.c | 2 +- drivers/mmc/host/dw_mmc-pltfm.h | 2 +- drivers/mmc/host/dw_mmc.c | 2 +- include/linux/mmc/dw_mmc.h | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c index 0147ac3aad59..4d50da618166 100644 --- a/drivers/mmc/host/dw_mmc-exynos.c +++ b/drivers/mmc/host/dw_mmc-exynos.c @@ -208,7 +208,7 @@ static unsigned long exynos5250_dwmmc_caps[4] = { MMC_CAP_CMD23, }; -static struct dw_mci_drv_data exynos5250_drv_data = { +static const struct dw_mci_drv_data exynos5250_drv_data = { .caps = exynos5250_dwmmc_caps, .init = dw_mci_exynos_priv_init, .setup_clock = dw_mci_exynos_setup_clock, @@ -220,14 +220,14 @@ static struct dw_mci_drv_data exynos5250_drv_data = { static const struct of_device_id dw_mci_exynos_match[] = { { .compatible = "samsung,exynos5250-dw-mshc", - .data = (void *)&exynos5250_drv_data, }, + .data = &exynos5250_drv_data, }, {}, }; MODULE_DEVICE_TABLE(of, dw_mci_exynos_match); int dw_mci_exynos_probe(struct platform_device *pdev) { - struct dw_mci_drv_data *drv_data; + const struct dw_mci_drv_data *drv_data; const struct of_device_id *match; match = of_match_node(dw_mci_exynos_match, pdev->dev.of_node); diff --git a/drivers/mmc/host/dw_mmc-pltfm.c b/drivers/mmc/host/dw_mmc-pltfm.c index e5957211b171..917936bee5d5 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.c +++ b/drivers/mmc/host/dw_mmc-pltfm.c @@ -24,7 +24,7 @@ #include "dw_mmc.h" int dw_mci_pltfm_register(struct platform_device *pdev, - struct dw_mci_drv_data *drv_data) + const struct dw_mci_drv_data *drv_data) { struct dw_mci *host; struct resource *regs; diff --git a/drivers/mmc/host/dw_mmc-pltfm.h b/drivers/mmc/host/dw_mmc-pltfm.h index 301f24541fc2..2ac37b81de4d 100644 --- a/drivers/mmc/host/dw_mmc-pltfm.h +++ b/drivers/mmc/host/dw_mmc-pltfm.h @@ -13,7 +13,7 @@ #define _DW_MMC_PLTFM_H_ extern int dw_mci_pltfm_register(struct platform_device *pdev, - struct dw_mci_drv_data *drv_data); + const struct dw_mci_drv_data *drv_data); extern int __devexit dw_mci_pltfm_remove(struct platform_device *pdev); extern const struct dev_pm_ops dw_mci_pltfm_pmops; diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index b087f66e30c4..c0667c8af2bd 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -435,7 +435,7 @@ static int dw_mci_idmac_init(struct dw_mci *host) return 0; } -static struct dw_mci_dma_ops dw_mci_idmac_ops = { +static const struct dw_mci_dma_ops dw_mci_idmac_ops = { .init = dw_mci_idmac_init, .start = dw_mci_idmac_start_dma, .stop = dw_mci_idmac_stop_dma, diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 31416760723c..96531664a061 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -137,7 +137,7 @@ struct dw_mci { dma_addr_t sg_dma; void *sg_cpu; - struct dw_mci_dma_ops *dma_ops; + const struct dw_mci_dma_ops *dma_ops; #ifdef CONFIG_MMC_DW_IDMAC unsigned int ring_size; #else @@ -162,7 +162,7 @@ struct dw_mci { 
u16 data_offset; struct device *dev; struct dw_mci_board *pdata; - struct dw_mci_drv_data *drv_data; + const struct dw_mci_drv_data *drv_data; void *priv; struct clk *biu_clk; struct clk *ciu_clk; -- cgit v1.2.3 From a80a6b85b428e6ce12a8363bb1f08d44c50f3252 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Nov 2012 15:53:35 -0800 Subject: revert "epoll: support for disabling items, and a self-test app" Revert commit 03a7beb55b9f ("epoll: support for disabling items, and a self-test app") pending resolution of the issues identified by Michael Kerrisk, copied below. We'll revisit this for 3.8. : I've taken a look at this patch as it currently stands in 3.7-rc1, and : done a bit of testing. (By the way, the test program : tools/testing/selftests/epoll/test_epoll.c does not compile...) : : There are one or two places where the behavior seems a little strange, : so I have a question or two at the end of this mail. But other than : that, I want to check my understanding so that the interface can be : correctly documented. : : Just to go though my understanding, the problem is the following : scenario in a multithreaded application: : : 1. Multiple threads are performing epoll_wait() operations, : and maintaining a user-space cache that contains information : corresponding to each file descriptor being monitored by : epoll_wait(). : : 2. At some point, a thread wants to delete (EPOLL_CTL_DEL) : a file descriptor from the epoll interest list, and : delete the corresponding record from the user-space cache. : : 3. The problem with (2) is that some other thread may have : previously done an epoll_wait() that retrieved information : about the fd in question, and may be in the middle of using : information in the cache that relates to that fd. Thus, : there is a potential race. : : 4. The race can't solved purely in user space, because doing : so would require applying a mutex across the epoll_wait() : call, which would of course blow thread concurrency. : : Right? : : Your solution is the EPOLL_CTL_DISABLE operation. I want to : confirm my understanding about how to use this flag, since : the description that has accompanied the patches so far : has been a bit sparse : : 0. In the scenario you're concerned about, deleting a file : descriptor means (safely) doing the following: : (a) Deleting the file descriptor from the epoll interest list : using EPOLL_CTL_DEL : (b) Deleting the corresponding record in the user-space cache : : 1. It's only meaningful to use this EPOLL_CTL_DISABLE in : conjunction with EPOLLONESHOT. : : 2. Using EPOLL_CTL_DISABLE without using EPOLLONESHOT in : conjunction is a logical error. : : 3. The correct way to code multithreaded applications using : EPOLL_CTL_DISABLE and EPOLLONESHOT is as follows: : : a. All EPOLL_CTL_ADD and EPOLL_CTL_MOD operations should : should EPOLLONESHOT. : : b. When a thread wants to delete a file descriptor, it : should do the following: : : [1] Call epoll_ctl(EPOLL_CTL_DISABLE) : [2] If the return status from epoll_ctl(EPOLL_CTL_DISABLE) : was zero, then the file descriptor can be safely : deleted by the thread that made this call. : [3] If the epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY, : then the descriptor is in use. In this case, the calling : thread should set a flag in the user-space cache to : indicate that the thread that is using the descriptor : should perform the deletion operation. : : Is all of the above correct? 
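To make steps [1]-[3] of the protocol restated above concrete, a rough user-space sketch follows. EPOLL_CTL_DISABLE is the operation from the patch this commit reverts (the value 4 matches the reverted uapi header; it is not part of the mainline ABI), and the cache_entry structure and delete_pending flag are hypothetical; locking around the flag is omitted.

#include <sys/epoll.h>
#include <errno.h>
#include <stdbool.h>

#ifndef EPOLL_CTL_DISABLE
#define EPOLL_CTL_DISABLE 4	/* value from the reverted header */
#endif

struct cache_entry {		/* per-fd record in the user-space cache */
	int fd;
	bool delete_pending;	/* another thread must finish the delete */
};

static void request_delete(int epfd, struct cache_entry *ce)
{
	/* [1] Try to disable the (EPOLLONESHOT) item. */
	if (epoll_ctl(epfd, EPOLL_CTL_DISABLE, ce->fd, NULL) == 0) {
		/* [2] No thread is handling events for this fd: delete now. */
		epoll_ctl(epfd, EPOLL_CTL_DEL, ce->fd, NULL);
		/* ...free the corresponding cache entry here... */
	} else if (errno == EBUSY) {
		/* [3] Some thread is using events for this fd; leave a note
		 * so that thread performs the deletion instead. */
		ce->delete_pending = true;
	}
}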
: : The implementation depends on checking on whether : (events & ~EP_PRIVATE_BITS) == 0 : This replies on the fact that EPOLL_CTL_AD and EPOLL_CTL_MOD always : set EPOLLHUP and EPOLLERR in the 'events' mask, and EPOLLONESHOT : causes those flags (as well as all others in ~EP_PRIVATE_BITS) to be : cleared. : : A corollary to the previous paragraph is that using EPOLL_CTL_DISABLE : is only useful in conjunction with EPOLLONESHOT. However, as things : stand, one can use EPOLL_CTL_DISABLE on a file descriptor that does : not have EPOLLONESHOT set in 'events' This results in the following : (slightly surprising) behavior: : : (a) The first call to epoll_ctl(EPOLL_CTL_DISABLE) returns 0 : (the indicator that the file descriptor can be safely deleted). : (b) The next call to epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY. : : This doesn't seem particularly useful, and in fact is probably an : indication that the user made a logic error: they should only be using : epoll_ctl(EPOLL_CTL_DISABLE) on a file descriptor for which : EPOLLONESHOT was set in 'events'. If that is correct, then would it : not make sense to return an error to user space for this case? Cc: Michael Kerrisk Cc: "Paton J. Lewis" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +--- include/uapi/linux/eventpoll.h | 1 - tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/epoll/Makefile | 11 - tools/testing/selftests/epoll/test_epoll.c | 344 ----------------------------- 5 files changed, 4 insertions(+), 392 deletions(-) delete mode 100644 tools/testing/selftests/epoll/Makefile delete mode 100644 tools/testing/selftests/epoll/test_epoll.c (limited to 'include') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index da72250ddc1c..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; + return op != EPOLL_CTL_DEL; } /* Initialize the poll safe wake up structure */ @@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } -/* - * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item - * had no event flags set, indicating that another thread may be currently - * handling that item's events (in the case that EPOLLONESHOT was being - * used). Otherwise a zero result indicates that the item has been disabled - * from receiving events. A disabled item may be re-enabled via - * EPOLL_CTL_MOD. Must be called with "mtx" held. 
- */ -static int ep_disable(struct eventpoll *ep, struct epitem *epi) -{ - int result = 0; - unsigned long flags; - - spin_lock_irqsave(&ep->lock, flags); - if (epi->event.events & ~EP_PRIVATE_BITS) { - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - /* Ensure ep_poll_callback will not add epi back onto ready - list: */ - epi->event.events &= EP_PRIVATE_BITS; - } - else - result = -EBUSY; - spin_unlock_irqrestore(&ep->lock, flags); - - return result; -} - static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; - case EPOLL_CTL_DISABLE: - if (epi) - error = ep_disable(ep, epi); - else - error = -ENOENT; - break; } mutex_unlock(&ep->mtx); diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8c99ce7202c5..2c267bcbb85c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -25,7 +25,6 @@ #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 -#define EPOLL_CTL_DISABLE 4 /* * Request the handling of system wakeup events so as to prevent system suspends diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 43480149119e..85baf11e2acd 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile deleted file mode 100644 index 19806ed62f50..000000000000 --- a/tools/testing/selftests/epoll/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# Makefile for epoll selftests - -all: test_epoll -%: %.c - gcc -pthread -g -o $@ $^ - -run_tests: all - ./test_epoll - -clean: - $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c deleted file mode 100644 index f7525392ce84..000000000000 --- a/tools/testing/selftests/epoll/test_epoll.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * tools/testing/selftests/epoll/test_epoll.c - * - * Copyright 2012 Adobe Systems Incorporated - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Paton J. Lewis - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * A pointer to an epoll_item_private structure will be stored in the epoll - * item's event structure so that we can get access to the epoll_item_private - * data after calling epoll_wait: - */ -struct epoll_item_private { - int index; /* Position of this struct within the epoll_items array. */ - int fd; - uint32_t events; - pthread_mutex_t mutex; /* Guards the following variables... */ - int stop; - int status; /* Stores any error encountered while handling item. */ - /* The following variable allows us to test whether we have encountered - a problem while attempting to cancel and delete the associated - event. 
When the test program exits, 'deleted' should be exactly - one. If it is greater than one, then the failed test reflects a real - world situation where we would have tried to access the epoll item's - private data after deleting it: */ - int deleted; -}; - -struct epoll_item_private *epoll_items; - -/* - * Delete the specified item from the epoll set. In a real-world secneario this - * is where we would free the associated data structure, but in this testing - * environment we retain the structure so that we can test for double-deletion: - */ -void delete_item(int index) -{ - __sync_fetch_and_add(&epoll_items[index].deleted, 1); -} - -/* - * A pointer to a read_thread_data structure will be passed as the argument to - * each read thread: - */ -struct read_thread_data { - int stop; - int status; /* Indicates any error encountered by the read thread. */ - int epoll_set; -}; - -/* - * The function executed by the read threads: - */ -void *read_thread_function(void *function_data) -{ - struct read_thread_data *thread_data = - (struct read_thread_data *)function_data; - struct epoll_event event_data; - struct epoll_item_private *item_data; - char socket_data; - - /* Handle events until we encounter an error or this thread's 'stop' - condition is set: */ - while (1) { - int result = epoll_wait(thread_data->epoll_set, - &event_data, - 1, /* Number of desired events */ - 1000); /* Timeout in ms */ - if (result < 0) { - /* Breakpoints signal all threads. Ignore that while - debugging: */ - if (errno == EINTR) - continue; - thread_data->status = errno; - return 0; - } else if (thread_data->stop) - return 0; - else if (result == 0) /* Timeout */ - continue; - - /* We need the mutex here because checking for the stop - condition and re-enabling the epoll item need to be done - together as one atomic operation when EPOLL_CTL_DISABLE is - available: */ - item_data = (struct epoll_item_private *)event_data.data.ptr; - pthread_mutex_lock(&item_data->mutex); - - /* Remove the item from the epoll set if we want to stop - handling that event: */ - if (item_data->stop) - delete_item(item_data->index); - else { - /* Clear the data that was written to the other end of - our non-blocking socket: */ - do { - if (read(item_data->fd, &socket_data, 1) < 1) { - if ((errno == EAGAIN) || - (errno == EWOULDBLOCK)) - break; - else - goto error_unlock; - } - } while (item_data->events & EPOLLET); - - /* The item was one-shot, so re-enable it: */ - event_data.events = item_data->events; - if (epoll_ctl(thread_data->epoll_set, - EPOLL_CTL_MOD, - item_data->fd, - &event_data) < 0) - goto error_unlock; - } - - pthread_mutex_unlock(&item_data->mutex); - } - -error_unlock: - thread_data->status = item_data->status = errno; - pthread_mutex_unlock(&item_data->mutex); - return 0; -} - -/* - * A pointer to a write_thread_data structure will be passed as the argument to - * the write thread: - */ -struct write_thread_data { - int stop; - int status; /* Indicates any error encountered by the write thread. */ - int n_fds; - int *fds; -}; - -/* - * The function executed by the write thread. It writes a single byte to each - * socket in turn until the stop condition for this thread is set. If writing to - * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for - * the moment and just move on to the next socket in the list. We don't care - * about the order in which we deliver events to the epoll set. 
In fact we don't - * care about the data we're writing to the pipes at all; we just want to - * trigger epoll events: - */ -void *write_thread_function(void *function_data) -{ - const char data = 'X'; - int index; - struct write_thread_data *thread_data = - (struct write_thread_data *)function_data; - while (!thread_data->stop) - for (index = 0; - !thread_data->stop && (index < thread_data->n_fds); - ++index) - if ((write(thread_data->fds[index], &data, 1) < 1) && - (errno != EAGAIN) && - (errno != EWOULDBLOCK)) { - thread_data->status = errno; - return; - } -} - -/* - * Arguments are currently ignored: - */ -int main(int argc, char **argv) -{ - const int n_read_threads = 100; - const int n_epoll_items = 500; - int index; - int epoll_set = epoll_create1(0); - struct write_thread_data write_thread_data = { - 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) - }; - struct read_thread_data *read_thread_data = - malloc(n_read_threads * sizeof(struct read_thread_data)); - pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); - pthread_t write_thread; - - printf("-----------------\n"); - printf("Runing test_epoll\n"); - printf("-----------------\n"); - - epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); - - if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || - read_thread_data == 0 || read_threads == 0) - goto error; - - if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { - printf("Error: please run this test on a multi-core system.\n"); - goto error; - } - - /* Create the socket pairs and epoll items: */ - for (index = 0; index < n_epoll_items; ++index) { - int socket_pair[2]; - struct epoll_event event_data; - if (socketpair(AF_UNIX, - SOCK_STREAM | SOCK_NONBLOCK, - 0, - socket_pair) < 0) - goto error; - write_thread_data.fds[index] = socket_pair[0]; - epoll_items[index].index = index; - epoll_items[index].fd = socket_pair[1]; - if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) - goto error; - /* We always use EPOLLONESHOT because this test is currently - structured to demonstrate the need for EPOLL_CTL_DISABLE, - which only produces useful information in the EPOLLONESHOT - case (without EPOLLONESHOT, calling epoll_ctl with - EPOLL_CTL_DISABLE will never return EBUSY). If support for - testing events without EPOLLONESHOT is desired, it should - probably be implemented in a separate unit test. 
*/ - epoll_items[index].events = EPOLLIN | EPOLLONESHOT; - if (index < n_epoll_items / 2) - epoll_items[index].events |= EPOLLET; - epoll_items[index].stop = 0; - epoll_items[index].status = 0; - epoll_items[index].deleted = 0; - event_data.events = epoll_items[index].events; - event_data.data.ptr = &epoll_items[index]; - if (epoll_ctl(epoll_set, - EPOLL_CTL_ADD, - epoll_items[index].fd, - &event_data) < 0) - goto error; - } - - /* Create and start the read threads: */ - for (index = 0; index < n_read_threads; ++index) { - read_thread_data[index].stop = 0; - read_thread_data[index].status = 0; - read_thread_data[index].epoll_set = epoll_set; - if (pthread_create(&read_threads[index], - NULL, - read_thread_function, - &read_thread_data[index]) != 0) - goto error; - } - - if (pthread_create(&write_thread, - NULL, - write_thread_function, - &write_thread_data) != 0) - goto error; - - /* Cancel all event pollers: */ -#ifdef EPOLL_CTL_DISABLE - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - if (epoll_ctl(epoll_set, - EPOLL_CTL_DISABLE, - epoll_items[index].fd, - NULL) == 0) - delete_item(index); - else if (errno != EBUSY) { - pthread_mutex_unlock(&epoll_items[index].mutex); - goto error; - } - /* EBUSY means events were being handled; allow the other thread - to delete the item. */ - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#else - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - pthread_mutex_unlock(&epoll_items[index].mutex); - /* Wait in case a thread running read_thread_function is - currently executing code between epoll_wait and - pthread_mutex_lock with this item. Note that a longer delay - would make double-deletion less likely (at the expense of - performance), but there is no guarantee that any delay would - ever be sufficient. Note also that we delete all event - pollers at once for testing purposes, but in a real-world - environment we are likely to want to be able to cancel event - pollers at arbitrary times. Therefore we can't improve this - situation by just splitting this loop into two loops - (i.e. signal 'stop' for all items, sleep, and then delete all - items). 
We also can't fix the problem via EPOLL_CTL_DEL - because that command can't prevent the case where some other - thread is executing read_thread_function within the region - mentioned above: */ - usleep(1); - pthread_mutex_lock(&epoll_items[index].mutex); - if (!epoll_items[index].deleted) - delete_item(index); - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#endif - - /* Shut down the read threads: */ - for (index = 0; index < n_read_threads; ++index) - __sync_fetch_and_add(&read_thread_data[index].stop, 1); - for (index = 0; index < n_read_threads; ++index) { - if (pthread_join(read_threads[index], NULL) != 0) - goto error; - if (read_thread_data[index].status) - goto error; - } - - /* Shut down the write thread: */ - __sync_fetch_and_add(&write_thread_data.stop, 1); - if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) - goto error; - - /* Check for final error conditions: */ - for (index = 0; index < n_epoll_items; ++index) { - if (epoll_items[index].status != 0) - goto error; - if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) - goto error; - } - for (index = 0; index < n_epoll_items; ++index) - if (epoll_items[index].deleted != 1) { - printf("Error: item data deleted %1d times.\n", - epoll_items[index].deleted); - goto error; - } - - printf("[PASS]\n"); - return 0; - - error: - printf("[FAIL]\n"); - return errno; -} -- cgit v1.2.3 From 0bce04be442cf4d6e4ba9dac2f0a4c5ee88af5c5 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Tue, 6 Nov 2012 00:12:03 +0000 Subject: of/address: sparc: Declare of_address_to_resource() as an extern function for sparc again This bug-fix makes sure that of_address_to_resource is defined extern for sparc so that the sparc-specific implementation of of_address_to_resource() is once again used when including include/linux/of_address.h in a sparc context. A number of drivers in mainline relies on this function working for sparc. The bug was introduced in a850a7554442f08d3e910c6eeb4ee216868dda1e, "of/address: add empty static inlines for !CONFIG_OF". Contrary to that commit title, the static inlines are added for !CONFIG_OF_ADDRESS, and CONFIG_OF_ADDRESS is never defined for sparc. This is good behavior for the other functions in include/linux/of_address.h, as the extern functions defined in drivers/of/address.c only gets linked when OF_ADDRESS is configured. However, for of_address_to_resource there exists a sparc-specific implementation in arch/sparc/arch/sparc/kernel/of_device_common.c Solution suggested by: Sam Ravnborg Signed-off-by: Andreas Larsson Acked-by: Rob Herring Signed-off-by: David S. 
Miller --- arch/sparc/include/asm/prom.h | 5 +++++ include/linux/of_address.h | 2 ++ 2 files changed, 7 insertions(+) (limited to 'include') diff --git a/arch/sparc/include/asm/prom.h b/arch/sparc/include/asm/prom.h index c28765110706..f93003123bce 100644 --- a/arch/sparc/include/asm/prom.h +++ b/arch/sparc/include/asm/prom.h @@ -63,5 +63,10 @@ extern char *of_console_options; extern void irq_trans_init(struct device_node *dp); extern char *build_path_component(struct device_node *dp); +/* SPARC has a local implementation */ +extern int of_address_to_resource(struct device_node *dev, int index, + struct resource *r); +#define of_address_to_resource of_address_to_resource + #endif /* __KERNEL__ */ #endif /* _SPARC_PROM_H */ diff --git a/include/linux/of_address.h b/include/linux/of_address.h index a1984dd037da..e20e3af68fb6 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -28,11 +28,13 @@ static inline unsigned long pci_address_to_pio(phys_addr_t addr) { return -1; } #endif #else /* CONFIG_OF_ADDRESS */ +#ifndef of_address_to_resource static inline int of_address_to_resource(struct device_node *dev, int index, struct resource *r) { return -EINVAL; } +#endif static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, -- cgit v1.2.3
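To close, a generic sketch of the '#define symbol symbol' override idiom the fix above relies on. The names are placeholders rather than the real of_address API, and the two "headers" are concatenated into one snippet on the assumption that the arch-specific part is seen first, as it is in the real include chain.

#include <linux/errno.h>

/* --- arch-specific header: provides a real implementation --- */
extern int foo_to_resource(void *dev, int index, void *res);
#define foo_to_resource foo_to_resource	/* signal that an override exists */

/* --- generic header: fallback only when no override was declared --- */
#ifndef foo_to_resource
static inline int foo_to_resource(void *dev, int index, void *res)
{
	return -EINVAL;
}
#endif

Because the macro is already defined when the generic header is parsed, the static inline stub is compiled out and callers link against the extern definition; on configurations without the arch-specific header, the stub quietly takes its place.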