From 8bf62ecee58360749c5f0e68bc97d5e02a6816b1 Mon Sep 17 00:00:00 2001 From: Albert Lee Date: Thu, 12 May 2005 15:29:42 -0400 Subject: [libata] C/H/S support, for older devices --- include/linux/ata.h | 14 ++++++++++++++ include/linux/libata.h | 6 ++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index f178894edd04..d8981402cd5b 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -125,6 +125,7 @@ enum { ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, ATA_CMD_VERIFY_EXT = 0x42, + ATA_CMD_INIT_DEV_PARAMS = 0x91, /* SETFEATURES stuff */ SETFEATURES_XFER = 0x03, @@ -174,6 +175,7 @@ enum { ATA_TFLAG_ISADDR = (1 << 1), /* enable r/w to nsect/lba regs */ ATA_TFLAG_DEVICE = (1 << 2), /* enable r/w to device reg */ ATA_TFLAG_WRITE = (1 << 3), /* data dir: host->dev==1 (write) */ + ATA_TFLAG_LBA = (1 << 4), /* enable LBA */ }; enum ata_tf_protocols { @@ -242,6 +244,18 @@ struct ata_taskfile { ((u64) (id)[(n) + 1] << 16) | \ ((u64) (id)[(n) + 0]) ) +static inline int ata_id_current_chs_valid(u16 *id) +{ + /* For ATA-1 devices, if the INITIALIZE DEVICE PARAMETERS command + has not been issued to the device then the values of + id[54] to id[56] are vendor specific. 
*/ + return (id[53] & 0x01) && /* Current translation valid */ + id[54] && /* cylinders in current translation */ + id[55] && /* heads in current translation */ + id[55] <= 16 && + id[56]; /* sectors in current translation */ +} + static inline int atapi_cdb_len(u16 *dev_id) { u16 tmp = dev_id[0] & 0x3; diff --git a/include/linux/libata.h b/include/linux/libata.h index 505160ab472b..bd0df84cfd87 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -95,6 +95,7 @@ enum { ATA_DFLAG_LBA48 = (1 << 0), /* device supports LBA48 */ ATA_DFLAG_PIO = (1 << 1), /* device currently in PIO mode */ ATA_DFLAG_LOCK_SECTORS = (1 << 2), /* don't adjust max_sectors */ + ATA_DFLAG_LBA = (1 << 3), /* device supports LBA */ ATA_DEV_UNKNOWN = 0, /* unknown device */ ATA_DEV_ATA = 1, /* ATA device */ @@ -278,6 +279,11 @@ struct ata_device { u8 xfer_protocol; /* taskfile xfer protocol */ u8 read_cmd; /* opcode to use on read */ u8 write_cmd; /* opcode to use on write */ + + /* for CHS addressing */ + u16 cylinders; /* Number of cylinders */ + u16 heads; /* Number of heads */ + u16 sectors; /* Number of sectors per track */ }; struct ata_port { -- cgit v1.2.3 From b652b438fcad4c9c079d0774e9d45ee58fae22e2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Jun 2005 12:38:14 +0100 Subject: [PATCH] I2C: Add PXA I2C driver Add support for the I2C PXA driver. 
Signed-off-by: Russell King --- drivers/i2c/busses/Kconfig | 16 + drivers/i2c/busses/Makefile | 1 + drivers/i2c/busses/i2c-pxa.c | 1031 ++++++++++++++++++++++++++++++++++++++++ include/asm-arm/arch-pxa/i2c.h | 70 +++ include/linux/i2c-id.h | 1 + include/linux/i2c-pxa.h | 48 ++ 6 files changed, 1167 insertions(+) create mode 100644 drivers/i2c/busses/i2c-pxa.c create mode 100644 include/asm-arm/arch-pxa/i2c.h create mode 100644 include/linux/i2c-pxa.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index a0018de3bef4..a3bec8d65cff 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -144,6 +144,22 @@ config I2C_I810 This driver can also be built as a module. If so, the module will be called i2c-i810. +config I2C_PXA + tristate "Intel PXA2XX I2C adapter (EXPERIMENTAL)" + depends on I2C && EXPERIMENTAL && ARCH_PXA + help + If you have devices in the PXA I2C bus, say yes to this option. + This driver can also be built as a module. If so, the module + will be called i2c-pxa. + +config I2C_PXA_SLAVE + bool "Intel PXA2XX I2C Slave comms support" + depends on I2C_PXA + help + Support I2C slave mode communications on the PXA I2C bus. This + is necessary for systems where the PXA may be a target on the + I2C bus. 
+ config I2C_PIIX4 tristate "Intel PIIX4" depends on I2C && PCI diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 42d6d814da72..980b3e983670 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -28,6 +28,7 @@ obj-$(CONFIG_I2C_PARPORT_LIGHT) += i2c-parport-light.o obj-$(CONFIG_I2C_PCA_ISA) += i2c-pca-isa.o obj-$(CONFIG_I2C_PIIX4) += i2c-piix4.o obj-$(CONFIG_I2C_PROSAVAGE) += i2c-prosavage.o +obj-$(CONFIG_I2C_PXA) += i2c-pxa.o obj-$(CONFIG_I2C_RPXLITE) += i2c-rpx.o obj-$(CONFIG_I2C_S3C2410) += i2c-s3c2410.o obj-$(CONFIG_I2C_SAVAGE4) += i2c-savage4.o diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c new file mode 100644 index 000000000000..a72d28365297 --- /dev/null +++ b/drivers/i2c/busses/i2c-pxa.c @@ -0,0 +1,1031 @@ +/* + * i2c_adap_pxa.c + * + * I2C adapter for the PXA I2C bus access. + * + * Copyright (C) 2002 Intrinsyc Software Inc. + * Copyright (C) 2004-2005 Deep Blue Solutions Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * History: + * Apr 2002: Initial version [CS] + * Jun 2002: Properly separated algo/adap [FB] + * Jan 2003: Fixed several bugs concerning interrupt handling [Kai-Uwe Bloem] + * Jan 2003: added limited signal handling [Kai-Uwe Bloem] + * Sep 2004: Major rework to ensure efficient bus handling [RMK] + * Dec 2004: Added support for PXA27x and slave device probing [Liam Girdwood] + * Feb 2005: Rework slave mode handling [RMK] + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +struct pxa_i2c { + spinlock_t lock; + wait_queue_head_t wait; + struct i2c_msg *msg; + unsigned int msg_num; + unsigned int msg_idx; + unsigned int msg_ptr; + unsigned int slave_addr; + + struct i2c_adapter adap; +#ifdef CONFIG_I2C_PXA_SLAVE + struct i2c_slave_client *slave; +#endif + + unsigned int irqlogidx; + u32 isrlog[32]; + u32 icrlog[32]; +}; + +/* + * I2C Slave mode address + */ +#define I2C_PXA_SLAVE_ADDR 0x1 + +/* + * Leave DEBUG undefined to remove all debug statements via dead code elimination. + */ +#undef DEBUG + +#if 0 +#define DBGLVL KERN_INFO +#else +#define DBGLVL KERN_DEBUG +#endif + +#ifdef DEBUG + +struct bits { + u32 mask; + const char *set; + const char *unset; +}; +#define BIT(m, s, u) { .mask = m, .set = s, .unset = u } + +static inline void +decode_bits(const char *prefix, const struct bits *bits, int num, u32 val) +{ + printk("%s %08x: ", prefix, val); + while (num--) { + const char *str = val & bits->mask ? 
bits->set : bits->unset; + if (str) + printk("%s ", str); + bits++; + } +} + +static const struct bits isr_bits[] = { + BIT(ISR_RWM, "RX", "TX"), + BIT(ISR_ACKNAK, "NAK", "ACK"), + BIT(ISR_UB, "Bsy", "Rdy"), + BIT(ISR_IBB, "BusBsy", "BusRdy"), + BIT(ISR_SSD, "SlaveStop", NULL), + BIT(ISR_ALD, "ALD", NULL), + BIT(ISR_ITE, "TxEmpty", NULL), + BIT(ISR_IRF, "RxFull", NULL), + BIT(ISR_GCAD, "GenCall", NULL), + BIT(ISR_SAD, "SlaveAddr", NULL), + BIT(ISR_BED, "BusErr", NULL), +}; + +static void decode_ISR(unsigned int val) +{ + decode_bits(DBGLVL "ISR", isr_bits, ARRAY_SIZE(isr_bits), val); + printk("\n"); +} + +static const struct bits icr_bits[] = { + BIT(ICR_START, "START", NULL), + BIT(ICR_STOP, "STOP", NULL), + BIT(ICR_ACKNAK, "ACKNAK", NULL), + BIT(ICR_TB, "TB", NULL), + BIT(ICR_MA, "MA", NULL), + BIT(ICR_SCLE, "SCLE", "scle"), + BIT(ICR_IUE, "IUE", "iue"), + BIT(ICR_GCD, "GCD", NULL), + BIT(ICR_ITEIE, "ITEIE", NULL), + BIT(ICR_IRFIE, "IRFIE", NULL), + BIT(ICR_BEIE, "BEIE", NULL), + BIT(ICR_SSDIE, "SSDIE", NULL), + BIT(ICR_ALDIE, "ALDIE", NULL), + BIT(ICR_SADIE, "SADIE", NULL), + BIT(ICR_UR, "UR", "ur"), +}; + +static void decode_ICR(unsigned int val) +{ + decode_bits(DBGLVL "ICR", icr_bits, ARRAY_SIZE(icr_bits), val); + printk("\n"); +} + +static unsigned int i2c_debug = DEBUG; + +static void i2c_pxa_show_state(struct pxa_i2c *i2c, int lno, const char *fname) +{ + printk(DBGLVL "state:%s:%d: ISR=%08x, ICR=%08x, IBMR=%02x\n", fname, lno, ISR, ICR, IBMR); +} + +#define show_state(i2c) i2c_pxa_show_state(i2c, __LINE__, __FUNCTION__) +#else +#define i2c_debug 0 + +#define show_state(i2c) do { } while (0) +#define decode_ISR(val) do { } while (0) +#define decode_ICR(val) do { } while (0) +#endif + +#define eedbg(lvl, x...) 
do { if ((lvl) < 1) { printk(DBGLVL "" x); } } while(0) + +static void i2c_pxa_master_complete(struct pxa_i2c *i2c, int ret); + +static void i2c_pxa_scream_blue_murder(struct pxa_i2c *i2c, const char *why) +{ + unsigned int i; + printk("i2c: error: %s\n", why); + printk("i2c: msg_num: %d msg_idx: %d msg_ptr: %d\n", + i2c->msg_num, i2c->msg_idx, i2c->msg_ptr); + printk("i2c: ICR: %08x ISR: %08x\ni2c: log: ", ICR, ISR); + for (i = 0; i < i2c->irqlogidx; i++) + printk("[%08x:%08x] ", i2c->isrlog[i], i2c->icrlog[i]); + printk("\n"); +} + +static inline int i2c_pxa_is_slavemode(struct pxa_i2c *i2c) +{ + return !(ICR & ICR_SCLE); +} + +static void i2c_pxa_abort(struct pxa_i2c *i2c) +{ + unsigned long timeout = jiffies + HZ/4; + + if (i2c_pxa_is_slavemode(i2c)) { + printk(DBGLVL "i2c_pxa_transfer: called in slave mode\n"); + return; + } + + while (time_before(jiffies, timeout) && (IBMR & 0x1) == 0) { + unsigned long icr = ICR; + + icr &= ~ICR_START; + icr |= ICR_ACKNAK | ICR_STOP | ICR_TB; + + ICR = icr; + + show_state(i2c); + + msleep(1); + } + + ICR &= ~(ICR_MA | ICR_START | ICR_STOP); +} + +static int i2c_pxa_wait_bus_not_busy(struct pxa_i2c *i2c) +{ + int timeout = DEF_TIMEOUT; + + while (timeout-- && ISR & (ISR_IBB | ISR_UB)) { + if ((ISR & ISR_SAD) != 0) + timeout += 4; + + msleep(2); + show_state(i2c); + } + + if (timeout <= 0) + show_state(i2c); + + return timeout <= 0 ? I2C_RETRY : 0; +} + +static int i2c_pxa_wait_master(struct pxa_i2c *i2c) +{ + unsigned long timeout = jiffies + HZ*4; + + while (time_before(jiffies, timeout)) { + if (i2c_debug > 1) + printk(DBGLVL "i2c_pxa_wait_master: %ld: ISR=%08x, ICR=%08x, IBMR=%02x\n", + (long)jiffies, ISR, ICR, IBMR); + + if (ISR & ISR_SAD) { + if (i2c_debug > 0) + printk(DBGLVL "i2c_pxa_wait_master: Slave detected\n"); + goto out; + } + + /* wait for unit and bus being not busy, and we also do a + * quick check of the i2c lines themselves to ensure they've + * gone high... 
+ */ + if ((ISR & (ISR_UB | ISR_IBB)) == 0 && IBMR == 3) { + if (i2c_debug > 0) + printk(DBGLVL "i2c_pxa_wait_master: done\n"); + return 1; + } + + msleep(1); + } + + if (i2c_debug > 0) + printk(DBGLVL "i2c_pxa_wait_master: did not free\n"); + out: + return 0; +} + +static int i2c_pxa_set_master(struct pxa_i2c *i2c) +{ + if (i2c_debug) + printk(DBGLVL "I2C: setting to bus master\n"); + + if ((ISR & (ISR_UB | ISR_IBB)) != 0) { + printk(DBGLVL "set_master: unit is busy\n"); + if (!i2c_pxa_wait_master(i2c)) { + printk(DBGLVL "set_master: error: unit busy\n"); + return I2C_RETRY; + } + } + + ICR |= ICR_SCLE; + return 0; +} + +#ifdef CONFIG_I2C_PXA_SLAVE +static int i2c_pxa_wait_slave(struct pxa_i2c *i2c) +{ + unsigned long timeout = jiffies + HZ*1; + + /* wait for stop */ + + show_state(i2c); + + while (time_before(jiffies, timeout)) { + if (i2c_debug > 1) + printk(DBGLVL "i2c_pxa_wait_slave: %ld: ISR=%08x, ICR=%08x, IBMR=%02x\n", + (long)jiffies, ISR, ICR, IBMR); + + if ((ISR & (ISR_UB|ISR_IBB|ISR_SAD)) == ISR_SAD || + (ICR & ICR_SCLE) == 0) { + if (i2c_debug > 1) + printk(DBGLVL "i2c_pxa_wait_slave: done\n"); + return 1; + } + + msleep(1); + } + + if (i2c_debug > 0) + printk(DBGLVL "i2c_pxa_wait_slave: did not free\n"); + return 0; +} + +/* + * clear the hold on the bus, and take care of anything else + * that has been configured + */ +static void i2c_pxa_set_slave(struct pxa_i2c *i2c, int errcode) +{ + show_state(i2c); + + if (errcode < 0) { + udelay(100); /* simple delay */ + } else { + /* we need to wait for the stop condition to end */ + + /* if we were in stop, then clear... 
*/ + if (ICR & ICR_STOP) { + udelay(100); + ICR &= ~ICR_STOP; + } + + if (!i2c_pxa_wait_slave(i2c)) { + printk(KERN_ERR "i2c_pxa_set_slave: wait timedout\n"); + return; + } + } + + ICR &= ~(ICR_STOP|ICR_ACKNAK|ICR_MA); + ICR &= ~ICR_SCLE; + + if (i2c_debug) { + printk(DBGLVL "ICR now %08x, ISR %08x\n", ICR, ISR); + decode_ICR(ICR); + } +} +#else +#define i2c_pxa_set_slave(i2c, err) do { } while (0) +#endif + +static void i2c_pxa_reset(struct pxa_i2c *i2c) +{ + pr_debug("Resetting I2C Controller Unit\n"); + + /* abort any transfer currently under way */ + i2c_pxa_abort(i2c); + + /* reset according to 9.8 */ + ICR = ICR_UR; + ISR = I2C_ISR_INIT; + ICR &= ~ICR_UR; + + ISAR = i2c->slave_addr; + + /* set control register values */ + ICR = I2C_ICR_INIT; + +#ifdef CONFIG_I2C_PXA_SLAVE + printk(KERN_INFO "I2C: Enabling slave mode\n"); + ICR |= ICR_SADIE | ICR_ALDIE | ICR_SSDIE; +#endif + + i2c_pxa_set_slave(i2c, 0); + + /* enable unit */ + ICR |= ICR_IUE; + udelay(100); +} + + +#ifdef CONFIG_I2C_PXA_SLAVE +/* + * I2C EEPROM emulation. + */ +static struct i2c_eeprom_emu eeprom = { + .size = I2C_EEPROM_EMU_SIZE, + .watch = LIST_HEAD_INIT(eeprom.watch), +}; + +struct i2c_eeprom_emu *i2c_pxa_get_eeprom(void) +{ + return &eeprom; +} + +int i2c_eeprom_emu_addwatcher(struct i2c_eeprom_emu *emu, void *data, + unsigned int addr, unsigned int size, + struct i2c_eeprom_emu_watcher *watcher) +{ + struct i2c_eeprom_emu_watch *watch; + unsigned long flags; + + if (addr + size > emu->size) + return -EINVAL; + + watch = kmalloc(sizeof(struct i2c_eeprom_emu_watch), GFP_KERNEL); + if (watch) { + watch->start = addr; + watch->end = addr + size - 1; + watch->ops = watcher; + watch->data = data; + + local_irq_save(flags); + list_add(&watch->node, &emu->watch); + local_irq_restore(flags); + } + + return watch ? 
0 : -ENOMEM; +} + +void i2c_eeprom_emu_delwatcher(struct i2c_eeprom_emu *emu, void *data, + struct i2c_eeprom_emu_watcher *watcher) +{ + struct i2c_eeprom_emu_watch *watch, *n; + unsigned long flags; + + list_for_each_entry_safe(watch, n, &emu->watch, node) { + if (watch->ops == watcher && watch->data == data) { + local_irq_save(flags); + list_del(&watch->node); + local_irq_restore(flags); + kfree(watch); + } + } +} + +static void i2c_eeprom_emu_event(void *ptr, i2c_slave_event_t event) +{ + struct i2c_eeprom_emu *emu = ptr; + + eedbg(3, "i2c_eeprom_emu_event: %d\n", event); + + switch (event) { + case I2C_SLAVE_EVENT_START_WRITE: + emu->seen_start = 1; + eedbg(2, "i2c_eeprom: write initiated\n"); + break; + + case I2C_SLAVE_EVENT_START_READ: + emu->seen_start = 0; + eedbg(2, "i2c_eeprom: read initiated\n"); + break; + + case I2C_SLAVE_EVENT_STOP: + emu->seen_start = 0; + eedbg(2, "i2c_eeprom: received stop\n"); + break; + + default: + eedbg(0, "i2c_eeprom: unhandled event\n"); + break; + } +} + +static int i2c_eeprom_emu_read(void *ptr) +{ + struct i2c_eeprom_emu *emu = ptr; + int ret; + + ret = emu->bytes[emu->ptr]; + emu->ptr = (emu->ptr + 1) % emu->size; + + return ret; +} + +static void i2c_eeprom_emu_write(void *ptr, unsigned int val) +{ + struct i2c_eeprom_emu *emu = ptr; + struct i2c_eeprom_emu_watch *watch; + + if (emu->seen_start != 0) { + eedbg(2, "i2c_eeprom_emu_write: setting ptr %02x\n", val); + emu->ptr = val; + emu->seen_start = 0; + return; + } + + emu->bytes[emu->ptr] = val; + + eedbg(1, "i2c_eeprom_emu_write: ptr=0x%02x, val=0x%02x\n", + emu->ptr, val); + + list_for_each_entry(watch, &emu->watch, node) { + if (!watch->ops || !watch->ops->write) + continue; + if (watch->start <= emu->ptr && watch->end >= emu->ptr) + watch->ops->write(watch->data, emu->ptr, val); + } + + emu->ptr = (emu->ptr + 1) % emu->size; +} + +struct i2c_slave_client eeprom_client = { + .data = &eeprom, + .event = i2c_eeprom_emu_event, + .read = i2c_eeprom_emu_read, + .write = 
i2c_eeprom_emu_write +}; + +/* + * PXA I2C Slave mode + */ + +static void i2c_pxa_slave_txempty(struct pxa_i2c *i2c, u32 isr) +{ + if (isr & ISR_BED) { + /* what should we do here? */ + } else { + int ret = i2c->slave->read(i2c->slave->data); + + IDBR = ret; + ICR |= ICR_TB; /* allow next byte */ + } +} + +static void i2c_pxa_slave_rxfull(struct pxa_i2c *i2c, u32 isr) +{ + unsigned int byte = IDBR; + + if (i2c->slave != NULL) + i2c->slave->write(i2c->slave->data, byte); + + ICR |= ICR_TB; +} + +static void i2c_pxa_slave_start(struct pxa_i2c *i2c, u32 isr) +{ + int timeout; + + if (i2c_debug > 0) + printk(DBGLVL "I2C: SAD, mode is slave-%cx\n", + (isr & ISR_RWM) ? 'r' : 't'); + + if (i2c->slave != NULL) + i2c->slave->event(i2c->slave->data, + (isr & ISR_RWM) ? I2C_SLAVE_EVENT_START_READ : I2C_SLAVE_EVENT_START_WRITE); + + /* + * slave could interrupt in the middle of us generating a + * start condition... if this happens, we'd better back off + * and stop holding the poor thing up + */ + ICR &= ~(ICR_START|ICR_STOP); + ICR |= ICR_TB; + + timeout = 0x10000; + + while (1) { + if ((IBMR & 2) == 2) + break; + + timeout--; + + if (timeout <= 0) { + printk(KERN_ERR "timeout waiting for SCL high\n"); + break; + } + } + + ICR &= ~ICR_SCLE; +} + +static void i2c_pxa_slave_stop(struct pxa_i2c *i2c) +{ + if (i2c_debug > 2) + printk(DBGLVL "ISR: SSD (Slave Stop)\n"); + + if (i2c->slave != NULL) + i2c->slave->event(i2c->slave->data, I2C_SLAVE_EVENT_STOP); + + if (i2c_debug > 2) + printk(DBGLVL "ISR: SSD (Slave Stop) acked\n"); + + /* + * If we have a master-mode message waiting, + * kick it off now that the slave has completed. + */ + if (i2c->msg) + i2c_pxa_master_complete(i2c, I2C_RETRY); +} +#else +static void i2c_pxa_slave_txempty(struct pxa_i2c *i2c, u32 isr) +{ + if (isr & ISR_BED) { + /* what should we do here? 
*/ + } else { + IDBR = 0; + ICR |= ICR_TB; + } +} + +static void i2c_pxa_slave_rxfull(struct pxa_i2c *i2c, u32 isr) +{ + ICR |= ICR_TB | ICR_ACKNAK; +} + +static void i2c_pxa_slave_start(struct pxa_i2c *i2c, u32 isr) +{ + int timeout; + + /* + * slave could interrupt in the middle of us generating a + * start condition... if this happens, we'd better back off + * and stop holding the poor thing up + */ + ICR &= ~(ICR_START|ICR_STOP); + ICR |= ICR_TB | ICR_ACKNAK; + + timeout = 0x10000; + + while (1) { + if ((IBMR & 2) == 2) + break; + + timeout--; + + if (timeout <= 0) { + printk(KERN_ERR "timeout waiting for SCL high\n"); + break; + } + } + + ICR &= ~ICR_SCLE; +} + +static void i2c_pxa_slave_stop(struct pxa_i2c *i2c) +{ + if (i2c->msg) + i2c_pxa_master_complete(i2c, I2C_RETRY); +} +#endif + +/* + * PXA I2C Master mode + */ + +static inline unsigned int i2c_pxa_addr_byte(struct i2c_msg *msg) +{ + unsigned int addr = (msg->addr & 0x7f) << 1; + + if (msg->flags & I2C_M_RD) + addr |= 1; + + return addr; +} + +static inline void i2c_pxa_start_message(struct pxa_i2c *i2c) +{ + u32 icr; + + /* + * Step 1: target slave address into IDBR + */ + IDBR = i2c_pxa_addr_byte(i2c->msg); + + /* + * Step 2: initiate the write. + */ + icr = ICR & ~(ICR_STOP | ICR_ALDIE); + ICR = icr | ICR_START | ICR_TB; +} + +/* + * We are protected by the adapter bus semaphore. + */ +static int i2c_pxa_do_xfer(struct pxa_i2c *i2c, struct i2c_msg *msg, int num) +{ + long timeout; + int ret; + + /* + * Wait for the bus to become free. + */ + ret = i2c_pxa_wait_bus_not_busy(i2c); + if (ret) { + printk(KERN_INFO "i2c_pxa: timeout waiting for bus free\n"); + goto out; + } + + /* + * Set master mode. 
+ */ + ret = i2c_pxa_set_master(i2c); + if (ret) { + printk(KERN_INFO "i2c_pxa_set_master: error %d\n", ret); + goto out; + } + + spin_lock_irq(&i2c->lock); + + i2c->msg = msg; + i2c->msg_num = num; + i2c->msg_idx = 0; + i2c->msg_ptr = 0; + i2c->irqlogidx = 0; + + i2c_pxa_start_message(i2c); + + spin_unlock_irq(&i2c->lock); + + /* + * The rest of the processing occurs in the interrupt handler. + */ + timeout = wait_event_timeout(i2c->wait, i2c->msg_num == 0, HZ * 5); + + /* + * We place the return code in i2c->msg_idx. + */ + ret = i2c->msg_idx; + + if (timeout == 0) + i2c_pxa_scream_blue_murder(i2c, "timeout"); + + out: + return ret; +} + +/* + * i2c_pxa_master_complete - complete the message and wake up. + */ +static void i2c_pxa_master_complete(struct pxa_i2c *i2c, int ret) +{ + i2c->msg_ptr = 0; + i2c->msg = NULL; + i2c->msg_idx ++; + i2c->msg_num = 0; + if (ret) + i2c->msg_idx = ret; + wake_up(&i2c->wait); +} + +static void i2c_pxa_irq_txempty(struct pxa_i2c *i2c, u32 isr) +{ + u32 icr = ICR & ~(ICR_START|ICR_STOP|ICR_ACKNAK|ICR_TB); + + again: + /* + * If ISR_ALD is set, we lost arbitration. + */ + if (isr & ISR_ALD) { + /* + * Do we need to do anything here? The PXA docs + * are vague about what happens. + */ + i2c_pxa_scream_blue_murder(i2c, "ALD set"); + + /* + * We ignore this error. We seem to see spurious ALDs + * for seemingly no reason. If we handle them as I think + * they should, we end up causing an I2C error, which + * is painful for some systems. + */ + return; /* ignore */ + } + + if (isr & ISR_BED) { + int ret = BUS_ERROR; + + /* + * I2C bus error - either the device NAK'd us, or + * something more serious happened. If we were NAK'd + * on the initial address phase, we can retry. + */ + if (isr & ISR_ACKNAK) { + if (i2c->msg_ptr == 0 && i2c->msg_idx == 0) + ret = I2C_RETRY; + else + ret = XFER_NAKED; + } + i2c_pxa_master_complete(i2c, ret); + } else if (isr & ISR_RWM) { + /* + * Read mode. 
We have just sent the address byte, and + * now we must initiate the transfer. + */ + if (i2c->msg_ptr == i2c->msg->len - 1 && + i2c->msg_idx == i2c->msg_num - 1) + icr |= ICR_STOP | ICR_ACKNAK; + + icr |= ICR_ALDIE | ICR_TB; + } else if (i2c->msg_ptr < i2c->msg->len) { + /* + * Write mode. Write the next data byte. + */ + IDBR = i2c->msg->buf[i2c->msg_ptr++]; + + icr |= ICR_ALDIE | ICR_TB; + + /* + * If this is the last byte of the last message, send + * a STOP. + */ + if (i2c->msg_ptr == i2c->msg->len && + i2c->msg_idx == i2c->msg_num - 1) + icr |= ICR_STOP; + } else if (i2c->msg_idx < i2c->msg_num - 1) { + /* + * Next segment of the message. + */ + i2c->msg_ptr = 0; + i2c->msg_idx ++; + i2c->msg++; + + /* + * If we aren't doing a repeated start and address, + * go back and try to send the next byte. Note that + * we do not support switching the R/W direction here. + */ + if (i2c->msg->flags & I2C_M_NOSTART) + goto again; + + /* + * Write the next address. + */ + IDBR = i2c_pxa_addr_byte(i2c->msg); + + /* + * And trigger a repeated start, and send the byte. + */ + icr &= ~ICR_ALDIE; + icr |= ICR_START | ICR_TB; + } else { + if (i2c->msg->len == 0) { + /* + * Device probes have a message length of zero + * and need the bus to be reset before it can + * be used again. + */ + i2c_pxa_reset(i2c); + } + i2c_pxa_master_complete(i2c, 0); + } + + i2c->icrlog[i2c->irqlogidx-1] = icr; + + ICR = icr; + show_state(i2c); +} + +static void i2c_pxa_irq_rxfull(struct pxa_i2c *i2c, u32 isr) +{ + u32 icr = ICR & ~(ICR_START|ICR_STOP|ICR_ACKNAK|ICR_TB); + + /* + * Read the byte. + */ + i2c->msg->buf[i2c->msg_ptr++] = IDBR; + + if (i2c->msg_ptr < i2c->msg->len) { + /* + * If this is the last byte of the last + * message, send a STOP. 
+ */ + if (i2c->msg_ptr == i2c->msg->len - 1) + icr |= ICR_STOP | ICR_ACKNAK; + + icr |= ICR_ALDIE | ICR_TB; + } else { + i2c_pxa_master_complete(i2c, 0); + } + + i2c->icrlog[i2c->irqlogidx-1] = icr; + + ICR = icr; +} + +static irqreturn_t i2c_pxa_handler(int this_irq, void *dev_id, struct pt_regs *regs) +{ + struct pxa_i2c *i2c = dev_id; + u32 isr = ISR; + + if (i2c_debug > 2 && 0) { + printk(DBGLVL "i2c_pxa_handler: ISR=%08x, ICR=%08x, IBMR=%02x\n", + isr, ICR, IBMR); + decode_ISR(isr); + } + + if (i2c->irqlogidx < sizeof(i2c->isrlog)/sizeof(u32)) + i2c->isrlog[i2c->irqlogidx++] = isr; + + show_state(i2c); + + /* + * Always clear all pending IRQs. + */ + ISR = isr & (ISR_SSD|ISR_ALD|ISR_ITE|ISR_IRF|ISR_SAD|ISR_BED); + + if (isr & ISR_SAD) + i2c_pxa_slave_start(i2c, isr); + if (isr & ISR_SSD) + i2c_pxa_slave_stop(i2c); + + if (i2c_pxa_is_slavemode(i2c)) { + if (isr & ISR_ITE) + i2c_pxa_slave_txempty(i2c, isr); + if (isr & ISR_IRF) + i2c_pxa_slave_rxfull(i2c, isr); + } else if (i2c->msg) { + if (isr & ISR_ITE) + i2c_pxa_irq_txempty(i2c, isr); + if (isr & ISR_IRF) + i2c_pxa_irq_rxfull(i2c, isr); + } else { + i2c_pxa_scream_blue_murder(i2c, "spurious irq"); + } + + return IRQ_HANDLED; +} + + +static int i2c_pxa_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) +{ + struct pxa_i2c *i2c = adap->algo_data; + int ret, i; + + for (i = adap->retries; i >= 0; i--) { + ret = i2c_pxa_do_xfer(i2c, msgs, num); + if (ret != I2C_RETRY) + goto out; + + if (i2c_debug) + printk(KERN_INFO "Retrying transmission\n"); + udelay(100); + } + i2c_pxa_scream_blue_murder(i2c, "exhausted retries"); + ret = -EREMOTEIO; + out: + i2c_pxa_set_slave(i2c, ret); + return ret; +} + +static struct i2c_algorithm i2c_pxa_algorithm = { + .name = "PXA-I2C-Algorithm", + .id = I2C_ALGO_PXA, + .master_xfer = i2c_pxa_xfer, +}; + +static struct pxa_i2c i2c_pxa = { + .lock = SPIN_LOCK_UNLOCKED, + .wait = __WAIT_QUEUE_HEAD_INITIALIZER(i2c_pxa.wait), + .adap = { + .name = "pxa2xx-i2c", + .id = 
I2C_ALGO_PXA, + .algo = &i2c_pxa_algorithm, + .retries = 5, + }, +}; + +static int i2c_pxa_probe(struct device *dev) +{ + struct pxa_i2c *i2c = &i2c_pxa; + struct i2c_pxa_platform_data *plat = dev->platform_data; + int ret; + +#ifdef CONFIG_PXA27x + pxa_gpio_mode(GPIO117_I2CSCL_MD); + pxa_gpio_mode(GPIO118_I2CSDA_MD); + udelay(100); +#endif + + i2c->slave_addr = I2C_PXA_SLAVE_ADDR; + +#ifdef CONFIG_I2C_PXA_SLAVE + i2c->slave = &eeprom_client; + if (plat) { + i2c->slave_addr = plat->slave_addr; + if (plat->slave) + i2c->slave = plat->slave; + } +#endif + + pxa_set_cken(CKEN14_I2C, 1); + ret = request_irq(IRQ_I2C, i2c_pxa_handler, SA_INTERRUPT, + "pxa2xx-i2c", i2c); + if (ret) + goto out; + + i2c_pxa_reset(i2c); + + i2c->adap.algo_data = i2c; + i2c->adap.dev.parent = dev; + + ret = i2c_add_adapter(&i2c->adap); + if (ret < 0) { + printk(KERN_INFO "I2C: Failed to add bus\n"); + goto err_irq; + } + + dev_set_drvdata(dev, i2c); + +#ifdef CONFIG_I2C_PXA_SLAVE + printk(KERN_INFO "I2C: %s: PXA I2C adapter, slave address %d\n", + i2c->adap.dev.bus_id, i2c->slave_addr); +#else + printk(KERN_INFO "I2C: %s: PXA I2C adapter\n", + i2c->adap.dev.bus_id); +#endif + return 0; + + err_irq: + free_irq(IRQ_I2C, i2c); + out: + return ret; +} + +static int i2c_pxa_remove(struct device *dev) +{ + struct pxa_i2c *i2c = dev_get_drvdata(dev); + + dev_set_drvdata(dev, NULL); + + i2c_del_adapter(&i2c->adap); + free_irq(IRQ_I2C, i2c); + pxa_set_cken(CKEN14_I2C, 0); + + return 0; +} + +static struct device_driver i2c_pxa_driver = { + .name = "pxa2xx-i2c", + .bus = &platform_bus_type, + .probe = i2c_pxa_probe, + .remove = i2c_pxa_remove, +}; + +static int __init i2c_adap_pxa_init(void) +{ + return driver_register(&i2c_pxa_driver); +} + +static void i2c_adap_pxa_exit(void) +{ + driver_unregister(&i2c_pxa_driver); +} + +module_init(i2c_adap_pxa_init); +module_exit(i2c_adap_pxa_exit); diff --git a/include/asm-arm/arch-pxa/i2c.h b/include/asm-arm/arch-pxa/i2c.h new file mode 100644 index 
000000000000..46ec2243974a --- /dev/null +++ b/include/asm-arm/arch-pxa/i2c.h @@ -0,0 +1,70 @@ +/* + * i2c_pxa.h + * + * Copyright (C) 2002 Intrinsyc Software Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#ifndef _I2C_PXA_H_ +#define _I2C_PXA_H_ + +#if 0 +#define DEF_TIMEOUT 3 +#else +/* need a longer timeout if we're dealing with the fact we may well be + * looking at a multi-master environment +*/ +#define DEF_TIMEOUT 32 +#endif + +#define BUS_ERROR (-EREMOTEIO) +#define XFER_NAKED (-ECONNREFUSED) +#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ + +/* ICR initialize bit values +* +* 15. FM 0 (100 Khz operation) +* 14. UR 0 (No unit reset) +* 13. SADIE 0 (Disables the unit from interrupting on slave addresses +* matching its slave address) +* 12. ALDIE 0 (Disables the unit from interrupt when it loses arbitration +* in master mode) +* 11. SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) +* 10. BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) +* 9. IRFIE 1 (Enable interrupts from full buffer received) +* 8. ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) +* 7. GCD 1 (Disables i2c unit response to general call messages as a slave) +* 6. IUE 0 (Disable unit until we change settings) +* 5. SCLE 1 (Enables the i2c clock output for master mode (drives SCL) +* 4. MA 0 (Only send stop with the ICR stop bit) +* 3. TB 0 (We are not transmitting a byte initially) +* 2. ACKNAK 0 (Send an ACK after the unit receives a byte) +* 1. STOP 0 (Do not send a STOP) +* 0. START 0 (Do not send a START) +* +*/ +#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) + +/* I2C status register init values + * + * 10. BED 1 (Clear bus error detected) + * 9. SAD 1 (Clear slave address detected) + * 7. 
IRF 1 (Clear IDBR Receive Full) + * 6. ITE 1 (Clear IDBR Transmit Empty) + * 5. ALD 1 (Clear Arbitration Loss Detected) + * 4. SSD 1 (Clear Slave Stop Detected) + */ +#define I2C_ISR_INIT 0x7FF /* status register init */ + +struct i2c_slave_client; + +struct i2c_pxa_platform_data { + unsigned int slave_addr; + struct i2c_slave_client *slave; +}; + +extern void pxa_set_i2c_info(struct i2c_pxa_platform_data *info); +#endif diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 89270ce51470..74d6dc942715 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -203,6 +203,7 @@ #define I2C_ALGO_MV64XXX 0x190000 /* Marvell mv64xxx i2c ctlr */ #define I2C_ALGO_PCA 0x1a0000 /* PCA 9564 style adapters */ #define I2C_ALGO_AU1550 0x1b0000 /* Au1550 PSC algorithm */ +#define I2C_ALGO_PXA 0x1c0000 /* Intel PXA I2C algorithm */ #define I2C_ALGO_EXP 0x800000 /* experimental */ diff --git a/include/linux/i2c-pxa.h b/include/linux/i2c-pxa.h new file mode 100644 index 000000000000..5f3eaf802223 --- /dev/null +++ b/include/linux/i2c-pxa.h @@ -0,0 +1,48 @@ +#ifndef _LINUX_I2C_ALGO_PXA_H +#define _LINUX_I2C_ALGO_PXA_H + +struct i2c_eeprom_emu_watcher { + void (*write)(void *, unsigned int addr, unsigned char newval); +}; + +struct i2c_eeprom_emu_watch { + struct list_head node; + unsigned int start; + unsigned int end; + struct i2c_eeprom_emu_watcher *ops; + void *data; +}; + +#define I2C_EEPROM_EMU_SIZE (256) + +struct i2c_eeprom_emu { + unsigned int size; + unsigned int ptr; + unsigned int seen_start; + struct list_head watch; + + unsigned char bytes[I2C_EEPROM_EMU_SIZE]; +}; + +typedef enum i2c_slave_event_e { + I2C_SLAVE_EVENT_START_READ, + I2C_SLAVE_EVENT_START_WRITE, + I2C_SLAVE_EVENT_STOP +} i2c_slave_event_t; + +struct i2c_slave_client { + void *data; + void (*event)(void *ptr, i2c_slave_event_t event); + int (*read) (void *ptr); + void (*write)(void *ptr, unsigned int val); +}; + +extern int i2c_eeprom_emu_addwatcher(struct i2c_eeprom_emu *, void *data, 
+ unsigned int addr, unsigned int size, + struct i2c_eeprom_emu_watcher *); + +extern void i2c_eeprom_emu_delwatcher(struct i2c_eeprom_emu *, void *data, struct i2c_eeprom_emu_watcher *watcher); + +extern struct i2c_eeprom_emu *i2c_pxa_get_eeprom(void); + +#endif /* _LINUX_I2C_ALGO_PXA_H */ -- cgit v1.2.3 From 0f45aa18e65cf3d768082d7d86054a0d2a20bb18 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 19 Jun 2005 19:35:50 +0100 Subject: AUDIT: Allow filtering of user messages Turn the field from a bitmask to an enumeration and add a list to allow filtering of messages generated by userspace. We also define a list for file system watches in anticipation of that feature. Signed-off-by: David Woodhouse --- include/linux/audit.h | 15 ++++++--- kernel/audit.c | 36 +++++++++++--------- kernel/auditsc.c | 92 ++++++++++++++++++++++++++++++--------------------- 3 files changed, 85 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index bf2ad3ba72eb..b5bda24f7365 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -75,10 +75,15 @@ #define AUDIT_KERNEL 2000 /* Asynchronous audit record. NOT A REQUEST. 
*/ /* Rule flags */ -#define AUDIT_PER_TASK 0x01 /* Apply rule at task creation (not syscall) */ -#define AUDIT_AT_ENTRY 0x02 /* Apply rule at syscall entry */ -#define AUDIT_AT_EXIT 0x04 /* Apply rule at syscall exit */ -#define AUDIT_PREPEND 0x10 /* Prepend to front of list */ +#define AUDIT_FILTER_USER 0x00 /* Apply rule to user-generated messages */ +#define AUDIT_FILTER_TASK 0x01 /* Apply rule at task creation (not syscall) */ +#define AUDIT_FILTER_ENTRY 0x02 /* Apply rule at syscall entry */ +#define AUDIT_FILTER_WATCH 0x03 /* Apply rule to file system watches */ +#define AUDIT_FILTER_EXIT 0x04 /* Apply rule at syscall exit */ + +#define AUDIT_NR_FILTERS 5 + +#define AUDIT_FILTER_PREPEND 0x10 /* Prepend to front of list */ /* Rule actions */ #define AUDIT_NEVER 0 /* Do not build context if rule matches */ @@ -230,6 +235,7 @@ extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); +extern int audit_filter_user(struct task_struct *tsk, int type); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) @@ -246,6 +252,7 @@ extern void audit_signal_info(int sig, struct task_struct *t); #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) +#define audit_filter_user(struct ({ 1; }) #endif #ifdef CONFIG_AUDIT diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc29..f0bbfe073136 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -107,13 +107,6 @@ static struct sk_buff_head audit_skb_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); -/* There are three lists of rules -- one to search at task creation - * time, one to search at syscall entry time, and another to search at - * syscall exit time. 
*/ -static LIST_HEAD(audit_tsklist); -static LIST_HEAD(audit_entlist); -static LIST_HEAD(audit_extlist); - /* The netlink socket is only to be read by 1 CPU, which lets us assume * that list additions and deletions never happen simultaneously in * auditsc.c */ @@ -376,6 +369,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) u16 msg_type = nlh->nlmsg_type; uid_t loginuid; /* loginuid of sender */ struct audit_sig_info sig_data; + struct task_struct *tsk; err = audit_netlink_ok(NETLINK_CB(skb).eff_cap, msg_type); if (err) @@ -435,15 +429,25 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: - ab = audit_log_start(NULL, msg_type); - if (!ab) - break; /* audit_panic has been called */ - audit_log_format(ab, - "user pid=%d uid=%u auid=%u" - " msg='%.1024s'", - pid, uid, loginuid, (char *)data); - audit_set_pid(ab, pid); - audit_log_end(ab); + read_lock(&tasklist_lock); + tsk = find_task_by_pid(pid); + if (tsk) + get_task_struct(tsk); + read_unlock(&tasklist_lock); + if (!tsk) + return -ESRCH; + + if (audit_filter_user(tsk, msg_type)) { + ab = audit_log_start(NULL, msg_type); + if (ab) { + audit_log_format(ab, + "user pid=%d uid=%u auid=%u msg='%.1024s'", + pid, uid, loginuid, (char *)data); + audit_set_pid(ab, pid); + audit_log_end(ab); + } + } + put_task_struct(tsk); break; case AUDIT_ADD: case AUDIT_DEL: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e75f84e1a1a0..6b4fbb1c0129 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -167,9 +167,16 @@ struct audit_context { /* There are three lists of rules -- one to search at task creation * time, one to search at syscall entry time, and another to search at * syscall exit time. 
*/ -static LIST_HEAD(audit_tsklist); -static LIST_HEAD(audit_entlist); -static LIST_HEAD(audit_extlist); +static struct list_head audit_filter_list[AUDIT_NR_FILTERS] = { + LIST_HEAD_INIT(audit_filter_list[0]), + LIST_HEAD_INIT(audit_filter_list[1]), + LIST_HEAD_INIT(audit_filter_list[2]), + LIST_HEAD_INIT(audit_filter_list[3]), + LIST_HEAD_INIT(audit_filter_list[4]), +#if AUDIT_NR_FILTERS != 5 +#error Fix audit_filter_list initialiser +#endif +}; struct audit_entry { struct list_head list; @@ -210,16 +217,15 @@ static int audit_compare_rule(struct audit_rule *a, struct audit_rule *b) /* Note that audit_add_rule and audit_del_rule are called via * audit_receive() in audit.c, and are protected by * audit_netlink_sem. */ -static inline int audit_add_rule(struct audit_entry *entry, - struct list_head *list) +static inline void audit_add_rule(struct audit_entry *entry, + struct list_head *list) { - if (entry->rule.flags & AUDIT_PREPEND) { - entry->rule.flags &= ~AUDIT_PREPEND; + if (entry->rule.flags & AUDIT_FILTER_PREPEND) { + entry->rule.flags &= ~AUDIT_FILTER_PREPEND; list_add_rcu(&entry->list, list); } else { list_add_tail_rcu(&entry->list, list); } - return 0; } static void audit_free_rule(struct rcu_head *head) @@ -245,7 +251,7 @@ static inline int audit_del_rule(struct audit_rule *rule, return 0; } } - return -EFAULT; /* No matching rule */ + return -ENOENT; /* No matching rule */ } /* Copy rule from user-space to kernel-space. 
Called during @@ -260,6 +266,8 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) return -1; if (s->field_count < 0 || s->field_count > AUDIT_MAX_FIELDS) return -1; + if ((s->flags & ~AUDIT_FILTER_PREPEND) >= AUDIT_NR_FILTERS) + return -1; d->flags = s->flags; d->action = s->action; @@ -275,23 +283,20 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) int audit_receive_filter(int type, int pid, int uid, int seq, void *data, uid_t loginuid) { - u32 flags; struct audit_entry *entry; int err = 0; + int i; + unsigned listnr; switch (type) { case AUDIT_LIST: /* The *_rcu iterators not needed here because we are always called with audit_netlink_sem held. */ - list_for_each_entry(entry, &audit_tsklist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); - list_for_each_entry(entry, &audit_entlist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); - list_for_each_entry(entry, &audit_extlist, list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); + for (i=0; i<AUDIT_NR_FILTERS; i++) { + list_for_each_entry(entry, &audit_filter_list[i], list) + audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, + &entry->rule, sizeof(entry->rule)); + } audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); break; case AUDIT_ADD: @@ -301,26 +306,20 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, kfree(entry); return -EINVAL; } - flags = entry->rule.flags; - if (!err && (flags & AUDIT_PER_TASK)) - err = audit_add_rule(entry, &audit_tsklist); - if (!err && (flags & AUDIT_AT_ENTRY)) - err = audit_add_rule(entry, &audit_entlist); - if (!err && (flags & AUDIT_AT_EXIT)) - err = audit_add_rule(entry, &audit_extlist); + listnr = entry->rule.flags & ~AUDIT_FILTER_PREPEND; + audit_add_rule(entry, &audit_filter_list[listnr]); audit_log(NULL, AUDIT_CONFIG_CHANGE, "auid=%u added an audit rule\n", loginuid); break; case AUDIT_DEL: - flags =((struct audit_rule *)data)->flags; - if (!err && (flags & AUDIT_PER_TASK)) - err = audit_del_rule(data, &audit_tsklist); - 
if (!err && (flags & AUDIT_AT_ENTRY)) - err = audit_del_rule(data, &audit_entlist); - if (!err && (flags & AUDIT_AT_EXIT)) - err = audit_del_rule(data, &audit_extlist); - audit_log(NULL, AUDIT_CONFIG_CHANGE, - "auid=%u removed an audit rule\n", loginuid); + listnr =((struct audit_rule *)data)->flags & ~AUDIT_FILTER_PREPEND; + if (listnr >= AUDIT_NR_FILTERS) + return -EINVAL; + + err = audit_del_rule(data, &audit_filter_list[listnr]); + if (!err) + audit_log(NULL, AUDIT_CONFIG_CHANGE, + "auid=%u removed an audit rule\n", loginuid); break; default: return -EINVAL; @@ -454,7 +453,7 @@ static enum audit_state audit_filter_task(struct task_struct *tsk) enum audit_state state; rcu_read_lock(); - list_for_each_entry_rcu(e, &audit_tsklist, list) { + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_TASK], list) { if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { rcu_read_unlock(); return state; @@ -490,6 +489,23 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_BUILD_CONTEXT; } +int audit_filter_user(struct task_struct *tsk, int type) +{ + struct audit_entry *e; + enum audit_state state; + + rcu_read_lock(); + list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) { + if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { + rcu_read_unlock(); + return state != AUDIT_DISABLED; + } + } + rcu_read_unlock(); + return 1; /* Audit by default */ + +} + /* This should be called with task_lock() held. 
*/ static inline struct audit_context *audit_get_context(struct task_struct *tsk, int return_valid, @@ -504,7 +520,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk, if (context->in_syscall && !context->auditable) { enum audit_state state; - state = audit_filter_syscall(tsk, context, &audit_extlist); + state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]); if (state == AUDIT_RECORD_CONTEXT) context->auditable = 1; } @@ -876,7 +892,7 @@ void audit_syscall_entry(struct task_struct *tsk, int arch, int major, state = context->state; if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT) - state = audit_filter_syscall(tsk, context, &audit_entlist); + state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]); if (likely(state == AUDIT_DISABLED)) return; -- cgit v1.2.3 From df46b9a44ceb5af2ea2351ce8e28ae7bd840b00f Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 20 Jun 2005 14:04:44 +0200 Subject: [PATCH] Add blk_rq_map_kern() Add blk_rq_map_kern which takes a kernel buffer and maps it into a request and bio. This can be used by the dm hw_handlers, old sg_scsi_ioctl, and one day scsi special requests so all requests coming into scsi will have bios. All requests having bios should allow scsi to use scatter lists for all IO and allow it to use block layer functions. 
Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 56 ++++++++++++++++++++++++++++++++++++++++ fs/bio.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/bio.h | 2 ++ include/linux/blkdev.h | 2 ++ 4 files changed, 126 insertions(+) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index f20eba22b14b..e30a3c93b70c 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -281,6 +281,7 @@ static inline void rq_init(request_queue_t *q, struct request *rq) rq->special = NULL; rq->data_len = 0; rq->data = NULL; + rq->nr_phys_segments = 0; rq->sense = NULL; rq->end_io = NULL; rq->end_io_data = NULL; @@ -2176,6 +2177,61 @@ int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) EXPORT_SYMBOL(blk_rq_unmap_user); +static int blk_rq_map_kern_endio(struct bio *bio, unsigned int bytes_done, + int error) +{ + if (bio->bi_size) + return 1; + + bio_put(bio); + return 0; +} + +/** + * blk_rq_map_kern - map kernel data to a request, for REQ_BLOCK_PC usage + * @q: request queue where request should be inserted + * @rw: READ or WRITE data + * @kbuf: the kernel buffer + * @len: length of user data + */ +struct request *blk_rq_map_kern(request_queue_t *q, int rw, void *kbuf, + unsigned int len, unsigned int gfp_mask) +{ + struct request *rq; + struct bio *bio; + + if (len > (q->max_sectors << 9)) + return ERR_PTR(-EINVAL); + if ((!len && kbuf) || (len && !kbuf)) + return ERR_PTR(-EINVAL); + + rq = blk_get_request(q, rw, gfp_mask); + if (!rq) + return ERR_PTR(-ENOMEM); + + bio = bio_map_kern(q, kbuf, len, gfp_mask); + if (!IS_ERR(bio)) { + if (rw) + bio->bi_rw |= (1 << BIO_RW); + bio->bi_end_io = blk_rq_map_kern_endio; + + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); + + rq->buffer = rq->data = NULL; + rq->data_len = len; + return rq; + } + + /* + * bio is the err-ptr + */ + blk_put_request(rq); + return (struct request *) bio; +} + +EXPORT_SYMBOL(blk_rq_map_kern); 
+ /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in diff --git a/fs/bio.c b/fs/bio.c index 3a1472acc361..707b9af2dd01 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -701,6 +701,71 @@ void bio_unmap_user(struct bio *bio) bio_put(bio); } +static struct bio *__bio_map_kern(request_queue_t *q, void *data, + unsigned int len, unsigned int gfp_mask) +{ + unsigned long kaddr = (unsigned long)data; + unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = kaddr >> PAGE_SHIFT; + const int nr_pages = end - start; + int offset, i; + struct bio *bio; + + bio = bio_alloc(gfp_mask, nr_pages); + if (!bio) + return ERR_PTR(-ENOMEM); + + offset = offset_in_page(kaddr); + for (i = 0; i < nr_pages; i++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + if (__bio_add_page(q, bio, virt_to_page(data), bytes, + offset) < bytes) + break; + + data += bytes; + len -= bytes; + offset = 0; + } + + return bio; +} + +/** + * bio_map_kern - map kernel address into bio + * @q: the request_queue_t for the bio + * @data: pointer to buffer to map + * @len: length in bytes + * @gfp_mask: allocation flags for bio allocation + * + * Map the kernel address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. + */ +struct bio *bio_map_kern(request_queue_t *q, void *data, unsigned int len, + unsigned int gfp_mask) +{ + struct bio *bio; + + bio = __bio_map_kern(q, data, len, gfp_mask); + if (IS_ERR(bio)) + return bio; + + if (bio->bi_size == len) + return bio; + + /* + * Don't support partial mappings. + */ + bio_put(bio); + return ERR_PTR(-EINVAL); +} + /* * bio_set_pages_dirty() and bio_check_pages_dirty() are support functions * for performing direct-IO in BIOs. 
@@ -1088,6 +1153,7 @@ EXPORT_SYMBOL(bio_add_page); EXPORT_SYMBOL(bio_get_nr_vecs); EXPORT_SYMBOL(bio_map_user); EXPORT_SYMBOL(bio_unmap_user); +EXPORT_SYMBOL(bio_map_kern); EXPORT_SYMBOL(bio_pair_release); EXPORT_SYMBOL(bio_split); EXPORT_SYMBOL(bio_split_pool); diff --git a/include/linux/bio.h b/include/linux/bio.h index 038022763f09..1dd2bc2e84ae 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -282,6 +282,8 @@ extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); extern void bio_unmap_user(struct bio *); +extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, + unsigned int); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); extern struct bio *bio_copy_user(struct request_queue *, unsigned long, unsigned int, int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a99b76c5a33..67339bc5f6bc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -560,6 +560,8 @@ extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); +extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, + unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3 From dd1cab95f356f1395278633565f198463cf6bd24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 20 Jun 2005 14:06:01 +0200 Subject: [PATCH] Cleanup blk_rq_map_* interfaces Change the blk_rq_map_user() and blk_rq_map_kern() interface to require a previously allocated request to be passed in. 
This is both more efficient for multiple iterations of mapping data to the same request, and it is also a much nicer API. Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 68 ++++++++++++++++++---------------------------- drivers/block/scsi_ioctl.c | 23 ++++++++++------ drivers/cdrom/cdrom.c | 13 ++++++--- include/linux/blkdev.h | 7 ++--- 4 files changed, 53 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 1471aca6fa18..42c4f3651cf8 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2107,21 +2107,19 @@ EXPORT_SYMBOL(blk_insert_request); * original bio must be passed back in to blk_rq_unmap_user() for proper * unmapping. */ -struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, - unsigned int len) +int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, + unsigned int len) { unsigned long uaddr; - struct request *rq; struct bio *bio; + int reading; if (len > (q->max_sectors << 9)) - return ERR_PTR(-EINVAL); - if ((!len && ubuf) || (len && !ubuf)) - return ERR_PTR(-EINVAL); + return -EINVAL; + if (!len || !ubuf) + return -EINVAL; - rq = blk_get_request(q, rw, __GFP_WAIT); - if (!rq) - return ERR_PTR(-ENOMEM); + reading = rq_data_dir(rq) == READ; /* * if alignment requirement is satisfied, map in user pages for @@ -2129,9 +2127,9 @@ struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, */ uaddr = (unsigned long) ubuf; if (!(uaddr & queue_dma_alignment(q)) && !(len & queue_dma_alignment(q))) - bio = bio_map_user(q, NULL, uaddr, len, rw == READ); + bio = bio_map_user(q, NULL, uaddr, len, reading); else - bio = bio_copy_user(q, uaddr, len, rw == READ); + bio = bio_copy_user(q, uaddr, len, reading); if (!IS_ERR(bio)) { rq->bio = rq->biotail = bio; @@ -2139,14 +2137,13 @@ struct request *blk_rq_map_user(request_queue_t *q, int rw, void __user *ubuf, rq->buffer = rq->data = NULL; 
rq->data_len = len; - return rq; + return 0; } /* * bio is the err-ptr */ - blk_put_request(rq); - return (struct request *) bio; + return PTR_ERR(bio); } EXPORT_SYMBOL(blk_rq_map_user); @@ -2160,7 +2157,7 @@ EXPORT_SYMBOL(blk_rq_map_user); * Description: * Unmap a request previously mapped by blk_rq_map_user(). */ -int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) +int blk_rq_unmap_user(struct bio *bio, unsigned int ulen) { int ret = 0; @@ -2171,8 +2168,7 @@ int blk_rq_unmap_user(struct request *rq, struct bio *bio, unsigned int ulen) ret = bio_uncopy_user(bio); } - blk_put_request(rq); - return ret; + return 0; } EXPORT_SYMBOL(blk_rq_unmap_user); @@ -2184,39 +2180,29 @@ EXPORT_SYMBOL(blk_rq_unmap_user); * @kbuf: the kernel buffer * @len: length of user data */ -struct request *blk_rq_map_kern(request_queue_t *q, int rw, void *kbuf, - unsigned int len, unsigned int gfp_mask) +int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, + unsigned int len, unsigned int gfp_mask) { - struct request *rq; struct bio *bio; if (len > (q->max_sectors << 9)) - return ERR_PTR(-EINVAL); - if ((!len && kbuf) || (len && !kbuf)) - return ERR_PTR(-EINVAL); - - rq = blk_get_request(q, rw, gfp_mask); - if (!rq) - return ERR_PTR(-ENOMEM); + return -EINVAL; + if (!len || !kbuf) + return -EINVAL; bio = bio_map_kern(q, kbuf, len, gfp_mask); - if (!IS_ERR(bio)) { - if (rw) - bio->bi_rw |= (1 << BIO_RW); + if (IS_ERR(bio)) + return PTR_ERR(bio); - rq->bio = rq->biotail = bio; - blk_rq_bio_prep(q, rq, bio); + if (rq_data_dir(rq) == WRITE) + bio->bi_rw |= (1 << BIO_RW); - rq->buffer = rq->data = NULL; - rq->data_len = len; - return rq; - } + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); - /* - * bio is the err-ptr - */ - blk_put_request(rq); - return (struct request *) bio; + rq->buffer = rq->data = NULL; + rq->data_len = len; + return 0; } EXPORT_SYMBOL(blk_rq_map_kern); diff --git a/drivers/block/scsi_ioctl.c 
b/drivers/block/scsi_ioctl.c index 681871ca5d60..93c4ca874be3 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -216,7 +216,7 @@ static int sg_io(struct file *file, request_queue_t *q, struct gendisk *bd_disk, struct sg_io_hdr *hdr) { unsigned long start_time; - int reading, writing; + int reading, writing, ret; struct request *rq; struct bio *bio; char sense[SCSI_SENSE_BUFFERSIZE]; @@ -255,14 +255,17 @@ static int sg_io(struct file *file, request_queue_t *q, reading = 1; break; } + } - rq = blk_rq_map_user(q, writing ? WRITE : READ, hdr->dxferp, - hdr->dxfer_len); + rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; - if (IS_ERR(rq)) - return PTR_ERR(rq); - } else - rq = blk_get_request(q, READ, __GFP_WAIT); + if (reading || writing) { + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + if (ret) + goto out; + } /* * fill in request structure @@ -321,11 +324,13 @@ static int sg_io(struct file *file, request_queue_t *q, } if (blk_rq_unmap_user(rq, bio, hdr->dxfer_len)) - return -EFAULT; + ret = -EFAULT; /* may not have succeeded, but output values written to control * structure (struct sg_io_hdr). 
*/ - return 0; +out: + blk_put_request(rq); + return ret; } #define OMAX_SB_LEN 16 /* For backward compatibility */ diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index beaa561f2ed8..6a7d926774a1 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2097,6 +2097,10 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (!q) return -ENXIO; + rq = blk_get_request(q, READ, GFP_KERNEL); + if (!rq) + return -ENOMEM; + cdi->last_sense = 0; while (nframes) { @@ -2108,9 +2112,9 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, len = nr * CD_FRAMESIZE_RAW; - rq = blk_rq_map_user(q, READ, ubuf, len); - if (IS_ERR(rq)) - return PTR_ERR(rq); + ret = blk_rq_map_user(q, rq, ubuf, len); + if (ret) + break; memset(rq->cmd, 0, sizeof(rq->cmd)); rq->cmd[0] = GPCMD_READ_CD; @@ -2138,7 +2142,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, cdi->last_sense = s->sense_key; } - if (blk_rq_unmap_user(rq, bio, len)) + if (blk_rq_unmap_user(bio, len)) ret = -EFAULT; if (ret) @@ -2149,6 +2153,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, ubuf += len; } + blk_put_request(rq); return ret; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 67339bc5f6bc..fc0dce078616 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -558,10 +558,9 @@ extern void blk_sync_queue(struct request_queue *q); extern void __blk_stop_queue(request_queue_t *q); extern void blk_run_queue(request_queue_t *); extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); -extern struct request *blk_rq_map_user(request_queue_t *, int, void __user *, unsigned int); -extern int blk_rq_unmap_user(struct request *, struct bio *, unsigned int); -extern struct request *blk_rq_map_kern(request_queue_t *, int, void *, - unsigned int, unsigned int); +extern int blk_rq_map_user(request_queue_t *, struct request *, void 
__user *, unsigned int); +extern int blk_rq_unmap_user(struct bio *, unsigned int); +extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3 From f1970baf6d74e03bd32072ab453f2fc01bc1b8d3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:06:52 +0200 Subject: [PATCH] Add scatter-gather support for the block layer SG_IO Signed-off-by: Jens Axboe --- drivers/block/ll_rw_blk.c | 64 +++++++++++++++++-- drivers/block/scsi_ioctl.c | 34 ++++++---- fs/bio.c | 150 +++++++++++++++++++++++++++++++-------------- include/linux/bio.h | 4 ++ include/linux/blkdev.h | 1 + 5 files changed, 191 insertions(+), 62 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 42c4f3651cf8..874e46fc3748 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2148,6 +2148,50 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf, EXPORT_SYMBOL(blk_rq_map_user); +/** + * blk_rq_map_user_iov - map user data to a request, for REQ_BLOCK_PC usage + * @q: request queue where request should be inserted + * @rq: request to map data to + * @iov: pointer to the iovec + * @iov_count: number of elements in the iovec + * + * Description: + * Data will be mapped directly for zero copy io, if possible. Otherwise + * a kernel bounce buffer is used. + * + * A matching blk_rq_unmap_user() must be issued at the end of io, while + * still in process context. + * + * Note: The mapped bio may need to be bounced through blk_queue_bounce() + * before being submitted to the device, as pages mapped may be out of + * reach. It's the callers responsibility to make sure this happens. The + * original bio must be passed back in to blk_rq_unmap_user() for proper + * unmapping. 
+ */ +int blk_rq_map_user_iov(request_queue_t *q, struct request *rq, + struct sg_iovec *iov, int iov_count) +{ + struct bio *bio; + + if (!iov || iov_count <= 0) + return -EINVAL; + + /* we don't allow misaligned data like bio_map_user() does. If the + * user is using sg, they're expected to know the alignment constraints + * and respect them accordingly */ + bio = bio_map_user_iov(q, NULL, iov, iov_count, rq_data_dir(rq)== READ); + if (IS_ERR(bio)) + return PTR_ERR(bio); + + rq->bio = rq->biotail = bio; + blk_rq_bio_prep(q, rq, bio); + rq->buffer = rq->data = NULL; + rq->data_len = bio->bi_size; + return 0; +} + +EXPORT_SYMBOL(blk_rq_map_user_iov); + /** * blk_rq_unmap_user - unmap a request with user data * @rq: request to be unmapped @@ -2207,6 +2251,19 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf, EXPORT_SYMBOL(blk_rq_map_kern); +void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, + struct request *rq, int at_head, + void (*done)(struct request *)) +{ + int where = at_head ? 
ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; + + rq->rq_disk = bd_disk; + rq->flags |= REQ_NOMERGE; + rq->end_io = done; + elv_add_request(q, rq, where, 1); + generic_unplug_device(q); +} + /** * blk_execute_rq - insert a request into queue for execution * @q: queue to insert the request in @@ -2224,8 +2281,6 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, char sense[SCSI_SENSE_BUFFERSIZE]; int err = 0; - rq->rq_disk = bd_disk; - /* * we need an extra reference to the request, so we can look at * it after io completion @@ -2238,11 +2293,8 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, rq->sense_len = 0; } - rq->flags |= REQ_NOMERGE; rq->waiting = &wait; - rq->end_io = blk_end_sync_rq; - elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1); - generic_unplug_device(q); + blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); wait_for_completion(&wait); rq->waiting = NULL; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 93c4ca874be3..09a7e73a0812 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -231,17 +231,11 @@ static int sg_io(struct file *file, request_queue_t *q, if (verify_command(file, cmd)) return -EPERM; - /* - * we'll do that later - */ - if (hdr->iovec_count) - return -EOPNOTSUPP; - if (hdr->dxfer_len > (q->max_sectors << 9)) return -EIO; reading = writing = 0; - if (hdr->dxfer_len) { + if (hdr->dxfer_len) switch (hdr->dxfer_direction) { default: return -EINVAL; @@ -261,11 +255,29 @@ static int sg_io(struct file *file, request_queue_t *q, if (!rq) return -ENOMEM; - if (reading || writing) { - ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); - if (ret) + if (hdr->iovec_count) { + const int size = sizeof(struct sg_iovec) * hdr->iovec_count; + struct sg_iovec *iov; + + iov = kmalloc(size, GFP_KERNEL); + if (!iov) { + ret = -ENOMEM; goto out; - } + } + + if (copy_from_user(iov, hdr->dxferp, size)) { + kfree(iov); + ret = -EFAULT; + goto out; + } + + ret = 
blk_rq_map_user_iov(q, rq, iov, hdr->iovec_count); + kfree(iov); + } else if (hdr->dxfer_len) + ret = blk_rq_map_user(q, rq, hdr->dxferp, hdr->dxfer_len); + + if (ret) + goto out; /* * fill in request structure diff --git a/fs/bio.c b/fs/bio.c index c0d9140e470c..24e4045788e2 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -25,6 +25,7 @@ #include #include #include +#include /* for struct sg_iovec */ #define BIO_POOL_SIZE 256 @@ -549,22 +550,34 @@ out_bmd: return ERR_PTR(ret); } -static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, - unsigned long uaddr, unsigned int len, - int write_to_vm) +static struct bio *__bio_map_user_iov(request_queue_t *q, + struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { - unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned long start = uaddr >> PAGE_SHIFT; - const int nr_pages = end - start; - int ret, offset, i; + int i, j; + int nr_pages = 0; struct page **pages; struct bio *bio; + int cur_page = 0; + int ret, offset; - /* - * transfer and buffer must be aligned to at least hardsector - * size for now, in the future we can relax this restriction - */ - if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + + nr_pages += end - start; + /* + * transfer and buffer must be aligned to at least hardsector + * size for now, in the future we can relax this restriction + */ + if ((uaddr & queue_dma_alignment(q)) || (len & queue_dma_alignment(q))) + return ERR_PTR(-EINVAL); + } + + if (!nr_pages) return ERR_PTR(-EINVAL); bio = bio_alloc(GFP_KERNEL, nr_pages); @@ -576,42 +589,54 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!pages) goto out; - 
down_read(&current->mm->mmap_sem); - ret = get_user_pages(current, current->mm, uaddr, nr_pages, - write_to_vm, 0, pages, NULL); - up_read(&current->mm->mmap_sem); - - if (ret < nr_pages) - goto out; - - bio->bi_bdev = bdev; - - offset = uaddr & ~PAGE_MASK; - for (i = 0; i < nr_pages; i++) { - unsigned int bytes = PAGE_SIZE - offset; - - if (len <= 0) - break; - - if (bytes > len) - bytes = len; + memset(pages, 0, nr_pages * sizeof(struct page *)); + + for (i = 0; i < iov_count; i++) { + unsigned long uaddr = (unsigned long)iov[i].iov_base; + unsigned long len = iov[i].iov_len; + unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned long start = uaddr >> PAGE_SHIFT; + const int local_nr_pages = end - start; + const int page_limit = cur_page + local_nr_pages; + + down_read(&current->mm->mmap_sem); + ret = get_user_pages(current, current->mm, uaddr, + local_nr_pages, + write_to_vm, 0, &pages[cur_page], NULL); + up_read(&current->mm->mmap_sem); + + if (ret < local_nr_pages) + goto out_unmap; + + + offset = uaddr & ~PAGE_MASK; + for (j = cur_page; j < page_limit; j++) { + unsigned int bytes = PAGE_SIZE - offset; + + if (len <= 0) + break; + + if (bytes > len) + bytes = len; + + /* + * sorry... + */ + if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes) + break; + + len -= bytes; + offset = 0; + } + cur_page = j; /* - * sorry... 
+ * release the pages we didn't map into the bio, if any */ - if (__bio_add_page(q, bio, pages[i], bytes, offset) < bytes) - break; - - len -= bytes; - offset = 0; + while (j < page_limit) + page_cache_release(pages[j++]); } - /* - * release the pages we didn't map into the bio, if any - */ - while (i < nr_pages) - page_cache_release(pages[i++]); - kfree(pages); /* @@ -620,9 +645,17 @@ static struct bio *__bio_map_user(request_queue_t *q, struct block_device *bdev, if (!write_to_vm) bio->bi_rw |= (1 << BIO_RW); + bio->bi_bdev = bdev; bio->bi_flags |= (1 << BIO_USER_MAPPED); return bio; -out: + + out_unmap: + for (i = 0; i < nr_pages; i++) { + if(!pages[i]) + break; + page_cache_release(pages[i]); + } + out: kfree(pages); bio_put(bio); return ERR_PTR(ret); @@ -641,10 +674,34 @@ out: */ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, unsigned long uaddr, unsigned int len, int write_to_vm) +{ + struct sg_iovec iov; + + iov.iov_base = (__user void *)uaddr; + iov.iov_len = len; + + return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); +} + +/** + * bio_map_user_iov - map user sg_iovec table into bio + * @q: the request_queue_t for the bio + * @bdev: destination block device + * @iov: the iovec. + * @iov_count: number of elements in the iovec + * @write_to_vm: bool indicating writing to pages or not + * + * Map the user space address into a bio suitable for io to a block + * device. Returns an error pointer in case of error. 
+ */ +struct bio *bio_map_user_iov(request_queue_t *q, struct block_device *bdev, + struct sg_iovec *iov, int iov_count, + int write_to_vm) { struct bio *bio; + int len = 0, i; - bio = __bio_map_user(q, bdev, uaddr, len, write_to_vm); + bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); if (IS_ERR(bio)) return bio; @@ -657,6 +714,9 @@ struct bio *bio_map_user(request_queue_t *q, struct block_device *bdev, */ bio_get(bio); + for (i = 0; i < iov_count; i++) + len += iov[i].iov_len; + if (bio->bi_size == len) return bio; diff --git a/include/linux/bio.h b/include/linux/bio.h index 1dd2bc2e84ae..ebcd03ba2e20 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -281,6 +281,10 @@ extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_get_nr_vecs(struct block_device *); extern struct bio *bio_map_user(struct request_queue *, struct block_device *, unsigned long, unsigned int, int); +struct sg_iovec; +extern struct bio *bio_map_user_iov(struct request_queue *, + struct block_device *, + struct sg_iovec *, int, int); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fc0dce078616..0430ea3e5f2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -561,6 +561,7 @@ extern void blk_queue_activity_fn(request_queue_t *, activity_fn *, void *); extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, unsigned int); extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned int, unsigned int); +extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) -- cgit v1.2.3 From 
994ca9a19616f0d4161a9e825f0835925d522426 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 20 Jun 2005 14:11:09 +0200 Subject: [PATCH] update blk_execute_rq to take an at_head parameter Original From: Mike Christie Modified to split out block changes (this patch) and SCSI pieces. Signed-off-by: Jens Axboe Signed-off-by: James Bottomley --- drivers/block/ll_rw_blk.c | 7 ++++--- drivers/block/scsi_ioctl.c | 6 +++--- drivers/cdrom/cdrom.c | 2 +- drivers/ide/ide-disk.c | 2 +- include/linux/blkdev.h | 4 ++-- 5 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 874e46fc3748..d260a2ce9a70 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2269,13 +2269,14 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, * @q: queue to insert the request in * @bd_disk: matching gendisk * @rq: request to insert + * @at_head: insert request at head or tail of queue * * Description: * Insert a fully prepared request at the back of the io scheduler queue * for execution. 
*/ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, - struct request *rq) + struct request *rq, int at_head) { DECLARE_COMPLETION(wait); char sense[SCSI_SENSE_BUFFERSIZE]; @@ -2294,7 +2295,7 @@ int blk_execute_rq(request_queue_t *q, struct gendisk *bd_disk, } rq->waiting = &wait; - blk_execute_rq_nowait(q, bd_disk, rq, 0, blk_end_sync_rq); + blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq); wait_for_completion(&wait); rq->waiting = NULL; @@ -2361,7 +2362,7 @@ int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, rq->data_len = 0; rq->timeout = 60 * HZ; - ret = blk_execute_rq(q, disk, rq); + ret = blk_execute_rq(q, disk, rq, 0); if (ret && error_sector) *error_sector = rq->sector; diff --git a/drivers/block/scsi_ioctl.c b/drivers/block/scsi_ioctl.c index 7717b76f7f20..abb2df249fd3 100644 --- a/drivers/block/scsi_ioctl.c +++ b/drivers/block/scsi_ioctl.c @@ -308,7 +308,7 @@ static int sg_io(struct file *file, request_queue_t *q, * (if he doesn't check that is his problem). * N.B. a non-zero SCSI status is _not_ necessarily an error. 
*/ - blk_execute_rq(q, bd_disk, rq); + blk_execute_rq(q, bd_disk, rq, 0); /* write to all output members */ hdr->status = 0xff & rq->errors; @@ -420,7 +420,7 @@ static int sg_scsi_ioctl(struct file *file, request_queue_t *q, rq->data_len = bytes; rq->flags |= REQ_BLOCK_PC; - blk_execute_rq(q, bd_disk, rq); + blk_execute_rq(q, bd_disk, rq, 0); err = rq->errors & 0xff; /* only 8 bit SCSI status */ if (err) { if (rq->sense_len && rq->sense) { @@ -573,7 +573,7 @@ int scsi_cmd_ioctl(struct file *file, struct gendisk *bd_disk, unsigned int cmd, rq->cmd[0] = GPCMD_START_STOP_UNIT; rq->cmd[4] = 0x02 + (close != 0); rq->cmd_len = 6; - err = blk_execute_rq(q, bd_disk, rq); + err = blk_execute_rq(q, bd_disk, rq, 0); blk_put_request(rq); break; default: diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 6a7d926774a1..153960348414 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -2136,7 +2136,7 @@ static int cdrom_read_cdda_bpc(struct cdrom_device_info *cdi, __u8 __user *ubuf, if (rq->bio) blk_queue_bounce(q, &rq->bio); - if (blk_execute_rq(q, cdi->disk, rq)) { + if (blk_execute_rq(q, cdi->disk, rq, 0)) { struct request_sense *s = rq->sense; ret = -EIO; cdi->last_sense = s->sense_key; diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 3302cd8eab4c..9176da7a9858 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -750,7 +750,7 @@ static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk, idedisk_prepare_flush(q, rq); - ret = blk_execute_rq(q, disk, rq); + ret = blk_execute_rq(q, disk, rq, 0); /* * if we failed and caller wants error offset, get it diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0430ea3e5f2e..a48dc12c6699 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -562,8 +562,8 @@ extern int blk_rq_map_user(request_queue_t *, struct request *, void __user *, u extern int blk_rq_unmap_user(struct bio *, unsigned int); extern int blk_rq_map_kern(request_queue_t 
*, struct request *, void *, unsigned int, unsigned int); extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int); -extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *); - +extern int blk_execute_rq(request_queue_t *, struct gendisk *, + struct request *, int); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; -- cgit v1.2.3 From ae7b961b1c943367dfe179411f120d7bf8eaba89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 20 Jun 2005 16:11:05 +0100 Subject: AUDIT: Report lookup flags with path/inode records. When LOOKUP_PARENT is used, the inode which results is not the inode found at the pathname. Report the flags so that this doesn't generate misleading audit records. Signed-off-by: David Woodhouse --- fs/namei.c | 2 +- include/linux/audit.h | 4 ++-- kernel/auditsc.c | 18 +++++++++++------- 3 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index a7f7f44119b3..6e888dd10461 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1043,7 +1043,7 @@ int fastcall path_lookup(const char *name, unsigned int flags, struct nameidata out: if (unlikely(current->audit_context && nd && nd->dentry && nd->dentry->d_inode)) - audit_inode(name, nd->dentry->d_inode); + audit_inode(name, nd->dentry->d_inode, flags); return retval; } diff --git a/include/linux/audit.h b/include/linux/audit.h index b5bda24f7365..5f812e4d01e4 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -220,7 +220,7 @@ extern void audit_syscall_entry(struct task_struct *task, int arch, extern void audit_syscall_exit(struct task_struct *task, int failed, long return_code); extern void audit_getname(const char *name); extern void audit_putname(const char *name); -extern void audit_inode(const char *name, const struct inode *inode); +extern void audit_inode(const char *name, const struct inode *inode, unsigned flags); /* 
Private API (for audit.c only) */ extern int audit_receive_filter(int type, int pid, int uid, int seq, @@ -243,7 +243,7 @@ extern int audit_filter_user(struct task_struct *tsk, int type); #define audit_syscall_exit(t,f,r) do { ; } while (0) #define audit_getname(n) do { ; } while (0) #define audit_putname(n) do { ; } while (0) -#define audit_inode(n,i) do { ; } while (0) +#define audit_inode(n,i,f) do { ; } while (0) #define audit_receive_filter(t,p,u,s,d,l) ({ -EOPNOTSUPP; }) #define auditsc_get_stamp(c,t,s) do { BUG(); } while (0) #define audit_get_loginuid(c) ({ -1; }) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 48a39579c45c..031f979019d1 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -95,6 +95,7 @@ struct audit_names { uid_t uid; gid_t gid; dev_t rdev; + unsigned flags; }; struct audit_aux_data { @@ -792,6 +793,8 @@ static void audit_log_exit(struct audit_context *context) audit_log_format(ab, " name="); audit_log_untrustedstring(ab, context->names[i].name); } + audit_log_format(ab, " flags=%x\n", context->names[i].flags); + if (context->names[i].ino != (unsigned long)-1) audit_log_format(ab, " inode=%lu dev=%02x:%02x mode=%#o" " ouid=%u ogid=%u rdev=%02x:%02x", @@ -1018,7 +1021,7 @@ void audit_putname(const char *name) /* Store the inode and device from a lookup. Called from * fs/namei.c:path_lookup(). 
*/ -void audit_inode(const char *name, const struct inode *inode) +void audit_inode(const char *name, const struct inode *inode, unsigned flags) { int idx; struct audit_context *context = current->audit_context; @@ -1044,12 +1047,13 @@ void audit_inode(const char *name, const struct inode *inode) ++context->ino_count; #endif } - context->names[idx].ino = inode->i_ino; - context->names[idx].dev = inode->i_sb->s_dev; - context->names[idx].mode = inode->i_mode; - context->names[idx].uid = inode->i_uid; - context->names[idx].gid = inode->i_gid; - context->names[idx].rdev = inode->i_rdev; + context->names[idx].flags = flags; + context->names[idx].ino = inode->i_ino; + context->names[idx].dev = inode->i_sb->s_dev; + context->names[idx].mode = inode->i_mode; + context->names[idx].uid = inode->i_uid; + context->names[idx].gid = inode->i_gid; + context->names[idx].rdev = inode->i_rdev; } void auditsc_get_stamp(struct audit_context *ctx, -- cgit v1.2.3 From f6a789d19858a951e7ff9e297a44b377c21b6c33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 21 Jun 2005 16:22:01 +0100 Subject: AUDIT: Spawn kernel thread to list filter rules. If we have enough rules to fill the netlink buffer space, it'll deadlock because auditctl isn't ever actually going to read from the socket until we return, and we aren't going to return until it reads... so we spawn a kernel thread to spew out the list and then exit. 
Signed-off-by: David Woodhouse --- include/linux/audit.h | 1 + kernel/audit.c | 2 +- kernel/auditsc.c | 53 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 47 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 5f812e4d01e4..5d1a9dda5acb 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -281,6 +281,7 @@ extern void audit_send_reply(int pid, int seq, int type, int done, int multi, void *payload, int size); extern void audit_log_lost(const char *message); +extern struct semaphore audit_netlink_sem; #else #define audit_log(c,t,f,...) do { ; } while (0) #define audit_log_start(c,t) ({ NULL; }) diff --git a/kernel/audit.c b/kernel/audit.c index ab6ac560cfe5..c1ab8dbbb67b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -110,7 +110,7 @@ static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); /* The netlink socket is only to be read by 1 CPU, which lets us assume * that list additions and deletions never happen simultaneously in * auditsc.c */ -static DECLARE_MUTEX(audit_netlink_sem); +DECLARE_MUTEX(audit_netlink_sem); /* AUDIT_BUFSIZ is the size of the temporary buffer used for formatting * audit records. Since printk uses a 1024 byte buffer, this buffer diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 031f979019d1..cb8a44945157 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include /* 0 = no checking @@ -281,24 +282,60 @@ static int audit_copy_rule(struct audit_rule *d, struct audit_rule *s) return 0; } +static int audit_list_rules(void *_dest) +{ + int pid, seq; + int *dest = _dest; + struct audit_entry *entry; + int i; + + pid = dest[0]; + seq = dest[1]; + kfree(dest); + + down(&audit_netlink_sem); + + /* The *_rcu iterators not needed here because we are + always called with audit_netlink_sem held. 
*/ + for (i=0; i<AUDIT_NR_FILTERS; i++) { + list_for_each_entry(entry, &audit_filter_list[i], list) + audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, + &entry->rule, sizeof(entry->rule)); + } + audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); + + up(&audit_netlink_sem); + return 0; +} + int audit_receive_filter(int type, int pid, int uid, int seq, void *data, uid_t loginuid) { struct audit_entry *entry; + struct task_struct *tsk; + int *dest; int err = 0; - int i; unsigned listnr; switch (type) { case AUDIT_LIST: - /* The *_rcu iterators not needed here because we are - always called with audit_netlink_sem held. */ - for (i=0; i<AUDIT_NR_FILTERS; i++) { - list_for_each_entry(entry, &audit_filter_list[i], list) - audit_send_reply(pid, seq, AUDIT_LIST, 0, 1, - &entry->rule, sizeof(entry->rule)); + /* We can't just spew out the rules here because we might fill + * the available socket buffer space and deadlock waiting for + * auditctl to read from it... which isn't ever going to + * happen if we're actually running in the context of auditctl + * trying to _send_ the stuff */ + + dest = kmalloc(2 * sizeof(int), GFP_KERNEL); + if (!dest) + return -ENOMEM; + dest[0] = pid; + dest[1] = seq; + + tsk = kthread_run(audit_list_rules, dest, "audit_list_rules"); + if (IS_ERR(tsk)) { + kfree(dest); + err = PTR_ERR(tsk); } - audit_send_reply(pid, seq, AUDIT_LIST, 1, 1, NULL, 0); break; case AUDIT_ADD: if (!(entry = kmalloc(sizeof(*entry), GFP_KERNEL))) -- cgit v1.2.3 From 4a4cd633b575609b741a1de7837223a2d9e1c34c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 22 Jun 2005 14:56:47 +0100 Subject: AUDIT: Optimise the audit-disabled case for discarding user messages Also exempt USER_AVC message from being discarded to preserve existing behaviour for SE Linux. 
Signed-off-by: David Woodhouse --- include/linux/audit.h | 7 ++++--- kernel/audit.c | 32 ++++++++++++++------------------ kernel/auditsc.c | 21 ++++++++++++++++----- 3 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 5d1a9dda5acb..77adef640537 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -51,7 +51,8 @@ #define AUDIT_WATCH_LIST 1009 /* List all file/dir watches */ #define AUDIT_SIGNAL_INFO 1010 /* Get info about sender of signal to auditd */ -#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages uninteresting to kernel */ +#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */ +#define AUDIT_USER_AVC 1107 /* We filter this differently */ #define AUDIT_LAST_USER_MSG 1199 #define AUDIT_DAEMON_START 1200 /* Daemon startup record */ @@ -235,7 +236,7 @@ extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); -extern int audit_filter_user(struct task_struct *tsk, int type); +extern int audit_filter_user(int pid, int type); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) @@ -252,7 +253,7 @@ extern int audit_filter_user(struct task_struct *tsk, int type); #define audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) -#define audit_filter_user(struct ({ 1; }) +#define audit_filter_user(p,t) ({ 1; }) #endif #ifdef CONFIG_AUDIT diff --git a/kernel/audit.c b/kernel/audit.c index c1ab8dbbb67b..09a37581213b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -429,25 +429,21 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) break; case AUDIT_USER: case AUDIT_FIRST_USER_MSG...AUDIT_LAST_USER_MSG: - 
read_lock(&tasklist_lock); - tsk = find_task_by_pid(pid); - if (tsk) - get_task_struct(tsk); - read_unlock(&tasklist_lock); - if (!tsk) - return -ESRCH; - - if (audit_enabled && audit_filter_user(tsk, msg_type)) { - ab = audit_log_start(NULL, msg_type); - if (ab) { - audit_log_format(ab, - "user pid=%d uid=%u auid=%u msg='%.1024s'", - pid, uid, loginuid, (char *)data); - audit_set_pid(ab, pid); - audit_log_end(ab); - } + if (!audit_enabled && msg_type != AUDIT_USER_AVC) + return 0; + + err = audit_filter_user(pid, msg_type); + if (err == 1) { + err = 0; + ab = audit_log_start(NULL, msg_type); + if (ab) { + audit_log_format(ab, + "user pid=%d uid=%u auid=%u msg='%.1024s'", + pid, uid, loginuid, (char *)data); + audit_set_pid(ab, pid); + audit_log_end(ab); + } } - put_task_struct(tsk); break; case AUDIT_ADD: case AUDIT_DEL: diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cb8a44945157..fc858b0c044a 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -530,22 +530,33 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_BUILD_CONTEXT; } -int audit_filter_user(struct task_struct *tsk, int type) +int audit_filter_user(int pid, int type) { + struct task_struct *tsk; struct audit_entry *e; enum audit_state state; + int ret = 1; - if (audit_pid && tsk->pid == audit_pid) - return AUDIT_DISABLED; + read_lock(&tasklist_lock); + tsk = find_task_by_pid(pid); + if (tsk) + get_task_struct(tsk); + read_unlock(&tasklist_lock); + + if (!tsk) + return -ESRCH; rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) { if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { - rcu_read_unlock(); - return state != AUDIT_DISABLED; + if (state == AUDIT_DISABLED) + ret = 0; + break; } } rcu_read_unlock(); + put_task_struct(tsk); + return 1; /* Audit by default */ } -- cgit v1.2.3 From 9ad9ad385be27fcc7c16d290d972c6173e780a61 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 22 Jun 2005 15:04:33 +0100 Subject: 
AUDIT: Wait for backlog to clear when generating messages. Add a gfp_mask to audit_log_start() and audit_log(), to reduce the amount of GFP_ATOMIC allocation -- most of it doesn't need to be GFP_ATOMIC. Also if the mask includes __GFP_WAIT, then wait up to 60 seconds for the auditd backlog to clear instead of immediately abandoning the message. The timeout should probably be made configurable, but for now it'll suffice that it only happens if auditd is actually running. Signed-off-by: David Woodhouse --- include/linux/audit.h | 8 +++--- kernel/audit.c | 60 +++++++++++++++++++++++++++++++----------- kernel/auditsc.c | 14 +++++----- security/selinux/avc.c | 4 +-- security/selinux/hooks.c | 2 +- security/selinux/ss/services.c | 4 +-- 6 files changed, 61 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 77adef640537..2f56546eb248 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -259,11 +259,11 @@ extern int audit_filter_user(int pid, int type); #ifdef CONFIG_AUDIT /* These are defined in audit.c */ /* Public API */ -extern void audit_log(struct audit_context *ctx, int type, - const char *fmt, ...) - __attribute__((format(printf,3,4))); +extern void audit_log(struct audit_context *ctx, int gfp_mask, + int type, const char *fmt, ...) + __attribute__((format(printf,4,5))); -extern struct audit_buffer *audit_log_start(struct audit_context *ctx,int type); +extern struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, int type); extern void audit_log_format(struct audit_buffer *ab, const char *fmt, ...) 
__attribute__((format(printf,2,3))); diff --git a/kernel/audit.c b/kernel/audit.c index 09a37581213b..644ab825118b 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -106,6 +106,7 @@ static LIST_HEAD(audit_freelist); static struct sk_buff_head audit_skb_queue; static struct task_struct *kauditd_task; static DECLARE_WAIT_QUEUE_HEAD(kauditd_wait); +static DECLARE_WAIT_QUEUE_HEAD(audit_backlog_wait); /* The netlink socket is only to be read by 1 CPU, which lets us assume * that list additions and deletions never happen simultaneously in @@ -130,6 +131,7 @@ struct audit_buffer { struct list_head list; struct sk_buff *skb; /* formatted skb ready to send */ struct audit_context *ctx; /* NULL or associated context */ + int gfp_mask; }; static void audit_set_pid(struct audit_buffer *ab, pid_t pid) @@ -226,7 +228,7 @@ static int audit_set_rate_limit(int limit, uid_t loginuid) { int old = audit_rate_limit; audit_rate_limit = limit; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_rate_limit=%d old=%d by auid=%u", audit_rate_limit, old, loginuid); return old; @@ -236,7 +238,7 @@ static int audit_set_backlog_limit(int limit, uid_t loginuid) { int old = audit_backlog_limit; audit_backlog_limit = limit; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_backlog_limit=%d old=%d by auid=%u", audit_backlog_limit, old, loginuid); return old; @@ -248,7 +250,7 @@ static int audit_set_enabled(int state, uid_t loginuid) if (state != 0 && state != 1) return -EINVAL; audit_enabled = state; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_enabled=%d old=%d by auid=%u", audit_enabled, old, loginuid); return old; @@ -262,7 +264,7 @@ static int audit_set_failure(int state, uid_t loginuid) && state != AUDIT_FAIL_PANIC) return -EINVAL; audit_failure = state; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, 
"audit_failure=%d old=%d by auid=%u", audit_failure, old, loginuid); return old; @@ -274,6 +276,7 @@ int kauditd_thread(void *dummy) while (1) { skb = skb_dequeue(&audit_skb_queue); + wake_up(&audit_backlog_wait); if (skb) { if (audit_pid) { int err = netlink_unicast(audit_sock, skb, audit_pid, 0); @@ -417,7 +420,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (status_get->mask & AUDIT_STATUS_PID) { int old = audit_pid; audit_pid = status_get->pid; - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "audit_pid=%d old=%d by auid=%u", audit_pid, old, loginuid); } @@ -435,7 +438,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) err = audit_filter_user(pid, msg_type); if (err == 1) { err = 0; - ab = audit_log_start(NULL, msg_type); + ab = audit_log_start(NULL, GFP_KERNEL, msg_type); if (ab) { audit_log_format(ab, "user pid=%d uid=%u auid=%u msg='%.1024s'", @@ -522,7 +525,7 @@ static int __init audit_init(void) skb_queue_head_init(&audit_skb_queue); audit_initialized = 1; audit_enabled = audit_default; - audit_log(NULL, AUDIT_KERNEL, "initialized"); + audit_log(NULL, GFP_KERNEL, AUDIT_KERNEL, "initialized"); return 0; } __initcall(audit_init); @@ -586,6 +589,7 @@ static struct audit_buffer * audit_buffer_alloc(struct audit_context *ctx, goto err; ab->ctx = ctx; + ab->gfp_mask = gfp_mask; nlh = (struct nlmsghdr *)skb_put(ab->skb, NLMSG_SPACE(0)); nlh->nlmsg_type = type; nlh->nlmsg_flags = 0; @@ -644,17 +648,42 @@ static inline void audit_get_stamp(struct audit_context *ctx, * syscall, then the syscall is marked as auditable and an audit record * will be written at syscall exit. If there is no associated task, tsk * should be NULL. 
*/ -struct audit_buffer *audit_log_start(struct audit_context *ctx, int type) + +struct audit_buffer *audit_log_start(struct audit_context *ctx, int gfp_mask, + int type) { struct audit_buffer *ab = NULL; struct timespec t; unsigned int serial; + int reserve; if (!audit_initialized) return NULL; - if (audit_backlog_limit - && skb_queue_len(&audit_skb_queue) > audit_backlog_limit) { + if (gfp_mask & __GFP_WAIT) + reserve = 0; + else + reserve = 5; /* Allow atomic callers to go up to five + entries over the normal backlog limit */ + + while (audit_backlog_limit + && skb_queue_len(&audit_skb_queue) > audit_backlog_limit + reserve) { + if (gfp_mask & __GFP_WAIT) { + int ret = 1; + /* Wait for auditd to drain the queue a little */ + DECLARE_WAITQUEUE(wait, current); + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&audit_backlog_wait, &wait); + + if (audit_backlog_limit && + skb_queue_len(&audit_skb_queue) > audit_backlog_limit) + ret = schedule_timeout(HZ * 60); + + __set_current_state(TASK_RUNNING); + remove_wait_queue(&audit_backlog_wait, &wait); + if (ret) + continue; + } if (audit_rate_check()) printk(KERN_WARNING "audit: audit_backlog=%d > " @@ -665,7 +694,7 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, int type) return NULL; } - ab = audit_buffer_alloc(ctx, GFP_ATOMIC, type); + ab = audit_buffer_alloc(ctx, gfp_mask, type); if (!ab) { audit_log_lost("out of memory in audit_log_start"); return NULL; @@ -689,7 +718,7 @@ static inline int audit_expand(struct audit_buffer *ab, int extra) { struct sk_buff *skb = ab->skb; int ret = pskb_expand_head(skb, skb_headroom(skb), extra, - GFP_ATOMIC); + ab->gfp_mask); if (ret < 0) { audit_log_lost("out of memory in audit_expand"); return 0; @@ -808,7 +837,7 @@ void audit_log_d_path(struct audit_buffer *ab, const char *prefix, audit_log_format(ab, " %s", prefix); /* We will allow 11 spaces for ' (deleted)' to be appended */ - path = kmalloc(PATH_MAX+11, GFP_KERNEL); + path = kmalloc(PATH_MAX+11, 
ab->gfp_mask); if (!path) { audit_log_format(ab, "<no memory>"); return; @@ -849,12 +878,13 @@ void audit_log_end(struct audit_buffer *ab) /* Log an audit record. This is a convenience function that calls * audit_log_start, audit_log_vformat, and audit_log_end. It may be * called in any context. */ -void audit_log(struct audit_context *ctx, int type, const char *fmt, ...) +void audit_log(struct audit_context *ctx, int gfp_mask, int type, + const char *fmt, ...) { struct audit_buffer *ab; va_list args; - ab = audit_log_start(ctx, type); + ab = audit_log_start(ctx, gfp_mask, type); if (ab) { va_start(args, fmt); audit_log_vformat(ab, fmt, args); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fc858b0c044a..f463fd230846 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -346,7 +346,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, } listnr = entry->rule.flags & ~AUDIT_FILTER_PREPEND; audit_add_rule(entry, &audit_filter_list[listnr]); - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "auid=%u added an audit rule\n", loginuid); break; case AUDIT_DEL: @@ -356,7 +356,7 @@ int audit_receive_filter(int type, int pid, int uid, int seq, void *data, err = audit_del_rule(data, &audit_filter_list[listnr]); if (!err) - audit_log(NULL, AUDIT_CONFIG_CHANGE, + audit_log(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE, "auid=%u removed an audit rule\n", loginuid); break; default: @@ -756,7 +756,7 @@ static void audit_log_exit(struct audit_context *context) struct audit_buffer *ab; struct audit_aux_data *aux; - ab = audit_log_start(context, AUDIT_SYSCALL); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); if (!ab) return; /* audit_panic has been called */ audit_log_format(ab, "arch=%x syscall=%d", @@ -788,7 +788,7 @@ static void audit_log_exit(struct audit_context *context) for (aux = context->aux; aux; aux = aux->next) { - ab = audit_log_start(context, aux->type); + ab = audit_log_start(context, GFP_KERNEL, 
aux->type); if (!ab) continue; /* audit_panic has been called */ @@ -825,14 +825,14 @@ static void audit_log_exit(struct audit_context *context) } if (context->pwd && context->pwdmnt) { - ab = audit_log_start(context, AUDIT_CWD); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD); if (ab) { audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt); audit_log_end(ab); } } for (i = 0; i < context->name_count; i++) { - ab = audit_log_start(context, AUDIT_PATH); + ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH); if (!ab) continue; /* audit_panic has been called */ @@ -1118,7 +1118,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) if (task->audit_context) { struct audit_buffer *ab; - ab = audit_log_start(NULL, AUDIT_LOGIN); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_LOGIN); if (ab) { audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u", diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 451502467a9b..2d088bb65ee8 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -242,7 +242,7 @@ void __init avc_init(void) avc_node_cachep = kmem_cache_create("avc_node", sizeof(struct avc_node), 0, SLAB_PANIC, NULL, NULL); - audit_log(current->audit_context, AUDIT_KERNEL, "AVC INITIALIZED\n"); + audit_log(current->audit_context, GFP_KERNEL, AUDIT_KERNEL, "AVC INITIALIZED\n"); } int avc_get_hash_stats(char *page) @@ -550,7 +550,7 @@ void avc_audit(u32 ssid, u32 tsid, return; } - ab = audit_log_start(current->audit_context, AUDIT_AVC); + ab = audit_log_start(current->audit_context, GFP_ATOMIC, AUDIT_AVC); if (!ab) return; /* audit_panic has been called */ audit_log_format(ab, "avc: %s ", denied ? 
"denied" : "granted"); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index db845cbd5841..b5220a266dce 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3419,7 +3419,7 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) err = selinux_nlmsg_lookup(isec->sclass, nlh->nlmsg_type, &perm); if (err) { if (err == -EINVAL) { - audit_log(current->audit_context, AUDIT_SELINUX_ERR, + audit_log(current->audit_context, GFP_KERNEL, AUDIT_SELINUX_ERR, "SELinux: unrecognized netlink message" " type=%hu for sclass=%hu\n", nlh->nlmsg_type, isec->sclass); diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index b6149147d5cb..2947cf85dc56 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -365,7 +365,7 @@ static int security_validtrans_handle_fail(struct context *ocontext, goto out; if (context_struct_to_string(tcontext, &t, &tlen) < 0) goto out; - audit_log(current->audit_context, AUDIT_SELINUX_ERR, + audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "security_validate_transition: denied for" " oldcontext=%s newcontext=%s taskcontext=%s tclass=%s", o, n, t, policydb.p_class_val_to_name[tclass-1]); @@ -742,7 +742,7 @@ static int compute_sid_handle_invalid_context( goto out; if (context_struct_to_string(newcontext, &n, &nlen) < 0) goto out; - audit_log(current->audit_context, AUDIT_SELINUX_ERR, + audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, "security_compute_sid: invalid context %s" " for scontext=%s" " tcontext=%s" -- cgit v1.2.3 From 559fb51ba7e66fe298b8355fabde1275b7def35f Mon Sep 17 00:00:00 2001 From: Scott Bardone Date: Thu, 23 Jun 2005 01:40:19 -0400 Subject: Update Chelsio gige net driver. - Use extern prefix for functions required. - Removed a lot of wrappers, including t1_read/write_reg_4. - Removed various macros, using native kernel calls now. - Enumerated various #defines. 
- Removed a lot of shared code which is not currently used in "NIC only" mode. - Removed dead code. Documentation/networking/cxgb.txt: - Updated release notes for version 2.1.1 drivers/net/chelsio/ch_ethtool.h - removed file, no longer using ETHTOOL namespace. drivers/net/chelsio/common.h - moved code from osdep.h to common.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/cphy.h - removed dead code. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/cxgb2.c - use DMA_{32,64}BIT_MASK in include/linux/dma-mapping.h. - removed unused code. - use printk message for link info resembling drivers/net/mii.c. - no longer using the MODULE_xxx namespace. - no longer using "pci_" namespace. - no longer using ETHTOOL namespace. drivers/net/chelsio/cxgb2.h - removed file, merged into common.h drivers/net/chelsio/elmer0.h - removed dead code. - added various enums. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/espi.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. drivers/net/chelsio/espi.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/gmac.h - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/mv88x201x.c - changes to sync with Chelsio TOT. drivers/net/chelsio/osdep.h - removed file, consolidation. osdep was used to translate wrapper functions since our code supports multiple OSs. removed wrappers. drivers/net/chelsio/pm3393.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. - removed unused code. drivers/net/chelsio/regs.h - added a few register entries for future and current feature support. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/sge.c - rewrote large portion of scatter-gather engine to stabilize performance. 
- using u8/u16/u32 kernel types instead of __u8/__u16/__u32 compiler types. drivers/net/chelsio/sge.h - rewrote large portion of scatter-gather engine to stabilize performance. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/subr.c - merged tp.c into subr.c - removed various macros, using native kernel calls now. - removed a lot of wrappers, including t1_read/write_reg_4. - removed unused code. drivers/net/chelsio/suni1x10gexp_regs.h - modified copyright and authorship of file. - added comment to #endif indicating which symbol it closes. drivers/net/chelsio/tp.c - removed file, merged into subr.c. drivers/net/chelsio/tp.h - removed file. include/linux/pci_ids.h - patched to include PCI_VENDOR_ID_CHELSIO 0x1425, removed define from our code. --- Documentation/networking/cxgb.txt | 72 +- drivers/net/chelsio/Makefile | 3 +- drivers/net/chelsio/ch_ethtool.h | 102 -- drivers/net/chelsio/common.h | 259 +++-- drivers/net/chelsio/cphy.h | 14 +- drivers/net/chelsio/cpl5_cmd.h | 118 +- drivers/net/chelsio/cxgb2.c | 537 ++++----- drivers/net/chelsio/cxgb2.h | 122 -- drivers/net/chelsio/elmer0.h | 16 +- drivers/net/chelsio/espi.c | 168 ++- drivers/net/chelsio/espi.h | 11 +- drivers/net/chelsio/gmac.h | 11 +- drivers/net/chelsio/mv88x201x.c | 36 +- drivers/net/chelsio/osdep.h | 169 --- drivers/net/chelsio/pm3393.c | 45 +- drivers/net/chelsio/regs.h | 21 +- drivers/net/chelsio/sge.c | 1859 +++++++++++++++++-------------- drivers/net/chelsio/sge.h | 48 +- drivers/net/chelsio/subr.c | 235 ++-- drivers/net/chelsio/suni1x10gexp_regs.h | 20 +- drivers/net/chelsio/tp.c | 188 ---- drivers/net/chelsio/tp.h | 110 -- include/linux/pci_ids.h | 1 + 23 files changed, 1882 insertions(+), 2283 deletions(-) delete mode 100644 drivers/net/chelsio/ch_ethtool.h delete mode 100644 drivers/net/chelsio/cxgb2.h delete mode 100644 drivers/net/chelsio/osdep.h delete mode 100644 drivers/net/chelsio/tp.c delete mode 100644 drivers/net/chelsio/tp.h (limited to 
'include/linux') diff --git a/Documentation/networking/cxgb.txt b/Documentation/networking/cxgb.txt index 9f2eb646c6f5..76324638626b 100644 --- a/Documentation/networking/cxgb.txt +++ b/Documentation/networking/cxgb.txt @@ -2,9 +2,9 @@ Driver Release Notes for Linux - Version 2.1.0 + Version 2.1.1 - March 8, 2005 + June 20, 2005 CONTENTS ======== @@ -21,8 +21,7 @@ INTRODUCTION This document describes the Linux driver for Chelsio 10Gb Ethernet Network Controller. This driver supports the Chelsio N210 NIC and is backward - compatible with the Chelsio N110 model 10Gb NICs. This driver supports AMD64 - and EM64T, and x86 systems. + compatible with the Chelsio N110 model 10Gb NICs. FEATURES @@ -121,23 +120,17 @@ PERFORMANCE Disabling SACK: sysctl -w net.ipv4.tcp_sack=0 - Setting TCP read buffers (min/default/max): - sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000" - - Setting TCP write buffers (min/pressure/max): - sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" - - Setting TCP buffer space (min/pressure/max): - sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" - - Setting large number of incoming connection requests (2.6.x only): + Setting large number of incoming connection requests: sysctl -w net.ipv4.tcp_max_syn_backlog=3000 Setting maximum receive socket buffer size: - sysctl -w net.core.rmem_max=524287 + sysctl -w net.core.rmem_max=1024000 Setting maximum send socket buffer size: - sysctl -w net.core.wmem_max=524287 + sysctl -w net.core.wmem_max=1024000 + + Set smp_affinity (on a multiprocessor system) to a single CPU: + echo 1 > /proc/irq//smp_affinity Setting default receive socket buffer size: sysctl -w net.core.rmem_default=524287 @@ -151,8 +144,14 @@ PERFORMANCE Setting maximum backlog (# of unprocessed packets before kernel drops): sysctl -w net.core.netdev_max_backlog=300000 - Set smp_affinity (on a multiprocessor system) to a single CPU: - echo 00000001 > /proc/irq//smp_affinity + Setting TCP read buffers (min/default/max): + sysctl 
-w net.ipv4.tcp_rmem="10000000 10000000 10000000" + + Setting TCP write buffers (min/pressure/max): + sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000" + + Setting TCP buffer space (min/pressure/max): + sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000" TCP window size for single connections: The receive buffer (RX_WINDOW) size must be at least as large as the @@ -186,7 +185,7 @@ DRIVER MESSAGES may be found in /var/log/messages. Driver up: - Chelsio Network Driver - version 2.1.0 + Chelsio Network Driver - version 2.1.1 NIC detected: eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit @@ -282,13 +281,44 @@ KNOWN ISSUES the number of outstanding transactions, via BIOS configuration programming of the PCI-X card, to the following: - Data Length (bytes): 2k - Total allowed outstanding transactions: 1 + Data Length (bytes): 1k + Total allowed outstanding transactions: 2 Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004, section 56, "133-MHz Mode Split Completion Data Corruption" for more details with this bug and workarounds suggested by AMD. + It may be possible to work outside AMD's recommended PCI-X settings, try + increasing the Data Length to 2k bytes for increased performance. If you + have issues with these settings, please revert to the "safe" settings + and duplicate the problem before submitting a bug or asking for support. + + NOTE: The default setting on most systems is 8 outstanding transactions + and 2k bytes data length. + + 4. On multiprocessor systems, it has been noted that an application which + is handling 10Gb networking can switch between CPUs causing degraded + and/or unstable performance. + + If running on an SMP system and taking performance measurements, it + is suggested you either run the latest netperf-2.4.0+ or use a binding + tool such as Tim Hockin's procstate utilities (runon) + . 
+ + Binding netserver and netperf (or other applications) to particular + CPUs will have a significant difference in performance measurements. + You may need to experiment which CPU to bind the application to in + order to achieve the best performance for your system. + + If you are developing an application designed for 10Gb networking, + please keep in mind you may want to look at kernel functions + sched_setaffinity & sched_getaffinity to bind your application. + + If you are just running user-space applications such as ftp, telnet, + etc., you may want to try the runon tool provided by Tim Hockin's + procstate utility. You could also try binding the interface to a + particular CPU: runon 0 ifup eth0 + SUPPORT ======= diff --git a/drivers/net/chelsio/Makefile b/drivers/net/chelsio/Makefile index ff8c11b3a4e1..91e927827c43 100644 --- a/drivers/net/chelsio/Makefile +++ b/drivers/net/chelsio/Makefile @@ -7,6 +7,5 @@ obj-$(CONFIG_CHELSIO_T1) += cxgb.o EXTRA_CFLAGS += -I$(TOPDIR)/drivers/net/chelsio $(DEBUG_FLAGS) -cxgb-objs := cxgb2.o espi.o tp.o pm3393.o sge.o subr.o mv88x201x.o - +cxgb-objs := cxgb2.o espi.o pm3393.o sge.o subr.o mv88x201x.o diff --git a/drivers/net/chelsio/ch_ethtool.h b/drivers/net/chelsio/ch_ethtool.h deleted file mode 100644 index c523d24836b5..000000000000 --- a/drivers/net/chelsio/ch_ethtool.h +++ /dev/null @@ -1,102 +0,0 @@ -/***************************************************************************** - * * - * File: ch_ethtool.h * - * $Revision: 1.5 $ * - * $Date: 2005/03/23 07:15:58 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. 
* - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. * - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CHETHTOOL_LINUX_H__ -#define __CHETHTOOL_LINUX_H__ - -/* TCB size in 32-bit words */ -#define TCB_WORDS (TCB_SIZE / 4) - -enum { - ETHTOOL_SETREG, - ETHTOOL_GETREG, - ETHTOOL_SETTPI, - ETHTOOL_GETTPI, - ETHTOOL_DEVUP, - ETHTOOL_GETMTUTAB, - ETHTOOL_SETMTUTAB, - ETHTOOL_GETMTU, - ETHTOOL_SET_PM, - ETHTOOL_GET_PM, - ETHTOOL_GET_TCAM, - ETHTOOL_SET_TCAM, - ETHTOOL_GET_TCB, - ETHTOOL_READ_TCAM_WORD, -}; - -struct ethtool_reg { - uint32_t cmd; - uint32_t addr; - uint32_t val; -}; - -struct ethtool_mtus { - uint32_t cmd; - uint16_t mtus[NMTUS]; -}; - -struct ethtool_pm { - uint32_t cmd; - uint32_t tx_pg_sz; - uint32_t tx_num_pg; - uint32_t rx_pg_sz; - uint32_t rx_num_pg; - uint32_t pm_total; -}; - -struct ethtool_tcam { - uint32_t cmd; - uint32_t tcam_size; - uint32_t nservers; - uint32_t nroutes; -}; - -struct ethtool_tcb { - uint32_t cmd; - uint32_t tcb_index; - uint32_t tcb_data[TCB_WORDS]; -}; - -struct ethtool_tcam_word { - uint32_t cmd; - uint32_t addr; - uint32_t buf[3]; -}; - -#define SIOCCHETHTOOL SIOCDEVPRIVATE -#endif diff --git a/drivers/net/chelsio/common.h b/drivers/net/chelsio/common.h index 
017684ff48dc..f09348802b46 100644 --- a/drivers/net/chelsio/common.h +++ b/drivers/net/chelsio/common.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: common.h * - * $Revision: 1.5 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.21 $ * + * $Date: 2005/06/22 00:43:25 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,74 +36,101 @@ * * ****************************************************************************/ -#ifndef CHELSIO_COMMON_H -#define CHELSIO_COMMON_H +#ifndef _CXGB_COMMON_H_ +#define _CXGB_COMMON_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRV_DESCRIPTION "Chelsio 10Gb Ethernet Driver" +#define DRV_NAME "cxgb" +#define DRV_VERSION "2.1.1" +#define PFX DRV_NAME ": " + +#define CH_ERR(fmt, ...) printk(KERN_ERR PFX fmt, ## __VA_ARGS__) +#define CH_WARN(fmt, ...) printk(KERN_WARNING PFX fmt, ## __VA_ARGS__) +#define CH_ALERT(fmt, ...) 
printk(KERN_ALERT PFX fmt, ## __VA_ARGS__) + +#define CH_DEVICE(devid, ssid, idx) \ + { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, ssid, 0, 0, idx } + +#define SUPPORTED_PAUSE (1 << 13) +#define SUPPORTED_LOOPBACK (1 << 15) + +#define ADVERTISED_PAUSE (1 << 13) +#define ADVERTISED_ASYM_PAUSE (1 << 14) + +typedef struct adapter adapter_t; + +void t1_elmer0_ext_intr(adapter_t *adapter); +void t1_link_changed(adapter_t *adapter, int port_id, int link_status, + int speed, int duplex, int fc); + +struct t1_rx_mode { + struct net_device *dev; + u32 idx; + struct dev_mc_list *list; +}; + +#define t1_rx_mode_promisc(rm) (rm->dev->flags & IFF_PROMISC) +#define t1_rx_mode_allmulti(rm) (rm->dev->flags & IFF_ALLMULTI) +#define t1_rx_mode_mc_cnt(rm) (rm->dev->mc_count) + +static inline u8 *t1_get_next_mcaddr(struct t1_rx_mode *rm) +{ + u8 *addr = 0; -#define DIMOF(x) (sizeof(x)/sizeof(x[0])) + if (rm->idx++ < rm->dev->mc_count) { + addr = rm->list->dmi_addr; + rm->list = rm->list->next; + } + return addr; +} + +#define MAX_NPORTS 4 -#define NMTUS 8 -#define MAX_NPORTS 4 -#define TCB_SIZE 128 +#define SPEED_INVALID 0xffff +#define DUPLEX_INVALID 0xff enum { - CHBT_BOARD_7500, - CHBT_BOARD_8000, - CHBT_BOARD_CHT101, - CHBT_BOARD_CHT110, - CHBT_BOARD_CHT210, - CHBT_BOARD_CHT204, CHBT_BOARD_N110, - CHBT_BOARD_N210, - CHBT_BOARD_COUGAR, - CHBT_BOARD_6800, - CHBT_BOARD_SIMUL + CHBT_BOARD_N210 }; enum { - CHBT_TERM_FPGA, CHBT_TERM_T1, - CHBT_TERM_T2, - CHBT_TERM_T3 + CHBT_TERM_T2 }; enum { - CHBT_MAC_CHELSIO_A, - CHBT_MAC_IXF1010, CHBT_MAC_PM3393, - CHBT_MAC_VSC7321, - CHBT_MAC_DUMMY }; enum { - CHBT_PHY_88E1041, - CHBT_PHY_88E1111, CHBT_PHY_88X2010, - CHBT_PHY_XPAK, - CHBT_PHY_MY3126, - CHBT_PHY_DUMMY }; enum { - PAUSE_RX = 1, - PAUSE_TX = 2, - PAUSE_AUTONEG = 4 + PAUSE_RX = 1 << 0, + PAUSE_TX = 1 << 1, + PAUSE_AUTONEG = 1 << 2 }; /* Revisions of T1 chip */ -#define TERM_T1A 0 -#define TERM_T1B 1 -#define TERM_T2 3 - -struct tp_params { - unsigned int pm_size; - unsigned int cm_size; 
- unsigned int pm_rx_base; - unsigned int pm_tx_base; - unsigned int pm_rx_pg_size; - unsigned int pm_tx_pg_size; - unsigned int pm_rx_num_pgs; - unsigned int pm_tx_num_pgs; - unsigned int use_5tuple_mode; +enum { + TERM_T1A = 0, + TERM_T1B = 1, + TERM_T2 = 3 }; struct sge_params { @@ -118,17 +145,7 @@ struct sge_params { unsigned int polling; }; -struct mc5_params { - unsigned int mode; /* selects MC5 width */ - unsigned int nservers; /* size of server region */ - unsigned int nroutes; /* size of routing region */ -}; - -/* Default MC5 region sizes */ -#define DEFAULT_SERVER_REGION_LEN 256 -#define DEFAULT_RT_REGION_LEN 1024 - -struct pci_params { +struct chelsio_pci_params { unsigned short speed; unsigned char width; unsigned char is_pcix; @@ -136,31 +153,14 @@ struct pci_params { struct adapter_params { struct sge_params sge; - struct mc5_params mc5; - struct tp_params tp; - struct pci_params pci; + struct chelsio_pci_params pci; const struct board_info *brd_info; - unsigned short mtus[NMTUS]; - unsigned int nports; /* # of ethernet ports */ + unsigned int nports; /* # of ethernet ports */ unsigned int stats_update_period; unsigned short chip_revision; unsigned char chip_version; - unsigned char is_asic; -}; - -struct pci_err_cnt { - unsigned int master_parity_err; - unsigned int sig_target_abort; - unsigned int rcv_target_abort; - unsigned int rcv_master_abort; - unsigned int sig_sys_err; - unsigned int det_parity_err; - unsigned int pio_parity_err; - unsigned int wf_parity_err; - unsigned int rf_parity_err; - unsigned int cf_parity_err; }; struct link_config { @@ -175,8 +175,60 @@ struct link_config { unsigned char autoneg; /* autonegotiating? 
*/ }; -#define SPEED_INVALID 0xffff -#define DUPLEX_INVALID 0xff +struct cmac; +struct cphy; + +struct port_info { + struct net_device *dev; + struct cmac *mac; + struct cphy *phy; + struct link_config link_config; + struct net_device_stats netstats; +}; + +struct sge; +struct peespi; + +struct adapter { + u8 *regs; + struct pci_dev *pdev; + unsigned long registered_device_map; + unsigned long open_device_map; + unsigned long flags; + + const char *name; + int msg_enable; + u32 mmio_len; + + struct work_struct ext_intr_handler_task; + struct adapter_params params; + + struct vlan_group *vlan_grp; + + /* Terminator modules. */ + struct sge *sge; + struct peespi *espi; + + struct port_info port[MAX_NPORTS]; + struct work_struct stats_update_task; + struct timer_list stats_update_timer; + + struct semaphore mib_mutex; + spinlock_t tpi_lock; + spinlock_t work_lock; + /* guards async operations */ + spinlock_t async_lock ____cacheline_aligned; + u32 slow_intr_mask; +}; + +enum { /* adapter flags */ + FULL_INIT_DONE = 1 << 0, + TSO_CAPABLE = 1 << 2, + TCP_CSUM_CAPABLE = 1 << 3, + UDP_CSUM_CAPABLE = 1 << 4, + VLAN_ACCEL_CAPABLE = 1 << 5, + RX_CSUM_ENABLED = 1 << 6, +}; struct mdio_ops; struct gmac; @@ -205,19 +257,8 @@ struct board_info { const char *desc; }; -#include "osdep.h" - -#ifndef PCI_VENDOR_ID_CHELSIO -#define PCI_VENDOR_ID_CHELSIO 0x1425 -#endif - extern struct pci_device_id t1_pci_tbl[]; -static inline int t1_is_asic(const adapter_t *adapter) -{ - return adapter->params.is_asic; -} - static inline int adapter_matches_type(const adapter_t *adapter, int version, int revision) { @@ -245,25 +286,29 @@ static inline unsigned int core_ticks_per_usec(const adapter_t *adap) return board_info(adap)->clock_core / 1000000; } -int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value); -int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *value); +extern int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value); +extern int t1_tpi_read(adapter_t *adapter, u32 addr, u32 
*value); -void t1_interrupts_enable(adapter_t *adapter); -void t1_interrupts_disable(adapter_t *adapter); -void t1_interrupts_clear(adapter_t *adapter); -int elmer0_ext_intr_handler(adapter_t *adapter); -int t1_slow_intr_handler(adapter_t *adapter); +extern void t1_interrupts_enable(adapter_t *adapter); +extern void t1_interrupts_disable(adapter_t *adapter); +extern void t1_interrupts_clear(adapter_t *adapter); +extern int elmer0_ext_intr_handler(adapter_t *adapter); +extern int t1_slow_intr_handler(adapter_t *adapter); -int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); -const struct board_info *t1_get_board_info(unsigned int board_id); -const struct board_info *t1_get_board_info_from_ids(unsigned int devid, +extern int t1_link_start(struct cphy *phy, struct cmac *mac, struct link_config *lc); +extern const struct board_info *t1_get_board_info(unsigned int board_id); +extern const struct board_info *t1_get_board_info_from_ids(unsigned int devid, unsigned short ssid); -int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data); -int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, +extern int t1_seeprom_read(adapter_t *adapter, u32 addr, u32 *data); +extern int t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, struct adapter_params *p); -int t1_init_hw_modules(adapter_t *adapter); -int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); -void t1_free_sw_modules(adapter_t *adapter); -void t1_fatal_err(adapter_t *adapter); -#endif +extern int t1_init_hw_modules(adapter_t *adapter); +extern int t1_init_sw_modules(adapter_t *adapter, const struct board_info *bi); +extern void t1_free_sw_modules(adapter_t *adapter); +extern void t1_fatal_err(adapter_t *adapter); + +extern void t1_tp_set_udp_checksum_offload(adapter_t *adapter, int enable); +extern void t1_tp_set_tcp_checksum_offload(adapter_t *adapter, int enable); +extern void t1_tp_set_ip_checksum_offload(adapter_t *adapter, int enable); 
+#endif /* _CXGB_COMMON_H_ */ diff --git a/drivers/net/chelsio/cphy.h b/drivers/net/chelsio/cphy.h index 1bc2248264c0..3412342f7345 100644 --- a/drivers/net/chelsio/cphy.h +++ b/drivers/net/chelsio/cphy.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cphy.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.7 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_CPHY_H -#define CHELSIO_CPHY_H +#ifndef _CXGB_CPHY_H_ +#define _CXGB_CPHY_H_ #include "common.h" @@ -142,9 +142,7 @@ struct gphy { int (*reset)(adapter_t *adapter); }; -extern struct gphy t1_my3126_ops; -extern struct gphy t1_mv88e1xxx_ops; -extern struct gphy t1_xpak_ops; extern struct gphy t1_mv88x201x_ops; extern struct gphy t1_dummy_phy_ops; -#endif + +#endif /* _CXGB_CPHY_H_ */ diff --git a/drivers/net/chelsio/cpl5_cmd.h b/drivers/net/chelsio/cpl5_cmd.h index 45e9248979f1..27925e487bcf 100644 --- a/drivers/net/chelsio/cpl5_cmd.h +++ b/drivers/net/chelsio/cpl5_cmd.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cpl5_cmd.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef _CPL5_CMD_H -#define _CPL5_CMD_H +#ifndef _CXGB_CPL5_CMD_H_ +#define _CXGB_CPL5_CMD_H_ #include @@ -59,12 +59,12 @@ enum { /* TX_PKT_LSO ethernet types */ }; struct cpl_rx_data { - __u32 rsvd0; - __u32 len; - __u32 seq; - __u16 urg; - __u8 rsvd1; - __u8 status; + u32 rsvd0; + u32 len; + u32 seq; + u16 urg; + u8 rsvd1; + u8 status; }; /* @@ -73,73 +73,73 @@ struct cpl_rx_data { * used so we break it into 2 16-bit parts to easily meet our alignment needs. */ struct cpl_tx_pkt { - __u8 opcode; + u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 ip_csum_dis:1; - __u8 l4_csum_dis:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 ip_csum_dis:1; + u8 l4_csum_dis:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 l4_csum_dis:1; - __u8 ip_csum_dis:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 l4_csum_dis:1; + u8 ip_csum_dis:1; + u8 iff:4; #endif - __u16 vlan; - __u16 len_hi; - __u16 len_lo; + u16 vlan; + u16 len_hi; + u16 len_lo; }; struct cpl_tx_pkt_lso { - __u8 opcode; + u8 opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 ip_csum_dis:1; - __u8 l4_csum_dis:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 ip_csum_dis:1; + u8 l4_csum_dis:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 l4_csum_dis:1; - __u8 ip_csum_dis:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 l4_csum_dis:1; + u8 ip_csum_dis:1; + u8 iff:4; #endif - __u16 vlan; - __u32 len; + u16 vlan; + u32 len; - __u32 rsvd2; - __u8 rsvd3; + u32 rsvd2; + u8 rsvd3; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 tcp_hdr_words:4; - __u8 ip_hdr_words:4; + u8 tcp_hdr_words:4; + u8 ip_hdr_words:4; #else - __u8 ip_hdr_words:4; - __u8 tcp_hdr_words:4; + u8 ip_hdr_words:4; + u8 tcp_hdr_words:4; #endif - __u16 eth_type_mss; + u16 eth_type_mss; }; struct cpl_rx_pkt { - __u8 opcode; + u8 
opcode; #if defined(__LITTLE_ENDIAN_BITFIELD) - __u8 iff:4; - __u8 csum_valid:1; - __u8 bad_pkt:1; - __u8 vlan_valid:1; - __u8 rsvd:1; + u8 iff:4; + u8 csum_valid:1; + u8 bad_pkt:1; + u8 vlan_valid:1; + u8 rsvd:1; #else - __u8 rsvd:1; - __u8 vlan_valid:1; - __u8 bad_pkt:1; - __u8 csum_valid:1; - __u8 iff:4; + u8 rsvd:1; + u8 vlan_valid:1; + u8 bad_pkt:1; + u8 csum_valid:1; + u8 iff:4; #endif - __u16 csum; - __u16 vlan; - __u16 len; + u16 csum; + u16 vlan; + u16 len; }; -#endif +#endif /* _CXGB_CPL5_CMD_H_ */ diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c index 48c4d5acfcd1..28ae478b386d 100644 --- a/drivers/net/chelsio/cxgb2.c +++ b/drivers/net/chelsio/cxgb2.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: cxgb2.c * - * $Revision: 1.11 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.25 $ * + * $Date: 2005/06/22 00:43:25 $ * * Description: * * Chelsio 10Gb Ethernet Driver. * * * @@ -37,7 +37,6 @@ ****************************************************************************/ #include "common.h" - #include #include #include @@ -48,44 +47,56 @@ #include #include #include -#include -#include +#include #include -#include "ch_ethtool.h" #include "cpl5_cmd.h" #include "regs.h" #include "gmac.h" #include "cphy.h" #include "sge.h" -#include "tp.h" #include "espi.h" +#ifdef work_struct +#include +#define INIT_WORK INIT_TQUEUE +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks + static inline void schedule_mac_stats_update(struct adapter *ap, int secs) { - schedule_delayed_work(&ap->stats_update_task, secs * HZ); + mod_timer(&ap->stats_update_timer, jiffies + secs * HZ); } static inline void cancel_mac_stats_update(struct adapter *ap) { - cancel_delayed_work(&ap->stats_update_task); + del_timer_sync(&ap->stats_update_timer); + flush_scheduled_tasks(); } -#if BITS_PER_LONG == 64 && !defined(CONFIG_X86_64) -# define FMT64 "l" -#else -# define FMT64 
"ll" -#endif +/* + * Stats update timer for 2.4. It schedules a task to do the actual update as + * we need to access MAC statistics in process context. + */ +static void mac_stats_timer(unsigned long data) +{ + struct adapter *ap = (struct adapter *)data; -# define DRV_TYPE "" -# define MODULE_DESC "Chelsio Network Driver" + schedule_task(&ap->stats_update_task); +} +#else +#include -static char driver_name[] = DRV_NAME; -static char driver_string[] = "Chelsio " DRV_TYPE "Network Driver"; -static char driver_version[] = "2.1.0"; +static inline void schedule_mac_stats_update(struct adapter *ap, int secs) +{ + schedule_delayed_work(&ap->stats_update_task, secs * HZ); +} -#define PCI_DMA_64BIT ~0ULL -#define PCI_DMA_32BIT 0xffffffffULL +static inline void cancel_mac_stats_update(struct adapter *ap) +{ + cancel_delayed_work(&ap->stats_update_task); +} +#endif #define MAX_CMDQ_ENTRIES 16384 #define MAX_CMDQ1_ENTRIES 1024 @@ -107,10 +118,9 @@ static char driver_version[] = "2.1.0"; */ #define EEPROM_SIZE 32 -MODULE_DESCRIPTION(MODULE_DESC); +MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_AUTHOR("Chelsio Communications"); MODULE_LICENSE("GPL"); -MODULE_DEVICE_TABLE(pci, t1_pci_tbl); static int dflt_msg_enable = DFLT_MSG_ENABLE; @@ -140,17 +150,17 @@ static void t1_set_rxmode(struct net_device *dev) static void link_report(struct port_info *p) { if (!netif_carrier_ok(p->dev)) - printk(KERN_INFO "%s: link is down\n", p->dev->name); + printk(KERN_INFO "%s: link down\n", p->dev->name); else { - const char *s = "10 Mbps"; + const char *s = "10Mbps"; switch (p->link_config.speed) { - case SPEED_10000: s = "10 Gbps"; break; - case SPEED_1000: s = "1000 Mbps"; break; - case SPEED_100: s = "100 Mbps"; break; + case SPEED_10000: s = "10Gbps"; break; + case SPEED_1000: s = "1000Mbps"; break; + case SPEED_100: s = "100Mbps"; break; } - printk(KERN_INFO "%s: link is up at %s, %s duplex\n", + printk(KERN_INFO "%s: link up, %s, %s-duplex\n", p->dev->name, s, p->link_config.duplex == 
DUPLEX_FULL ? "full" : "half"); } @@ -186,10 +196,8 @@ static void link_start(struct port_info *p) static void enable_hw_csum(struct adapter *adapter) { if (adapter->flags & TSO_CAPABLE) - t1_tp_set_ip_checksum_offload(adapter->tp, 1); /* for TSO only */ - if (adapter->flags & UDP_CSUM_CAPABLE) - t1_tp_set_udp_checksum_offload(adapter->tp, 1); - t1_tp_set_tcp_checksum_offload(adapter->tp, 1); + t1_tp_set_ip_checksum_offload(adapter, 1); /* for TSO only */ + t1_tp_set_tcp_checksum_offload(adapter, 1); } /* @@ -210,15 +218,13 @@ static int cxgb_up(struct adapter *adapter) } t1_interrupts_clear(adapter); - - if ((err = request_irq(adapter->pdev->irq, &t1_interrupt, SA_SHIRQ, - adapter->name, adapter))) + if ((err = request_irq(adapter->pdev->irq, + t1_select_intr_handler(adapter), SA_SHIRQ, + adapter->name, adapter))) { goto out_err; - + } t1_sge_start(adapter->sge); t1_interrupts_enable(adapter); - - err = 0; out_err: return err; } @@ -339,47 +345,80 @@ static void set_msglevel(struct net_device *dev, u32 val) } static char stats_strings[][ETH_GSTRING_LEN] = { - "TxOctetsOK", - "TxOctetsBad", - "TxUnicastFramesOK", - "TxMulticastFramesOK", - "TxBroadcastFramesOK", - "TxPauseFrames", - "TxFramesWithDeferredXmissions", - "TxLateCollisions", - "TxTotalCollisions", - "TxFramesAbortedDueToXSCollisions", - "TxUnderrun", - "TxLengthErrors", - "TxInternalMACXmitError", - "TxFramesWithExcessiveDeferral", - "TxFCSErrors", - - "RxOctetsOK", - "RxOctetsBad", - "RxUnicastFramesOK", - "RxMulticastFramesOK", - "RxBroadcastFramesOK", - "RxPauseFrames", - "RxFCSErrors", - "RxAlignErrors", - "RxSymbolErrors", - "RxDataErrors", - "RxSequenceErrors", - "RxRuntErrors", - "RxJabberErrors", - "RxInternalMACRcvError", - "RxInRangeLengthErrors", - "RxOutOfRangeLengthField", - "RxFrameTooLongErrors" + "TxOctetsOK", + "TxOctetsBad", + "TxUnicastFramesOK", + "TxMulticastFramesOK", + "TxBroadcastFramesOK", + "TxPauseFrames", + "TxFramesWithDeferredXmissions", + "TxLateCollisions", + 
"TxTotalCollisions", + "TxFramesAbortedDueToXSCollisions", + "TxUnderrun", + "TxLengthErrors", + "TxInternalMACXmitError", + "TxFramesWithExcessiveDeferral", + "TxFCSErrors", + + "RxOctetsOK", + "RxOctetsBad", + "RxUnicastFramesOK", + "RxMulticastFramesOK", + "RxBroadcastFramesOK", + "RxPauseFrames", + "RxFCSErrors", + "RxAlignErrors", + "RxSymbolErrors", + "RxDataErrors", + "RxSequenceErrors", + "RxRuntErrors", + "RxJabberErrors", + "RxInternalMACRcvError", + "RxInRangeLengthErrors", + "RxOutOfRangeLengthField", + "RxFrameTooLongErrors", + + "TSO", + "VLANextractions", + "VLANinsertions", + "RxCsumGood", + "TxCsumOffload", + "RxDrops" + + "respQ_empty", + "respQ_overflow", + "freelistQ_empty", + "pkt_too_big", + "pkt_mismatch", + "cmdQ_full0", + "cmdQ_full1", + "tx_ipfrags", + "tx_reg_pkts", + "tx_lso_pkts", + "tx_do_cksum", + + "espi_DIP2ParityErr", + "espi_DIP4Err", + "espi_RxDrops", + "espi_TxDrops", + "espi_RxOvfl", + "espi_ParityErr" }; + +#define T2_REGMAP_SIZE (3 * 1024) + +static int get_regs_len(struct net_device *dev) +{ + return T2_REGMAP_SIZE; +} static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { struct adapter *adapter = dev->priv; - strcpy(info->driver, driver_name); - strcpy(info->version, driver_version); + strcpy(info->driver, DRV_NAME); + strcpy(info->version, DRV_VERSION); strcpy(info->fw_version, "N/A"); strcpy(info->bus_info, pci_name(adapter->pdev)); } @@ -401,42 +440,88 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, struct adapter *adapter = dev->priv; struct cmac *mac = adapter->port[dev->if_port].mac; const struct cmac_statistics *s; + const struct sge_port_stats *ss; + const struct sge_intr_counts *t; s = mac->ops->statistics_update(mac, MAC_STATS_UPDATE_FULL); + ss = t1_sge_get_port_stats(adapter->sge, dev->if_port); + t = t1_sge_get_intr_counts(adapter->sge); + + *data++ = s->TxOctetsOK; + *data++ = s->TxOctetsBad; + *data++ = s->TxUnicastFramesOK; + *data++ = 
s->TxMulticastFramesOK; + *data++ = s->TxBroadcastFramesOK; + *data++ = s->TxPauseFrames; + *data++ = s->TxFramesWithDeferredXmissions; + *data++ = s->TxLateCollisions; + *data++ = s->TxTotalCollisions; + *data++ = s->TxFramesAbortedDueToXSCollisions; + *data++ = s->TxUnderrun; + *data++ = s->TxLengthErrors; + *data++ = s->TxInternalMACXmitError; + *data++ = s->TxFramesWithExcessiveDeferral; + *data++ = s->TxFCSErrors; + + *data++ = s->RxOctetsOK; + *data++ = s->RxOctetsBad; + *data++ = s->RxUnicastFramesOK; + *data++ = s->RxMulticastFramesOK; + *data++ = s->RxBroadcastFramesOK; + *data++ = s->RxPauseFrames; + *data++ = s->RxFCSErrors; + *data++ = s->RxAlignErrors; + *data++ = s->RxSymbolErrors; + *data++ = s->RxDataErrors; + *data++ = s->RxSequenceErrors; + *data++ = s->RxRuntErrors; + *data++ = s->RxJabberErrors; + *data++ = s->RxInternalMACRcvError; + *data++ = s->RxInRangeLengthErrors; + *data++ = s->RxOutOfRangeLengthField; + *data++ = s->RxFrameTooLongErrors; + + *data++ = ss->tso; + *data++ = ss->vlan_xtract; + *data++ = ss->vlan_insert; + *data++ = ss->rx_cso_good; + *data++ = ss->tx_cso; + *data++ = ss->rx_drops; + + *data++ = (u64)t->respQ_empty; + *data++ = (u64)t->respQ_overflow; + *data++ = (u64)t->freelistQ_empty; + *data++ = (u64)t->pkt_too_big; + *data++ = (u64)t->pkt_mismatch; + *data++ = (u64)t->cmdQ_full[0]; + *data++ = (u64)t->cmdQ_full[1]; + *data++ = (u64)t->tx_ipfrags; + *data++ = (u64)t->tx_reg_pkts; + *data++ = (u64)t->tx_lso_pkts; + *data++ = (u64)t->tx_do_cksum; +} + +static inline void reg_block_dump(struct adapter *ap, void *buf, + unsigned int start, unsigned int end) +{ + u32 *p = buf + start; + + for ( ; start <= end; start += sizeof(u32)) + *p++ = readl(ap->regs + start); +} - *data++ = s->TxOctetsOK; - *data++ = s->TxOctetsBad; - *data++ = s->TxUnicastFramesOK; - *data++ = s->TxMulticastFramesOK; - *data++ = s->TxBroadcastFramesOK; - *data++ = s->TxPauseFrames; - *data++ = s->TxFramesWithDeferredXmissions; - *data++ = 
s->TxLateCollisions; - *data++ = s->TxTotalCollisions; - *data++ = s->TxFramesAbortedDueToXSCollisions; - *data++ = s->TxUnderrun; - *data++ = s->TxLengthErrors; - *data++ = s->TxInternalMACXmitError; - *data++ = s->TxFramesWithExcessiveDeferral; - *data++ = s->TxFCSErrors; - - *data++ = s->RxOctetsOK; - *data++ = s->RxOctetsBad; - *data++ = s->RxUnicastFramesOK; - *data++ = s->RxMulticastFramesOK; - *data++ = s->RxBroadcastFramesOK; - *data++ = s->RxPauseFrames; - *data++ = s->RxFCSErrors; - *data++ = s->RxAlignErrors; - *data++ = s->RxSymbolErrors; - *data++ = s->RxDataErrors; - *data++ = s->RxSequenceErrors; - *data++ = s->RxRuntErrors; - *data++ = s->RxJabberErrors; - *data++ = s->RxInternalMACRcvError; - *data++ = s->RxInRangeLengthErrors; - *data++ = s->RxOutOfRangeLengthField; - *data++ = s->RxFrameTooLongErrors; +static void get_regs(struct net_device *dev, struct ethtool_regs *regs, + void *buf) +{ + struct adapter *ap = dev->priv; + + /* + * Version scheme: bits 0..9: chip version, bits 10..15: chip revision + */ + regs->version = 2; + + memset(buf, 0, T2_REGMAP_SIZE); + reg_block_dump(ap, buf, 0, A_SG_RESPACCUTIMER); } static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd) @@ -455,12 +540,12 @@ static int get_settings(struct net_device *dev, struct ethtool_cmd *cmd) cmd->duplex = -1; } - cmd->port = (cmd->supported & SUPPORTED_TP) ? PORT_TP : PORT_FIBRE; - cmd->phy_address = p->phy->addr; - cmd->transceiver = XCVR_EXTERNAL; - cmd->autoneg = p->link_config.autoneg; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + cmd->port = (cmd->supported & SUPPORTED_TP) ? 
PORT_TP : PORT_FIBRE; + cmd->phy_address = p->phy->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = p->link_config.autoneg; + cmd->maxtxpkt = 0; + cmd->maxrxpkt = 0; return 0; } @@ -506,7 +591,7 @@ static int set_settings(struct net_device *dev, struct ethtool_cmd *cmd) struct link_config *lc = &p->link_config; if (!(lc->supported & SUPPORTED_Autoneg)) - return -EOPNOTSUPP; /* can't change speed/duplex */ + return -EOPNOTSUPP; /* can't change speed/duplex */ if (cmd->autoneg == AUTONEG_DISABLE) { int cap = speed_duplex_to_caps(cmd->speed, cmd->duplex); @@ -631,7 +716,7 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e) return -EINVAL; if (adapter->flags & FULL_INIT_DONE) - return -EBUSY; + return -EBUSY; adapter->params.sge.freelQ_size[!jumbo_fl] = e->rx_pending; adapter->params.sge.freelQ_size[jumbo_fl] = e->rx_jumbo_pending; @@ -645,22 +730,20 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { struct adapter *adapter = dev->priv; - unsigned int sge_coalesce_usecs = 0; + /* + * If RX coalescing is requested we use NAPI, otherwise interrupts. + * This choice can be made only when all ports and the TOE are off. 
+ */ + if (adapter->open_device_map == 0) + adapter->params.sge.polling = c->use_adaptive_rx_coalesce; - sge_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; - sge_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; - if ( (adapter->params.sge.coalesce_enable && !c->use_adaptive_rx_coalesce) && - (c->rx_coalesce_usecs == sge_coalesce_usecs) ) { - adapter->params.sge.rx_coalesce_usecs = - adapter->params.sge.default_rx_coalesce_usecs; + if (adapter->params.sge.polling) { + adapter->params.sge.rx_coalesce_usecs = 0; } else { adapter->params.sge.rx_coalesce_usecs = c->rx_coalesce_usecs; } - - adapter->params.sge.last_rx_coalesce_raw = adapter->params.sge.rx_coalesce_usecs; - adapter->params.sge.last_rx_coalesce_raw *= (board_info(adapter)->clock_core / 1000000); + adapter->params.sge.coalesce_enable = c->use_adaptive_rx_coalesce; adapter->params.sge.sample_interval_usecs = c->rate_sample_interval; - adapter->params.sge.coalesce_enable = c->use_adaptive_rx_coalesce; t1_sge_set_coalesce_params(adapter->sge, &adapter->params.sge); return 0; } @@ -669,12 +752,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) { struct adapter *adapter = dev->priv; - if (adapter->params.sge.coalesce_enable) { /* Adaptive algorithm on */ - c->rx_coalesce_usecs = adapter->params.sge.last_rx_coalesce_raw; - c->rx_coalesce_usecs /= board_info(adapter)->clock_core / 1000000; - } else { - c->rx_coalesce_usecs = adapter->params.sge.rx_coalesce_usecs; - } + c->rx_coalesce_usecs = adapter->params.sge.rx_coalesce_usecs; c->rate_sample_interval = adapter->params.sge.sample_interval_usecs; c->use_adaptive_rx_coalesce = adapter->params.sge.coalesce_enable; return 0; @@ -682,9 +760,7 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c) static int get_eeprom_len(struct net_device *dev) { - struct adapter *adapter = dev->priv; - - return t1_is_asic(adapter) ? 
EEPROM_SIZE : 0; + return EEPROM_SIZE; } #define EEPROM_MAGIC(ap) \ @@ -728,118 +804,55 @@ static struct ethtool_ops t1_ethtool_ops = { .get_strings = get_strings, .get_stats_count = get_stats_count, .get_ethtool_stats = get_stats, + .get_regs_len = get_regs_len, + .get_regs = get_regs, .get_tso = ethtool_op_get_tso, .set_tso = set_tso, }; -static int ethtool_ioctl(struct net_device *dev, void *useraddr) +static void cxgb_proc_cleanup(struct adapter *adapter, + struct proc_dir_entry *dir) { - u32 cmd; - struct adapter *adapter = dev->priv; - - if (copy_from_user(&cmd, useraddr, sizeof(cmd))) - return -EFAULT; - - switch (cmd) { - case ETHTOOL_SETREG: { - struct ethtool_reg edata; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) - return -EINVAL; - if (edata.addr == A_ESPI_MISC_CONTROL) - t1_espi_set_misc_ctrl(adapter, edata.val); - else { - if (edata.addr == 0x950) - t1_sge_set_ptimeout(adapter, edata.val); - else - writel(edata.val, adapter->regs + edata.addr); - } - break; - } - case ETHTOOL_GETREG: { - struct ethtool_reg edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0 || edata.addr >= adapter->mmio_len) - return -EINVAL; - if (edata.addr >= 0x900 && edata.addr <= 0x93c) - edata.val = t1_espi_get_mon(adapter, edata.addr, 1); - else { - if (edata.addr == 0x950) - edata.val = t1_sge_get_ptimeout(adapter); - else - edata.val = readl(adapter->regs + edata.addr); - } - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - break; - } - case ETHTOOL_SETTPI: { - struct ethtool_reg edata; - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0) - return -EINVAL; - t1_tpi_write(adapter, edata.addr, edata.val); - break; - } - case ETHTOOL_GETTPI: { - struct 
ethtool_reg edata; - - if (copy_from_user(&edata, useraddr, sizeof(edata))) - return -EFAULT; - if ((edata.addr & 3) != 0) - return -EINVAL; - t1_tpi_read(adapter, edata.addr, &edata.val); - if (copy_to_user(useraddr, &edata, sizeof(edata))) - return -EFAULT; - break; - } - default: - return -EOPNOTSUPP; - } - return 0; + const char *name; + name = adapter->name; + remove_proc_entry(name, dir); } +//#define chtoe_setup_toedev(adapter) NULL +#define update_mtu_tab(adapter) +#define write_smt_entry(adapter, idx) static int t1_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { - struct adapter *adapter = dev->priv; - struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; + struct adapter *adapter = dev->priv; + struct mii_ioctl_data *data = (struct mii_ioctl_data *)&req->ifr_data; switch (cmd) { - case SIOCGMIIPHY: - data->phy_id = adapter->port[dev->if_port].phy->addr; - /* FALLTHRU */ - case SIOCGMIIREG: { + case SIOCGMIIPHY: + data->phy_id = adapter->port[dev->if_port].phy->addr; + /* FALLTHRU */ + case SIOCGMIIREG: { struct cphy *phy = adapter->port[dev->if_port].phy; u32 val; - if (!phy->mdio_read) return -EOPNOTSUPP; + if (!phy->mdio_read) + return -EOPNOTSUPP; phy->mdio_read(adapter, data->phy_id, 0, data->reg_num & 0x1f, &val); - data->val_out = val; - break; + data->val_out = val; + break; } - case SIOCSMIIREG: { + case SIOCSMIIREG: { struct cphy *phy = adapter->port[dev->if_port].phy; - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (!phy->mdio_write) return -EOPNOTSUPP; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (!phy->mdio_write) + return -EOPNOTSUPP; phy->mdio_write(adapter, data->phy_id, 0, data->reg_num & 0x1f, - data->val_in); - break; + data->val_in); + break; } - case SIOCCHETHTOOL: - return ethtool_ioctl(dev, (void *)req->ifr_data); default: return -EOPNOTSUPP; } @@ -853,9 +866,9 @@ static int t1_change_mtu(struct net_device *dev, int new_mtu) struct cmac *mac = adapter->port[dev->if_port].mac; if 
(!mac->ops->set_mtu) - return -EOPNOTSUPP; + return -EOPNOTSUPP; if (new_mtu < 68) - return -EINVAL; + return -EINVAL; if ((ret = mac->ops->set_mtu(mac, new_mtu))) return ret; dev->mtu = new_mtu; @@ -902,9 +915,12 @@ static void vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) #ifdef CONFIG_NET_POLL_CONTROLLER static void t1_netpoll(struct net_device *dev) { + unsigned long flags; struct adapter *adapter = dev->priv; - t1_interrupt(adapter->pdev->irq, adapter, NULL); + local_irq_save(flags); + t1_select_intr_handler(adapter)(adapter->pdev->irq, adapter, NULL); + local_irq_restore(flags); } #endif @@ -938,16 +954,17 @@ static void mac_stats_task(void *data) */ static void ext_intr_task(void *data) { - u32 enable; struct adapter *adapter = data; elmer0_ext_intr_handler(adapter); /* Now reenable external interrupts */ - t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_EXT); - enable = t1_read_reg_4(adapter, A_PL_ENABLE); - t1_write_reg_4(adapter, A_PL_ENABLE, enable | F_PL_INTR_EXT); + spin_lock_irq(&adapter->async_lock); adapter->slow_intr_mask |= F_PL_INTR_EXT; + writel(F_PL_INTR_EXT, adapter->regs + A_PL_CAUSE); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); + spin_unlock_irq(&adapter->async_lock); } /* @@ -955,15 +972,14 @@ static void ext_intr_task(void *data) */ void t1_elmer0_ext_intr(struct adapter *adapter) { - u32 enable = t1_read_reg_4(adapter, A_PL_ENABLE); - /* * Schedule a task to handle external interrupts as we require * a process context. We disable EXT interrupts in the interim * and let the task reenable them when it's done. 
*/ adapter->slow_intr_mask &= ~F_PL_INTR_EXT; - t1_write_reg_4(adapter, A_PL_ENABLE, enable & ~F_PL_INTR_EXT); + writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA, + adapter->regs + A_PL_ENABLE); schedule_work(&adapter->ext_intr_handler_task); } @@ -977,7 +993,6 @@ void t1_fatal_err(struct adapter *adapter) adapter->name); } - static int __devinit init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -990,14 +1005,14 @@ static int __devinit init_one(struct pci_dev *pdev, struct port_info *pi; if (!version_printed) { - printk(KERN_INFO "%s - version %s\n", driver_string, - driver_version); + printk(KERN_INFO "%s - version %s\n", DRV_DESCRIPTION, + DRV_VERSION); ++version_printed; } err = pci_enable_device(pdev); if (err) - return err; + return err; if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { CH_ERR("%s: cannot find PCI device memory base address\n", @@ -1006,20 +1021,22 @@ static int __devinit init_one(struct pci_dev *pdev, goto out_disable_pdev; } - if (!pci_set_dma_mask(pdev, PCI_DMA_64BIT)) { + if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) { pci_using_dac = 1; - if (pci_set_consistent_dma_mask(pdev, PCI_DMA_64BIT)) { + + if (pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK)) { CH_ERR("%s: unable to obtain 64-bit DMA for" "consistent allocations\n", pci_name(pdev)); err = -ENODEV; goto out_disable_pdev; } - } else if ((err = pci_set_dma_mask(pdev, PCI_DMA_32BIT)) != 0) { + + } else if ((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK)) != 0) { CH_ERR("%s: no usable DMA configuration\n", pci_name(pdev)); goto out_disable_pdev; } - err = pci_request_regions(pdev, driver_name); + err = pci_request_regions(pdev, DRV_NAME); if (err) { CH_ERR("%s: cannot obtain PCI resources\n", pci_name(pdev)); goto out_disable_pdev; @@ -1027,7 +1044,7 @@ static int __devinit init_one(struct pci_dev *pdev, pci_set_master(pdev); - mmio_start = pci_resource_start(pdev, 0); + mmio_start = pci_resource_start(pdev, 0); mmio_len = pci_resource_len(pdev, 0); bi = 
t1_get_board_info(ent->driver_data); @@ -1074,9 +1091,14 @@ static int __devinit init_one(struct pci_dev *pdev, ext_intr_task, adapter); INIT_WORK(&adapter->stats_update_task, mac_stats_task, adapter); +#ifdef work_struct + init_timer(&adapter->stats_update_timer); + adapter->stats_update_timer.function = mac_stats_timer; + adapter->stats_update_timer.data = + (unsigned long)adapter; +#endif pci_set_drvdata(pdev, netdev); - } pi = &adapter->port[i]; @@ -1088,11 +1110,12 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->mem_end = mmio_start + mmio_len - 1; netdev->priv = adapter; netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM; + netdev->features |= NETIF_F_LLTX; + adapter->flags |= RX_CSUM_ENABLED | TCP_CSUM_CAPABLE; if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; if (vlan_tso_capable(adapter)) { - adapter->flags |= UDP_CSUM_CAPABLE; #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) adapter->flags |= VLAN_ACCEL_CAPABLE; netdev->features |= @@ -1120,7 +1143,7 @@ static int __devinit init_one(struct pci_dev *pdev, #endif netdev->weight = 64; - SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); + SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops); } if (t1_init_sw_modules(adapter, bi) < 0) { @@ -1147,7 +1170,7 @@ static int __devinit init_one(struct pci_dev *pdev, if (!adapter->registered_device_map) adapter->name = adapter->port[i].dev->name; - __set_bit(i, &adapter->registered_device_map); + __set_bit(i, &adapter->registered_device_map); } } if (!adapter->registered_device_map) { @@ -1166,11 +1189,12 @@ static int __devinit init_one(struct pci_dev *pdev, t1_free_sw_modules(adapter); out_free_dev: if (adapter) { - if (adapter->regs) - iounmap(adapter->regs); + if (adapter->regs) iounmap(adapter->regs); for (i = bi->port_number - 1; i >= 0; --i) - if (adapter->port[i].dev) - free_netdev(adapter->port[i].dev); + if (adapter->port[i].dev) { + cxgb_proc_cleanup(adapter, proc_root_driver); + kfree(adapter->port[i].dev); + } } 
pci_release_regions(pdev); out_disable_pdev: @@ -1200,8 +1224,10 @@ static void __devexit remove_one(struct pci_dev *pdev) t1_free_sw_modules(adapter); iounmap(adapter->regs); while (--i >= 0) - if (adapter->port[i].dev) - free_netdev(adapter->port[i].dev); + if (adapter->port[i].dev) { + cxgb_proc_cleanup(adapter, proc_root_driver); + kfree(adapter->port[i].dev); + } pci_release_regions(pdev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); @@ -1210,7 +1236,7 @@ static void __devexit remove_one(struct pci_dev *pdev) } static struct pci_driver driver = { - .name = driver_name, + .name = DRV_NAME, .id_table = t1_pci_tbl, .probe = init_one, .remove = __devexit_p(remove_one), @@ -1228,4 +1254,3 @@ static void __exit t1_cleanup_module(void) module_init(t1_init_module); module_exit(t1_cleanup_module); - diff --git a/drivers/net/chelsio/cxgb2.h b/drivers/net/chelsio/cxgb2.h deleted file mode 100644 index 6ac326afcf01..000000000000 --- a/drivers/net/chelsio/cxgb2.h +++ /dev/null @@ -1,122 +0,0 @@ -/***************************************************************************** - * * - * File: cxgb2.h * - * $Revision: 1.8 $ * - * $Date: 2005/03/23 07:41:27 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. 
* - * All rights reserved. * - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CXGB_LINUX_H__ -#define __CXGB_LINUX_H__ - -#include -#include -#include -#include -#include - -/* This belongs in if_ether.h */ -#define ETH_P_CPL5 0xf - -struct cmac; -struct cphy; - -struct port_info { - struct net_device *dev; - struct cmac *mac; - struct cphy *phy; - struct link_config link_config; - struct net_device_stats netstats; -}; - -struct cxgbdev; -struct t1_sge; -struct pemc3; -struct pemc4; -struct pemc5; -struct peulp; -struct petp; -struct pecspi; -struct peespi; -struct work_struct; -struct vlan_group; - -enum { /* adapter flags */ - FULL_INIT_DONE = 0x1, - USING_MSI = 0x2, - TSO_CAPABLE = 0x4, - TCP_CSUM_CAPABLE = 0x8, - UDP_CSUM_CAPABLE = 0x10, - VLAN_ACCEL_CAPABLE = 0x20, - RX_CSUM_ENABLED = 0x40, -}; - -struct adapter { - u8 *regs; - struct pci_dev *pdev; - unsigned long registered_device_map; - unsigned long open_device_map; - unsigned int flags; - - const char *name; - int msg_enable; - u32 mmio_len; - - struct work_struct ext_intr_handler_task; - struct adapter_params params; - - struct vlan_group *vlan_grp; - - /* Terminator modules. 
*/ - struct sge *sge; - struct pemc3 *mc3; - struct pemc4 *mc4; - struct pemc5 *mc5; - struct petp *tp; - struct pecspi *cspi; - struct peespi *espi; - struct peulp *ulp; - - struct port_info port[MAX_NPORTS]; - struct work_struct stats_update_task; - struct timer_list stats_update_timer; - - struct semaphore mib_mutex; - spinlock_t tpi_lock; - spinlock_t work_lock; - - spinlock_t async_lock ____cacheline_aligned; /* guards async operations */ - u32 slow_intr_mask; -}; - -#endif diff --git a/drivers/net/chelsio/elmer0.h b/drivers/net/chelsio/elmer0.h index 08f148643e7f..5590cb2dac19 100644 --- a/drivers/net/chelsio/elmer0.h +++ b/drivers/net/chelsio/elmer0.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: elmer0.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 22:49:43 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,14 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_ELMER0_H -#define CHELSIO_ELMER0_H - -/* ELMER0 flavors */ -enum { - ELMER0_XC2S300E_6FT256_C, - ELMER0_XC2S100E_6TQ144_C -}; +#ifndef _CXGB_ELMER0_H_ +#define _CXGB_ELMER0_H_ /* ELMER0 registers */ #define A_ELMER0_VERSION 0x100000 @@ -154,4 +148,4 @@ enum { #define MI1_OP_INDIRECT_READ_INC 2 #define MI1_OP_INDIRECT_READ 3 -#endif +#endif /* _CXGB_ELMER0_H_ */ diff --git a/drivers/net/chelsio/espi.c b/drivers/net/chelsio/espi.c index 7ec2dc7bafac..230642571c92 100644 --- a/drivers/net/chelsio/espi.c +++ b/drivers/net/chelsio/espi.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: espi.c * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.14 $ * + * $Date: 2005/05/14 00:59:32 $ * * Description: * * Ethernet SPI functionality. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -63,15 +63,16 @@ static int tricn_write(adapter_t *adapter, int bundle_addr, int module_addr, { int busy, attempts = TRICN_CMD_ATTEMPTS; - t1_write_reg_4(adapter, A_ESPI_CMD_ADDR, V_WRITE_DATA(wr_data) | - V_REGISTER_OFFSET(reg_offset) | - V_CHANNEL_ADDR(ch_addr) | V_MODULE_ADDR(module_addr) | - V_BUNDLE_ADDR(bundle_addr) | - V_SPI4_COMMAND(TRICN_CMD_WRITE)); - t1_write_reg_4(adapter, A_ESPI_GOSTAT, 0); + writel(V_WRITE_DATA(wr_data) | + V_REGISTER_OFFSET(reg_offset) | + V_CHANNEL_ADDR(ch_addr) | V_MODULE_ADDR(module_addr) | + V_BUNDLE_ADDR(bundle_addr) | + V_SPI4_COMMAND(TRICN_CMD_WRITE), + adapter->regs + A_ESPI_CMD_ADDR); + writel(0, adapter->regs + A_ESPI_GOSTAT); do { - busy = t1_read_reg_4(adapter, A_ESPI_GOSTAT) & F_ESPI_CMD_BUSY; + busy = readl(adapter->regs + A_ESPI_GOSTAT) & F_ESPI_CMD_BUSY; } while (busy && --attempts); if (busy) @@ -99,12 +100,12 @@ static int tricn_init(adapter_t *adapter) /* 1 */ timeout=1000; do { - stat = t1_read_reg_4(adapter, A_ESPI_RX_RESET); + stat = readl(adapter->regs + A_ESPI_RX_RESET); is_ready = (stat & 0x4); timeout--; udelay(5); } while (!is_ready || (timeout==0)); - t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x2); + writel(0x2, adapter->regs + A_ESPI_RX_RESET); if (timeout==0) { CH_ERR("ESPI : ERROR : Timeout tricn_init() \n"); @@ -127,14 +128,14 @@ static int tricn_init(adapter_t *adapter) for (i=8; i<= 8; i++) tricn_write(adapter, 0, 2, i, TRICN_CNFG, 0xf1); /* 3 */ - t1_write_reg_4(adapter, A_ESPI_RX_RESET, 0x3); + writel(0x3, adapter->regs + A_ESPI_RX_RESET); return 0; } void t1_espi_intr_enable(struct peespi *espi) { - u32 enable, pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + u32 enable, pl_intr = readl(espi->adapter->regs + A_PL_ENABLE); /* * Cannot enable ESPI interrupts on T1B because HW asserts the @@ -144,28 +145,28 @@ void t1_espi_intr_enable(struct peespi *espi) * cannot be cleared (HW bug). */ enable = t1_is_T1B(espi->adapter) ? 
0 : ESPI_INTR_MASK; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, enable); - t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr | F_PL_INTR_ESPI); + writel(enable, espi->adapter->regs + A_ESPI_INTR_ENABLE); + writel(pl_intr | F_PL_INTR_ESPI, espi->adapter->regs + A_PL_ENABLE); } void t1_espi_intr_clear(struct peespi *espi) { - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, 0xffffffff); - t1_write_reg_4(espi->adapter, A_PL_CAUSE, F_PL_INTR_ESPI); + writel(0xffffffff, espi->adapter->regs + A_ESPI_INTR_STATUS); + writel(F_PL_INTR_ESPI, espi->adapter->regs + A_PL_CAUSE); } void t1_espi_intr_disable(struct peespi *espi) { - u32 pl_intr = t1_read_reg_4(espi->adapter, A_PL_ENABLE); + u32 pl_intr = readl(espi->adapter->regs + A_PL_ENABLE); - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, 0); - t1_write_reg_4(espi->adapter, A_PL_ENABLE, pl_intr & ~F_PL_INTR_ESPI); + writel(0, espi->adapter->regs + A_ESPI_INTR_ENABLE); + writel(pl_intr & ~F_PL_INTR_ESPI, espi->adapter->regs + A_PL_ENABLE); } int t1_espi_intr_handler(struct peespi *espi) { u32 cnt; - u32 status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + u32 status = readl(espi->adapter->regs + A_ESPI_INTR_STATUS); if (status & F_DIP4ERR) espi->intr_cnt.DIP4_err++; @@ -184,7 +185,7 @@ int t1_espi_intr_handler(struct peespi *espi) * Must read the error count to clear the interrupt * that it causes. */ - cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + cnt = readl(espi->adapter->regs + A_ESPI_DIP2_ERR_COUNT); } /* @@ -193,68 +194,28 @@ int t1_espi_intr_handler(struct peespi *espi) */ if (status && t1_is_T1B(espi->adapter)) status = 1; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + writel(status, espi->adapter->regs + A_ESPI_INTR_STATUS); return 0; } -static void espi_setup_for_pm3393(adapter_t *adapter) +const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi) { - u32 wmark = t1_is_T1B(adapter) ? 
0x4000 : 0x3200; - - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(1) | V_TX_NPORTS(1)); + return &espi->intr_cnt; } -static void espi_setup_for_vsc7321(adapter_t *adapter) +static void espi_setup_for_pm3393(adapter_t *adapter) { u32 wmark = t1_is_T1B(adapter) ? 0x4000 : 0x3200; - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN0, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN1, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN2, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_SCH_TOKEN3, 0x1f4); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, 0x100); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, wmark); - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 3); - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0x08000008); - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(1) | V_TX_NPORTS(1)); -} - -/* - * Note that T1B requires at least 2 ports for IXF1010 due to a HW bug. 
- */ -static void espi_setup_for_ixf1010(adapter_t *adapter, int nports) -{ - t1_write_reg_4(adapter, A_ESPI_CALENDAR_LENGTH, 1); - if (nports == 4) { - if (is_T2(adapter)) { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0xf00); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x3c0); - } else { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0x7ff); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x1ff); - } - } else { - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK, - 0x1fff); - t1_write_reg_4(adapter, A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK, - 0x7ff); - } - t1_write_reg_4(adapter, A_PORT_CONFIG, - V_RX_NPORTS(nports) | V_TX_NPORTS(nports)); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN0); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN1); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN2); + writel(0x1f4, adapter->regs + A_ESPI_SCH_TOKEN3); + writel(0x100, adapter->regs + A_ESPI_RX_FIFO_ALMOST_EMPTY_WATERMARK); + writel(wmark, adapter->regs + A_ESPI_RX_FIFO_ALMOST_FULL_WATERMARK); + writel(3, adapter->regs + A_ESPI_CALENDAR_LENGTH); + writel(0x08000008, adapter->regs + A_ESPI_TRAIN); + writel(V_RX_NPORTS(1) | V_TX_NPORTS(1), adapter->regs + A_PORT_CONFIG); } /* T2 Init part -- */ @@ -263,43 +224,42 @@ static void espi_setup_for_ixf1010(adapter_t *adapter, int nports) /* 3. Init TriCN Hard Macro */ int t1_espi_init(struct peespi *espi, int mac_type, int nports) { + u32 cnt; + u32 status_enable_extra = 0; adapter_t *adapter = espi->adapter; - u32 cnt; u32 status, burstval = 0x800100; /* Disable ESPI training. MACs that can handle it enable it below. 
*/ - t1_write_reg_4(adapter, A_ESPI_TRAIN, 0); + writel(0, adapter->regs + A_ESPI_TRAIN); if (is_T2(adapter)) { - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - V_OUT_OF_SYNC_COUNT(4) | - V_DIP2_PARITY_ERR_THRES(3) | V_DIP4_THRES(1)); + writel(V_OUT_OF_SYNC_COUNT(4) | + V_DIP2_PARITY_ERR_THRES(3) | + V_DIP4_THRES(1), adapter->regs + A_ESPI_MISC_CONTROL); if (nports == 4) { /* T204: maxburst1 = 0x40, maxburst2 = 0x20 */ burstval = 0x200040; } } - t1_write_reg_4(adapter, A_ESPI_MAXBURST1_MAXBURST2, burstval); + writel(burstval, adapter->regs + A_ESPI_MAXBURST1_MAXBURST2); - if (mac_type == CHBT_MAC_PM3393) + switch (mac_type) { + case CHBT_MAC_PM3393: espi_setup_for_pm3393(adapter); - else if (mac_type == CHBT_MAC_VSC7321) - espi_setup_for_vsc7321(adapter); - else if (mac_type == CHBT_MAC_IXF1010) { - status_enable_extra = F_INTEL1010MODE; - espi_setup_for_ixf1010(adapter, nports); - } else + break; + default: return -1; + } /* * Make sure any pending interrupts from the SPI are * Cleared before enabling the interrupt. 
*/ - t1_write_reg_4(espi->adapter, A_ESPI_INTR_ENABLE, ESPI_INTR_MASK); - status = t1_read_reg_4(espi->adapter, A_ESPI_INTR_STATUS); + writel(ESPI_INTR_MASK, espi->adapter->regs + A_ESPI_INTR_ENABLE); + status = readl(espi->adapter->regs + A_ESPI_INTR_STATUS); if (status & F_DIP2PARITYERR) { - cnt = t1_read_reg_4(espi->adapter, A_ESPI_DIP2_ERR_COUNT); + cnt = readl(espi->adapter->regs + A_ESPI_DIP2_ERR_COUNT); } /* @@ -308,10 +268,10 @@ int t1_espi_init(struct peespi *espi, int mac_type, int nports) */ if (status && t1_is_T1B(espi->adapter)) status = 1; - t1_write_reg_4(espi->adapter, A_ESPI_INTR_STATUS, status); + writel(status, espi->adapter->regs + A_ESPI_INTR_STATUS); - t1_write_reg_4(adapter, A_ESPI_FIFO_STATUS_ENABLE, - status_enable_extra | F_RXSTATUSENABLE); + writel(status_enable_extra | F_RXSTATUSENABLE, + adapter->regs + A_ESPI_FIFO_STATUS_ENABLE); if (is_T2(adapter)) { tricn_init(adapter); @@ -319,10 +279,10 @@ int t1_espi_init(struct peespi *espi, int mac_type, int nports) * Always position the control at the 1st port egress IN * (sop,eop) counter to reduce PIOs for T/N210 workaround. 
*/ - espi->misc_ctrl = (t1_read_reg_4(adapter, A_ESPI_MISC_CONTROL) + espi->misc_ctrl = (readl(adapter->regs + A_ESPI_MISC_CONTROL) & ~MON_MASK) | (F_MONITORED_DIRECTION | F_MONITORED_INTERFACE); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); spin_lock_init(&espi->lock); } @@ -354,15 +314,16 @@ void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val) spin_lock(&espi->lock); espi->misc_ctrl = (val & ~MON_MASK) | (espi->misc_ctrl & MON_MASK); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, espi->misc_ctrl); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); spin_unlock(&espi->lock); } u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) { - struct peespi *espi = adapter->espi; u32 sel; + struct peespi *espi = adapter->espi; + if (!is_T2(adapter)) return 0; sel = V_MONITORED_PORT_NUM((addr & 0x3c) >> 2); @@ -373,14 +334,13 @@ u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait) else spin_lock(&espi->lock); if ((sel != (espi->misc_ctrl & MON_MASK))) { - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - ((espi->misc_ctrl & ~MON_MASK) | sel)); - sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); - t1_write_reg_4(adapter, A_ESPI_MISC_CONTROL, - espi->misc_ctrl); + writel(((espi->misc_ctrl & ~MON_MASK) | sel), + adapter->regs + A_ESPI_MISC_CONTROL); + sel = readl(adapter->regs + A_ESPI_SCH_TOKEN3); + writel(espi->misc_ctrl, adapter->regs + A_ESPI_MISC_CONTROL); } else - sel = t1_read_reg_4(adapter, A_ESPI_SCH_TOKEN3); + sel = readl(adapter->regs + A_ESPI_SCH_TOKEN3); spin_unlock(&espi->lock); return sel; } diff --git a/drivers/net/chelsio/espi.h b/drivers/net/chelsio/espi.h index 0f84e8b6399f..c90e37f8457c 100644 --- a/drivers/net/chelsio/espi.h +++ b/drivers/net/chelsio/espi.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: espi.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.7 $ * + * 
$Date: 2005/06/21 18:29:47 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. * * * @@ -36,8 +36,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_ESPI_H -#define CHELSIO_ESPI_H +#ifndef _CXGB_ESPI_H_ +#define _CXGB_ESPI_H_ #include "common.h" @@ -60,8 +60,9 @@ void t1_espi_intr_enable(struct peespi *); void t1_espi_intr_clear(struct peespi *); void t1_espi_intr_disable(struct peespi *); int t1_espi_intr_handler(struct peespi *); +const struct espi_intr_counts *t1_espi_get_intr_counts(struct peespi *espi); void t1_espi_set_misc_ctrl(adapter_t *adapter, u32 val); u32 t1_espi_get_mon(adapter_t *adapter, u32 addr, u8 wait); -#endif +#endif /* _CXGB_ESPI_H_ */ diff --git a/drivers/net/chelsio/gmac.h b/drivers/net/chelsio/gmac.h index 24501e2232cc..746b0eeea964 100644 --- a/drivers/net/chelsio/gmac.h +++ b/drivers/net/chelsio/gmac.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: gmac.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:58 $ * + * $Revision: 1.6 $ * + * $Date: 2005/06/21 18:29:47 $ * * Description: * * Generic MAC functionality. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -37,8 +37,8 @@ * * ****************************************************************************/ -#ifndef CHELSIO_GMAC_H -#define CHELSIO_GMAC_H +#ifndef _CXGB_GMAC_H_ +#define _CXGB_GMAC_H_ #include "common.h" @@ -130,4 +130,5 @@ extern struct gmac t1_chelsio_mac_ops; extern struct gmac t1_vsc7321_ops; extern struct gmac t1_ixf1010_ops; extern struct gmac t1_dummy_mac_ops; -#endif + +#endif /* _CXGB_GMAC_H_ */ diff --git a/drivers/net/chelsio/mv88x201x.c b/drivers/net/chelsio/mv88x201x.c index f54133af1bce..db5034282782 100644 --- a/drivers/net/chelsio/mv88x201x.c +++ b/drivers/net/chelsio/mv88x201x.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: mv88x201x.c * - * $Revision: 1.7 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.12 $ * + * $Date: 2005/04/15 19:27:14 $ * * Description: * * Marvell PHY (mv88x201x) functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -85,33 +85,29 @@ static int mv88x201x_reset(struct cphy *cphy, int wait) static int mv88x201x_interrupt_enable(struct cphy *cphy) { + u32 elmer; + /* Enable PHY LASI interrupts. */ mdio_write(cphy, 0x1, 0x9002, 0x1); /* Enable Marvell interrupts through Elmer0. */ - if (t1_is_asic(cphy->adapter)) { - u32 elmer; - - t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); - elmer |= ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); + elmer |= ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); return 0; } static int mv88x201x_interrupt_disable(struct cphy *cphy) { + u32 elmer; + /* Disable PHY LASI interrupts. */ mdio_write(cphy, 0x1, 0x9002, 0x0); /* Disable Marvell interrupts through Elmer0. 
*/ - if (t1_is_asic(cphy->adapter)) { - u32 elmer; - - t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); - elmer &= ~ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_ENABLE, &elmer); + elmer &= ~ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_ENABLE, elmer); return 0; } @@ -144,11 +140,9 @@ static int mv88x201x_interrupt_clear(struct cphy *cphy) #endif /* Clear Marvell interrupts through Elmer0. */ - if (t1_is_asic(cphy->adapter)) { - t1_tpi_read(cphy->adapter, A_ELMER0_INT_CAUSE, &elmer); - elmer |= ELMER0_GP_BIT6; - t1_tpi_write(cphy->adapter, A_ELMER0_INT_CAUSE, elmer); - } + t1_tpi_read(cphy->adapter, A_ELMER0_INT_CAUSE, &elmer); + elmer |= ELMER0_GP_BIT6; + t1_tpi_write(cphy->adapter, A_ELMER0_INT_CAUSE, elmer); return 0; } diff --git a/drivers/net/chelsio/osdep.h b/drivers/net/chelsio/osdep.h deleted file mode 100644 index 095cb474434f..000000000000 --- a/drivers/net/chelsio/osdep.h +++ /dev/null @@ -1,169 +0,0 @@ -/***************************************************************************** - * * - * File: osdep.h * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
* - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. * - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef __CHELSIO_OSDEP_H -#define __CHELSIO_OSDEP_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "cxgb2.h" - -#define DRV_NAME "cxgb" -#define PFX DRV_NAME ": " - -#define CH_ERR(fmt, ...) printk(KERN_ERR PFX fmt, ## __VA_ARGS__) -#define CH_WARN(fmt, ...) printk(KERN_WARNING PFX fmt, ## __VA_ARGS__) -#define CH_ALERT(fmt, ...) printk(KERN_ALERT PFX fmt, ## __VA_ARGS__) - -/* - * More powerful macro that selectively prints messages based on msg_enable. - * For info and debugging messages. - */ -#define CH_MSG(adapter, level, category, fmt, ...) do { \ - if ((adapter)->msg_enable & NETIF_MSG_##category) \ - printk(KERN_##level PFX "%s: " fmt, (adapter)->name, \ - ## __VA_ARGS__); \ -} while (0) - -#ifdef DEBUG -# define CH_DBG(adapter, category, fmt, ...) \ - CH_MSG(adapter, DEBUG, category, fmt, ## __VA_ARGS__) -#else -# define CH_DBG(fmt, ...) -#endif - -/* Additional NETIF_MSG_* categories */ -#define NETIF_MSG_MMIO 0x8000000 - -#define CH_DEVICE(devid, ssid, idx) \ - { PCI_VENDOR_ID_CHELSIO, devid, PCI_ANY_ID, ssid, 0, 0, idx } - -#define SUPPORTED_PAUSE (1 << 13) -#define SUPPORTED_LOOPBACK (1 << 15) - -#define ADVERTISED_PAUSE (1 << 13) -#define ADVERTISED_ASYM_PAUSE (1 << 14) - -/* - * Now that we have included the driver's main data structure, - * we typedef it to something the rest of the system understands. 
- */ -typedef struct adapter adapter_t; - -#define TPI_LOCK(adapter) spin_lock(&(adapter)->tpi_lock) -#define TPI_UNLOCK(adapter) spin_unlock(&(adapter)->tpi_lock) - -void t1_elmer0_ext_intr(adapter_t *adapter); -void t1_link_changed(adapter_t *adapter, int port_id, int link_status, - int speed, int duplex, int fc); - -static inline u16 t1_read_reg_2(adapter_t *adapter, u32 reg_addr) -{ - u16 val = readw(adapter->regs + reg_addr); - - CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, - val); - return val; -} - -static inline void t1_write_reg_2(adapter_t *adapter, u32 reg_addr, u16 val) -{ - CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, - val); - writew(val, adapter->regs + reg_addr); -} - -static inline u32 t1_read_reg_4(adapter_t *adapter, u32 reg_addr) -{ - u32 val = readl(adapter->regs + reg_addr); - - CH_DBG(adapter, MMIO, "read register 0x%x value 0x%x\n", reg_addr, - val); - return val; -} - -static inline void t1_write_reg_4(adapter_t *adapter, u32 reg_addr, u32 val) -{ - CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, - val); - writel(val, adapter->regs + reg_addr); -} - -static inline const char *port_name(adapter_t *adapter, int port_idx) -{ - return adapter->port[port_idx].dev->name; -} - -static inline void t1_set_hw_addr(adapter_t *adapter, int port_idx, - u8 hw_addr[]) -{ - memcpy(adapter->port[port_idx].dev->dev_addr, hw_addr, ETH_ALEN); -} - -struct t1_rx_mode { - struct net_device *dev; - u32 idx; - struct dev_mc_list *list; -}; - -#define t1_rx_mode_promisc(rm) (rm->dev->flags & IFF_PROMISC) -#define t1_rx_mode_allmulti(rm) (rm->dev->flags & IFF_ALLMULTI) -#define t1_rx_mode_mc_cnt(rm) (rm->dev->mc_count) - -static inline u8 *t1_get_next_mcaddr(struct t1_rx_mode *rm) -{ - u8 *addr = 0; - - if (rm->idx++ < rm->dev->mc_count) { - addr = rm->list->dmi_addr; - rm->list = rm->list->next; - } - return addr; -} - -#endif diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c 
index 17bd20f60d99..04a1404fc65e 100644 --- a/drivers/net/chelsio/pm3393.c +++ b/drivers/net/chelsio/pm3393.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: pm3393.c * - * $Revision: 1.9 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.16 $ * + * $Date: 2005/05/14 00:59:32 $ * * Description: * * PMC/SIERRA (pm3393) MAC-PHY functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -45,15 +45,19 @@ /* 802.3ae 10Gb/s MDIO Manageable Device(MMD) */ -#define MMD_RESERVED 0 -#define MMD_PMAPMD 1 -#define MMD_WIS 2 -#define MMD_PCS 3 -#define MMD_PHY_XGXS 4 /* XGMII Extender Sublayer */ -#define MMD_DTE_XGXS 5 +enum { + MMD_RESERVED, + MMD_PMAPMD, + MMD_WIS, + MMD_PCS, + MMD_PHY_XGXS, /* XGMII Extender Sublayer */ + MMD_DTE_XGXS, +}; -#define PHY_XGXS_CTRL_1 0 -#define PHY_XGXS_STATUS_1 1 +enum { + PHY_XGXS_CTRL_1, + PHY_XGXS_STATUS_1 +}; #define OFFSET(REG_ADDR) (REG_ADDR << 2) @@ -160,9 +164,9 @@ static int pm3393_interrupt_enable(struct cmac *cmac) 0 /*SUNI1x10GEXP_BITMSK_TOP_INTE */ ); /* TERMINATOR - PL_INTERUPTS_EXT */ - pl_intr = t1_read_reg_4(cmac->adapter, A_PL_ENABLE); + pl_intr = readl(cmac->adapter->regs + A_PL_ENABLE); pl_intr |= F_PL_INTR_EXT; - t1_write_reg_4(cmac->adapter, A_PL_ENABLE, pl_intr); + writel(pl_intr, cmac->adapter->regs + A_PL_ENABLE); return 0; } @@ -242,9 +246,9 @@ static int pm3393_interrupt_clear(struct cmac *cmac) /* TERMINATOR - PL_INTERUPTS_EXT */ - pl_intr = t1_read_reg_4(cmac->adapter, A_PL_CAUSE); + pl_intr = readl(cmac->adapter->regs + A_PL_CAUSE); pl_intr |= F_PL_INTR_EXT; - t1_write_reg_4(cmac->adapter, A_PL_CAUSE, pl_intr); + writel(pl_intr, cmac->adapter->regs + A_PL_CAUSE); return 0; } @@ -261,8 +265,6 @@ static int pm3393_interrupt_handler(struct cmac *cmac) /* Read the master interrupt status register. 
*/ pmread(cmac, SUNI1x10GEXP_REG_MASTER_INTERRUPT_STATUS, &master_intr_status); - CH_DBG(cmac->adapter, INTR, "PM3393 intr cause 0x%x\n", - master_intr_status); /* TBD XXX Lets just clear everything for now */ pm3393_interrupt_clear(cmac); @@ -703,10 +705,9 @@ static struct cmac *pm3393_mac_create(adapter_t *adapter, int index) t1_tpi_write(adapter, OFFSET(0x3040), 0x0c32); /* # TXXG Config */ /* For T1 use timer based Mac flow control. */ - if (t1_is_T1B(adapter)) - t1_tpi_write(adapter, OFFSET(0x304d), 0x8000); + t1_tpi_write(adapter, OFFSET(0x304d), 0x8000); t1_tpi_write(adapter, OFFSET(0x2040), 0x059c); /* # RXXG Config */ - t1_tpi_write(adapter, OFFSET(0x2049), 0x0000); /* # RXXG Cut Through */ + t1_tpi_write(adapter, OFFSET(0x2049), 0x0001); /* # RXXG Cut Through */ t1_tpi_write(adapter, OFFSET(0x2070), 0x0000); /* # Disable promiscuous mode */ /* Setup Exact Match Filter 0 to allow broadcast packets. @@ -814,12 +815,6 @@ static int pm3393_mac_reset(adapter_t * adapter) successful_reset = (is_pl4_reset_finished && !is_pl4_outof_lock && is_xaui_mabc_pll_locked); - - CH_DBG(adapter, HW, - "PM3393 HW reset %d: pl4_reset 0x%x, val 0x%x, " - "is_pl4_outof_lock 0x%x, xaui_locked 0x%x\n", - i, is_pl4_reset_finished, val, is_pl4_outof_lock, - is_xaui_mabc_pll_locked); } return successful_reset ? 0 : 1; } diff --git a/drivers/net/chelsio/regs.h b/drivers/net/chelsio/regs.h index 5a70803eb1b6..b90e11f40d1f 100644 --- a/drivers/net/chelsio/regs.h +++ b/drivers/net/chelsio/regs.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: regs.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.8 $ * + * $Date: 2005/06/21 18:29:48 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,7 +36,8 @@ * * ****************************************************************************/ -/* Do not edit this file */ +#ifndef _CXGB_REGS_H_ +#define _CXGB_REGS_H_ /* SGE registers */ #define A_SG_CONTROL 0x0 @@ -74,6 +75,14 @@ #define V_DISABLE_CMDQ1_GTS(x) ((x) << S_DISABLE_CMDQ1_GTS) #define F_DISABLE_CMDQ1_GTS V_DISABLE_CMDQ1_GTS(1U) +#define S_DISABLE_FL0_GTS 10 +#define V_DISABLE_FL0_GTS(x) ((x) << S_DISABLE_FL0_GTS) +#define F_DISABLE_FL0_GTS V_DISABLE_FL0_GTS(1U) + +#define S_DISABLE_FL1_GTS 11 +#define V_DISABLE_FL1_GTS(x) ((x) << S_DISABLE_FL1_GTS) +#define F_DISABLE_FL1_GTS V_DISABLE_FL1_GTS(1U) + #define S_ENABLE_BIG_ENDIAN 12 #define V_ENABLE_BIG_ENDIAN(x) ((x) << S_ENABLE_BIG_ENDIAN) #define F_ENABLE_BIG_ENDIAN V_ENABLE_BIG_ENDIAN(1U) @@ -132,6 +141,7 @@ #define F_PACKET_MISMATCH V_PACKET_MISMATCH(1U) #define A_SG_INT_CAUSE 0xbc +#define A_SG_RESPACCUTIMER 0xc0 /* MC3 registers */ @@ -247,6 +257,10 @@ #define V_SYN_COOKIE_PARAMETER(x) ((x) << S_SYN_COOKIE_PARAMETER) #define A_TP_PC_CONFIG 0x348 +#define S_DIS_TX_FILL_WIN_PUSH 12 +#define V_DIS_TX_FILL_WIN_PUSH(x) ((x) << S_DIS_TX_FILL_WIN_PUSH) +#define F_DIS_TX_FILL_WIN_PUSH V_DIS_TX_FILL_WIN_PUSH(1U) + #define S_TP_PC_REV 30 #define M_TP_PC_REV 0x3 #define G_TP_PC_REV(x) (((x) >> S_TP_PC_REV) & M_TP_PC_REV) @@ -451,3 +465,4 @@ #define M_PCI_MODE_CLK 0x3 #define G_PCI_MODE_CLK(x) (((x) >> S_PCI_MODE_CLK) & M_PCI_MODE_CLK) +#endif /* _CXGB_REGS_H_ */ diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index bcf8b1e939b0..53b41d99b00b 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: sge.c * - * $Revision: 1.13 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.26 $ * + * $Date: 2005/06/21 18:29:48 $ * * Description: * * DMA engine. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -58,59 +58,62 @@ #include "regs.h" #include "espi.h" + +#ifdef NETIF_F_TSO #include +#endif #define SGE_CMDQ_N 2 #define SGE_FREELQ_N 2 -#define SGE_CMDQ0_E_N 512 +#define SGE_CMDQ0_E_N 1024 #define SGE_CMDQ1_E_N 128 #define SGE_FREEL_SIZE 4096 #define SGE_JUMBO_FREEL_SIZE 512 #define SGE_FREEL_REFILL_THRESH 16 #define SGE_RESPQ_E_N 1024 -#define SGE_INTR_BUCKETSIZE 100 -#define SGE_INTR_LATBUCKETS 5 -#define SGE_INTR_MAXBUCKETS 11 -#define SGE_INTRTIMER0 1 -#define SGE_INTRTIMER1 50 -#define SGE_INTRTIMER_NRES 10000 -#define SGE_RX_COPY_THRESHOLD 256 +#define SGE_INTRTIMER_NRES 1000 +#define SGE_RX_COPY_THRES 256 #define SGE_RX_SM_BUF_SIZE 1536 -#define SGE_RESPQ_REPLENISH_THRES ((3 * SGE_RESPQ_E_N) / 4) +# define SGE_RX_DROP_THRES 2 + +#define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) + +/* + * Period of the TX buffer reclaim timer. This timer does not need to run + * frequently as TX buffers are usually reclaimed by new TX packets. + */ +#define TX_RECLAIM_PERIOD (HZ / 4) -#define SGE_RX_OFFSET 2 #ifndef NET_IP_ALIGN -# define NET_IP_ALIGN SGE_RX_OFFSET +# define NET_IP_ALIGN 2 #endif +#define M_CMD_LEN 0x7fffffff +#define V_CMD_LEN(v) (v) +#define G_CMD_LEN(v) ((v) & M_CMD_LEN) +#define V_CMD_GEN1(v) ((v) << 31) +#define V_CMD_GEN2(v) (v) +#define F_CMD_DATAVALID (1 << 1) +#define F_CMD_SOP (1 << 2) +#define V_CMD_EOP(v) ((v) << 3) + /* - * Memory Mapped HW Command, Freelist and Response Queue Descriptors + * Command queue, receive buffer list, and response queue descriptors. 
*/ #if defined(__BIG_ENDIAN_BITFIELD) struct cmdQ_e { - u32 AddrLow; - u32 GenerationBit : 1; - u32 BufferLength : 31; - u32 RespQueueSelector : 4; - u32 ResponseTokens : 12; - u32 CmdId : 8; - u32 Reserved : 3; - u32 TokenValid : 1; - u32 Eop : 1; - u32 Sop : 1; - u32 DataValid : 1; - u32 GenerationBit2 : 1; - u32 AddrHigh; + u32 addr_lo; + u32 len_gen; + u32 flags; + u32 addr_hi; }; struct freelQ_e { - u32 AddrLow; - u32 GenerationBit : 1; - u32 BufferLength : 31; - u32 Reserved : 31; - u32 GenerationBit2 : 1; - u32 AddrHigh; + u32 addr_lo; + u32 len_gen; + u32 gen2; + u32 addr_hi; }; struct respQ_e { @@ -128,31 +131,19 @@ struct respQ_e { u32 GenerationBit : 1; u32 BufferLength; }; - #elif defined(__LITTLE_ENDIAN_BITFIELD) struct cmdQ_e { - u32 BufferLength : 31; - u32 GenerationBit : 1; - u32 AddrLow; - u32 AddrHigh; - u32 GenerationBit2 : 1; - u32 DataValid : 1; - u32 Sop : 1; - u32 Eop : 1; - u32 TokenValid : 1; - u32 Reserved : 3; - u32 CmdId : 8; - u32 ResponseTokens : 12; - u32 RespQueueSelector : 4; + u32 len_gen; + u32 addr_lo; + u32 addr_hi; + u32 flags; }; struct freelQ_e { - u32 BufferLength : 31; - u32 GenerationBit : 1; - u32 AddrLow; - u32 AddrHigh; - u32 GenerationBit2 : 1; - u32 Reserved : 31; + u32 len_gen; + u32 addr_lo; + u32 addr_hi; + u32 gen2; }; struct respQ_e { @@ -179,7 +170,6 @@ struct cmdQ_ce { struct sk_buff *skb; DECLARE_PCI_UNMAP_ADDR(dma_addr); DECLARE_PCI_UNMAP_LEN(dma_len); - unsigned int single; }; struct freelQ_ce { @@ -189,44 +179,52 @@ struct freelQ_ce { }; /* - * SW Command, Freelist and Response Queue + * SW command, freelist and response rings */ struct cmdQ { - atomic_t asleep; /* HW DMA Fetch status */ - atomic_t credits; /* # available descriptors for TX */ - atomic_t pio_pidx; /* Variable updated on Doorbell */ - u16 entries_n; /* # descriptors for TX */ - u16 pidx; /* producer index (SW) */ - u16 cidx; /* consumer index (HW) */ - u8 genbit; /* current generation (=valid) bit */ - struct cmdQ_e *entries; /* HW command 
descriptor Q */ - struct cmdQ_ce *centries; /* SW command context descriptor Q */ - spinlock_t Qlock; /* Lock to protect cmdQ enqueuing */ - dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ + unsigned long status; /* HW DMA fetch status */ + unsigned int in_use; /* # of in-use command descriptors */ + unsigned int size; /* # of descriptors */ + unsigned int processed; /* total # of descs HW has processed */ + unsigned int cleaned; /* total # of descs SW has reclaimed */ + unsigned int stop_thres; /* SW TX queue suspend threshold */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ + u8 genbit; /* current generation (=valid) bit */ + u8 sop; /* is next entry start of packet? */ + struct cmdQ_e *entries; /* HW command descriptor Q */ + struct cmdQ_ce *centries; /* SW command context descriptor Q */ + spinlock_t lock; /* Lock to protect cmdQ enqueuing */ + dma_addr_t dma_addr; /* DMA addr HW command descriptor Q */ }; struct freelQ { - unsigned int credits; /* # of available RX buffers */ - unsigned int entries_n; /* free list capacity */ - u16 pidx; /* producer index (SW) */ - u16 cidx; /* consumer index (HW) */ + unsigned int credits; /* # of available RX buffers */ + unsigned int size; /* free list capacity */ + u16 pidx; /* producer index (SW) */ + u16 cidx; /* consumer index (HW) */ u16 rx_buffer_size; /* Buffer size on this free list */ u16 dma_offset; /* DMA offset to align IP headers */ - u8 genbit; /* current generation (=valid) bit */ - struct freelQ_e *entries; /* HW freelist descriptor Q */ - struct freelQ_ce *centries; /* SW freelist conext descriptor Q */ - dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ + u16 recycleq_idx; /* skb recycle q to use */ + u8 genbit; /* current generation (=valid) bit */ + struct freelQ_e *entries; /* HW freelist descriptor Q */ + struct freelQ_ce *centries; /* SW freelist context descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW freelist descriptor Q */ }; struct respQ { 
- u16 credits; /* # of available respQ descriptors */ - u16 credits_pend; /* # of not yet returned descriptors */ - u16 entries_n; /* # of response Q descriptors */ - u16 pidx; /* producer index (HW) */ - u16 cidx; /* consumer index (SW) */ - u8 genbit; /* current generation(=valid) bit */ + unsigned int credits; /* credits to be returned to SGE */ + unsigned int size; /* # of response Q descriptors */ + u16 cidx; /* consumer index (SW) */ + u8 genbit; /* current generation(=valid) bit */ struct respQ_e *entries; /* HW response descriptor Q */ - dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ + dma_addr_t dma_addr; /* DMA addr HW response descriptor Q */ +}; + +/* Bit flags for cmdQ.status */ +enum { + CMDQ_STAT_RUNNING = 1, /* fetch engine is running */ + CMDQ_STAT_LAST_PKT_DB = 2 /* last packet rung the doorbell */ }; /* @@ -239,134 +237,50 @@ struct respQ { */ struct sge { struct adapter *adapter; /* adapter backpointer */ - struct freelQ freelQ[SGE_FREELQ_N]; /* freelist Q(s) */ - struct respQ respQ; /* response Q instatiation */ + struct net_device *netdev; /* netdevice backpointer */ + struct freelQ freelQ[SGE_FREELQ_N]; /* buffer free lists */ + struct respQ respQ; /* response Q */ + unsigned long stopped_tx_queues; /* bitmap of suspended Tx queues */ unsigned int rx_pkt_pad; /* RX padding for L2 packets */ unsigned int jumbo_fl; /* jumbo freelist Q index */ - u32 intrtimer[SGE_INTR_MAXBUCKETS]; /* ! 
*/ - u32 currIndex; /* current index into intrtimer[] */ - u32 intrtimer_nres; /* no resource interrupt timer value */ - u32 sge_control; /* shadow content of sge control reg */ - struct sge_intr_counts intr_cnt; - struct timer_list ptimer; - struct sk_buff *pskb; - u32 ptimeout; - struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned; /* command Q(s)*/ + unsigned int intrtimer_nres; /* no-resource interrupt timer */ + unsigned int fixed_intrtimer;/* non-adaptive interrupt timer */ + struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ + struct timer_list espibug_timer; + unsigned int espibug_timeout; + struct sk_buff *espibug_skb; + u32 sge_control; /* shadow value of sge control reg */ + struct sge_intr_counts stats; + struct sge_port_stats port_stats[MAX_NPORTS]; + struct cmdQ cmdQ[SGE_CMDQ_N] ____cacheline_aligned_in_smp; }; -static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, - unsigned int qid); - /* * PIO to indicate that memory mapped Q contains valid descriptor(s). */ -static inline void doorbell_pio(struct sge *sge, u32 val) +static inline void doorbell_pio(struct adapter *adapter, u32 val) { wmb(); - t1_write_reg_4(sge->adapter, A_SG_DOORBELL, val); -} - -/* - * Disables the DMA engine. 
- */ -void t1_sge_stop(struct sge *sge) -{ - t1_write_reg_4(sge->adapter, A_SG_CONTROL, 0); - t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ - if (is_T2(sge->adapter)) - del_timer_sync(&sge->ptimer); -} - -static u8 ch_mac_addr[ETH_ALEN] = {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; -static void t1_espi_workaround(void *data) -{ - struct adapter *adapter = (struct adapter *)data; - struct sge *sge = adapter->sge; - - if (netif_running(adapter->port[0].dev) && - atomic_read(&sge->cmdQ[0].asleep)) { - - u32 seop = t1_espi_get_mon(adapter, 0x930, 0); - - if ((seop & 0xfff0fff) == 0xfff && sge->pskb) { - struct sk_buff *skb = sge->pskb; - if (!skb->cb[0]) { - memcpy(skb->data+sizeof(struct cpl_tx_pkt), ch_mac_addr, ETH_ALEN); - memcpy(skb->data+skb->len-10, ch_mac_addr, ETH_ALEN); - - skb->cb[0] = 0xff; - } - t1_sge_tx(skb, adapter,0); - } - } - mod_timer(&adapter->sge->ptimer, jiffies + sge->ptimeout); -} - -/* - * Enables the DMA engine. - */ -void t1_sge_start(struct sge *sge) -{ - t1_write_reg_4(sge->adapter, A_SG_CONTROL, sge->sge_control); - t1_read_reg_4(sge->adapter, A_SG_CONTROL); /* flush write */ - if (is_T2(sge->adapter)) { - init_timer(&sge->ptimer); - sge->ptimer.function = (void *)&t1_espi_workaround; - sge->ptimer.data = (unsigned long)sge->adapter; - sge->ptimer.expires = jiffies + sge->ptimeout; - add_timer(&sge->ptimer); - } -} - -/* - * Creates a t1_sge structure and returns suggested resource parameters. - */ -struct sge * __devinit t1_sge_create(struct adapter *adapter, - struct sge_params *p) -{ - struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); - - if (!sge) - return NULL; - memset(sge, 0, sizeof(*sge)); - - if (is_T2(adapter)) - sge->ptimeout = 1; /* finest allowed */ - - sge->adapter = adapter; - sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : SGE_RX_OFFSET; - sge->jumbo_fl = t1_is_T1B(adapter) ? 
1 : 0; - - p->cmdQ_size[0] = SGE_CMDQ0_E_N; - p->cmdQ_size[1] = SGE_CMDQ1_E_N; - p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; - p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; - p->rx_coalesce_usecs = SGE_INTRTIMER1; - p->last_rx_coalesce_raw = SGE_INTRTIMER1 * - (board_info(sge->adapter)->clock_core / 1000000); - p->default_rx_coalesce_usecs = SGE_INTRTIMER1; - p->coalesce_enable = 0; /* Turn off adaptive algorithm by default */ - p->sample_interval_usecs = 0; - return sge; + writel(val, adapter->regs + A_SG_DOORBELL); } /* * Frees all RX buffers on the freelist Q. The caller must make sure that * the SGE is turned off before calling this function. */ -static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *Q) +static void free_freelQ_buffers(struct pci_dev *pdev, struct freelQ *q) { - unsigned int cidx = Q->cidx, credits = Q->credits; + unsigned int cidx = q->cidx; - while (credits--) { - struct freelQ_ce *ce = &Q->centries[cidx]; + while (q->credits--) { + struct freelQ_ce *ce = &q->centries[cidx]; pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); dev_kfree_skb(ce->skb); ce->skb = NULL; - if (++cidx == Q->entries_n) + if (++cidx == q->size) cidx = 0; } } @@ -380,29 +294,29 @@ static void free_rx_resources(struct sge *sge) unsigned int size, i; if (sge->respQ.entries) { - size = sizeof(struct respQ_e) * sge->respQ.entries_n; + size = sizeof(struct respQ_e) * sge->respQ.size; pci_free_consistent(pdev, size, sge->respQ.entries, sge->respQ.dma_addr); } for (i = 0; i < SGE_FREELQ_N; i++) { - struct freelQ *Q = &sge->freelQ[i]; + struct freelQ *q = &sge->freelQ[i]; - if (Q->centries) { - free_freelQ_buffers(pdev, Q); - kfree(Q->centries); + if (q->centries) { + free_freelQ_buffers(pdev, q); + kfree(q->centries); } - if (Q->entries) { - size = sizeof(struct freelQ_e) * Q->entries_n; - pci_free_consistent(pdev, size, Q->entries, - Q->dma_addr); + if (q->entries) { + size = sizeof(struct freelQ_e) 
* q->size; + pci_free_consistent(pdev, size, q->entries, + q->dma_addr); } } } /* * Allocates basic RX resources, consisting of memory mapped freelist Qs and a - * response Q. + * response queue. */ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) { @@ -410,21 +324,22 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) unsigned int size, i; for (i = 0; i < SGE_FREELQ_N; i++) { - struct freelQ *Q = &sge->freelQ[i]; - - Q->genbit = 1; - Q->entries_n = p->freelQ_size[i]; - Q->dma_offset = SGE_RX_OFFSET - sge->rx_pkt_pad; - size = sizeof(struct freelQ_e) * Q->entries_n; - Q->entries = (struct freelQ_e *) - pci_alloc_consistent(pdev, size, &Q->dma_addr); - if (!Q->entries) + struct freelQ *q = &sge->freelQ[i]; + + q->genbit = 1; + q->size = p->freelQ_size[i]; + q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN; + size = sizeof(struct freelQ_e) * q->size; + q->entries = (struct freelQ_e *) + pci_alloc_consistent(pdev, size, &q->dma_addr); + if (!q->entries) goto err_no_mem; - memset(Q->entries, 0, size); - Q->centries = kcalloc(Q->entries_n, sizeof(struct freelQ_ce), - GFP_KERNEL); - if (!Q->centries) + memset(q->entries, 0, size); + size = sizeof(struct freelQ_ce) * q->size; + q->centries = kmalloc(size, GFP_KERNEL); + if (!q->centries) goto err_no_mem; + memset(q->centries, 0, size); } /* @@ -440,10 +355,17 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p) sge->freelQ[sge->jumbo_fl].rx_buffer_size = (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + /* + * Setup which skb recycle Q should be used when recycling buffers from + * each free list. 
+ */ + sge->freelQ[!sge->jumbo_fl].recycleq_idx = 0; + sge->freelQ[sge->jumbo_fl].recycleq_idx = 1; + sge->respQ.genbit = 1; - sge->respQ.entries_n = SGE_RESPQ_E_N; - sge->respQ.credits = SGE_RESPQ_E_N; - size = sizeof(struct respQ_e) * sge->respQ.entries_n; + sge->respQ.size = SGE_RESPQ_E_N; + sge->respQ.credits = 0; + size = sizeof(struct respQ_e) * sge->respQ.size; sge->respQ.entries = (struct respQ_e *) pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr); if (!sge->respQ.entries) @@ -457,48 +379,37 @@ err_no_mem: } /* - * Frees 'credits_pend' TX buffers and returns the credits to Q->credits. - * - * The adaptive algorithm receives the total size of the buffers freed - * accumulated in @*totpayload. No initialization of this argument here. - * + * Reclaims n TX descriptors and frees the buffers associated with them. */ -static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *Q, - unsigned int credits_pend, unsigned int *totpayload) +static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n) { + struct cmdQ_ce *ce; struct pci_dev *pdev = sge->adapter->pdev; - struct sk_buff *skb; - struct cmdQ_ce *ce, *cq = Q->centries; - unsigned int entries_n = Q->entries_n, cidx = Q->cidx, - i = credits_pend; - + unsigned int cidx = q->cidx; - ce = &cq[cidx]; - while (i--) { - if (ce->single) + q->in_use -= n; + ce = &q->centries[cidx]; + while (n--) { + if (q->sop) pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), + pci_unmap_len(ce, dma_len), PCI_DMA_TODEVICE); else pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), + pci_unmap_len(ce, dma_len), PCI_DMA_TODEVICE); - if (totpayload) - *totpayload += pci_unmap_len(ce, dma_len); - - skb = ce->skb; - if (skb) - dev_kfree_skb_irq(skb); - + q->sop = 0; + if (ce->skb) { + dev_kfree_skb(ce->skb); + q->sop = 1; + } ce++; - if (++cidx == entries_n) { + if (++cidx == q->size) { cidx = 0; - ce = cq; + ce = q->centries; } } - - Q->cidx = cidx; - 
atomic_add(credits_pend, &Q->credits); + q->cidx = cidx; } /* @@ -512,20 +423,17 @@ static void free_tx_resources(struct sge *sge) unsigned int size, i; for (i = 0; i < SGE_CMDQ_N; i++) { - struct cmdQ *Q = &sge->cmdQ[i]; + struct cmdQ *q = &sge->cmdQ[i]; - if (Q->centries) { - unsigned int pending = Q->entries_n - - atomic_read(&Q->credits); - - if (pending) - free_cmdQ_buffers(sge, Q, pending, NULL); - kfree(Q->centries); + if (q->centries) { + if (q->in_use) + free_cmdQ_buffers(sge, q, q->in_use); + kfree(q->centries); } - if (Q->entries) { - size = sizeof(struct cmdQ_e) * Q->entries_n; - pci_free_consistent(pdev, size, Q->entries, - Q->dma_addr); + if (q->entries) { + size = sizeof(struct cmdQ_e) * q->size; + pci_free_consistent(pdev, size, q->entries, + q->dma_addr); } } } @@ -539,25 +447,38 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p) unsigned int size, i; for (i = 0; i < SGE_CMDQ_N; i++) { - struct cmdQ *Q = &sge->cmdQ[i]; - - Q->genbit = 1; - Q->entries_n = p->cmdQ_size[i]; - atomic_set(&Q->credits, Q->entries_n); - atomic_set(&Q->asleep, 1); - spin_lock_init(&Q->Qlock); - size = sizeof(struct cmdQ_e) * Q->entries_n; - Q->entries = (struct cmdQ_e *) - pci_alloc_consistent(pdev, size, &Q->dma_addr); - if (!Q->entries) + struct cmdQ *q = &sge->cmdQ[i]; + + q->genbit = 1; + q->sop = 1; + q->size = p->cmdQ_size[i]; + q->in_use = 0; + q->status = 0; + q->processed = q->cleaned = 0; + q->stop_thres = 0; + spin_lock_init(&q->lock); + size = sizeof(struct cmdQ_e) * q->size; + q->entries = (struct cmdQ_e *) + pci_alloc_consistent(pdev, size, &q->dma_addr); + if (!q->entries) goto err_no_mem; - memset(Q->entries, 0, size); - Q->centries = kcalloc(Q->entries_n, sizeof(struct cmdQ_ce), - GFP_KERNEL); - if (!Q->centries) + memset(q->entries, 0, size); + size = sizeof(struct cmdQ_ce) * q->size; + q->centries = kmalloc(size, GFP_KERNEL); + if (!q->centries) goto err_no_mem; + memset(q->centries, 0, size); } + /* + * CommandQ 0 handles Ethernet 
and TOE packets, while queue 1 is TOE + * only. For queue 0 set the stop threshold so we can handle one more + * packet from each port, plus reserve an additional 24 entries for + * Ethernet packets only. Queue 1 never suspends nor do we reserve + * space for Ethernet packets. + */ + sge->cmdQ[0].stop_thres = sge->adapter->params.nports * + (MAX_SKB_FRAGS + 1); return 0; err_no_mem: @@ -569,9 +490,9 @@ static inline void setup_ring_params(struct adapter *adapter, u64 addr, u32 size, int base_reg_lo, int base_reg_hi, int size_reg) { - t1_write_reg_4(adapter, base_reg_lo, (u32)addr); - t1_write_reg_4(adapter, base_reg_hi, addr >> 32); - t1_write_reg_4(adapter, size_reg, size); + writel((u32)addr, adapter->regs + base_reg_lo); + writel(addr >> 32, adapter->regs + base_reg_hi); + writel(size, adapter->regs + size_reg); } /* @@ -585,29 +506,11 @@ void t1_set_vlan_accel(struct adapter *adapter, int on_off) if (on_off) sge->sge_control |= F_VLAN_XTRACT; if (adapter->open_device_map) { - t1_write_reg_4(adapter, A_SG_CONTROL, sge->sge_control); - t1_read_reg_4(adapter, A_SG_CONTROL); /* flush */ + writel(sge->sge_control, adapter->regs + A_SG_CONTROL); + readl(adapter->regs + A_SG_CONTROL); /* flush */ } } -/* - * Sets the interrupt latency timer when the adaptive Rx coalescing - * is turned off. Do nothing when it is turned on again. - * - * This routine relies on the fact that the caller has already set - * the adaptive policy in adapter->sge_params before calling it. -*/ -int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) -{ - if (!p->coalesce_enable) { - u32 newTimer = p->rx_coalesce_usecs * - (board_info(sge->adapter)->clock_core / 1000000); - - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, newTimer); - } - return 0; -} - /* * Programs the various SGE registers. However, the engine is not yet enabled, * but sge->sge_control is setup and ready to go. 
@@ -615,67 +518,40 @@ int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) static void configure_sge(struct sge *sge, struct sge_params *p) { struct adapter *ap = sge->adapter; - int i; - - t1_write_reg_4(ap, A_SG_CONTROL, 0); - setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].entries_n, + + writel(0, ap->regs + A_SG_CONTROL); + setup_ring_params(ap, sge->cmdQ[0].dma_addr, sge->cmdQ[0].size, A_SG_CMD0BASELWR, A_SG_CMD0BASEUPR, A_SG_CMD0SIZE); - setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].entries_n, + setup_ring_params(ap, sge->cmdQ[1].dma_addr, sge->cmdQ[1].size, A_SG_CMD1BASELWR, A_SG_CMD1BASEUPR, A_SG_CMD1SIZE); setup_ring_params(ap, sge->freelQ[0].dma_addr, - sge->freelQ[0].entries_n, A_SG_FL0BASELWR, + sge->freelQ[0].size, A_SG_FL0BASELWR, A_SG_FL0BASEUPR, A_SG_FL0SIZE); setup_ring_params(ap, sge->freelQ[1].dma_addr, - sge->freelQ[1].entries_n, A_SG_FL1BASELWR, + sge->freelQ[1].size, A_SG_FL1BASELWR, A_SG_FL1BASEUPR, A_SG_FL1SIZE); /* The threshold comparison uses <. 
*/ - t1_write_reg_4(ap, A_SG_FLTHRESHOLD, SGE_RX_SM_BUF_SIZE + 1); + writel(SGE_RX_SM_BUF_SIZE + 1, ap->regs + A_SG_FLTHRESHOLD); - setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.entries_n, - A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); - t1_write_reg_4(ap, A_SG_RSPQUEUECREDIT, (u32)sge->respQ.entries_n); + setup_ring_params(ap, sge->respQ.dma_addr, sge->respQ.size, + A_SG_RSPBASELWR, A_SG_RSPBASEUPR, A_SG_RSPSIZE); + writel((u32)sge->respQ.size - 1, ap->regs + A_SG_RSPQUEUECREDIT); sge->sge_control = F_CMDQ0_ENABLE | F_CMDQ1_ENABLE | F_FL0_ENABLE | F_FL1_ENABLE | F_CPL_ENABLE | F_RESPONSE_QUEUE_ENABLE | V_CMDQ_PRIORITY(2) | F_DISABLE_CMDQ1_GTS | F_ISCSI_COALESCE | + F_DISABLE_FL0_GTS | F_DISABLE_FL1_GTS | V_RX_PKT_OFFSET(sge->rx_pkt_pad); #if defined(__BIG_ENDIAN_BITFIELD) sge->sge_control |= F_ENABLE_BIG_ENDIAN; #endif - /* - * Initialize the SGE Interrupt Timer arrray: - * intrtimer[0] = (SGE_INTRTIMER0) usec - * intrtimer[0intrtimer[0] = board_info(sge->adapter)->clock_core / 1000000; - for (i = 1; i < SGE_INTR_LATBUCKETS; ++i) { - sge->intrtimer[i] = SGE_INTRTIMER0 + (2 * i); - sge->intrtimer[i] *= sge->intrtimer[0]; - } - for (i = SGE_INTR_LATBUCKETS; i < SGE_INTR_MAXBUCKETS - 1; ++i) { - sge->intrtimer[i] = (i - 3) * 6; - sge->intrtimer[i] *= sge->intrtimer[0]; - } - sge->intrtimer[SGE_INTR_MAXBUCKETS - 1] = - sge->intrtimer[0] * SGE_INTRTIMER1; - /* Initialize resource timer */ - sge->intrtimer_nres = sge->intrtimer[0] * SGE_INTRTIMER_NRES; - /* Finally finish initialization of intrtimer[0] */ - sge->intrtimer[0] *= SGE_INTRTIMER0; - /* Initialize for a throughput oriented workload */ - sge->currIndex = SGE_INTR_MAXBUCKETS - 1; - - if (p->coalesce_enable) - t1_write_reg_4(ap, A_SG_INTRTIMER, - sge->intrtimer[sge->currIndex]); - else - t1_sge_set_coalesce_params(sge, p); + /* Initialize no-resource timer */ + sge->intrtimer_nres = SGE_INTRTIMER_NRES * core_ticks_per_usec(ap); + + t1_sge_set_coalesce_params(sge, p); } /* @@ -684,31 +560,8 @@ static 
void configure_sge(struct sge *sge, struct sge_params *p) static inline unsigned int jumbo_payload_capacity(const struct sge *sge) { return sge->freelQ[sge->jumbo_fl].rx_buffer_size - - sizeof(struct cpl_rx_data) - SGE_RX_OFFSET + sge->rx_pkt_pad; -} - -/* - * Allocates both RX and TX resources and configures the SGE. However, - * the hardware is not enabled yet. - */ -int t1_sge_configure(struct sge *sge, struct sge_params *p) -{ - if (alloc_rx_resources(sge, p)) - return -ENOMEM; - if (alloc_tx_resources(sge, p)) { - free_rx_resources(sge); - return -ENOMEM; - } - configure_sge(sge, p); - - /* - * Now that we have sized the free lists calculate the payload - * capacity of the large buffers. Other parts of the driver use - * this to set the max offload coalescing size so that RX packets - * do not overflow our large buffers. - */ - p->large_buf_capacity = jumbo_payload_capacity(sge); - return 0; + sge->freelQ[sge->jumbo_fl].dma_offset - + sizeof(struct cpl_rx_data); } /* @@ -716,8 +569,9 @@ int t1_sge_configure(struct sge *sge, struct sge_params *p) */ void t1_sge_destroy(struct sge *sge) { - if (sge->pskb) - dev_kfree_skb(sge->pskb); + if (sge->espibug_skb) + kfree_skb(sge->espibug_skb); + free_tx_resources(sge); free_rx_resources(sge); kfree(sge); @@ -735,75 +589,75 @@ void t1_sge_destroy(struct sge *sge) * we specify a RX_OFFSET in order to make sure that the IP header is 4B * aligned. 
*/ -static void refill_free_list(struct sge *sge, struct freelQ *Q) +static void refill_free_list(struct sge *sge, struct freelQ *q) { struct pci_dev *pdev = sge->adapter->pdev; - struct freelQ_ce *ce = &Q->centries[Q->pidx]; - struct freelQ_e *e = &Q->entries[Q->pidx]; - unsigned int dma_len = Q->rx_buffer_size - Q->dma_offset; + struct freelQ_ce *ce = &q->centries[q->pidx]; + struct freelQ_e *e = &q->entries[q->pidx]; + unsigned int dma_len = q->rx_buffer_size - q->dma_offset; - while (Q->credits < Q->entries_n) { - if (e->GenerationBit != Q->genbit) { - struct sk_buff *skb; - dma_addr_t mapping; + while (q->credits < q->size) { + struct sk_buff *skb; + dma_addr_t mapping; - skb = alloc_skb(Q->rx_buffer_size, GFP_ATOMIC); - if (!skb) - break; - if (Q->dma_offset) - skb_reserve(skb, Q->dma_offset); - mapping = pci_map_single(pdev, skb->data, dma_len, - PCI_DMA_FROMDEVICE); - ce->skb = skb; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, dma_len); - e->AddrLow = (u32)mapping; - e->AddrHigh = (u64)mapping >> 32; - e->BufferLength = dma_len; - e->GenerationBit = e->GenerationBit2 = Q->genbit; - } + skb = alloc_skb(q->rx_buffer_size, GFP_ATOMIC); + if (!skb) + break; + + skb_reserve(skb, q->dma_offset); + mapping = pci_map_single(pdev, skb->data, dma_len, + PCI_DMA_FROMDEVICE); + ce->skb = skb; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, dma_len); + e->addr_lo = (u32)mapping; + e->addr_hi = (u64)mapping >> 32; + e->len_gen = V_CMD_LEN(dma_len) | V_CMD_GEN1(q->genbit); + wmb(); + e->gen2 = V_CMD_GEN2(q->genbit); e++; ce++; - if (++Q->pidx == Q->entries_n) { - Q->pidx = 0; - Q->genbit ^= 1; - ce = Q->centries; - e = Q->entries; + if (++q->pidx == q->size) { + q->pidx = 0; + q->genbit ^= 1; + ce = q->centries; + e = q->entries; } - Q->credits++; + q->credits++; } } /* - * Calls refill_free_list for both freelist Qs. 
If we cannot - * fill at least 1/4 of both Qs, we go into 'few interrupt mode' in order - * to give the system time to free up resources. + * Calls refill_free_list for both free lists. If we cannot fill at least 1/4 + * of both rings, we go into 'few interrupt mode' in order to give the system + * time to free up resources. */ static void freelQs_empty(struct sge *sge) { - u32 irq_reg = t1_read_reg_4(sge->adapter, A_SG_INT_ENABLE); + struct adapter *adapter = sge->adapter; + u32 irq_reg = readl(adapter->regs + A_SG_INT_ENABLE); u32 irqholdoff_reg; refill_free_list(sge, &sge->freelQ[0]); refill_free_list(sge, &sge->freelQ[1]); - if (sge->freelQ[0].credits > (sge->freelQ[0].entries_n >> 2) && - sge->freelQ[1].credits > (sge->freelQ[1].entries_n >> 2)) { + if (sge->freelQ[0].credits > (sge->freelQ[0].size >> 2) && + sge->freelQ[1].credits > (sge->freelQ[1].size >> 2)) { irq_reg |= F_FL_EXHAUSTED; - irqholdoff_reg = sge->intrtimer[sge->currIndex]; + irqholdoff_reg = sge->fixed_intrtimer; } else { /* Clear the F_FL_EXHAUSTED interrupts for now */ irq_reg &= ~F_FL_EXHAUSTED; irqholdoff_reg = sge->intrtimer_nres; } - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, irqholdoff_reg); - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, irq_reg); + writel(irqholdoff_reg, adapter->regs + A_SG_INTRTIMER); + writel(irq_reg, adapter->regs + A_SG_INT_ENABLE); /* We reenable the Qs to force a freelist GTS interrupt later */ - doorbell_pio(sge, F_FL0_ENABLE | F_FL1_ENABLE); + doorbell_pio(adapter, F_FL0_ENABLE | F_FL1_ENABLE); } #define SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA) @@ -816,10 +670,10 @@ static void freelQs_empty(struct sge *sge) */ void t1_sge_intr_disable(struct sge *sge) { - u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + u32 val = readl(sge->adapter->regs + A_PL_ENABLE); - t1_write_reg_4(sge->adapter, A_PL_ENABLE, val & ~SGE_PL_INTR_MASK); - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, 0); + writel(val & ~SGE_PL_INTR_MASK, sge->adapter->regs + 
A_PL_ENABLE); + writel(0, sge->adapter->regs + A_SG_INT_ENABLE); } /* @@ -828,12 +682,12 @@ void t1_sge_intr_disable(struct sge *sge) void t1_sge_intr_enable(struct sge *sge) { u32 en = SGE_INT_ENABLE; - u32 val = t1_read_reg_4(sge->adapter, A_PL_ENABLE); + u32 val = readl(sge->adapter->regs + A_PL_ENABLE); if (sge->adapter->flags & TSO_CAPABLE) en &= ~F_PACKET_TOO_BIG; - t1_write_reg_4(sge->adapter, A_SG_INT_ENABLE, en); - t1_write_reg_4(sge->adapter, A_PL_ENABLE, val | SGE_PL_INTR_MASK); + writel(en, sge->adapter->regs + A_SG_INT_ENABLE); + writel(val | SGE_PL_INTR_MASK, sge->adapter->regs + A_PL_ENABLE); } /* @@ -841,8 +695,8 @@ void t1_sge_intr_enable(struct sge *sge) */ void t1_sge_intr_clear(struct sge *sge) { - t1_write_reg_4(sge->adapter, A_PL_CAUSE, SGE_PL_INTR_MASK); - t1_write_reg_4(sge->adapter, A_SG_INT_CAUSE, 0xffffffff); + writel(SGE_PL_INTR_MASK, sge->adapter->regs + A_PL_CAUSE); + writel(0xffffffff, sge->adapter->regs + A_SG_INT_CAUSE); } /* @@ -851,464 +705,673 @@ void t1_sge_intr_clear(struct sge *sge) int t1_sge_intr_error_handler(struct sge *sge) { struct adapter *adapter = sge->adapter; - u32 cause = t1_read_reg_4(adapter, A_SG_INT_CAUSE); + u32 cause = readl(adapter->regs + A_SG_INT_CAUSE); if (adapter->flags & TSO_CAPABLE) cause &= ~F_PACKET_TOO_BIG; if (cause & F_RESPQ_EXHAUSTED) - sge->intr_cnt.respQ_empty++; + sge->stats.respQ_empty++; if (cause & F_RESPQ_OVERFLOW) { - sge->intr_cnt.respQ_overflow++; + sge->stats.respQ_overflow++; CH_ALERT("%s: SGE response queue overflow\n", adapter->name); } if (cause & F_FL_EXHAUSTED) { - sge->intr_cnt.freelistQ_empty++; + sge->stats.freelistQ_empty++; freelQs_empty(sge); } if (cause & F_PACKET_TOO_BIG) { - sge->intr_cnt.pkt_too_big++; + sge->stats.pkt_too_big++; CH_ALERT("%s: SGE max packet size exceeded\n", adapter->name); } if (cause & F_PACKET_MISMATCH) { - sge->intr_cnt.pkt_mismatch++; + sge->stats.pkt_mismatch++; CH_ALERT("%s: SGE packet mismatch\n", adapter->name); } if (cause & SGE_INT_FATAL) 
t1_fatal_err(adapter); - t1_write_reg_4(adapter, A_SG_INT_CAUSE, cause); + writel(cause, adapter->regs + A_SG_INT_CAUSE); return 0; } -/* - * The following code is copied from 2.6, where the skb_pull is doing the - * right thing and only pulls ETH_HLEN. +const struct sge_intr_counts *t1_sge_get_intr_counts(struct sge *sge) +{ + return &sge->stats; +} + +const struct sge_port_stats *t1_sge_get_port_stats(struct sge *sge, int port) +{ + return &sge->port_stats[port]; +} + +/** + * recycle_fl_buf - recycle a free list buffer + * @fl: the free list + * @idx: index of buffer to recycle * - * Determine the packet's protocol ID. The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. + * Recycles the specified buffer on the given free list by adding it at + * the next available slot on the list. */ -static unsigned short sge_eth_type_trans(struct sk_buff *skb, - struct net_device *dev) +static void recycle_fl_buf(struct freelQ *fl, int idx) { - struct ethhdr *eth; - unsigned char *rawp; + struct freelQ_e *from = &fl->entries[idx]; + struct freelQ_e *to = &fl->entries[fl->pidx]; - skb->mac.raw = skb->data; - skb_pull(skb, ETH_HLEN); - eth = (struct ethhdr *)skb->mac.raw; + fl->centries[fl->pidx] = fl->centries[idx]; + to->addr_lo = from->addr_lo; + to->addr_hi = from->addr_hi; + to->len_gen = G_CMD_LEN(from->len_gen) | V_CMD_GEN1(fl->genbit); + wmb(); + to->gen2 = V_CMD_GEN2(fl->genbit); + fl->credits++; - if (*eth->h_dest&1) { - if(memcmp(eth->h_dest, dev->broadcast, ETH_ALEN) == 0) - skb->pkt_type = PACKET_BROADCAST; - else - skb->pkt_type = PACKET_MULTICAST; + if (++fl->pidx == fl->size) { + fl->pidx = 0; + fl->genbit ^= 1; } +} - /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. - * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. 
- */ +/** + * get_packet - return the next ingress packet buffer + * @pdev: the PCI device that received the packet + * @fl: the SGE free list holding the packet + * @len: the actual packet length, excluding any SGE padding + * @dma_pad: padding at beginning of buffer left by SGE DMA + * @skb_pad: padding to be used if the packet is copied + * @copy_thres: length threshold under which a packet should be copied + * @drop_thres: # of remaining buffers before we start dropping packets + * + * Get the next packet from a free list and complete setup of the + * sk_buff. If the packet is small we make a copy and recycle the + * original buffer, otherwise we use the original buffer itself. If a + * positive drop threshold is supplied packets are dropped and their + * buffers recycled if (a) the number of remaining buffers is under the + * threshold and the packet is too big to copy, or (b) the packet should + * be copied but there is no memory for the copy. + */ +static inline struct sk_buff *get_packet(struct pci_dev *pdev, + struct freelQ *fl, unsigned int len, + int dma_pad, int skb_pad, + unsigned int copy_thres, + unsigned int drop_thres) +{ + struct sk_buff *skb; + struct freelQ_ce *ce = &fl->centries[fl->cidx]; + + if (len < copy_thres) { + skb = alloc_skb(len + skb_pad, GFP_ATOMIC); + if (likely(skb != NULL)) { + skb_reserve(skb, skb_pad); + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + memcpy(skb->data, ce->skb->data + dma_pad, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); + } else if (!drop_thres) + goto use_orig_buf; - else if (1 /*dev->flags&IFF_PROMISC*/) - { - if(memcmp(eth->h_dest,dev->dev_addr, ETH_ALEN)) - skb->pkt_type=PACKET_OTHERHOST; + recycle_fl_buf(fl, fl->cidx); + return skb; } - if (ntohs(eth->h_proto) >= 1536) - return eth->h_proto; - - rawp = skb->data; + if 
(fl->credits < drop_thres) { + recycle_fl_buf(fl, fl->cidx); + return NULL; + } - /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. - */ - if (*(unsigned short *)rawp == 0xFFFF) - return htons(ETH_P_802_3); +use_orig_buf: + pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + skb = ce->skb; + skb_reserve(skb, dma_pad); + skb_put(skb, len); + return skb; +} - /* - * Real 802.2 LLC - */ - return htons(ETH_P_802_2); +/** + * unexpected_offload - handle an unexpected offload packet + * @adapter: the adapter + * @fl: the free list that received the packet + * + * Called when we receive an unexpected offload packet (e.g., the TOE + * function is disabled or the card is a NIC). Prints a message and + * recycles the buffer. + */ +static void unexpected_offload(struct adapter *adapter, struct freelQ *fl) +{ + struct freelQ_ce *ce = &fl->centries[fl->cidx]; + struct sk_buff *skb = ce->skb; + + pci_dma_sync_single_for_cpu(adapter->pdev, pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); + CH_ERR("%s: unexpected offload packet, cmd %u\n", + adapter->name, *skb->data); + recycle_fl_buf(fl, fl->cidx); } /* - * Prepare the received buffer and pass it up the stack. If it is small enough - * and allocation doesn't fail, we use a new sk_buff and copy the content. + * Write the command descriptors to transmit the given skb starting at + * descriptor pidx with the given generation. 
*/ -static unsigned int t1_sge_rx(struct sge *sge, struct freelQ *Q, - unsigned int len, unsigned int offload) +static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, + unsigned int pidx, unsigned int gen, + struct cmdQ *q) { - struct sk_buff *skb; - struct adapter *adapter = sge->adapter; - struct freelQ_ce *ce = &Q->centries[Q->cidx]; + dma_addr_t mapping; + struct cmdQ_e *e, *e1; + struct cmdQ_ce *ce; + unsigned int i, flags, nfrags = skb_shinfo(skb)->nr_frags; + + mapping = pci_map_single(adapter->pdev, skb->data, + skb->len - skb->data_len, PCI_DMA_TODEVICE); + ce = &q->centries[pidx]; + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); - if (len <= SGE_RX_COPY_THRESHOLD && - (skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC))) { - struct freelQ_e *e; - char *src = ce->skb->data; + flags = F_CMD_DATAVALID | F_CMD_SOP | V_CMD_EOP(nfrags == 0) | + V_CMD_GEN2(gen); + e = &q->entries[pidx]; + e->addr_lo = (u32)mapping; + e->addr_hi = (u64)mapping >> 32; + e->len_gen = V_CMD_LEN(skb->len - skb->data_len) | V_CMD_GEN1(gen); + for (e1 = e, i = 0; nfrags--; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - pci_dma_sync_single_for_cpu(adapter->pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - if (!offload) { - skb_reserve(skb, NET_IP_ALIGN); - src += sge->rx_pkt_pad; + ce++; + e1++; + if (++pidx == q->size) { + pidx = 0; + gen ^= 1; + ce = q->centries; + e1 = q->entries; } - memcpy(skb->data, src, len); - /* Reuse the entry. 
*/ - e = &Q->entries[Q->cidx]; - e->GenerationBit ^= 1; - e->GenerationBit2 ^= 1; - } else { - pci_unmap_single(adapter->pdev, pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - skb = ce->skb; - if (!offload && sge->rx_pkt_pad) - __skb_pull(skb, sge->rx_pkt_pad); + mapping = pci_map_page(adapter->pdev, frag->page, + frag->page_offset, frag->size, + PCI_DMA_TODEVICE); + ce->skb = NULL; + pci_unmap_addr_set(ce, dma_addr, mapping); + pci_unmap_len_set(ce, dma_len, frag->size); + + e1->addr_lo = (u32)mapping; + e1->addr_hi = (u64)mapping >> 32; + e1->len_gen = V_CMD_LEN(frag->size) | V_CMD_GEN1(gen); + e1->flags = F_CMD_DATAVALID | V_CMD_EOP(nfrags == 0) | + V_CMD_GEN2(gen); } - skb_put(skb, len); + ce->skb = skb; + wmb(); + e->flags = flags; +} +/* + * Clean up completed Tx buffers. + */ +static inline void reclaim_completed_tx(struct sge *sge, struct cmdQ *q) +{ + unsigned int reclaim = q->processed - q->cleaned; - if (unlikely(offload)) { - { - printk(KERN_ERR - "%s: unexpected offloaded packet, cmd %u\n", - adapter->name, *skb->data); - dev_kfree_skb_any(skb); - } - } else { - struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)skb->data; - - skb_pull(skb, sizeof(*p)); - skb->dev = adapter->port[p->iff].dev; - skb->dev->last_rx = jiffies; - skb->protocol = sge_eth_type_trans(skb, skb->dev); - if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && - skb->protocol == htons(ETH_P_IP) && - (skb->data[9] == IPPROTO_TCP || - skb->data[9] == IPPROTO_UDP)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; - if (adapter->vlan_grp && p->vlan_valid) - vlan_hwaccel_rx(skb, adapter->vlan_grp, - ntohs(p->vlan)); - else - netif_rx(skb); + if (reclaim) { + free_cmdQ_buffers(sge, q, reclaim); + q->cleaned += reclaim; } +} - if (++Q->cidx == Q->entries_n) - Q->cidx = 0; +#ifndef SET_ETHTOOL_OPS +# define __netif_rx_complete(dev) netif_rx_complete(dev) +#endif - if (unlikely(--Q->credits < Q->entries_n - 
SGE_FREEL_REFILL_THRESH)) - refill_free_list(sge, Q); - return 1; +/* + * We cannot use the standard netif_rx_schedule_prep() because we have multiple + * ports plus the TOE all multiplexing onto a single response queue, therefore + * accepting new responses cannot depend on the state of any particular port. + * So define our own equivalent that omits the netif_running() test. + */ +static inline int napi_schedule_prep(struct net_device *dev) +{ + return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state); } -/* - * Adaptive interrupt timer logic to keep the CPU utilization to - * manageable levels. Basically, as the Average Packet Size (APS) - * gets higher, the interrupt latency setting gets longer. Every - * SGE_INTR_BUCKETSIZE (of 100B) causes a bump of 2usec to the - * base value of SGE_INTRTIMER0. At large values of payload the - * latency hits the ceiling value of SGE_INTRTIMER1 stored at - * index SGE_INTR_MAXBUCKETS-1 in sge->intrtimer[]. +/** + * sge_rx - process an ingress ethernet packet + * @sge: the sge structure + * @fl: the free list that contains the packet buffer + * @len: the packet length * - * sge->currIndex caches the last index to save unneeded PIOs. + * Process an ingress ethernet packet and deliver it to the stack. 
*/ -static inline void update_intr_timer(struct sge *sge, unsigned int avg_payload) +static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) { - unsigned int newIndex; + struct sk_buff *skb; + struct cpl_rx_pkt *p; + struct adapter *adapter = sge->adapter; - newIndex = avg_payload / SGE_INTR_BUCKETSIZE; - if (newIndex > SGE_INTR_MAXBUCKETS - 1) { - newIndex = SGE_INTR_MAXBUCKETS - 1; - } - /* Save a PIO with this check....maybe */ - if (newIndex != sge->currIndex) { - t1_write_reg_4(sge->adapter, A_SG_INTRTIMER, - sge->intrtimer[newIndex]); - sge->currIndex = newIndex; - sge->adapter->params.sge.last_rx_coalesce_raw = - sge->intrtimer[newIndex]; + sge->stats.ethernet_pkts++; + skb = get_packet(adapter->pdev, fl, len - sge->rx_pkt_pad, + sge->rx_pkt_pad, 2, SGE_RX_COPY_THRES, + SGE_RX_DROP_THRES); + if (!skb) { + sge->port_stats[0].rx_drops++; /* charge only port 0 for now */ + return 0; } + + p = (struct cpl_rx_pkt *)skb->data; + skb_pull(skb, sizeof(*p)); + skb->dev = adapter->port[p->iff].dev; + skb->dev->last_rx = jiffies; + skb->protocol = eth_type_trans(skb, skb->dev); + if ((adapter->flags & RX_CSUM_ENABLED) && p->csum == 0xffff && + skb->protocol == htons(ETH_P_IP) && + (skb->data[9] == IPPROTO_TCP || skb->data[9] == IPPROTO_UDP)) { + sge->port_stats[p->iff].rx_cso_good++; + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else + skb->ip_summed = CHECKSUM_NONE; + + if (unlikely(adapter->vlan_grp && p->vlan_valid)) { + sge->port_stats[p->iff].vlan_xtract++; + if (adapter->params.sge.polling) + vlan_hwaccel_receive_skb(skb, adapter->vlan_grp, + ntohs(p->vlan)); + else + vlan_hwaccel_rx(skb, adapter->vlan_grp, + ntohs(p->vlan)); + } else if (adapter->params.sge.polling) + netif_receive_skb(skb); + else + netif_rx(skb); + return 0; } /* - * Returns true if command queue q_num has enough available descriptors that + * Returns true if a command queue has enough available descriptors that * we can resume Tx operation after temporarily disabling its packet 
queue. */ -static inline int enough_free_Tx_descs(struct sge *sge, int q_num) +static inline int enough_free_Tx_descs(const struct cmdQ *q) { - return atomic_read(&sge->cmdQ[q_num].credits) > - (sge->cmdQ[q_num].entries_n >> 2); + unsigned int r = q->processed - q->cleaned; + + return q->in_use - r < (q->size >> 1); } /* - * Main interrupt handler, optimized assuming that we took a 'DATA' - * interrupt. - * - * 1. Clear the interrupt - * 2. Loop while we find valid descriptors and process them; accumulate - * information that can be processed after the loop - * 3. Tell the SGE at which index we stopped processing descriptors - * 4. Bookkeeping; free TX buffers, ring doorbell if there are any - * outstanding TX buffers waiting, replenish RX buffers, potentially - * reenable upper layers if they were turned off due to lack of TX - * resources which are available again. - * 5. If we took an interrupt, but no valid respQ descriptors was found we - * let the slow_intr_handler run and do error handling. + * Called when sufficient space has become available in the SGE command queues + * after the Tx packet schedulers have been suspended to restart the Tx path. 
*/ -irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) +static void restart_tx_queues(struct sge *sge) { - struct net_device *netdev; - struct adapter *adapter = cookie; - struct sge *sge = adapter->sge; - struct respQ *Q = &sge->respQ; - unsigned int credits = Q->credits, flags = 0, ret = 0; - unsigned int tot_rxpayload = 0, tot_txpayload = 0, n_rx = 0, n_tx = 0; - unsigned int credits_pend[SGE_CMDQ_N] = { 0, 0 }; + struct adapter *adap = sge->adapter; - struct respQ_e *e = &Q->entries[Q->cidx]; - prefetch(e); + if (enough_free_Tx_descs(&sge->cmdQ[0])) { + int i; + + for_each_port(adap, i) { + struct net_device *nd = adap->port[i].dev; + + if (test_and_clear_bit(nd->if_port, + &sge->stopped_tx_queues) && + netif_running(nd)) { + sge->stats.cmdQ_restarted[3]++; + netif_wake_queue(nd); + } + } + } +} + +/* + * update_tx_info is called from the interrupt handler/NAPI to return cmdQ0 + * information. + */ +static unsigned int update_tx_info(struct adapter *adapter, + unsigned int flags, + unsigned int pr0) +{ + struct sge *sge = adapter->sge; + struct cmdQ *cmdq = &sge->cmdQ[0]; - t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_SGE_DATA); + cmdq->processed += pr0; + if (flags & F_CMDQ0_ENABLE) { + clear_bit(CMDQ_STAT_RUNNING, &cmdq->status); + + if (cmdq->cleaned + cmdq->in_use != cmdq->processed && + !test_and_set_bit(CMDQ_STAT_LAST_PKT_DB, &cmdq->status)) { + set_bit(CMDQ_STAT_RUNNING, &cmdq->status); + writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); + } + flags &= ~F_CMDQ0_ENABLE; + } + + if (unlikely(sge->stopped_tx_queues != 0)) + restart_tx_queues(sge); - while (e->GenerationBit == Q->genbit) { - if (--credits < SGE_RESPQ_REPLENISH_THRES) { - u32 n = Q->entries_n - credits - 1; + return flags; +} - t1_write_reg_4(adapter, A_SG_RSPQUEUECREDIT, n); - credits += n; +/* + * Process SGE responses, up to the supplied budget. Returns the number of + * responses processed. A negative budget is effectively unlimited. 
+ */ +static int process_responses(struct adapter *adapter, int budget) +{ + struct sge *sge = adapter->sge; + struct respQ *q = &sge->respQ; + struct respQ_e *e = &q->entries[q->cidx]; + int budget_left = budget; + unsigned int flags = 0; + unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; + + + while (likely(budget_left && e->GenerationBit == q->genbit)) { + flags |= e->Qsleeping; + + cmdq_processed[0] += e->Cmdq0CreditReturn; + cmdq_processed[1] += e->Cmdq1CreditReturn; + + /* We batch updates to the TX side to avoid cacheline + * ping-pong of TX state information on MP where the sender + * might run on a different CPU than this function... + */ + if (unlikely(flags & F_CMDQ0_ENABLE || cmdq_processed[0] > 64)) { + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + cmdq_processed[0] = 0; + } + if (unlikely(cmdq_processed[1] > 16)) { + sge->cmdQ[1].processed += cmdq_processed[1]; + cmdq_processed[1] = 0; } if (likely(e->DataValid)) { - if (!e->Sop || !e->Eop) + struct freelQ *fl = &sge->freelQ[e->FreelistQid]; + + if (unlikely(!e->Sop || !e->Eop)) BUG(); - t1_sge_rx(sge, &sge->freelQ[e->FreelistQid], - e->BufferLength, e->Offload); - tot_rxpayload += e->BufferLength; - ++n_rx; - } - flags |= e->Qsleeping; - credits_pend[0] += e->Cmdq0CreditReturn; - credits_pend[1] += e->Cmdq1CreditReturn; + if (unlikely(e->Offload)) + unexpected_offload(adapter, fl); + else + sge_rx(sge, fl, e->BufferLength); + + /* + * Note: this depends on each packet consuming a + * single free-list buffer; cf. the BUG above. + */ + if (++fl->cidx == fl->size) + fl->cidx = 0; + if (unlikely(--fl->credits < + fl->size - SGE_FREEL_REFILL_THRESH)) + refill_free_list(sge, fl); + } else + sge->stats.pure_rsps++; -#ifdef CONFIG_SMP - /* - * If enough cmdQ0 buffers have finished DMAing free them so - * anyone that may be waiting for their release can continue. - * We do this only on MP systems to allow other CPUs to proceed - * promptly. 
UP systems can wait for the free_cmdQ_buffers() - * calls after this loop as the sole CPU is currently busy in - * this loop. - */ - if (unlikely(credits_pend[0] > SGE_FREEL_REFILL_THRESH)) { - free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], - &tot_txpayload); - n_tx += credits_pend[0]; - credits_pend[0] = 0; - } -#endif - ret++; e++; - if (unlikely(++Q->cidx == Q->entries_n)) { - Q->cidx = 0; - Q->genbit ^= 1; - e = Q->entries; + if (unlikely(++q->cidx == q->size)) { + q->cidx = 0; + q->genbit ^= 1; + e = q->entries; + } + prefetch(e); + + if (++q->credits > SGE_RESPQ_REPLENISH_THRES) { + writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); + q->credits = 0; } + --budget_left; } - Q->credits = credits; - t1_write_reg_4(adapter, A_SG_SLEEPING, Q->cidx); + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + sge->cmdQ[1].processed += cmdq_processed[1]; - if (credits_pend[0]) - free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0], &tot_txpayload); - if (credits_pend[1]) - free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1], &tot_txpayload); + budget -= budget_left; + return budget; +} - /* Do any coalescing and interrupt latency timer adjustments */ - if (adapter->params.sge.coalesce_enable) { - unsigned int avg_txpayload = 0, avg_rxpayload = 0; +/* + * A simpler version of process_responses() that handles only pure (i.e., + * non data-carrying) responses. Such responses are too light-weight to justify + * calling a softirq when using NAPI, so we handle them specially in hard + * interrupt context. The function is called with a pointer to a response, + * which the caller must ensure is a valid pure response. Returns 1 if it + * encounters a valid data-carrying response, 0 otherwise. 
+ */ +static int process_pure_responses(struct adapter *adapter, struct respQ_e *e) +{ + struct sge *sge = adapter->sge; + struct respQ *q = &sge->respQ; + unsigned int flags = 0; + unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0}; - n_tx += credits_pend[0] + credits_pend[1]; + do { + flags |= e->Qsleeping; - /* - * Choose larger avg. payload size to increase - * throughput and reduce [CPU util., intr/s.] - * - * Throughput behavior favored in mixed-mode. - */ - if (n_tx) - avg_txpayload = tot_txpayload/n_tx; - if (n_rx) - avg_rxpayload = tot_rxpayload/n_rx; - - if (n_tx && avg_txpayload > avg_rxpayload){ - update_intr_timer(sge, avg_txpayload); - } else if (n_rx) { - update_intr_timer(sge, avg_rxpayload); + cmdq_processed[0] += e->Cmdq0CreditReturn; + cmdq_processed[1] += e->Cmdq1CreditReturn; + + e++; + if (unlikely(++q->cidx == q->size)) { + q->cidx = 0; + q->genbit ^= 1; + e = q->entries; } - } - - if (flags & F_CMDQ0_ENABLE) { - struct cmdQ *cmdQ = &sge->cmdQ[0]; + prefetch(e); - atomic_set(&cmdQ->asleep, 1); - if (atomic_read(&cmdQ->pio_pidx) != cmdQ->pidx) { - doorbell_pio(sge, F_CMDQ0_ENABLE); - atomic_set(&cmdQ->pio_pidx, cmdQ->pidx); + if (++q->credits > SGE_RESPQ_REPLENISH_THRES) { + writel(q->credits, adapter->regs + A_SG_RSPQUEUECREDIT); + q->credits = 0; } - } - if (unlikely(flags & (F_FL0_ENABLE | F_FL1_ENABLE))) - freelQs_empty(sge); + sge->stats.pure_rsps++; + } while (e->GenerationBit == q->genbit && !e->DataValid); - netdev = adapter->port[0].dev; - if (unlikely(netif_queue_stopped(netdev) && netif_carrier_ok(netdev) && - enough_free_Tx_descs(sge, 0) && - enough_free_Tx_descs(sge, 1))) { - netif_wake_queue(netdev); - } - if (unlikely(!ret)) - ret = t1_slow_intr_handler(adapter); + flags = update_tx_info(adapter, flags, cmdq_processed[0]); + sge->cmdQ[1].processed += cmdq_processed[1]; - return IRQ_RETVAL(ret != 0); + return e->GenerationBit == q->genbit; } /* - * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. 
- * - * The code figures out how many entries the sk_buff will require in the - * cmdQ and updates the cmdQ data structure with the state once the enqueue - * has complete. Then, it doesn't access the global structure anymore, but - * uses the corresponding fields on the stack. In conjuction with a spinlock - * around that code, we can make the function reentrant without holding the - * lock when we actually enqueue (which might be expensive, especially on - * architectures with IO MMUs). + * Handler for new data events when using NAPI. This does not need any locking + * or protection from interrupts as data interrupts are off at this point and + * other adapter interrupts do not interfere. */ -static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, - unsigned int qid) +static int t1_poll(struct net_device *dev, int *budget) { - struct sge *sge = adapter->sge; - struct cmdQ *Q = &sge->cmdQ[qid]; - struct cmdQ_e *e; - struct cmdQ_ce *ce; - dma_addr_t mapping; - unsigned int credits, pidx, genbit; + struct adapter *adapter = dev->priv; + int effective_budget = min(*budget, dev->quota); + + int work_done = process_responses(adapter, effective_budget); + *budget -= work_done; + dev->quota -= work_done; - unsigned int count = 1 + skb_shinfo(skb)->nr_frags; + if (work_done >= effective_budget) + return 1; + + __netif_rx_complete(dev); /* - * Coming from the timer + * Because we don't atomically flush the following write it is + * possible that in very rare cases it can reach the device in a way + * that races with a new response being written plus an error interrupt + * causing the NAPI interrupt handler below to return unhandled status + * to the OS. To protect against this would require flushing the write + * and doing both the write and the flush with interrupts off. Way too + * expensive and unjustifiable given the rarity of the race. 
*/ - if ((skb == sge->pskb)) { - /* - * Quit if any cmdQ activities - */ - if (!spin_trylock(&Q->Qlock)) - return 0; - if (atomic_read(&Q->credits) != Q->entries_n) { - spin_unlock(&Q->Qlock); - return 0; - } - } - else - spin_lock(&Q->Qlock); - - genbit = Q->genbit; - pidx = Q->pidx; - credits = atomic_read(&Q->credits); - - credits -= count; - atomic_sub(count, &Q->credits); - Q->pidx += count; - if (Q->pidx >= Q->entries_n) { - Q->pidx -= Q->entries_n; - Q->genbit ^= 1; - } + writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING); + return 0; +} - if (unlikely(credits < (MAX_SKB_FRAGS + 1))) { - sge->intr_cnt.cmdQ_full[qid]++; - netif_stop_queue(adapter->port[0].dev); - } - spin_unlock(&Q->Qlock); +/* + * Returns true if the device is already scheduled for polling. + */ +static inline int napi_is_scheduled(struct net_device *dev) +{ + return test_bit(__LINK_STATE_RX_SCHED, &dev->state); +} - mapping = pci_map_single(adapter->pdev, skb->data, - skb->len - skb->data_len, PCI_DMA_TODEVICE); - ce = &Q->centries[pidx]; - ce->skb = NULL; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, skb->len - skb->data_len); - ce->single = 1; +/* + * NAPI version of the main interrupt handler. + */ +static irqreturn_t t1_interrupt_napi(int irq, void *data, struct pt_regs *regs) +{ + int handled; + struct adapter *adapter = data; + struct sge *sge = adapter->sge; + struct respQ *q = &adapter->sge->respQ; - e = &Q->entries[pidx]; - e->Sop = 1; - e->DataValid = 1; - e->BufferLength = skb->len - skb->data_len; - e->AddrHigh = (u64)mapping >> 32; - e->AddrLow = (u32)mapping; + /* + * Clear the SGE_DATA interrupt first thing. Normally the NAPI + * handler has control of the response queue and the interrupt handler + * can look at the queue reliably only once it knows NAPI is off. 
+ * We can't wait that long to clear the SGE_DATA interrupt because we + * could race with t1_poll rearming the SGE interrupt, so we need to + * clear the interrupt speculatively and really early on. + */ + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); + + spin_lock(&adapter->async_lock); + if (!napi_is_scheduled(sge->netdev)) { + struct respQ_e *e = &q->entries[q->cidx]; + + if (e->GenerationBit == q->genbit) { + if (e->DataValid || + process_pure_responses(adapter, e)) { + if (likely(napi_schedule_prep(sge->netdev))) + __netif_rx_schedule(sge->netdev); + else + printk(KERN_CRIT + "NAPI schedule failure!\n"); + } else + writel(q->cidx, adapter->regs + A_SG_SLEEPING); + handled = 1; + goto unlock; + } else + writel(q->cidx, adapter->regs + A_SG_SLEEPING); + } else + if (readl(adapter->regs + A_PL_CAUSE) & F_PL_INTR_SGE_DATA) + printk(KERN_ERR "data interrupt while NAPI running\n"); + + handled = t1_slow_intr_handler(adapter); + if (!handled) + sge->stats.unhandled_irqs++; + unlock: + spin_unlock(&adapter->async_lock); + return IRQ_RETVAL(handled != 0); +} - if (--count > 0) { - unsigned int i; +/* + * Main interrupt handler, optimized assuming that we took a 'DATA' + * interrupt. + * + * 1. Clear the interrupt + * 2. Loop while we find valid descriptors and process them; accumulate + * information that can be processed after the loop + * 3. Tell the SGE at which index we stopped processing descriptors + * 4. Bookkeeping; free TX buffers, ring doorbell if there are any + * outstanding TX buffers waiting, replenish RX buffers, potentially + * reenable upper layers if they were turned off due to lack of TX + * resources which are available again. + * 5. If we took an interrupt, but no valid respQ descriptors was found we + * let the slow_intr_handler run and do error handling. 
+ */ +static irqreturn_t t1_interrupt(int irq, void *cookie, struct pt_regs *regs) +{ + int work_done; + struct respQ_e *e; + struct adapter *adapter = cookie; + struct respQ *Q = &adapter->sge->respQ; - e->Eop = 0; - wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; + spin_lock(&adapter->async_lock); + e = &Q->entries[Q->cidx]; + prefetch(e); - for (i = 0; i < count; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE); - ce++; e++; - if (++pidx == Q->entries_n) { - pidx = 0; - genbit ^= 1; - ce = Q->centries; - e = Q->entries; - } + if (likely(e->GenerationBit == Q->genbit)) + work_done = process_responses(adapter, -1); + else + work_done = t1_slow_intr_handler(adapter); - mapping = pci_map_page(adapter->pdev, frag->page, - frag->page_offset, - frag->size, - PCI_DMA_TODEVICE); - ce->skb = NULL; - pci_unmap_addr_set(ce, dma_addr, mapping); - pci_unmap_len_set(ce, dma_len, frag->size); - ce->single = 0; - - e->Sop = 0; - e->DataValid = 1; - e->BufferLength = frag->size; - e->AddrHigh = (u64)mapping >> 32; - e->AddrLow = (u32)mapping; - - if (i < count - 1) { - e->Eop = 0; - wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; - } + /* + * The unconditional clearing of the PL_CAUSE above may have raced + * with DMA completion and the corresponding generation of a response + * to cause us to miss the resulting data interrupt. The next write + * is also unconditional to recover the missed interrupt and render + * this race harmless. + */ + writel(Q->cidx, adapter->regs + A_SG_SLEEPING); + + if (!work_done) + adapter->sge->stats.unhandled_irqs++; + spin_unlock(&adapter->async_lock); + return IRQ_RETVAL(work_done != 0); +} + +intr_handler_t t1_select_intr_handler(adapter_t *adapter) +{ + return adapter->params.sge.polling ? t1_interrupt_napi : t1_interrupt; +} + +/* + * Enqueues the sk_buff onto the cmdQ[qid] and has hardware fetch it. 
+ * + * The code figures out how many entries the sk_buff will require in the + * cmdQ and updates the cmdQ data structure with the state once the enqueue + * has complete. Then, it doesn't access the global structure anymore, but + * uses the corresponding fields on the stack. In conjuction with a spinlock + * around that code, we can make the function reentrant without holding the + * lock when we actually enqueue (which might be expensive, especially on + * architectures with IO MMUs). + * + * This runs with softirqs disabled. + */ +unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid, struct net_device *dev) +{ + struct sge *sge = adapter->sge; + struct cmdQ *q = &sge->cmdQ[qid]; + unsigned int credits, pidx, genbit, count; + + spin_lock(&q->lock); + reclaim_completed_tx(sge, q); + + pidx = q->pidx; + credits = q->size - q->in_use; + count = 1 + skb_shinfo(skb)->nr_frags; + + { /* Ethernet packet */ + if (unlikely(credits < count)) { + netif_stop_queue(dev); + set_bit(dev->if_port, &sge->stopped_tx_queues); + sge->stats.cmdQ_full[3]++; + spin_unlock(&q->lock); + CH_ERR("%s: Tx ring full while queue awake!\n", + adapter->name); + return 1; } + if (unlikely(credits - count < q->stop_thres)) { + sge->stats.cmdQ_full[3]++; + netif_stop_queue(dev); + set_bit(dev->if_port, &sge->stopped_tx_queues); + } + } + q->in_use += count; + genbit = q->genbit; + q->pidx += count; + if (q->pidx >= q->size) { + q->pidx -= q->size; + q->genbit ^= 1; } + spin_unlock(&q->lock); - if (skb != sge->pskb) - ce->skb = skb; - e->Eop = 1; - wmb(); - e->GenerationBit = e->GenerationBit2 = genbit; + write_tx_descs(adapter, skb, pidx, genbit, q); /* * We always ring the doorbell for cmdQ1. For cmdQ0, we only ring @@ -1317,50 +1380,50 @@ static unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, * then the interrupt handler will detect the outstanding TX packet * and ring the doorbell for us. 
*/ - if (qid) { - doorbell_pio(sge, F_CMDQ1_ENABLE); - } else if (atomic_read(&Q->asleep)) { - atomic_set(&Q->asleep, 0); - doorbell_pio(sge, F_CMDQ0_ENABLE); - atomic_set(&Q->pio_pidx, Q->pidx); + if (qid) + doorbell_pio(adapter, F_CMDQ1_ENABLE); + else { + clear_bit(CMDQ_STAT_LAST_PKT_DB, &q->status); + if (test_and_set_bit(CMDQ_STAT_RUNNING, &q->status) == 0) { + set_bit(CMDQ_STAT_LAST_PKT_DB, &q->status); + writel(F_CMDQ0_ENABLE, adapter->regs + A_SG_DOORBELL); + } } return 0; } #define MK_ETH_TYPE_MSS(type, mss) (((mss) & 0x3FFF) | ((type) << 14)) +/* + * eth_hdr_len - return the length of an Ethernet header + * @data: pointer to the start of the Ethernet header + * + * Returns the length of an Ethernet header, including optional VLAN tag. + */ +static inline int eth_hdr_len(const void *data) +{ + const struct ethhdr *e = data; + + return e->h_proto == htons(ETH_P_8021Q) ? VLAN_ETH_HLEN : ETH_HLEN; +} + /* * Adds the CPL header to the sk_buff and passes it to t1_sge_tx. */ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct adapter *adapter = dev->priv; + struct sge_port_stats *st = &adapter->sge->port_stats[dev->if_port]; + struct sge *sge = adapter->sge; struct cpl_tx_pkt *cpl; - struct ethhdr *eth; - size_t max_len; - - /* - * We are using a non-standard hard_header_len and some kernel - * components, such as pktgen, do not handle it right. Complain - * when this happens but try to fix things up. - */ - if (unlikely(skb_headroom(skb) < dev->hard_header_len - ETH_HLEN)) { - struct sk_buff *orig_skb = skb; - - if (net_ratelimit()) - printk(KERN_ERR - "%s: Tx packet has inadequate headroom\n", - dev->name); - skb = skb_realloc_headroom(skb, sizeof(struct cpl_tx_pkt_lso)); - dev_kfree_skb_any(orig_skb); - if (!skb) - return -ENOMEM; - } +#ifdef NETIF_F_TSO if (skb_shinfo(skb)->tso_size) { int eth_type; struct cpl_tx_pkt_lso *hdr; + st->tso++; + eth_type = skb->nh.raw - skb->data == ETH_HLEN ? 
CPL_ETH_II : CPL_ETH_II_VLAN; @@ -1373,40 +1436,72 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_shinfo(skb)->tso_size)); hdr->len = htonl(skb->len - sizeof(*hdr)); cpl = (struct cpl_tx_pkt *)hdr; + sge->stats.tx_lso_pkts++; } else +#endif { /* - * An Ethernet packet must have at least space for - * the DIX Ethernet header and be no greater than - * the device set MTU. Otherwise trash the packet. + * Packets shorter than ETH_HLEN can break the MAC, drop them + * early. Also, we may get oversized packets because some + * parts of the kernel don't handle our unusual hard_header_len + * right, drop those too. */ - if (skb->len < ETH_HLEN) - goto t1_start_xmit_fail2; - eth = (struct ethhdr *)skb->data; - if (eth->h_proto == htons(ETH_P_8021Q)) - max_len = dev->mtu + VLAN_ETH_HLEN; - else - max_len = dev->mtu + ETH_HLEN; - if (skb->len > max_len) - goto t1_start_xmit_fail2; + if (unlikely(skb->len < ETH_HLEN || + skb->len > dev->mtu + eth_hdr_len(skb->data))) { + dev_kfree_skb_any(skb); + return NET_XMIT_SUCCESS; + } + + /* + * We are using a non-standard hard_header_len and some kernel + * components, such as pktgen, do not handle it right. + * Complain when this happens but try to fix things up. + */ + if (unlikely(skb_headroom(skb) < + dev->hard_header_len - ETH_HLEN)) { + struct sk_buff *orig_skb = skb; + + if (net_ratelimit()) + printk(KERN_ERR "%s: inadequate headroom in " + "Tx packet\n", dev->name); + skb = skb_realloc_headroom(skb, sizeof(*cpl)); + dev_kfree_skb_any(orig_skb); + if (!skb) + return -ENOMEM; + } if (!(adapter->flags & UDP_CSUM_CAPABLE) && skb->ip_summed == CHECKSUM_HW && - skb->nh.iph->protocol == IPPROTO_UDP && - skb_checksum_help(skb, 0)) - goto t1_start_xmit_fail3; - + skb->nh.iph->protocol == IPPROTO_UDP) + if (unlikely(skb_checksum_help(skb, 0))) { + dev_kfree_skb_any(skb); + return -ENOMEM; + } - if (!adapter->sge->pskb) { + /* Hmmm, assuming to catch the gratious arp... 
and we'll use + * it to flush out stuck espi packets... + */ + if (unlikely(!adapter->sge->espibug_skb)) { if (skb->protocol == htons(ETH_P_ARP) && - skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) - adapter->sge->pskb = skb; + skb->nh.arph->ar_op == htons(ARPOP_REQUEST)) { + adapter->sge->espibug_skb = skb; + /* We want to re-use this skb later. We + * simply bump the reference count and it + * will not be freed... + */ + skb = skb_get(skb); + } } - cpl = (struct cpl_tx_pkt *)skb_push(skb, sizeof(*cpl)); + + cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl)); cpl->opcode = CPL_TX_PKT; cpl->ip_csum_dis = 1; /* SW calculates IP csum */ cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 0 : 1; /* the length field isn't used so don't bother setting it */ + + st->tx_cso += (skb->ip_summed == CHECKSUM_HW); + sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW); + sge->stats.tx_reg_pkts++; } cpl->iff = dev->if_port; @@ -1414,38 +1509,176 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) if (adapter->vlan_grp && vlan_tx_tag_present(skb)) { cpl->vlan_valid = 1; cpl->vlan = htons(vlan_tx_tag_get(skb)); + st->vlan_insert++; } else #endif cpl->vlan_valid = 0; dev->trans_start = jiffies; - return t1_sge_tx(skb, adapter, 0); + return t1_sge_tx(skb, adapter, 0, dev); +} -t1_start_xmit_fail3: - printk(KERN_INFO "%s: Unable to complete checksum\n", dev->name); - goto t1_start_xmit_fail1; +/* + * Callback for the Tx buffer reclaim timer. Runs with softirqs disabled. 
+ */ +static void sge_tx_reclaim_cb(unsigned long data) +{ + int i; + struct sge *sge = (struct sge *)data; + + for (i = 0; i < SGE_CMDQ_N; ++i) { + struct cmdQ *q = &sge->cmdQ[i]; + + if (!spin_trylock(&q->lock)) + continue; -t1_start_xmit_fail2: - printk(KERN_INFO "%s: Invalid packet length %d, dropping\n", - dev->name, skb->len); + reclaim_completed_tx(sge, q); + if (i == 0 && q->in_use) /* flush pending credits */ + writel(F_CMDQ0_ENABLE, + sge->adapter->regs + A_SG_DOORBELL); -t1_start_xmit_fail1: - dev_kfree_skb_any(skb); + spin_unlock(&q->lock); + } + mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); +} + +/* + * Propagate changes of the SGE coalescing parameters to the HW. + */ +int t1_sge_set_coalesce_params(struct sge *sge, struct sge_params *p) +{ + sge->netdev->poll = t1_poll; + sge->fixed_intrtimer = p->rx_coalesce_usecs * + core_ticks_per_usec(sge->adapter); + writel(sge->fixed_intrtimer, sge->adapter->regs + A_SG_INTRTIMER); return 0; } -void t1_sge_set_ptimeout(adapter_t *adapter, u32 val) +/* + * Allocates both RX and TX resources and configures the SGE. However, + * the hardware is not enabled yet. + */ +int t1_sge_configure(struct sge *sge, struct sge_params *p) { - struct sge *sge = adapter->sge; + if (alloc_rx_resources(sge, p)) + return -ENOMEM; + if (alloc_tx_resources(sge, p)) { + free_rx_resources(sge); + return -ENOMEM; + } + configure_sge(sge, p); + + /* + * Now that we have sized the free lists calculate the payload + * capacity of the large buffers. Other parts of the driver use + * this to set the max offload coalescing size so that RX packets + * do not overflow our large buffers. + */ + p->large_buf_capacity = jumbo_payload_capacity(sge); + return 0; +} - if (is_T2(adapter)) - sge->ptimeout = max((u32)((HZ * val) / 1000), (u32)1); +/* + * Disables the DMA engine. 
+ */ +void t1_sge_stop(struct sge *sge) +{ + writel(0, sge->adapter->regs + A_SG_CONTROL); + (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ + if (is_T2(sge->adapter)) + del_timer_sync(&sge->espibug_timer); + del_timer_sync(&sge->tx_reclaim_timer); } -u32 t1_sge_get_ptimeout(adapter_t *adapter) +/* + * Enables the DMA engine. + */ +void t1_sge_start(struct sge *sge) { + refill_free_list(sge, &sge->freelQ[0]); + refill_free_list(sge, &sge->freelQ[1]); + + writel(sge->sge_control, sge->adapter->regs + A_SG_CONTROL); + doorbell_pio(sge->adapter, F_FL0_ENABLE | F_FL1_ENABLE); + (void) readl(sge->adapter->regs + A_SG_CONTROL); /* flush */ + + mod_timer(&sge->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); + + if (is_T2(sge->adapter)) + mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); +} + +/* + * Callback for the T2 ESPI 'stuck packet feature' workaorund + */ +static void espibug_workaround(void *data) +{ + struct adapter *adapter = (struct adapter *)data; struct sge *sge = adapter->sge; - return (is_T2(adapter) ? ((sge->ptimeout * 1000) / HZ) : 0); + if (netif_running(adapter->port[0].dev)) { + struct sk_buff *skb = sge->espibug_skb; + + u32 seop = t1_espi_get_mon(adapter, 0x930, 0); + + if ((seop & 0xfff0fff) == 0xfff && skb) { + if (!skb->cb[0]) { + u8 ch_mac_addr[ETH_ALEN] = + {0x0, 0x7, 0x43, 0x0, 0x0, 0x0}; + memcpy(skb->data + sizeof(struct cpl_tx_pkt), + ch_mac_addr, ETH_ALEN); + memcpy(skb->data + skb->len - 10, ch_mac_addr, + ETH_ALEN); + skb->cb[0] = 0xff; + } + + /* bump the reference count to avoid freeing of the + * skb once the DMA has completed. + */ + skb = skb_get(skb); + t1_sge_tx(skb, adapter, 0, adapter->port[0].dev); + } + } + mod_timer(&sge->espibug_timer, jiffies + sge->espibug_timeout); } +/* + * Creates a t1_sge structure and returns suggested resource parameters. 
+ */ +struct sge * __devinit t1_sge_create(struct adapter *adapter, + struct sge_params *p) +{ + struct sge *sge = kmalloc(sizeof(*sge), GFP_KERNEL); + + if (!sge) + return NULL; + memset(sge, 0, sizeof(*sge)); + + sge->adapter = adapter; + sge->netdev = adapter->port[0].dev; + sge->rx_pkt_pad = t1_is_T1B(adapter) ? 0 : 2; + sge->jumbo_fl = t1_is_T1B(adapter) ? 1 : 0; + + init_timer(&sge->tx_reclaim_timer); + sge->tx_reclaim_timer.data = (unsigned long)sge; + sge->tx_reclaim_timer.function = sge_tx_reclaim_cb; + + if (is_T2(sge->adapter)) { + init_timer(&sge->espibug_timer); + sge->espibug_timer.function = (void *)&espibug_workaround; + sge->espibug_timer.data = (unsigned long)sge->adapter; + sge->espibug_timeout = 1; + } + + + p->cmdQ_size[0] = SGE_CMDQ0_E_N; + p->cmdQ_size[1] = SGE_CMDQ1_E_N; + p->freelQ_size[!sge->jumbo_fl] = SGE_FREEL_SIZE; + p->freelQ_size[sge->jumbo_fl] = SGE_JUMBO_FREEL_SIZE; + p->rx_coalesce_usecs = 50; + p->coalesce_enable = 0; + p->sample_interval_usecs = 0; + p->polling = 0; + + return sge; +} diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h index 140f896def60..434b25586851 100644 --- a/drivers/net/chelsio/sge.h +++ b/drivers/net/chelsio/sge.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: sge.h * - * $Revision: 1.7 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.11 $ * + * $Date: 2005/06/21 22:10:55 $ * * Description: * * part of the Chelsio 10Gb Ethernet Driver. 
* * * @@ -36,25 +36,50 @@ * * ****************************************************************************/ -#ifndef _CHELSIO_LINUX_SGE_H_ -#define _CHELSIO_LINUX_SGE_H_ +#ifndef _CXGB_SGE_H_ +#define _CXGB_SGE_H_ #include #include #include +#ifndef IRQ_RETVAL +#define IRQ_RETVAL(x) +typedef void irqreturn_t; +#endif + +typedef irqreturn_t (*intr_handler_t)(int, void *, struct pt_regs *); + struct sge_intr_counts { unsigned int respQ_empty; /* # times respQ empty */ unsigned int respQ_overflow; /* # respQ overflow (fatal) */ unsigned int freelistQ_empty; /* # times freelist empty */ unsigned int pkt_too_big; /* packet too large (fatal) */ unsigned int pkt_mismatch; - unsigned int cmdQ_full[2]; /* not HW interrupt, host cmdQ[] full */ + unsigned int cmdQ_full[3]; /* not HW IRQ, host cmdQ[] full */ + unsigned int cmdQ_restarted[3];/* # of times cmdQ X was restarted */ + unsigned int ethernet_pkts; /* # of Ethernet packets received */ + unsigned int offload_pkts; /* # of offload packets received */ + unsigned int offload_bundles; /* # of offload pkt bundles delivered */ + unsigned int pure_rsps; /* # of non-payload responses */ + unsigned int unhandled_irqs; /* # of unhandled interrupts */ + unsigned int tx_ipfrags; + unsigned int tx_reg_pkts; + unsigned int tx_lso_pkts; + unsigned int tx_do_cksum; +}; + +struct sge_port_stats { + unsigned long rx_cso_good; /* # of successful RX csum offloads */ + unsigned long tx_cso; /* # of TX checksum offloads */ + unsigned long vlan_xtract; /* # of VLAN tag extractions */ + unsigned long vlan_insert; /* # of VLAN tag extractions */ + unsigned long tso; /* # of TSO requests */ + unsigned long rx_drops; /* # of packets dropped due to no mem */ }; struct sk_buff; struct net_device; -struct cxgbdev; struct adapter; struct sge_params; struct sge; @@ -63,7 +88,9 @@ struct sge *t1_sge_create(struct adapter *, struct sge_params *); int t1_sge_configure(struct sge *, struct sge_params *); int t1_sge_set_coalesce_params(struct sge *, 
struct sge_params *); void t1_sge_destroy(struct sge *); -irqreturn_t t1_interrupt(int, void *, struct pt_regs *); +intr_handler_t t1_select_intr_handler(adapter_t *adapter); +unsigned int t1_sge_tx(struct sk_buff *skb, struct adapter *adapter, + unsigned int qid, struct net_device *netdev); int t1_start_xmit(struct sk_buff *skb, struct net_device *dev); void t1_set_vlan_accel(struct adapter *adapter, int on_off); void t1_sge_start(struct sge *); @@ -72,8 +99,7 @@ int t1_sge_intr_error_handler(struct sge *); void t1_sge_intr_enable(struct sge *); void t1_sge_intr_disable(struct sge *); void t1_sge_intr_clear(struct sge *); +const struct sge_intr_counts *t1_sge_get_intr_counts(struct sge *sge); +const struct sge_port_stats *t1_sge_get_port_stats(struct sge *sge, int port); -void t1_sge_set_ptimeout(adapter_t *adapter, u32 val); -u32 t1_sge_get_ptimeout(adapter_t *adapter); - -#endif /* _CHELSIO_LINUX_SGE_H_ */ +#endif /* _CXGB_SGE_H_ */ diff --git a/drivers/net/chelsio/subr.c b/drivers/net/chelsio/subr.c index a90a3f95fcac..1ebb5d149aef 100644 --- a/drivers/net/chelsio/subr.c +++ b/drivers/net/chelsio/subr.c @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: subr.c * - * $Revision: 1.12 $ * - * $Date: 2005/03/23 07:41:27 $ * + * $Revision: 1.27 $ * + * $Date: 2005/06/22 01:08:36 $ * * Description: * * Various subroutines (intr,pio,etc.) used by Chelsio 10G Ethernet driver. * * part of the Chelsio 10Gb Ethernet Driver. 
* @@ -40,11 +40,9 @@ #include "common.h" #include "elmer0.h" #include "regs.h" - #include "gmac.h" #include "cphy.h" #include "sge.h" -#include "tp.h" #include "espi.h" /** @@ -64,7 +62,7 @@ static int t1_wait_op_done(adapter_t *adapter, int reg, u32 mask, int polarity, int attempts, int delay) { while (1) { - u32 val = t1_read_reg_4(adapter, reg) & mask; + u32 val = readl(adapter->regs + reg) & mask; if (!!val == polarity) return 0; @@ -84,9 +82,9 @@ static int __t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) { int tpi_busy; - t1_write_reg_4(adapter, A_TPI_ADDR, addr); - t1_write_reg_4(adapter, A_TPI_WR_DATA, value); - t1_write_reg_4(adapter, A_TPI_CSR, F_TPIWR); + writel(addr, adapter->regs + A_TPI_ADDR); + writel(value, adapter->regs + A_TPI_WR_DATA); + writel(F_TPIWR, adapter->regs + A_TPI_CSR); tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, TPI_ATTEMPTS, 3); @@ -100,9 +98,9 @@ int t1_tpi_write(adapter_t *adapter, u32 addr, u32 value) { int ret; - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); ret = __t1_tpi_write(adapter, addr, value); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return ret; } @@ -113,8 +111,8 @@ static int __t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) { int tpi_busy; - t1_write_reg_4(adapter, A_TPI_ADDR, addr); - t1_write_reg_4(adapter, A_TPI_CSR, 0); + writel(addr, adapter->regs + A_TPI_ADDR); + writel(0, adapter->regs + A_TPI_CSR); tpi_busy = t1_wait_op_done(adapter, A_TPI_CSR, F_TPIRDY, 1, TPI_ATTEMPTS, 3); @@ -122,7 +120,7 @@ static int __t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) CH_ALERT("%s: TPI read from 0x%x failed\n", adapter->name, addr); else - *valp = t1_read_reg_4(adapter, A_TPI_RD_DATA); + *valp = readl(adapter->regs + A_TPI_RD_DATA); return tpi_busy; } @@ -130,20 +128,12 @@ int t1_tpi_read(adapter_t *adapter, u32 addr, u32 *valp) { int ret; - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); ret = __t1_tpi_read(adapter, addr, valp); - TPI_UNLOCK(adapter); + 
spin_unlock(&(adapter)->tpi_lock); return ret; } -/* - * Set a TPI parameter. - */ -static void t1_tpi_par(adapter_t *adapter, u32 value) -{ - t1_write_reg_4(adapter, A_TPI_PAR, V_TPIPAR(value)); -} - /* * Called when a port's link settings change to propagate the new values to the * associated PHY and MAC. After performing the common tasks it invokes an @@ -227,7 +217,7 @@ static int mi1_mdio_ext_read(adapter_t *adapter, int phy_addr, int mmd_addr, { u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); /* Write the address we want. */ __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); @@ -242,7 +232,7 @@ static int mi1_mdio_ext_read(adapter_t *adapter, int phy_addr, int mmd_addr, /* Read the data. */ __t1_tpi_read(adapter, A_ELMER0_PORT0_MI1_DATA, valp); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return 0; } @@ -251,7 +241,7 @@ static int mi1_mdio_ext_write(adapter_t *adapter, int phy_addr, int mmd_addr, { u32 addr = V_MI1_REG_ADDR(mmd_addr) | V_MI1_PHY_ADDR(phy_addr); - TPI_LOCK(adapter); + spin_lock(&(adapter)->tpi_lock); /* Write the address we want. */ __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_ADDR, addr); @@ -264,7 +254,7 @@ static int mi1_mdio_ext_write(adapter_t *adapter, int phy_addr, int mmd_addr, __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_DATA, val); __t1_tpi_write(adapter, A_ELMER0_PORT0_MI1_OP, MI1_OP_INDIRECT_WRITE); mi1_wait_until_ready(adapter, A_ELMER0_PORT0_MI1_OP); - TPI_UNLOCK(adapter); + spin_unlock(&(adapter)->tpi_lock); return 0; } @@ -277,7 +267,6 @@ static struct mdio_ops mi1_mdio_ext_ops = { enum { CH_BRD_N110_1F, CH_BRD_N210_1F, - CH_BRD_T210_1F, }; static struct board_info t1_board[] = { @@ -308,13 +297,15 @@ struct pci_device_id t1_pci_tbl[] = { { 0, } }; +MODULE_DEVICE_TABLE(pci, t1_pci_tbl); + /* * Return the board_info structure with a given index. Out-of-range indices * return NULL. 
*/ const struct board_info *t1_get_board_info(unsigned int board_id) { - return board_id < DIMOF(t1_board) ? &t1_board[board_id] : NULL; + return board_id < ARRAY_SIZE(t1_board) ? &t1_board[board_id] : NULL; } struct chelsio_vpd_t { @@ -436,7 +427,6 @@ int elmer0_ext_intr_handler(adapter_t *adapter) t1_tpi_read(adapter, A_ELMER0_INT_CAUSE, &cause); switch (board_info(adapter)->board) { - case CHBT_BOARD_CHT210: case CHBT_BOARD_N210: case CHBT_BOARD_N110: if (cause & ELMER0_GP_BIT6) { /* Marvell 88x2010 interrupt */ @@ -446,23 +436,6 @@ int elmer0_ext_intr_handler(adapter_t *adapter) link_changed(adapter, 0); } break; - case CHBT_BOARD_8000: - case CHBT_BOARD_CHT110: - CH_DBG(adapter, INTR, "External interrupt cause 0x%x\n", - cause); - if (cause & ELMER0_GP_BIT1) { /* PMC3393 INTB */ - struct cmac *mac = adapter->port[0].mac; - - mac->ops->interrupt_handler(mac); - } - if (cause & ELMER0_GP_BIT5) { /* XPAK MOD_DETECT */ - u32 mod_detect; - - t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); - CH_MSG(adapter, INFO, LINK, "XPAK %s\n", - mod_detect ? "removed" : "inserted"); - } - break; } t1_tpi_write(adapter, A_ELMER0_INT_CAUSE, cause); return 0; @@ -472,11 +445,11 @@ int elmer0_ext_intr_handler(adapter_t *adapter) void t1_interrupts_enable(adapter_t *adapter) { unsigned int i; + u32 pl_intr; - adapter->slow_intr_mask = F_PL_INTR_SGE_ERR | F_PL_INTR_TP; + adapter->slow_intr_mask = F_PL_INTR_SGE_ERR; t1_sge_intr_enable(adapter->sge); - t1_tp_intr_enable(adapter->tp); if (adapter->espi) { adapter->slow_intr_mask |= F_PL_INTR_ESPI; t1_espi_intr_enable(adapter->espi); @@ -489,17 +462,15 @@ void t1_interrupts_enable(adapter_t *adapter) } /* Enable PCIX & external chip interrupts on ASIC boards. 
*/ - if (t1_is_asic(adapter)) { - u32 pl_intr = t1_read_reg_4(adapter, A_PL_ENABLE); + pl_intr = readl(adapter->regs + A_PL_ENABLE); - /* PCI-X interrupts */ - pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, - 0xffffffff); + /* PCI-X interrupts */ + pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, + 0xffffffff); - adapter->slow_intr_mask |= F_PL_INTR_EXT | F_PL_INTR_PCIX; - pl_intr |= F_PL_INTR_EXT | F_PL_INTR_PCIX; - t1_write_reg_4(adapter, A_PL_ENABLE, pl_intr); - } + adapter->slow_intr_mask |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + pl_intr |= F_PL_INTR_EXT | F_PL_INTR_PCIX; + writel(pl_intr, adapter->regs + A_PL_ENABLE); } /* Disables all interrupts. */ @@ -508,7 +479,6 @@ void t1_interrupts_disable(adapter_t* adapter) unsigned int i; t1_sge_intr_disable(adapter->sge); - t1_tp_intr_disable(adapter->tp); if (adapter->espi) t1_espi_intr_disable(adapter->espi); @@ -519,8 +489,7 @@ void t1_interrupts_disable(adapter_t* adapter) } /* Disable PCIX & external chip interrupts. */ - if (t1_is_asic(adapter)) - t1_write_reg_4(adapter, A_PL_ENABLE, 0); + writel(0, adapter->regs + A_PL_ENABLE); /* PCI-X interrupts */ pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_ENABLE, 0); @@ -532,9 +501,10 @@ void t1_interrupts_disable(adapter_t* adapter) void t1_interrupts_clear(adapter_t* adapter) { unsigned int i; + u32 pl_intr; + t1_sge_intr_clear(adapter->sge); - t1_tp_intr_clear(adapter->tp); if (adapter->espi) t1_espi_intr_clear(adapter->espi); @@ -545,12 +515,10 @@ void t1_interrupts_clear(adapter_t* adapter) } /* Enable interrupts for external devices. 
*/ - if (t1_is_asic(adapter)) { - u32 pl_intr = t1_read_reg_4(adapter, A_PL_CAUSE); + pl_intr = readl(adapter->regs + A_PL_CAUSE); - t1_write_reg_4(adapter, A_PL_CAUSE, - pl_intr | F_PL_INTR_EXT | F_PL_INTR_PCIX); - } + writel(pl_intr | F_PL_INTR_EXT | F_PL_INTR_PCIX, + adapter->regs + A_PL_CAUSE); /* PCI-X interrupts */ pci_write_config_dword(adapter->pdev, A_PCICFG_INTR_CAUSE, 0xffffffff); @@ -559,17 +527,15 @@ void t1_interrupts_clear(adapter_t* adapter) /* * Slow path interrupt handler for ASICs. */ -static int asic_slow_intr(adapter_t *adapter) +int t1_slow_intr_handler(adapter_t *adapter) { - u32 cause = t1_read_reg_4(adapter, A_PL_CAUSE); + u32 cause = readl(adapter->regs + A_PL_CAUSE); cause &= adapter->slow_intr_mask; if (!cause) return 0; if (cause & F_PL_INTR_SGE_ERR) t1_sge_intr_error_handler(adapter->sge); - if (cause & F_PL_INTR_TP) - t1_tp_intr_handler(adapter->tp); if (cause & F_PL_INTR_ESPI) t1_espi_intr_handler(adapter->espi); if (cause & F_PL_INTR_PCIX) @@ -578,41 +544,82 @@ static int asic_slow_intr(adapter_t *adapter) t1_elmer0_ext_intr(adapter); /* Clear the interrupts just processed. */ - t1_write_reg_4(adapter, A_PL_CAUSE, cause); - (void)t1_read_reg_4(adapter, A_PL_CAUSE); /* flush writes */ + writel(cause, adapter->regs + A_PL_CAUSE); + (void)readl(adapter->regs + A_PL_CAUSE); /* flush writes */ return 1; } -int t1_slow_intr_handler(adapter_t *adapter) +/* Pause deadlock avoidance parameters */ +#define DROP_MSEC 16 +#define DROP_PKTS_CNT 1 + +static void set_csum_offload(adapter_t *adapter, u32 csum_bit, int enable) +{ + u32 val = readl(adapter->regs + A_TP_GLOBAL_CONFIG); + + if (enable) + val |= csum_bit; + else + val &= ~csum_bit; + writel(val, adapter->regs + A_TP_GLOBAL_CONFIG); +} + +void t1_tp_set_ip_checksum_offload(adapter_t *adapter, int enable) { - return asic_slow_intr(adapter); + set_csum_offload(adapter, F_IP_CSUM, enable); } -/* Power sequencing is a work-around for Intel's XPAKs. 
*/ -static void power_sequence_xpak(adapter_t* adapter) +void t1_tp_set_udp_checksum_offload(adapter_t *adapter, int enable) { - u32 mod_detect; - u32 gpo; + set_csum_offload(adapter, F_UDP_CSUM, enable); +} - /* Check for XPAK */ - t1_tpi_read(adapter, A_ELMER0_GPI_STAT, &mod_detect); - if (!(ELMER0_GP_BIT5 & mod_detect)) { - /* XPAK is present */ - t1_tpi_read(adapter, A_ELMER0_GPO, &gpo); - gpo |= ELMER0_GP_BIT18; - t1_tpi_write(adapter, A_ELMER0_GPO, gpo); +void t1_tp_set_tcp_checksum_offload(adapter_t *adapter, int enable) +{ + set_csum_offload(adapter, F_TCP_CSUM, enable); +} + +static void t1_tp_reset(adapter_t *adapter, unsigned int tp_clk) +{ + u32 val; + + val = F_TP_IN_CSPI_CPL | F_TP_IN_CSPI_CHECK_IP_CSUM | + F_TP_IN_CSPI_CHECK_TCP_CSUM | F_TP_IN_ESPI_ETHERNET; + val |= F_TP_IN_ESPI_CHECK_IP_CSUM | + F_TP_IN_ESPI_CHECK_TCP_CSUM; + writel(val, adapter->regs + A_TP_IN_CONFIG); + writel(F_TP_OUT_CSPI_CPL | + F_TP_OUT_ESPI_ETHERNET | + F_TP_OUT_ESPI_GENERATE_IP_CSUM | + F_TP_OUT_ESPI_GENERATE_TCP_CSUM, + adapter->regs + A_TP_OUT_CONFIG); + + val = readl(adapter->regs + A_TP_GLOBAL_CONFIG); + val &= ~(F_IP_CSUM | F_UDP_CSUM | F_TCP_CSUM); + writel(val, adapter->regs + A_TP_GLOBAL_CONFIG); + + /* + * Enable pause frame deadlock prevention. 
+ */ + if (is_T2(adapter)) { + u32 drop_ticks = DROP_MSEC * (tp_clk / 1000); + + writel(F_ENABLE_TX_DROP | F_ENABLE_TX_ERROR | + V_DROP_TICKS_CNT(drop_ticks) | + V_NUM_PKTS_DROPPED(DROP_PKTS_CNT), + adapter->regs + A_TP_TX_DROP_CONFIG); } + + writel(F_TP_RESET, adapter->regs + A_TP_RESET); } int __devinit t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, struct adapter_params *p) { p->chip_version = bi->chip_term; - p->is_asic = (p->chip_version != CHBT_TERM_FPGA); if (p->chip_version == CHBT_TERM_T1 || - p->chip_version == CHBT_TERM_T2 || - p->chip_version == CHBT_TERM_FPGA) { - u32 val = t1_read_reg_4(adapter, A_TP_PC_CONFIG); + p->chip_version == CHBT_TERM_T2) { + u32 val = readl(adapter->regs + A_TP_PC_CONFIG); val = G_TP_PC_REV(val); if (val == 2) @@ -633,23 +640,11 @@ int __devinit t1_get_board_rev(adapter_t *adapter, const struct board_info *bi, static int board_init(adapter_t *adapter, const struct board_info *bi) { switch (bi->board) { - case CHBT_BOARD_8000: case CHBT_BOARD_N110: case CHBT_BOARD_N210: - case CHBT_BOARD_CHT210: - case CHBT_BOARD_COUGAR: - t1_tpi_par(adapter, 0xf); + writel(V_TPIPAR(0xf), adapter->regs + A_TPI_PAR); t1_tpi_write(adapter, A_ELMER0_GPO, 0x800); break; - case CHBT_BOARD_CHT110: - t1_tpi_par(adapter, 0xf); - t1_tpi_write(adapter, A_ELMER0_GPO, 0x1800); - - /* TBD XXX Might not need. This fixes a problem - * described in the Intel SR XPAK errata. 
- */ - power_sequence_xpak(adapter); - break; } return 0; } @@ -663,20 +658,19 @@ int t1_init_hw_modules(adapter_t *adapter) int err = -EIO; const struct board_info *bi = board_info(adapter); - if (!adapter->mc4) { - u32 val = t1_read_reg_4(adapter, A_MC4_CFG); + if (!bi->clock_mc4) { + u32 val = readl(adapter->regs + A_MC4_CFG); - t1_write_reg_4(adapter, A_MC4_CFG, val | F_READY | F_MC4_SLOW); - t1_write_reg_4(adapter, A_MC5_CONFIG, - F_M_BUS_ENABLE | F_TCAM_RESET); + writel(val | F_READY | F_MC4_SLOW, adapter->regs + A_MC4_CFG); + writel(F_M_BUS_ENABLE | F_TCAM_RESET, + adapter->regs + A_MC5_CONFIG); } if (adapter->espi && t1_espi_init(adapter->espi, bi->chip_mac, bi->espi_nports)) goto out_err; - if (t1_tp_reset(adapter->tp, &adapter->params.tp, bi->clock_core)) - goto out_err; + t1_tp_reset(adapter, bi->clock_core); err = t1_sge_configure(adapter->sge, &adapter->params.sge); if (err) @@ -690,7 +684,7 @@ int t1_init_hw_modules(adapter_t *adapter) /* * Determine a card's PCI mode. */ -static void __devinit get_pci_mode(adapter_t *adapter, struct pci_params *p) +static void __devinit get_pci_mode(adapter_t *adapter, struct chelsio_pci_params *p) { static unsigned short speed_map[] = { 33, 66, 100, 133 }; u32 pci_mode; @@ -720,8 +714,6 @@ void t1_free_sw_modules(adapter_t *adapter) if (adapter->sge) t1_sge_destroy(adapter->sge); - if (adapter->tp) - t1_tp_destroy(adapter->tp); if (adapter->espi) t1_espi_destroy(adapter->espi); } @@ -764,21 +756,12 @@ int __devinit t1_init_sw_modules(adapter_t *adapter, goto error; } - - if (bi->espi_nports && !(adapter->espi = t1_espi_create(adapter))) { CH_ERR("%s: ESPI initialization failed\n", adapter->name); goto error; } - adapter->tp = t1_tp_create(adapter, &adapter->params.tp); - if (!adapter->tp) { - CH_ERR("%s: TP initialization failed\n", - adapter->name); - goto error; - } - board_init(adapter, bi); bi->mdio_ops->init(adapter, bi); if (bi->gphy->reset) @@ -810,14 +793,12 @@ int __devinit t1_init_sw_modules(adapter_t 
*adapter, * Get the port's MAC addresses either from the EEPROM if one * exists or the one hardcoded in the MAC. */ - if (!t1_is_asic(adapter) || bi->chip_mac == CHBT_MAC_DUMMY) - mac->ops->macaddress_get(mac, hw_addr); - else if (vpd_macaddress_get(adapter, i, hw_addr)) { + if (vpd_macaddress_get(adapter, i, hw_addr)) { CH_ERR("%s: could not read MAC address from VPD ROM\n", - port_name(adapter, i)); + adapter->port[i].dev->name); goto error; } - t1_set_hw_addr(adapter, i, hw_addr); + memcpy(adapter->port[i].dev->dev_addr, hw_addr, ETH_ALEN); init_link_config(&adapter->port[i].link_config, bi); } diff --git a/drivers/net/chelsio/suni1x10gexp_regs.h b/drivers/net/chelsio/suni1x10gexp_regs.h index 98352bdda89b..81816c2b708a 100644 --- a/drivers/net/chelsio/suni1x10gexp_regs.h +++ b/drivers/net/chelsio/suni1x10gexp_regs.h @@ -1,8 +1,8 @@ /***************************************************************************** * * * File: suni1x10gexp_regs.h * - * $Revision: 1.4 $ * - * $Date: 2005/03/23 07:15:59 $ * + * $Revision: 1.9 $ * + * $Date: 2005/06/22 00:17:04 $ * * Description: * * PMC/SIERRA (pm3393) MAC-PHY functionality. * * part of the Chelsio 10Gb Ethernet Driver. * @@ -21,24 +21,16 @@ * * * http://www.chelsio.com * * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * * Maintainers: maintainers@chelsio.com * * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * + * Authors: PMC/SIERRA * * * * History: * * * ****************************************************************************/ -#ifndef _SUNI1x10GEXP_REGS_H -#define _SUNI1x10GEXP_REGS_H +#ifndef _CXGB_SUNI1x10GEXP_REGS_H_ +#define _CXGB_SUNI1x10GEXP_REGS_H_ /******************************************************************************/ /** S/UNI-1x10GE-XP REGISTER ADDRESS MAP **/ @@ -217,5 +209,5 @@ #define SUNI1x10GEXP_BITMSK_TXXG_FCRX 0x0004 #define SUNI1x10GEXP_BITMSK_TXXG_PADEN 0x0002 -#endif /* _SUNI1x10GEXP_REGS_H */ +#endif /* _CXGB_SUNI1x10GEXP_REGS_H_ */ diff --git a/drivers/net/chelsio/tp.c b/drivers/net/chelsio/tp.c deleted file mode 100644 index 9ad5c539fd28..000000000000 --- a/drivers/net/chelsio/tp.c +++ /dev/null @@ -1,188 +0,0 @@ -/***************************************************************************** - * * - * File: tp.c * - * $Revision: 1.6 $ * - * $Date: 2005/03/23 07:15:59 $ * - * Description: * - * Core ASIC Management. * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#include "common.h" -#include "regs.h" -#include "tp.h" - -struct petp { - adapter_t *adapter; -}; - -/* Pause deadlock avoidance parameters */ -#define DROP_MSEC 16 -#define DROP_PKTS_CNT 1 - - -static void tp_init(adapter_t *ap, const struct tp_params *p, - unsigned int tp_clk) -{ - if (t1_is_asic(ap)) { - u32 val; - - val = F_TP_IN_CSPI_CPL | F_TP_IN_CSPI_CHECK_IP_CSUM | - F_TP_IN_CSPI_CHECK_TCP_CSUM | F_TP_IN_ESPI_ETHERNET; - if (!p->pm_size) - val |= F_OFFLOAD_DISABLE; - else - val |= F_TP_IN_ESPI_CHECK_IP_CSUM | - F_TP_IN_ESPI_CHECK_TCP_CSUM; - t1_write_reg_4(ap, A_TP_IN_CONFIG, val); - t1_write_reg_4(ap, A_TP_OUT_CONFIG, F_TP_OUT_CSPI_CPL | - F_TP_OUT_ESPI_ETHERNET | - F_TP_OUT_ESPI_GENERATE_IP_CSUM | - F_TP_OUT_ESPI_GENERATE_TCP_CSUM); - t1_write_reg_4(ap, A_TP_GLOBAL_CONFIG, V_IP_TTL(64) | - F_PATH_MTU /* IP DF bit */ | - V_5TUPLE_LOOKUP(p->use_5tuple_mode) | - V_SYN_COOKIE_PARAMETER(29)); - - /* - * Enable pause frame deadlock prevention. 
- */ - if (is_T2(ap)) { - u32 drop_ticks = DROP_MSEC * (tp_clk / 1000); - - t1_write_reg_4(ap, A_TP_TX_DROP_CONFIG, - F_ENABLE_TX_DROP | F_ENABLE_TX_ERROR | - V_DROP_TICKS_CNT(drop_ticks) | - V_NUM_PKTS_DROPPED(DROP_PKTS_CNT)); - } - - } -} - -void t1_tp_destroy(struct petp *tp) -{ - kfree(tp); -} - -struct petp * __devinit t1_tp_create(adapter_t *adapter, struct tp_params *p) -{ - struct petp *tp = kmalloc(sizeof(*tp), GFP_KERNEL); - if (!tp) - return NULL; - memset(tp, 0, sizeof(*tp)); - tp->adapter = adapter; - - return tp; -} - -void t1_tp_intr_enable(struct petp *tp) -{ - u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); - - { - /* We don't use any TP interrupts */ - t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); - t1_write_reg_4(tp->adapter, A_PL_ENABLE, - tp_intr | F_PL_INTR_TP); - } -} - -void t1_tp_intr_disable(struct petp *tp) -{ - u32 tp_intr = t1_read_reg_4(tp->adapter, A_PL_ENABLE); - - { - t1_write_reg_4(tp->adapter, A_TP_INT_ENABLE, 0); - t1_write_reg_4(tp->adapter, A_PL_ENABLE, - tp_intr & ~F_PL_INTR_TP); - } -} - -void t1_tp_intr_clear(struct petp *tp) -{ - t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, 0xffffffff); - t1_write_reg_4(tp->adapter, A_PL_CAUSE, F_PL_INTR_TP); -} - -int t1_tp_intr_handler(struct petp *tp) -{ - u32 cause; - - - cause = t1_read_reg_4(tp->adapter, A_TP_INT_CAUSE); - t1_write_reg_4(tp->adapter, A_TP_INT_CAUSE, cause); - return 0; -} - -static void set_csum_offload(struct petp *tp, u32 csum_bit, int enable) -{ - u32 val = t1_read_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG); - - if (enable) - val |= csum_bit; - else - val &= ~csum_bit; - t1_write_reg_4(tp->adapter, A_TP_GLOBAL_CONFIG, val); -} - -void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_IP_CSUM, enable); -} - -void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_UDP_CSUM, enable); -} - -void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable) -{ - set_csum_offload(tp, F_TCP_CSUM, 
enable); -} - -/* - * Initialize TP state. tp_params contains initial settings for some TP - * parameters, particularly the one-time PM and CM settings. - */ -int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk) -{ - int busy = 0; - adapter_t *adapter = tp->adapter; - - tp_init(adapter, p, tp_clk); - if (!busy) - t1_write_reg_4(adapter, A_TP_RESET, F_TP_RESET); - else - CH_ERR("%s: TP initialization timed out\n", - adapter->name); - return busy; -} diff --git a/drivers/net/chelsio/tp.h b/drivers/net/chelsio/tp.h deleted file mode 100644 index 2ebc5c0d62e7..000000000000 --- a/drivers/net/chelsio/tp.h +++ /dev/null @@ -1,110 +0,0 @@ -/***************************************************************************** - * * - * File: tp.h * - * $Revision: 1.3 $ * - * $Date: 2005/03/23 07:15:59 $ * - * Description: * - * part of the Chelsio 10Gb Ethernet Driver. * - * * - * This program is free software; you can redistribute it and/or modify * - * it under the terms of the GNU General Public License, version 2, as * - * published by the Free Software Foundation. * - * * - * You should have received a copy of the GNU General Public License along * - * with this program; if not, write to the Free Software Foundation, Inc., * - * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - * * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * - * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. * - * * - * http://www.chelsio.com * - * * - * Copyright (c) 2003 - 2005 Chelsio Communications, Inc. * - * All rights reserved. 
* - * * - * Maintainers: maintainers@chelsio.com * - * * - * Authors: Dimitrios Michailidis * - * Tina Yang * - * Felix Marti * - * Scott Bardone * - * Kurt Ottaway * - * Frank DiMambro * - * * - * History: * - * * - ****************************************************************************/ - -#ifndef CHELSIO_TP_H -#define CHELSIO_TP_H - -#include "common.h" - -#define TP_MAX_RX_COALESCING_SIZE 16224U - -struct tp_mib_statistics { - - /* IP */ - u32 ipInReceive_hi; - u32 ipInReceive_lo; - u32 ipInHdrErrors_hi; - u32 ipInHdrErrors_lo; - u32 ipInAddrErrors_hi; - u32 ipInAddrErrors_lo; - u32 ipInUnknownProtos_hi; - u32 ipInUnknownProtos_lo; - u32 ipInDiscards_hi; - u32 ipInDiscards_lo; - u32 ipInDelivers_hi; - u32 ipInDelivers_lo; - u32 ipOutRequests_hi; - u32 ipOutRequests_lo; - u32 ipOutDiscards_hi; - u32 ipOutDiscards_lo; - u32 ipOutNoRoutes_hi; - u32 ipOutNoRoutes_lo; - u32 ipReasmTimeout; - u32 ipReasmReqds; - u32 ipReasmOKs; - u32 ipReasmFails; - - u32 reserved[8]; - - /* TCP */ - u32 tcpActiveOpens; - u32 tcpPassiveOpens; - u32 tcpAttemptFails; - u32 tcpEstabResets; - u32 tcpOutRsts; - u32 tcpCurrEstab; - u32 tcpInSegs_hi; - u32 tcpInSegs_lo; - u32 tcpOutSegs_hi; - u32 tcpOutSegs_lo; - u32 tcpRetransSeg_hi; - u32 tcpRetransSeg_lo; - u32 tcpInErrs_hi; - u32 tcpInErrs_lo; - u32 tcpRtoMin; - u32 tcpRtoMax; -}; - -struct petp; -struct tp_params; - -struct petp *t1_tp_create(adapter_t *adapter, struct tp_params *p); -void t1_tp_destroy(struct petp *tp); - -void t1_tp_intr_disable(struct petp *tp); -void t1_tp_intr_enable(struct petp *tp); -void t1_tp_intr_clear(struct petp *tp); -int t1_tp_intr_handler(struct petp *tp); - -void t1_tp_get_mib_statistics(adapter_t *adap, struct tp_mib_statistics *tps); -void t1_tp_set_udp_checksum_offload(struct petp *tp, int enable); -void t1_tp_set_tcp_checksum_offload(struct petp *tp, int enable); -void t1_tp_set_ip_checksum_offload(struct petp *tp, int enable); -int t1_tp_set_coalescing_size(struct petp *tp, unsigned int size); 
-int t1_tp_reset(struct petp *tp, struct tp_params *p, unsigned int tp_clk); -#endif diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 63e89e47b8e9..41e35a72b964 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2120,6 +2120,7 @@ #define PCI_DEVICE_ID_ENE_1225 0x1225 #define PCI_DEVICE_ID_ENE_1410 0x1410 #define PCI_DEVICE_ID_ENE_1420 0x1420 +#define PCI_VENDOR_ID_CHELSIO 0x1425 #define PCI_VENDOR_ID_SYBA 0x1592 #define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782 -- cgit v1.2.3 From 5bb289b5a0becb53ac3e1d60815ff8b779296b73 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 24 Jun 2005 14:14:05 +0100 Subject: AUDIT: Clean up user message filtering Don't look up the task by its pid and then use the syscall filtering helper. Just implement our own filter helper which operates solely on the information in the netlink_skb_parms. Signed-off-by: David Woodhouse --- include/linux/audit.h | 5 +++-- kernel/audit.c | 2 +- kernel/auditsc.c | 56 ++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 46 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2f56546eb248..38999f827a36 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -205,6 +205,7 @@ struct audit_sig_info { struct audit_buffer; struct audit_context; struct inode; +struct netlink_skb_parms; #define AUDITSC_INVALID 0 #define AUDITSC_SUCCESS 1 @@ -236,7 +237,7 @@ extern int audit_socketcall(int nargs, unsigned long *args); extern int audit_sockaddr(int len, void *addr); extern int audit_avc_path(struct dentry *dentry, struct vfsmount *mnt); extern void audit_signal_info(int sig, struct task_struct *t); -extern int audit_filter_user(int pid, int type); +extern int audit_filter_user(struct netlink_skb_parms *cb, int type); #else #define audit_alloc(t) ({ 0; }) #define audit_free(t) do { ; } while (0) @@ -253,7 +254,7 @@ extern int audit_filter_user(int pid, int type); #define 
audit_sockaddr(len, addr) ({ 0; }) #define audit_avc_path(dentry, mnt) ({ 0; }) #define audit_signal_info(s,t) do { ; } while (0) -#define audit_filter_user(p,t) ({ 1; }) +#define audit_filter_user(cb,t) ({ 1; }) #endif #ifdef CONFIG_AUDIT diff --git a/kernel/audit.c b/kernel/audit.c index 9af947a63ed1..6f1784dd80af 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -434,7 +434,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!audit_enabled && msg_type != AUDIT_USER_AVC) return 0; - err = audit_filter_user(pid, msg_type); + err = audit_filter_user(&NETLINK_CB(skb), msg_type); if (err == 1) { err = 0; ab = audit_log_start(NULL, GFP_KERNEL, msg_type); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 7b123f0a9481..34a990223c9e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -40,6 +40,7 @@ #include #include #include +#include #include /* 0 = no checking @@ -530,35 +531,62 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk, return AUDIT_BUILD_CONTEXT; } -int audit_filter_user(int pid, int type) +static int audit_filter_user_rules(struct netlink_skb_parms *cb, + struct audit_rule *rule, + enum audit_state *state) +{ + int i; + + for (i = 0; i < rule->field_count; i++) { + u32 field = rule->fields[i] & ~AUDIT_NEGATE; + u32 value = rule->values[i]; + int result = 0; + + switch (field) { + case AUDIT_PID: + result = (cb->creds.pid == value); + break; + case AUDIT_UID: + result = (cb->creds.uid == value); + break; + case AUDIT_GID: + result = (cb->creds.gid == value); + break; + case AUDIT_LOGINUID: + result = (cb->loginuid == value); + break; + } + + if (rule->fields[i] & AUDIT_NEGATE) + result = !result; + if (!result) + return 0; + } + switch (rule->action) { + case AUDIT_NEVER: *state = AUDIT_DISABLED; break; + case AUDIT_POSSIBLE: *state = AUDIT_BUILD_CONTEXT; break; + case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; + } + return 1; +} + +int audit_filter_user(struct netlink_skb_parms *cb, int 
type) { - struct task_struct *tsk; struct audit_entry *e; enum audit_state state; int ret = 1; - read_lock(&tasklist_lock); - tsk = find_task_by_pid(pid); - if (tsk) - get_task_struct(tsk); - read_unlock(&tasklist_lock); - - if (!tsk) - return -ESRCH; - rcu_read_lock(); list_for_each_entry_rcu(e, &audit_filter_list[AUDIT_FILTER_USER], list) { - if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { + if (audit_filter_user_rules(cb, &e->rule, &state)) { if (state == AUDIT_DISABLED) ret = 0; break; } } rcu_read_unlock(); - put_task_struct(tsk); return ret; /* Audit by default */ - } /* This should be called with task_lock() held. */ -- cgit v1.2.3 From 9bd481f85940726bf66aae5cd03c5b912ad0ae4c Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 28 Jun 2005 01:46:35 -0400 Subject: wireless: fix ipw warning; add is_broadcast_ether_addr() to linux/etherdevice.h --- drivers/net/wireless/ipw2200.c | 2 +- include/linux/etherdevice.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index 16cfd907e715..133666d43d7e 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -1170,7 +1170,7 @@ static int ipw_send_cmd(struct ipw_priv *priv, struct host_cmd *cmd) HOST_COMPLETE_TIMEOUT); if (rc == 0) { IPW_DEBUG_INFO("Command completion failed out after %dms.\n", - HOST_COMPLETE_TIMEOUT / (HZ / 1000)); + jiffies_to_msecs(HOST_COMPLETE_TIMEOUT)); priv->status &= ~STATUS_HCMD_ACTIVE; return -EIO; } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 8a2df4dfbc59..ae588aab21b6 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -68,6 +68,12 @@ static inline int is_multicast_ether_addr(const u8 *addr) return ((addr[0] != 0xff) && (0x01 & addr[0])); } +static inline int is_broadcast_ether_addr(const u8 *addr) +{ + return ((addr[0] == 0xff) && (addr[1] == 0xff) && (addr[2] == 0xff) && + (addr[3] 
== 0xff) && (addr[4] == 0xff) && (addr[5] == 0xff)); +} + /** * is_valid_ether_addr - Determine if the given Ethernet address is valid * @addr: Pointer to a six-byte array containing the Ethernet address -- cgit v1.2.3 From 13774024da8ebdf17212c0f5a83f5b0681a649eb Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 2 Jul 2005 13:49:07 +0100 Subject: AUDIT: Fix definition of audit_log_start() if audit not enabled audit_log_start() seems to take 3 arguments, but its defined to take only 2 when AUDIT is turned off. security/selinux/avc.c:553:75: macro "audit_log_start" passed 3 arguments, but takes just 2 Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 38999f827a36..c22405fc2ddf 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -286,7 +286,7 @@ extern void audit_log_lost(const char *message); extern struct semaphore audit_netlink_sem; #else #define audit_log(c,t,f,...) do { ; } while (0) -#define audit_log_start(c,t) ({ NULL; }) +#define audit_log_start(c,g,t) ({ NULL; }) #define audit_log_vformat(b,f,a) do { ; } while (0) #define audit_log_format(b,f,...) do { ; } while (0) #define audit_log_end(b) do { ; } while (0) -- cgit v1.2.3 From 7b430437c0de81681ecfa8efa8f55823df733529 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 2 Jul 2005 13:50:40 +0100 Subject: AUDIT: Fix definition of audit_log() if audit not enabled audit_log() also takes an extra argument, although it's a vararg function so the compiler didn't really notice. 
Signed-off-by: David Woodhouse --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index c22405fc2ddf..d68e85580a53 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -285,7 +285,7 @@ extern void audit_send_reply(int pid, int seq, int type, extern void audit_log_lost(const char *message); extern struct semaphore audit_netlink_sem; #else -#define audit_log(c,t,f,...) do { ; } while (0) +#define audit_log(c,g,t,f,...) do { ; } while (0) #define audit_log_start(c,g,t) ({ NULL; }) #define audit_log_vformat(b,f,a) do { ; } while (0) #define audit_log_format(b,f,...) do { ; } while (0) -- cgit v1.2.3 From b67dbf9d4c1987c370fd18fdc4cf9d8aaea604c2 Mon Sep 17 00:00:00 2001 From: Greg KH Date: Thu, 7 Jul 2005 14:37:53 -0700 Subject: [PATCH] add securityfs for all LSMs to use Here's a small patch against 2.6.13-rc2 that adds securityfs, a virtual fs that all LSMs can use instead of creating their own. The fs should be mounted at /sys/kernel/security, and the fs creates that mount point. This will make the LSB people happy that we aren't creating a new /my_lsm_fs directory in the root for every different LSM. It has changed a bit since the last version, thanks to comments from Mike Waychison. 
Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chris Wright --- include/linux/security.h | 5 + security/Makefile | 2 +- security/inode.c | 347 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 353 insertions(+), 1 deletion(-) create mode 100644 security/inode.c (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1c..cd3d8a9f951e 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1983,6 +1983,11 @@ extern int register_security (struct security_operations *ops); extern int unregister_security (struct security_operations *ops); extern int mod_reg_security (const char *name, struct security_operations *ops); extern int mod_unreg_security (const char *name, struct security_operations *ops); +extern struct dentry *securityfs_create_file(const char *name, mode_t mode, + struct dentry *parent, void *data, + struct file_operations *fops); +extern struct dentry *securityfs_create_dir(const char *name, struct dentry *parent); +extern void securityfs_remove(struct dentry *dentry); #else /* CONFIG_SECURITY */ diff --git a/security/Makefile b/security/Makefile index 197cc2f3f1ec..8cbbf2f36709 100644 --- a/security/Makefile +++ b/security/Makefile @@ -11,7 +11,7 @@ obj-y += commoncap.o endif # Object file lists -obj-$(CONFIG_SECURITY) += security.o dummy.o +obj-$(CONFIG_SECURITY) += security.o dummy.o inode.o # Must precede capability.o in order to stack properly. 
obj-$(CONFIG_SECURITY_SELINUX) += selinux/built-in.o obj-$(CONFIG_SECURITY_CAPABILITIES) += commoncap.o capability.o diff --git a/security/inode.c b/security/inode.c new file mode 100644 index 000000000000..a5964502ae30 --- /dev/null +++ b/security/inode.c @@ -0,0 +1,347 @@ +/* + * inode.c - securityfs + * + * Copyright (C) 2005 Greg Kroah-Hartman + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * Based on fs/debugfs/inode.c which had the following copyright notice: + * Copyright (C) 2004 Greg Kroah-Hartman + * Copyright (C) 2004 IBM Inc. + */ + +/* #define DEBUG */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define SECURITYFS_MAGIC 0x73636673 + +static struct vfsmount *mount; +static int mount_count; + +/* + * TODO: + * I think I can get rid of these default_file_ops, but not quite sure... + */ +static ssize_t default_read_file(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + return 0; +} + +static ssize_t default_write_file(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + return count; +} + +static int default_open(struct inode *inode, struct file *file) +{ + if (inode->u.generic_ip) + file->private_data = inode->u.generic_ip; + + return 0; +} + +static struct file_operations default_file_ops = { + .read = default_read_file, + .write = default_write_file, + .open = default_open, +}; + +static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) +{ + struct inode *inode = new_inode(sb); + + if (inode) { + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + default: + init_special_inode(inode, mode, dev); + break; + case S_IFREG: + inode->i_fop = 
&default_file_ops; + break; + case S_IFDIR: + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + } + } + return inode; +} + +/* SMP-safe */ +static int mknod(struct inode *dir, struct dentry *dentry, + int mode, dev_t dev) +{ + struct inode *inode; + int error = -EPERM; + + if (dentry->d_inode) + return -EEXIST; + + inode = get_inode(dir->i_sb, mode, dev); + if (inode) { + d_instantiate(dentry, inode); + dget(dentry); + error = 0; + } + return error; +} + +static int mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + int res; + + mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; + res = mknod(dir, dentry, mode, 0); + if (!res) + dir->i_nlink++; + return res; +} + +static int create(struct inode *dir, struct dentry *dentry, int mode) +{ + mode = (mode & S_IALLUGO) | S_IFREG; + return mknod(dir, dentry, mode, 0); +} + +static inline int positive(struct dentry *dentry) +{ + return dentry->d_inode && !d_unhashed(dentry); +} + +static int fill_super(struct super_block *sb, void *data, int silent) +{ + static struct tree_descr files[] = {{""}}; + + return simple_fill_super(sb, SECURITYFS_MAGIC, files); +} + +static struct super_block *get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_single(fs_type, flags, data, fill_super); +} + +static struct file_system_type fs_type = { + .owner = THIS_MODULE, + .name = "securityfs", + .get_sb = get_sb, + .kill_sb = kill_litter_super, +}; + +static int create_by_name(const char *name, mode_t mode, + struct dentry *parent, + struct dentry **dentry) +{ + int error = 0; + + *dentry = NULL; + + /* If the parent is not specified, we create it in the root. + * We need the root dentry to do this, which is in the super + * block. A pointer to that is in the struct vfsmount that we + * have around. 
+ */ + if (!parent ) { + if (mount && mount->mnt_sb) { + parent = mount->mnt_sb->s_root; + } + } + if (!parent) { + pr_debug("securityfs: Ah! can not find a parent!\n"); + return -EFAULT; + } + + down(&parent->d_inode->i_sem); + *dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry)) { + if ((mode & S_IFMT) == S_IFDIR) + error = mkdir(parent->d_inode, *dentry, mode); + else + error = create(parent->d_inode, *dentry, mode); + } else + error = PTR_ERR(dentry); + up(&parent->d_inode->i_sem); + + return error; +} + +/** + * securityfs_create_file - create a file in the securityfs filesystem + * + * @name: a pointer to a string containing the name of the file to create. + * @mode: the permission that the file should have + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * file will be created in the root of the securityfs filesystem. + * @data: a pointer to something that the caller will want to get to later + * on. The inode.u.generic_ip pointer will point to this value on + * the open() call. + * @fops: a pointer to a struct file_operations that should be used for + * this file. + * + * This is the basic "create a file" function for securityfs. It allows for a + * wide range of flexibility in creating a file, or a directory (if you + * want to create a directory, the securityfs_create_dir() function is + * recommended to be used instead.) + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the securityfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) On error, an ERR_PTR() value is returned. + * + * If securityfs is not enabled in the kernel, the value -ENODEV will be + * returned.
It is not wise to check for this value, but rather, check for + * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * code. + */ +struct dentry *securityfs_create_file(const char *name, mode_t mode, + struct dentry *parent, void *data, + struct file_operations *fops) +{ + struct dentry *dentry = NULL; + int error; + + pr_debug("securityfs: creating file '%s'\n",name); + + error = simple_pin_fs("securityfs", &mount, &mount_count); + if (error) { + dentry = ERR_PTR(error); + goto exit; + } + + error = create_by_name(name, mode, parent, &dentry); + if (error) { + dentry = ERR_PTR(error); + simple_release_fs(&mount, &mount_count); + goto exit; + } + + if (dentry->d_inode) { + if (fops) + dentry->d_inode->i_fop = fops; + if (data) + dentry->d_inode->u.generic_ip = data; + } +exit: + return dentry; +} +EXPORT_SYMBOL_GPL(securityfs_create_file); + +/** + * securityfs_create_dir - create a directory in the securityfs filesystem + * + * @name: a pointer to a string containing the name of the directory to + * create. + * @parent: a pointer to the parent dentry for this file. This should be a + * directory dentry if set. If this parameter is NULL, then the + * directory will be created in the root of the securityfs filesystem. + * + * This function creates a directory in securityfs with the given name. + * + * This function will return a pointer to a dentry if it succeeds. This + * pointer must be passed to the securityfs_remove() function when the file is + * to be removed (no automatic cleanup happens if your module is unloaded, + * you are responsible here.) On error, an ERR_PTR() value is returned. + * + * If securityfs is not enabled in the kernel, the value -ENODEV will be + * returned. It is not wise to check for this value, but rather, check for + * NULL or !NULL instead as to eliminate the need for #ifdef in the calling + * code.
+ */ +struct dentry *securityfs_create_dir(const char *name, struct dentry *parent) +{ + return securityfs_create_file(name, + S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + parent, NULL, NULL); +} +EXPORT_SYMBOL_GPL(securityfs_create_dir); + +/** + * securityfs_remove - removes a file or directory from the securityfs filesystem + * + * @dentry: a pointer to the dentry of the file or directory to be + * removed. + * + * This function removes a file or directory in securityfs that was previously + * created with a call to another securityfs function (like + * securityfs_create_file() or variants thereof.) + * + * This function is required to be called in order for the file to be + * removed, no automatic cleanup of files will happen when a module is + * removed, you are responsible here. + */ +void securityfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + + if (!dentry) + return; + + parent = dentry->d_parent; + if (!parent || !parent->d_inode) + return; + + down(&parent->d_inode->i_sem); + if (positive(dentry)) { + if (dentry->d_inode) { + if (S_ISDIR(dentry->d_inode->i_mode)) + simple_rmdir(parent->d_inode, dentry); + else + simple_unlink(parent->d_inode, dentry); + dput(dentry); + } + } + up(&parent->d_inode->i_sem); + simple_release_fs(&mount, &mount_count); +} +EXPORT_SYMBOL_GPL(securityfs_remove); + +static decl_subsys(security, NULL, NULL); + +static int __init securityfs_init(void) +{ + int retval; + + kset_set_kset_s(&security_subsys, kernel_subsys); + retval = subsystem_register(&security_subsys); + if (retval) + return retval; + + retval = register_filesystem(&fs_type); + if (retval) + subsystem_unregister(&security_subsys); + return retval; +} + +static void __exit securityfs_exit(void) +{ + simple_release_fs(&mount, &mount_count); + unregister_filesystem(&fs_type); + subsystem_unregister(&security_subsys); +} + +core_initcall(securityfs_init); +module_exit(securityfs_exit); +MODULE_LICENSE("GPL"); + -- cgit v1.2.3 From
241fc4367b3ca5d407b043599ed980304a70b91f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:35:54 +0100 Subject: [SERIAL] Expose 8250_pci setup/removal/suspend/resume functions Re-jig the setup/removal/suspend/resume of 8250 pci ports so that they know slightly less about how they're attached to a PCI device. Expose this as the new interface for registering PCI serial ports, as well as the pciserial_board structure and associated flag definitions. Signed-off-by: Russell King --- drivers/serial/8250_pci.c | 233 ++++++++++++++++++++++++---------------------- include/linux/8250_pci.h | 38 ++++++++ 2 files changed, 159 insertions(+), 112 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 7ca07651c10c..4e9084edfc7e 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -33,38 +33,6 @@ #undef SERIAL_DEBUG_PCI -/* - * Definitions for PCI support. - */ -#define FL_BASE_MASK 0x0007 -#define FL_BASE0 0x0000 -#define FL_BASE1 0x0001 -#define FL_BASE2 0x0002 -#define FL_BASE3 0x0003 -#define FL_BASE4 0x0004 -#define FL_GET_BASE(x) (x & FL_BASE_MASK) - -/* Use successive BARs (PCI base address registers), - else use offset into some specified BAR */ -#define FL_BASE_BARS 0x0008 - -/* do not assign an irq */ -#define FL_NOIRQ 0x0080 - -/* Use the Base address register size to cap number of ports */ -#define FL_REGION_SZ_CAP 0x0100 - -struct pciserial_board { - unsigned int flags; - unsigned int num_ports; - unsigned int base_baud; - unsigned int uart_offset; - unsigned int reg_shift; - unsigned int first_offset; -}; - -struct serial_private; - /* * init function returns: * > 0 - number of ports @@ -1528,60 +1496,14 @@ serial_pci_matches(struct pciserial_board *board, board->first_offset == guessed->first_offset; } -/* - * Probe one serial board. Unfortunately, there is no rhyme nor reason - * to the arrangement of serial ports on a PCI card. 
- */ -static int __devinit -pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) +struct serial_private * +pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board) { struct uart_port serial_port; struct serial_private *priv; - struct pciserial_board *board, tmp; struct pci_serial_quirk *quirk; int rc, nr_ports, i; - if (ent->driver_data >= ARRAY_SIZE(pci_boards)) { - printk(KERN_ERR "pci_init_one: invalid driver_data: %ld\n", - ent->driver_data); - return -EINVAL; - } - - board = &pci_boards[ent->driver_data]; - - rc = pci_enable_device(dev); - if (rc) - return rc; - - if (ent->driver_data == pbn_default) { - /* - * Use a copy of the pci_board entry for this; - * avoid changing entries in the table. - */ - memcpy(&tmp, board, sizeof(struct pciserial_board)); - board = &tmp; - - /* - * We matched one of our class entries. Try to - * determine the parameters of this board. - */ - rc = serial_pci_guess_board(dev, board); - if (rc) - goto disable; - } else { - /* - * We matched an explicit entry. If we are able to - * detect this boards settings with our heuristic, - * then we no longer need this entry. 
- */ - memcpy(&tmp, &pci_boards[pbn_default], - sizeof(struct pciserial_board)); - rc = serial_pci_guess_board(dev, &tmp); - if (rc == 0 && serial_pci_matches(board, &tmp)) - moan_device("Redundant entry in serial pci_table.", - dev); - } - nr_ports = board->num_ports; /* @@ -1598,8 +1520,10 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) */ if (quirk->init) { rc = quirk->init(dev); - if (rc < 0) - goto disable; + if (rc < 0) { + priv = ERR_PTR(rc); + goto err_out; + } if (rc) nr_ports = rc; } @@ -1608,8 +1532,8 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) sizeof(unsigned int) * nr_ports, GFP_KERNEL); if (!priv) { - rc = -ENOMEM; - goto deinit; + priv = ERR_PTR(-ENOMEM); + goto err_deinit; } memset(priv, 0, sizeof(struct serial_private) + @@ -1617,7 +1541,6 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) priv->dev = dev; priv->quirk = quirk; - pci_set_drvdata(dev, priv); memset(&serial_port, 0, sizeof(struct uart_port)); serial_port.flags = UPF_SKIP_TEST | UPF_BOOT_AUTOCONF | UPF_SHARE_IRQ; @@ -1643,24 +1566,21 @@ pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) priv->nr = i; - return 0; + return priv; - deinit: + err_deinit: if (quirk->exit) quirk->exit(dev); - disable: - pci_disable_device(dev); - return rc; + err_out: + return priv; } +EXPORT_SYMBOL_GPL(pciserial_init_ports); -static void __devexit pciserial_remove_one(struct pci_dev *dev) +void pciserial_remove_ports(struct serial_private *priv) { - struct serial_private *priv = pci_get_drvdata(dev); struct pci_serial_quirk *quirk; int i; - pci_set_drvdata(dev, NULL); - for (i = 0; i < priv->nr; i++) serial8250_unregister_port(priv->line[i]); @@ -1673,25 +1593,123 @@ static void __devexit pciserial_remove_one(struct pci_dev *dev) /* * Find the exit quirks. 
*/ - quirk = find_quirk(dev); + quirk = find_quirk(priv->dev); if (quirk->exit) - quirk->exit(dev); + quirk->exit(priv->dev); + + kfree(priv); +} +EXPORT_SYMBOL_GPL(pciserial_remove_ports); + +void pciserial_suspend_ports(struct serial_private *priv) +{ + int i; + + for (i = 0; i < priv->nr; i++) + if (priv->line[i] >= 0) + serial8250_suspend_port(priv->line[i]); +} +EXPORT_SYMBOL_GPL(pciserial_suspend_ports); + +void pciserial_resume_ports(struct serial_private *priv) +{ + int i; + + /* + * Ensure that the board is correctly configured. + */ + if (priv->quirk->init) + priv->quirk->init(priv->dev); + + for (i = 0; i < priv->nr; i++) + if (priv->line[i] >= 0) + serial8250_resume_port(priv->line[i]); +} +EXPORT_SYMBOL_GPL(pciserial_resume_ports); + +/* + * Probe one serial board. Unfortunately, there is no rhyme nor reason + * to the arrangement of serial ports on a PCI card. + */ +static int __devinit +pciserial_init_one(struct pci_dev *dev, const struct pci_device_id *ent) +{ + struct serial_private *priv; + struct pciserial_board *board, tmp; + int rc; + + if (ent->driver_data >= ARRAY_SIZE(pci_boards)) { + printk(KERN_ERR "pci_init_one: invalid driver_data: %ld\n", + ent->driver_data); + return -EINVAL; + } + + board = &pci_boards[ent->driver_data]; + + rc = pci_enable_device(dev); + if (rc) + return rc; + + if (ent->driver_data == pbn_default) { + /* + * Use a copy of the pci_board entry for this; + * avoid changing entries in the table. + */ + memcpy(&tmp, board, sizeof(struct pciserial_board)); + board = &tmp; + + /* + * We matched one of our class entries. Try to + * determine the parameters of this board. + */ + rc = serial_pci_guess_board(dev, board); + if (rc) + goto disable; + } else { + /* + * We matched an explicit entry. If we are able to + * detect this boards settings with our heuristic, + * then we no longer need this entry. 
+ */ + memcpy(&tmp, &pci_boards[pbn_default], + sizeof(struct pciserial_board)); + rc = serial_pci_guess_board(dev, &tmp); + if (rc == 0 && serial_pci_matches(board, &tmp)) + moan_device("Redundant entry in serial pci_table.", + dev); + } + + priv = pciserial_init_ports(dev, board); + if (!IS_ERR(priv)) { + pci_set_drvdata(dev, priv); + return 0; + } + + rc = PTR_ERR(priv); + disable: pci_disable_device(dev); + return rc; +} - kfree(priv); +static void __devexit pciserial_remove_one(struct pci_dev *dev) +{ + struct serial_private *priv = pci_get_drvdata(dev); + + pci_set_drvdata(dev, NULL); + + pciserial_remove_ports(priv); + + pci_disable_device(dev); } static int pciserial_suspend_one(struct pci_dev *dev, pm_message_t state) { struct serial_private *priv = pci_get_drvdata(dev); - if (priv) { - int i; + if (priv) + pciserial_suspend_ports(priv); - for (i = 0; i < priv->nr; i++) - serial8250_suspend_port(priv->line[i]); - } pci_save_state(dev); pci_set_power_state(dev, pci_choose_state(dev, state)); return 0; @@ -1705,21 +1723,12 @@ static int pciserial_resume_one(struct pci_dev *dev) pci_restore_state(dev); if (priv) { - int i; - /* * The device may have been disabled. Re-enable it. */ pci_enable_device(dev); - /* - * Ensure that the board is correctly configured. - */ - if (priv->quirk->init) - priv->quirk->init(dev); - - for (i = 0; i < priv->nr; i++) - serial8250_resume_port(priv->line[i]); + pciserial_resume_ports(priv); } return 0; } diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 5f3ab21b339b..192c0ff7a774 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -1,2 +1,40 @@ +/* + * Definitions for PCI support. 
+ */ +#define FL_BASE_MASK 0x0007 +#define FL_BASE0 0x0000 +#define FL_BASE1 0x0001 +#define FL_BASE2 0x0002 +#define FL_BASE3 0x0003 +#define FL_BASE4 0x0004 +#define FL_GET_BASE(x) (x & FL_BASE_MASK) + +/* Use successive BARs (PCI base address registers), + else use offset into some specified BAR */ +#define FL_BASE_BARS 0x0008 + +/* do not assign an irq */ +#define FL_NOIRQ 0x0080 + +/* Use the Base address register size to cap number of ports */ +#define FL_REGION_SZ_CAP 0x0100 + +struct pciserial_board { + unsigned int flags; + unsigned int num_ports; + unsigned int base_baud; + unsigned int uart_offset; + unsigned int reg_shift; + unsigned int first_offset; +}; + +struct serial_private; + +struct serial_private * +pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); +void pciserial_remove_ports(struct serial_private *priv); +void pciserial_suspend_ports(struct serial_private *priv); +void pciserial_resume_ports(struct serial_private *priv); + int pci_siig10x_fn(struct pci_dev *dev, int enable); int pci_siig20x_fn(struct pci_dev *dev, int enable); -- cgit v1.2.3 From 05caac585f8abd6c0113856bc8858e3ef214d8a6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:41:18 +0100 Subject: [SERIAL] Convert parport_serial to use new 8250_pci interfaces Convert parport_serial to use the new 8250_pci interface, converting the table to a pciserial_board table. This also unuses the SPCI_* definitions in serialP.h, which can now be removed. 
Signed-off-by: Russell King --- drivers/parport/parport_serial.c | 339 +++++++++++++++++++-------------------- drivers/serial/8250_pci.c | 21 +-- include/linux/8250_pci.h | 3 - include/linux/serialP.h | 40 ----- 4 files changed, 167 insertions(+), 236 deletions(-) (limited to 'include/linux') diff --git a/drivers/parport/parport_serial.c b/drivers/parport/parport_serial.c index 00498e2f1205..d3dad0aac7cb 100644 --- a/drivers/parport/parport_serial.c +++ b/drivers/parport/parport_serial.c @@ -23,13 +23,8 @@ #include #include #include -#include -#include -#include #include -#include - enum parport_pc_pci_cards { titan_110l = 0, titan_210l, @@ -168,182 +163,147 @@ static struct pci_device_id parport_serial_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci,parport_serial_pci_tbl); -struct pci_board_no_ids { - int flags; - int num_ports; - int base_baud; - int uart_offset; - int reg_shift; - int (*init_fn)(struct pci_dev *dev, struct pci_board_no_ids *board, - int enable); - int first_uart_offset; -}; - -static int __devinit siig10x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - return pci_siig10x_fn(dev, enable); -} - -static int __devinit siig20x_init_fn(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - return pci_siig20x_fn(dev, enable); -} - -static int __devinit netmos_serial_init(struct pci_dev *dev, struct pci_board_no_ids *board, int enable) -{ - board->num_ports = dev->subsystem_device & 0xf; - return 0; -} - -static struct pci_board_no_ids pci_boards[] __devinitdata = { - /* - * PCI Flags, Number of Ports, Base (Maximum) Baud Rate, - * Offset to get to next UART's registers, - * Register shift to use for memory-mapped I/O, - * Initialization function, first UART offset - */ - -// Cards not tested are marked n/t -// If you have one of these cards and it works for you, please tell me.. 
- -/* titan_110l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 1, 921600 }, -/* titan_210l */ { SPCI_FL_BASE1 | SPCI_FL_BASE_TABLE, 2, 921600 }, -/* netmos_9xx5_combo */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, -/* netmos_9855 */ { SPCI_FL_BASE2 | SPCI_FL_BASE_TABLE, 1, 115200, 0, 0, netmos_serial_init }, -/* avlab_1s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_1s2p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 1, 115200 }, -/* avlab_2s1p (n/t) */ { SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* avlab_2s1p_650 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* avlab_2s1p_850 (nt)*/{ SPCI_FL_BASE0 | SPCI_FL_BASE_TABLE, 2, 115200 }, -/* siig_1s1p_10x */ { SPCI_FL_BASE2, 1, 460800, 0, 0, siig10x_init_fn }, -/* siig_2s1p_10x */ { SPCI_FL_BASE2, 1, 921600, 0, 0, siig10x_init_fn }, -/* siig_2p1s_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, -/* siig_1s1p_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, -/* siig_2s1p_20x */ { SPCI_FL_BASE0, 1, 921600, 0, 0, siig20x_init_fn }, +/* + * This table describes the serial "geometry" of these boards. Any + * quirks for these can be found in drivers/serial/8250_pci.c + * + * Cards not tested are marked n/t + * If you have one of these cards and it works for you, please tell me.. 
+ */ +static struct pciserial_board pci_parport_serial_boards[] __devinitdata = { + [titan_110l] = { + .flags = FL_BASE1 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [titan_210l] = { + .flags = FL_BASE1 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 921600, + .uart_offset = 8, + }, + [netmos_9xx5_combo] = { + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [netmos_9855] = { + .flags = FL_BASE2 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s1p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_1s2p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 1, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p] = { /* n/t */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p_650] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [avlab_2s1p_850] = { /* nt */ + .flags = FL_BASE0 | FL_BASE_BARS, + .num_ports = 2, + .base_baud = 115200, + .uart_offset = 8, + }, + [siig_1s1p_10x] = { + .flags = FL_BASE2, + .num_ports = 1, + .base_baud = 460800, + .uart_offset = 8, + }, + [siig_2s1p_10x] = { + .flags = FL_BASE2, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + 
[siig_2p1s_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [siig_1s1p_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, + [siig_2s1p_20x] = { + .flags = FL_BASE0, + .num_ports = 1, + .base_baud = 921600, + .uart_offset = 8, + }, }; struct parport_serial_private { - int num_ser; - int line[20]; - struct pci_board_no_ids ser; + struct serial_private *serial; int num_par; struct parport *port[PARPORT_MAX]; struct parport_pc_pci par; }; -static int __devinit get_pci_port (struct pci_dev *dev, - struct pci_board_no_ids *board, - struct serial_struct *req, - int idx) -{ - unsigned long port; - int base_idx; - int max_port; - int offset; - - base_idx = SPCI_FL_GET_BASE(board->flags); - if (board->flags & SPCI_FL_BASE_TABLE) - base_idx += idx; - - if (board->flags & SPCI_FL_REGION_SZ_CAP) { - max_port = pci_resource_len(dev, base_idx) / 8; - if (idx >= max_port) - return 1; - } - - offset = board->first_uart_offset; - - /* Timedia/SUNIX uses a mixture of BARs and offsets */ - /* Ugh, this is ugly as all hell --- TYT */ - if(dev->vendor == PCI_VENDOR_ID_TIMEDIA ) /* 0x1409 */ - switch(idx) { - case 0: base_idx=0; - break; - case 1: base_idx=0; offset=8; - break; - case 2: base_idx=1; - break; - case 3: base_idx=1; offset=8; - break; - case 4: /* BAR 2*/ - case 5: /* BAR 3 */ - case 6: /* BAR 4*/ - case 7: base_idx=idx-2; /* BAR 5*/ - } - - port = pci_resource_start(dev, base_idx) + offset; - - if ((board->flags & SPCI_FL_BASE_TABLE) == 0) - port += idx * (board->uart_offset ? 
board->uart_offset : 8); - - if (pci_resource_flags (dev, base_idx) & IORESOURCE_IO) { - int high_bits_offset = ((sizeof(long)-sizeof(int))*8); - req->port = port; - if (high_bits_offset) - req->port_high = port >> high_bits_offset; - else - req->port_high = 0; - return 0; - } - req->io_type = SERIAL_IO_MEM; - req->iomem_base = ioremap(port, board->uart_offset); - req->iomem_reg_shift = board->reg_shift; - req->port = 0; - return req->iomem_base ? 0 : 1; -} - /* Register the serial port(s) of a PCI card. */ static int __devinit serial_register (struct pci_dev *dev, const struct pci_device_id *id) { - struct pci_board_no_ids *board; struct parport_serial_private *priv = pci_get_drvdata (dev); - struct serial_struct serial_req; - int base_baud; - int k; - int success = 0; - - priv->ser = pci_boards[id->driver_data]; - board = &priv->ser; - if (board->init_fn && ((board->init_fn) (dev, board, 1) != 0)) - return 1; - - base_baud = board->base_baud; - if (!base_baud) - base_baud = BASE_BAUD; - memset (&serial_req, 0, sizeof (serial_req)); - - for (k = 0; k < board->num_ports; k++) { - int line; + struct pciserial_board *board; + struct serial_private *serial; - if (priv->num_ser == ARRAY_SIZE (priv->line)) { - printk (KERN_WARNING - "parport_serial: %s: only %u serial lines " - "supported (%d reported)\n", pci_name (dev), - ARRAY_SIZE (priv->line), board->num_ports); - break; - } + board = &pci_parport_serial_boards[id->driver_data]; + serial = pciserial_init_ports(dev, board); - serial_req.irq = dev->irq; - if (get_pci_port (dev, board, &serial_req, k)) - break; - serial_req.flags = ASYNC_SKIP_TEST | ASYNC_AUTOPROBE; - serial_req.baud_base = base_baud; - line = register_serial (&serial_req); - if (line < 0) { - printk (KERN_DEBUG - "parport_serial: register_serial failed\n"); - continue; - } - priv->line[priv->num_ser++] = line; - success = 1; - } + if (IS_ERR(serial)) + return PTR_ERR(serial); - return success ? 
0 : 1; + priv->serial = serial; + return 0; } /* Register the parallel port(s) of a PCI card. */ @@ -411,7 +371,7 @@ static int __devinit parport_serial_pci_probe (struct pci_dev *dev, priv = kmalloc (sizeof *priv, GFP_KERNEL); if (!priv) return -ENOMEM; - priv->num_ser = priv->num_par = 0; + memset(priv, 0, sizeof(struct parport_serial_private)); pci_set_drvdata (dev, priv); err = pci_enable_device (dev); @@ -444,15 +404,12 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev) struct parport_serial_private *priv = pci_get_drvdata (dev); int i; + pci_set_drvdata(dev, NULL); + // Serial ports - for (i = 0; i < priv->num_ser; i++) { - unregister_serial (priv->line[i]); + if (priv->serial) + pciserial_remove_ports(priv->serial); - if (priv->ser.init_fn) - (priv->ser.init_fn) (dev, &priv->ser, 0); - } - pci_set_drvdata (dev, NULL); - // Parallel ports for (i = 0; i < priv->num_par; i++) parport_pc_unregister_port (priv->port[i]); @@ -461,11 +418,47 @@ static void __devexit parport_serial_pci_remove (struct pci_dev *dev) return; } +static int parport_serial_pci_suspend(struct pci_dev *dev, pm_message_t state) +{ + struct parport_serial_private *priv = pci_get_drvdata(dev); + + if (priv->serial) + pciserial_suspend_ports(priv->serial); + + /* FIXME: What about parport? */ + + pci_save_state(dev); + pci_set_power_state(dev, pci_choose_state(dev, state)); + return 0; +} + +static int parport_serial_pci_resume(struct pci_dev *dev) +{ + struct parport_serial_private *priv = pci_get_drvdata(dev); + + pci_set_power_state(dev, PCI_D0); + pci_restore_state(dev); + + /* + * The device may have been disabled. Re-enable it. + */ + pci_enable_device(dev); + + if (priv->serial) + pciserial_resume_ports(priv->serial); + + /* FIXME: What about parport? 
*/ + + return 0; +} + static struct pci_driver parport_serial_pci_driver = { .name = "parport_serial", .id_table = parport_serial_pci_tbl, .probe = parport_serial_pci_probe, .remove = __devexit_p(parport_serial_pci_remove), + .suspend = parport_serial_pci_suspend, + .resume = parport_serial_pci_resume, }; diff --git a/drivers/serial/8250_pci.c b/drivers/serial/8250_pci.c index 4e9084edfc7e..52b0a0558ed4 100644 --- a/drivers/serial/8250_pci.c +++ b/drivers/serial/8250_pci.c @@ -46,7 +46,7 @@ struct pci_serial_quirk { u32 subdevice; int (*init)(struct pci_dev *dev); int (*setup)(struct serial_private *, struct pciserial_board *, - struct uart_port *port, int idx); + struct uart_port *, int); void (*exit)(struct pci_dev *dev); }; @@ -436,25 +436,6 @@ static int pci_siig_init(struct pci_dev *dev) return -ENODEV; } -int pci_siig10x_fn(struct pci_dev *dev, int enable) -{ - int ret = 0; - if (enable) - ret = pci_siig10x_init(dev); - return ret; -} - -int pci_siig20x_fn(struct pci_dev *dev, int enable) -{ - int ret = 0; - if (enable) - ret = pci_siig20x_init(dev); - return ret; -} - -EXPORT_SYMBOL(pci_siig10x_fn); -EXPORT_SYMBOL(pci_siig20x_fn); - /* * Timedia has an explosion of boards, and to avoid the PCI table from * growing *huge*, we use this function to collapse some 70 entries diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 192c0ff7a774..3209dd46ea7d 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -35,6 +35,3 @@ pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); void pciserial_remove_ports(struct serial_private *priv); void pciserial_suspend_ports(struct serial_private *priv); void pciserial_resume_ports(struct serial_private *priv); - -int pci_siig10x_fn(struct pci_dev *dev, int enable); -int pci_siig20x_fn(struct pci_dev *dev, int enable); diff --git a/include/linux/serialP.h b/include/linux/serialP.h index 2b2f35a64d75..2b9e6b9554d5 100644 --- a/include/linux/serialP.h +++ 
b/include/linux/serialP.h @@ -140,44 +140,4 @@ struct rs_multiport_struct { #define ALPHA_KLUDGE_MCR 0 #endif -/* - * Definitions for PCI support. - */ -#define SPCI_FL_BASE_MASK 0x0007 -#define SPCI_FL_BASE0 0x0000 -#define SPCI_FL_BASE1 0x0001 -#define SPCI_FL_BASE2 0x0002 -#define SPCI_FL_BASE3 0x0003 -#define SPCI_FL_BASE4 0x0004 -#define SPCI_FL_GET_BASE(x) (x & SPCI_FL_BASE_MASK) - -#define SPCI_FL_IRQ_MASK (0x0007 << 4) -#define SPCI_FL_IRQBASE0 (0x0000 << 4) -#define SPCI_FL_IRQBASE1 (0x0001 << 4) -#define SPCI_FL_IRQBASE2 (0x0002 << 4) -#define SPCI_FL_IRQBASE3 (0x0003 << 4) -#define SPCI_FL_IRQBASE4 (0x0004 << 4) -#define SPCI_FL_GET_IRQBASE(x) ((x & SPCI_FL_IRQ_MASK) >> 4) - -/* Use successive BARs (PCI base address registers), - else use offset into some specified BAR */ -#define SPCI_FL_BASE_TABLE 0x0100 - -/* Use successive entries in the irq resource table */ -#define SPCI_FL_IRQ_TABLE 0x0200 - -/* Use the irq resource table instead of dev->irq */ -#define SPCI_FL_IRQRESOURCE 0x0400 - -/* Use the Base address register size to cap number of ports */ -#define SPCI_FL_REGION_SZ_CAP 0x0800 - -/* Do not use irq sharing for this device */ -#define SPCI_FL_NO_SHIRQ 0x1000 - -/* This is a PNP device */ -#define SPCI_FL_ISPNP 0x2000 - -#define SPCI_FL_PNPDEFAULT (SPCI_FL_IRQRESOURCE|SPCI_FL_ISPNP) - #endif /* _LINUX_SERIAL_H */ -- cgit v1.2.3 From 00db8189d984d6c51226dafbbe4a667ce9b7d5da Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Sat, 30 Jul 2005 19:31:23 -0400 Subject: This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details. 
Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- Documentation/networking/phy.txt | 288 +++++++++++++ drivers/net/Kconfig | 2 + drivers/net/Makefile | 1 + drivers/net/phy/Kconfig | 57 +++ drivers/net/phy/Makefile | 9 + drivers/net/phy/cicada.c | 134 ++++++ drivers/net/phy/davicom.c | 195 +++++++++ drivers/net/phy/lxt.c | 179 ++++++++ drivers/net/phy/marvell.c | 140 +++++++ drivers/net/phy/mdio_bus.c | 173 ++++++++ drivers/net/phy/phy.c | 862 +++++++++++++++++++++++++++++++++++++++ drivers/net/phy/phy.c.orig | 860 ++++++++++++++++++++++++++++++++++++++ drivers/net/phy/phy_device.c | 682 +++++++++++++++++++++++++++++++ drivers/net/phy/qsemi.c | 143 +++++++ include/linux/ethtool.h | 4 + include/linux/mii.h | 9 +- include/linux/phy.h | 378 +++++++++++++++++ 17 files changed, 4115 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/phy.txt create mode 100644 drivers/net/phy/Kconfig create mode 100644 drivers/net/phy/Makefile create mode 100644 drivers/net/phy/cicada.c create mode 100644 drivers/net/phy/davicom.c create mode 100644 drivers/net/phy/lxt.c create mode 100644 drivers/net/phy/marvell.c create mode 100644 drivers/net/phy/mdio_bus.c create mode 100644 drivers/net/phy/phy.c create mode 100644 drivers/net/phy/phy.c.orig create mode 100644 drivers/net/phy/phy_device.c create mode 100644 drivers/net/phy/qsemi.c create mode 100644 include/linux/phy.h (limited to 'include/linux') diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt new file mode 100644 index 000000000000..29ccae409031 --- /dev/null +++ b/Documentation/networking/phy.txt @@ -0,0 +1,288 @@ + +------- +PHY Abstraction Layer +(Updated 2005-07-21) + +Purpose + + Most network devices consist of set of registers which provide an interface + to a MAC layer, which communicates with the physical connection through a + PHY. 
The PHY concerns itself with negotiating link parameters with the link + partner on the other side of the network connection (typically, an ethernet + cable), and provides a register interface to allow drivers to determine what + settings were chosen, and to configure what settings are allowed. + + While these devices are distinct from the network devices, and conform to a + standard layout for the registers, it has been common practice to integrate + the PHY management code with the network driver. This has resulted in large + amounts of redundant code. Also, on embedded systems with multiple (and + sometimes quite different) ethernet controllers connected to the same + management bus, it is difficult to ensure safe use of the bus. + + Since the PHYs are devices, and the management busses through which they are + accessed are, in fact, busses, the PHY Abstraction Layer treats them as such. + In doing so, it has these goals: + + 1) Increase code-reuse + 2) Increase overall code-maintainability + 3) Speed development time for new network drivers, and for new systems + + Basically, this layer is meant to provide an interface to PHY devices which + allows network driver writers to write as little code as possible, while + still providing a full feature set. + +The MDIO bus + + Most network devices are connected to a PHY by means of a management bus. + Different devices use different busses (though some share common interfaces). + In order to take advantage of the PAL, each bus interface needs to be + registered as a distinct device. + + 1) read and write functions must be implemented. Their prototypes are: + + int write(struct mii_bus *bus, int mii_id, int regnum, u16 value); + int read(struct mii_bus *bus, int mii_id, int regnum); + + mii_id is the address on the bus for the PHY, and regnum is the register + number. 
These functions are guaranteed not to be called from interrupt + time, so it is safe for them to block, waiting for an interrupt to signal + the operation is complete. + + 2) A reset function is necessary. This is used to return the bus to an + initialized state. + + 3) A probe function is needed. This function should set up anything the bus + driver needs, set up the mii_bus structure, and register with the PAL using + mdiobus_register. Similarly, there's a remove function to undo all of + that (use mdiobus_unregister). + + 4) Like any driver, the device_driver structure must be configured, and init + and exit functions are used to register the driver. + + 5) The bus must also be declared somewhere as a device, and registered. + + As an example for how one driver implemented an mdio bus driver, see + drivers/net/gianfar_mii.c and arch/ppc/syslib/mpc85xx_devices.c + +Connecting to a PHY + + Sometime during startup, the network driver needs to establish a connection + between the PHY device, and the network device. At this time, the PHY's bus + and drivers need to all have been loaded, so it is ready for the connection. + At this point, there are several ways to connect to the PHY: + + 1) The PAL handles everything, and only calls the network driver when + the link state changes, so it can react. + + 2) The PAL handles everything except interrupts (usually because the + controller has the interrupt registers). + + 3) The PAL handles everything, but checks in with the driver every second, + allowing the network driver to react first to any changes before the PAL + does.
+ + 4) The PAL serves only as a library of functions, with the network device + manually calling functions to update status, and configure the PHY. + + +Letting the PHY Abstraction Layer do Everything + + If you choose option 1 (the hope is that every driver can, but the other + options exist so the PAL is still useful to drivers that can't), connecting to the PHY is simple: + + First, you need a function to react to changes in the link state. This + function follows this prototype: + + static void adjust_link(struct net_device *dev); + + Next, you need to know the device name of the PHY connected to this device. + The name will look something like, "phy0:0", where the first number is the + bus id, and the second is the PHY's address on that bus. + + Now, to connect, just call this function: + + phydev = phy_connect(dev, phy_name, &adjust_link, flags); + + phydev is a pointer to the phy_device structure which represents the PHY. If + phy_connect is successful, it will return the pointer. dev, here, is the + pointer to your net_device. Once done, this function will have started the + PHY's software state machine, and registered for the PHY's interrupt, if it + has one. The phydev structure will be populated with information about the + current state, though the PHY will not yet be truly operational at this + point. + + flags is a u32 which can optionally contain phy-specific flags. + This is useful if the system has put hardware restrictions on + the PHY/controller, of which the PHY needs to be aware. + + Now just make sure that phydev->supported and phydev->advertising have any + values pruned from them which don't make sense for your controller (a 10/100 + controller may be connected to a gigabit capable PHY, so you would need to + mask off SUPPORTED_1000baseT*). See include/linux/ethtool.h for definitions + for these bitfields. Note that you should not SET any bits, or the PHY may + get put into an unsupported state.
+ + Lastly, once the controller is ready to handle network traffic, you call + phy_start(phydev). This tells the PAL that you are ready, and configures the + PHY to connect to the network. If you want to handle your own interrupts, + just set phydev->irq to PHY_IGNORE_INTERRUPT before you call phy_start. + Similarly, if you don't want to use interrupts, set phydev->irq to PHY_POLL. + + When you want to disconnect from the network (even if just briefly), you call + phy_stop(phydev). + +Keeping Close Tabs on the PAL + + It is possible that the PAL's built-in state machine needs a little help to + keep your network device and the PHY properly in sync. If so, you can + register a helper function when connecting to the PHY, which will be called + every second before the state machine reacts to any changes. To do this, you + need to manually call phy_attach() and phy_prepare_link(), and then call + phy_start_machine() with the second argument set to point to your special + handler. + + Currently there are no examples of how to use this functionality, and testing + on it has been limited because the author does not have any drivers which use + it (they all use option 1). So Caveat Emptor. + +Doing it all yourself + + There's a remote chance that the PAL's built-in state machine cannot track + the complex interactions between the PHY and your network device. If this is + so, you can simply call phy_attach(), and not call phy_start_machine or + phy_prepare_link(). This will mean that phydev->state is entirely yours to + handle (phy_start and phy_stop toggle between some of the states, so you + might need to avoid them). + + An effort has been made to make sure that useful functionality can be + accessed without the state-machine running, and most of these functions are + descended from functions which did not interact with a complex state-machine. + However, again, no effort has been made so far to test running without the + state machine, so tryer beware. 
+ + Here is a brief rundown of the functions: + + int phy_read(struct phy_device *phydev, u16 regnum); + int phy_write(struct phy_device *phydev, u16 regnum, u16 val); + + Simple read/write primitives. They invoke the bus's read/write function + pointers. + + void phy_print_status(struct phy_device *phydev); + + A convenience function to print out the PHY status neatly. + + int phy_clear_interrupt(struct phy_device *phydev); + int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); + + Clear the PHY's interrupt, and configure which ones are allowed, + respectively. Currently only supports all on, or all off. + + int phy_enable_interrupts(struct phy_device *phydev); + int phy_disable_interrupts(struct phy_device *phydev); + + Functions which enable/disable PHY interrupts, clearing them + before and after, respectively. + + int phy_start_interrupts(struct phy_device *phydev); + int phy_stop_interrupts(struct phy_device *phydev); + + Requests the IRQ for the PHY interrupts, then enables them for + start, or disables then frees them for stop. + + struct phy_device * phy_attach(struct net_device *dev, const char *phy_id, + u32 flags); + + Attaches a network device to a particular PHY, binding the PHY to a generic + driver if none was found during bus initialization. Passes in + any phy-specific flags as needed. + + int phy_start_aneg(struct phy_device *phydev); + + Using variables inside the phydev structure, either configures advertising + and resets autonegotiation, or disables autonegotiation, and configures + forced settings. + + static inline int phy_read_status(struct phy_device *phydev); + + Fills the phydev structure with up-to-date information about the current + settings in the PHY. + + void phy_sanitize_settings(struct phy_device *phydev) + + Resolves differences between currently desired settings, and + supported settings for the given PHY device. Does not make + the changes in the hardware, though. 
+ + int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); + int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); + + Ethtool convenience functions. + + int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); + + The MII ioctl. Note that this function will completely screw up the state + machine if you write registers like BMCR, BMSR, ADVERTISE, etc. Best to + use this only to write registers which are not standard, and don't set off + a renegotiation. + + +PHY Device Drivers + + With the PHY Abstraction Layer, adding support for new PHYs is + quite easy. In some cases, no work is required at all! However, + many PHYs require a little hand-holding to get up-and-running. + +Generic PHY driver + + If the desired PHY doesn't have any errata, quirks, or special + features you want to support, then it may be best to not add + support, and let the PHY Abstraction Layer's Generic PHY Driver + do all of the work. + +Writing a PHY driver + + If you do need to write a PHY driver, the first thing to do is + make sure it can be matched with an appropriate PHY device. + This is done during bus initialization by reading the device's + UID (stored in registers 2 and 3), then comparing it to each + driver's phy_id field by ANDing it with each driver's + phy_id_mask field. Also, it needs a name. Here's an example: + + static struct phy_driver dm9161_driver = { + .phy_id = 0x0181b880, + .name = "Davicom DM9161E", + .phy_id_mask = 0x0ffffff0, + ... + }; + + Next, you need to specify what features (speed, duplex, autoneg, + etc) your PHY device and driver support. Most PHYs support + PHY_BASIC_FEATURES, but you can look in include/linux/mii.h for other + features. + + Each driver consists of a number of function pointers: + + config_init: configures PHY into a sane state after a reset. + For instance, a Davicom PHY requires descrambling disabled.
+ probe: Does any setup needed by the driver + suspend/resume: power management + config_aneg: Changes the speed/duplex/negotiation settings + read_status: Reads the current speed/duplex/negotiation settings + ack_interrupt: Clear a pending interrupt + config_intr: Enable or disable interrupts + remove: Does any driver take-down + + Of these, only config_aneg and read_status are required to be + assigned by the driver code. The rest are optional. Also, it is + preferred to use the generic phy driver's versions of these two + functions if at all possible: genphy_read_status and + genphy_config_aneg. If this is not possible, it is likely that + you only need to perform some actions before and after invoking + these functions, and so your functions will wrap the generic + ones. + + Feel free to look at the Marvell, Cicada, and Davicom drivers in + drivers/net/phy/ for examples (the lxt and qsemi drivers have + not been tested as of this writing) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 8a835eb58808..1e50b8e32add 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -131,6 +131,8 @@ config NET_SB1000 source "drivers/net/arcnet/Kconfig" +source "drivers/net/phy/Kconfig" + # # Ethernet # diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 63c6d1e6d4d9..a369ae284a9a 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_ADAPTEC_STARFIRE) += starfire.o # obj-$(CONFIG_MII) += mii.o +obj-$(CONFIG_PHYLIB) += phy/ obj-$(CONFIG_SUNDANCE) += sundance.o obj-$(CONFIG_HAMACHI) += hamachi.o diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig new file mode 100644 index 000000000000..8b5db2343cc3 --- /dev/null +++ b/drivers/net/phy/Kconfig @@ -0,0 +1,57 @@ +# +# PHY Layer Configuration +# + +menu "PHY device support" + +config PHYLIB + bool "PHY Device support and infrastructure" + depends on NET_ETHERNET + help + Ethernet controllers are usually attached to PHY + devices. 
This option provides infrastructure for + managing PHY devices. + +config PHYCONTROL + bool "Support for automatically handling PHY state changes" + depends on PHYLIB + help + Adds code to perform all the work for keeping PHY link + state (speed/duplex/etc) up-to-date. Also handles + interrupts. + +comment "MII PHY device drivers" + depends on PHYLIB + +config MARVELL_PHY + bool "Drivers for Marvell PHYs" + depends on PHYLIB + ---help--- + Currently has a driver for the 88E1011S + +config DAVICOM_PHY + bool "Drivers for Davicom PHYs" + depends on PHYLIB + ---help--- + Currently supports dm9161e and dm9131 + +config QSEMI_PHY + bool "Drivers for Quality Semiconductor PHYs" + depends on PHYLIB + ---help--- + Currently supports the qs6612 + +config LXT_PHY + bool "Drivers for the Intel LXT PHYs" + depends on PHYLIB + ---help--- + Currently supports the lxt970, lxt971 + +config CICADA_PHY + bool "Drivers for the Cicada PHYs" + depends on PHYLIB + ---help--- + Currently supports the cis8204 + +endmenu + diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile new file mode 100644 index 000000000000..1af05de6ced0 --- /dev/null +++ b/drivers/net/phy/Makefile @@ -0,0 +1,9 @@ +# Makefile for Linux PHY drivers + +obj-$(CONFIG_PHYLIB) += phy.o phy_device.o mdio_bus.o + +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c new file mode 100644 index 000000000000..c47fb2ecd147 --- /dev/null +++ b/drivers/net/phy/cicada.c @@ -0,0 +1,134 @@ +/* + * drivers/net/phy/cicada.c + * + * Driver for Cicada PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Cicada Extended Control Register 1 */ +#define MII_CIS8201_EXT_CON1 0x17 +#define MII_CIS8201_EXTCON1_INIT 0x0000 + +/* Cicada Interrupt Mask Register */ +#define MII_CIS8201_IMASK 0x19 +#define MII_CIS8201_IMASK_IEN 0x8000 +#define MII_CIS8201_IMASK_SPEED 0x4000 +#define MII_CIS8201_IMASK_LINK 0x2000 +#define MII_CIS8201_IMASK_DUPLEX 0x1000 +#define MII_CIS8201_IMASK_MASK 0xf000 + +/* Cicada Interrupt Status Register */ +#define MII_CIS8201_ISTAT 0x1a +#define MII_CIS8201_ISTAT_STATUS 0x8000 +#define MII_CIS8201_ISTAT_SPEED 0x4000 +#define MII_CIS8201_ISTAT_LINK 0x2000 +#define MII_CIS8201_ISTAT_DUPLEX 0x1000 + +/* Cicada Auxiliary Control/Status Register */ +#define MII_CIS8201_AUX_CONSTAT 0x1c +#define MII_CIS8201_AUXCONSTAT_INIT 0x0004 +#define MII_CIS8201_AUXCONSTAT_DUPLEX 0x0020 +#define MII_CIS8201_AUXCONSTAT_SPEED 0x0018 +#define MII_CIS8201_AUXCONSTAT_GBIT 0x0010 +#define MII_CIS8201_AUXCONSTAT_100 0x0008 + +MODULE_DESCRIPTION("Cicadia PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int cis820x_config_init(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_CIS8201_AUX_CONSTAT, + MII_CIS8201_AUXCONSTAT_INIT); + + if (err < 0) + return err; + + err = phy_write(phydev, MII_CIS8201_EXT_CON1, + MII_CIS8201_EXTCON1_INIT); + + return err; +} + +static int cis820x_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, MII_CIS8201_ISTAT); + + return (err < 0) ? 
err : 0; +} + +static int cis820x_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_CIS8201_IMASK, + MII_CIS8201_IMASK_MASK); + else + err = phy_write(phydev, MII_CIS8201_IMASK, 0); + + return err; +} + +/* Cicada 820x */ +static struct phy_driver cis8204_driver = { + .phy_id = 0x000fc440, + .name = "Cicada Cis8204", + .phy_id_mask = 0x000fffc0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = &cis820x_config_init, + .config_aneg = &genphy_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &cis820x_ack_interrupt, + .config_intr = &cis820x_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init cis8204_init(void) +{ + return phy_driver_register(&cis8204_driver); +} + +static void __exit cis8204_exit(void) +{ + phy_driver_unregister(&cis8204_driver); +} + +module_init(cis8204_init); +module_exit(cis8204_exit); diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c new file mode 100644 index 000000000000..6caf499fae32 --- /dev/null +++ b/drivers/net/phy/davicom.c @@ -0,0 +1,195 @@ +/* + * drivers/net/phy/davicom.c + * + * Driver for Davicom PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MII_DM9161_SCR 0x10 +#define MII_DM9161_SCR_INIT 0x0610 + +/* DM9161 Interrupt Register */ +#define MII_DM9161_INTR 0x15 +#define MII_DM9161_INTR_PEND 0x8000 +#define MII_DM9161_INTR_DPLX_MASK 0x0800 +#define MII_DM9161_INTR_SPD_MASK 0x0400 +#define MII_DM9161_INTR_LINK_MASK 0x0200 +#define MII_DM9161_INTR_MASK 0x0100 +#define MII_DM9161_INTR_DPLX_CHANGE 0x0010 +#define MII_DM9161_INTR_SPD_CHANGE 0x0008 +#define MII_DM9161_INTR_LINK_CHANGE 0x0004 +#define MII_DM9161_INTR_INIT 0x0000 +#define MII_DM9161_INTR_STOP \ +(MII_DM9161_INTR_DPLX_MASK | MII_DM9161_INTR_SPD_MASK \ + | MII_DM9161_INTR_LINK_MASK | MII_DM9161_INTR_MASK) + +/* DM9161 10BT Configuration/Status */ +#define MII_DM9161_10BTCSR 0x12 +#define MII_DM9161_10BTCSR_INIT 0x7800 + +MODULE_DESCRIPTION("Davicom PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + + +#define DM9161_DELAY 1 +static int dm9161_config_intr(struct phy_device *phydev) +{ + int temp; + + temp = phy_read(phydev, MII_DM9161_INTR); + + if (temp < 0) + return temp; + + if(PHY_INTERRUPT_ENABLED == phydev->interrupts ) + temp &= ~(MII_DM9161_INTR_STOP); + else + temp |= MII_DM9161_INTR_STOP; + + temp = phy_write(phydev, MII_DM9161_INTR, temp); + + return temp; +} + +static int dm9161_config_aneg(struct phy_device *phydev) +{ + int err; + + /* Isolate the PHY */ + err = phy_write(phydev, MII_BMCR, BMCR_ISOLATE); + + if (err < 0) + return err; + + /* Configure the new settings */ + err = genphy_config_aneg(phydev); + + if (err < 0) + return err; + + return 0; +} + +static int dm9161_config_init(struct phy_device *phydev) +{ + int err; + + /* Isolate the PHY */ + err = phy_write(phydev, MII_BMCR, BMCR_ISOLATE); + + if (err < 0) + return err; + + /* Do not bypass the 
scrambler/descrambler */ + err = phy_write(phydev, MII_DM9161_SCR, MII_DM9161_SCR_INIT); + + if (err < 0) + return err; + + /* Clear 10BTCSR to default */ + err = phy_write(phydev, MII_DM9161_10BTCSR, MII_DM9161_10BTCSR_INIT); + + if (err < 0) + return err; + + /* Reconnect the PHY, and enable Autonegotiation */ + err = phy_write(phydev, MII_BMCR, BMCR_ANENABLE); + + if (err < 0) + return err; + + return 0; +} + +static int dm9161_ack_interrupt(struct phy_device *phydev) +{ + int err = phy_read(phydev, MII_DM9161_INTR); + + return (err < 0) ? err : 0; +} + +static struct phy_driver dm9161_driver = { + .phy_id = 0x0181b880, + .name = "Davicom DM9161E", + .phy_id_mask = 0x0ffffff0, + .features = PHY_BASIC_FEATURES, + .config_init = dm9161_config_init, + .config_aneg = dm9161_config_aneg, + .read_status = genphy_read_status, + .driver = { .owner = THIS_MODULE,}, +}; + +static struct phy_driver dm9131_driver = { + .phy_id = 0x00181b80, + .name = "Davicom DM9131", + .phy_id_mask = 0x0ffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = dm9161_ack_interrupt, + .config_intr = dm9161_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init davicom_init(void) +{ + int ret; + + ret = phy_driver_register(&dm9161_driver); + if (ret) + goto err1; + + ret = phy_driver_register(&dm9131_driver); + if (ret) + goto err2; + return 0; + + err2: + phy_driver_unregister(&dm9161_driver); + err1: + return ret; +} + +static void __exit davicom_exit(void) +{ + phy_driver_unregister(&dm9161_driver); + phy_driver_unregister(&dm9131_driver); +} + +module_init(davicom_init); +module_exit(davicom_exit); diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c new file mode 100644 index 000000000000..4c840448ec86 --- /dev/null +++ b/drivers/net/phy/lxt.c @@ -0,0 +1,179 @@ +/* + * drivers/net/phy/lxt.c + * + * Driver for Intel LXT PHYs + * + * Author: Andy Fleming + 
* + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* The Level one LXT970 is used by many boards */ + +#define MII_LXT970_IER 17 /* Interrupt Enable Register */ + +#define MII_LXT970_IER_IEN 0x0002 + +#define MII_LXT970_ISR 18 /* Interrupt Status Register */ + +#define MII_LXT970_CONFIG 19 /* Configuration Register */ + +/* ------------------------------------------------------------------------- */ +/* The Level one LXT971 is used on some of my custom boards */ + +/* register definitions for the 971 */ +#define MII_LXT971_IER 18 /* Interrupt Enable Register */ +#define MII_LXT971_IER_IEN 0x00f2 + +#define MII_LXT971_ISR 19 /* Interrupt Status Register */ + + +MODULE_DESCRIPTION("Intel LXT PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int lxt970_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, MII_BMSR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_LXT970_ISR); + + if (err < 0) + return err; + + return 0; +} + +static int lxt970_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_LXT970_IER, MII_LXT970_IER_IEN); + else + err = phy_write(phydev, MII_LXT970_IER, 0); + + return err; +} + +static int lxt970_config_init(struct phy_device *phydev) +{ + int err; + + err = phy_write(phydev, MII_LXT970_CONFIG, 0); + + return err; +} + + +static int lxt971_ack_interrupt(struct phy_device *phydev) +{ + int err = 
phy_read(phydev, MII_LXT971_ISR); + + if (err < 0) + return err; + + return 0; +} + +static int lxt971_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_LXT971_IER, MII_LXT971_IER_IEN); + else + err = phy_write(phydev, MII_LXT971_IER, 0); + + return err; +} + +static struct phy_driver lxt970_driver = { + .phy_id = 0x07810000, + .name = "LXT970", + .phy_id_mask = 0x0fffffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = lxt970_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = lxt970_ack_interrupt, + .config_intr = lxt970_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static struct phy_driver lxt971_driver = { + .phy_id = 0x0001378e, + .name = "LXT971", + .phy_id_mask = 0x0fffffff, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = lxt971_ack_interrupt, + .config_intr = lxt971_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init lxt_init(void) +{ + int ret; + + ret = phy_driver_register(&lxt970_driver); + if (ret) + goto err1; + + ret = phy_driver_register(&lxt971_driver); + if (ret) + goto err2; + return 0; + + err2: + phy_driver_unregister(&lxt970_driver); + err1: + return ret; +} + +static void __exit lxt_exit(void) +{ + phy_driver_unregister(&lxt970_driver); + phy_driver_unregister(&lxt971_driver); +} + +module_init(lxt_init); +module_exit(lxt_exit); diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c new file mode 100644 index 000000000000..4a72b025006b --- /dev/null +++ b/drivers/net/phy/marvell.c @@ -0,0 +1,140 @@ +/* + * drivers/net/phy/marvell.c + * + * Driver for Marvell PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define MII_M1011_IEVENT 0x13 +#define MII_M1011_IEVENT_CLEAR 0x0000 + +#define MII_M1011_IMASK 0x12 +#define MII_M1011_IMASK_INIT 0x6400 +#define MII_M1011_IMASK_CLEAR 0x0000 + +MODULE_DESCRIPTION("Marvell PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +static int marvell_ack_interrupt(struct phy_device *phydev) +{ + int err; + + /* Clear the interrupts by reading the reg */ + err = phy_read(phydev, MII_M1011_IEVENT); + + if (err < 0) + return err; + + return 0; +} + +static int marvell_config_intr(struct phy_device *phydev) +{ + int err; + + if(phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_M1011_IMASK, MII_M1011_IMASK_INIT); + else + err = phy_write(phydev, MII_M1011_IMASK, MII_M1011_IMASK_CLEAR); + + return err; +} + +static int marvell_config_aneg(struct phy_device *phydev) +{ + int err; + + /* The Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x1f); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x200c); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1d, 0x5); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0); + if (err < 0) + return err; + + err = phy_write(phydev, 0x1e, 0x100); + if (err < 0) + return err; + + + err = genphy_config_aneg(phydev); + + return err; +} + + +static 
struct phy_driver m88e1101_driver = { + .phy_id = 0x01410c00, + .phy_id_mask = 0xffffff00, + .name = "Marvell 88E1101", + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_aneg = &marvell_config_aneg, + .read_status = &genphy_read_status, + .ack_interrupt = &marvell_ack_interrupt, + .config_intr = &marvell_config_intr, + .driver = { .owner = THIS_MODULE,}, +}; + +static int __init marvell_init(void) +{ + return phy_driver_register(&m88e1101_driver); +} + +static void __exit marvell_exit(void) +{ + phy_driver_unregister(&m88e1101_driver); +} + +module_init(marvell_init); +module_exit(marvell_exit); diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c new file mode 100644 index 000000000000..e75103ba6f86 --- /dev/null +++ b/drivers/net/phy/mdio_bus.c @@ -0,0 +1,173 @@ +/* + * drivers/net/phy/mdio_bus.c + * + * MDIO Bus interface + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* mdiobus_register + * + * description: Called by a bus driver to bring up all the PHYs + * on a given bus, and attach them to the bus. Returns -EINVAL if bus, its name, or its read/write hooks are NULL + */ +int mdiobus_register(struct mii_bus *bus) +{ + int i; + int err = 0; + + if (NULL == bus || NULL == bus->name || + NULL == bus->read || + NULL == bus->write) + return -EINVAL; + /* bus is known to be non-NULL from here on, so it is safe to touch its lock */ + spin_lock_init(&bus->mdio_lock); + + if (bus->reset) + bus->reset(bus); + + for (i = 0; i < PHY_MAX_ADDR; i++) { + struct phy_device *phydev; + + phydev = get_phy_device(bus, i); + + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + + /* There's a PHY at this address + * We need to set: + * 1) IRQ + * 2) bus_id + * 3) parent + * 4) bus + * 5) mii_bus + * And, we need to register it */ + if (phydev) { + phydev->irq = bus->irq[i]; + + phydev->dev.parent = bus->dev; + phydev->dev.bus = &mdio_bus_type; + sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); + + phydev->bus = bus; + + err = device_register(&phydev->dev); + + if (err) + printk(KERN_ERR "phy %d failed to register\n", + i); + } + + bus->phy_map[i] = phydev; + } + + pr_info("%s: probed\n", bus->name); + + return err; +} +EXPORT_SYMBOL(mdiobus_register); + +void mdiobus_unregister(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if (bus->phy_map[i]) { + device_unregister(&bus->phy_map[i]->dev); + kfree(bus->phy_map[i]); + } + } +} +EXPORT_SYMBOL(mdiobus_unregister); + +/* mdio_bus_match + * + * description: Given a PHY device, and a PHY driver, return 1 if + * the driver supports the device.
Otherwise, return 0 + */ +static int mdio_bus_match(struct device *dev, struct device_driver *drv) +{ + struct phy_device *phydev = to_phy_device(dev); + struct phy_driver *phydrv = to_phy_driver(drv); + /* Mask both IDs so driver phy_id bits outside phy_id_mask cannot defeat the match */ + return ((phydrv->phy_id & phydrv->phy_id_mask) == (phydev->phy_id & phydrv->phy_id_mask)); +} + +/* Suspend and resume. Copied from platform_suspend and + * platform_resume + */ +static int mdio_bus_suspend(struct device * dev, u32 state) +{ + int ret = 0; + struct device_driver *drv = dev->driver; + + if (drv && drv->suspend) { + ret = drv->suspend(dev, state, SUSPEND_DISABLE); + if (ret == 0) + ret = drv->suspend(dev, state, SUSPEND_SAVE_STATE); + if (ret == 0) + ret = drv->suspend(dev, state, SUSPEND_POWER_DOWN); + } + return ret; +} + +static int mdio_bus_resume(struct device * dev) +{ + int ret = 0; + struct device_driver *drv = dev->driver; + + if (drv && drv->resume) { + ret = drv->resume(dev, RESUME_POWER_ON); + if (ret == 0) + ret = drv->resume(dev, RESUME_RESTORE_STATE); + if (ret == 0) + ret = drv->resume(dev, RESUME_ENABLE); + } + return ret; +} + +struct bus_type mdio_bus_type = { + .name = "mdio_bus", + .match = mdio_bus_match, + .suspend = mdio_bus_suspend, + .resume = mdio_bus_resume, +}; + +static int __init mdio_bus_init(void) +{ + return bus_register(&mdio_bus_type); +} + +subsys_initcall(mdio_bus_init); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c new file mode 100644 index 000000000000..e2c6896b92d2 --- /dev/null +++ b/drivers/net/phy/phy.c @@ -0,0 +1,862 @@ +/* + * drivers/net/phy/phy.c + * + * Framework for configuring and reading PHY devices + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version.
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static void phy_change(void *data); +static void phy_timer(unsigned long data); + +/* Convenience function to print out the current phy status + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); + + +/* Convenience functions for reading/writing a given PHY + * register. They MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. */ +int phy_read(struct phy_device *phydev, u16 regnum) +{ + int retval; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + retval = bus->read(bus, phydev->addr, regnum); + spin_unlock_bh(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(phy_read); + +int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + int err; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + err = bus->write(bus, phydev->addr, regnum, val); + spin_unlock_bh(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(phy_write); + + +int phy_clear_interrupt(struct phy_device *phydev) +{ + int err = 0; + + if (phydev->drv->ack_interrupt) + err = phydev->drv->ack_interrupt(phydev); + + return err; +} + + +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) +{ + int err = 0; + + phydev->interrupts = interrupts; + if (phydev->drv->config_intr) + err = phydev->drv->config_intr(phydev); + + return err; +} + + +/* phy_aneg_done + * + * description: Reads the status register and returns 0 either if + * auto-negotiation is incomplete, or 
if there was an error. + * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + */ +static inline int phy_aneg_done(struct phy_device *phydev) +{ + int retval; + + retval = phy_read(phydev, MII_BMSR); + + return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); +} + +/* phy_start_aneg + * + * description: Calls the PHY driver's config_aneg, and then + * sets the PHY state to PHY_AN if auto-negotiation is enabled, + * and to PHY_FORCING if auto-negotiation is disabled. Unless + * the PHY is currently HALTED. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + + if (err < 0) + goto out_unlock; + + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value */ +struct phy_setting { + int speed; + int duplex; + u32 setting; +}; + +/* A mapping of all SUPPORTED settings to speed/duplex */ +static struct phy_setting settings[] = { + { + .speed = 10000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_1000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_1000baseT_Half, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_100baseT_Full, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_100baseT_Half, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10baseT_Full, + }, + { + .speed = SPEED_10, 
+ .duplex = DUPLEX_HALF, + .setting = SUPPORTED_10baseT_Half, + }, +}; + +#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) + +/* phy_find_setting + * + * description: Searches the settings array for the setting which + * matches the desired speed and duplex, and returns the index + * of that setting. Returns the index of the last setting if + * none of the others match. + */ +static inline int phy_find_setting(int speed, int duplex) +{ + int idx = 0; + + while (idx < ARRAY_SIZE(settings) && + (settings[idx].speed != speed || + settings[idx].duplex != duplex)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_find_valid + * idx: The first index in settings[] to search + * features: A mask of the valid settings + * + * description: Returns the index of the first valid setting less + * than or equal to the one pointed to by idx, as determined by + * the mask in features. Returns the index of the last setting + * if nothing else matches. + */ +static inline int phy_find_valid(int idx, u32 features) +{ + while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_sanitize_settings + * + * description: Make sure the PHY is set to supported speeds and + * duplexes. Drop down by one in this order: 1000/FULL, + * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF + */ +void phy_sanitize_settings(struct phy_device *phydev) +{ + u32 features = phydev->supported; + int idx; + + /* Sanitize settings based on PHY capabilities */ + if ((features & SUPPORTED_Autoneg) == 0) + phydev->autoneg = 0; + + idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), + features); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; +} +EXPORT_SYMBOL(phy_sanitize_settings); + +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. 
The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. + */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + +/* phy_ethtool_sset: + * A generic ethtool sset function. Handles all the details + * + * A few notes about parameter checking: + * - We don't set port or transceiver, so we don't care what they + * were set to. + * - phy_start_aneg() will make sure forced settings are sane, and + * choose the next best ones from the ones selected, so we don't + * care if ethtool tries to give us bad values + */ +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + if (cmd->phy_address != phydev->addr) + return -EINVAL; + + /* We make sure that we don't pass unsupported + * values in to the PHY */ + cmd->advertising &= phydev->supported; + + /* Verify the settings we care about. 
*/ + if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_DISABLE + && ((cmd->speed != SPEED_1000 + && cmd->speed != SPEED_100 + && cmd->speed != SPEED_10) + || (cmd->duplex != DUPLEX_HALF + && cmd->duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = cmd->autoneg; + + phydev->speed = cmd->speed; + + phydev->advertising = cmd->advertising; + + if (AUTONEG_ENABLE == cmd->autoneg) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = cmd->duplex; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} + +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + cmd->supported = phydev->supported; + + cmd->advertising = phydev->advertising; + + cmd->speed = phydev->speed; + cmd->duplex = phydev->duplex; + cmd->port = PORT_MII; + cmd->phy_address = phydev->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = phydev->autoneg; + + return 0; +} + + +/* Note that this function is currently incompatible with the + * PHYCONTROL layer. It changes registers without regard to + * current state. 
Use at own risk + */ +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd) +{ + u16 val = mii_data->val_in; + + switch (cmd) { + case SIOCGMIIPHY: + mii_data->phy_id = phydev->addr; + break; + case SIOCGMIIREG: + mii_data->val_out = phy_read(phydev, mii_data->reg_num); + break; + + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == phydev->addr) { + switch(mii_data->reg_num) { + case MII_BMCR: + if (val & (BMCR_RESET|BMCR_ANENABLE)) + phydev->autoneg = AUTONEG_DISABLE; + else + phydev->autoneg = AUTONEG_ENABLE; + if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + break; + case MII_ADVERTISE: + phydev->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + phy_write(phydev, mii_data->reg_num, val); + + if (mii_data->reg_num == MII_BMCR + && val & BMCR_RESET + && phydev->drv->config_init) + phydev->drv->config_init(phydev); + break; + } + + return 0; +} + +/* phy_start_machine: + * + * description: The PHY infrastructure can run a state machine + * which tracks whether the PHY is starting up, negotiating, + * etc. This function starts the timer which tracks the state + * of the PHY. If you want to be notified when the state + * changes, pass in the callback, otherwise, pass NULL. If you + * want to maintain your own state machine, do not call this + * function. */ +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_state = handler; + + init_timer(&phydev->phy_timer); + phydev->phy_timer.function = &phy_timer; + phydev->phy_timer.data = (unsigned long) phydev; + mod_timer(&phydev->phy_timer, jiffies + HZ); +} + +/* phy_stop_machine + * + * description: Stops the state machine timer, sets the state to + * UP (unless it wasn't up yet), and then frees the interrupt, + * if it is in use. This function must be called BEFORE + * phy_detach. 
+ */ +void phy_stop_machine(struct phy_device *phydev) +{ + del_timer_sync(&phydev->phy_timer); + + spin_lock(&phydev->lock); + if (phydev->state > PHY_UP) + phydev->state = PHY_UP; + spin_unlock(&phydev->lock); + + if (phydev->irq != PHY_POLL) + phy_stop_interrupts(phydev); + + phydev->adjust_state = NULL; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_error: + * + * Moves the PHY to the HALTED state in response to a read + * or write error, and tells the controller the link is down. + * Must not be called from interrupt context, or while the + * phydev->lock is held. + */ +void phy_error(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + phydev->state = PHY_HALTED; + spin_unlock(&phydev->lock); +} + +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side */ +int phy_enable_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_clear_interrupt(phydev); + + if (err < 0) + return err; + + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + return err; +} +EXPORT_SYMBOL(phy_enable_interrupts); + +/* Disable the PHY interrupts from the PHY side */ +int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} +EXPORT_SYMBOL(phy_disable_interrupts); + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); + +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); + +/* PHY timer which handles the state machine */ +static void phy_timer(unsigned long data) +{ + struct phy_device *phydev = (struct phy_device *)data; + int needs_aneg = 0; + int err = 0; + + spin_lock(&phydev->lock); + + if (phydev->adjust_state) + phydev->adjust_state(phydev->attached_dev); + + switch(phydev->state) { + case PHY_DOWN: + case PHY_STARTING: + case PHY_READY: + case PHY_PENDING: + break; + case PHY_UP: + needs_aneg = 1; + + phydev->link_timeout = PHY_AN_TIMEOUT; + + break; + case PHY_AN: + /* Check if negotiation is done. 
Break + * if there's an error */ + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* If auto-negotiation is done, we change to + * either RUNNING, or NOLINK */ + if (err > 0) { + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + } else if (0 == phydev->link_timeout--) { + /* The counter expired, so either we + * switch to forced mode, or the + * magic_aneg bit exists, and we try aneg + * again */ + if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { + int idx; + + /* We'll start from the + * fastest speed, and work + * our way down */ + idx = phy_find_valid(0, + phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + phydev->autoneg = AUTONEG_DISABLE; + phydev->state = PHY_FORCING; + phydev->link_timeout = + PHY_FORCE_TIMEOUT; + + pr_info("Trying %d/%s\n", + phydev->speed, + DUPLEX_FULL == + phydev->duplex ? 
+ "FULL" : "HALF"); + } + + needs_aneg = 1; + } + break; + case PHY_NOLINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_FORCING: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + if (0 == phydev->link_timeout--) { + phy_force_reduction(phydev); + needs_aneg = 1; + } + } + + phydev->adjust_link(phydev->attached_dev); + break; + case PHY_RUNNING: + /* Only register a CHANGE if we are + * polling */ + if (PHY_POLL == phydev->irq) + phydev->state = PHY_CHANGELINK; + break; + case PHY_CHANGELINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + if (PHY_POLL != phydev->irq) + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + break; + case PHY_HALTED: + if (phydev->link) { + phydev->link = 0; + netif_carrier_off(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_RESUMING: + + err = phy_clear_interrupt(phydev); + + if (err) + break; + + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + + if (err) + break; + + if (AUTONEG_ENABLE == phydev->autoneg) { + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* err > 0 if AN is done. 
+ * Otherwise, it's 0, and we're + * still waiting for AN */ + if (err > 0) { + phydev->state = PHY_RUNNING; + } else { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } + } else + phydev->state = PHY_RUNNING; + break; + } + + spin_unlock(&phydev->lock); + + if (needs_aneg) + err = phy_start_aneg(phydev); + + if (err < 0) + phy_error(phydev); + + mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); +} + +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy.c.orig b/drivers/net/phy/phy.c.orig new file mode 100644 index 000000000000..6af17cec9ace --- /dev/null +++ b/drivers/net/phy/phy.c.orig @@ -0,0 +1,860 @@ +/* + * drivers/net/phy/phy.c + * + * Framework for configuring and reading PHY devices + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static void phy_change(void *data); +static void phy_timer(unsigned long data); + +/* Convenience function to print out the current phy status + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); + + +/* Convenience functions for reading/writing a given PHY + * register. 
They MUST NOT be called from interrupt context, + * because the bus read/write functions may wait for an interrupt + * to conclude the operation. */ +int phy_read(struct phy_device *phydev, u16 regnum) +{ + int retval; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + retval = bus->read(bus, phydev->addr, regnum); + spin_unlock_bh(&bus->mdio_lock); + + return retval; +} +EXPORT_SYMBOL(phy_read); + +int phy_write(struct phy_device *phydev, u16 regnum, u16 val) +{ + int err; + struct mii_bus *bus = phydev->bus; + + spin_lock_bh(&bus->mdio_lock); + err = bus->write(bus, phydev->addr, regnum, val); + spin_unlock_bh(&bus->mdio_lock); + + return err; +} +EXPORT_SYMBOL(phy_write); + + +int phy_clear_interrupt(struct phy_device *phydev) +{ + int err = 0; + + if (phydev->drv->ack_interrupt) + err = phydev->drv->ack_interrupt(phydev); + + return err; +} + + +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) +{ + int err = 0; + + phydev->interrupts = interrupts; + if (phydev->drv->config_intr) + err = phydev->drv->config_intr(phydev); + + return err; +} + + +/* phy_aneg_done + * + * description: Reads the status register and returns 0 either if + * auto-negotiation is incomplete, or if there was an error. + * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. + */ +static inline int phy_aneg_done(struct phy_device *phydev) +{ + int retval; + + retval = phy_read(phydev, MII_BMSR); + + return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); +} + +/* phy_start_aneg + * + * description: Calls the PHY driver's config_aneg, and then + * sets the PHY state to PHY_AN if auto-negotiation is enabled, + * and to PHY_FORCING if auto-negotiation is disabled. Unless + * the PHY is currently HALTED. 
+ */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + + if (err < 0) + goto out_unlock; + + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +/* A structure for mapping a particular speed and duplex + * combination to a particular SUPPORTED and ADVERTISED value */ +struct phy_setting { + int speed; + int duplex; + u32 setting; +}; + +/* A mapping of all SUPPORTED settings to speed/duplex */ +static struct phy_setting settings[] = { + { + .speed = 10000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_1000baseT_Full, + }, + { + .speed = SPEED_1000, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_1000baseT_Half, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_100baseT_Full, + }, + { + .speed = SPEED_100, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_100baseT_Half, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_FULL, + .setting = SUPPORTED_10baseT_Full, + }, + { + .speed = SPEED_10, + .duplex = DUPLEX_HALF, + .setting = SUPPORTED_10baseT_Half, + }, +}; + +#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) + +/* phy_find_setting + * + * description: Searches the settings array for the setting which + * matches the desired speed and duplex, and returns the index + * of that setting. Returns the index of the last setting if + * none of the others match. 
+ */ +static inline int phy_find_setting(int speed, int duplex) +{ + int idx = 0; + + while (idx < ARRAY_SIZE(settings) && + (settings[idx].speed != speed || + settings[idx].duplex != duplex)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_find_valid + * idx: The first index in settings[] to search + * features: A mask of the valid settings + * + * description: Returns the index of the first valid setting less + * than or equal to the one pointed to by idx, as determined by + * the mask in features. Returns the index of the last setting + * if nothing else matches. + */ +static inline int phy_find_valid(int idx, u32 features) +{ + while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) + idx++; + + return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; +} + +/* phy_sanitize_settings + * + * description: Make sure the PHY is set to supported speeds and + * duplexes. Drop down by one in this order: 1000/FULL, + * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF + */ +void phy_sanitize_settings(struct phy_device *phydev) +{ + u32 features = phydev->supported; + int idx; + + /* Sanitize settings based on PHY capabilities */ + if ((features & SUPPORTED_Autoneg) == 0) + phydev->autoneg = 0; + + idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), + features); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; +} +EXPORT_SYMBOL(phy_sanitize_settings); + +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. 
+ */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + +/* phy_ethtool_sset: + * A generic ethtool sset function. Handles all the details + * + * A few notes about parameter checking: + * - We don't set port or transceiver, so we don't care what they + * were set to. + * - phy_start_aneg() will make sure forced settings are sane, and + * choose the next best ones from the ones selected, so we don't + * care if ethtool tries to give us bad values + */ +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + if (cmd->phy_address != phydev->addr) + return -EINVAL; + + /* We make sure that we don't pass unsupported + * values in to the PHY */ + cmd->advertising &= phydev->supported; + + /* Verify the settings we care about. 
*/ + if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) + return -EINVAL; + + if (cmd->autoneg == AUTONEG_DISABLE + && ((cmd->speed != SPEED_1000 + && cmd->speed != SPEED_100 + && cmd->speed != SPEED_10) + || (cmd->duplex != DUPLEX_HALF + && cmd->duplex != DUPLEX_FULL))) + return -EINVAL; + + phydev->autoneg = cmd->autoneg; + + phydev->speed = cmd->speed; + + phydev->advertising = cmd->advertising; + + if (AUTONEG_ENABLE == cmd->autoneg) + phydev->advertising |= ADVERTISED_Autoneg; + else + phydev->advertising &= ~ADVERTISED_Autoneg; + + phydev->duplex = cmd->duplex; + + /* Restart the PHY */ + phy_start_aneg(phydev); + + return 0; +} + +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) +{ + cmd->supported = phydev->supported; + + cmd->advertising = phydev->advertising; + + cmd->speed = phydev->speed; + cmd->duplex = phydev->duplex; + cmd->port = PORT_MII; + cmd->phy_address = phydev->addr; + cmd->transceiver = XCVR_EXTERNAL; + cmd->autoneg = phydev->autoneg; + + return 0; +} + + +/* Note that this function is currently incompatible with the + * PHYCONTROL layer. It changes registers without regard to + * current state. 
Use at own risk + */ +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd) +{ + u16 val = mii_data->val_in; + + switch (cmd) { + case SIOCGMIIPHY: + mii_data->phy_id = phydev->addr; + break; + case SIOCGMIIREG: + mii_data->val_out = phy_read(phydev, mii_data->reg_num); + break; + + case SIOCSMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == phydev->addr) { + switch(mii_data->reg_num) { + case MII_BMCR: + if (val & (BMCR_RESET|BMCR_ANENABLE)) + phydev->autoneg = AUTONEG_DISABLE; + else + phydev->autoneg = AUTONEG_ENABLE; + if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + break; + case MII_ADVERTISE: + phydev->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + phy_write(phydev, mii_data->reg_num, val); + + if (mii_data->reg_num == MII_BMCR + && val & BMCR_RESET + && phydev->drv->config_init) + phydev->drv->config_init(phydev); + break; + } + + return 0; +} + +/* phy_start_machine: + * + * description: The PHY infrastructure can run a state machine + * which tracks whether the PHY is starting up, negotiating, + * etc. This function starts the timer which tracks the state + * of the PHY. If you want to be notified when the state + * changes, pass in the callback, otherwise, pass NULL. If you + * want to maintain your own state machine, do not call this + * function. */ +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_state = handler; + + init_timer(&phydev->phy_timer); + phydev->phy_timer.function = &phy_timer; + phydev->phy_timer.data = (unsigned long) phydev; + mod_timer(&phydev->phy_timer, jiffies + HZ); +} + +/* phy_stop_machine + * + * description: Stops the state machine timer, sets the state to + * UP (unless it wasn't up yet), and then frees the interrupt, + * if it is in use. This function must be called BEFORE + * phy_detach. 
+ */ +void phy_stop_machine(struct phy_device *phydev) +{ + del_timer_sync(&phydev->phy_timer); + + spin_lock(&phydev->lock); + if (phydev->state > PHY_UP) + phydev->state = PHY_UP; + spin_unlock(&phydev->lock); + + if (phydev->irq != PHY_POLL) + phy_stop_interrupts(phydev); + + phydev->adjust_state = NULL; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_error: + * + * Moves the PHY to the HALTED state in response to a read + * or write error, and tells the controller the link is down. + * Must not be called from interrupt context, or while the + * phydev->lock is held. + */ +void phy_error(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + phydev->state = PHY_HALTED; + spin_unlock(&phydev->lock); +} + +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side */ +int phy_enable_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_clear_interrupt(phydev); + + if (err < 0) + return err; + + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + return err; +} + +/* Disable the PHY interrupts from the PHY side */ +int phy_disable_interrupts(struct phy_device *phydev) +{ + int err; + + /* Disable PHY interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + + if (err) + goto phy_err; + + /* Clear the interrupt */ + err = phy_clear_interrupt(phydev); + + if (err) + goto phy_err; + + return 0; + +phy_err: + phy_error(phydev); + + return err; +} + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); + +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); + +/* PHY timer which handles the state machine */ +static void phy_timer(unsigned long data) +{ + struct phy_device *phydev = (struct phy_device *)data; + int needs_aneg = 0; + int err = 0; + + spin_lock(&phydev->lock); + + if (phydev->adjust_state) + phydev->adjust_state(phydev->attached_dev); + + switch(phydev->state) { + case PHY_DOWN: + case PHY_STARTING: + case PHY_READY: + case PHY_PENDING: + break; + case PHY_UP: + needs_aneg = 1; + + phydev->link_timeout = PHY_AN_TIMEOUT; + + break; + case PHY_AN: + /* Check if negotiation is done. 
Break + * if there's an error */ + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* If auto-negotiation is done, we change to + * either RUNNING, or NOLINK */ + if (err > 0) { + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + } else if (0 == phydev->link_timeout--) { + /* The counter expired, so either we + * switch to forced mode, or the + * magic_aneg bit exists, and we try aneg + * again */ + if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { + int idx; + + /* We'll start from the + * fastest speed, and work + * our way down */ + idx = phy_find_valid(0, + phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + phydev->autoneg = AUTONEG_DISABLE; + phydev->state = PHY_FORCING; + phydev->link_timeout = + PHY_FORCE_TIMEOUT; + + pr_info("Trying %d/%s\n", + phydev->speed, + DUPLEX_FULL == + phydev->duplex ? 
+ "FULL" : "HALF"); + } + + needs_aneg = 1; + } + break; + case PHY_NOLINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_FORCING: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + if (0 == phydev->link_timeout--) { + phy_force_reduction(phydev); + needs_aneg = 1; + } + } + + phydev->adjust_link(phydev->attached_dev); + break; + case PHY_RUNNING: + /* Only register a CHANGE if we are + * polling */ + if (PHY_POLL == phydev->irq) + phydev->state = PHY_CHANGELINK; + break; + case PHY_CHANGELINK: + err = phy_read_status(phydev); + + if (err) + break; + + if (phydev->link) { + phydev->state = PHY_RUNNING; + netif_carrier_on(phydev->attached_dev); + } else { + phydev->state = PHY_NOLINK; + netif_carrier_off(phydev->attached_dev); + } + + phydev->adjust_link(phydev->attached_dev); + + if (PHY_POLL != phydev->irq) + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + break; + case PHY_HALTED: + if (phydev->link) { + phydev->link = 0; + netif_carrier_off(phydev->attached_dev); + phydev->adjust_link(phydev->attached_dev); + } + break; + case PHY_RESUMING: + + err = phy_clear_interrupt(phydev); + + if (err) + break; + + err = phy_config_interrupt(phydev, + PHY_INTERRUPT_ENABLED); + + if (err) + break; + + if (AUTONEG_ENABLE == phydev->autoneg) { + err = phy_aneg_done(phydev); + if (err < 0) + break; + + /* err > 0 if AN is done. 
+ * Otherwise, it's 0, and we're + * still waiting for AN */ + if (err > 0) { + phydev->state = PHY_RUNNING; + } else { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } + } else + phydev->state = PHY_RUNNING; + break; + } + + spin_unlock(&phydev->lock); + + if (needs_aneg) + err = phy_start_aneg(phydev); + + if (err < 0) + phy_error(phydev); + + mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); +} + +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c new file mode 100644 index 000000000000..f0595af4c837 --- /dev/null +++ b/drivers/net/phy/phy_device.c @@ -0,0 +1,682 @@ +/* + * drivers/net/phy/phy_device.c + * + * Framework for finding and configuring PHYs. + * Also contains generic PHY driver + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* get_phy_device + * + * description: Reads the ID registers of the PHY at addr on the + * bus, then allocates and returns the phy_device to + * represent it. 
+ */ +struct phy_device * get_phy_device(struct mii_bus *bus, int addr) +{ + int phy_reg; + u32 phy_id; + struct phy_device *dev = NULL; + + /* Grab the bits from PHYIR1, and put them + * in the upper half */ + phy_reg = bus->read(bus, addr, MII_PHYSID1); + + if (phy_reg < 0) + return ERR_PTR(phy_reg); + + phy_id = (phy_reg & 0xffff) << 16; + + /* Grab the bits from PHYIR2, and put them in the lower half */ + phy_reg = bus->read(bus, addr, MII_PHYSID2); + + if (phy_reg < 0) + return ERR_PTR(phy_reg); + + phy_id |= (phy_reg & 0xffff); + + /* If the phy_id is all Fs, there is no device there */ + if (0xffffffff == phy_id) + return NULL; + + /* Otherwise, we allocate the device, and initialize the + * default values */ + dev = kcalloc(1, sizeof(*dev), GFP_KERNEL); + + if (NULL == dev) + return ERR_PTR(-ENOMEM); + + dev->speed = 0; + dev->duplex = -1; + dev->pause = dev->asym_pause = 0; + dev->link = 1; + + dev->autoneg = AUTONEG_ENABLE; + + dev->addr = addr; + dev->phy_id = phy_id; + dev->bus = bus; + + dev->state = PHY_DOWN; + + spin_lock_init(&dev->lock); + + return dev; +} + +/* phy_prepare_link: + * + * description: Tells the PHY infrastructure to handle the + * gory details on monitoring link status (whether through + * polling or an interrupt), and to call back to the + * connected device driver when the link status changes. + * If you want to monitor your own link state, don't call + * this function */ +void phy_prepare_link(struct phy_device *phydev, + void (*handler)(struct net_device *)) +{ + phydev->adjust_link = handler; +} + +#ifdef CONFIG_PHYCONTROL +/* phy_connect: + * + * description: Convenience function for connecting ethernet + * devices to PHY devices. The default behavior is for + * the PHY infrastructure to handle everything, and only notify + * the connected driver when the link status changes. 
If you + * don't want, or can't use the provided functionality, you may + * choose to call only the subset of functions which provide + * the desired functionality. + */ +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags) +{ + struct phy_device *phydev; + + phydev = phy_attach(dev, phy_id, flags); + /* An ERR_PTR from phy_attach() is handed straight back */ + if (IS_ERR(phydev)) + return phydev; + + phy_prepare_link(phydev, handler); + + phy_start_machine(phydev, NULL); + /* Only a positive irq counts as a usable interrupt line; + * PHY_POLL and PHY_IGNORE_INTERRUPT are negative */ + if (phydev->irq > 0) + phy_start_interrupts(phydev); + + return phydev; +} +EXPORT_SYMBOL(phy_connect); +/* phy_disconnect: + * + * description: Reverses phy_connect(): stops the PHY interrupts + * (only if phydev->irq > 0), stops the state machine, clears + * the adjust_link callback and finally detaches the PHY. + */ +void phy_disconnect(struct phy_device *phydev) +{ + if (phydev->irq > 0) + phy_stop_interrupts(phydev); + + phy_stop_machine(phydev); + + phydev->adjust_link = NULL; + + phy_detach(phydev); +} +EXPORT_SYMBOL(phy_disconnect); + +#endif /* CONFIG_PHYCONTROL */ + +/* phy_attach: + * + * description: Called by drivers to attach to a particular PHY + * device. The phy_device is found, and properly hooked up + * to the phy_driver. If no driver is attached, then the + * genphy_driver is used. The phy_device is given a ptr to + * the attaching device and the caller's flags (the link status + * callback is installed separately, see phy_prepare_link()). + * The phy_device is returned to the attaching driver; on + * failure an ERR_PTR is returned instead (-ENODEV if no PHY + * with that name is on the mdio bus, -EBUSY if the PHY is + * already attached). + * + * phy_compare_id() is the bus_find_device() match callback: it + * returns 1 when dev->bus_id equals the requested name, else 0. + */ +static int phy_compare_id(struct device *dev, void *data) +{ + return strcmp((char *)data, dev->bus_id) ? 0 : 1; +} + +struct phy_device *phy_attach(struct net_device *dev, + const char *phy_id, u32 flags) +{ + struct bus_type *bus = &mdio_bus_type; + struct phy_device *phydev; + struct device *d; + + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); + + if (d) { + phydev = to_phy_device(d); + } else { + printk(KERN_ERR "%s not found\n", phy_id); + return ERR_PTR(-ENODEV); + } + + /* Assume that if there is no driver, that it doesn't + * exist, and we should use the genphy driver.
*/ + if (NULL == d->driver) { + int err; + down_write(&d->bus->subsys.rwsem); + d->driver = &genphy_driver.driver; + + err = d->driver->probe(d); + + if (err < 0) + return ERR_PTR(err); + + device_bind_driver(d); + up_write(&d->bus->subsys.rwsem); + } + + if (phydev->attached_dev) { + printk(KERN_ERR "%s: %s already attached\n", + dev->name, phy_id); + return ERR_PTR(-EBUSY); + } + + phydev->attached_dev = dev; + + phydev->dev_flags = flags; + + return phydev; +} +EXPORT_SYMBOL(phy_attach); + +void phy_detach(struct phy_device *phydev) +{ + phydev->attached_dev = NULL; + + /* If the device had no specific driver before (i.e. - it + * was using the generic driver), we unbind the device + * from the generic driver so that there's a chance a + * real driver could be loaded */ + if (phydev->dev.driver == &genphy_driver.driver) { + down_write(&phydev->dev.bus->subsys.rwsem); + device_release_driver(&phydev->dev); + up_write(&phydev->dev.bus->subsys.rwsem); + } +} +EXPORT_SYMBOL(phy_detach); + + +/* Generic PHY support and helper functions */ + +/* genphy_config_advert + * + * description: Writes MII_ADVERTISE with the appropriate values, + * after sanitizing the values to make sure we only advertise + * what is supported + */ +int genphy_config_advert(struct phy_device *phydev) +{ + u32 advertise; + int adv; + int err; + + /* Only allow advertising what + * this PHY supports */ + phydev->advertising &= phydev->supported; + advertise = phydev->advertising; + + /* Setup standard advertisement */ + adv = phy_read(phydev, MII_ADVERTISE); + + if (adv < 0) + return adv; + + adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | + ADVERTISE_PAUSE_ASYM); + if (advertise & ADVERTISED_10baseT_Half) + adv |= ADVERTISE_10HALF; + if (advertise & ADVERTISED_10baseT_Full) + adv |= ADVERTISE_10FULL; + if (advertise & ADVERTISED_100baseT_Half) + adv |= ADVERTISE_100HALF; + if (advertise & ADVERTISED_100baseT_Full) + adv |= ADVERTISE_100FULL; + if (advertise & 
ADVERTISED_Pause) + adv |= ADVERTISE_PAUSE_CAP; + if (advertise & ADVERTISED_Asym_Pause) + adv |= ADVERTISE_PAUSE_ASYM; + + err = phy_write(phydev, MII_ADVERTISE, adv); + + if (err < 0) + return err; + + /* Configure gigabit if it's supported */ + if (phydev->supported & (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full)) { + adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + + adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF); + if (advertise & SUPPORTED_1000baseT_Half) + adv |= ADVERTISE_1000HALF; + if (advertise & SUPPORTED_1000baseT_Full) + adv |= ADVERTISE_1000FULL; + err = phy_write(phydev, MII_CTRL1000, adv); + + if (err < 0) + return err; + } + + return adv; +} +EXPORT_SYMBOL(genphy_config_advert); + +/* genphy_setup_forced + * + * description: Configures MII_BMCR to force speed/duplex + * to the values in phydev. Assumes that the values are valid. + * Please see phy_sanitize_settings() */ +int genphy_setup_forced(struct phy_device *phydev) +{ + int ctl = BMCR_RESET; + + phydev->pause = phydev->asym_pause = 0; + + if (SPEED_1000 == phydev->speed) + ctl |= BMCR_SPEED1000; + else if (SPEED_100 == phydev->speed) + ctl |= BMCR_SPEED100; + + if (DUPLEX_FULL == phydev->duplex) + ctl |= BMCR_FULLDPLX; + + ctl = phy_write(phydev, MII_BMCR, ctl); + + if (ctl < 0) + return ctl; + + /* We just reset the device, so we'd better configure any + * settings the PHY requires to operate */ + if (phydev->drv->config_init) + ctl = phydev->drv->config_init(phydev); + + return ctl; +} + + +/* Enable and Restart Autonegotiation */ +int genphy_restart_aneg(struct phy_device *phydev) +{ + int ctl; + + ctl = phy_read(phydev, MII_BMCR); + + if (ctl < 0) + return ctl; + + ctl |= (BMCR_ANENABLE | BMCR_ANRESTART); + + /* Don't isolate the PHY if we're negotiating */ + ctl &= ~(BMCR_ISOLATE); + + ctl = phy_write(phydev, MII_BMCR, ctl); + + return ctl; +} + + +/* genphy_config_aneg + * + * description: If auto-negotiation is enabled, we configure the + * 
advertising, and then restart auto-negotiation. If it is not + * enabled, then we write the BMCR + */ +int genphy_config_aneg(struct phy_device *phydev) +{ + int err = 0; + /* A failure while writing the advertisement aborts before + * autonegotiation is restarted */ + if (AUTONEG_ENABLE == phydev->autoneg) { + err = genphy_config_advert(phydev); + + if (err < 0) + return err; + + err = genphy_restart_aneg(phydev); + } else + err = genphy_setup_forced(phydev); + + return err; +} +EXPORT_SYMBOL(genphy_config_aneg); + +/* genphy_update_link + * + * description: Update the value in phydev->link to reflect the + * current link value. In order to do this, we need to read + * the status register twice, keeping the second value + * (presumably because the link bit is latched and the first + * read may return stale state -- TODO confirm). Returns 0 on + * success, or the negative error code from phy_read(). + */ +int genphy_update_link(struct phy_device *phydev) +{ + int status; + + /* Do a fake read; only its error status is looked at */ + status = phy_read(phydev, MII_BMSR); + + if (status < 0) + return status; + + /* Read link and autonegotiation status */ + status = phy_read(phydev, MII_BMSR); + + if (status < 0) + return status; + /* link becomes 1 when BMSR_LSTATUS is set, 0 otherwise */ + if ((status & BMSR_LSTATUS) == 0) + phydev->link = 0; + else + phydev->link = 1; + + return 0; +} + +/* genphy_read_status + * + * description: Check the link, then figure out the current state + * by comparing what we advertise with what the link partner + * advertises. Start by checking the gigabit possibilities, + * then move on to 10/100.
+ */ +int genphy_read_status(struct phy_device *phydev) +{ + int adv; + int err; + int lpa; + int lpagb = 0; + + /* Update the link, but return if there + * was an error */ + err = genphy_update_link(phydev); + if (err) + return err; + + if (AUTONEG_ENABLE == phydev->autoneg) { + if (phydev->supported & (SUPPORTED_1000baseT_Half + | SUPPORTED_1000baseT_Full)) { + lpagb = phy_read(phydev, MII_STAT1000); + + if (lpagb < 0) + return lpagb; + + adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + /* Keep only the gigabit modes both ends agree on. + * The shift presumably lines the MII_CTRL1000 + * advertisement bits up with the link partner + * bits of MII_STAT1000 -- TODO confirm */ + lpagb &= adv << 2; + } + + lpa = phy_read(phydev, MII_LPA); + + if (lpa < 0) + return lpa; + + adv = phy_read(phydev, MII_ADVERTISE); + + if (adv < 0) + return adv; + /* Modes advertised by both us and the link partner */ + lpa &= adv; + /* Start from 10 Mbps half duplex without pause and + * upgrade below according to the common modes */ + phydev->speed = SPEED_10; + phydev->duplex = DUPLEX_HALF; + phydev->pause = phydev->asym_pause = 0; + + if (lpagb & (LPA_1000FULL | LPA_1000HALF)) { + phydev->speed = SPEED_1000; + + if (lpagb & LPA_1000FULL) + phydev->duplex = DUPLEX_FULL; + } else if (lpa & (LPA_100FULL | LPA_100HALF)) { + phydev->speed = SPEED_100; + + if (lpa & LPA_100FULL) + phydev->duplex = DUPLEX_FULL; + } else + if (lpa & LPA_10FULL) + phydev->duplex = DUPLEX_FULL; + /* Pause settings are only reported for full duplex */ + if (phydev->duplex == DUPLEX_FULL){ + phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phydev->asym_pause = lpa & LPA_PAUSE_ASYM ?
1 : 0; + } + } else { + int bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + if (bmcr & BMCR_FULLDPLX) + phydev->duplex = DUPLEX_FULL; + else + phydev->duplex = DUPLEX_HALF; + + if (bmcr & BMCR_SPEED1000) + phydev->speed = SPEED_1000; + else if (bmcr & BMCR_SPEED100) + phydev->speed = SPEED_100; + else + phydev->speed = SPEED_10; + + phydev->pause = phydev->asym_pause = 0; + } + + return 0; +} +EXPORT_SYMBOL(genphy_read_status); + +static int genphy_config_init(struct phy_device *phydev) +{ + u32 val; + u32 features; + + /* For now, I'll claim that the generic driver supports + * all possible port types */ + features = (SUPPORTED_TP | SUPPORTED_MII + | SUPPORTED_AUI | SUPPORTED_FIBRE | + SUPPORTED_BNC); + + /* Do we support autonegotiation? */ + val = phy_read(phydev, MII_BMSR); + + if (val < 0) + return val; + + if (val & BMSR_ANEGCAPABLE) + features |= SUPPORTED_Autoneg; + + if (val & BMSR_100FULL) + features |= SUPPORTED_100baseT_Full; + if (val & BMSR_100HALF) + features |= SUPPORTED_100baseT_Half; + if (val & BMSR_10FULL) + features |= SUPPORTED_10baseT_Full; + if (val & BMSR_10HALF) + features |= SUPPORTED_10baseT_Half; + + if (val & BMSR_ESTATEN) { + val = phy_read(phydev, MII_ESTATUS); + + if (val < 0) + return val; + + if (val & ESTATUS_1000_TFULL) + features |= SUPPORTED_1000baseT_Full; + if (val & ESTATUS_1000_THALF) + features |= SUPPORTED_1000baseT_Half; + } + + phydev->supported = features; + phydev->advertising = features; + + return 0; +} + + +/* phy_probe + * + * description: Take care of setting up the phy_device structure, + * set the state to READY (the driver's init function should + * set it to STARTING if needed). + */ +static int phy_probe(struct device *dev) +{ + struct phy_device *phydev; + struct phy_driver *phydrv; + struct device_driver *drv; + int err = 0; + + phydev = to_phy_device(dev); + + /* Make sure the driver is held. + * XXX -- Is this correct? 
*/ + drv = get_driver(phydev->dev.driver); + phydrv = to_phy_driver(drv); + phydev->drv = phydrv; + + /* Disable the interrupt if the PHY doesn't support it */ + if (!(phydrv->flags & PHY_HAS_INTERRUPT)) + phydev->irq = PHY_POLL; + + spin_lock(&phydev->lock); + + /* Start out supporting everything. Eventually, + * a controller will attach, and may modify one + * or both of these values */ + phydev->supported = phydrv->features; + phydev->advertising = phydrv->features; + + /* Set the state to READY by default */ + phydev->state = PHY_READY; + + if (phydev->drv->probe) + err = phydev->drv->probe(phydev); + + spin_unlock(&phydev->lock); + + if (err < 0) + return err; + + if (phydev->drv->config_init) + err = phydev->drv->config_init(phydev); + + return err; +} + +static int phy_remove(struct device *dev) +{ + struct phy_device *phydev; + + phydev = to_phy_device(dev); + + spin_lock(&phydev->lock); + phydev->state = PHY_DOWN; + spin_unlock(&phydev->lock); + + if (phydev->drv->remove) + phydev->drv->remove(phydev); + + put_driver(dev->driver); + phydev->drv = NULL; + + return 0; +} + +int phy_driver_register(struct phy_driver *new_driver) +{ + int retval; + + memset(&new_driver->driver, 0, sizeof(new_driver->driver)); + new_driver->driver.name = new_driver->name; + new_driver->driver.bus = &mdio_bus_type; + new_driver->driver.probe = phy_probe; + new_driver->driver.remove = phy_remove; + + retval = driver_register(&new_driver->driver); + + if (retval) { + printk(KERN_ERR "%s: Error %d in registering driver\n", + new_driver->name, retval); + + return retval; + } + + pr_info("%s: Registered new driver\n", new_driver->name); + + return 0; +} +EXPORT_SYMBOL(phy_driver_register); + +void phy_driver_unregister(struct phy_driver *drv) +{ + driver_unregister(&drv->driver); +} +EXPORT_SYMBOL(phy_driver_unregister); + +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + 
.features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner = THIS_MODULE, }, +}; + +static int __init genphy_init(void) +{ + return phy_driver_register(&genphy_driver); + +} + +static void __exit genphy_exit(void) +{ + phy_driver_unregister(&genphy_driver); +} + +module_init(genphy_init); +module_exit(genphy_exit); diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c new file mode 100644 index 000000000000..d461ba457631 --- /dev/null +++ b/drivers/net/phy/qsemi.c @@ -0,0 +1,143 @@ +/* + * drivers/net/phy/qsemi.c + * + * Driver for Quality Semiconductor PHYs + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* ------------------------------------------------------------------------- */ +/* The Quality Semiconductor QS6612 is used on the RPX CLLF */ + +/* register definitions */ + +#define MII_QS6612_MCR 17 /* Mode Control Register */ +#define MII_QS6612_FTR 27 /* Factory Test Register */ +#define MII_QS6612_MCO 28 /* Misc. Control Register */ +#define MII_QS6612_ISR 29 /* Interrupt Source Register */ +#define MII_QS6612_IMR 30 /* Interrupt Mask Register */ +#define MII_QS6612_IMR_INIT 0x003a +#define MII_QS6612_PCR 31 /* 100BaseTx PHY Control Reg. 
*/ + +#define QS6612_PCR_AN_COMPLETE 0x1000 +#define QS6612_PCR_RLBEN 0x0200 +#define QS6612_PCR_DCREN 0x0100 +#define QS6612_PCR_4B5BEN 0x0040 +#define QS6612_PCR_TX_ISOLATE 0x0020 +#define QS6612_PCR_MLT3_DIS 0x0002 +#define QS6612_PCR_SCRM_DESCRM 0x0001 + +MODULE_DESCRIPTION("Quality Semiconductor PHY driver"); +MODULE_AUTHOR("Andy Fleming"); +MODULE_LICENSE("GPL"); + +/* Writes the start-up value to the 100BaseTx PHY Control Register. + * Returns 0, unless there's a write error */ +static int qs6612_config_init(struct phy_device *phydev) +{ + /* The PHY powers up isolated on the RPX, + * so send a command to allow operation. + * XXX - My docs indicate this should be 0x0940 + * ...or something. The current value sets three + * reserved bits, bit 11, which specifies it should be + * set to one, bit 10, which specifies it should be set + * to 0, and bit 7, which doesn't specify. However, my + * docs are preliminary, and I will leave it like this + * until someone more knowledgeable corrects me or it. + * -- Andy Fleming + */ + return phy_write(phydev, MII_QS6612_PCR, 0x0dc0); +} +/* Reads the interrupt source, basic status and expansion registers + * and discards the values (presumably the reads clear the pending + * interrupt -- TODO confirm against the datasheet). Returns 0, or + * the negative error code of the first phy_read() that fails */ +static int qs6612_ack_interrupt(struct phy_device *phydev) +{ + int err; + + err = phy_read(phydev, MII_QS6612_ISR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_BMSR); + + if (err < 0) + return err; + + err = phy_read(phydev, MII_EXPANSION); + + if (err < 0) + return err; + + return 0; +} +/* Writes MII_QS6612_IMR_INIT to the interrupt mask register when + * phydev->interrupts is PHY_INTERRUPT_ENABLED, and 0 otherwise. + * Returns the result of phy_write() */ +static int qs6612_config_intr(struct phy_device *phydev) +{ + int err; + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + err = phy_write(phydev, MII_QS6612_IMR, + MII_QS6612_IMR_INIT); + else + err = phy_write(phydev, MII_QS6612_IMR, 0); + + return err; + +} + +static struct phy_driver qs6612_driver = { + .phy_id = 0x00181440, + .name = "QS6612", + .phy_id_mask = 0xfffffff0, + .features = PHY_BASIC_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .config_init = qs6612_config_init, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .ack_interrupt = qs6612_ack_interrupt, + .config_intr = qs6612_config_intr, + .driver = { .owner =
THIS_MODULE,}, +}; + +static int __init qs6612_init(void) +{ + return phy_driver_register(&qs6612_driver); +} + +static void __exit qs6612_exit(void) +{ + phy_driver_unregister(&qs6612_driver); +} + +module_init(qs6612_init); +module_exit(qs6612_exit); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a0ab26aab450..d7021c391b2b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -408,6 +408,8 @@ struct ethtool_ops { #define SUPPORTED_FIBRE (1 << 10) #define SUPPORTED_BNC (1 << 11) #define SUPPORTED_10000baseT_Full (1 << 12) +#define SUPPORTED_Pause (1 << 13) +#define SUPPORTED_Asym_Pause (1 << 14) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -423,6 +425,8 @@ struct ethtool_ops { #define ADVERTISED_FIBRE (1 << 10) #define ADVERTISED_BNC (1 << 11) #define ADVERTISED_10000baseT_Full (1 << 12) +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the diff --git a/include/linux/mii.h b/include/linux/mii.h index 374b615ea9ea..9b8d0476988a 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -22,6 +22,7 @@ #define MII_EXPANSION 0x06 /* Expansion register */ #define MII_CTRL1000 0x09 /* 1000BASE-T control */ #define MII_STAT1000 0x0a /* 1000BASE-T status */ +#define MII_ESTATUS 0x0f /* Extended Status */ #define MII_DCOUNTER 0x12 /* Disconnect counter */ #define MII_FCSCOUNTER 0x13 /* False carrier counter */ #define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */ @@ -54,7 +55,10 @@ #define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ #define BMSR_RFAULT 0x0010 /* Remote fault detected */ #define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ -#define BMSR_RESV 0x07c0 /* Unused... */ +#define BMSR_RESV 0x00c0 /* Unused... 
*/ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +#define BMSR_100HALF2 0x0200 /* Can do 100BASE-T2 HDX */ +#define BMSR_100FULL2 0x0400 /* Can do 100BASE-T2 FDX */ #define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ #define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ #define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ @@ -114,6 +118,9 @@ #define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */ #define EXPANSION_RESV 0xffe0 /* Unused... */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + /* N-way test register. */ #define NWAYTEST_RESV1 0x00ff /* Unused... */ #define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */ diff --git a/include/linux/phy.h b/include/linux/phy.h new file mode 100644 index 000000000000..3404804dc22d --- /dev/null +++ b/include/linux/phy.h @@ -0,0 +1,378 @@ +/* + * include/linux/phy.h + * + * Framework and drivers for configuring and reading different PHYs + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __PHY_H +#define __PHY_H + +#include +#include + +#define PHY_BASIC_FEATURES (SUPPORTED_10baseT_Half | \ + SUPPORTED_10baseT_Full | \ + SUPPORTED_100baseT_Half | \ + SUPPORTED_100baseT_Full | \ + SUPPORTED_Autoneg | \ + SUPPORTED_TP | \ + SUPPORTED_MII) + +#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ + SUPPORTED_1000baseT_Half | \ + SUPPORTED_1000baseT_Full) + +/* Set phydev->irq to PHY_POLL if interrupts are not supported, + * or not desired for this PHY.
Set to PHY_IGNORE_INTERRUPT if + * the attached driver handles the interrupt + */ +#define PHY_POLL -1 +#define PHY_IGNORE_INTERRUPT -2 + +#define PHY_HAS_INTERRUPT 0x00000001 +#define PHY_HAS_MAGICANEG 0x00000002 + +#define MII_BUS_MAX 4 + + +#define PHY_INIT_TIMEOUT 100000 +#define PHY_STATE_TIME 1 +#define PHY_FORCE_TIMEOUT 10 +#define PHY_AN_TIMEOUT 10 + +#define PHY_MAX_ADDR 32 + +/* The Bus class for PHYs. Devices which provide access to + * PHYs should register using this structure */ +struct mii_bus { + const char *name; + int id; + void *priv; + int (*read)(struct mii_bus *bus, int phy_id, int regnum); + int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); + int (*reset)(struct mii_bus *bus); + + /* A lock to ensure that only one thing can read/write + * the MDIO bus at a time */ + spinlock_t mdio_lock; + + struct device *dev; + + /* list of all PHYs on bus */ + struct phy_device *phy_map[PHY_MAX_ADDR]; + + /* Pointer to an array of interrupts, each PHY's + * interrupt at the index matching its address */ + int *irq; +}; + +#define PHY_INTERRUPT_DISABLED 0x0 +#define PHY_INTERRUPT_ENABLED 0x80000000 + +/* PHY state machine states: + * + * DOWN: PHY device and driver are not ready for anything. probe + * should be called if and only if the PHY is in this state, + * given that the PHY device exists. + * - PHY driver probe function will, depending on the PHY, set + * the state to STARTING or READY + * + * STARTING: PHY device is coming up, and the ethernet driver is + * not ready. PHY drivers may set this in the probe function. + * If they do, they are responsible for making sure the state is + * eventually set to indicate whether the PHY is UP or READY, + * depending on the state when the PHY is done starting up. + * - PHY driver will set the state to READY + * - start will set the state to PENDING + * + * READY: PHY is ready to send and receive packets, but the + * controller is not. 
By default, PHYs which do not implement + * probe will be set to this state by phy_probe(). If the PHY + * driver knows the PHY is ready, and the PHY state is STARTING, + * then it sets this STATE. + * - start will set the state to UP + * + * PENDING: PHY device is coming up, but the ethernet driver is + * ready. phy_start will set this state if the PHY state is + * STARTING. + * - PHY driver will set the state to UP when the PHY is ready + * + * UP: The PHY and attached device are ready to do work. + * Interrupts should be started here. + * - timer moves to AN + * + * AN: The PHY is currently negotiating the link state. Link is + * therefore down for now. phy_timer will set this state when it + * detects the state is UP. config_aneg will set this state + * whenever called with phydev->autoneg set to AUTONEG_ENABLE. + * - If autonegotiation finishes, but there's no link, it sets + * the state to NOLINK. + * - If aneg finishes with link, it sets the state to RUNNING, + * and calls adjust_link + * - If autonegotiation did not finish after an arbitrary amount + * of time, autonegotiation should be tried again if the PHY + * supports "magic" autonegotiation (back to AN) + * - If it didn't finish, and no magic_aneg, move to FORCING. + * + * NOLINK: PHY is up, but not currently plugged in. 
+ * - If the timer notes that the link comes back, we move to RUNNING + * - config_aneg moves to AN + * - phy_stop moves to HALTED + * + * FORCING: PHY is being configured with forced settings + * - if link is up, move to RUNNING + * - If link is down, we drop to the next highest setting, and + * retry (FORCING) after a timeout + * - phy_stop moves to HALTED + * + * RUNNING: PHY is currently up, running, and possibly sending + * and/or receiving packets + * - timer will set CHANGELINK if we're polling (this ensures the + * link state is polled every other cycle of this state machine, + * which makes it every other second) + * - irq will set CHANGELINK + * - config_aneg will set AN + * - phy_stop moves to HALTED + * + * CHANGELINK: PHY experienced a change in link state + * - timer moves to RUNNING if link + * - timer moves to NOLINK if the link is down + * - phy_stop moves to HALTED + * + * HALTED: PHY is up, but no polling or interrupts are done. Or + * PHY is in an error state. + * + * - phy_start moves to RESUMING + * + * RESUMING: PHY was halted, but now wants to run again. + * - If we are forcing, or aneg is done, timer moves to RUNNING + * - If aneg is not done, timer moves to AN + * - phy_stop moves to HALTED + */ +enum phy_state { + PHY_DOWN=0, + PHY_STARTING, + PHY_READY, + PHY_PENDING, + PHY_UP, + PHY_AN, + PHY_RUNNING, + PHY_NOLINK, + PHY_FORCING, + PHY_CHANGELINK, + PHY_HALTED, + PHY_RESUMING +}; + +/* phy_device: An instance of a PHY + * + * drv: Pointer to the driver for this PHY instance + * bus: Pointer to the bus this PHY is on + * dev: driver model device structure for this PHY + * phy_id: UID for this device found during discovery + * state: state of the PHY for management purposes + * dev_flags: Device-specific flags used by the PHY driver. 
+ * addr: Bus address of PHY + * link_timeout: The number of timer firings to wait before the + * giving up on the current attempt at acquiring a link + * irq: IRQ number of the PHY's interrupt (-1 if none) + * phy_timer: The timer for handling the state machine + * phy_queue: A work_queue for the interrupt + * attached_dev: The attached enet driver's device instance ptr + * adjust_link: Callback for the enet controller to respond to + * changes in the link state. + * adjust_state: Callback for the enet driver to respond to + * changes in the state machine. + * + * speed, duplex, pause, supported, advertising, and + * autoneg are used like in mii_if_info + * + * interrupts currently only supports enabled or disabled, + * but could be changed in the future to support enabling + * and disabling specific interrupts + * + * Contains some infrastructure for polling and interrupt + * handling, as well as handling shifts in PHY hardware state + */ +struct phy_device { + /* Information about the PHY type */ + /* And management functions */ + struct phy_driver *drv; + + struct mii_bus *bus; + + struct device dev; + + u32 phy_id; + + enum phy_state state; + + u32 dev_flags; + + /* Bus address of the PHY (0-31, i.e. below PHY_MAX_ADDR) */ + int addr; + + /* forced speed & duplex (no autoneg) + * partner speed & duplex & pause (autoneg) + */ + int speed; + int duplex; + int pause; + int asym_pause; + + /* The most recently read link state (1 = link up, 0 = down) */ + int link; + + /* Enabled Interrupts (PHY_INTERRUPT_ENABLED or + * PHY_INTERRUPT_DISABLED) */ + u32 interrupts; + + /* Union of PHY and Attached devices' supported modes */ + /* See mii.h for more info */ + u32 supported; + u32 advertising; + + int autoneg; + /* Number of timer firings to wait before giving up on + * the current attempt at acquiring a link */ + int link_timeout; + + /* Interrupt number for this PHY + * -1 (PHY_POLL) means no interrupt, -2 + * (PHY_IGNORE_INTERRUPT) means the attached driver + * handles the interrupt itself */ + int irq; + + /* private data pointer */ + /* For use by PHYs to maintain extra state */ + void *priv; + + /* Interrupt and Polling infrastructure */ + struct work_struct phy_queue; + struct timer_list phy_timer; + + spinlock_t lock; + + struct net_device *attached_dev; + +
void (*adjust_link)(struct net_device *dev); + + void (*adjust_state)(struct net_device *dev); +}; +#define to_phy_device(d) container_of(d, struct phy_device, dev) + +/* struct phy_driver: Driver structure for a particular PHY type + * + * phy_id: The result of reading the UID registers of this PHY + * type, and ANDing them with the phy_id_mask. This driver + * only works for PHYs with IDs which match this field + * name: The friendly name of this PHY type + * phy_id_mask: Defines the important bits of the phy_id + * features: A list of features (speed, duplex, etc) supported + * by this PHY + * flags: A bitfield defining certain other features this PHY + * supports (like interrupts) + * + * The drivers must implement config_aneg and read_status. All + * other functions are optional. Note that none of these + * functions should be called from interrupt time. The goal is + * for the bus read/write functions to be able to block when the + * bus transaction is happening, and be freed up by an interrupt + * (The MPC85xx has this ability, though it is not currently + * supported in the driver). + */ +struct phy_driver { + u32 phy_id; + char *name; + unsigned int phy_id_mask; + u32 features; + u32 flags; + + /* Called to initialize the PHY, + * including after a reset */ + int (*config_init)(struct phy_device *phydev); + + /* Called during discovery. 
Used to set + * up device-specific structures, if any */ + int (*probe)(struct phy_device *phydev); + + /* PHY Power Management */ + int (*suspend)(struct phy_device *phydev); + int (*resume)(struct phy_device *phydev); + + /* Configures the advertisement and resets + * autonegotiation if phydev->autoneg is on, + * forces the speed to the current settings in phydev + * if phydev->autoneg is off */ + int (*config_aneg)(struct phy_device *phydev); + + /* Determines the negotiated speed and duplex */ + int (*read_status)(struct phy_device *phydev); + + /* Clears any pending interrupts */ + int (*ack_interrupt)(struct phy_device *phydev); + + /* Enables or disables interrupts */ + int (*config_intr)(struct phy_device *phydev); + + /* Clears up any memory if needed */ + void (*remove)(struct phy_device *phydev); + + struct device_driver driver; +}; +#define to_phy_driver(d) container_of(d, struct phy_driver, driver) + +int phy_read(struct phy_device *phydev, u16 regnum); +int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +struct phy_device* get_phy_device(struct mii_bus *bus, int addr); +int phy_clear_interrupt(struct phy_device *phydev); +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int phy_stop_interrupts(struct phy_device *phydev); + +static inline int phy_read_status(struct phy_device *phydev) { + return phydev->drv->read_status(phydev); +} + 
+int genphy_config_advert(struct phy_device *phydev); +int genphy_setup_forced(struct phy_device *phydev); +int genphy_restart_aneg(struct phy_device *phydev); +int genphy_config_aneg(struct phy_device *phydev); +int genphy_update_link(struct phy_device *phydev); +int genphy_read_status(struct phy_device *phydev); +void phy_driver_unregister(struct phy_driver *drv); +int phy_driver_register(struct phy_driver *new_driver); +void phy_prepare_link(struct phy_device *phydev, + void (*adjust_link)(struct net_device *)); +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)); +void phy_stop_machine(struct phy_device *phydev); +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); + +extern struct bus_type mdio_bus_type; +extern struct phy_driver genphy_driver; +#endif /* __PHY_H */ -- cgit v1.2.3 From 541134cfe7af179f45458b68421ee1da7bab9cba Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sun, 3 Jul 2005 13:44:39 +0100 Subject: [PATCH] sata_nv: Support MCP51/MCP55 device IDs This is a multi-part message in MIME format. Signed-off-by: Jeff Garzik --- drivers/scsi/sata_nv.c | 21 +++++++++++++++++++-- include/linux/pci_ids.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index b0403ccd8a25..9b9142790bd6 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -20,6 +20,12 @@ * If you do not delete the provisions above, a recipient may use your * version of this file under either the OSL or the GPL. * + * 0.08 + * - Added support for MCP51 and MCP55. + * + * 0.07 + * - Added support for RAID class code. 
+ * * 0.06 * - Added generic SATA support by using a pci_device_id that filters on * the IDE storage class code. @@ -48,7 +54,7 @@ #include #define DRV_NAME "sata_nv" -#define DRV_VERSION "0.6" +#define DRV_VERSION "0.8" #define NV_PORTS 2 #define NV_PIO_MASK 0x1f @@ -116,7 +122,9 @@ enum nv_host_type GENERIC, NFORCE2, NFORCE3, - CK804 + CK804, + MCP51, + MCP55 }; static struct pci_device_id nv_pci_tbl[] = { @@ -134,9 +142,18 @@ static struct pci_device_id nv_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CK804 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, + { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, + PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_STORAGE_RAID<<8, 0xffff00, GENERIC }, { 0, } /* terminate list */ }; diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bc4cc10fabe9..639291fe8ac0 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1249,6 +1249,7 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.3 From 8a60a07129fad60bba779a2a4038c7518b167fc7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 31 Jul 2005 13:13:24 -0400 Subject: libata: trim trailing whitespace. Also, fixup a tabs-to-spaces block of code in ata_piix. 
--- drivers/scsi/ata_piix.c | 14 ++--- drivers/scsi/libata-core.c | 4 +- drivers/scsi/libata.h | 2 +- drivers/scsi/sata_qstor.c | 2 +- drivers/scsi/sata_sil.c | 4 +- drivers/scsi/sata_sis.c | 2 +- drivers/scsi/sata_svw.c | 10 ++-- drivers/scsi/sata_sx4.c | 138 ++++++++++++++++++++++----------------------- drivers/scsi/sata_uli.c | 2 +- drivers/scsi/sata_via.c | 2 +- drivers/scsi/sata_vsc.c | 2 +- include/linux/libata.h | 2 +- 12 files changed, 92 insertions(+), 92 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index a2cfade2c1c6..9f1bdfbd8d0a 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -629,13 +629,13 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) port_info[1] = NULL; if (port_info[0]->host_flags & PIIX_FLAG_AHCI) { - u8 tmp; - pci_read_config_byte(pdev, PIIX_SCC, &tmp); - if (tmp == PIIX_AHCI_DEVICE) { - int rc = piix_disable_ahci(pdev); - if (rc) - return rc; - } + u8 tmp; + pci_read_config_byte(pdev, PIIX_SCC, &tmp); + if (tmp == PIIX_AHCI_DEVICE) { + int rc = piix_disable_ahci(pdev); + if (rc) + return rc; + } } if (port_info[0]->host_flags & PIIX_FLAG_COMBINED) { diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 73b1f72b7e43..6e56af23957b 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1304,12 +1304,12 @@ static inline u8 ata_dev_knobble(struct ata_port *ap) /** * ata_dev_config - Run device specific handlers and check for * SATA->PATA bridges - * @ap: Bus + * @ap: Bus * @i: Device * * LOCKING: */ - + void ata_dev_config(struct ata_port *ap, unsigned int i) { /* limit bridge transfers to udma5, 200 sectors */ diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index d90430bbb0de..91b68eedb3c9 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -72,7 +72,7 @@ extern unsigned int ata_scsiop_report_luns(struct ata_scsi_args *args, u8 *rbuf, extern void 
ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8 asc, u8 ascq); -extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, +extern void ata_scsi_rbuf_fill(struct ata_scsi_args *args, unsigned int (*actor) (struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen)); diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 1383e8a28d72..dca9ed7ac760 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -431,7 +431,7 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) continue; DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n", ap->id, qc->tf.protocol, status); - + /* complete taskfile transaction */ pp->state = qs_state_idle; ata_qc_complete(qc, status); diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 49ed557a4b66..a1b81d43b11f 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -323,13 +323,13 @@ static void sil_dev_config(struct ata_port *ap, struct ata_device *dev) while ((len > 0) && (s[len - 1] == ' ')) len--; - for (n = 0; sil_blacklist[n].product; n++) + for (n = 0; sil_blacklist[n].product; n++) if (!memcmp(sil_blacklist[n].product, s, strlen(sil_blacklist[n].product))) { quirks = sil_blacklist[n].quirk; break; } - + /* limit requests to 15 sectors */ if (quirks & SIL_QUIRK_MOD15WRITE) { printk(KERN_INFO "ata%u(%u): applying Seagate errata fix\n", diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index e418b89c6b9d..b250ae0c7773 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -234,7 +234,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) pci_read_config_dword(pdev, SIS_GENCTL, &genctl); if ((genctl & GENCTL_IOMAPPED_SCR) == 0) probe_ent->host_flags |= SIS_FLAG_CFGSCR; - + /* if hardware thinks SCRs are in IO space, but there are * no IO resources assigned, change to PCI cfg space. 
*/ diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 858e07185dbd..6fd2ce1ffcd8 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -195,18 +195,18 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) /* start host DMA transaction */ dmactl = readb(mmio + ATA_DMA_CMD); writeb(dmactl | ATA_DMA_START, mmio + ATA_DMA_CMD); - /* There is a race condition in certain SATA controllers that can - be seen when the r/w command is given to the controller before the + /* There is a race condition in certain SATA controllers that can + be seen when the r/w command is given to the controller before the host DMA is started. On a Read command, the controller would initiate the command to the drive even before it sees the DMA start. When there - are very fast drives connected to the controller, or when the data request + are very fast drives connected to the controller, or when the data request hits in the drive cache, there is the possibility that the drive returns a part or all of the requested data to the controller before the DMA start is issued. In this case, the controller would become confused as to what to do with the data. In the worst case when all the data is returned back to the controller, the controller could hang. In other cases it could return partial data returning in data corruption. This problem has been seen in PPC systems and can also appear - on an system with very fast disks, where the SATA controller is sitting behind a + on an system with very fast disks, where the SATA controller is sitting behind a number of bridges, and hence there is significant latency between the r/w command and the start command. 
*/ /* issue r/w command if the access is to ATA*/ @@ -214,7 +214,7 @@ static void k2_bmdma_start_mmio (struct ata_queued_cmd *qc) ap->ops->exec_command(ap, &qc->tf); } - + static u8 k2_stat_check_status(struct ata_port *ap) { return readl((void *) ap->ioaddr.status_addr); diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 140cea05de3f..8e59868b24bb 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -94,7 +94,7 @@ enum { PDC_DIMM1_CONTROL_OFFSET = 0x84, PDC_SDRAM_CONTROL_OFFSET = 0x88, PDC_I2C_WRITE = 0x00000000, - PDC_I2C_READ = 0x00000040, + PDC_I2C_READ = 0x00000040, PDC_I2C_START = 0x00000080, PDC_I2C_MASK_INT = 0x00000020, PDC_I2C_COMPLETE = 0x00010000, @@ -105,16 +105,16 @@ enum { PDC_DIMM_SPD_COLUMN_NUM = 4, PDC_DIMM_SPD_MODULE_ROW = 5, PDC_DIMM_SPD_TYPE = 11, - PDC_DIMM_SPD_FRESH_RATE = 12, - PDC_DIMM_SPD_BANK_NUM = 17, + PDC_DIMM_SPD_FRESH_RATE = 12, + PDC_DIMM_SPD_BANK_NUM = 17, PDC_DIMM_SPD_CAS_LATENCY = 18, - PDC_DIMM_SPD_ATTRIBUTE = 21, + PDC_DIMM_SPD_ATTRIBUTE = 21, PDC_DIMM_SPD_ROW_PRE_CHARGE = 27, - PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28, + PDC_DIMM_SPD_ROW_ACTIVE_DELAY = 28, PDC_DIMM_SPD_RAS_CAS_DELAY = 29, PDC_DIMM_SPD_ACTIVE_PRECHARGE = 30, PDC_DIMM_SPD_SYSTEM_FREQ = 126, - PDC_CTL_STATUS = 0x08, + PDC_CTL_STATUS = 0x08, PDC_DIMM_WINDOW_CTLR = 0x0C, PDC_TIME_CONTROL = 0x3C, PDC_TIME_PERIOD = 0x40, @@ -157,15 +157,15 @@ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); static void pdc20621_host_stop(struct ata_host_set *host_set); static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe); static int pdc20621_detect_dimm(struct ata_probe_ent *pe); -static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, +static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, u32 subaddr, u32 *pdata); static int pdc20621_prog_dimm0(struct ata_probe_ent *pe); static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe); #ifdef ATA_VERBOSE_DEBUG -static void 
pdc20621_get_from_dimm(struct ata_probe_ent *pe, +static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size); #endif -static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, +static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size); static void pdc20621_irq_clear(struct ata_port *ap); static int pdc20621_qc_issue_prot(struct ata_queued_cmd *qc); @@ -922,7 +922,7 @@ static void pdc_sata_setup_port(struct ata_ioports *port, unsigned long base) #ifdef ATA_VERBOSE_DEBUG -static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, +static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size) { u32 window_size; @@ -936,9 +936,9 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; - page_mask = 0x00; - window_size = 0x2000 * 4; /* 32K byte uchar size */ - idx = (u16) (offset / window_size); + page_mask = 0x00; + window_size = 0x2000 * 4; /* 32K byte uchar size */ + idx = (u16) (offset / window_size); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); @@ -947,19 +947,19 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, offset -= (idx * window_size); idx++; - dist = ((long) (window_size - (offset + size))) >= 0 ? size : + dist = ((long) (window_size - (offset + size))) >= 0 ? 
size : (long) (window_size - offset); - memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), + memcpy_fromio((char *) psource, (char *) (dimm_mmio + offset / 4), dist); - psource += dist; + psource += dist; size -= dist; for (; (long) size >= (long) window_size ;) { writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), + memcpy_fromio((char *) psource, (char *) (dimm_mmio), window_size / 4); psource += window_size; size -= window_size; @@ -971,14 +971,14 @@ static void pdc20621_get_from_dimm(struct ata_probe_ent *pe, void *psource, readl(mmio + PDC_GENERAL_CTLR); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_fromio((char *) psource, (char *) (dimm_mmio), + memcpy_fromio((char *) psource, (char *) (dimm_mmio), size / 4); } } #endif -static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, +static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, u32 offset, u32 size) { u32 window_size; @@ -989,16 +989,16 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, struct pdc_host_priv *hpriv = pe->private_data; void *dimm_mmio = hpriv->dimm_mmio; - /* hard-code chip #0 */ + /* hard-code chip #0 */ mmio += PDC_CHIP0_OFS; - page_mask = 0x00; - window_size = 0x2000 * 4; /* 32K byte uchar size */ + page_mask = 0x00; + window_size = 0x2000 * 4; /* 32K byte uchar size */ idx = (u16) (offset / window_size); writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - offset -= (idx * window_size); + offset -= (idx * window_size); idx++; dist = ((long)(s32)(window_size - (offset + size))) >= 0 ? 
size : (long) (window_size - offset); @@ -1006,12 +1006,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); - psource += dist; + psource += dist; size -= dist; for (; (long) size >= (long) window_size ;) { writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); - memcpy_toio((char *) (dimm_mmio), (char *) psource, + memcpy_toio((char *) (dimm_mmio), (char *) psource, window_size / 4); writel(0x01, mmio + PDC_GENERAL_CTLR); readl(mmio + PDC_GENERAL_CTLR); @@ -1019,7 +1019,7 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, size -= window_size; idx ++; } - + if (size) { writel(((idx) << page_mask), mmio + PDC_DIMM_WINDOW_CTLR); readl(mmio + PDC_DIMM_WINDOW_CTLR); @@ -1030,12 +1030,12 @@ static void pdc20621_put_to_dimm(struct ata_probe_ent *pe, void *psource, } -static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, +static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, u32 subaddr, u32 *pdata) { void *mmio = pe->mmio_base; u32 i2creg = 0; - u32 status; + u32 status; u32 count =0; /* hard-code chip #0 */ @@ -1049,7 +1049,7 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, readl(mmio + PDC_I2C_ADDR_DATA_OFFSET); /* Write Control to perform read operation, mask int */ - writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT, + writel(PDC_I2C_READ | PDC_I2C_START | PDC_I2C_MASK_INT, mmio + PDC_I2C_CONTROL_OFFSET); for (count = 0; count <= 1000; count ++) { @@ -1062,26 +1062,26 @@ static unsigned int pdc20621_i2c_read(struct ata_probe_ent *pe, u32 device, } *pdata = (status >> 8) & 0x000000ff; - return 1; + return 1; } static int pdc20621_detect_dimm(struct ata_probe_ent *pe) { u32 data=0 ; - if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_SYSTEM_FREQ, &data)) { if 
(data == 100) return 100; } else return 0; - + if (pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, 9, &data)) { - if(data <= 0x75) + if(data <= 0x75) return 133; } else return 0; - + return 0; } @@ -1091,15 +1091,15 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) u32 spd0[50]; u32 data = 0; int size, i; - u8 bdimmsize; + u8 bdimmsize; void *mmio = pe->mmio_base; static const struct { unsigned int reg; unsigned int ofs; } pdc_i2c_read_data [] = { - { PDC_DIMM_SPD_TYPE, 11 }, + { PDC_DIMM_SPD_TYPE, 11 }, { PDC_DIMM_SPD_FRESH_RATE, 12 }, - { PDC_DIMM_SPD_COLUMN_NUM, 4 }, + { PDC_DIMM_SPD_COLUMN_NUM, 4 }, { PDC_DIMM_SPD_ATTRIBUTE, 21 }, { PDC_DIMM_SPD_ROW_NUM, 3 }, { PDC_DIMM_SPD_BANK_NUM, 17 }, @@ -1108,7 +1108,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) { PDC_DIMM_SPD_ROW_ACTIVE_DELAY, 28 }, { PDC_DIMM_SPD_RAS_CAS_DELAY, 29 }, { PDC_DIMM_SPD_ACTIVE_PRECHARGE, 30 }, - { PDC_DIMM_SPD_CAS_LATENCY, 18 }, + { PDC_DIMM_SPD_CAS_LATENCY, 18 }, }; /* hard-code chip #0 */ @@ -1116,17 +1116,17 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) for(i=0; i spd0[28]) - ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10; + data |= (((((spd0[29] > spd0[28]) + ? spd0[29] : spd0[28]) + 9) / 10) - 1) << 10; data |= ((spd0[30] - spd0[29] + 9) / 10 - 2) << 12; - - if (spd0[18] & 0x08) + + if (spd0[18] & 0x08) data |= ((0x03) << 14); else if (spd0[18] & 0x04) data |= ((0x02) << 14); @@ -1135,7 +1135,7 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) else data |= (0 << 14); - /* + /* Calculate the size of bDIMMSize (power of 2) and merge the DIMM size by program start/end address. 
*/ @@ -1145,9 +1145,9 @@ static int pdc20621_prog_dimm0(struct ata_probe_ent *pe) data |= (((size / 16) - 1) << 16); data |= (0 << 23); data |= 8; - writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET); + writel(data, mmio + PDC_DIMM0_CONTROL_OFFSET); readl(mmio + PDC_DIMM0_CONTROL_OFFSET); - return size; + return size; } @@ -1167,12 +1167,12 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe) Refresh Enable (bit 17) */ - data = 0x022259F1; + data = 0x022259F1; writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET); readl(mmio + PDC_SDRAM_CONTROL_OFFSET); /* Turn on for ECC */ - pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0); if (spd0 == 0x02) { data |= (0x01 << 16); @@ -1186,22 +1186,22 @@ static unsigned int pdc20621_prog_dimm_global(struct ata_probe_ent *pe) data |= (1<<19); writel(data, mmio + PDC_SDRAM_CONTROL_OFFSET); - error = 1; + error = 1; for (i = 1; i <= 10; i++) { /* polling ~5 secs */ data = readl(mmio + PDC_SDRAM_CONTROL_OFFSET); if (!(data & (1<<19))) { error = 0; - break; + break; } msleep(i*100); } return error; } - + static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) { - int speed, size, length; + int speed, size, length; u32 addr,spd0,pci_status; u32 tmp=0; u32 time_period=0; @@ -1228,7 +1228,7 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) /* Wait 3 seconds */ msleep(3000); - /* + /* When timer is enabled, counter is decreased every internal clock cycle. */ @@ -1236,24 +1236,24 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) tcount = readl(mmio + PDC_TIME_COUNTER); VPRINTK("Time Counter Register (0x44): 0x%x\n", tcount); - /* + /* If SX4 is on PCI-X bus, after 3 seconds, the timer counter register should be >= (0xffffffff - 3x10^8). 
*/ if(tcount >= PCI_X_TCOUNT) { ticks = (time_period - tcount); VPRINTK("Num counters 0x%x (%d)\n", ticks, ticks); - + clock = (ticks / 300000); VPRINTK("10 * Internal clk = 0x%x (%d)\n", clock, clock); - + clock = (clock * 33); VPRINTK("10 * Internal clk * 33 = 0x%x (%d)\n", clock, clock); /* PLL F Param (bit 22:16) */ fparam = (1400000 / clock) - 2; VPRINTK("PLL F Param: 0x%x (%d)\n", fparam, fparam); - + /* OD param = 0x2 (bit 31:30), R param = 0x5 (bit 29:25) */ pci_status = (0x8a001824 | (fparam << 16)); } else @@ -1264,21 +1264,21 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) writel(pci_status, mmio + PDC_CTL_STATUS); readl(mmio + PDC_CTL_STATUS); - /* + /* Read SPD of DIMM by I2C interface, and program the DIMM Module Controller. */ if (!(speed = pdc20621_detect_dimm(pe))) { - printk(KERN_ERR "Detect Local DIMM Fail\n"); + printk(KERN_ERR "Detect Local DIMM Fail\n"); return 1; /* DIMM error */ } VPRINTK("Local DIMM Speed = %d\n", speed); - /* Programming DIMM0 Module Control Register (index_CID0:80h) */ + /* Programming DIMM0 Module Control Register (index_CID0:80h) */ size = pdc20621_prog_dimm0(pe); VPRINTK("Local DIMM Size = %dMB\n",size); - /* Programming DIMM Module Global Control Register (index_CID0:88h) */ + /* Programming DIMM Module Global Control Register (index_CID0:88h) */ if (pdc20621_prog_dimm_global(pe)) { printk(KERN_ERR "Programming DIMM Module Global Control Register Fail\n"); return 1; @@ -1297,30 +1297,30 @@ static unsigned int pdc20621_dimm_init(struct ata_probe_ent *pe) pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x10040, 40); pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); - pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040, + pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x10040, 40); - printk(KERN_ERR "%x, %x, %s\n", 
test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); pdc20621_put_to_dimm(pe, (void *) test_parttern1, 0x40, 40); pdc20621_get_from_dimm(pe, (void *) test_parttern2, 0x40, 40); - printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], + printk(KERN_ERR "%x, %x, %s\n", test_parttern2[0], test_parttern2[1], &(test_parttern2[2])); } #endif /* ECC initiliazation. */ - pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, + pdc20621_i2c_read(pe, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0); if (spd0 == 0x02) { VPRINTK("Start ECC initialization\n"); addr = 0; length = size * 1024 * 1024; while (addr < length) { - pdc20621_put_to_dimm(pe, (void *) &tmp, addr, + pdc20621_put_to_dimm(pe, (void *) &tmp, addr, sizeof(u32)); addr += sizeof(u32); } diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index a71fb54eebd3..eb202a73bc0e 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -214,7 +214,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = -ENOMEM; goto err_out_regions; } - + switch (board_idx) { case uli_5287: probe_ent->port[0].scr_addr = ULI5287_BASE; diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index f43183c19a12..feff10980487 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -347,7 +347,7 @@ static int svia_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent = vt6420_init_probe_ent(pdev); else probe_ent = vt6421_init_probe_ent(pdev); - + if (!probe_ent) { printk(KERN_ERR DRV_NAME "(%s): out of memory\n", pci_name(pdev)); diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index c5e09dc6f3de..cb3a6d89cf00 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -342,7 +342,7 @@ static int __devinit vsc_sata_init_one (struct pci_dev *pdev, const struct pci_d pci_set_master(pdev); - /* + /* * Config offset 0x98 is "Extended Control and Status Register 0" * 
Default value is (1 << 28). All bits except bit 28 are reserved in * DPA mode. If bit 28 is set, LED 0 reflects all ports' activity. diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563b..85b0aaee0ef8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -644,7 +644,7 @@ static inline void scr_write(struct ata_port *ap, unsigned int reg, u32 val) ap->ops->scr_write(ap, reg, val); } -static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, +static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, u32 val) { ap->ops->scr_write(ap, reg, val); -- cgit v1.2.3 From 6b8b3e8a8b3e62b4209eaa36697e3c9df457e196 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:35 -0700 Subject: [PATCH] md: make sure md bitmap updates are flushed when array is stopped. The recent change to never ignore the bitmap revealed that the bitmap isn't being flushed properly when an array is stopped. We call bitmap_daemon_work three times as there is a three-stage pipeline for flushing updates to the bitmap file. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 23 +++++++++++++++++++++++ drivers/md/md.c | 2 ++ include/linux/raid/bitmap.h | 1 + 3 files changed, 26 insertions(+) (limited to 'include/linux') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 09d32db06d20..41df4cda66e2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1450,6 +1450,29 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset) } +/* + * flush out any pending updates + */ +void bitmap_flush(mddev_t *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + int sleep; + + if (!bitmap) /* there was no bitmap */ + return; + + /* run the daemon_work three time to ensure everything is flushed + * that can be + */ + sleep = bitmap->daemon_sleep; + bitmap->daemon_sleep = 0; + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap->daemon_sleep = sleep; + bitmap_update_sb(bitmap); +} + /* * free memory that was allocated */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 9fd4dbea0d0d..480f658db6f2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1798,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro) goto out; mddev->ro = 1; } else { + bitmap_flush(mddev); + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); if (mddev->ro) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index 6213e976eade..4bf1659f8aa8 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -248,6 +248,7 @@ struct bitmap { /* these are used only by md/bitmap */ int bitmap_create(mddev_t *mddev); +void bitmap_flush(mddev_t *mddev); void bitmap_destroy(mddev_t *mddev); int bitmap_active(struct bitmap *bitmap); -- cgit v1.2.3 From 1f3a6a15771ed70d3b2581663dcc6b9bc134baa5 Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 28 Jul 2005 
14:42:00 -0400 Subject: [ACPI] acpi_register_gsi() can return error Current acpi_register_gsi() function has no way to indicate errors to its callers even though acpi_register_gsi() can fail to register gsi because of some reasons (out of memory, lack of interrupt vectors, incorrect BIOS, and so on). As a result, caller of acpi_register_gsi() cannot handle the case that acpi_register_gsi() fails. I think failure of acpi_register_gsi() should be handled properly. This series of patches changes acpi_register_gsi() to return negative value on error, and also changes callers of acpi_register_gsi() to handle failure of acpi_register_gsi(). This patch changes the type of return value of acpi_register_gsi() from "unsigned int" to "int" to indicate an error. If acpi_register_gsi() fails to register gsi, it returns negative value. Signed-off-by: Kenji Kaneshige Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/i386/kernel/acpi/boot.c | 6 +++++- arch/ia64/kernel/acpi.c | 6 +++++- include/linux/acpi.h | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 848bb97af7ca..364f4b7c4e3e 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -457,7 +457,11 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) return 0; } -unsigned int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(u32 gsi, int edge_level, int active_high_low) { unsigned int irq; unsigned int plat_gsi = gsi; diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 1c118b72df3c..7513ff9361a0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -565,7 +565,11 @@ acpi_numa_arch_fixup (void) } #endif /* CONFIG_ACPI_NUMA */ -unsigned int +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi (u32 gsi, int 
edge_level, int active_high_low) { if (has_8259 && gsi < 16) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ca0cd240cee0..9378bcde73a4 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -432,7 +432,7 @@ static inline int acpi_boot_table_init(void) #endif /*!CONFIG_ACPI_BOOT*/ -unsigned int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); +int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); /* -- cgit v1.2.3 From 53de49f52e305e96143375d1741f15acff7bf34b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 30 Jul 2005 04:18:00 -0400 Subject: [ACPI] CONFIG_ACPI=n build fix Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- include/linux/acpi.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9378bcde73a4..bf96ae9d93a3 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -420,16 +420,6 @@ extern int sbf_port ; #define acpi_mp_config 0 -static inline int acpi_boot_init(void) -{ - return 0; -} - -static inline int acpi_boot_table_init(void) -{ - return 0; -} - #endif /*!CONFIG_ACPI_BOOT*/ int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); @@ -536,5 +526,17 @@ static inline int acpi_get_pxm(acpi_handle handle) extern int pnpacpi_disabled; +#else /* CONFIG_ACPI */ + +static inline int acpi_boot_init(void) +{ + return 0; +} + +static inline int acpi_boot_table_init(void) +{ + return 0; +} + #endif /* CONFIG_ACPI */ #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From fec59a711eef002d4ef9eb8de09dd0a26986eb77 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 4 Aug 2005 18:06:10 -0700 Subject: [PATCH] PCI: restore BAR values after D3hot->D0 for devices that need it Some PCI devices (e.g. 3c905B, 3c556B) lose all configuration (including BARs) when transitioning from D3hot->D0. 
This leaves such a device in an inaccessible state. The patch below causes the BARs to be restored when enabling such a device, so that its driver will be able to access it. The patch also adds pci_restore_bars as a new global symbol, and adds a corresponding EXPORT_SYMBOL_GPL for that. Some firmware (e.g. Thinkpad T21) leaves devices in D3hot after a (re)boot. Most drivers call pci_enable_device very early, so devices left in D3hot that lose configuration during the D3hot->D0 transition will be inaccessible to their drivers. Drivers could be modified to account for this, but it would be difficult to know which drivers need modification. This is especially true since often many devices are covered by the same driver. It likely would be necessary to replicate code across dozens of drivers. The patch below should trigger only when transitioning from D3hot->D0 (or at boot), and only for devices that have the "no soft reset" bit cleared in the PM control register. I believe it is safe to include this patch as part of the PCI infrastructure. The cleanest implementation of pci_restore_bars was to call pci_update_resource. Unfortunately, that does not currently exist for the sparc64 architecture. The patch below includes a null implementation of pci_update_resource for sparc64. Some have expressed interest in making general use of the pci_restore_bars function, so that has been exported to GPL licensed modules. Signed-off-by: John W.
Linville Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/sparc64/kernel/pci.c | 6 +++++ drivers/pci/pci.c | 59 +++++++++++++++++++++++++++++++++++++++++++---- drivers/pci/setup-res.c | 2 +- include/linux/pci.h | 3 +++ 4 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index bba140d98b1b..914e125d3971 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -413,6 +413,12 @@ static int pci_assign_bus_resource(const struct pci_bus *bus, return -EBUSY; } +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) +{ + /* Not implemented for sparc64... */ + BUG(); +} + int pci_assign_resource(struct pci_dev *pdev, int resource) { struct pcidev_cookie *pcp = pdev->sysdata; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1b34fc56067e..65ea7d25f691 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -221,6 +221,37 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) return best; } +/** + * pci_restore_bars - restore a devices BAR values (e.g. after wake-up) + * @dev: PCI device to have its BARs restored + * + * Restore the BAR values for a given device, so as to make it + * accessible by its driver. + */ +void +pci_restore_bars(struct pci_dev *dev) +{ + int i, numres; + + switch (dev->hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + numres = 6; + break; + case PCI_HEADER_TYPE_BRIDGE: + numres = 2; + break; + case PCI_HEADER_TYPE_CARDBUS: + numres = 1; + break; + default: + /* Should never get here, but just in case... 
*/ + return; + } + + for (i = 0; i < numres; i ++) + pci_update_resource(dev, &dev->resource[i], i); +} + /** * pci_set_power_state - Set the power state of a PCI device * @dev: PCI device to be suspended @@ -239,7 +270,7 @@ int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t); int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { - int pm; + int pm, need_restore = 0; u16 pmcsr, pmc; /* bound the state we're entering */ @@ -278,14 +309,17 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return -EIO; } + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + /* If we're in D3, force entire word to 0. * This doesn't affect PME_Status, disables PME_En, and * sets PowerState to 0. */ - if (dev->current_state >= PCI_D3hot) + if (dev->current_state >= PCI_D3hot) { + if (!(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) + need_restore = 1; pmcsr = 0; - else { - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + } else { pmcsr &= ~PCI_PM_CTRL_STATE_MASK; pmcsr |= state; } @@ -308,6 +342,22 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) platform_pci_set_power_state(dev, state); dev->current_state = state; + + /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT + * INTERFACE SPECIFICATION, REV. 1.2", a device transitioning + * from D3hot to D0 _may_ perform an internal reset, thereby + * going to "D0 Uninitialized" rather than "D0 Initialized". + * For example, at least some versions of the 3c905B and the + * 3c556B exhibit this behaviour. + * + * At least some laptop BIOSen (e.g. the Thinkpad T21) leave + * devices in a D3hot state at boot. Consequently, we need to + * restore at least the BARs so that the device will be + * accessible to its driver. 
+ */ + if (need_restore) + pci_restore_bars(dev); + return 0; } @@ -805,6 +855,7 @@ struct pci_dev *isa_bridge; EXPORT_SYMBOL(isa_bridge); #endif +EXPORT_SYMBOL_GPL(pci_restore_bars); EXPORT_SYMBOL(pci_enable_device_bars); EXPORT_SYMBOL(pci_enable_device); EXPORT_SYMBOL(pci_disable_device); diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 1ca21d2ba11c..878fd0a65c02 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,7 +26,7 @@ #include "pci.h" -static void +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; diff --git a/include/linux/pci.h b/include/linux/pci.h index 8621cf42b46f..98bdd95fcee9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -225,6 +225,7 @@ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) 
*/ @@ -816,7 +817,9 @@ int pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int pci_assign_resource(struct pci_dev *dev, int i); +void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size); -- cgit v1.2.3 From 0c3dba1534569734ba353afdf3f11def497ff2ac Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Thu, 4 Aug 2005 21:12:54 -0400 Subject: [PATCH] Clean up inotify delete race fix This avoids the whole #ifdef mess by just getting a copy of dentry->d_inode before d_delete is called - that makes the codepaths the same for the INOTIFY/DNOTIFY cases as for the regular no-notify case. I've been running this under a Gnome session for the last 10 minutes. Inotify is being used extensively. Signed-off-by: John McCutchan Signed-off-by: Linus Torvalds --- fs/namei.c | 9 ++------- include/linux/fsnotify.h | 4 +--- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 83559dce4286..32accb6a672f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1874,14 +1874,9 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* We don't d_delete() NFS sillyrenamed files--they still exist. 
*/ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { -#if defined(CONFIG_INOTIFY) || defined(CONFIG_DNOTIFY) - dget(dentry); + struct inode *inode = dentry->d_inode; d_delete(dentry); - fsnotify_unlink(dentry, dir); - dput(dentry); -#else - d_delete(dentry); -#endif + fsnotify_unlink(dentry, inode, dir); } return error; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index e96a4306ab3b..1cb4935348d8 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -46,10 +46,8 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, /* * fsnotify_unlink - file was unlinked */ -static inline void fsnotify_unlink(struct dentry *dentry, struct inode *dir) +static inline void fsnotify_unlink(struct dentry *dentry, struct inode *inode, struct inode *dir) { - struct inode *inode = dentry->d_inode; - inode_dir_notify(dir, DN_DELETE); inotify_inode_queue_event(dir, IN_DELETE, 0, dentry->d_name.name); inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL); -- cgit v1.2.3 From ba02508248e90a9d696aebd18b48a3290235b53c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 5 Aug 2005 13:28:11 -0700 Subject: [PATCH] blk: fix tag shrinking (revive real_max_size) My patch in commit fa72b903f75e4f0f0b2c2feed093005167da4023 incorrectly removed blk_queue_tag->real_max_depth. The original resize implementation was incorrect in the following points. * actual allocation size of tag_index was shorter than real_max_size, but assumed to be of the same size, possibly causing memory access beyond the allocated area. * bits in tag_map between max_depth and real_max_depth were initialized to 1's, making the tags permanently reserved. In an attempt to fix above two bugs, I had removed allocation optimization in init_tag_map and real_max_size. Tag map/index were allocated and freed immediately during resize. Unfortunately, I wasn't considering that tag map/index can be resized dynamically with tags beyond new_depth active.
This led to accessing freed area after shrinking tags and led to the following bug reporting thread on linux-scsi. http://marc.theaimsgroup.com/?l=linux-scsi&m=112319898111885&w=2 To fix the problem, I've revived real_max_depth without allocation optimization in init_tag_map, and Andrew Vasquez confirmed that the problem was fixed. As Jens is not going to be available for a week, he asked me to make sure that this patch reaches you. http://marc.theaimsgroup.com/?l=linux-scsi&m=112325778530886&w=2 Also, a comment was added to make sure that real_max_size is needed for dynamic shrinking. Signed-off-by: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/ll_rw_blk.c | 18 +++++++++++++++--- include/linux/blkdev.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 692a5fced76e..3c818544475e 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -719,7 +719,7 @@ struct request *blk_queue_find_tag(request_queue_t *q, int tag) { struct blk_queue_tag *bqt = q->queue_tags; - if (unlikely(bqt == NULL || tag >= bqt->max_depth)) + if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) return NULL; return bqt->tag_index[tag]; @@ -798,6 +798,7 @@ init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth) memset(tag_index, 0, depth * sizeof(struct request *)); memset(tag_map, 0, nr_ulongs * sizeof(unsigned long)); + tags->real_max_depth = depth; tags->max_depth = depth; tags->tag_index = tag_index; tags->tag_map = tag_map; @@ -871,12 +872,23 @@ int blk_queue_resize_tags(request_queue_t *q, int new_depth) if (!bqt) return -ENXIO; + /* + * if we already have large enough real_max_depth. just + * adjust max_depth. *NOTE* as requests with tag value + * between new_depth and real_max_depth can be in-flight, tag + * map can not be shrunk blindly here. 
+ */ + if (new_depth <= bqt->real_max_depth) { + bqt->max_depth = new_depth; + return 0; + } + /* * save the old state info, so we can copy it back */ tag_index = bqt->tag_index; tag_map = bqt->tag_map; - max_depth = bqt->max_depth; + max_depth = bqt->real_max_depth; if (init_tag_map(q, bqt, new_depth)) return -ENOMEM; @@ -913,7 +925,7 @@ void blk_queue_end_tag(request_queue_t *q, struct request *rq) BUG_ON(tag == -1); - if (unlikely(tag >= bqt->max_depth)) + if (unlikely(tag >= bqt->real_max_depth)) /* * This can happen after tag depth has been reduced. * FIXME: how about a warning or info message here? diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0881b5cdee3d..19bd8e7e11bf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -301,6 +301,7 @@ struct blk_queue_tag { struct list_head busy_list; /* fifo list of busy tags */ int busy; /* current depth */ int max_depth; /* what we will send to device */ + int real_max_depth; /* what the array can hold */ atomic_t refcnt; /* map can be shared */ }; -- cgit v1.2.3 From 243393c90f2b7cb781fd794e22786e9c8547901a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 6 Aug 2005 09:39:57 -0700 Subject: Add fakey 'deflateBound()' function to the in-kernel zlib routines It's not the real deflateBound() in newer zlib libraries, partly because the upcoming usage of it won't have the "stream" available, so we can't have the same interfaces anyway. --- include/linux/zlib.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/zlib.h b/include/linux/zlib.h index 850076ea14d3..74f7b78c22d2 100644 --- a/include/linux/zlib.h +++ b/include/linux/zlib.h @@ -506,6 +506,11 @@ extern int zlib_deflateReset (z_streamp strm); stream state was inconsistent (such as zalloc or state being NULL). 
*/ +static inline unsigned long deflateBound(unsigned long s) +{ + return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; +} + extern int zlib_deflateParams (z_streamp strm, int level, int strategy); /* Dynamically update the compression level and compression strategy. The -- cgit v1.2.3 From 9ae5b3c703cce89a7d8ccf25fe16955ec6f016c0 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Sun, 7 Aug 2005 09:42:24 -0700 Subject: [PATCH] remove linux/pagemap.h from linux/swap.h sparc can not include linux/pagemap.h because of the following circular dependency: asm-sparc/pgtable include linux/swap.h linux/swap.h include now linux/pagemap.h linux/pagemap.h include linux/mm.h linux/mm.h include asm/pgtable.h It needs to have the swp_entry_t type fully visible in pgtable.h, we can't work around this using macros. Signed-off-by: Olaf Hering Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 239f520cc49e..bfe3e763ccf2 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -255,6 +254,8 @@ static inline void put_swap_token(struct mm_struct *mm) #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) +/* only sparc can not include linux/pagemap.h in this file + * so leave page_cache_release and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ page_cache_release(page) #define free_pages_and_swap_cache(pages, nr) \ -- cgit v1.2.3 From 7a91bf7f5c22c8407a9991cbd9ce5bb87caa6b4a Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Mon, 8 Aug 2005 13:52:16 -0400 Subject: [PATCH] fsnotify_name/inoderemove The patch below unhooks fsnotify from vfs_unlink & vfs_rmdir. It introduces two new fsnotify calls, that are hooked in at the dcache level. 
This not only more closely matches how the VFS layer works, it also avoids the problem with locking and inode lifetimes. The two functions are - fsnotify_nameremove -- called when a directory entry is going away. It notifies the PARENT of the deletion. This is called from d_delete(). - fsnotify_inoderemove -- called when the file's inode itself is going away. It notifies the inode that is being deleted. This is called from dentry_iput(). Signed-off-by: John McCutchan Signed-off-by: Linus Torvalds --- fs/dcache.c | 7 +++++++ fs/namei.c | 3 --- include/linux/fsnotify.h | 20 ++++++++++++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/dcache.c b/fs/dcache.c index 3aa8a7e980d8..a15a2e1f5520 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -101,6 +102,7 @@ static inline void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -1165,13 +1167,16 @@ out: void d_delete(struct dentry * dentry) { + int isdir = 0; /* * Are we the only user?
*/ spin_lock(&dcache_lock); spin_lock(&dentry->d_lock); + isdir = S_ISDIR(dentry->d_inode->i_mode); if (atomic_read(&dentry->d_count) == 1) { dentry_iput(dentry); + fsnotify_nameremove(dentry, isdir); return; } @@ -1180,6 +1185,8 @@ void d_delete(struct dentry * dentry) spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); + + fsnotify_nameremove(dentry, isdir); } static void __d_rehash(struct dentry * entry, struct hlist_head *list) diff --git a/fs/namei.c b/fs/namei.c index 32accb6a672f..57046d98a746 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1802,7 +1802,6 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) up(&dentry->d_inode->i_sem); if (!error) { d_delete(dentry); - fsnotify_rmdir(dentry, dentry->d_inode, dir); } dput(dentry); @@ -1874,9 +1873,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) /* We don't d_delete() NFS sillyrenamed files--they still exist. */ if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) { - struct inode *inode = dentry->d_inode; d_delete(dentry); - fsnotify_unlink(dentry, inode, dir); } return error; diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 1cb4935348d8..9db31d251c20 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -67,6 +67,26 @@ static inline void fsnotify_rmdir(struct dentry *dentry, struct inode *inode, inotify_inode_is_dead(inode); } +/* + * fsnotify_nameremove - a filename was removed from a directory + */ +static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) +{ + if (isdir) + isdir = IN_ISDIR; + dnotify_parent(dentry, DN_DELETE); + inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); +} + +/* + * fsnotify_inoderemove - an inode is going away + */ +static inline void fsnotify_inoderemove(struct inode *inode) +{ + inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL); + inotify_inode_is_dead(inode); +} + /* * fsnotify_create - 'name' was linked in */ -- cgit v1.2.3 From 
4d479e40e1748a877a24015fc6727b27b77110cd Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 8 Aug 2005 13:48:02 -0700 Subject: [NETLINK]: Allocate and kill some netlink numbers. NETLINK_ARPD is unused, allocate it to the Open-iSCSI folks. NETLINK_ROUTE6 and NETLINK_TAPBASE are no longer used, delete them. Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 70c2a9dc4b2b..6552b71bfa73 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -12,15 +12,13 @@ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ -#define NETLINK_ARPD 8 +#define NETLINK_ISCSI 8 /* Open-iSCSI */ #define NETLINK_AUDIT 9 /* auditing */ #define NETLINK_FIB_LOOKUP 10 -#define NETLINK_ROUTE6 11 /* af_inet6 route comm channel */ #define NETLINK_NETFILTER 12 /* netfilter subsystem */ #define NETLINK_IP6_FW 13 #define NETLINK_DNRTMSG 14 /* DECnet routing messages */ #define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */ -#define NETLINK_TAPBASE 16 /* 16 to 31 are ethertap */ #define MAX_LINKS 32 -- cgit v1.2.3 From dc836b5b6fcde95f750a4790d8200fabaf563dc9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Aug 2005 18:46:09 -0700 Subject: Revert "[PATCH] PCI: restore BAR values..." Revert commit fec59a711eef002d4ef9eb8de09dd0a26986eb77, which is breaking sparc64 that doesn't have a working pci_update_resource. We'll re-do this after 2.6.13 when we'll do it all properly. 
--- arch/sparc64/kernel/pci.c | 6 ----- drivers/pci/pci.c | 59 ++++------------------------------------------- drivers/pci/setup-res.c | 2 +- include/linux/pci.h | 3 --- 4 files changed, 5 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index 914e125d3971..bba140d98b1b 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -413,12 +413,6 @@ static int pci_assign_bus_resource(const struct pci_bus *bus, return -EBUSY; } -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) -{ - /* Not implemented for sparc64... */ - BUG(); -} - int pci_assign_resource(struct pci_dev *pdev, int resource) { struct pcidev_cookie *pcp = pdev->sysdata; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 65ea7d25f691..1b34fc56067e 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -221,37 +221,6 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) return best; } -/** - * pci_restore_bars - restore a devices BAR values (e.g. after wake-up) - * @dev: PCI device to have its BARs restored - * - * Restore the BAR values for a given device, so as to make it - * accessible by its driver. - */ -void -pci_restore_bars(struct pci_dev *dev) -{ - int i, numres; - - switch (dev->hdr_type) { - case PCI_HEADER_TYPE_NORMAL: - numres = 6; - break; - case PCI_HEADER_TYPE_BRIDGE: - numres = 2; - break; - case PCI_HEADER_TYPE_CARDBUS: - numres = 1; - break; - default: - /* Should never get here, but just in case... 
*/ - return; - } - - for (i = 0; i < numres; i ++) - pci_update_resource(dev, &dev->resource[i], i); -} - /** * pci_set_power_state - Set the power state of a PCI device * @dev: PCI device to be suspended @@ -270,7 +239,7 @@ int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t); int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { - int pm, need_restore = 0; + int pm; u16 pmcsr, pmc; /* bound the state we're entering */ @@ -309,17 +278,14 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return -EIO; } - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); - /* If we're in D3, force entire word to 0. * This doesn't affect PME_Status, disables PME_En, and * sets PowerState to 0. */ - if (dev->current_state >= PCI_D3hot) { - if (!(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) - need_restore = 1; + if (dev->current_state >= PCI_D3hot) pmcsr = 0; - } else { + else { + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); pmcsr &= ~PCI_PM_CTRL_STATE_MASK; pmcsr |= state; } @@ -342,22 +308,6 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) platform_pci_set_power_state(dev, state); dev->current_state = state; - - /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT - * INTERFACE SPECIFICATION, REV. 1.2", a device transitioning - * from D3hot to D0 _may_ perform an internal reset, thereby - * going to "D0 Uninitialized" rather than "D0 Initialized". - * For example, at least some versions of the 3c905B and the - * 3c556B exhibit this behaviour. - * - * At least some laptop BIOSen (e.g. the Thinkpad T21) leave - * devices in a D3hot state at boot. Consequently, we need to - * restore at least the BARs so that the device will be - * accessible to its driver. 
- */ - if (need_restore) - pci_restore_bars(dev); - return 0; } @@ -855,7 +805,6 @@ struct pci_dev *isa_bridge; EXPORT_SYMBOL(isa_bridge); #endif -EXPORT_SYMBOL_GPL(pci_restore_bars); EXPORT_SYMBOL(pci_enable_device_bars); EXPORT_SYMBOL(pci_enable_device); EXPORT_SYMBOL(pci_disable_device); diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 589486704ce3..84eedc965688 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,7 +26,7 @@ #include "pci.h" -void +static void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; diff --git a/include/linux/pci.h b/include/linux/pci.h index 98bdd95fcee9..8621cf42b46f 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -225,7 +225,6 @@ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ @@ -817,9 +816,7 @@ int pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); -void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int pci_assign_resource(struct pci_dev *dev, int i); -void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size); -- cgit v1.2.3 From 00dd1e433967872f3997a45d5adf35056fdf2f56 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Mon, 8 Aug 2005 22:13:05 -0400 Subject: [PATCH] fsnotify-cleanups This removes the now unused fsnotify_unlink & fsnotify_rmdir code. Compile tested. 
Signed-off-by: John McCutchan Signed-off-by: Linus Torvalds --- include/linux/fsnotify.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 9db31d251c20..602c305c8585 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -43,30 +43,6 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, } } -/* - * fsnotify_unlink - file was unlinked - */ -static inline void fsnotify_unlink(struct dentry *dentry, struct inode *inode, struct inode *dir) -{ - inode_dir_notify(dir, DN_DELETE); - inotify_inode_queue_event(dir, IN_DELETE, 0, dentry->d_name.name); - inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL); - - inotify_inode_is_dead(inode); -} - -/* - * fsnotify_rmdir - directory was removed - */ -static inline void fsnotify_rmdir(struct dentry *dentry, struct inode *inode, - struct inode *dir) -{ - inode_dir_notify(dir, DN_DELETE); - inotify_inode_queue_event(dir,IN_DELETE|IN_ISDIR,0,dentry->d_name.name); - inotify_inode_queue_event(inode, IN_DELETE_SELF | IN_ISDIR, 0, NULL); - inotify_inode_is_dead(inode); -} - /* * fsnotify_nameremove - a filename was removed from a directory */ -- cgit v1.2.3 From 86b3786078d63242d3194ffc58ae8dae1d1bbef3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 9 Aug 2005 19:59:21 -0700 Subject: [PATCH] Fix ide-disk.c oops caused by hwif == NULL 1. Move hwif_to_node to ide.h 2. 
Use hwif_to_node in ide-disk.c Signed-off-by: Christoph Lameter Signed-off-by: Linus Torvalds --- drivers/ide/ide-disk.c | 2 +- drivers/ide/ide-probe.c | 9 --------- include/linux/ide.h | 6 ++++++ 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index f9c1acb4ed6a..c9d3a00a3c0c 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -1220,7 +1220,7 @@ static int ide_disk_probe(struct device *dev) goto failed; g = alloc_disk_node(1 << PARTN_BITS, - pcibus_to_node(drive->hwif->pci_dev->bus)); + hwif_to_node(drive->hwif)); if (!g) goto out_free_idkp; diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c index 94daf40ae323..c1128ae5cd2f 100644 --- a/drivers/ide/ide-probe.c +++ b/drivers/ide/ide-probe.c @@ -960,15 +960,6 @@ static void save_match(ide_hwif_t *hwif, ide_hwif_t *new, ide_hwif_t **match) } #endif /* MAX_HWIFS > 1 */ -static inline int hwif_to_node(ide_hwif_t *hwif) -{ - if (hwif->pci_dev) - return pcibus_to_node(hwif->pci_dev->bus); - else - /* Add ways to determine the node of other busses here */ - return -1; -} - /* * init request queue */ diff --git a/include/linux/ide.h b/include/linux/ide.h index 92129078d4f3..a6dbb51ecd7b 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1501,4 +1501,10 @@ extern struct bus_type ide_bus_type; #define ide_id_has_flush_cache_ext(id) \ (((id)->cfs_enable_2 & 0x2400) == 0x2400) +static inline int hwif_to_node(ide_hwif_t *hwif) +{ + struct pci_dev *dev = hwif->pci_dev; + return dev ? pcibus_to_node(dev->bus) : -1; +} + #endif /* _IDE_H */ -- cgit v1.2.3 From 67c4f3fa25502ce7ed82fb0307e09cf36f1f81da Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:07:25 -0400 Subject: Fix numerous minor problems with new phy subsystem. Includes fixes for problems noted by Adrian Bunk, Andrew Morton, and one other person lost in the annals of history (and email folders). 
--- drivers/net/phy/Kconfig | 12 +- drivers/net/phy/Makefile | 12 +- drivers/net/phy/mdio_bus.c | 4 +- drivers/net/phy/phy.c | 9 +- drivers/net/phy/phy.c.orig | 860 ------------------------------------------- drivers/net/phy/phy_device.c | 48 ++- include/linux/phy.h | 1 - 7 files changed, 53 insertions(+), 893 deletions(-) delete mode 100644 drivers/net/phy/phy.c.orig (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 8b5db2343cc3..c2f1bf1d02d2 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -5,7 +5,7 @@ menu "PHY device support" config PHYLIB - bool "PHY Device support and infrastructure" + tristate "PHY Device support and infrastructure" depends on NET_ETHERNET help Ethernet controllers are usually attached to PHY @@ -24,31 +24,31 @@ comment "MII PHY device drivers" depends on PHYLIB config MARVELL_PHY - bool "Drivers for Marvell PHYs" + tristate "Drivers for Marvell PHYs" depends on PHYLIB ---help--- Currently has a driver for the 88E1011S config DAVICOM_PHY - bool "Drivers for Davicom PHYs" + tristate "Drivers for Davicom PHYs" depends on PHYLIB ---help--- Currently supports dm9161e and dm9131 config QSEMI_PHY - bool "Drivers for Quality Semiconductor PHYs" + tristate "Drivers for Quality Semiconductor PHYs" depends on PHYLIB ---help--- Currently supports the qs6612 config LXT_PHY - bool "Drivers for the Intel LXT PHYs" + tristate "Drivers for the Intel LXT PHYs" depends on PHYLIB ---help--- Currently supports the lxt970, lxt971 config CICADA_PHY - bool "Drivers for the Cicada PHYs" + tristate "Drivers for the Cicada PHYs" depends on PHYLIB ---help--- Currently supports the cis8204 diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 1af05de6ced0..fb7cb385a659 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,9 +1,9 @@ # Makefile for Linux PHY drivers -obj-$(CONFIG_PHYLIB) += phy.o phy_device.o mdio_bus.o +libphy-objs := phy.o phy_device.o mdio_bus.o 
-obj-$(CONFIG_MARVELL_PHY) += marvell.o -obj-$(CONFIG_DAVICOM_PHY) += davicom.o -obj-$(CONFIG_CICADA_PHY) += cicada.o -obj-$(CONFIG_LXT_PHY) += lxt.o -obj-$(CONFIG_QSEMI_PHY) += qsemi.o +obj-$(CONFIG_MARVELL_PHY) += libphy.o marvell.o +obj-$(CONFIG_DAVICOM_PHY) += libphy.o davicom.o +obj-$(CONFIG_CICADA_PHY) += libphy.o cicada.o +obj-$(CONFIG_LXT_PHY) += libphy.o lxt.o +obj-$(CONFIG_QSEMI_PHY) += libphy.o qsemi.o diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index e75103ba6f86..5fbea6acfe80 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -165,9 +165,9 @@ struct bus_type mdio_bus_type = { .resume = mdio_bus_resume, }; -static int __init mdio_bus_init(void) +int __init mdio_bus_init(void) { return bus_register(&mdio_bus_type); } -subsys_initcall(mdio_bus_init); + diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index e2c6896b92d2..934065dd6371 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -39,7 +39,6 @@ #include #include -static void phy_change(void *data); static void phy_timer(unsigned long data); /* Convenience function to print out the current phy status @@ -464,7 +463,6 @@ void phy_stop_machine(struct phy_device *phydev) phydev->adjust_state = NULL; } -#ifdef CONFIG_PHYCONTROL /* phy_error: * * Moves the PHY to the HALTED state in response to a read @@ -479,6 +477,10 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } +#ifdef CONFIG_PHYCONTROL + +static void phy_change(void *data); + /* phy_interrupt * * description: When a PHY interrupt occurs, the handler disables @@ -672,6 +674,8 @@ void phy_start(struct phy_device *phydev) EXPORT_SYMBOL(phy_stop); EXPORT_SYMBOL(phy_start); +#endif /* CONFIG_PHYCONTROL */ + /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) { @@ -859,4 +863,3 @@ static void phy_timer(unsigned long data) mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); } -#endif /* CONFIG_PHYCONTROL */ diff 
--git a/drivers/net/phy/phy.c.orig b/drivers/net/phy/phy.c.orig deleted file mode 100644 index 6af17cec9ace..000000000000 --- a/drivers/net/phy/phy.c.orig +++ /dev/null @@ -1,860 +0,0 @@ -/* - * drivers/net/phy/phy.c - * - * Framework for configuring and reading PHY devices - * Based on code in sungem_phy.c and gianfar_phy.c - * - * Author: Andy Fleming - * - * Copyright (c) 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static void phy_change(void *data); -static void phy_timer(unsigned long data); - -/* Convenience function to print out the current phy status - */ -void phy_print_status(struct phy_device *phydev) -{ - pr_info("%s: Link is %s", phydev->dev.bus_id, - phydev->link ? "Up" : "Down"); - if (phydev->link) - printk(" - %d/%s", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "Full" : "Half"); - - printk("\n"); -} -EXPORT_SYMBOL(phy_print_status); - - -/* Convenience functions for reading/writing a given PHY - * register. They MUST NOT be called from interrupt context, - * because the bus read/write functions may wait for an interrupt - * to conclude the operation. 
*/ -int phy_read(struct phy_device *phydev, u16 regnum) -{ - int retval; - struct mii_bus *bus = phydev->bus; - - spin_lock_bh(&bus->mdio_lock); - retval = bus->read(bus, phydev->addr, regnum); - spin_unlock_bh(&bus->mdio_lock); - - return retval; -} -EXPORT_SYMBOL(phy_read); - -int phy_write(struct phy_device *phydev, u16 regnum, u16 val) -{ - int err; - struct mii_bus *bus = phydev->bus; - - spin_lock_bh(&bus->mdio_lock); - err = bus->write(bus, phydev->addr, regnum, val); - spin_unlock_bh(&bus->mdio_lock); - - return err; -} -EXPORT_SYMBOL(phy_write); - - -int phy_clear_interrupt(struct phy_device *phydev) -{ - int err = 0; - - if (phydev->drv->ack_interrupt) - err = phydev->drv->ack_interrupt(phydev); - - return err; -} - - -int phy_config_interrupt(struct phy_device *phydev, u32 interrupts) -{ - int err = 0; - - phydev->interrupts = interrupts; - if (phydev->drv->config_intr) - err = phydev->drv->config_intr(phydev); - - return err; -} - - -/* phy_aneg_done - * - * description: Reads the status register and returns 0 either if - * auto-negotiation is incomplete, or if there was an error. - * Returns BMSR_ANEGCOMPLETE if auto-negotiation is done. - */ -static inline int phy_aneg_done(struct phy_device *phydev) -{ - int retval; - - retval = phy_read(phydev, MII_BMSR); - - return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); -} - -/* phy_start_aneg - * - * description: Calls the PHY driver's config_aneg, and then - * sets the PHY state to PHY_AN if auto-negotiation is enabled, - * and to PHY_FORCING if auto-negotiation is disabled. Unless - * the PHY is currently HALTED. 
- */ -int phy_start_aneg(struct phy_device *phydev) -{ - int err; - - spin_lock(&phydev->lock); - - if (AUTONEG_DISABLE == phydev->autoneg) - phy_sanitize_settings(phydev); - - err = phydev->drv->config_aneg(phydev); - - if (err < 0) - goto out_unlock; - - if (phydev->state != PHY_HALTED) { - if (AUTONEG_ENABLE == phydev->autoneg) { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; - } - } - -out_unlock: - spin_unlock(&phydev->lock); - return err; -} -EXPORT_SYMBOL(phy_start_aneg); - - -/* A structure for mapping a particular speed and duplex - * combination to a particular SUPPORTED and ADVERTISED value */ -struct phy_setting { - int speed; - int duplex; - u32 setting; -}; - -/* A mapping of all SUPPORTED settings to speed/duplex */ -static struct phy_setting settings[] = { - { - .speed = 10000, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_10000baseT_Full, - }, - { - .speed = SPEED_1000, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_1000baseT_Full, - }, - { - .speed = SPEED_1000, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_1000baseT_Half, - }, - { - .speed = SPEED_100, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_100baseT_Full, - }, - { - .speed = SPEED_100, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_100baseT_Half, - }, - { - .speed = SPEED_10, - .duplex = DUPLEX_FULL, - .setting = SUPPORTED_10baseT_Full, - }, - { - .speed = SPEED_10, - .duplex = DUPLEX_HALF, - .setting = SUPPORTED_10baseT_Half, - }, -}; - -#define MAX_NUM_SETTINGS (sizeof(settings)/sizeof(struct phy_setting)) - -/* phy_find_setting - * - * description: Searches the settings array for the setting which - * matches the desired speed and duplex, and returns the index - * of that setting. Returns the index of the last setting if - * none of the others match. 
- */ -static inline int phy_find_setting(int speed, int duplex) -{ - int idx = 0; - - while (idx < ARRAY_SIZE(settings) && - (settings[idx].speed != speed || - settings[idx].duplex != duplex)) - idx++; - - return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; -} - -/* phy_find_valid - * idx: The first index in settings[] to search - * features: A mask of the valid settings - * - * description: Returns the index of the first valid setting less - * than or equal to the one pointed to by idx, as determined by - * the mask in features. Returns the index of the last setting - * if nothing else matches. - */ -static inline int phy_find_valid(int idx, u32 features) -{ - while (idx < MAX_NUM_SETTINGS && !(settings[idx].setting & features)) - idx++; - - return idx < MAX_NUM_SETTINGS ? idx : MAX_NUM_SETTINGS - 1; -} - -/* phy_sanitize_settings - * - * description: Make sure the PHY is set to supported speeds and - * duplexes. Drop down by one in this order: 1000/FULL, - * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF - */ -void phy_sanitize_settings(struct phy_device *phydev) -{ - u32 features = phydev->supported; - int idx; - - /* Sanitize settings based on PHY capabilities */ - if ((features & SUPPORTED_Autoneg) == 0) - phydev->autoneg = 0; - - idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex), - features); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; -} -EXPORT_SYMBOL(phy_sanitize_settings); - -/* phy_force_reduction - * - * description: Reduces the speed/duplex settings by - * one notch. The order is so: - * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, - * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. 
- */ -static void phy_force_reduction(struct phy_device *phydev) -{ - int idx; - - idx = phy_find_setting(phydev->speed, phydev->duplex); - - idx++; - - idx = phy_find_valid(idx, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - pr_info("Trying %d/%s\n", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "FULL" : "HALF"); -} - -/* phy_ethtool_sset: - * A generic ethtool sset function. Handles all the details - * - * A few notes about parameter checking: - * - We don't set port or transceiver, so we don't care what they - * were set to. - * - phy_start_aneg() will make sure forced settings are sane, and - * choose the next best ones from the ones selected, so we don't - * care if ethtool tries to give us bad values - */ -int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) -{ - if (cmd->phy_address != phydev->addr) - return -EINVAL; - - /* We make sure that we don't pass unsupported - * values in to the PHY */ - cmd->advertising &= phydev->supported; - - /* Verify the settings we care about. 
*/ - if (cmd->autoneg != AUTONEG_ENABLE && cmd->autoneg != AUTONEG_DISABLE) - return -EINVAL; - - if (cmd->autoneg == AUTONEG_ENABLE && cmd->advertising == 0) - return -EINVAL; - - if (cmd->autoneg == AUTONEG_DISABLE - && ((cmd->speed != SPEED_1000 - && cmd->speed != SPEED_100 - && cmd->speed != SPEED_10) - || (cmd->duplex != DUPLEX_HALF - && cmd->duplex != DUPLEX_FULL))) - return -EINVAL; - - phydev->autoneg = cmd->autoneg; - - phydev->speed = cmd->speed; - - phydev->advertising = cmd->advertising; - - if (AUTONEG_ENABLE == cmd->autoneg) - phydev->advertising |= ADVERTISED_Autoneg; - else - phydev->advertising &= ~ADVERTISED_Autoneg; - - phydev->duplex = cmd->duplex; - - /* Restart the PHY */ - phy_start_aneg(phydev); - - return 0; -} - -int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd) -{ - cmd->supported = phydev->supported; - - cmd->advertising = phydev->advertising; - - cmd->speed = phydev->speed; - cmd->duplex = phydev->duplex; - cmd->port = PORT_MII; - cmd->phy_address = phydev->addr; - cmd->transceiver = XCVR_EXTERNAL; - cmd->autoneg = phydev->autoneg; - - return 0; -} - - -/* Note that this function is currently incompatible with the - * PHYCONTROL layer. It changes registers without regard to - * current state. 
Use at own risk - */ -int phy_mii_ioctl(struct phy_device *phydev, - struct mii_ioctl_data *mii_data, int cmd) -{ - u16 val = mii_data->val_in; - - switch (cmd) { - case SIOCGMIIPHY: - mii_data->phy_id = phydev->addr; - break; - case SIOCGMIIREG: - mii_data->val_out = phy_read(phydev, mii_data->reg_num); - break; - - case SIOCSMIIREG: - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (mii_data->phy_id == phydev->addr) { - switch(mii_data->reg_num) { - case MII_BMCR: - if (val & (BMCR_RESET|BMCR_ANENABLE)) - phydev->autoneg = AUTONEG_DISABLE; - else - phydev->autoneg = AUTONEG_ENABLE; - if ((!phydev->autoneg) && (val & BMCR_FULLDPLX)) - phydev->duplex = DUPLEX_FULL; - else - phydev->duplex = DUPLEX_HALF; - break; - case MII_ADVERTISE: - phydev->advertising = val; - break; - default: - /* do nothing */ - break; - } - } - - phy_write(phydev, mii_data->reg_num, val); - - if (mii_data->reg_num == MII_BMCR - && val & BMCR_RESET - && phydev->drv->config_init) - phydev->drv->config_init(phydev); - break; - } - - return 0; -} - -/* phy_start_machine: - * - * description: The PHY infrastructure can run a state machine - * which tracks whether the PHY is starting up, negotiating, - * etc. This function starts the timer which tracks the state - * of the PHY. If you want to be notified when the state - * changes, pass in the callback, otherwise, pass NULL. If you - * want to maintain your own state machine, do not call this - * function. */ -void phy_start_machine(struct phy_device *phydev, - void (*handler)(struct net_device *)) -{ - phydev->adjust_state = handler; - - init_timer(&phydev->phy_timer); - phydev->phy_timer.function = &phy_timer; - phydev->phy_timer.data = (unsigned long) phydev; - mod_timer(&phydev->phy_timer, jiffies + HZ); -} - -/* phy_stop_machine - * - * description: Stops the state machine timer, sets the state to - * UP (unless it wasn't up yet), and then frees the interrupt, - * if it is in use. This function must be called BEFORE - * phy_detach. 
- */ -void phy_stop_machine(struct phy_device *phydev) -{ - del_timer_sync(&phydev->phy_timer); - - spin_lock(&phydev->lock); - if (phydev->state > PHY_UP) - phydev->state = PHY_UP; - spin_unlock(&phydev->lock); - - if (phydev->irq != PHY_POLL) - phy_stop_interrupts(phydev); - - phydev->adjust_state = NULL; -} - -#ifdef CONFIG_PHYCONTROL -/* phy_error: - * - * Moves the PHY to the HALTED state in response to a read - * or write error, and tells the controller the link is down. - * Must not be called from interrupt context, or while the - * phydev->lock is held. - */ -void phy_error(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - phydev->state = PHY_HALTED; - spin_unlock(&phydev->lock); -} - -/* phy_interrupt - * - * description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. - */ -static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) -{ - struct phy_device *phydev = phy_dat; - - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. 
*/ - disable_irq_nosync(irq); - - schedule_work(&phydev->phy_queue); - - return IRQ_HANDLED; -} - -/* Enable the interrupts from the PHY side */ -int phy_enable_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_clear_interrupt(phydev); - - if (err < 0) - return err; - - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - return err; -} - -/* Disable the PHY interrupts from the PHY side */ -int phy_disable_interrupts(struct phy_device *phydev) -{ - int err; - - /* Disable PHY interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - - if (err) - goto phy_err; - - /* Clear the interrupt */ - err = phy_clear_interrupt(phydev); - - if (err) - goto phy_err; - - return 0; - -phy_err: - phy_error(phydev); - - return err; -} - -/* phy_start_interrupts - * - * description: Request the interrupt for the given PHY. If - * this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. - * Returns 0 on success. - * This should only be called with a valid IRQ number. 
- */ -int phy_start_interrupts(struct phy_device *phydev) -{ - int err = 0; - - INIT_WORK(&phydev->phy_queue, phy_change, phydev); - - if (request_irq(phydev->irq, phy_interrupt, - SA_SHIRQ, - "phy_interrupt", - phydev) < 0) { - printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, - phydev->irq); - phydev->irq = PHY_POLL; - return 0; - } - - err = phy_enable_interrupts(phydev); - - return err; -} -EXPORT_SYMBOL(phy_start_interrupts); - -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - - -/* Scheduled by the phy_interrupt/timer to handle PHY changes */ -static void phy_change(void *data) -{ - int err; - struct phy_device *phydev = data; - - err = phy_disable_interrupts(phydev); - - if (err) - goto phy_err; - - spin_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - spin_unlock(&phydev->lock); - - enable_irq(phydev->irq); - - /* Reenable interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - if (err) - goto irq_enable_err; - - return; - -irq_enable_err: - disable_irq(phydev->irq); -phy_err: - phy_error(phydev); -} - -/* Bring down the PHY link, and stop checking the status. */ -void phy_stop(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - if (PHY_HALTED == phydev->state) - goto out_unlock; - - if (phydev->irq != PHY_POLL) { - /* Clear any pending interrupts */ - phy_clear_interrupt(phydev); - - /* Disable PHY Interrupts */ - phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - } - - phydev->state = PHY_HALTED; - -out_unlock: - spin_unlock(&phydev->lock); -} - - -/* phy_start - * - * description: Indicates the attached device's readiness to - * handle PHY-related work. 
Used during startup to start the - * PHY, and after a call to phy_stop() to resume operation. - * Also used to indicate the MDIO bus has cleared an error - * condition. - */ -void phy_start(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - switch (phydev->state) { - case PHY_STARTING: - phydev->state = PHY_PENDING; - break; - case PHY_READY: - phydev->state = PHY_UP; - break; - case PHY_HALTED: - phydev->state = PHY_RESUMING; - default: - break; - } - spin_unlock(&phydev->lock); -} -EXPORT_SYMBOL(phy_stop); -EXPORT_SYMBOL(phy_start); - -/* PHY timer which handles the state machine */ -static void phy_timer(unsigned long data) -{ - struct phy_device *phydev = (struct phy_device *)data; - int needs_aneg = 0; - int err = 0; - - spin_lock(&phydev->lock); - - if (phydev->adjust_state) - phydev->adjust_state(phydev->attached_dev); - - switch(phydev->state) { - case PHY_DOWN: - case PHY_STARTING: - case PHY_READY: - case PHY_PENDING: - break; - case PHY_UP: - needs_aneg = 1; - - phydev->link_timeout = PHY_AN_TIMEOUT; - - break; - case PHY_AN: - /* Check if negotiation is done. 
Break - * if there's an error */ - err = phy_aneg_done(phydev); - if (err < 0) - break; - - /* If auto-negotiation is done, we change to - * either RUNNING, or NOLINK */ - if (err > 0) { - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - phydev->state = PHY_NOLINK; - netif_carrier_off(phydev->attached_dev); - } - - phydev->adjust_link(phydev->attached_dev); - - } else if (0 == phydev->link_timeout--) { - /* The counter expired, so either we - * switch to forced mode, or the - * magic_aneg bit exists, and we try aneg - * again */ - if (!(phydev->drv->flags & PHY_HAS_MAGICANEG)) { - int idx; - - /* We'll start from the - * fastest speed, and work - * our way down */ - idx = phy_find_valid(0, - phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - phydev->autoneg = AUTONEG_DISABLE; - phydev->state = PHY_FORCING; - phydev->link_timeout = - PHY_FORCE_TIMEOUT; - - pr_info("Trying %d/%s\n", - phydev->speed, - DUPLEX_FULL == - phydev->duplex ? 
- "FULL" : "HALF"); - } - - needs_aneg = 1; - } - break; - case PHY_NOLINK: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); - } - break; - case PHY_FORCING: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - if (0 == phydev->link_timeout--) { - phy_force_reduction(phydev); - needs_aneg = 1; - } - } - - phydev->adjust_link(phydev->attached_dev); - break; - case PHY_RUNNING: - /* Only register a CHANGE if we are - * polling */ - if (PHY_POLL == phydev->irq) - phydev->state = PHY_CHANGELINK; - break; - case PHY_CHANGELINK: - err = phy_read_status(phydev); - - if (err) - break; - - if (phydev->link) { - phydev->state = PHY_RUNNING; - netif_carrier_on(phydev->attached_dev); - } else { - phydev->state = PHY_NOLINK; - netif_carrier_off(phydev->attached_dev); - } - - phydev->adjust_link(phydev->attached_dev); - - if (PHY_POLL != phydev->irq) - err = phy_config_interrupt(phydev, - PHY_INTERRUPT_ENABLED); - break; - case PHY_HALTED: - if (phydev->link) { - phydev->link = 0; - netif_carrier_off(phydev->attached_dev); - phydev->adjust_link(phydev->attached_dev); - } - break; - case PHY_RESUMING: - - err = phy_clear_interrupt(phydev); - - if (err) - break; - - err = phy_config_interrupt(phydev, - PHY_INTERRUPT_ENABLED); - - if (err) - break; - - if (AUTONEG_ENABLE == phydev->autoneg) { - err = phy_aneg_done(phydev); - if (err < 0) - break; - - /* err > 0 if AN is done. 
- * Otherwise, it's 0, and we're - * still waiting for AN */ - if (err > 0) { - phydev->state = PHY_RUNNING; - } else { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } - } else - phydev->state = PHY_RUNNING; - break; - } - - spin_unlock(&phydev->lock); - - if (needs_aneg) - err = phy_start_aneg(phydev); - - if (err < 0) - phy_error(phydev); - - mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); -} - -#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index f0595af4c837..c11138330fed 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -39,6 +39,19 @@ #include #include +static int genphy_config_init(struct phy_device *phydev); + +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + .features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner = THIS_MODULE, }, +}; + /* get_phy_device * * description: Reads the ID registers of the PHY at addr on the @@ -656,27 +669,32 @@ void phy_driver_unregister(struct phy_driver *drv) } EXPORT_SYMBOL(phy_driver_unregister); -static struct phy_driver genphy_driver = { - .phy_id = 0xffffffff, - .phy_id_mask = 0xffffffff, - .name = "Generic PHY", - .config_init = genphy_config_init, - .features = 0, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .driver = {.owner = THIS_MODULE, }, -}; -static int __init genphy_init(void) +static int __init phy_init(void) { - return phy_driver_register(&genphy_driver); + int rc; + extern int mdio_bus_init(void); + + rc = phy_driver_register(&genphy_driver); + if (rc) + goto out; + + rc = mdio_bus_init(); + if (rc) + goto out_unreg; + return 0; + +out_unreg: + phy_driver_unregister(&genphy_driver); +out: + return rc; } -static void __exit genphy_exit(void) +static void __exit phy_exit(void) { 
phy_driver_unregister(&genphy_driver); } -module_init(genphy_init); -module_exit(genphy_exit); +module_init(phy_init); +module_exit(phy_exit); diff --git a/include/linux/phy.h b/include/linux/phy.h index 3404804dc22d..72cb67b66e0c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -374,5 +374,4 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; -extern struct phy_driver genphy_driver; #endif /* __PHY_H */ -- cgit v1.2.3 From 2bf69b5fe90b3246ab50064c5a690a363e8c53e2 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:47:54 -0400 Subject: phy subsystem: more cleanups - unexport symbols never used outside of home module - remove dead code - remove CONFIG_PHYCONTROL, make it unconditionally enabled --- drivers/net/phy/Kconfig | 8 -- drivers/net/phy/mdio_bus.c | 74 ---------------- drivers/net/phy/phy.c | 197 +++---------------------------------------- drivers/net/phy/phy_device.c | 130 +--------------------------- include/linux/phy.h | 17 ---- 5 files changed, 12 insertions(+), 414 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index c2f1bf1d02d2..6450bd71deb4 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -12,14 +12,6 @@ config PHYLIB devices. This option provides infrastructure for managing PHY devices. -config PHYCONTROL - bool "Support for automatically handling PHY state changes" - depends on PHYLIB - help - Adds code to perform all the work for keeping PHY link - state (speed/duplex/etc) up-to-date. Also handles - interrupts. 
- comment "MII PHY device drivers" depends on PHYLIB diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 5fbea6acfe80..d5a05be28818 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,80 +38,6 @@ #include #include -/* mdiobus_register - * - * description: Called by a bus driver to bring up all the PHYs - * on a given bus, and attach them to the bus - */ -int mdiobus_register(struct mii_bus *bus) -{ - int i; - int err = 0; - - spin_lock_init(&bus->mdio_lock); - - if (NULL == bus || NULL == bus->name || - NULL == bus->read || - NULL == bus->write) - return -EINVAL; - - if (bus->reset) - bus->reset(bus); - - for (i = 0; i < PHY_MAX_ADDR; i++) { - struct phy_device *phydev; - - phydev = get_phy_device(bus, i); - - if (IS_ERR(phydev)) - return PTR_ERR(phydev); - - /* There's a PHY at this address - * We need to set: - * 1) IRQ - * 2) bus_id - * 3) parent - * 4) bus - * 5) mii_bus - * And, we need to register it */ - if (phydev) { - phydev->irq = bus->irq[i]; - - phydev->dev.parent = bus->dev; - phydev->dev.bus = &mdio_bus_type; - sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); - - phydev->bus = bus; - - err = device_register(&phydev->dev); - - if (err) - printk(KERN_ERR "phy %d failed to register\n", - i); - } - - bus->phy_map[i] = phydev; - } - - pr_info("%s: probed\n", bus->name); - - return err; -} -EXPORT_SYMBOL(mdiobus_register); - -void mdiobus_unregister(struct mii_bus *bus) -{ - int i; - - for (i = 0; i < PHY_MAX_ADDR; i++) { - if (bus->phy_map[i]) { - device_unregister(&bus->phy_map[i]->dev); - kfree(bus->phy_map[i]); - } - } -} -EXPORT_SYMBOL(mdiobus_unregister); - /* mdio_bus_match * * description: Given a PHY device, and a PHY driver, return 1 if diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 934065dd6371..d3e43631b89b 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -40,21 +40,9 @@ #include static void phy_timer(unsigned long data); - -/* Convenience function to print 
out the current phy status - */ -void phy_print_status(struct phy_device *phydev) -{ - pr_info("%s: Link is %s", phydev->dev.bus_id, - phydev->link ? "Up" : "Down"); - if (phydev->link) - printk(" - %d/%s", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "Full" : "Half"); - - printk("\n"); -} -EXPORT_SYMBOL(phy_print_status); +static int phy_disable_interrupts(struct phy_device *phydev); +static void phy_sanitize_settings(struct phy_device *phydev); +static int phy_stop_interrupts(struct phy_device *phydev); /* Convenience functions for reading/writing a given PHY @@ -133,7 +121,7 @@ static inline int phy_aneg_done(struct phy_device *phydev) * and to PHY_FORCING if auto-negotiation is disabled. Unless * the PHY is currently HALTED. */ -int phy_start_aneg(struct phy_device *phydev) +static int phy_start_aneg(struct phy_device *phydev) { int err; @@ -161,8 +149,6 @@ out_unlock: spin_unlock(&phydev->lock); return err; } -EXPORT_SYMBOL(phy_start_aneg); - /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value */ @@ -255,7 +241,7 @@ static inline int phy_find_valid(int idx, u32 features) * duplexes. Drop down by one in this order: 1000/FULL, * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF */ -void phy_sanitize_settings(struct phy_device *phydev) +static void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; int idx; @@ -270,7 +256,6 @@ void phy_sanitize_settings(struct phy_device *phydev) phydev->speed = settings[idx].speed; phydev->duplex = settings[idx].duplex; } -EXPORT_SYMBOL(phy_sanitize_settings); /* phy_force_reduction * @@ -477,48 +462,22 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } -#ifdef CONFIG_PHYCONTROL - -static void phy_change(void *data); - -/* phy_interrupt - * - * description: When a PHY interrupt occurs, the handler disables - * interrupts, and schedules a work task to clear the interrupt. 
- */ -static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) -{ - struct phy_device *phydev = phy_dat; - - /* The MDIO bus is not allowed to be written in interrupt - * context, so we need to disable the irq here. A work - * queue will write the PHY to disable and clear the - * interrupt, and then reenable the irq line. */ - disable_irq_nosync(irq); - - schedule_work(&phydev->phy_queue); - - return IRQ_HANDLED; -} - -/* Enable the interrupts from the PHY side */ -int phy_enable_interrupts(struct phy_device *phydev) +static int phy_stop_interrupts(struct phy_device *phydev) { int err; - err = phy_clear_interrupt(phydev); + err = phy_disable_interrupts(phydev); - if (err < 0) - return err; + if (err) + phy_error(phydev); - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + free_irq(phydev->irq, phydev); return err; } -EXPORT_SYMBOL(phy_enable_interrupts); /* Disable the PHY interrupts from the PHY side */ -int phy_disable_interrupts(struct phy_device *phydev) +static int phy_disable_interrupts(struct phy_device *phydev) { int err; @@ -541,140 +500,6 @@ phy_err: return err; } -EXPORT_SYMBOL(phy_disable_interrupts); - -/* phy_start_interrupts - * - * description: Request the interrupt for the given PHY. If - * this fails, then we set irq to PHY_POLL. - * Otherwise, we enable the interrupts in the PHY. - * Returns 0 on success. - * This should only be called with a valid IRQ number. 
- */ -int phy_start_interrupts(struct phy_device *phydev) -{ - int err = 0; - - INIT_WORK(&phydev->phy_queue, phy_change, phydev); - - if (request_irq(phydev->irq, phy_interrupt, - SA_SHIRQ, - "phy_interrupt", - phydev) < 0) { - printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", - phydev->bus->name, - phydev->irq); - phydev->irq = PHY_POLL; - return 0; - } - - err = phy_enable_interrupts(phydev); - - return err; -} -EXPORT_SYMBOL(phy_start_interrupts); - -int phy_stop_interrupts(struct phy_device *phydev) -{ - int err; - - err = phy_disable_interrupts(phydev); - - if (err) - phy_error(phydev); - - free_irq(phydev->irq, phydev); - - return err; -} -EXPORT_SYMBOL(phy_stop_interrupts); - - -/* Scheduled by the phy_interrupt/timer to handle PHY changes */ -static void phy_change(void *data) -{ - int err; - struct phy_device *phydev = data; - - err = phy_disable_interrupts(phydev); - - if (err) - goto phy_err; - - spin_lock(&phydev->lock); - if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) - phydev->state = PHY_CHANGELINK; - spin_unlock(&phydev->lock); - - enable_irq(phydev->irq); - - /* Reenable interrupts */ - err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); - - if (err) - goto irq_enable_err; - - return; - -irq_enable_err: - disable_irq(phydev->irq); -phy_err: - phy_error(phydev); -} - -/* Bring down the PHY link, and stop checking the status. */ -void phy_stop(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - if (PHY_HALTED == phydev->state) - goto out_unlock; - - if (phydev->irq != PHY_POLL) { - /* Clear any pending interrupts */ - phy_clear_interrupt(phydev); - - /* Disable PHY Interrupts */ - phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); - } - - phydev->state = PHY_HALTED; - -out_unlock: - spin_unlock(&phydev->lock); -} - - -/* phy_start - * - * description: Indicates the attached device's readiness to - * handle PHY-related work. 
Used during startup to start the - * PHY, and after a call to phy_stop() to resume operation. - * Also used to indicate the MDIO bus has cleared an error - * condition. - */ -void phy_start(struct phy_device *phydev) -{ - spin_lock(&phydev->lock); - - switch (phydev->state) { - case PHY_STARTING: - phydev->state = PHY_PENDING; - break; - case PHY_READY: - phydev->state = PHY_UP; - break; - case PHY_HALTED: - phydev->state = PHY_RESUMING; - default: - break; - } - spin_unlock(&phydev->lock); -} -EXPORT_SYMBOL(phy_stop); -EXPORT_SYMBOL(phy_start); - -#endif /* CONFIG_PHYCONTROL */ /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c11138330fed..c44d54f6310a 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -124,133 +124,6 @@ void phy_prepare_link(struct phy_device *phydev, phydev->adjust_link = handler; } -#ifdef CONFIG_PHYCONTROL -/* phy_connect: - * - * description: Convenience function for connecting ethernet - * devices to PHY devices. The default behavior is for - * the PHY infrastructure to handle everything, and only notify - * the connected driver when the link status changes. If you - * don't want, or can't use the provided functionality, you may - * choose to call only the subset of functions which provide - * the desired functionality. 
- */ -struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, - void (*handler)(struct net_device *), u32 flags) -{ - struct phy_device *phydev; - - phydev = phy_attach(dev, phy_id, flags); - - if (IS_ERR(phydev)) - return phydev; - - phy_prepare_link(phydev, handler); - - phy_start_machine(phydev, NULL); - - if (phydev->irq > 0) - phy_start_interrupts(phydev); - - return phydev; -} -EXPORT_SYMBOL(phy_connect); - -void phy_disconnect(struct phy_device *phydev) -{ - if (phydev->irq > 0) - phy_stop_interrupts(phydev); - - phy_stop_machine(phydev); - - phydev->adjust_link = NULL; - - phy_detach(phydev); -} -EXPORT_SYMBOL(phy_disconnect); - -#endif /* CONFIG_PHYCONTROL */ - -/* phy_attach: - * - * description: Called by drivers to attach to a particular PHY - * device. The phy_device is found, and properly hooked up - * to the phy_driver. If no driver is attached, then the - * genphy_driver is used. The phy_device is given a ptr to - * the attaching device, and given a callback for link status - * change. The phy_device is returned to the attaching - * driver. - */ -static int phy_compare_id(struct device *dev, void *data) -{ - return strcmp((char *)data, dev->bus_id) ? 0 : 1; -} - -struct phy_device *phy_attach(struct net_device *dev, - const char *phy_id, u32 flags) -{ - struct bus_type *bus = &mdio_bus_type; - struct phy_device *phydev; - struct device *d; - - /* Search the list of PHY devices on the mdio bus for the - * PHY with the requested name */ - d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); - - if (d) { - phydev = to_phy_device(d); - } else { - printk(KERN_ERR "%s not found\n", phy_id); - return ERR_PTR(-ENODEV); - } - - /* Assume that if there is no driver, that it doesn't - * exist, and we should use the genphy driver. 
*/ - if (NULL == d->driver) { - int err; - down_write(&d->bus->subsys.rwsem); - d->driver = &genphy_driver.driver; - - err = d->driver->probe(d); - - if (err < 0) - return ERR_PTR(err); - - device_bind_driver(d); - up_write(&d->bus->subsys.rwsem); - } - - if (phydev->attached_dev) { - printk(KERN_ERR "%s: %s already attached\n", - dev->name, phy_id); - return ERR_PTR(-EBUSY); - } - - phydev->attached_dev = dev; - - phydev->dev_flags = flags; - - return phydev; -} -EXPORT_SYMBOL(phy_attach); - -void phy_detach(struct phy_device *phydev) -{ - phydev->attached_dev = NULL; - - /* If the device had no specific driver before (i.e. - it - * was using the generic driver), we unbind the device - * from the generic driver so that there's a chance a - * real driver could be loaded */ - if (phydev->dev.driver == &genphy_driver.driver) { - down_write(&phydev->dev.bus->subsys.rwsem); - device_release_driver(&phydev->dev); - up_write(&phydev->dev.bus->subsys.rwsem); - } -} -EXPORT_SYMBOL(phy_detach); - - /* Generic PHY support and helper functions */ /* genphy_config_advert @@ -259,7 +132,7 @@ EXPORT_SYMBOL(phy_detach); * after sanitizing the values to make sure we only advertise * what is supported */ -int genphy_config_advert(struct phy_device *phydev) +static int genphy_config_advert(struct phy_device *phydev) { u32 advertise; int adv; @@ -317,7 +190,6 @@ int genphy_config_advert(struct phy_device *phydev) return adv; } -EXPORT_SYMBOL(genphy_config_advert); /* genphy_setup_forced * diff --git a/include/linux/phy.h b/include/linux/phy.h index 72cb67b66e0c..4f2b5effc16b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,26 +334,11 @@ int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); -struct phy_device * phy_attach(struct net_device *dev, - const char *phy_id, u32 
flags); -struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, - void (*handler)(struct net_device *), u32 flags); -void phy_disconnect(struct phy_device *phydev); -void phy_detach(struct phy_device *phydev); -void phy_start(struct phy_device *phydev); -void phy_stop(struct phy_device *phydev); -int phy_start_aneg(struct phy_device *phydev); - -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void phy_sanitize_settings(struct phy_device *phydev); -int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } -int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -370,8 +355,6 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); -int phy_start_interrupts(struct phy_device *phydev); -void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ -- cgit v1.2.3 From 972dcafb6d743a6c7611a2e4681ed814e30d6230 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 11 Aug 2005 03:35:53 -0400 Subject: [libata scsi] add START STOP UNIT translation --- drivers/scsi/libata-scsi.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/ata.h | 2 ++ 2 files changed, 58 insertions(+) (limited to 'include/linux') diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 6a75ec2187fd..f58311b8c050 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -391,6 +391,60 @@ int ata_scsi_error(struct Scsi_Host *host) return 0; } +/** + * ata_scsi_start_stop_xlat - Translate SCSI START STOP UNIT 
command + * @qc: Storage for translated ATA taskfile + * @scsicmd: SCSI command to translate + * + * Sets up an ATA taskfile to issue STANDBY (to stop) or READ VERIFY + * (to start). Perhaps these commands should be preceded by + * CHECK POWER MODE to see what power mode the device is already in. + * [See SAT revision 5 at www.t10.org] + * + * LOCKING: + * spin_lock_irqsave(host_set lock) + * + * RETURNS: + * Zero on success, non-zero on error. + */ + +static unsigned int ata_scsi_start_stop_xlat(struct ata_queued_cmd *qc, + u8 *scsicmd) +{ + struct ata_taskfile *tf = &qc->tf; + + tf->flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; + tf->protocol = ATA_PROT_NODATA; + if (scsicmd[1] & 0x1) { + ; /* ignore IMMED bit, violates sat-r05 */ + } + if (scsicmd[4] & 0x2) + return 1; /* LOEJ bit set not supported */ + if (((scsicmd[4] >> 4) & 0xf) != 0) + return 1; /* power conditions not supported */ + if (scsicmd[4] & 0x1) { + tf->nsect = 1; /* 1 sector, lba=0 */ + tf->lbah = 0x0; + tf->lbam = 0x0; + tf->lbal = 0x0; + tf->device |= ATA_LBA; + tf->command = ATA_CMD_VERIFY; /* READ VERIFY */ + } else { + tf->nsect = 0; /* time period value (0 implies now) */ + tf->command = ATA_CMD_STANDBY; + /* Consider: ATA STANDBY IMMEDIATE command */ + } + /* + * Standby and Idle condition timers could be implemented but that + * would require libata to implement the Power condition mode page + * and allow the user to change it. Changing mode pages requires + * MODE SELECT to be implemented. 
+ */ + + return 0; +} + + /** * ata_scsi_flush_xlat - Translate SCSI SYNCHRONIZE CACHE command * @qc: Storage for translated ATA taskfile @@ -1435,6 +1489,8 @@ static inline ata_xlat_func_t ata_get_xlat_func(struct ata_device *dev, u8 cmd) case VERIFY: case VERIFY_16: return ata_scsi_verify_xlat; + case START_STOP: + return ata_scsi_start_stop_xlat; } return NULL; diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf9981..9d25e9886d60 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -108,6 +108,8 @@ enum { /* ATA device commands */ ATA_CMD_CHK_POWER = 0xE5, /* check power mode */ + ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ + ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, -- cgit v1.2.3 From a0d3bea3cf6c7c1b53a46432bd490b5dc784ca42 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 11 Aug 2005 16:05:50 -0700 Subject: [NET]: Make skb->protocol __be16 There are many instances of skb->protocol = htons(ETH_P_*); skb->protocol = __constant_htons(ETH_P_*); and skb->protocol = *_type_trans(...); Most of *_type_trans() are already endian-annotated, so, let's shift attention on other warnings. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0061c9470482..948527e42a60 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -255,7 +255,7 @@ struct sk_buff { nohdr:1; /* 3 bits spare */ __u8 pkt_type; - __u16 protocol; + __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER -- cgit v1.2.3 From 0db1d6fc1ea051af49ebe03c503d23996a7c5bbb Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Thu, 11 Aug 2005 19:25:54 -0700 Subject: [NETPOLL]: add retry timeout Add limited retry logic to netpoll_send_skb Each time we attempt to send, decrement our per-device retry counter. On every successful send, we reset the counter. We delay 50us between attempts with up to 20000 retries for a total of 1 second. After we've exhausted our retries, subsequent failed attempts will try only once until reset by success. Signed-off-by: Matt Mackall Signed-off-by: David S. 
Miller --- include/linux/netpoll.h | 1 + net/core/netpoll.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index bcd0ac33f592..be68d94b03d5 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -26,6 +26,7 @@ struct netpoll { struct netpoll_info { spinlock_t poll_lock; int poll_owner; + int tries; int rx_flags; spinlock_t rx_lock; struct netpoll *rx_np; /* netpoll that registered an rx_hook */ diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 59ed186e4f46..d09affdbad3c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -33,6 +33,7 @@ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 #define MAX_QUEUE_DEPTH (MAX_SKBS / 2) +#define MAX_RETRIES 20000 static DEFINE_SPINLOCK(skb_list_lock); static int nr_skbs; @@ -265,7 +266,8 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) return; } - while (1) { + do { + npinfo->tries--; spin_lock(&np->dev->xmit_lock); np->dev->xmit_lock_owner = smp_processor_id(); @@ -277,6 +279,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) np->dev->xmit_lock_owner = -1; spin_unlock(&np->dev->xmit_lock); netpoll_poll(np); + udelay(50); continue; } @@ -285,12 +288,15 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) spin_unlock(&np->dev->xmit_lock); /* success */ - if(!status) + if(!status) { + npinfo->tries = MAX_RETRIES; /* reset */ return; + } /* transmit busy */ netpoll_poll(np); - } + udelay(50); + } while (npinfo->tries > 0); } void netpoll_send_udp(struct netpoll *np, const char *msg, int len) @@ -642,6 +648,7 @@ int netpoll_setup(struct netpoll *np) npinfo->rx_np = NULL; npinfo->poll_lock = SPIN_LOCK_UNLOCKED; npinfo->poll_owner = -1; + npinfo->tries = MAX_RETRIES; npinfo->rx_lock = SPIN_LOCK_UNLOCKED; } else npinfo = ndev->npinfo; -- cgit v1.2.3 From 53fb95d3c14290fd6ee808b221e35493f096246f Mon Sep 17 00:00:00 2001 From: 
Matt Mackall Date: Thu, 11 Aug 2005 19:27:43 -0700 Subject: [NETPOLL]: fix initialization/NAPI race This fixes a race during initialization with the NAPI softirq processing by using an RCU approach. This race was discovered when refill_skbs() was added to the setup code. Signed-off-by: Matt Mackall Signed-off-by: David S. Miller --- include/linux/netpoll.h | 19 +++++++++++++------ net/core/dev.c | 9 +++++---- net/core/netpoll.c | 3 +++ 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index be68d94b03d5..5ade54a78dbb 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -9,6 +9,7 @@ #include #include +#include #include struct netpoll; @@ -61,25 +62,31 @@ static inline int netpoll_rx(struct sk_buff *skb) return ret; } -static inline void netpoll_poll_lock(struct net_device *dev) +static inline void *netpoll_poll_lock(struct net_device *dev) { + rcu_read_lock(); /* deal with race on ->npinfo */ if (dev->npinfo) { spin_lock(&dev->npinfo->poll_lock); dev->npinfo->poll_owner = smp_processor_id(); + return dev->npinfo; } + return NULL; } -static inline void netpoll_poll_unlock(struct net_device *dev) +static inline void netpoll_poll_unlock(void *have) { - if (dev->npinfo) { - dev->npinfo->poll_owner = -1; - spin_unlock(&dev->npinfo->poll_lock); + struct netpoll_info *npi = have; + + if (npi) { + npi->poll_owner = -1; + spin_unlock(&npi->poll_lock); } + rcu_read_unlock(); } #else #define netpoll_rx(a) 0 -#define netpoll_poll_lock(a) +#define netpoll_poll_lock(a) 0 #define netpoll_poll_unlock(a) #endif diff --git a/net/core/dev.c b/net/core/dev.c index 52a3bf7ae177..faf59b02c4bf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1696,7 +1696,8 @@ static void net_rx_action(struct softirq_action *h) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; int budget = netdev_budget; - + void *have; + local_irq_disable(); while 
(!list_empty(&queue->poll_list)) { @@ -1709,10 +1710,10 @@ static void net_rx_action(struct softirq_action *h) dev = list_entry(queue->poll_list.next, struct net_device, poll_list); - netpoll_poll_lock(dev); + have = netpoll_poll_lock(dev); if (dev->quota <= 0 || dev->poll(dev, &budget)) { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); local_irq_disable(); list_del(&dev->poll_list); list_add_tail(&dev->poll_list, &queue->poll_list); @@ -1721,7 +1722,7 @@ static void net_rx_action(struct softirq_action *h) else dev->quota = dev->weight; } else { - netpoll_poll_unlock(dev); + netpoll_poll_unlock(have); dev_put(dev); local_irq_disable(); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c02a08da6d42..996787bca17f 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -732,6 +732,9 @@ int netpoll_setup(struct netpoll *np) /* last thing to do is link it to the net device structure */ ndev->npinfo = npinfo; + /* avoid racing with NAPI reading npinfo */ + synchronize_rcu(); + return 0; release: -- cgit v1.2.3 From d0a7e574007fd547d72ec693bfa35778623d0738 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 14 Aug 2005 17:09:01 -0500 Subject: [SCSI] correct transport class abstraction to work outside SCSI I recently tried to construct a totally generic transport class and found there were certain features missing from the current abstract transport class. Most notable is that you have to hang the data on the class_device but most of the API is framed in terms of the generic device, not the class_device. 
These changes are two fold - Provide the class_device to all of the setup and configure APIs - Provide and extra API to take the device and the attribute class and return the corresponding class_device Signed-off-by: James Bottomley --- drivers/base/attribute_container.c | 38 +++++++++++++++++++++++++++++++++++++ drivers/base/transport_class.c | 17 +++++++++++------ drivers/scsi/scsi_transport_fc.c | 6 ++++-- drivers/scsi/scsi_transport_spi.c | 11 ++++++++--- include/linux/attribute_container.h | 9 +++------ include/linux/transport_class.h | 11 ++++++++--- 6 files changed, 72 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index ec615d854be9..62c093db11e6 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -58,6 +58,7 @@ attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); INIT_LIST_HEAD(&cont->containers); + spin_lock_init(&cont->containers_lock); down(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); @@ -77,11 +78,13 @@ attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; down(&attribute_container_mutex); + spin_lock(&cont->containers_lock); if (!list_empty(&cont->containers)) goto out; retval = 0; list_del(&cont->node); out: + spin_unlock(&cont->containers_lock); up(&attribute_container_mutex); return retval; @@ -151,7 +154,9 @@ attribute_container_add_device(struct device *dev, fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); + spin_lock(&cont->containers_lock); list_add_tail(&ic->node, &cont->containers); + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -189,6 +194,7 @@ attribute_container_remove_device(struct device *dev, if (!cont->match(cont, dev)) continue; + spin_lock(&cont->containers_lock); list_for_each_entry_safe(ic, tmp, &cont->containers, 
node) { if (dev != ic->classdev.dev) continue; @@ -200,6 +206,7 @@ attribute_container_remove_device(struct device *dev, class_device_unregister(&ic->classdev); } } + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -230,10 +237,12 @@ attribute_container_device_trigger(struct device *dev, if (!cont->match(cont, dev)) continue; + spin_lock(&cont->containers_lock); list_for_each_entry_safe(ic, tmp, &cont->containers, node) { if (dev == ic->classdev.dev) fn(cont, dev, &ic->classdev); } + spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -368,6 +377,35 @@ attribute_container_class_device_del(struct class_device *classdev) } EXPORT_SYMBOL_GPL(attribute_container_class_device_del); +/** + * attribute_container_find_class_device - find the corresponding class_device + * + * @cont: the container + * @dev: the generic device + * + * Looks up the device in the container's list of class devices and returns + * the corresponding class_device. + */ +struct class_device * +attribute_container_find_class_device(struct attribute_container *cont, + struct device *dev) +{ + struct class_device *cdev = NULL; + struct internal_container *ic; + + spin_lock(&cont->containers_lock); + list_for_each_entry(ic, &cont->containers, node) { + if (ic->classdev.dev == dev) { + cdev = &ic->classdev; + break; + } + } + spin_unlock(&cont->containers_lock); + + return cdev; +} +EXPORT_SYMBOL_GPL(attribute_container_find_class_device); + int __init attribute_container_init(void) { diff --git a/drivers/base/transport_class.c b/drivers/base/transport_class.c index 6c2b447a3336..4fb4c5de8470 100644 --- a/drivers/base/transport_class.c +++ b/drivers/base/transport_class.c @@ -64,7 +64,9 @@ void transport_class_unregister(struct transport_class *tclass) } EXPORT_SYMBOL_GPL(transport_class_unregister); -static int anon_transport_dummy_function(struct device *dev) +static int anon_transport_dummy_function(struct transport_container *tc, + struct device 
*dev, + struct class_device *cdev) { /* do nothing */ return 0; @@ -115,9 +117,10 @@ static int transport_setup_classdev(struct attribute_container *cont, struct class_device *classdev) { struct transport_class *tclass = class_to_transport_class(cont->class); + struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->setup) - tclass->setup(dev); + tclass->setup(tcont, dev, classdev); return 0; } @@ -178,12 +181,14 @@ void transport_add_device(struct device *dev) EXPORT_SYMBOL_GPL(transport_add_device); static int transport_configure(struct attribute_container *cont, - struct device *dev) + struct device *dev, + struct class_device *cdev) { struct transport_class *tclass = class_to_transport_class(cont->class); + struct transport_container *tcont = attribute_container_to_transport_container(cont); if (tclass->configure) - tclass->configure(dev); + tclass->configure(tcont, dev, cdev); return 0; } @@ -202,7 +207,7 @@ static int transport_configure(struct attribute_container *cont, */ void transport_configure_device(struct device *dev) { - attribute_container_trigger(dev, transport_configure); + attribute_container_device_trigger(dev, transport_configure); } EXPORT_SYMBOL_GPL(transport_configure_device); @@ -215,7 +220,7 @@ static int transport_remove_classdev(struct attribute_container *cont, struct transport_class *tclass = class_to_transport_class(cont->class); if (tclass->remove) - tclass->remove(dev); + tclass->remove(tcont, dev, classdev); if (tclass->remove != anon_transport_dummy_function) { if (tcont->statistics) diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c index 35d1c1e8e345..96243c7fe110 100644 --- a/drivers/scsi/scsi_transport_fc.c +++ b/drivers/scsi/scsi_transport_fc.c @@ -252,7 +252,8 @@ struct fc_internal { #define to_fc_internal(tmpl) container_of(tmpl, struct fc_internal, t) -static int fc_target_setup(struct device *dev) +static int fc_target_setup(struct transport_container 
*tc, struct device *dev, + struct class_device *cdev) { struct scsi_target *starget = to_scsi_target(dev); struct fc_rport *rport = starget_to_rport(starget); @@ -281,7 +282,8 @@ static DECLARE_TRANSPORT_CLASS(fc_transport_class, NULL, NULL); -static int fc_host_setup(struct device *dev) +static int fc_host_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c index 02134fce2174..89f6b7feb9c2 100644 --- a/drivers/scsi/scsi_transport_spi.c +++ b/drivers/scsi/scsi_transport_spi.c @@ -162,7 +162,8 @@ static inline enum spi_signal_type spi_signal_to_value(const char *name) return SPI_SIGNAL_UNKNOWN; } -static int spi_host_setup(struct device *dev) +static int spi_host_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) { struct Scsi_Host *shost = dev_to_shost(dev); @@ -196,7 +197,9 @@ static int spi_host_match(struct attribute_container *cont, return &i->t.host_attrs.ac == cont; } -static int spi_device_configure(struct device *dev) +static int spi_device_configure(struct transport_container *tc, + struct device *dev, + struct class_device *cdev) { struct scsi_device *sdev = to_scsi_device(dev); struct scsi_target *starget = sdev->sdev_target; @@ -214,7 +217,9 @@ static int spi_device_configure(struct device *dev) return 0; } -static int spi_setup_transport_attrs(struct device *dev) +static int spi_setup_transport_attrs(struct transport_container *tc, + struct device *dev, + struct class_device *cdev) { struct scsi_target *starget = to_scsi_target(dev); diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index af1010b6dab7..f54b05b052b3 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,10 +11,12 @@ #include #include +#include struct attribute_container { struct list_head node; struct list_head 
containers; + spinlock_t containers_lock; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); @@ -62,12 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); - - - - - - +struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); #endif diff --git a/include/linux/transport_class.h b/include/linux/transport_class.h index 87d98d1faefb..1d6cc22e5f42 100644 --- a/include/linux/transport_class.h +++ b/include/linux/transport_class.h @@ -12,11 +12,16 @@ #include #include +struct transport_container; + struct transport_class { struct class class; - int (*setup)(struct device *); - int (*configure)(struct device *); - int (*remove)(struct device *); + int (*setup)(struct transport_container *, struct device *, + struct class_device *); + int (*configure)(struct transport_container *, struct device *, + struct class_device *); + int (*remove)(struct transport_container *, struct device *, + struct class_device *); }; #define DECLARE_TRANSPORT_CLASS(cls, nm, su, rm, cfg) \ -- cgit v1.2.3 From 89204c40a03346cd951e698d854105db4cfedc28 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Mon, 15 Aug 2005 12:13:28 -0400 Subject: [PATCH] inotify: add MOVE_SELF event This adds a MOVE_SELF event to inotify. It is sent whenever the inode you are watching is moved. We need this event so that we can catch something like this: - app1: watch /etc/mtab - app2: cp /etc/mtab /tmp/mtab-work mv /etc/mtab /etc/mtab~ mv /tmp/mtab-work /etc/mtab app1 still thinks it's watching /etc/mtab but it's actually watching /etc/mtab~. 
Signed-off-by: John McCutchan Signed-off-by: Robert Love Signed-off-by: Linus Torvalds --- fs/namei.c | 3 ++- include/linux/fsnotify.h | 6 +++++- include/linux/inotify.h | 4 +++- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 57046d98a746..b85f158aef0c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2216,7 +2216,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry); if (!error) { const char *new_name = old_dentry->d_name.name; - fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, new_dentry->d_inode); + fsnotify_move(old_dir, new_dir, old_name, new_name, is_dir, + new_dentry->d_inode, old_dentry->d_inode); } fsnotify_oldname_free(old_name); diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 602c305c8585..03b8e7932b83 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -21,7 +21,7 @@ */ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, const char *old_name, const char *new_name, - int isdir, struct inode *target) + int isdir, struct inode *target, struct inode *source) { u32 cookie = inotify_get_cookie(); @@ -41,6 +41,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL); inotify_inode_is_dead(target); } + + if (source) { + inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL); + } } /* diff --git a/include/linux/inotify.h b/include/linux/inotify.h index a40c2bf0408e..93bb3afe646b 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -35,6 +35,7 @@ struct inotify_event { #define IN_CREATE 0x00000100 /* Subfile was created */ #define IN_DELETE 0x00000200 /* Subfile was deleted */ #define IN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define IN_MOVE_SELF 0x00000800 /* Self was moved */ /* the following are legal events. 
they are sent as needed to any watch */ #define IN_UNMOUNT 0x00002000 /* Backing fs was unmounted */ @@ -56,7 +57,8 @@ struct inotify_event { */ #define IN_ALL_EVENTS (IN_ACCESS | IN_MODIFY | IN_ATTRIB | IN_CLOSE_WRITE | \ IN_CLOSE_NOWRITE | IN_OPEN | IN_MOVED_FROM | \ - IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF) + IN_MOVED_TO | IN_DELETE | IN_CREATE | IN_DELETE_SELF | \ + IN_MOVE_SELF) #ifdef __KERNEL__ -- cgit v1.2.3 From 58fcb8df0bf663bb6b8f46cd3010bfe8d13d97cf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 10 Aug 2005 18:15:12 -0400 Subject: [PATCH] NFS: Ensure ACL xdr code doesn't overflow. Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs_common/nfsacl.c | 1 + include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs_common/nfsacl.c b/fs/nfs_common/nfsacl.c index 18c58c32e326..251e5a1bb1c4 100644 --- a/fs/nfs_common/nfsacl.c +++ b/fs/nfs_common/nfsacl.c @@ -239,6 +239,7 @@ nfsacl_decode(struct xdr_buf *buf, unsigned int base, unsigned int *aclcnt, if (xdr_decode_word(buf, base, &entries) || entries > NFS_ACL_MAX_ENTRIES) return -EINVAL; + nfsacl_desc.desc.array_maxlen = entries; err = xdr_decode_array2(buf, base + 4, &nfsacl_desc.desc); if (err) return err; diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 34ec3e8d99b3..23448d0fb5bc 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -177,6 +177,7 @@ typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); struct xdr_array2_desc { unsigned int elem_size; unsigned int array_len; + unsigned int array_maxlen; xdr_xcode_elem_t xcode; }; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 8a4d9c106af1..fde16f40a581 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -993,6 +993,7 @@ xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, return -EINVAL; } else { if (xdr_decode_word(buf, base, &desc->array_len) != 0 || 
+ desc->array_len > desc->array_maxlen || (unsigned long) base + 4 + desc->array_len * desc->elem_size > buf->len) return -EINVAL; -- cgit v1.2.3 From 65e4308d2500e7daf60c3dccc202c61ffb066c63 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 16 Aug 2005 11:49:44 -0400 Subject: [PATCH] NFS: Ensure we always update inode->i_mode when doing O_EXCL creates When the client performs an exclusive create and opens the file for writing, a Netapp filer will first create the file using the mode 01777. It does this since an NFSv3/v4 exclusive create cannot immediately set the mode bits. The 01777 mode then gets put into the inode->i_mode. After the file creation is successful, we then do a setattr to change the mode to the correct value (as per the NFS spec). The problem is that nfs_refresh_inode() no longer updates inode->i_mode, so the latter retains the 01777 mode. A bit later, the VFS notices this, and calls remove_suid(). This of course now resets the file mode to inode->i_mode & 0777. Hey presto, the file mode on the server is now magically changed to 0777. Duh... Fixes http://bugzilla.linux-nfs.org/show_bug.cgi?id=32 Signed-off-by: Trond Myklebust Signed-off-by: Linus Torvalds --- fs/nfs/inode.c | 37 ++++++++++++++++++++++++------------- fs/nfs/nfs3proc.c | 4 ++++ fs/nfs/nfs4proc.c | 10 ++++++++-- fs/nfs/proc.c | 2 ++ include/linux/nfs_fs.h | 1 + 5 files changed, 39 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 4845911f1c63..bb7ca022bcb2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -814,28 +814,39 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) nfs_wb_all(inode); } error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error == 0) { + if (error == 0) nfs_refresh_inode(inode, &fattr); + nfs_end_data_update(inode); + unlock_kernel(); + return error; +} + +/** + * nfs_setattr_update_inode - Update inode metadata after a setattr call. 
+ * @inode: pointer to struct inode + * @attr: pointer to struct iattr + * + * Note: we do this in the *proc.c in order to ensure that + * it works for things like exclusive creates too. + */ +void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) +{ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { if ((attr->ia_valid & ATTR_MODE) != 0) { - int mode; - mode = inode->i_mode & ~S_IALLUGO; - mode |= attr->ia_mode & S_IALLUGO; + int mode = attr->ia_mode & S_IALLUGO; + mode |= inode->i_mode & ~S_IALLUGO; inode->i_mode = mode; } if ((attr->ia_valid & ATTR_UID) != 0) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - if ((attr->ia_valid & ATTR_SIZE) != 0) { - inode->i_size = attr->ia_size; - vmtruncate(inode, attr->ia_size); - } - } - if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfs_end_data_update(inode); - unlock_kernel(); - return error; + } + if ((attr->ia_valid & ATTR_SIZE) != 0) { + inode->i_size = attr->ia_size; + vmtruncate(inode, attr->ia_size); + } } /* diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 7851569b31c6..2681485cf2d0 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -120,6 +120,8 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFS3PROC_SETATTR, &arg, fattr, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); return status; } @@ -370,6 +372,8 @@ again: * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ status = nfs3_proc_setattr(dentry, &fattr, sattr); + if (status == 0) + nfs_setattr_update_inode(dentry->d_inode, sattr); nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
1b76f80aedb9..0c5a308e4963 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -753,6 +753,7 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, .rpc_argp = &arg, .rpc_resp = &res, }; + int status; fattr->valid = 0; @@ -762,7 +763,8 @@ static int _nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, } else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, 0); + return status; } static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, @@ -1145,6 +1147,8 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); if (state != NULL) nfs4_close_state(state, FMODE_WRITE); put_rpccred(cred); @@ -1449,8 +1453,10 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) + if (status == 0) { + nfs_setattr_update_inode(state->inode, sattr); goto out; + } } else if (flags != 0) goto out; nfs4_close_state(state, flags); diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index cedf636bcf3c..be23c3fb9260 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -114,6 +114,8 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, dprintk("NFS call setattr\n"); fattr->valid = 0; status = rpc_call(NFS_CLIENT(inode), NFSPROC_SETATTR, &arg, fattr, 0); + if (status == 0) + nfs_setattr_update_inode(inode, sattr); dprintk("NFS reply setattr: %d\n", status); return status; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 8ea249110fb0..7d78a783c64a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -292,6 +292,7 @@ extern int nfs_revalidate_inode(struct nfs_server *server, struct 
inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); +extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); extern void nfs_begin_attr_update(struct inode *); extern void nfs_end_attr_update(struct inode *); extern void nfs_begin_data_update(struct inode *); -- cgit v1.2.3 From 4602b88d9743b5f20655de8078fb42e9fd25581f Mon Sep 17 00:00:00 2001 From: Kristen Accardi Date: Tue, 16 Aug 2005 15:15:58 -0700 Subject: [PATCH] PCI: 6700/6702PXH quirk On the 6700/6702 PXH part, a MSI may get corrupted if an ACPI hotplug driver and SHPC driver in MSI mode are used together. This patch will prevent MSI from being enabled for the SHPC as part of an early pci quirk, as well as on any pci device which sets the no_msi bit. Signed-off-by: Kristen Carlson Accardi Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/pci/msi.c | 5 ++++- drivers/pci/pci.h | 2 +- drivers/pci/quirks.c | 21 +++++++++++++++++++++ include/linux/pci.h | 3 ++- include/linux/pci_ids.h | 5 +++++ 5 files changed, 33 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index b5ab9aa6ff7c..2b85aa39f954 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -453,7 +453,7 @@ static void enable_msi_mode(struct pci_dev *dev, int pos, int type) } } -static void disable_msi_mode(struct pci_dev *dev, int pos, int type) +void disable_msi_mode(struct pci_dev *dev, int pos, int type) { u16 control; @@ -699,6 +699,9 @@ int pci_enable_msi(struct pci_dev* dev) if (!pci_msi_enable || !dev) return status; + if (dev->no_msi) + return status; + temp = dev->irq; if ((status = msi_init()) < 0) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d94d7af4f7a0..fa36094aa0f9 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -46,7 +46,7 @@ 
extern int pci_msi_quirk; #else #define pci_msi_quirk 0 #endif - +void disable_msi_mode(struct pci_dev *dev, int pos, int type); extern int pcie_mch_quirk; extern struct device_attribute pci_dev_attrs[]; extern struct class_device_attribute class_device_attr_cpuaffinity; diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index a9160ad16581..bb36bb69803f 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -1291,6 +1291,27 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quir DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_pcie_mch ); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_pcie_mch ); + +/* + * It's possible for the MSI to get corrupted if shpc and acpi + * are used together on certain PXH-based systems. + */ +static void __devinit quirk_pcie_pxh(struct pci_dev *dev) +{ + disable_msi_mode(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), + PCI_CAP_ID_MSI); + dev->no_msi = 1; + + printk(KERN_WARNING "PCI: PXH quirk detected, " + "disabling MSI for SHPC device\n"); +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHD_0, quirk_pcie_pxh); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHD_1, quirk_pcie_pxh); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_0, quirk_pcie_pxh); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXH_1, quirk_pcie_pxh); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_PXHV, quirk_pcie_pxh); + + static void __devinit quirk_netmos(struct pci_dev *dev) { unsigned int num_parallel = (dev->subsystem_device & 0xf0) >> 4; diff --git a/include/linux/pci.h b/include/linux/pci.h index 8621cf42b46f..bc4c40000c0d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -556,7 +556,8 @@ struct pci_dev { /* keep track of device state */ unsigned int is_enabled:1; /* pci_enable_device has been called */ unsigned int is_busmaster:1; /* 
device is busmaster */ - + unsigned int no_msi:1; /* device may not use msi */ + u32 saved_config_space[16]; /* config space saved at suspend time */ struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bc4cc10fabe9..51e61e96051c 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2281,6 +2281,11 @@ #define PCI_VENDOR_ID_INTEL 0x8086 #define PCI_DEVICE_ID_INTEL_EESSC 0x0008 #define PCI_DEVICE_ID_INTEL_21145 0x0039 +#define PCI_DEVICE_ID_INTEL_PXHD_0 0x0320 +#define PCI_DEVICE_ID_INTEL_PXHD_1 0x0321 +#define PCI_DEVICE_ID_INTEL_PXH_0 0x0329 +#define PCI_DEVICE_ID_INTEL_PXH_1 0x032A +#define PCI_DEVICE_ID_INTEL_PXHV 0x032C #define PCI_DEVICE_ID_INTEL_82375 0x0482 #define PCI_DEVICE_ID_INTEL_82424 0x0483 #define PCI_DEVICE_ID_INTEL_82378 0x0484 -- cgit v1.2.3 From 5529680981807b44abf3be30fb6d612ff04f68ff Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:09 -0700 Subject: [PATCH] NFS: split nfsi->flags into two fields Certain bits in nfsi->flags can be manipulated with atomic bitops, and some are better manipulated via logical bitmask operations. This patch splits the flags field into two. The next patch introduces atomic bitops for one of the fields. Test plan: Millions of fsx ops on SMP clients. 
Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 16 +++++++------ fs/nfs/file.c | 5 +++-- fs/nfs/inode.c | 61 ++++++++++++++++++++++++++------------------------ fs/nfs/nfs3acl.c | 2 +- fs/nfs/read.c | 4 ++-- include/linux/nfs_fs.h | 27 +++++++++++++--------- 6 files changed, 63 insertions(+), 52 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b38a57e78a63..5732e13cd0da 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. @@ -462,7 +462,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -608,7 +608,7 @@ static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + if ((NFS_I(dir)->cache_validity & NFS_INO_INVALID_ATTR) != 0 || nfs_attribute_timeout(dir)) return 0; return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); @@ -1575,11 +1575,12 @@ out: int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != cred || time_after(jiffies, cache->jiffies + 
NFS_ATTRTIMEO(inode)) - || (NFS_FLAGS(inode) & NFS_INO_INVALID_ACCESS)) + || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) return -ENOENT; memcpy(res, cache, sizeof(*res)); return 0; @@ -1587,14 +1588,15 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { - struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache = &nfsi->cache_access; if (cache->cred != set->cred) { if (cache->cred) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } - NFS_FLAGS(inode) &= ~NFS_INO_INVALID_ACCESS; + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5621ba9885f4..f6b9eda925c5 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -134,9 +134,10 @@ nfs_file_release(struct inode *inode, struct file *filp) */ static int nfs_revalidate_file(struct inode *inode, struct file *filp) { + struct nfs_inode *nfsi = NFS_I(inode); int retval = 0; - if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); nfs_revalidate_mapping(inode, filp->f_mapping); return 0; @@ -164,7 +165,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) goto force_reval; if (nfsi->npages != 0) return 0; - if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) return 0; force_reval: return __nfs_revalidate_inode(server, inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bb7ca022bcb2..622184553516 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -620,9 +620,9 @@ nfs_zap_caches(struct inode *inode) 
memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +632,7 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); - NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; } /* @@ -841,7 +841,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -872,8 +872,7 @@ nfs_wait_on_inode(struct inode *inode, int flag) int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - struct nfs_inode *nfsi = NFS_I(inode); - int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err; if (__IS_FLG(inode, MS_NOATIME)) @@ -1019,7 +1018,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) struct nfs_fattr fattr; struct nfs_inode *nfsi = NFS_I(inode); unsigned long verifier; - unsigned int flags; + unsigned long cache_validity; 
dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1036,7 +1035,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out_nowait; if (NFS_ATTRTIMEO(inode) == 0) continue; - if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) continue; status = NFS_STALE(inode) ? -ESTALE : 0; goto out_nowait; @@ -1065,18 +1064,21 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } - flags = nfsi->flags; - nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; + cache_validity = nfsi->cache_validity; + nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; + /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). */ if (verifier == nfsi->cache_change_attribute) - nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); - /* Do the page cache invalidation */ + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + nfs_revalidate_mapping(inode, inode->i_mapping); - if (flags & NFS_INO_INVALID_ACL) + + if (cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1107,7 +1109,7 @@ int nfs_attribute_timeout(struct inode *inode) */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + if (!(NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? 
-ESTALE : 0; return __nfs_revalidate_inode(server, inode); @@ -1122,14 +1124,14 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) { struct nfs_inode *nfsi = NFS_I(inode); - if (nfsi->flags & NFS_INO_INVALID_DATA) { + if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { if (S_ISREG(inode->i_mode)) { if (filemap_fdatawrite(mapping) == 0) filemap_fdatawait(mapping); nfs_wb_all(inode); } invalidate_inode_pages2(mapping); - nfsi->flags &= ~NFS_INO_INVALID_DATA; + nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ @@ -1164,10 +1166,10 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + nfsi->cache_validity |= NFS_INO_INVALID_DATA; } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1200,9 +1202,9 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; if (nfsi->change_attr != fattr->change_attr) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } } @@ -1227,28 +1229,28 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!data_unstable) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= 
NFS_INO_REVAL_PAGECACHE; } if (cur_size != new_isize) { - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (nfsi->npages == 0) - nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) - nfsi->flags |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!timespec_equal(&inode->i_atime, &fattr->atime)) - nfsi->flags |= NFS_INO_INVALID_ATIME; + nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; return 0; @@ -1384,7 +1386,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; if (!nfs_have_delegation(inode, FMODE_READ)) - nfsi->flags |= invalid; + nfsi->cache_validity |= invalid; return 0; out_changed: @@ -1961,7 +1963,8 @@ static struct inode *nfs_alloc_inode(struct super_block *sb) nfsi = (struct nfs_inode *)kmem_cache_alloc(nfs_inode_cachep, SLAB_KERNEL); if (!nfsi) return NULL; - nfsi->flags = 0; + nfsi->flags = 0UL; + nfsi->cache_validity = 0UL; #ifdef CONFIG_NFS_V3_ACL nfsi->acl_access = ERR_PTR(-EAGAIN); nfsi->acl_default = ERR_PTR(-EAGAIN); diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 1b7a3ef2f813..a020e650ffc2 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + 
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6f866b8aa2d5..90df0500ca1b 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +473,7 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } - NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; data->complete(data, status); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 7d78a783c64a..229a1755842a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -113,6 +113,7 @@ struct nfs_inode { * Various flags */ unsigned int flags; + unsigned long cache_validity; /* * read_cache_jiffies is when we started read-caching this inode, @@ -188,17 +189,21 @@ struct nfs_inode { }; /* - * Legal inode flag values + * Cache validity bit flags */ -#define NFS_INO_STALE 0x0001 /* possible stale inode */ -#define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ -#define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */ -#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ -#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ -#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ -#define NFS_INO_INVALID_ACCESS 0x0040 /* cached access cred invalid */ -#define NFS_INO_INVALID_ACL 0x0080 /* cached acls are invalid */ -#define NFS_INO_REVAL_PAGECACHE 0x1000 /* must revalidate pagecache */ +#define NFS_INO_INVALID_ATTR 0x0001 /* cached attrs are invalid */ +#define NFS_INO_INVALID_DATA 0x0002 /* cached data is invalid */ 
+#define NFS_INO_INVALID_ATIME 0x0004 /* cached atime is invalid */ +#define NFS_INO_INVALID_ACCESS 0x0008 /* cached access cred invalid */ +#define NFS_INO_INVALID_ACL 0x0010 /* cached acls are invalid */ +#define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ + +/* + * Legal values of flags field + */ +#define NFS_INO_REVALIDATING 0x0001 /* revalidating attrs */ +#define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ +#define NFS_INO_STALE 0x0004 /* possible stale inode */ static inline struct nfs_inode *NFS_I(struct inode *inode) { @@ -237,7 +242,7 @@ static inline int nfs_caches_unstable(struct inode *inode) static inline void NFS_CACHEINV(struct inode *inode) { if (!nfs_caches_unstable(inode)) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; } static inline int nfs_server_capable(struct inode *inode, int cap) -- cgit v1.2.3 From 412d582ec1dd59aab2353f8cb7e74f2c79cd20b9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:11 -0700 Subject: [PATCH] NFS: use atomic bitops to manipulate flags in nfsi->flags Introduce atomic bitops to manipulate the bits in the nfs_inode structure's "flags" field. Using bitops means we can use a generic wait_on_bit call instead of an ad hoc locking scheme in fs/nfs/inode.c, so we can remove the "nfs_i_wait" field from nfs_inode at the same time. The other new flags field will continue to use bitmask and logic AND and OR. This permits several flags to be set at the same time efficiently. The following patch adds a spin lock to protect these flags, and this spin lock will later cover other fields in the nfs_inode structure, amortizing the cost of using this type of serialization. Test plan: Millions of fsx ops on SMP clients. 
Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 4 +-- fs/nfs/inode.c | 69 +++++++++++++++++++++++++++++++------------------- include/linux/nfs_fs.h | 19 ++++++-------- 3 files changed, 53 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5732e13cd0da..27cf5577f239 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -182,7 +182,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); desc->plus = 0; goto again; } @@ -545,7 +545,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) break; } if (res == -ETOOSMALL && desc->plus) { - NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); nfs_zap_caches(inode); desc->plus = 0; desc->entry->eof = 0; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 622184553516..ee27578277f3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -739,7 +739,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) - NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -849,26 +849,43 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } } +static int nfs_wait_schedule(void *word) +{ + if (signal_pending(current)) + return -ERESTARTSYS; + schedule(); + return 0; +} + /* * Wait for the inode to get unlocked. - * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). 
*/ -static int -nfs_wait_on_inode(struct inode *inode, int flag) +static int nfs_wait_on_inode(struct inode *inode) { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_inode *nfsi = NFS_I(inode); - + sigset_t oldmask; int error; - if (!(NFS_FLAGS(inode) & flag)) - return 0; + atomic_inc(&inode->i_count); - error = nfs_wait_event(clnt, nfsi->nfs_i_wait, - !(NFS_FLAGS(inode) & flag)); + rpc_clnt_sigmask(clnt, &oldmask); + error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, + nfs_wait_schedule, TASK_INTERRUPTIBLE); + rpc_clnt_sigunmask(clnt, &oldmask); iput(inode); + return error; } +static void nfs_wake_up_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); + smp_mb__after_clear_bit(); + wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); +} + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -1029,18 +1046,19 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (NFS_STALE(inode)) goto out_nowait; - while (NFS_REVALIDATING(inode)) { - status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); - if (status < 0) - goto out_nowait; - if (NFS_ATTRTIMEO(inode) == 0) - continue; - if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) - continue; - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; + status = nfs_wait_on_inode(inode); + if (status < 0) + goto out; + if (NFS_STALE(inode)) { + status = -ESTALE; + /* Do we trust the cached ESTALE? 
*/ + if (NFS_ATTRTIMEO(inode) != 0) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) { + /* no */ + } else + goto out; + } } - NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; /* Protect against RPC races by saving the change attribute */ verifier = nfs_save_change_attribute(inode); @@ -1052,7 +1070,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (status == -ESTALE) { nfs_zap_caches(inode); if (!S_ISDIR(inode->i_mode)) - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); } goto out; } @@ -1083,9 +1101,9 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) inode->i_sb->s_id, (long long)NFS_FILEID(inode)); -out: - NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&nfsi->nfs_i_wait); + out: + nfs_wake_up_inode(inode); + out_nowait: unlock_kernel(); return status; @@ -1404,7 +1422,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign */ nfs_invalidate_inode(inode); out_err: - NFS_FLAGS(inode) |= NFS_INO_STALE; + set_bit(NFS_INO_STALE, &NFS_FLAGS(inode)); return -ESTALE; } @@ -1996,7 +2014,6 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; - init_waitqueue_head(&nfsi->nfs_i_wait); nfs4_init_once(nfsi); } } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 229a1755842a..deef9567788a 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -112,8 +112,8 @@ struct nfs_inode { /* * Various flags */ - unsigned int flags; - unsigned long cache_validity; + unsigned long flags; /* atomic bit ops */ + unsigned long cache_validity; /* bit mask */ /* * read_cache_jiffies is when we started read-caching this inode, @@ -175,8 +175,6 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; - wait_queue_head_t nfs_i_wait; - #ifdef CONFIG_NFS_V4 struct nfs4_cached_acl *nfs4_acl; /* 
NFSv4 state */ @@ -199,11 +197,11 @@ struct nfs_inode { #define NFS_INO_REVAL_PAGECACHE 0x0020 /* must revalidate pagecache */ /* - * Legal values of flags field + * Bit offsets in flags field */ -#define NFS_INO_REVALIDATING 0x0001 /* revalidating attrs */ -#define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ -#define NFS_INO_STALE 0x0004 /* possible stale inode */ +#define NFS_INO_REVALIDATING (0) /* revalidating attrs */ +#define NFS_INO_ADVISE_RDPLUS (1) /* advise readdirplus */ +#define NFS_INO_STALE (2) /* possible stale inode */ static inline struct nfs_inode *NFS_I(struct inode *inode) { @@ -229,8 +227,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp) #define NFS_FLAGS(inode) (NFS_I(inode)->flags) -#define NFS_REVALIDATING(inode) (NFS_FLAGS(inode) & NFS_INO_REVALIDATING) -#define NFS_STALE(inode) (NFS_FLAGS(inode) & NFS_INO_STALE) +#define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) @@ -252,7 +249,7 @@ static inline int nfs_server_capable(struct inode *inode, int cap) static inline int NFS_USE_READDIRPLUS(struct inode *inode) { - return NFS_FLAGS(inode) & NFS_INO_ADVISE_RDPLUS; + return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } /** -- cgit v1.2.3 From dc59250c6ebed099a9bc0a11298e2281dd896657 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 18 Aug 2005 11:24:12 -0700 Subject: [PATCH] NFS: Introduce the use of inode->i_lock to protect fields in nfsi Down the road we want to eliminate the use of the global kernel lock entirely from the NFS client. To do this, we need to protect the fields in the nfs_inode structure adequately. Start by serializing updates to the "cache_validity" field. 
Note this change addresses an SMP hang found by njw@osdl.org, where processes deadlock because nfs_end_data_update and nfs_revalidate_mapping update the "cache_validity" field without proper serialization. Test plan: Millions of fsx ops on SMP clients. Run Nick Wilson's breaknfs program on large SMP clients. Signed-off-by: Chuck Lever Cc: Trond Myklebust Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/dir.c | 7 +++++++ fs/nfs/inode.c | 34 +++++++++++++++++++++++++++++++--- fs/nfs/nfs3acl.c | 2 ++ fs/nfs/read.c | 4 ++++ include/linux/nfs_fs.h | 5 ++++- 5 files changed, 48 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 27cf5577f239..147cbf9261ce 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -189,7 +189,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } SetPageUptodate(page); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_sem or some other mechanism. 
@@ -462,7 +464,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -1596,7 +1600,10 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) put_rpccred(cache->cred); cache->cred = get_rpccred(set->cred); } + /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); cache->jiffies = set->jiffies; cache->mask = set->mask; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ee27578277f3..541b418327c8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -615,6 +615,8 @@ nfs_zap_caches(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + spin_lock(&inode->i_lock); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; @@ -623,6 +625,8 @@ nfs_zap_caches(struct inode *inode) nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; + + spin_unlock(&inode->i_lock); } static void nfs_zap_acl_cache(struct inode *inode) @@ -632,7 +636,9 @@ static void nfs_zap_acl_cache(struct inode *inode) clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; if (clear_acl_cache != NULL) clear_acl_cache(inode); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } /* @@ -841,7 +847,9 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = 
attr->ia_gid; + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + spin_unlock(&inode->i_lock); } if ((attr->ia_valid & ATTR_SIZE) != 0) { inode->i_size = attr->ia_size; @@ -1082,6 +1090,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode), status); goto out; } + spin_lock(&inode->i_lock); cache_validity = nfsi->cache_validity; nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE; @@ -1091,6 +1100,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) */ if (verifier == nfsi->cache_change_attribute) nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + spin_unlock(&inode->i_lock); nfs_revalidate_mapping(inode, inode->i_mapping); @@ -1149,12 +1159,16 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) nfs_wb_all(inode); } invalidate_inode_pages2(mapping); + + spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); /* This ensures we revalidate child dentries */ nfsi->cache_change_attribute++; } + spin_unlock(&inode->i_lock); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1184,10 +1198,12 @@ void nfs_end_data_update(struct inode *inode) if (!nfs_have_delegation(inode, FMODE_READ)) { /* Mark the attribute cache for revalidation */ + spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR; /* Directories and symlinks: invalidate page cache too */ if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); @@ -1212,6 +1228,8 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) if (nfs_have_delegation(inode, FMODE_READ)) return 0; + spin_lock(&inode->i_lock); + 
/* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1226,13 +1244,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) } } - if ((fattr->valid & NFS_ATTR_FATTR) == 0) + if ((fattr->valid & NFS_ATTR_FATTR) == 0) { + spin_unlock(&inode->i_lock); return 0; + } /* Has the inode gone and changed behind our back? */ if (nfsi->fileid != fattr->fileid - || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); return -EIO; + } cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); @@ -1271,6 +1293,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->timestamp; + spin_unlock(&inode->i_lock); return 0; } @@ -1309,11 +1332,15 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign goto out_err; } + spin_lock(&inode->i_lock); + /* * Make sure the inode's type hasn't changed. */ - if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) { + spin_unlock(&inode->i_lock); goto out_changed; + } /* * Update the read time so we don't revalidate too often. 
@@ -1406,6 +1433,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsign if (!nfs_have_delegation(inode, FMODE_READ)) nfsi->cache_validity |= invalid; + spin_unlock(&inode->i_lock); return 0; out_changed: /* diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index a020e650ffc2..6a5bbc0ae941 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -308,7 +308,9 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, nfs_begin_data_update(inode); status = rpc_call(server->client_acl, ACLPROC3_SETACL, &args, &fattr, 0); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 90df0500ca1b..6ceb1d471f20 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -140,7 +140,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, if (rdata->res.eof != 0 || result == 0) break; } while (count); + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); if (count) memclear_highpage_flush(page, rdata->args.pgbase, count); @@ -473,7 +475,9 @@ void nfs_readpage_result(struct rpc_task *task) } task->tk_status = -EIO; } + spin_lock(&data->inode->i_lock); NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); data->complete(data, status); } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index deef9567788a..9a6047ff1b25 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -238,8 +238,11 @@ static inline int nfs_caches_unstable(struct inode *inode) static inline void NFS_CACHEINV(struct inode *inode) { - if (!nfs_caches_unstable(inode)) + if (!nfs_caches_unstable(inode)) { + spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); + } } 
static inline int nfs_server_capable(struct inode *inode, int cap) -- cgit v1.2.3 From b07e5eccaf512ae3209beae5cd2e3a27c92c300b Mon Sep 17 00:00:00 2001 From: Grant Coady Date: Thu, 18 Aug 2005 22:19:55 +0200 Subject: [PATCH] ide: fix PCI_DEVIEC_ID_APPLE_UNI_N_ATA spelling Signed-off-by: Grant Coady Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ppc/pmac.c | 2 +- include/linux/pci_ids.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index be0fcc8f4b15..ea65b070a367 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1664,7 +1664,7 @@ static struct macio_driver pmac_ide_macio_driver = }; static struct pci_device_id pmac_ide_pci_match[] = { - { PCI_VENDOR_ID_APPLE, PCI_DEVIEC_ID_APPLE_UNI_N_ATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_UNI_N_ATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_IPID_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_K2_ATA100, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_SH_ATA, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 51e61e96051c..953b4dc819ae 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -881,7 +881,7 @@ #define PCI_DEVICE_ID_APPLE_UNI_N_PCI15 0x002e #define PCI_DEVICE_ID_APPLE_UNI_N_FW2 0x0030 #define PCI_DEVICE_ID_APPLE_UNI_N_GMAC2 0x0032 -#define PCI_DEVIEC_ID_APPLE_UNI_N_ATA 0x0033 +#define PCI_DEVICE_ID_APPLE_UNI_N_ATA 0x0033 #define PCI_DEVICE_ID_APPLE_UNI_N_AGP2 0x0034 #define PCI_DEVICE_ID_APPLE_IPID_ATA100 0x003b #define PCI_DEVICE_ID_APPLE_KEYLARGO_I 0x003e -- cgit v1.2.3 From 2f09a7f4af131bf23c013ead89373deba1c7593c Mon Sep 17 00:00:00 2001 From: Matt Gillette Date: Thu, 18 Aug 2005 22:27:07 +0200 Subject: [PATCH] ide: add support for Netcell Revolution to pci-ide generic driver Adds support for Netcell Revolution 
to pci-ide generic driver by including it in the list of devices matched. Includes the Revolution in the list of simplex devices forced into DMA mode. Signed-off-by: Matt Gillette Cc: Bartlomiej Zolnierkiewicz Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/pci/generic.c | 7 +++++++ drivers/ide/setup-pci.c | 1 + include/linux/pci_ids.h | 3 +++ 3 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/pci/generic.c b/drivers/ide/pci/generic.c index da46577380f3..6e3ab0c38c4d 100644 --- a/drivers/ide/pci/generic.c +++ b/drivers/ide/pci/generic.c @@ -173,6 +173,12 @@ static ide_pci_device_t generic_chipsets[] __devinitdata = { .channels = 2, .autodma = NOAUTODMA, .bootable = ON_BOARD, + },{ /* 14 */ + .name = "Revolution", + .init_hwif = init_hwif_generic, + .channels = 2, + .autodma = AUTODMA, + .bootable = OFF_BOARD, } }; @@ -231,6 +237,7 @@ static struct pci_device_id generic_pci_tbl[] = { { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 11}, { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 12}, { PCI_VENDOR_ID_TOSHIBA,PCI_DEVICE_ID_TOSHIBA_PICCOLO_2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 13}, + { PCI_VENDOR_ID_NETCELL,PCI_DEVICE_ID_REVOLUTION, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 14}, /* Must come last. 
If you add entries adjust this table appropriately and the init_one code */ { PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE << 8, 0xFFFFFF00UL, 0}, { 0, }, diff --git a/drivers/ide/setup-pci.c b/drivers/ide/setup-pci.c index 77da827b2898..18ed7765417c 100644 --- a/drivers/ide/setup-pci.c +++ b/drivers/ide/setup-pci.c @@ -229,6 +229,7 @@ second_chance_to_dma: case PCI_DEVICE_ID_AMD_VIPER_7409: case PCI_DEVICE_ID_CMD_643: case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE: + case PCI_DEVICE_ID_REVOLUTION: simplex_stat = hwif->INB(dma_base + 2); hwif->OUTB((simplex_stat&0x60),(dma_base + 2)); simplex_stat = hwif->INB(dma_base + 2); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 953b4dc819ae..163036867ac7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2184,6 +2184,9 @@ #define PCI_VENDOR_ID_SIBYTE 0x166d #define PCI_DEVICE_ID_BCM1250_HT 0x0002 +#define PCI_VENDOR_ID_NETCELL 0x169c +#define PCI_DEVICE_ID_REVOLUTION 0x0044 + #define PCI_VENDOR_ID_LINKSYS 0x1737 #define PCI_DEVICE_ID_LINKSYS_EG1032 0x1032 #define PCI_DEVICE_ID_LINKSYS_EG1064 0x1064 -- cgit v1.2.3 From 84f57fbc724e3b56dc87c37dddac89f82cf75ef6 Mon Sep 17 00:00:00 2001 From: Narendra Sankar Date: Thu, 18 Aug 2005 22:30:35 +0200 Subject: [PATCH] serverworks: add support for new southbridge IDE BCM5785 (HT1000) is an Opteron Southbridge from Serverworks/Broadcom that incorporates a single channel ATA100 IDE controller that is functionally identical to the Serverworks CSB6 IDE controller. This patch adds support for the new PCI device ID and also the support for this controller. 
Signed-off-by: Narendra Sankar Acked-by: Jeff Garzik Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/pci/serverworks.c | 23 +++++++++++++++++++++++ include/linux/pci_ids.h | 1 + 2 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/drivers/ide/pci/serverworks.c b/drivers/ide/pci/serverworks.c index c6f5fa4b4ca6..ff2e217a8c84 100644 --- a/drivers/ide/pci/serverworks.c +++ b/drivers/ide/pci/serverworks.c @@ -21,6 +21,9 @@ * * CSB6: `Champion South Bridge' IDE Interface (optional: third channel) * + * HT1000: AKA BCM5785 - Hypertransport Southbridge for Opteron systems. IDE + * controller same as the CSB6. Single channel ATA100 only. + * * Documentation: * Available under NDA only. Errata info very hard to get. * @@ -71,6 +74,8 @@ static u8 svwks_ratemask (ide_drive_t *drive) if (!svwks_revision) pci_read_config_byte(dev, PCI_REVISION_ID, &svwks_revision); + if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) + return 2; if (dev->device == PCI_DEVICE_ID_SERVERWORKS_OSB4IDE) { u32 reg = 0; if (isa_dev) @@ -109,6 +114,7 @@ static u8 svwks_csb_check (struct pci_dev *dev) case PCI_DEVICE_ID_SERVERWORKS_CSB5IDE: case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE: case PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2: + case PCI_DEVICE_ID_SERVERWORKS_HT1000IDE: return 1; default: break; @@ -438,6 +444,13 @@ static unsigned int __devinit init_chipset_svwks (struct pci_dev *dev, const cha btr |= (svwks_revision >= SVWKS_CSB5_REVISION_NEW) ? 0x3 : 0x2; pci_write_config_byte(dev, 0x5A, btr); } + /* Setup HT1000 SouthBridge Controller - Single Channel Only */ + else if (dev->device == PCI_DEVICE_ID_SERVERWORKS_HT1000IDE) { + pci_read_config_byte(dev, 0x5A, &btr); + btr &= ~0x40; + btr |= 0x3; + pci_write_config_byte(dev, 0x5A, btr); + } return (dev->irq) ? 
dev->irq : 0; } @@ -629,6 +642,15 @@ static ide_pci_device_t serverworks_chipsets[] __devinitdata = { .channels = 1, /* 2 */ .autodma = AUTODMA, .bootable = ON_BOARD, + },{ /* 4 */ + .name = "SvrWks HT1000", + .init_setup = init_setup_svwks, + .init_chipset = init_chipset_svwks, + .init_hwif = init_hwif_svwks, + .init_dma = init_dma_svwks, + .channels = 1, /* 2 */ + .autodma = AUTODMA, + .bootable = ON_BOARD, } }; @@ -653,6 +675,7 @@ static struct pci_device_id svwks_pci_tbl[] = { { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB5IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 1}, { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 2}, { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 3}, + { PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT1000IDE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4}, { 0, }, }; MODULE_DEVICE_TABLE(pci, svwks_pci_tbl); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 163036867ac7..927ed487630d 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1580,6 +1580,7 @@ #define PCI_DEVICE_ID_SERVERWORKS_OSB4IDE 0x0211 #define PCI_DEVICE_ID_SERVERWORKS_CSB5IDE 0x0212 #define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE 0x0213 +#define PCI_DEVICE_ID_SERVERWORKS_HT1000IDE 0x0214 #define PCI_DEVICE_ID_SERVERWORKS_CSB6IDE2 0x0217 #define PCI_DEVICE_ID_SERVERWORKS_OSB4USB 0x0220 #define PCI_DEVICE_ID_SERVERWORKS_CSB5USB PCI_DEVICE_ID_SERVERWORKS_OSB4USB -- cgit v1.2.3 From d366b6436386875b1310ce8f70e3f9dea4647bac Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:40:08 +0100 Subject: [MMC] Add mmc_hostname() macro mmc_hostname() returns a pointer to the hostname for the mmc_host. 
Signed-off-by: Russell King --- drivers/mmc/mmc.c | 6 +++--- drivers/mmc/mmc_sysfs.c | 2 +- drivers/mmc/mmci.c | 4 ++-- drivers/mmc/wbsd.c | 2 +- include/linux/mmc/host.h | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index eeb9f6668e69..e02e5df80be9 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -361,7 +361,7 @@ static void mmc_decode_cid(struct mmc_card *card) default: printk("%s: card has unknown MMCA version %d\n", - card->host->host_name, card->csd.mmca_vsn); + mmc_hostname(card->host), card->csd.mmca_vsn); mmc_card_set_bad(card); break; } @@ -383,7 +383,7 @@ static void mmc_decode_csd(struct mmc_card *card) csd_struct = UNSTUFF_BITS(resp, 126, 2); if (csd_struct != 1 && csd_struct != 2) { printk("%s: unrecognised CSD structure version %d\n", - card->host->host_name, csd_struct); + mmc_hostname(card->host), csd_struct); mmc_card_set_bad(card); return; } @@ -551,7 +551,7 @@ static void mmc_discover_cards(struct mmc_host *host) } if (err != MMC_ERR_NONE) { printk(KERN_ERR "%s: error requesting CID: %d\n", - host->host_name, err); + mmc_hostname(host), err); break; } diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 5556cd3b5559..3a6b325a9149 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -206,7 +206,7 @@ void mmc_init_card(struct mmc_card *card, struct mmc_host *host) int mmc_register_card(struct mmc_card *card) { snprintf(card->dev.bus_id, sizeof(card->dev.bus_id), - "%s:%04x", card->host->host_name, card->rca); + "%s:%04x", mmc_hostname(card->host), card->rca); return device_add(&card->dev); } diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 7a42966d755b..716c4ef4faf6 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -34,7 +34,7 @@ #ifdef CONFIG_MMC_DEBUG #define DBG(host,fmt,args...) 
\ - pr_debug("%s: %s: " fmt, host->mmc->host_name, __func__ , args) + pr_debug("%s: %s: " fmt, mmc_hostname(host->mmc), __func__ , args) #else #define DBG(host,fmt,args...) do { } while (0) #endif @@ -541,7 +541,7 @@ static int mmci_probe(struct amba_device *dev, void *id) mmc_add_host(mmc); printk(KERN_INFO "%s: MMCI rev %x cfg %02x at 0x%08lx irq %d,%d\n", - mmc->host_name, amba_rev(dev), amba_config(dev), + mmc_hostname(mmc), amba_rev(dev), amba_config(dev), dev->res.start, dev->irq[0], dev->irq[1]); init_timer(&host->timer); diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c index 974f2f36bdbe..402c2d661fb2 100644 --- a/drivers/mmc/wbsd.c +++ b/drivers/mmc/wbsd.c @@ -1796,7 +1796,7 @@ static int __devinit wbsd_init(struct device* dev, int base, int irq, int dma, mmc_add_host(mmc); - printk(KERN_INFO "%s: W83L51xD", mmc->host_name); + printk(KERN_INFO "%s: W83L51xD", mmc_hostname(mmc)); if (host->chip_id != 0) printk(" id %x", (int)host->chip_id); printk(" at 0x%x irq %d", (int)host->base, (int)host->irq); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f90f674eb3b0..307862308596 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -97,6 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) +#define mmc_hostname(x) ((x)->host_name) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); -- cgit v1.2.3 From 00b137cfda5276b3d2c87d44236fe4c5ee68b405 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:41:24 +0100 Subject: [MMC] Add MMC class devices Create a mmc_host class to allow enumeration of MMC host controllers even though they have no card(s) inserted. Patch based on work by Pierre Ossman. 
Signed-off-by: Russell King --- drivers/mmc/mmc.c | 23 +++++++--------- drivers/mmc/mmc.h | 5 ++++ drivers/mmc/mmc_sysfs.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/mmc/host.h | 1 + 4 files changed, 87 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index e02e5df80be9..3c5904834fe8 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -796,17 +796,13 @@ struct mmc_host *mmc_alloc_host(int extra, struct device *dev) { struct mmc_host *host; - host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); + host = mmc_alloc_host_sysfs(extra, dev); if (host) { - memset(host, 0, sizeof(struct mmc_host) + extra); - spin_lock_init(&host->lock); init_waitqueue_head(&host->wq); INIT_LIST_HEAD(&host->cards); INIT_WORK(&host->detect, mmc_rescan, host); - host->dev = dev; - /* * By default, hosts do not support SGIO or large requests. * They have to set these according to their abilities. @@ -828,15 +824,15 @@ EXPORT_SYMBOL(mmc_alloc_host); */ int mmc_add_host(struct mmc_host *host) { - static unsigned int host_num; + int ret; - snprintf(host->host_name, sizeof(host->host_name), - "mmc%d", host_num++); - - mmc_power_off(host); - mmc_detect_change(host); + ret = mmc_add_host_sysfs(host); + if (ret == 0) { + mmc_power_off(host); + mmc_detect_change(host); + } - return 0; + return ret; } EXPORT_SYMBOL(mmc_add_host); @@ -859,6 +855,7 @@ void mmc_remove_host(struct mmc_host *host) } mmc_power_off(host); + mmc_remove_host_sysfs(host); } EXPORT_SYMBOL(mmc_remove_host); @@ -872,7 +869,7 @@ EXPORT_SYMBOL(mmc_remove_host); void mmc_free_host(struct mmc_host *host) { flush_scheduled_work(); - kfree(host); + mmc_free_host_sysfs(host); } EXPORT_SYMBOL(mmc_free_host); diff --git a/drivers/mmc/mmc.h b/drivers/mmc/mmc.h index b498dffe0b11..97bae00292fa 100644 --- a/drivers/mmc/mmc.h +++ b/drivers/mmc/mmc.h @@ -13,4 +13,9 @@ void mmc_init_card(struct mmc_card *card, struct mmc_host *host); int 
mmc_register_card(struct mmc_card *card); void mmc_remove_card(struct mmc_card *card); + +struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev); +int mmc_add_host_sysfs(struct mmc_host *host); +void mmc_remove_host_sysfs(struct mmc_host *host); +void mmc_free_host_sysfs(struct mmc_host *host); #endif diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 3a6b325a9149..96c192057df3 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -20,6 +20,7 @@ #define dev_to_mmc_card(d) container_of(d, struct mmc_card, dev) #define to_mmc_driver(d) container_of(d, struct mmc_driver, drv) +#define cls_dev_to_mmc_host(d) container_of(d, struct mmc_host, class_dev) #define MMC_ATTR(name, fmt, args...) \ static ssize_t mmc_##name##_show (struct device *dev, struct device_attribute *attr, char *buf) \ @@ -224,13 +225,82 @@ void mmc_remove_card(struct mmc_card *card) } +static void mmc_host_classdev_release(struct class_device *dev) +{ + struct mmc_host *host = cls_dev_to_mmc_host(dev); + kfree(host); +} + +static struct class mmc_host_class = { + .name = "mmc_host", + .release = mmc_host_classdev_release, +}; + +/* + * Internal function. Allocate a new MMC host. + */ +struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) +{ + struct mmc_host *host; + + host = kmalloc(sizeof(struct mmc_host) + extra, GFP_KERNEL); + if (host) { + memset(host, 0, sizeof(struct mmc_host) + extra); + + host->dev = dev; + host->class_dev.dev = host->dev; + host->class_dev.class = &mmc_host_class; + class_device_initialize(&host->class_dev); + } + + return host; +} + +/* + * Internal function. Register a new MMC host with the MMC class. + */ +int mmc_add_host_sysfs(struct mmc_host *host) +{ + static unsigned int host_num; + + snprintf(host->host_name, sizeof(host->host_name), + "mmc%d", host_num++); + + strlcpy(host->class_dev.class_id, host->host_name, BUS_ID_SIZE); + return class_device_add(&host->class_dev); +} + +/* + * Internal function. 
Unregister a MMC host with the MMC class. + */ +void mmc_remove_host_sysfs(struct mmc_host *host) +{ + class_device_del(&host->class_dev); +} + +/* + * Internal function. Free a MMC host. + */ +void mmc_free_host_sysfs(struct mmc_host *host) +{ + class_device_put(&host->class_dev); +} + + static int __init mmc_init(void) { - return bus_register(&mmc_bus_type); + int ret = bus_register(&mmc_bus_type); + if (ret == 0) { + ret = class_register(&mmc_host_class); + if (ret) + bus_unregister(&mmc_bus_type); + } + return ret; } static void __exit mmc_exit(void) { + class_unregister(&mmc_host_class); bus_unregister(&mmc_bus_type); } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 307862308596..a74a810a1302 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -63,6 +63,7 @@ struct device; struct mmc_host { struct device *dev; + struct class_device class_dev; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; -- cgit v1.2.3 From 1ad434d7cf5f490c71cfbbb2fb91076c01c8704e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:21 +0100 Subject: [MMC] Use class device name for mmc host name There's no point in having the host name duplicated between the mmc_host structure and the encapsulated class device structure. 
Signed-off-by: Russell King --- drivers/mmc/mmc_sysfs.c | 3 +-- include/linux/mmc/host.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 96c192057df3..34fa4a3a02d5 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -263,10 +263,9 @@ int mmc_add_host_sysfs(struct mmc_host *host) { static unsigned int host_num; - snprintf(host->host_name, sizeof(host->host_name), + snprintf(host->class_dev.class_id, BUS_ID_SIZE, "mmc%d", host_num++); - strlcpy(host->class_dev.class_id, host->host_name, BUS_ID_SIZE); return class_device_add(&host->class_dev); } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a74a810a1302..113cc27865f0 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -68,7 +68,6 @@ struct mmc_host { unsigned int f_min; unsigned int f_max; u32 ocr_avail; - char host_name[8]; /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ @@ -98,7 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) -#define mmc_hostname(x) ((x)->host_name) +#define mmc_hostname(x) ((x)->class_dev.class_id) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); -- cgit v1.2.3 From dce773771834221817e2d359a7e07a618ba08807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:52 +0100 Subject: [MMC] Use an IDR for host name indicies Signed-off-by: Russell King --- drivers/mmc/mmc_sysfs.c | 21 +++++++++++++++++++-- include/linux/mmc/host.h | 1 + 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c index 34fa4a3a02d5..ad8949810fc5 100644 --- a/drivers/mmc/mmc_sysfs.c +++ b/drivers/mmc/mmc_sysfs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include 
#include @@ -236,6 +237,9 @@ static struct class mmc_host_class = { .release = mmc_host_classdev_release, }; +static DEFINE_IDR(mmc_host_idr); +static DEFINE_SPINLOCK(mmc_host_lock); + /* * Internal function. Allocate a new MMC host. */ @@ -261,10 +265,19 @@ struct mmc_host *mmc_alloc_host_sysfs(int extra, struct device *dev) */ int mmc_add_host_sysfs(struct mmc_host *host) { - static unsigned int host_num; + int err; + + if (!idr_pre_get(&mmc_host_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmc_host_lock); + err = idr_get_new(&mmc_host_idr, host, &host->index); + spin_unlock(&mmc_host_lock); + if (err) + return err; snprintf(host->class_dev.class_id, BUS_ID_SIZE, - "mmc%d", host_num++); + "mmc%d", host->index); return class_device_add(&host->class_dev); } @@ -275,6 +288,10 @@ int mmc_add_host_sysfs(struct mmc_host *host) void mmc_remove_host_sysfs(struct mmc_host *host) { class_device_del(&host->class_dev); + + spin_lock(&mmc_host_lock); + idr_remove(&mmc_host_idr, host->index); + spin_unlock(&mmc_host_lock); } /* diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 113cc27865f0..9a0893f3249e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -64,6 +64,7 @@ struct device; struct mmc_host { struct device *dev; struct class_device class_dev; + int index; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; -- cgit v1.2.3 From cc314eef0128a807e50fa03baf2d0abc0647952c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 19 Aug 2005 18:02:56 -0700 Subject: Fix nasty ncpfs symlink handling bug. This bug could cause oopses and page state corruption, because ncpfs used the generic page-cache symlink handling functions. But those functions only work if the page cache is guaranteed to be "stable", i.e. a page that was installed when the symlink walk was started has to still be installed in the page cache at the end of the walk. 
We could have fixed ncpfs to not use the generic helper routines, but it is in many ways much cleaner to instead improve on the symlink walking helper routines so that they don't require that absolute stability. We do this by allowing "follow_link()" to return an error-pointer as a cookie, which is fed back to the cleanup "put_link()" routine. This also simplifies NFS symlink handling. Signed-off-by: Linus Torvalds --- fs/autofs/symlink.c | 5 +++-- fs/cifs/cifsfs.h | 4 ++-- fs/cifs/link.c | 6 +++--- fs/ext2/symlink.c | 4 ++-- fs/ext3/symlink.c | 4 ++-- fs/namei.c | 40 +++++++++++++++++++++------------------- fs/nfs/symlink.c | 37 ++++++++----------------------------- fs/sysfs/symlink.c | 6 +++--- include/linux/fs.h | 8 ++++---- mm/shmem.c | 17 ++++++----------- 10 files changed, 54 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index f028396f1383..52e8772b066e 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -12,11 +12,12 @@ #include "autofs_i.h" -static int autofs_follow_link(struct dentry *dentry, struct nameidata *nd) +/* Nothing to release.. 
*/ +static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd) { char *s=((struct autofs_symlink *)dentry->d_inode->u.generic_ip)->data; nd_set_link(nd, s); - return 0; + return NULL; } struct inode_operations autofs_symlink_inode_operations = { diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 78af5850c558..1fd21f66f243 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -83,8 +83,8 @@ extern int cifs_dir_notify(struct file *, unsigned long arg); extern struct dentry_operations cifs_dentry_ops; /* Functions related to symlinks */ -extern int cifs_follow_link(struct dentry *direntry, struct nameidata *nd); -extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd); +extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); +extern void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/link.c b/fs/cifs/link.c index bde0fabfece0..ab925ef4f863 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -92,7 +92,7 @@ cifs_hl_exit: return rc; } -int +void * cifs_follow_link(struct dentry *direntry, struct nameidata *nd) { struct inode *inode = direntry->d_inode; @@ -148,7 +148,7 @@ out: out_no_free: FreeXid(xid); nd_set_link(nd, target_path); - return 0; + return NULL; /* No cookie */ } int @@ -330,7 +330,7 @@ cifs_readlink(struct dentry *direntry, char __user *pBuffer, int buflen) return rc; } -void cifs_put_link(struct dentry *direntry, struct nameidata *nd) +void cifs_put_link(struct dentry *direntry, struct nameidata *nd, void *cookie) { char *p = nd_get_link(nd); if (!IS_ERR(p)) diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 9f7bac01d557..1e67d87cfa91 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -21,11 +21,11 @@ #include "xattr.h" #include -static int ext2_follow_link(struct dentry *dentry, struct 
nameidata *nd) +static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext2_inode_info *ei = EXT2_I(dentry->d_inode); nd_set_link(nd, (char *)ei->i_data); - return 0; + return NULL; } struct inode_operations ext2_symlink_inode_operations = { diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c index 8c3e72818fb0..4f79122cde67 100644 --- a/fs/ext3/symlink.c +++ b/fs/ext3/symlink.c @@ -23,11 +23,11 @@ #include #include "xattr.h" -static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd) +static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) { struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); nd_set_link(nd, (char*)ei->i_data); - return 0; + return NULL; } struct inode_operations ext3_symlink_inode_operations = { diff --git a/fs/namei.c b/fs/namei.c index b85f158aef0c..6ec1f0fefc5b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -501,6 +501,7 @@ struct path { static inline int __do_follow_link(struct path *path, struct nameidata *nd) { int error; + void *cookie; struct dentry *dentry = path->dentry; touch_atime(path->mnt, dentry); @@ -508,13 +509,15 @@ static inline int __do_follow_link(struct path *path, struct nameidata *nd) if (path->mnt == nd->mnt) mntget(path->mnt); - error = dentry->d_inode->i_op->follow_link(dentry, nd); - if (!error) { + cookie = dentry->d_inode->i_op->follow_link(dentry, nd); + error = PTR_ERR(cookie); + if (!IS_ERR(cookie)) { char *s = nd_get_link(nd); + error = 0; if (s) error = __vfs_follow_link(nd, s); if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, nd); + dentry->d_inode->i_op->put_link(dentry, nd, cookie); } dput(dentry); mntput(path->mnt); @@ -2344,15 +2347,17 @@ out: int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { struct nameidata nd; - int res; + void *cookie; + nd.depth = 0; - res = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (!res) { - res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); + 
cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); + if (!IS_ERR(cookie)) { + int res = vfs_readlink(dentry, buffer, buflen, nd_get_link(&nd)); if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd); + dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + cookie = ERR_PTR(res); } - return res; + return PTR_ERR(cookie); } int vfs_follow_link(struct nameidata *nd, const char *link) @@ -2395,23 +2400,20 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) return res; } -int page_follow_link_light(struct dentry *dentry, struct nameidata *nd) +void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) { - struct page *page; + struct page *page = NULL; nd_set_link(nd, page_getlink(dentry, &page)); - return 0; + return page; } -void page_put_link(struct dentry *dentry, struct nameidata *nd) +void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - if (!IS_ERR(nd_get_link(nd))) { - struct page *page; - page = find_get_page(dentry->d_inode->i_mapping, 0); - if (!page) - BUG(); + struct page *page = cookie; + + if (page) { kunmap(page); page_cache_release(page); - page_cache_release(page); } } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 35f106599144..18dc95b0b646 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -27,26 +27,14 @@ /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. - * - * At the beginning of the page we store pointer to struct page in question, - * simplifying nfs_put_link() (if inode got invalidated we can't find the page - * to be freed via pagecache lookup). - * The NUL-terminated string follows immediately thereafter. 
*/ -struct nfs_symlink { - struct page *page; - char body[0]; -}; - static int nfs_symlink_filler(struct inode *inode, struct page *page) { - const unsigned int pgbase = offsetof(struct nfs_symlink, body); - const unsigned int pglen = PAGE_SIZE - pgbase; int error; lock_kernel(); - error = NFS_PROTO(inode)->readlink(inode, page, pgbase, pglen); + error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE); unlock_kernel(); if (error < 0) goto error; @@ -60,11 +48,10 @@ error: return -EIO; } -static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct page *page; - struct nfs_symlink *p; void *err = ERR_PTR(nfs_revalidate_inode(NFS_SERVER(inode), inode)); if (err) goto read_failed; @@ -78,28 +65,20 @@ static int nfs_follow_link(struct dentry *dentry, struct nameidata *nd) err = ERR_PTR(-EIO); goto getlink_read_error; } - p = kmap(page); - p->page = page; - nd_set_link(nd, p->body); - return 0; + nd_set_link(nd, kmap(page)); + return page; getlink_read_error: page_cache_release(page); read_failed: nd_set_link(nd, err); - return 0; + return NULL; } -static void nfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void nfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { - char *s = nd_get_link(nd); - if (!IS_ERR(s)) { - struct nfs_symlink *p; - struct page *page; - - p = container_of(s, struct nfs_symlink, body[0]); - page = p->page; - + if (cookie) { + struct page *page = cookie; kunmap(page); page_cache_release(page); } diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index fae57c83a722..de402fa915f2 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -151,17 +151,17 @@ static int sysfs_getlink(struct dentry *dentry, char * path) } -static int sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *sysfs_follow_link(struct dentry *dentry, struct nameidata *nd) { int 
error = -ENOMEM; unsigned long page = get_zeroed_page(GFP_KERNEL); if (page) error = sysfs_getlink(dentry, (char *) page); nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); - return 0; + return NULL; } -static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd) +static void sysfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { char *page = nd_get_link(nd); if (!IS_ERR(page)) diff --git a/include/linux/fs.h b/include/linux/fs.h index f9adf75fd9b4..67e6732d4fdc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -993,8 +993,8 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); int (*readlink) (struct dentry *, char __user *,int); - int (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *); + void * (*follow_link) (struct dentry *, struct nameidata *); + void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); @@ -1602,8 +1602,8 @@ extern struct file_operations generic_ro_fops; extern int vfs_readlink(struct dentry *, char __user *, int, const char *); extern int vfs_follow_link(struct nameidata *, const char *); extern int page_readlink(struct dentry *, char __user *, int); -extern int page_follow_link_light(struct dentry *, struct nameidata *); -extern void page_put_link(struct dentry *, struct nameidata *); +extern void *page_follow_link_light(struct dentry *, struct nameidata *); +extern void page_put_link(struct dentry *, struct nameidata *, void *); extern int page_symlink(struct inode *inode, const char *symname, int len); extern struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); diff --git a/mm/shmem.c b/mm/shmem.c index e64fa726a790..5a81b1ee4f7a 100644 --- a/mm/shmem.c +++ 
b/mm/shmem.c @@ -1773,32 +1773,27 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s return 0; } -static int shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_link_inline(struct dentry *dentry, struct nameidata *nd) { nd_set_link(nd, (char *)SHMEM_I(dentry->d_inode)); - return 0; + return NULL; } -static int shmem_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *shmem_follow_link(struct dentry *dentry, struct nameidata *nd) { struct page *page = NULL; int res = shmem_getpage(dentry->d_inode, 0, &page, SGP_READ, NULL); nd_set_link(nd, res ? ERR_PTR(res) : kmap(page)); - return 0; + return page; } -static void shmem_put_link(struct dentry *dentry, struct nameidata *nd) +static void shmem_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { if (!IS_ERR(nd_get_link(nd))) { - struct page *page; - - page = find_get_page(dentry->d_inode->i_mapping, 0); - if (!page) - BUG(); + struct page *page = cookie; kunmap(page); mark_page_accessed(page); page_cache_release(page); - page_cache_release(page); } } -- cgit v1.2.3 From c1389503710ef4b4e5d21bea284afde19e9619cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Aug 2005 14:59:24 +0900 Subject: [PATCH] fix atapi_packet_task vs. intr race (take 2) Interrupts from devices sharing the same IRQ could cause ata_host_intr to finish commands being processed by atapi_packet_task if the commands are using ATA_PROT_ATAPI_NODATA or ATA_PROT_ATAPI_DMA protocol. This is because libata interrupt handler is unaware that interrupts are not expected during that period. This patch adds ATA_FLAG_NOINTR flag to tell the interrupt handler that we're not expecting interrupts. Note that once proper HSM is implemented for interrupt-driven PIO, this should be merged into it and this flag will be removed. ahci.c is a different kind of beast, so it's left alone. 
* The following drivers use ata_qc_issue_prot and ata_interrupt, so changes in libata core will do. ata_piix sata_sil sata_svw sata_via sata_sis sata_uli * The following drivers use ata_qc_issue_prot and custom intr handler. They need this change to work correctly. sata_nv sata_vsc * The following drivers use custom issue function and intr handler. Currently all custom issue functions don't support ATAPI, so this change is irrelevant, updated for consistency and to avoid later mistakes. sata_promise sata_qstor sata_sx4 Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 33 ++++++++++++++++++++++----------- drivers/scsi/sata_nv.c | 3 ++- drivers/scsi/sata_promise.c | 3 ++- drivers/scsi/sata_qstor.c | 6 ++++-- drivers/scsi/sata_sx4.c | 3 ++- drivers/scsi/sata_vsc.c | 3 ++- include/linux/libata.h | 2 ++ 7 files changed, 36 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9a6aacf467b8..c92439fe5dae 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3350,11 +3350,13 @@ int ata_qc_issue_prot(struct ata_queued_cmd *qc) break; case ATA_PROT_ATAPI_NODATA: + ap->flags |= ATA_FLAG_NOINTR; ata_tf_to_host_nolock(ap, &qc->tf); queue_work(ata_wq, &ap->packet_task); break; case ATA_PROT_ATAPI_DMA: + ap->flags |= ATA_FLAG_NOINTR; ap->ops->tf_load(ap, &qc->tf); /* load tf registers */ ap->ops->bmdma_setup(qc); /* set up bmdma */ queue_work(ata_wq, &ap->packet_task); @@ -3708,7 +3710,8 @@ irqreturn_t ata_interrupt (int irq, void *dev_instance, struct pt_regs *regs) struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); @@ -3760,19 +3763,27 @@ static void atapi_packet_task(void *_data) /* send SCSI cdb */ DPRINTK("send cdb\n"); assert(ap->cdb_len >= 12); - 
ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); - /* if we are DMA'ing, irq handler takes over from here */ - if (qc->tf.protocol == ATA_PROT_ATAPI_DMA) - ap->ops->bmdma_start(qc); /* initiate bmdma */ + if (qc->tf.protocol == ATA_PROT_ATAPI_DMA || + qc->tf.protocol == ATA_PROT_ATAPI_NODATA) { + unsigned long flags; - /* non-data commands are also handled via irq */ - else if (qc->tf.protocol == ATA_PROT_ATAPI_NODATA) { - /* do nothing */ - } + /* Once we're done issuing command and kicking bmdma, + * irq handler takes over. To not lose irq, we need + * to clear NOINTR flag before sending cdb, but + * interrupt handler shouldn't be invoked before we're + * finished. Hence, the following locking. + */ + spin_lock_irqsave(&ap->host_set->lock, flags); + ap->flags &= ~ATA_FLAG_NOINTR; + ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); + if (qc->tf.protocol == ATA_PROT_ATAPI_DMA) + ap->ops->bmdma_start(qc); /* initiate bmdma */ + spin_unlock_irqrestore(&ap->host_set->lock, flags); + } else { + ata_data_xfer(ap, qc->cdb, ap->cdb_len, 1); - /* PIO commands are handled by polling */ - else { + /* PIO commands are handled by polling */ ap->pio_task_state = PIO_ST; queue_work(ata_wq, &ap->pio_task); } diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 9b9142790bd6..41a3421b02b4 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -291,7 +291,8 @@ static irqreturn_t nv_interrupt (int irq, void *dev_instance, struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index cc613b3c6ce6..6defd7962359 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -445,7 +445,8 @@ static irqreturn_t pdc_interrupt (int irq, void *dev_instance, struct pt_regs *r VPRINTK("port %u\n", i); ap = 
host_set->ports[i]; tmp = mask & (1 << (i + 1)); - if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (tmp && ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index dca9ed7ac760..08a84042ce09 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -386,7 +386,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set) DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n", sff1, sff0, port_no, sHST, sDST); handled = 1; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && !(ap->flags & + (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; struct qs_port_priv *pp = ap->private_data; if (!pp || pp->state != qs_state_pkt) @@ -417,7 +418,8 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set) for (port_no = 0; port_no < host_set->n_ports; ++port_no) { struct ata_port *ap; ap = host_set->ports[port_no]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; struct qs_port_priv *pp = ap->private_data; if (!pp || pp->state != qs_state_mmio) diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 76644ea62d67..e2db499f22dd 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -825,7 +825,8 @@ static irqreturn_t pdc20621_interrupt (int irq, void *dev_instance, struct pt_re ap = host_set->ports[port_no]; tmp = mask & (1 << i); VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp); - if (tmp && ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (tmp && ap && + !(ap->flags & (ATA_FLAG_PORT_DISABLED | ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index cb3a6d89cf00..6f2562171be0 
100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -173,7 +173,8 @@ static irqreturn_t vsc_sata_interrupt (int irq, void *dev_instance, struct ata_port *ap; ap = host_set->ports[i]; - if (ap && (!(ap->flags & ATA_FLAG_PORT_DISABLED))) { + if (ap && !(ap->flags & + (ATA_FLAG_PORT_DISABLED|ATA_FLAG_NOINTR))) { struct ata_queued_cmd *qc; qc = ata_qc_from_tag(ap, ap->active_tag); diff --git a/include/linux/libata.h b/include/linux/libata.h index 85b0aaee0ef8..724b7d1c18ea 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -113,6 +113,8 @@ enum { ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */ ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */ ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */ + ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once + * proper HSM is in place. */ ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */ -- cgit v1.2.3 From 888ba6c62bc61a995d283977eb3a6cbafd6f4ac6 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Aug 2005 12:07:20 -0400 Subject: [ACPI] delete CONFIG_ACPI_BOOT it has been a synonym for CONFIG_ACPI since 2.6.12 Signed-off-by: Len Brown --- arch/i386/Kconfig | 1 - arch/i386/defconfig | 1 - arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/acpi/Makefile | 2 +- arch/i386/kernel/io_apic.c | 4 ++-- arch/i386/kernel/mpparse.c | 10 ++++------ arch/i386/kernel/setup.c | 8 ++++---- arch/i386/mach-es7000/es7000plat.c | 4 ++-- arch/ia64/configs/bigsur_defconfig | 1 - arch/ia64/configs/sn2_defconfig | 1 - arch/ia64/configs/tiger_defconfig | 1 - arch/ia64/configs/zx1_defconfig | 1 - arch/ia64/defconfig | 1 - arch/ia64/kernel/acpi.c | 4 ++-- arch/ia64/kernel/setup.c | 4 ++-- arch/ia64/kernel/topology.c | 2 +- arch/x86_64/Kconfig | 2 -- arch/x86_64/defconfig | 1 - arch/x86_64/kernel/Makefile | 2 +- arch/x86_64/kernel/acpi/Makefile | 4 ++-- arch/x86_64/kernel/io_apic.c | 4 ++-- arch/x86_64/kernel/mpparse.c | 10 ++++------ 
arch/x86_64/kernel/setup.c | 8 ++++---- drivers/Makefile | 2 +- drivers/acpi/Kconfig | 6 +----- drivers/acpi/Makefile | 2 +- include/asm-i386/acpi.h | 4 ++-- include/asm-i386/fixmap.h | 2 +- include/asm-i386/io_apic.h | 4 ++-- include/asm-i386/mpspec.h | 4 ++-- include/asm-x86_64/acpi.h | 6 +++--- include/asm-x86_64/io_apic.h | 2 +- include/asm-x86_64/mpspec.h | 2 +- include/linux/acpi.h | 6 +++--- 34 files changed, 50 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 619d843ba231..9ba334908742 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1203,7 +1203,6 @@ config PCI_DIRECT config PCI_MMCONFIG bool depends on PCI && ACPI && (PCI_GOMMCONFIG || PCI_GOANY) - select ACPI_BOOT default y source "drivers/pci/pcie/Kconfig" diff --git a/arch/i386/defconfig b/arch/i386/defconfig index ca07b95c06b8..1c0076e22dda 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -131,7 +131,6 @@ CONFIG_SOFTWARE_SUSPEND=y # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_SLEEP=y CONFIG_ACPI_SLEEP_PROC_FS=y diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 4cc83b322b36..c52b4fad011b 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ obj-y += cpu/ obj-y += timers/ -obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o obj-$(CONFIG_MCA) += mca.o obj-$(CONFIG_X86_MSR) += msr.o diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 5e291a20c03d..267ca48e1b6c 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile @@ -1,4 +1,4 @@ -obj-$(CONFIG_ACPI_BOOT) := boot.o +obj-y := boot.o obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git 
a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 6578f40bd501..ebedd2e21670 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2421,7 +2421,7 @@ device_initcall(ioapic_init_sysfs); ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __init io_apic_get_unique_id (int ioapic, int apic_id) { @@ -2574,4 +2574,4 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a return 0; } -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index ce838abb27d8..9a4db7d30001 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -653,8 +653,6 @@ void __init get_smp_config (void) struct intel_mp_floating *mpf = mpf_found; /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. 
*/ @@ -810,7 +808,7 @@ void __init find_smp_config (void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI void __init mp_register_lapic_address ( u64 address) @@ -856,7 +854,7 @@ void __init mp_register_lapic ( MP_processor_info(&processor); } -#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT)) +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 @@ -1138,5 +1136,5 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) return gsi; } -#endif /*CONFIG_X86_IO_APIC && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT)*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ +#endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index af4de58cab54..d3943e5edc8f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -94,7 +94,7 @@ unsigned long mmu_cr4_features; #endif EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI int __initdata acpi_force = 0; extern acpi_interrupt_flags acpi_sci_flags; #endif @@ -794,7 +794,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter */ else if (!memcmp(from, "acpi=off", 8)) { disable_acpi(); @@ -850,7 +850,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) else if (!memcmp(from, "noapic", 6)) disable_ioapic_setup(); #endif /* CONFIG_X86_LOCAL_APIC */ -#endif /* CONFIG_ACPI_BOOT */ +#endif /* CONFIG_ACPI */ #ifdef CONFIG_X86_LOCAL_APIC /* enable local APIC */ @@ -1575,7 +1575,7 @@ void __init setup_arch(char **cmdline_p) if (efi_enabled) efi_map_memmap(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Parse the ACPI tables for possible boot-time SMP configuration. 
*/ diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c index d5936d500479..baac9da042ce 100644 --- a/arch/i386/mach-es7000/es7000plat.c +++ b/arch/i386/mach-es7000/es7000plat.c @@ -51,7 +51,7 @@ struct mip_reg *host_reg; int mip_port; unsigned long mip_addr, host_addr; -#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_ACPI_BOOT)) +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) /* * GSI override for ES7000 platforms. @@ -73,7 +73,7 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER || CONFIG_ACPI_BOOT) +#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER) /* * Parse the OEM Table diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index b95fcf86ea00..456c65689bef 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ -107,7 +107,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index dccf35c60b94..dc483c18343f 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -130,7 +130,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y # CONFIG_ACPI_BUTTON is not set CONFIG_ACPI_VIDEO=m diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index c853cfcd2d11..cd2d6375a853 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -128,7 +128,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m # CONFIG_ACPI_VIDEO is not set diff --git a/arch/ia64/configs/zx1_defconfig 
b/arch/ia64/configs/zx1_defconfig index 88e8867fa8e8..cf58404769a8 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -128,7 +128,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=y CONFIG_ACPI_VIDEO=m diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 8444add76380..f38c677f7afd 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -118,7 +118,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 78bc21987121..318787c84ac0 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -132,7 +132,7 @@ const char *acpi_get_sysname(void) #endif } -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI #define ACPI_MAX_PLATFORM_INTERRUPTS 256 @@ -917,4 +917,4 @@ int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) EXPORT_SYMBOL(acpi_unregister_ioapic); -#endif /* CONFIG_ACPI_BOOT */ +#endif /* CONFIG_ACPI */ diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 84f89da7c640..1f5c26dbe705 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -384,7 +384,7 @@ setup_arch (char **cmdline_p) if (early_console_setup(*cmdline_p) == 0) mark_bsp_online(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* Initialize the ACPI boot-time table parser */ acpi_table_init(); # ifdef CONFIG_ACPI_NUMA @@ -420,7 +420,7 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI acpi_boot_init(); #endif diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c index 92ff46ad21e2..706b7734e191 100644 --- a/arch/ia64/kernel/topology.c +++ b/arch/ia64/kernel/topology.c @@ -36,7 +36,7 @@ int arch_register_cpu(int num) parent = 
&sysfs_nodes[cpu_to_node(num)]; #endif /* CONFIG_NUMA */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * If CPEI cannot be re-targetted, and this is * CPEI target, then dont create the control file diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 660a03a89e66..40242c61e90a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -144,7 +144,6 @@ config X86_CPUID with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to /dev/cpu/31/cpuid. -# disable it for opteron optimized builds because it pulls in ACPI_BOOT config X86_HT bool depends on SMP && !MK8 @@ -461,7 +460,6 @@ config PCI_DIRECT config PCI_MMCONFIG bool "Support mmconfig PCI config space access" depends on PCI && ACPI - select ACPI_BOOT config UNORDERED_IO bool "Unordered IO mapping access" diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 776f3c866b70..aed77c1c5ccc 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -135,7 +135,6 @@ CONFIG_PM_STD_PARTITION="" # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -CONFIG_ACPI_BOOT=y CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 48f9e2c19cd6..0296ca6cbfa3 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -12,7 +12,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ -obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI) += acpi/ obj-$(CONFIG_X86_MSR) += msr.o obj-$(CONFIG_MICROCODE) += microcode.o obj-$(CONFIG_X86_CPUID) += cpuid.o diff --git a/arch/x86_64/kernel/acpi/Makefile b/arch/x86_64/kernel/acpi/Makefile index d2c2ee5f9a88..7da9ace890bd 100644 --- a/arch/x86_64/kernel/acpi/Makefile +++ b/arch/x86_64/kernel/acpi/Makefile @@ -1,3 +1,3 @@ -obj-$(CONFIG_ACPI_BOOT) := boot.o -boot-$(CONFIG_ACPI_BOOT) := 
../../../i386/kernel/acpi/boot.o +obj-y := boot.o +boot-y := ../../../i386/kernel/acpi/boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index d206d7e49cf5..1013a8fe44d7 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1841,7 +1841,7 @@ device_initcall(ioapic_init_sysfs); ACPI-based IOAPIC Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI #define IO_APIC_MAX_ID 0xFE @@ -1923,7 +1923,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a return 0; } -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ /* diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 79c362d03e2e..86445f320d86 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c @@ -74,7 +74,7 @@ static unsigned int num_processors = 0; physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; /* ACPI MADT entry parsing functions */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern struct acpi_boot_flags acpi_boot; #ifdef CONFIG_X86_LOCAL_APIC extern int acpi_parse_lapic (acpi_table_entry_header *header); @@ -84,7 +84,7 @@ extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header); #ifdef CONFIG_X86_IO_APIC extern int acpi_parse_ioapic (acpi_table_entry_header *header); #endif /*CONFIG_X86_IO_APIC*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /*CONFIG_ACPI*/ u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; @@ -519,8 +519,6 @@ void __init get_smp_config (void) struct intel_mp_floating *mpf = mpf_found; /* - * ACPI may be used to obtain the entire SMP configuration or just to - * enumerate/configure processors (CONFIG_ACPI_BOOT). Note that * ACPI supports both logical (e.g. Hyper-Threading) and physical * processors, where MPS only supports physical. 
*/ @@ -673,7 +671,7 @@ void __init find_smp_config (void) ACPI-based MP Configuration -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI void __init mp_register_lapic_address ( u64 address) @@ -994,4 +992,4 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low) } #endif /*CONFIG_X86_IO_APIC*/ -#endif /*CONFIG_ACPI_BOOT*/ +#endif /*CONFIG_ACPI*/ diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 116a491e2961..cfcebc8ab7b4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c @@ -71,7 +71,7 @@ unsigned long mmu_cr4_features; int acpi_disabled; EXPORT_SYMBOL(acpi_disabled); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int __initdata acpi_ht; extern acpi_interrupt_flags acpi_sci_flags; int __initdata acpi_force = 0; @@ -294,7 +294,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) maxcpus = simple_strtoul(from + 8, NULL, 0); } #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* "acpi=off" disables both ACPI table parsing and interpreter init */ if (!memcmp(from, "acpi=off", 8)) disable_acpi(); @@ -566,7 +566,7 @@ void __init setup_arch(char **cmdline_p) init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). * Call this early for SRAT node setup. @@ -658,7 +658,7 @@ void __init setup_arch(char **cmdline_p) check_ioapic(); -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI /* * Read APIC and some other early information from ACPI tables. 
*/ diff --git a/drivers/Makefile b/drivers/Makefile index 126a851d5653..784b93c8888d 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_PCI) += pci/ obj-$(CONFIG_PARISC) += parisc/ obj-y += video/ -obj-$(CONFIG_ACPI_BOOT) += acpi/ +obj-$(CONFIG_ACPI) += acpi/ # PnP must come after ACPI since it will eventually need to check if acpi # was used and do nothing if so obj-$(CONFIG_PNP) += pnp/ diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 66c60982ba46..14b70c259f3e 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -43,10 +43,6 @@ config ACPI if ACPI -config ACPI_BOOT - bool - default y - config ACPI_INTERPRETER bool default y @@ -312,7 +308,7 @@ endif # ACPI_INTERPRETER config X86_PM_TIMER bool "Power Management Timer Support" depends on X86 - depends on ACPI_BOOT && EXPERIMENTAL + depends on EXPERIMENTAL depends on !X86_64 default n help diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index ad67e8f61e6c..952ab352af9e 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -15,7 +15,7 @@ EXTRA_CFLAGS += $(ACPI_CFLAGS) # # ACPI Boot-Time Table Parsing # -obj-$(CONFIG_ACPI_BOOT) += tables.o +obj-y += tables.o obj-$(CONFIG_ACPI_INTERPRETER) += blacklist.o # diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index cf828ace13f9..1f1ade923d69 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h @@ -103,7 +103,7 @@ __acpi_release_global_lock (unsigned int *lock) :"=r"(n_hi), "=r"(n_lo) \ :"0"(n_hi), "1"(n_lo)) -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int acpi_lapic; extern int acpi_ioapic; extern int acpi_noirq; @@ -146,7 +146,7 @@ static inline void check_acpi_pci(void) { } #endif -#else /* CONFIG_ACPI_BOOT */ +#else /* !CONFIG_ACPI */ # define acpi_lapic 0 # define acpi_ioapic 0 diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index c94cac958389..cfb1c61d3b9c 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -76,7 
+76,7 @@ enum fixed_addresses { FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI FIX_ACPI_BEGIN, FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, #endif diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h index 002c203ccd6a..51c4e5fe6062 100644 --- a/include/asm-i386/io_apic.h +++ b/include/asm-i386/io_apic.h @@ -195,12 +195,12 @@ extern int skip_ioapic_setup; */ #define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int io_apic_get_unique_id (int ioapic, int apic_id); extern int io_apic_get_version (int ioapic); extern int io_apic_get_redir_entries (int ioapic); extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low); -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ extern int (*ioapic_renumber_irq)(int ioapic, int irq); diff --git a/include/asm-i386/mpspec.h b/include/asm-i386/mpspec.h index d9fafba075bc..b9e9f66d2721 100644 --- a/include/asm-i386/mpspec.h +++ b/include/asm-i386/mpspec.h @@ -26,14 +26,14 @@ extern unsigned long mp_lapic_addr; extern int pic_mode; extern int using_apic_timer; -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern void mp_register_lapic (u8 id, u8 enabled); extern void mp_register_lapic_address (u64 address); extern void mp_register_ioapic (u8 id, u32 address, u32 gsi_base); extern void mp_override_legacy_irq (u8 bus_irq, u8 polarity, u8 trigger, u32 gsi); extern void mp_config_acpi_legacy_irqs (void); extern int mp_register_gsi (u32 gsi, int edge_level, int active_high_low); -#endif /*CONFIG_ACPI_BOOT*/ +#endif /* CONFIG_ACPI */ #define PHYSID_ARRAY_SIZE BITS_TO_LONGS(MAX_APICS) diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index dc8c981af27f..7d537e1867c7 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -101,7 
+101,7 @@ __acpi_release_global_lock (unsigned int *lock) :"=r"(n_hi), "=r"(n_lo) \ :"0"(n_hi), "1"(n_lo)) -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int acpi_lapic; extern int acpi_ioapic; extern int acpi_noirq; @@ -122,10 +122,10 @@ static inline void disable_acpi(void) extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); -#else /* !CONFIG_ACPI_BOOT */ +#else /* !CONFIG_ACPI */ #define acpi_lapic 0 #define acpi_ioapic 0 -#endif /* !CONFIG_ACPI_BOOT */ +#endif /* !CONFIG_ACPI */ extern int acpi_numa; extern int acpi_scan_nodes(unsigned long start, unsigned long end); diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h index a8babd2bbe84..ee1bc69aec9c 100644 --- a/include/asm-x86_64/io_apic.h +++ b/include/asm-x86_64/io_apic.h @@ -201,7 +201,7 @@ extern int skip_ioapic_setup; */ #define io_apic_assign_pci_irqs (mp_irq_entries && !skip_ioapic_setup && io_apic_irqs) -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern int io_apic_get_version (int ioapic); extern int io_apic_get_redir_entries (int ioapic); extern int io_apic_set_pci_routing (int ioapic, int pin, int irq, int, int); diff --git a/include/asm-x86_64/mpspec.h b/include/asm-x86_64/mpspec.h index 331f6a3c72a2..f267e10c023d 100644 --- a/include/asm-x86_64/mpspec.h +++ b/include/asm-x86_64/mpspec.h @@ -179,7 +179,7 @@ extern int mpc_default_type; extern unsigned long mp_lapic_addr; extern int pic_mode; -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI extern void mp_register_lapic (u8 id, u8 enabled); extern void mp_register_lapic_address (u64 address); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fd48db320f5b..fa1ad1a60a09 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -41,7 +41,7 @@ #include -#ifdef CONFIG_ACPI_BOOT +#ifdef CONFIG_ACPI enum acpi_irq_model_id { ACPI_IRQ_MODEL_PIC = 0, @@ -429,11 +429,11 @@ extern int pci_mmcfg_config_num; extern int sbf_port ; -#else /*!CONFIG_ACPI_BOOT*/ +#else /* !CONFIG_ACPI */ #define acpi_mp_config 0 -#endif 
/*!CONFIG_ACPI_BOOT*/ +#endif /* !CONFIG_ACPI */ int acpi_register_gsi (u32 gsi, int edge_level, int active_high_low); int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); -- cgit v1.2.3 From 8466361ad5233d4356a4601e16b66c25277920d1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 24 Aug 2005 12:09:07 -0400 Subject: [ACPI] delete CONFIG_ACPI_INTERPRETER it is a synonym for CONFIG_ACPI Signed-off-by: Len Brown --- arch/i386/defconfig | 1 - arch/i386/kernel/acpi/boot.c | 8 +++----- arch/i386/kernel/mpparse.c | 4 ++-- arch/i386/kernel/setup.c | 2 +- arch/i386/mach-es7000/es7000plat.c | 4 ++-- arch/ia64/configs/bigsur_defconfig | 1 - arch/ia64/configs/sn2_defconfig | 1 - arch/ia64/configs/tiger_defconfig | 1 - arch/ia64/configs/zx1_defconfig | 1 - arch/ia64/defconfig | 1 - arch/x86_64/defconfig | 1 - drivers/acpi/Kconfig | 10 ---------- drivers/acpi/Makefile | 4 ++-- drivers/char/ipmi/ipmi_si_intf.c | 6 +++--- include/linux/acpi.h | 11 ----------- 15 files changed, 13 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 1c0076e22dda..f137a32634ac 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -131,7 +131,6 @@ CONFIG_SOFTWARE_SUSPEND=y # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_SLEEP=y CONFIG_ACPI_SLEEP_PROC_FS=y CONFIG_ACPI_AC=y diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 09700d894668..84befaecedf8 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -303,7 +303,7 @@ acpi_parse_lapic_nmi(acpi_table_entry_header * header, const unsigned long end) #endif /*CONFIG_X86_LOCAL_APIC */ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC static int __init acpi_parse_ioapic(acpi_table_entry_header * header, const unsigned long end) @@ -634,10 +634,8 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long 
size) printk(KERN_WARNING PREFIX "Unable to map FADT\n"); return 0; } -#ifdef CONFIG_ACPI_INTERPRETER /* initialize sci_int early for INT_SRC_OVR MADT parsing */ acpi_fadt.sci_int = fadt->sci_int; -#endif #ifdef CONFIG_ACPI_BUS /* initialize rev and apic_phys_dest_mode for x86_64 genapic */ @@ -735,7 +733,7 @@ static int __init acpi_parse_madt_lapic_entries(void) } #endif /* CONFIG_X86_LOCAL_APIC */ -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC /* * Parse IOAPIC related entries in MADT * returns 0 on success, < 0 on error @@ -810,7 +808,7 @@ static inline int acpi_parse_madt_ioapic_entries(void) { return -1; } -#endif /* !(CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER) */ +#endif /* !CONFIG_X86_IO_APIC */ static void __init acpi_process_madt(void) { diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 9a4db7d30001..db90d141481d 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -854,7 +854,7 @@ void __init mp_register_lapic ( MP_processor_info(&processor); } -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 #define MP_MAX_IOAPIC_PIN 127 @@ -1136,5 +1136,5 @@ int mp_register_gsi (u32 gsi, int edge_level, int active_high_low) return gsi; } -#endif /* CONFIG_X86_IO_APIC && CONFIG_ACPI_INTERPRETER */ +#endif /* CONFIG_X86_IO_APIC */ #endif /* CONFIG_ACPI */ diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index d3943e5edc8f..d52eda399a7a 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -87,7 +87,7 @@ EXPORT_SYMBOL(boot_cpu_data); unsigned long mmu_cr4_features; -#ifdef CONFIG_ACPI_INTERPRETER +#ifdef CONFIG_ACPI int acpi_disabled = 0; #else int acpi_disabled = 1; diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c index baac9da042ce..f549c0efdb9f 100644 --- a/arch/i386/mach-es7000/es7000plat.c +++ b/arch/i386/mach-es7000/es7000plat.c @@ 
-51,7 +51,7 @@ struct mip_reg *host_reg; int mip_port; unsigned long mip_addr, host_addr; -#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI_INTERPRETER) +#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_ACPI) /* * GSI override for ES7000 platforms. @@ -73,7 +73,7 @@ es7000_rename_gsi(int ioapic, int gsi) return gsi; } -#endif // (CONFIG_X86_IO_APIC) && (CONFIG_ACPI_INTERPRETER) +#endif /* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */ /* * Parse the OEM Table diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index 456c65689bef..2c3ba6a6ec7f 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ -107,7 +107,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m CONFIG_ACPI_FAN=m diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index dc483c18343f..6a0c114e086a 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -130,7 +130,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_INTERPRETER=y # CONFIG_ACPI_BUTTON is not set CONFIG_ACPI_VIDEO=m CONFIG_ACPI_HOTKEY=m diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index cd2d6375a853..dec24a6de6a4 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -128,7 +128,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m # CONFIG_ACPI_VIDEO is not set # CONFIG_ACPI_HOTKEY is not set diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index cf58404769a8..d318087bfcbd 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -128,7 +128,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_INTERPRETER=y 
CONFIG_ACPI_BUTTON=y CONFIG_ACPI_VIDEO=m CONFIG_ACPI_HOTKEY=m diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index f38c677f7afd..e6d34df7d2ff 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -118,7 +118,6 @@ CONFIG_ACPI=y # # ACPI (Advanced Configuration and Power Interface) Support # -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_BUTTON=m CONFIG_ACPI_VIDEO=m CONFIG_ACPI_FAN=m diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index aed77c1c5ccc..8ccb4a12eed5 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -135,7 +135,6 @@ CONFIG_PM_STD_PARTITION="" # ACPI (Advanced Configuration and Power Interface) Support # CONFIG_ACPI=y -CONFIG_ACPI_INTERPRETER=y CONFIG_ACPI_AC=y CONFIG_ACPI_BATTERY=y CONFIG_ACPI_BUTTON=y diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 14b70c259f3e..f023a88ca398 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -43,12 +43,6 @@ config ACPI if ACPI -config ACPI_INTERPRETER - bool - default y - -if ACPI_INTERPRETER - config ACPI_SLEEP bool "Sleep States (EXPERIMENTAL)" depends on X86 && (!SMP || SUSPEND_SMP) @@ -126,7 +120,6 @@ config ACPI_VIDEO config ACPI_HOTKEY tristate "Generic Hotkey" - depends on ACPI_INTERPRETER depends on EXPERIMENTAL depends on !IA64_SGI_SN default n @@ -257,7 +250,6 @@ config ACPI_CUSTOM_DSDT_FILE config ACPI_BLACKLIST_YEAR int "Disable ACPI for systems before Jan 1st this year" - depends on ACPI_INTERPRETER default 0 help enter a 4-digit year, eg. 2001 to disable ACPI by default @@ -303,8 +295,6 @@ config ACPI_SYSTEM This driver will enable your system to shut down using ACPI, and dump your ACPI DSDT table using /proc/acpi/dsdt. 
-endif # ACPI_INTERPRETER - config X86_PM_TIMER bool "Power Management Timer Support" depends on X86 diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 952ab352af9e..060afaf962a3 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -16,12 +16,12 @@ EXTRA_CFLAGS += $(ACPI_CFLAGS) # ACPI Boot-Time Table Parsing # obj-y += tables.o -obj-$(CONFIG_ACPI_INTERPRETER) += blacklist.o +obj-y += blacklist.o # # ACPI Core Subsystem (Interpreter) # -obj-$(CONFIG_ACPI_INTERPRETER) += osl.o utils.o \ +obj-y += osl.o utils.o \ dispatcher/ events/ executer/ hardware/ \ namespace/ parser/ resources/ tables/ \ utilities/ diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index a44b97304e95..c51b02d9dfd0 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -986,7 +986,7 @@ MODULE_PARM_DESC(slave_addrs, "Set the default IPMB slave address for" #define IPMI_MEM_ADDR_SPACE 1 #define IPMI_IO_ADDR_SPACE 2 -#if defined(CONFIG_ACPI_INTERPRETER) || defined(CONFIG_X86) || defined(CONFIG_PCI) +#if defined(CONFIG_ACPI) || defined(CONFIG_X86) || defined(CONFIG_PCI) static int is_new_interface(int intf, u8 addr_space, unsigned long base_addr) { int i; @@ -1362,7 +1362,7 @@ static int try_init_mem(int intf_num, struct smi_info **new_info) } -#ifdef CONFIG_ACPI_INTERPRETER +#ifdef CONFIG_ACPI #include @@ -2067,7 +2067,7 @@ static int init_one_smi(int intf_num, struct smi_info **smi) rv = try_init_mem(intf_num, &new_smi); if (rv) rv = try_init_port(intf_num, &new_smi); -#ifdef CONFIG_ACPI_INTERPRETER +#ifdef CONFIG_ACPI if ((rv) && (si_trydefaults)) { rv = try_init_acpi(intf_num, &new_smi); } diff --git a/include/linux/acpi.h b/include/linux/acpi.h index fa1ad1a60a09..6882b32aa40f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -488,20 +488,9 @@ extern int ec_write(u8 addr, u8 val); #endif /*CONFIG_ACPI_EC*/ -#ifdef CONFIG_ACPI_INTERPRETER - extern int acpi_blacklisted(void); extern void 
acpi_bios_year(char *s); -#else /*!CONFIG_ACPI_INTERPRETER*/ - -static inline int acpi_blacklisted(void) -{ - return 0; -} - -#endif /*!CONFIG_ACPI_INTERPRETER*/ - #define ACPI_CSTATE_LIMIT_DEFINED /* for driver builds */ #ifdef CONFIG_ACPI -- cgit v1.2.3 From 6153df7b2f4d27c8bde054db1b947369a6f64d83 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 25 Aug 2005 12:27:09 -0400 Subject: [ACPI] delete CONFIG_ACPI_PCI Delete the ability to build an ACPI kernel that does not include PCI support. When such a machine is created and it requires a tuned kernel, send a patch. http://bugzilla.kernel.org/show_bug.cgi?id=1364 Signed-off-by: Len Brown --- arch/i386/defconfig | 1 - arch/i386/kernel/acpi/boot.c | 9 --------- arch/i386/pci/Makefile | 2 +- arch/i386/pci/irq.c | 2 +- arch/ia64/configs/bigsur_defconfig | 1 - arch/ia64/configs/sn2_defconfig | 1 - arch/ia64/configs/tiger_defconfig | 1 - arch/ia64/configs/zx1_defconfig | 1 - arch/ia64/defconfig | 1 - arch/x86_64/defconfig | 1 - arch/x86_64/pci/Makefile | 2 +- arch/x86_64/pci/Makefile-BUS | 2 +- drivers/acpi/Kconfig | 7 ++----- drivers/acpi/Makefile | 2 +- drivers/acpi/osl.c | 28 ---------------------------- include/acpi/acpi_drivers.h | 4 ---- include/asm-i386/acpi.h | 18 ++++++++---------- include/asm-x86_64/acpi.h | 25 +++++++++++-------------- include/linux/acpi.h | 4 ++-- 19 files changed, 28 insertions(+), 84 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 1a387856c870..6a431b926019 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -144,7 +144,6 @@ CONFIG_ACPI_THERMAL=y # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_X86_PM_TIMER is not set diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 552fc85691ac..0fb23c30eb98 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -66,13 +66,8 @@ static inline int 
ioapic_setup_disabled(void) #define PREFIX "ACPI: " -#ifdef CONFIG_ACPI_PCI int acpi_noirq __initdata; /* skip ACPI IRQ initialization */ int acpi_pci_disabled __initdata; /* skip ACPI PCI scan and IRQ initialization */ -#else -int acpi_noirq __initdata = 1; -int acpi_pci_disabled __initdata = 1; -#endif int acpi_ht __initdata = 1; /* enable HT */ int acpi_lapic; @@ -849,7 +844,6 @@ extern int acpi_force; #ifdef __i386__ -#ifdef CONFIG_ACPI_PCI static int __init disable_acpi_irq(struct dmi_system_id *d) { if (!acpi_force) { @@ -869,7 +863,6 @@ static int __init disable_acpi_pci(struct dmi_system_id *d) } return 0; } -#endif static int __init dmi_disable_acpi(struct dmi_system_id *d) { @@ -1017,7 +1010,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }, }, -#ifdef CONFIG_ACPI_PCI /* * Boxes that need ACPI PCI IRQ routing disabled */ @@ -1055,7 +1047,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, -#endif {} }; diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index 1bff03f36965..ead6122dd06d 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o obj-$(CONFIG_PCI_DIRECT) += direct.o pci-y := fixup.o -pci-$(CONFIG_ACPI_PCI) += acpi.o +pci-$(CONFIG_ACPI) += acpi.o pci-y += legacy.o irq.o pci-$(CONFIG_X86_VISWS) := visws.o fixup.o diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 86348b68fda1..326a2edc3834 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -1075,7 +1075,7 @@ static void pirq_penalize_isa_irq(int irq, int active) void pcibios_penalize_isa_irq(int irq, int active) { -#ifdef CONFIG_ACPI_PCI +#ifdef CONFIG_ACPI if (!acpi_noirq) acpi_penalize_isa_irq(irq, active); else diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig index 71dcfe0e3f73..3b65cbb31b1d 100644 --- a/arch/ia64/configs/bigsur_defconfig +++ b/arch/ia64/configs/bigsur_defconfig @@ 
-113,7 +113,6 @@ CONFIG_ACPI_PROCESSOR=m CONFIG_ACPI_THERMAL=m # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig index ac17ed2ff979..1ca6e6e11b42 100644 --- a/arch/ia64/configs/sn2_defconfig +++ b/arch/ia64/configs/sn2_defconfig @@ -136,7 +136,6 @@ CONFIG_ACPI=y CONFIG_ACPI_NUMA=y # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig index 3a629414229e..3ec94a12eac0 100644 --- a/arch/ia64/configs/tiger_defconfig +++ b/arch/ia64/configs/tiger_defconfig @@ -135,7 +135,6 @@ CONFIG_ACPI_PROCESSOR=m CONFIG_ACPI_THERMAL=m # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 84cdf32ede7c..d4cf73d124bc 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -134,7 +134,6 @@ CONFIG_ACPI_PROCESSOR=y CONFIG_ACPI_THERMAL=y # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig index 7002d5a3cc14..b6ec8d32c346 100644 --- a/arch/ia64/defconfig +++ b/arch/ia64/defconfig @@ -126,7 +126,6 @@ CONFIG_ACPI_THERMAL=m CONFIG_ACPI_NUMA=y # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y CONFIG_ACPI_CONTAINER=m diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 62abdc0adab8..b95c6cf26591 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig @@ -151,7 +151,6 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001 # CONFIG_ACPI_DEBUG is not set CONFIG_ACPI_EC=y CONFIG_ACPI_POWER=y -CONFIG_ACPI_PCI=y CONFIG_ACPI_SYSTEM=y # CONFIG_ACPI_CONTAINER is not set diff --git 
a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index 37c92e841dec..bb34e5ef916c 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile @@ -8,7 +8,7 @@ CFLAGS += -Iarch/i386/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-y += fixup.o -obj-$(CONFIG_ACPI_PCI) += acpi.o +obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o diff --git a/arch/x86_64/pci/Makefile-BUS b/arch/x86_64/pci/Makefile-BUS index 291985f0d2e4..4f0c05abd408 100644 --- a/arch/x86_64/pci/Makefile-BUS +++ b/arch/x86_64/pci/Makefile-BUS @@ -8,7 +8,7 @@ CFLAGS += -I arch/i386/pci obj-y := i386.o obj-$(CONFIG_PCI_DIRECT)+= direct.o obj-y += fixup.o -obj-$(CONFIG_ACPI_PCI) += acpi.o +obj-$(CONFIG_ACPI) += acpi.o obj-y += legacy.o irq.o common.o # mmconfig has a 64bit special obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 83cac52308db..3998c9d35fe1 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -3,7 +3,6 @@ # menu "ACPI (Advanced Configuration and Power Interface) Support" - depends on PM depends on !X86_VISWS depends on !IA64_HP_SIM depends on IA64 || X86 @@ -11,6 +10,8 @@ menu "ACPI (Advanced Configuration and Power Interface) Support" config ACPI bool "ACPI Support" depends on IA64 || X86 + select PM + select PCI default y ---help--- @@ -281,10 +282,6 @@ config ACPI_POWER bool default y -config ACPI_PCI - bool - default PCI - config ACPI_SYSTEM bool default y diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index b6a3c9192385..a18243488c66 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -44,7 +44,7 @@ obj-$(CONFIG_ACPI_EC) += ec.o obj-$(CONFIG_ACPI_FAN) += fan.o obj-$(CONFIG_ACPI_VIDEO) += video.o obj-$(CONFIG_ACPI_HOTKEY) += hotkey.o -obj-$(CONFIG_ACPI_PCI) += pci_root.o pci_link.o pci_irq.o pci_bind.o +obj-y += pci_root.o pci_link.o pci_irq.o pci_bind.o obj-$(CONFIG_ACPI_POWER) += power.o 
obj-$(CONFIG_ACPI_PROCESSOR) += processor.o obj-$(CONFIG_ACPI_CONTAINER) += container.o diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 81f0eb863a76..dc69d8760a54 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -86,13 +86,11 @@ acpi_status acpi_os_initialize1(void) * Initialize PCI configuration space access, as we'll need to access * it while walking the namespace (bus 0 and root bridges w/ _BBNs). */ -#ifdef CONFIG_ACPI_PCI if (!raw_pci_ops) { printk(KERN_ERR PREFIX "Access to PCI configuration space unavailable\n"); return AE_NULL_ENTRY; } -#endif kacpid_wq = create_singlethread_workqueue("kacpid"); BUG_ON(!kacpid_wq); @@ -484,8 +482,6 @@ acpi_os_write_memory(acpi_physical_address phys_addr, u32 value, u32 width) return AE_OK; } -#ifdef CONFIG_ACPI_PCI - acpi_status acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, u32 reg, void *value, u32 width) @@ -618,30 +614,6 @@ void acpi_os_derive_pci_id(acpi_handle rhandle, /* upper bound */ acpi_os_derive_pci_id_2(rhandle, chandle, id, &is_bridge, &bus_number); } -#else /*!CONFIG_ACPI_PCI */ - -acpi_status -acpi_os_write_pci_configuration(struct acpi_pci_id * pci_id, - u32 reg, acpi_integer value, u32 width) -{ - return AE_SUPPORT; -} - -acpi_status -acpi_os_read_pci_configuration(struct acpi_pci_id * pci_id, - u32 reg, void *value, u32 width) -{ - return AE_SUPPORT; -} - -void acpi_os_derive_pci_id(acpi_handle rhandle, /* upper bound */ - acpi_handle chandle, /* current node */ - struct acpi_pci_id **id) -{ -} - -#endif /*CONFIG_ACPI_PCI */ - static void acpi_os_execute_deferred(void *context) { struct acpi_os_dpc *dpc = NULL; diff --git a/include/acpi/acpi_drivers.h b/include/acpi/acpi_drivers.h index e976cb109b13..c1b4e1f882e4 100644 --- a/include/acpi/acpi_drivers.h +++ b/include/acpi/acpi_drivers.h @@ -47,8 +47,6 @@ PCI -------------------------------------------------------------------------- */ -#ifdef CONFIG_ACPI_PCI - #define ACPI_PCI_COMPONENT 0x00400000 /* ACPI PCI Interrupt 
Link (pci_link.c) */ @@ -78,8 +76,6 @@ int acpi_pci_bind_root(struct acpi_device *device, struct acpi_pci_id *id, struct pci_bus *pci_acpi_scan_root(struct acpi_device *device, int domain, int bus); -#endif /*CONFIG_ACPI_PCI */ - /* -------------------------------------------------------------------------- Power Resource -------------------------------------------------------------------------- */ diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 1f1ade923d69..df4ed323aa4d 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h @@ -146,13 +146,6 @@ static inline void check_acpi_pci(void) { } #endif -#else /* !CONFIG_ACPI */ -# define acpi_lapic 0 -# define acpi_ioapic 0 - -#endif - -#ifdef CONFIG_ACPI_PCI static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { @@ -160,11 +153,16 @@ static inline void acpi_disable_pci(void) acpi_noirq_set(); } extern int acpi_irq_balance_set(char *str); -#else + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } -static inline int acpi_irq_balance_set(char *str) { return 0; } -#endif + +#endif /* !CONFIG_ACPI */ + #ifdef CONFIG_ACPI_SLEEP diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index 7d537e1867c7..aa1c7b2e438c 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -121,17 +121,6 @@ static inline void disable_acpi(void) #define FIX_ACPI_PAGES 4 extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); - -#else /* !CONFIG_ACPI */ -#define acpi_lapic 0 -#define acpi_ioapic 0 -#endif /* !CONFIG_ACPI */ - -extern int acpi_numa; -extern int acpi_scan_nodes(unsigned long start, unsigned long end); -#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) - -#ifdef CONFIG_ACPI_PCI static inline void acpi_noirq_set(void) { acpi_noirq = 1; } static inline void acpi_disable_pci(void) { @@ -139,11 +128,19 @@ static inline void 
acpi_disable_pci(void) acpi_noirq_set(); } extern int acpi_irq_balance_set(char *str); -#else + +#else /* !CONFIG_ACPI */ + +#define acpi_lapic 0 +#define acpi_ioapic 0 static inline void acpi_noirq_set(void) { } static inline void acpi_disable_pci(void) { } -static inline int acpi_irq_balance_set(char *str) { return 0; } -#endif + +#endif /* !CONFIG_ACPI */ + +extern int acpi_numa; +extern int acpi_scan_nodes(unsigned long start, unsigned long end); +#define NR_NODE_MEMBLKS (MAX_NUMNODES*2) #ifdef CONFIG_ACPI_SLEEP diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6882b32aa40f..026c3c011dc0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -445,7 +445,7 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); */ void acpi_unregister_gsi (u32 gsi); -#ifdef CONFIG_ACPI_PCI +#ifdef CONFIG_ACPI struct acpi_prt_entry { struct list_head node; @@ -479,7 +479,7 @@ struct acpi_pci_driver { int acpi_pci_register_driver(struct acpi_pci_driver *driver); void acpi_pci_unregister_driver(struct acpi_pci_driver *driver); -#endif /*CONFIG_ACPI_PCI*/ +#endif /* CONFIG_ACPI */ #ifdef CONFIG_ACPI_EC -- cgit v1.2.3 From b73fc89f6d1f84326e5e897ad249d00a9f218fd7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 26 Aug 2005 16:03:19 +0100 Subject: [PATCH] libata: regularize dma_start/stop arguments Needed for a few PATA drivers. Also fix up a wrong comment. 
Signed-off-by: Jeff Garzik --- drivers/scsi/libata-core.c | 11 ++++++----- drivers/scsi/sata_qstor.c | 4 ++-- include/linux/libata.h | 5 +++-- 3 files changed, 11 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 19b45c2bfa26..f15a07f9f471 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -3017,7 +3017,7 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc) host_stat = ap->ops->bmdma_status(ap); /* before we do anything else, clear DMA-Start bit */ - ap->ops->bmdma_stop(ap); + ap->ops->bmdma_stop(qc); /* fall through */ @@ -3399,7 +3399,7 @@ static void ata_bmdma_setup_mmio (struct ata_queued_cmd *qc) } /** - * ata_bmdma_start - Start a PCI IDE BMDMA transaction + * ata_bmdma_start_mmio - Start a PCI IDE BMDMA transaction * @qc: Info associated with this ATA transaction. * * LOCKING: @@ -3570,7 +3570,7 @@ u8 ata_bmdma_status(struct ata_port *ap) /** * ata_bmdma_stop - Stop PCI IDE BMDMA transfer - * @ap: Port associated with this ATA transaction. 
+ * @qc: Command we are ending DMA for * * Clears the ATA_DMA_START flag in the dma control register * @@ -3580,8 +3580,9 @@ u8 ata_bmdma_status(struct ata_port *ap) * spin_lock_irqsave(host_set lock) */ -void ata_bmdma_stop(struct ata_port *ap) +void ata_bmdma_stop(struct ata_queued_cmd *qc) { + struct ata_port *ap = qc->ap; if (ap->flags & ATA_FLAG_MMIO) { void __iomem *mmio = (void __iomem *) ap->ioaddr.bmdma_addr; @@ -3633,7 +3634,7 @@ inline unsigned int ata_host_intr (struct ata_port *ap, goto idle_irq; /* before we do anything else, clear DMA-Start bit */ - ap->ops->bmdma_stop(ap); + ap->ops->bmdma_stop(qc); /* fall through */ diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 08a84042ce09..2926846cdd6a 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -117,7 +117,7 @@ static void qs_phy_reset(struct ata_port *ap); static void qs_qc_prep(struct ata_queued_cmd *qc); static int qs_qc_issue(struct ata_queued_cmd *qc); static int qs_check_atapi_dma(struct ata_queued_cmd *qc); -static void qs_bmdma_stop(struct ata_port *ap); +static void qs_bmdma_stop(struct ata_queued_cmd *qc); static u8 qs_bmdma_status(struct ata_port *ap); static void qs_irq_clear(struct ata_port *ap); static void qs_eng_timeout(struct ata_port *ap); @@ -198,7 +198,7 @@ static int qs_check_atapi_dma(struct ata_queued_cmd *qc) return 1; /* ATAPI DMA not supported */ } -static void qs_bmdma_stop(struct ata_port *ap) +static void qs_bmdma_stop(struct ata_queud_cmd *qc) { /* nothing */ } diff --git a/include/linux/libata.h b/include/linux/libata.h index 724b7d1c18ea..33f3ab4eb827 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -365,7 +365,7 @@ struct ata_port_operations { void (*host_stop) (struct ata_host_set *host_set); - void (*bmdma_stop) (struct ata_port *ap); + void (*bmdma_stop) (struct ata_queued_cmd *qc); u8 (*bmdma_status) (struct ata_port *ap); }; @@ -424,9 +424,10 @@ extern unsigned int ata_dev_classify(struct 
ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); +extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); -extern void ata_bmdma_stop(struct ata_port *ap); +extern void ata_bmdma_stop(struct ata_queued_cmd *qc); extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat); -- cgit v1.2.3 From d18d36b4edbb980c9de7fe00724c3ded5de1b7a7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 27 Aug 2005 04:13:52 -0400 Subject: libata: fix a few alan-isms --- drivers/scsi/sata_qstor.c | 2 +- include/linux/libata.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 2926846cdd6a..93fd06fb4f15 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -198,7 +198,7 @@ static int qs_check_atapi_dma(struct ata_queued_cmd *qc) return 1; /* ATAPI DMA not supported */ } -static void qs_bmdma_stop(struct ata_queud_cmd *qc) +static void qs_bmdma_stop(struct ata_queued_cmd *qc) { /* nothing */ } diff --git a/include/linux/libata.h b/include/linux/libata.h index 33f3ab4eb827..7c09540c52bc 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -424,7 +424,6 @@ extern unsigned int ata_dev_classify(struct ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); -extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); -- 
cgit v1.2.3 From caca1779870b1bcc0fb07e48ebd2403901f356b8 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 17:26:10 -0500 Subject: [SCSI] add missing attribute container function prototype attribute_container_classdev_to_container is an exported function of the attribute_container.c file. However, there's no prototype for it. Now I actually want to use it, so add one. Signed-off-by: James Bottomley --- include/linux/attribute_container.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index f54b05b052b3..ee83fe64a102 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -64,6 +64,7 @@ int attribute_container_add_class_device_adapter(struct attribute_container *con struct class_device *classdev); void attribute_container_remove_attrs(struct class_device *classdev); void attribute_container_class_device_del(struct class_device *classdev); +struct attribute_container *attribute_container_classdev_to_container(struct class_device *); struct class_device *attribute_container_find_class_device(struct attribute_container *, struct device *); struct class_device_attribute **attribute_container_classdev_to_attrs(const struct class_device *classdev); -- cgit v1.2.3 From af36d7f0df56de3e3e4bbfb15d0915097ecb8cab Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 28 Aug 2005 20:18:39 -0400 Subject: [libata] license change, other bits - changes license of all code from OSL+GPL to plain ole GPL - except for NVIDIA, who hasn't yet responded about sata_nv - copyright holders were already contacted privately - adds info in each driver about where hardware/protocol docs may be obtained - where I have made major contributions, updated copyright dates --- drivers/scsi/ahci.c | 40 +++++++++++++++++------------ drivers/scsi/ata_piix.c | 58 +++++++++++++++++++++++++++--------------- drivers/scsi/libata-core.c | 52 
++++++++++++++++++++++--------------- drivers/scsi/libata-scsi.c | 53 +++++++++++++++++++++++--------------- drivers/scsi/libata.h | 45 +++++++++++++++++--------------- drivers/scsi/sata_nv.c | 11 ++++++++ drivers/scsi/sata_promise.c | 33 ++++++++++++++---------- drivers/scsi/sata_promise.h | 31 +++++++++++++---------- drivers/scsi/sata_qstor.c | 31 +++++++++++++---------- drivers/scsi/sata_sil.c | 33 +++++++++++++----------- drivers/scsi/sata_sis.c | 33 ++++++++++++++---------- drivers/scsi/sata_svw.c | 33 ++++++++++++++---------- drivers/scsi/sata_sx4.c | 33 ++++++++++++++---------- drivers/scsi/sata_uli.c | 33 ++++++++++++++---------- drivers/scsi/sata_via.c | 62 ++++++++++++++++++++++++--------------------- drivers/scsi/sata_vsc.c | 26 ++++++++++++++++--- include/linux/ata.h | 43 +++++++++++++++++-------------- include/linux/libata.h | 41 ++++++++++++++++-------------- 18 files changed, 409 insertions(+), 282 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index e3b9692b9688..7eaaf7a2744d 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -1,26 +1,34 @@ /* * ahci.c - AHCI SATA support * - * Copyright 2004 Red Hat, Inc. + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. + * Copyright 2004-2005 Red Hat, Inc. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. 
If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. * - * Version 1.0 of the AHCI specification: + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * AHCI hardware documentation: * http://www.intel.com/technology/serialata/pdf/rev1_0.pdf + * http://www.intel.com/technology/serialata/pdf/rev1_1.pdf * */ diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index d96ebf9d2228..6898b7f74389 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -1,24 +1,42 @@ /* - - ata_piix.c - Intel PATA/SATA controllers - - Maintained by: Jeff Garzik - Please ALWAYS copy linux-ide@vger.kernel.org - on emails. 
- - - Copyright 2003-2004 Red Hat Inc - Copyright 2003-2004 Jeff Garzik - - - Copyright header from piix.c: - - Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer - Copyright (C) 1998-2000 Andre Hedrick - Copyright (C) 2003 Red Hat Inc - - May be copied or modified under the terms of the GNU General Public License - + * ata_piix.c - Intel PATA/SATA controllers + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * + * Copyright 2003-2005 Red Hat Inc + * Copyright 2003-2005 Jeff Garzik + * + * + * Copyright header from piix.c: + * + * Copyright (C) 1998-1999 Andrzej Krzysztofowicz, Author and Maintainer + * Copyright (C) 1998-2000 Andre Hedrick + * Copyright (C) 2003 Red Hat Inc + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available at http://developer.intel.com/ + * */ #include diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index f4e7dcb6492b..4154e5b6bad8 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -1,25 +1,35 @@ /* - libata-core.c - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. 
- Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata-core.c - helper library for ATA + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from http://www.t13.org/ and + * http://www.sata-io.org/ + * */ #include diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 6a75ec2187fd..c6aeab1630ee 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1,25 +1,36 @@ /* - libata-scsi.c - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata-scsi.c - helper library for ATA + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org + * on emails. + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from + * - http://www.t10.org/ + * - http://www.t13.org/ + * */ #include diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 3e7f4843020f..c51d658903d7 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -1,25 +1,28 @@ /* - libata.h - helper library for ATA - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * libata.h - helper library for ATA + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. 
+ * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * */ #ifndef __LIBATA_H__ diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index b0403ccd8a25..1e10370adc34 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -20,6 +20,17 @@ * If you do not delete the provisions above, a recipient may use your * version of this file under either the OSL or the GPL. * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * No hardware documentation available outside of NVIDIA. + * This driver programs the NVIDIA SATA controller in a similar + * fashion as with other PCI IDE BMDMA controllers, with a few + * NV-specific details such as register offsets, SATA phy location, + * hotplug info, etc. + * + * * 0.06 * - Added generic SATA support by using a pci_device_id that filters on * the IDE storage class code. diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index 919fb314ad10..b27e2e20280b 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -7,21 +7,26 @@ * * Copyright 2003-2004 Red Hat, Inc. 
* - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware information only available under NDA. 
* */ diff --git a/drivers/scsi/sata_promise.h b/drivers/scsi/sata_promise.h index 6e7e96b9ee13..6ee5e190262d 100644 --- a/drivers/scsi/sata_promise.h +++ b/drivers/scsi/sata_promise.h @@ -3,21 +3,24 @@ * * Copyright 2003-2004 Red Hat, Inc. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 1383e8a28d72..f6b716f4fab8 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -6,21 +6,24 @@ * Copyright 2005 Pacific Digital Corporation. * (OSL/GPL code release authorized by Jalil Fadavi). * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index 49ed557a4b66..345e6f2d28a8 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -5,24 +5,27 @@ * Please ALWAYS copy linux-ide@vger.kernel.org * on emails. * - * Copyright 2003 Red Hat, Inc. + * Copyright 2003-2005 Red Hat, Inc. * Copyright 2003 Benjamin Herrenschmidt * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* * */ diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index e418b89c6b9d..6db8b09db401 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -7,21 +7,26 @@ * * Copyright 2004 Uwe Koziolek * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 858e07185dbd..3884a3cce75c 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -13,21 +13,26 @@ * This driver probably works with non-Apple versions of the * Broadcom chipset... * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index efd7d7a61135..c7f6ec262a15 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -7,21 +7,26 @@ * * Copyright 2003-2004 Red Hat, Inc. * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index a71fb54eebd3..fa1021980b23 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -1,21 +1,26 @@ /* * sata_uli.c - ULi Electronics SATA * - * The contents of this file are subject to the Open - * Software License version 1.1 that can be found at - * http://www.opensource.org/licenses/osl-1.1.txt and is included herein - * by reference. * - * Alternatively, the contents of this file may be used under the terms - * of the GNU General Public License version 2 (the "GPL") as distributed - * in the kernel source COPYING file, in which case the provisions of - * the GPL are applicable instead of the above. If you wish to allow - * the use of your version of this file only under the terms of the - * GPL and not to allow others to use your version of this file under - * the OSL, indicate your decision by deleting the provisions above and - * replace them with the notice and other provisions required by the GPL. - * If you do not delete the provisions above, a recipient may use your - * version of this file under either the OSL or the GPL. 
+ * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. * */ diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index f43183c19a12..6653ffe956f5 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -1,34 +1,38 @@ /* - sata_via.c - VIA Serial ATA controllers - - Maintained by: Jeff Garzik - Please ALWAYS copy linux-ide@vger.kernel.org + * sata_via.c - VIA Serial ATA controllers + * + * Maintained by: Jeff Garzik + * Please ALWAYS copy linux-ide@vger.kernel.org on emails. - - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. 
If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - - ---------------------------------------------------------------------- - - To-do list: - * VT6421 PATA support - + * + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available under NDA. + * + * + * To-do list: + * - VT6421 PATA support + * */ #include diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index c5e09dc6f3de..8bddb8228d58 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -9,9 +9,29 @@ * * Bits from Jeff Garzik, Copyright RedHat, Inc. * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Vitesse hardware documentation presumably available under NDA. + * Intel 31244 (same hardware interface) documentation presumably + * available from http://developer.intel.com/ + * */ #include diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf9981..19c3e2853f17 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -1,24 +1,29 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. 
If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from http://www.t13.org/ + * */ #ifndef __LINUX_ATA_H__ diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563b..51d2b20d34f2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1,23 +1,26 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. 
- - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2005 Red Hat, Inc. All rights reserved. + * Copyright 2003-2005 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * */ #ifndef __LINUX_LIBATA_H__ -- cgit v1.2.3 From e13934563db047043ccead26412f552375cea90c Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 24 Aug 2005 18:46:21 -0500 Subject: [PATCH] PHY Layer fixup This patch adds back the code that was taken out, thus re-enabling: * The PHY Layer to initialize without crashing * Drivers to actually connect to PHYs * The entire PHY Control Layer This patch is used by the gianfar driver, and other drivers which are in development. Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- drivers/net/phy/Kconfig | 8 ++ drivers/net/phy/Makefile | 11 +- drivers/net/phy/mdio_bus.c | 79 ++++++++++- drivers/net/phy/phy.c | 325 +++++++++++++++++++++++++++++++++---------- drivers/net/phy/phy_device.c | 172 +++++++++++++++++++---- include/linux/phy.h | 17 +++ 6 files changed, 510 insertions(+), 102 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 6450bd71deb4..6a2fe3583478 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -12,6 +12,14 @@ config PHYLIB devices. This option provides infrastructure for managing PHY devices. +config PHYCONTROL + bool " Support for automatically handling PHY state changes" + depends on PHYLIB + help + Adds code to perform all the work for keeping PHY link + state (speed/duplex/etc) up-to-date. Also handles + interrupts. 
+ comment "MII PHY device drivers" depends on PHYLIB diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index fb7cb385a659..e4116a5fbb4c 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,8 +2,9 @@ libphy-objs := phy.o phy_device.o mdio_bus.o -obj-$(CONFIG_MARVELL_PHY) += libphy.o marvell.o -obj-$(CONFIG_DAVICOM_PHY) += libphy.o davicom.o -obj-$(CONFIG_CICADA_PHY) += libphy.o cicada.o -obj-$(CONFIG_LXT_PHY) += libphy.o lxt.o -obj-$(CONFIG_QSEMI_PHY) += libphy.o qsemi.o +obj-$(CONFIG_PHYLIB) += libphy.o +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index d5a05be28818..41f62c0c5fcb 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -38,6 +38,80 @@ #include #include +/* mdiobus_register + * + * description: Called by a bus driver to bring up all the PHYs + * on a given bus, and attach them to the bus + */ +int mdiobus_register(struct mii_bus *bus) +{ + int i; + int err = 0; + + spin_lock_init(&bus->mdio_lock); + + if (NULL == bus || NULL == bus->name || + NULL == bus->read || + NULL == bus->write) + return -EINVAL; + + if (bus->reset) + bus->reset(bus); + + for (i = 0; i < PHY_MAX_ADDR; i++) { + struct phy_device *phydev; + + phydev = get_phy_device(bus, i); + + if (IS_ERR(phydev)) + return PTR_ERR(phydev); + + /* There's a PHY at this address + * We need to set: + * 1) IRQ + * 2) bus_id + * 3) parent + * 4) bus + * 5) mii_bus + * And, we need to register it */ + if (phydev) { + phydev->irq = bus->irq[i]; + + phydev->dev.parent = bus->dev; + phydev->dev.bus = &mdio_bus_type; + sprintf(phydev->dev.bus_id, "phy%d:%d", bus->id, i); + + phydev->bus = bus; + + err = device_register(&phydev->dev); + + if (err) + printk(KERN_ERR "phy %d failed to register\n", + i); + } + + bus->phy_map[i] = phydev; + } + + 
pr_info("%s: probed\n", bus->name); + + return err; +} +EXPORT_SYMBOL(mdiobus_register); + +void mdiobus_unregister(struct mii_bus *bus) +{ + int i; + + for (i = 0; i < PHY_MAX_ADDR; i++) { + if (bus->phy_map[i]) { + device_unregister(&bus->phy_map[i]->dev); + kfree(bus->phy_map[i]); + } + } +} +EXPORT_SYMBOL(mdiobus_unregister); + /* mdio_bus_match * * description: Given a PHY device, and a PHY driver, return 1 if @@ -96,4 +170,7 @@ int __init mdio_bus_init(void) return bus_register(&mdio_bus_type); } - +void __exit mdio_bus_exit(void) +{ + bus_unregister(&mdio_bus_type); +} diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index d3e43631b89b..d9e11f93bf3a 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -39,10 +39,20 @@ #include #include -static void phy_timer(unsigned long data); -static int phy_disable_interrupts(struct phy_device *phydev); -static void phy_sanitize_settings(struct phy_device *phydev); -static int phy_stop_interrupts(struct phy_device *phydev); +/* Convenience function to print out the current phy status + */ +void phy_print_status(struct phy_device *phydev) +{ + pr_info("%s: Link is %s", phydev->dev.bus_id, + phydev->link ? "Up" : "Down"); + if (phydev->link) + printk(" - %d/%s", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "Full" : "Half"); + + printk("\n"); +} +EXPORT_SYMBOL(phy_print_status); /* Convenience functions for reading/writing a given PHY @@ -114,42 +124,6 @@ static inline int phy_aneg_done(struct phy_device *phydev) return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE); } -/* phy_start_aneg - * - * description: Calls the PHY driver's config_aneg, and then - * sets the PHY state to PHY_AN if auto-negotiation is enabled, - * and to PHY_FORCING if auto-negotiation is disabled. Unless - * the PHY is currently HALTED. 
- */ -static int phy_start_aneg(struct phy_device *phydev) -{ - int err; - - spin_lock(&phydev->lock); - - if (AUTONEG_DISABLE == phydev->autoneg) - phy_sanitize_settings(phydev); - - err = phydev->drv->config_aneg(phydev); - - if (err < 0) - goto out_unlock; - - if (phydev->state != PHY_HALTED) { - if (AUTONEG_ENABLE == phydev->autoneg) { - phydev->state = PHY_AN; - phydev->link_timeout = PHY_AN_TIMEOUT; - } else { - phydev->state = PHY_FORCING; - phydev->link_timeout = PHY_FORCE_TIMEOUT; - } - } - -out_unlock: - spin_unlock(&phydev->lock); - return err; -} - /* A structure for mapping a particular speed and duplex * combination to a particular SUPPORTED and ADVERTISED value */ struct phy_setting { @@ -241,7 +215,7 @@ static inline int phy_find_valid(int idx, u32 features) * duplexes. Drop down by one in this order: 1000/FULL, * 1000/HALF, 100/FULL, 100/HALF, 10/FULL, 10/HALF */ -static void phy_sanitize_settings(struct phy_device *phydev) +void phy_sanitize_settings(struct phy_device *phydev) { u32 features = phydev->supported; int idx; @@ -256,31 +230,7 @@ static void phy_sanitize_settings(struct phy_device *phydev) phydev->speed = settings[idx].speed; phydev->duplex = settings[idx].duplex; } - -/* phy_force_reduction - * - * description: Reduces the speed/duplex settings by - * one notch. The order is so: - * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, - * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. - */ -static void phy_force_reduction(struct phy_device *phydev) -{ - int idx; - - idx = phy_find_setting(phydev->speed, phydev->duplex); - - idx++; - - idx = phy_find_valid(idx, phydev->supported); - - phydev->speed = settings[idx].speed; - phydev->duplex = settings[idx].duplex; - - pr_info("Trying %d/%s\n", phydev->speed, - DUPLEX_FULL == phydev->duplex ? - "FULL" : "HALF"); -} +EXPORT_SYMBOL(phy_sanitize_settings); /* phy_ethtool_sset: * A generic ethtool sset function. 
Handles all the details @@ -291,6 +241,11 @@ static void phy_force_reduction(struct phy_device *phydev) * - phy_start_aneg() will make sure forced settings are sane, and * choose the next best ones from the ones selected, so we don't * care if ethtool tries to give us bad values + * + * A note about the PHYCONTROL Layer. If you turn off + * CONFIG_PHYCONTROL, you will need to read the PHY status + * registers after this function completes, and update your + * controller manually. */ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd) { @@ -406,6 +361,51 @@ int phy_mii_ioctl(struct phy_device *phydev, return 0; } +/* phy_start_aneg + * + * description: Sanitizes the settings (if we're not + * autonegotiating them), and then calls the driver's + * config_aneg function. If the PHYCONTROL Layer is operating, + * we change the state to reflect the beginning of + * Auto-negotiation or forcing. + */ +int phy_start_aneg(struct phy_device *phydev) +{ + int err; + + spin_lock(&phydev->lock); + + if (AUTONEG_DISABLE == phydev->autoneg) + phy_sanitize_settings(phydev); + + err = phydev->drv->config_aneg(phydev); + +#ifdef CONFIG_PHYCONTROL + if (err < 0) + goto out_unlock; + + if (phydev->state != PHY_HALTED) { + if (AUTONEG_ENABLE == phydev->autoneg) { + phydev->state = PHY_AN; + phydev->link_timeout = PHY_AN_TIMEOUT; + } else { + phydev->state = PHY_FORCING; + phydev->link_timeout = PHY_FORCE_TIMEOUT; + } + } + +out_unlock: +#endif + spin_unlock(&phydev->lock); + return err; +} +EXPORT_SYMBOL(phy_start_aneg); + + +#ifdef CONFIG_PHYCONTROL +static void phy_change(void *data); +static void phy_timer(unsigned long data); + /* phy_start_machine: * * description: The PHY infrastructure can run a state machine @@ -448,6 +448,32 @@ void phy_stop_machine(struct phy_device *phydev) phydev->adjust_state = NULL; } +/* phy_force_reduction + * + * description: Reduces the speed/duplex settings by + * one notch. 
The order is so: + * 1000/FULL, 1000/HALF, 100/FULL, 100/HALF, + * 10/FULL, 10/HALF. The function bottoms out at 10/HALF. + */ +static void phy_force_reduction(struct phy_device *phydev) +{ + int idx; + + idx = phy_find_setting(phydev->speed, phydev->duplex); + + idx++; + + idx = phy_find_valid(idx, phydev->supported); + + phydev->speed = settings[idx].speed; + phydev->duplex = settings[idx].duplex; + + pr_info("Trying %d/%s\n", phydev->speed, + DUPLEX_FULL == phydev->duplex ? + "FULL" : "HALF"); +} + + /* phy_error: * * Moves the PHY to the HALTED state in response to a read @@ -462,22 +488,44 @@ void phy_error(struct phy_device *phydev) spin_unlock(&phydev->lock); } -static int phy_stop_interrupts(struct phy_device *phydev) +/* phy_interrupt + * + * description: When a PHY interrupt occurs, the handler disables + * interrupts, and schedules a work task to clear the interrupt. + */ +static irqreturn_t phy_interrupt(int irq, void *phy_dat, struct pt_regs *regs) +{ + struct phy_device *phydev = phy_dat; + + /* The MDIO bus is not allowed to be written in interrupt + * context, so we need to disable the irq here. A work + * queue will write the PHY to disable and clear the + * interrupt, and then reenable the irq line. 
*/ + disable_irq_nosync(irq); + + schedule_work(&phydev->phy_queue); + + return IRQ_HANDLED; +} + +/* Enable the interrupts from the PHY side */ +int phy_enable_interrupts(struct phy_device *phydev) { int err; - err = phy_disable_interrupts(phydev); + err = phy_clear_interrupt(phydev); - if (err) - phy_error(phydev); + if (err < 0) + return err; - free_irq(phydev->irq, phydev); + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); return err; } +EXPORT_SYMBOL(phy_enable_interrupts); /* Disable the PHY interrupts from the PHY side */ -static int phy_disable_interrupts(struct phy_device *phydev) +int phy_disable_interrupts(struct phy_device *phydev) { int err; @@ -500,6 +548,138 @@ phy_err: return err; } +EXPORT_SYMBOL(phy_disable_interrupts); + +/* phy_start_interrupts + * + * description: Request the interrupt for the given PHY. If + * this fails, then we set irq to PHY_POLL. + * Otherwise, we enable the interrupts in the PHY. + * Returns 0 on success. + * This should only be called with a valid IRQ number. 
+ */ +int phy_start_interrupts(struct phy_device *phydev) +{ + int err = 0; + + INIT_WORK(&phydev->phy_queue, phy_change, phydev); + + if (request_irq(phydev->irq, phy_interrupt, + SA_SHIRQ, + "phy_interrupt", + phydev) < 0) { + printk(KERN_WARNING "%s: Can't get IRQ %d (PHY)\n", + phydev->bus->name, + phydev->irq); + phydev->irq = PHY_POLL; + return 0; + } + + err = phy_enable_interrupts(phydev); + + return err; +} +EXPORT_SYMBOL(phy_start_interrupts); + +int phy_stop_interrupts(struct phy_device *phydev) +{ + int err; + + err = phy_disable_interrupts(phydev); + + if (err) + phy_error(phydev); + + free_irq(phydev->irq, phydev); + + return err; +} +EXPORT_SYMBOL(phy_stop_interrupts); + + +/* Scheduled by the phy_interrupt/timer to handle PHY changes */ +static void phy_change(void *data) +{ + int err; + struct phy_device *phydev = data; + + err = phy_disable_interrupts(phydev); + + if (err) + goto phy_err; + + spin_lock(&phydev->lock); + if ((PHY_RUNNING == phydev->state) || (PHY_NOLINK == phydev->state)) + phydev->state = PHY_CHANGELINK; + spin_unlock(&phydev->lock); + + enable_irq(phydev->irq); + + /* Reenable interrupts */ + err = phy_config_interrupt(phydev, PHY_INTERRUPT_ENABLED); + + if (err) + goto irq_enable_err; + + return; + +irq_enable_err: + disable_irq(phydev->irq); +phy_err: + phy_error(phydev); +} + +/* Bring down the PHY link, and stop checking the status. */ +void phy_stop(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + if (PHY_HALTED == phydev->state) + goto out_unlock; + + if (phydev->irq != PHY_POLL) { + /* Clear any pending interrupts */ + phy_clear_interrupt(phydev); + + /* Disable PHY Interrupts */ + phy_config_interrupt(phydev, PHY_INTERRUPT_DISABLED); + } + + phydev->state = PHY_HALTED; + +out_unlock: + spin_unlock(&phydev->lock); +} + + +/* phy_start + * + * description: Indicates the attached device's readiness to + * handle PHY-related work. 
Used during startup to start the + * PHY, and after a call to phy_stop() to resume operation. + * Also used to indicate the MDIO bus has cleared an error + * condition. + */ +void phy_start(struct phy_device *phydev) +{ + spin_lock(&phydev->lock); + + switch (phydev->state) { + case PHY_STARTING: + phydev->state = PHY_PENDING; + break; + case PHY_READY: + phydev->state = PHY_UP; + break; + case PHY_HALTED: + phydev->state = PHY_RESUMING; + default: + break; + } + spin_unlock(&phydev->lock); +} +EXPORT_SYMBOL(phy_stop); +EXPORT_SYMBOL(phy_start); /* PHY timer which handles the state machine */ static void phy_timer(unsigned long data) @@ -688,3 +868,4 @@ static void phy_timer(unsigned long data) mod_timer(&phydev->phy_timer, jiffies + PHY_STATE_TIME * HZ); } +#endif /* CONFIG_PHYCONTROL */ diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index c44d54f6310a..33f7bdb5857c 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -39,18 +39,9 @@ #include #include -static int genphy_config_init(struct phy_device *phydev); - -static struct phy_driver genphy_driver = { - .phy_id = 0xffffffff, - .phy_id_mask = 0xffffffff, - .name = "Generic PHY", - .config_init = genphy_config_init, - .features = 0, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .driver = {.owner = THIS_MODULE, }, -}; +static struct phy_driver genphy_driver; +extern int mdio_bus_init(void); +extern void mdio_bus_exit(void); /* get_phy_device * @@ -110,6 +101,7 @@ struct phy_device * get_phy_device(struct mii_bus *bus, int addr) return dev; } +#ifdef CONFIG_PHYCONTROL /* phy_prepare_link: * * description: Tells the PHY infrastructure to handle the @@ -124,6 +116,132 @@ void phy_prepare_link(struct phy_device *phydev, phydev->adjust_link = handler; } +/* phy_connect: + * + * description: Convenience function for connecting ethernet + * devices to PHY devices. 
The default behavior is for + * the PHY infrastructure to handle everything, and only notify + * the connected driver when the link status changes. If you + * don't want, or can't use the provided functionality, you may + * choose to call only the subset of functions which provide + * the desired functionality. + */ +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags) +{ + struct phy_device *phydev; + + phydev = phy_attach(dev, phy_id, flags); + + if (IS_ERR(phydev)) + return phydev; + + phy_prepare_link(phydev, handler); + + phy_start_machine(phydev, NULL); + + if (phydev->irq > 0) + phy_start_interrupts(phydev); + + return phydev; +} +EXPORT_SYMBOL(phy_connect); + +void phy_disconnect(struct phy_device *phydev) +{ + if (phydev->irq > 0) + phy_stop_interrupts(phydev); + + phy_stop_machine(phydev); + + phydev->adjust_link = NULL; + + phy_detach(phydev); +} +EXPORT_SYMBOL(phy_disconnect); + +#endif /* CONFIG_PHYCONTROL */ + +/* phy_attach: + * + * description: Called by drivers to attach to a particular PHY + * device. The phy_device is found, and properly hooked up + * to the phy_driver. If no driver is attached, then the + * genphy_driver is used. The phy_device is given a ptr to + * the attaching device, and given a callback for link status + * change. The phy_device is returned to the attaching + * driver. + */ +static int phy_compare_id(struct device *dev, void *data) +{ + return strcmp((char *)data, dev->bus_id) ? 
0 : 1; +} + +struct phy_device *phy_attach(struct net_device *dev, + const char *phy_id, u32 flags) +{ + struct bus_type *bus = &mdio_bus_type; + struct phy_device *phydev; + struct device *d; + + /* Search the list of PHY devices on the mdio bus for the + * PHY with the requested name */ + d = bus_find_device(bus, NULL, (void *)phy_id, phy_compare_id); + + if (d) { + phydev = to_phy_device(d); + } else { + printk(KERN_ERR "%s not found\n", phy_id); + return ERR_PTR(-ENODEV); + } + + /* Assume that if there is no driver, that it doesn't + * exist, and we should use the genphy driver. */ + if (NULL == d->driver) { + int err; + down_write(&d->bus->subsys.rwsem); + d->driver = &genphy_driver.driver; + + err = d->driver->probe(d); + + if (err < 0) + return ERR_PTR(err); + + device_bind_driver(d); + up_write(&d->bus->subsys.rwsem); + } + + if (phydev->attached_dev) { + printk(KERN_ERR "%s: %s already attached\n", + dev->name, phy_id); + return ERR_PTR(-EBUSY); + } + + phydev->attached_dev = dev; + + phydev->dev_flags = flags; + + return phydev; +} +EXPORT_SYMBOL(phy_attach); + +void phy_detach(struct phy_device *phydev) +{ + phydev->attached_dev = NULL; + + /* If the device had no specific driver before (i.e. 
- it + * was using the generic driver), we unbind the device + * from the generic driver so that there's a chance a + * real driver could be loaded */ + if (phydev->dev.driver == &genphy_driver.driver) { + down_write(&phydev->dev.bus->subsys.rwsem); + device_release_driver(&phydev->dev); + up_write(&phydev->dev.bus->subsys.rwsem); + } +} +EXPORT_SYMBOL(phy_detach); + + /* Generic PHY support and helper functions */ /* genphy_config_advert @@ -132,7 +250,7 @@ void phy_prepare_link(struct phy_device *phydev, * after sanitizing the values to make sure we only advertise * what is supported */ -static int genphy_config_advert(struct phy_device *phydev) +int genphy_config_advert(struct phy_device *phydev) { u32 advertise; int adv; @@ -190,6 +308,7 @@ static int genphy_config_advert(struct phy_device *phydev) return adv; } +EXPORT_SYMBOL(genphy_config_advert); /* genphy_setup_forced * @@ -541,32 +660,37 @@ void phy_driver_unregister(struct phy_driver *drv) } EXPORT_SYMBOL(phy_driver_unregister); +static struct phy_driver genphy_driver = { + .phy_id = 0xffffffff, + .phy_id_mask = 0xffffffff, + .name = "Generic PHY", + .config_init = genphy_config_init, + .features = 0, + .config_aneg = genphy_config_aneg, + .read_status = genphy_read_status, + .driver = {.owner= THIS_MODULE, }, +}; static int __init phy_init(void) { int rc; - extern int mdio_bus_init(void); - - rc = phy_driver_register(&genphy_driver); - if (rc) - goto out; rc = mdio_bus_init(); if (rc) - goto out_unreg; + return rc; - return 0; + rc = phy_driver_register(&genphy_driver); + if (rc) + mdio_bus_exit(); -out_unreg: - phy_driver_unregister(&genphy_driver); -out: return rc; } static void __exit phy_exit(void) { phy_driver_unregister(&genphy_driver); + mdio_bus_exit(); } -module_init(phy_init); +subsys_initcall(phy_init); module_exit(phy_exit); diff --git a/include/linux/phy.h b/include/linux/phy.h index 4f2b5effc16b..72cb67b66e0c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,11 +334,26 @@ 
int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } +int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -355,6 +370,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ -- cgit v1.2.3 From 9e2d3cd34a159948dc753a14573e16bffc04dba8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Aug 2005 03:19:14 +0100 Subject: [PATCH] mod_devicetable.h fixes * ieee1394_device_id has kernel_ulong_t field after an odd number of __u32 ones. 
Since mod_devicetable.h is included both from kernel and from host build helper, we may be in trouble if we are building on 32bit host for 64bit target - userland sees unsigned long long, kernel sees unsigned long and while their sizes match, alignments might not. Fixed by forcing alignment. Fortunately, almost nobody else needs that - the rest of such fields is naturally aligned as it is. * of_device_id has void * in it. Host userland helpers need kernel_ulong_t instead, since their void * might have nothing to do with the kernel one. Fixed in the same way it's done for similar problems in pcmcia_device_id (ifdef __KERNEL__). * pcmcia_device_id has the same problem as ieee1394_device_id. Fixed the same way. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index dce53ac1625d..97bbccdbcca3 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,7 +33,8 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data; + kernel_ulong_t driver_data + __attribute__((aligned(sizeof(kernel_ulong_t)))); }; @@ -182,7 +183,11 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; +#if __KERNEL__ void *data; +#else + kernel_ulong_t data; +#endif }; @@ -208,7 +213,8 @@ struct pcmcia_device_id { #ifdef __KERNEL__ const char * prod_id[4]; #else - kernel_ulong_t prod_id[4]; + kernel_ulong_t prod_id[4] + __attribute__((aligned(sizeof(kernel_ulong_t)))); #endif /* not matched against */ -- cgit v1.2.3 From bf3a46aa9b96f6eb3a49a568f72a2801c3e830c0 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:22:01 -0700 Subject: [NETFILTER]: convert nfmark and conntrack mark to 32bit As discussed at netconf'05, we convert nfmark and conntrack-mark to be 32bits even on 
64bit architectures. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- include/linux/skbuff.h | 2 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- net/ipv4/netfilter/ipt_CLUSTERIP.c | 2 +- net/ipv4/netfilter/ipt_CONNMARK.c | 11 ++++++++--- net/ipv4/netfilter/ipt_MARK.c | 12 ++++++++++++ net/ipv4/netfilter/ipt_connmark.c | 7 +++++++ net/ipv4/netfilter/ipt_mark.c | 7 +++++++ 8 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 08fe5f7d14a0..4ed720f0c4cd 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -171,7 +171,7 @@ struct ip_conntrack #endif /* CONFIG_IP_NF_NAT_NEEDED */ #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - unsigned long mark; + u_int32_t mark; #endif /* Traversed often, so hopefully in different cacheline to top */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 948527e42a60..2e40f4c9f7a6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -259,7 +259,7 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + __u32 nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 61798c46e91d..dccd4abab7ae 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%lu ", conntrack->mark)) + if (seq_printf(s, "mark=%u ", conntrack->mark)) return -ENOSPC; #endif diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6706d3a1bc4f..2d05cafec221 100644 --- 
a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,7 +367,7 @@ target(struct sk_buff **pskb, #ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); + DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 30ddd3e18eb7..8ed744157b1a 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c @@ -40,9 +40,9 @@ target(struct sk_buff **pskb, void *userinfo) { const struct ipt_connmark_target_info *markinfo = targinfo; - unsigned long diff; - unsigned long nfmark; - unsigned long newmark; + u_int32_t diff; + u_int32_t nfmark; + u_int32_t newmark; enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); @@ -94,6 +94,11 @@ checkentry(const char *tablename, } } + if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { + printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 33c6f9b63b8d..8526398346cf 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -76,6 +76,8 @@ checkentry_v0(const char *tablename, unsigned int targinfosize, unsigned int hook_mask) { + struct ipt_mark_target_info *markinfo = targinfo; + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", targinfosize, @@ -88,6 +90,11 @@ checkentry_v0(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } @@ -120,6 +127,11 @@ checkentry_v1(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING 
"MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_connmark.c b/net/ipv4/netfilter/ipt_connmark.c index 2706f96cea55..bf8de47ce004 100644 --- a/net/ipv4/netfilter/ipt_connmark.c +++ b/net/ipv4/netfilter/ipt_connmark.c @@ -54,9 +54,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_connmark_info *cm = + (struct ipt_connmark_info *)matchinfo; if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) return 0; + if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { + printk(KERN_WARNING "connmark: only support 32bit mark\n"); + return 0; + } + return 1; } diff --git a/net/ipv4/netfilter/ipt_mark.c b/net/ipv4/netfilter/ipt_mark.c index 8955728127b9..00bef6cdd3f8 100644 --- a/net/ipv4/netfilter/ipt_mark.c +++ b/net/ipv4/netfilter/ipt_mark.c @@ -37,9 +37,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo; + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) return 0; + if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { + printk(KERN_WARNING "mark: only supports 32bit mark\n"); + return 0; + } + return 1; } -- cgit v1.2.3 From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:24:19 -0700 Subject: [NETFILTER]: reduce netfilter sk_buff enlargement As discussed at netconf'05, we're trying to save every bit in sk_buff. The patch below makes sk_buff 8 bytes smaller. I did some basic testing on my notebook and it seems to work. The only real in-tree user of nfcache was IPVS, who only needs a single bit. Unfortunately I couldn't find some other free bit in sk_buff to stuff that bit into, so I introduced a separate field for them. Maybe the IPVS guys can resolve that to further save space. 
Initially I wanted to shrink pkt_type to three bits (PACKET_HOST and alike are only 6 values defined), but unfortunately the bluetooth code overloads pkt_type :( The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just came up with a way to do it without any skb fields, so it's safe to remove it. - remove all never-implemented 'nfcache' code - don't have ipvs code abuse 'nfcache' field. currently gets its own compile-conditional skb->ipvs_property field. IPVS maintainers can decide to move this bit elsewhere, but nfcache needs to die. - remove skb->nfcache field to save 4 bytes - move skb->nfctinfo into three unused bits to save further 4 bytes Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 +++ include/linux/netfilter_decnet.h | 3 +++ include/linux/netfilter_ipv4.h | 3 +++ include/linux/netfilter_ipv6.h | 3 +++ include/linux/skbuff.h | 10 +++++----- net/bridge/netfilter/ebt_mark.c | 5 ++--- net/core/skbuff.c | 2 -- net/ipv4/ip_output.c | 1 - net/ipv4/ipvs/ip_vs_core.c | 9 +++++---- net/ipv4/ipvs/ip_vs_xmit.c | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 7 +------ net/ipv4/netfilter/ip_nat_core.c | 1 - net/ipv4/netfilter/ip_nat_standalone.c | 2 -- net/ipv4/netfilter/ip_queue.c | 1 - net/ipv4/netfilter/ip_tables.c | 1 - net/ipv4/netfilter/ipt_CLASSIFY.c | 4 +--- net/ipv4/netfilter/ipt_CONNMARK.c | 4 +--- net/ipv4/netfilter/ipt_DSCP.c | 1 - net/ipv4/netfilter/ipt_ECN.c | 2 -- net/ipv4/netfilter/ipt_MARK.c | 10 ++++------ net/ipv4/netfilter/ipt_REJECT.c | 1 - net/ipv4/netfilter/ipt_TCPMSS.c | 1 - net/ipv4/netfilter/ipt_TOS.c | 1 - net/ipv6/ip6_output.c | 16 ++-------------- net/ipv6/netfilter/ip6_queue.c | 1 - net/ipv6/netfilter/ip6_tables.c | 1 - net/ipv6/netfilter/ip6t_MARK.c | 5 ++--- 27 files changed, 36 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2e2045482cb1..ec60856408fd 100644 --- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h @@ -21,10 +21,13 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* Generic cache responses from hook functions. <= 0x2000 is used for protocol-flags. */ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 +#endif #ifdef __KERNEL__ #include diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 3064eec9cb8e..018979484150 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -9,6 +9,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_DN_SRC 0x0001 @@ -18,6 +20,7 @@ #define NFC_DN_IF_IN 0x0004 /* Output device. */ #define NFC_DN_IF_OUT 0x0008 +#endif /* ! __KERNEL__ */ /* DECnet Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 3ebc36afae1a..552815b8193e 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -8,6 +8,8 @@ #include #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP_SRC 0x0001 @@ -35,6 +37,7 @@ #define NFC_IP_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index bee7a5ec7c66..20c069a5e4ac 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -10,6 +10,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP6_SRC 0x0001 @@ -38,6 +40,7 @@ #define NFC_IP6_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP6_PROTO_UNKNOWN 0x2000 +#endif /* ! 
__KERNEL__ */ /* IP6 Hooks */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e40f4c9f7a6..4b929c3c1a98 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -190,7 +190,6 @@ struct skb_shared_info { * @end: End pointer * @destructor: Destruct function * @nfmark: Can be used for communication between hooks - * @nfcache: Cache info * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -252,17 +251,18 @@ struct sk_buff { __u8 local_df:1, cloned:1, ip_summed:2, - nohdr:1; - /* 3 bits spare */ + nohdr:1, + nfctinfo:3; __u8 pkt_type; __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER __u32 nfmark; - __u32 nfcache; - __u32 nfctinfo; struct nf_conntrack *nfct; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + __u8 ipvs_property:1; +#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 02c632b4d325..c93d35ab95c0 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -23,10 +23,9 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, { struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data; - if ((*pskb)->nfmark != info->mark) { + if ((*pskb)->nfmark != info->mark) (*pskb)->nfmark = info->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return info->target; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7eab867ede59..096991cb09d9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -361,7 +361,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->destructor = NULL; #ifdef CONFIG_NETFILTER C(nfmark); - C(nfcache); C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); @@ -424,7 +423,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->destructor = 
NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; - new->nfcache = old->nfcache; new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 80d13103b2b0..766564cb4207 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -392,7 +392,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) #endif #ifdef CONFIG_NETFILTER to->nfmark = from->nfmark; - to->nfcache = from->nfcache; /* Connection association is same as pre-frag packet */ nf_conntrack_put(to->nfct); to->nfct = from->nfct; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 5fb257dd07cb..3ac7eeca04ac 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -22,6 +22,7 @@ * * Changes: * Paul `Rusty' Russell properly handle non-linear skbs + * Harald Welte don't use nfcache * */ @@ -529,7 +530,7 @@ static unsigned int ip_vs_post_routing(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!((*pskb)->nfcache & NFC_IPVS_PROPERTY)) + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ @@ -701,7 +702,7 @@ static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; verdict = NF_ACCEPT; out: @@ -739,7 +740,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, EnterFunction(11); - if (skb->nfcache & NFC_IPVS_PROPERTY) + if (skb->ipvs_property) return NF_ACCEPT; iph = skb->nh.iph; @@ -821,7 +822,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); ip_vs_conn_put(cp); - skb->nfcache |= NFC_IPVS_PROPERTY; + skb->ipvs_property = 1; LeaveFunction(11); return NF_ACCEPT; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index a8512a3fd08a..3b87482049cf 100644 --- 
a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -127,7 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ - (skb)->nfcache |= NFC_IPVS_PROPERTY; \ + (skb)->ipvs_property = 1; \ (skb)->ip_summed = CHECKSUM_NONE; \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a7f0c821a9b2..04c3414361d4 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -625,9 +625,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return NF_DROP; } - /* FIXME: Do this right please. --RR */ - (*pskb)->nfcache |= NFC_UNKNOWN; - /* Doesn't cover locally-generated broadcast, so not worth it. */ #if 0 /* Ignore broadcast: no `connection'. */ @@ -943,10 +940,8 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) skb = ip_defrag(skb, user); local_bh_enable(); - if (skb) { + if (skb) ip_send_check(skb->nh.iph); - skb->nfcache |= NFC_ALTERED; - } return skb; } diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 739b6dde1c82..ed4d731880f7 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -321,7 +321,6 @@ manip_pkt(u_int16_t proto, { struct iphdr *iph; - (*pskb)->nfcache |= NFC_ALTERED; if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 91d5ea1dbbc9..9ecba979033a 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -73,8 +73,6 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); - (*pskb)->nfcache |= NFC_UNKNOWN; - /* If we had a hardware checksum before, it's now invalid */ if ((*pskb)->ip_summed == CHECKSUM_HW) if (skb_checksum_help(*pskb, (out == NULL))) diff --git 
a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index c6baa8174389..bc0af8d8e910 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -392,7 +392,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on local out, as the QUEUE target never diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index c88dfcd38c56..ff8d85d2070d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -312,7 +312,6 @@ ipt_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { struct ipt_entry_target *t; diff --git a/net/ipv4/netfilter/ipt_CLASSIFY.c b/net/ipv4/netfilter/ipt_CLASSIFY.c index 9842e6e23184..dab78d8bd494 100644 --- a/net/ipv4/netfilter/ipt_CLASSIFY.c +++ b/net/ipv4/netfilter/ipt_CLASSIFY.c @@ -32,10 +32,8 @@ target(struct sk_buff **pskb, { const struct ipt_classify_target_info *clinfo = targinfo; - if((*pskb)->priority != clinfo->priority) { + if((*pskb)->priority != clinfo->priority) (*pskb)->priority = clinfo->priority; - (*pskb)->nfcache |= NFC_ALTERED; - } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_CONNMARK.c b/net/ipv4/netfilter/ipt_CONNMARK.c index 8ed744157b1a..134638021339 100644 --- a/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/net/ipv4/netfilter/ipt_CONNMARK.c @@ -61,10 +61,8 @@ target(struct sk_buff **pskb, case IPT_CONNMARK_RESTORE: nfmark = (*pskb)->nfmark; diff = (ct->mark ^ nfmark) & markinfo->mask; - if (diff != 0) { + if (diff != 0) (*pskb)->nfmark = nfmark ^ diff; - (*pskb)->nfcache |= NFC_ALTERED; - } break; } } diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 3ea4509099f9..975476fef27a 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ 
b/net/ipv4/netfilter/ipt_DSCP.c @@ -51,7 +51,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^ 0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 94a0ce1c1c9d..f63a9bc0e4d2 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -43,7 +43,6 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return 1; } @@ -87,7 +86,6 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) tcph->check = csum_fold(csum_partial((char *)diffs, sizeof(diffs), tcph->check^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; return 1; } diff --git a/net/ipv4/netfilter/ipt_MARK.c b/net/ipv4/netfilter/ipt_MARK.c index 8526398346cf..52b4f2c296bf 100644 --- a/net/ipv4/netfilter/ipt_MARK.c +++ b/net/ipv4/netfilter/ipt_MARK.c @@ -29,10 +29,9 @@ target_v0(struct sk_buff **pskb, { const struct ipt_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } @@ -61,10 +60,9 @@ target_v1(struct sk_buff **pskb, break; } - if((*pskb)->nfmark != mark) { + if((*pskb)->nfmark != mark) (*pskb)->nfmark = mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 915696446020..f115a84a4ac6 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -156,7 +156,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) /* This packet will not be the same as the other: clear nf fields */ nf_reset(nskb); - nskb->nfcache = 0; nskb->nfmark = 0; #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(nskb->nf_bridge); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c 
b/net/ipv4/netfilter/ipt_TCPMSS.c index 7b84a254440e..949288319ca8 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -190,7 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, newmss); retmodified: - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; return IPT_CONTINUE; } diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 85c70d240f8b..49abb7eef0a4 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -46,7 +46,6 @@ target(struct sk_buff **pskb, sizeof(diffs), (*pskb)->nh.iph->check ^0xFFFF)); - (*pskb)->nfcache |= NFC_ALTERED; } return IPT_CONTINUE; } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ae652ca14bc9..590d2b797197 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -185,19 +185,6 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif -static inline int ip6_maybe_reroute(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER - if (skb->nfcache & NFC_ALTERED){ - if (ip6_route_me_harder(skb) != 0){ - kfree_skb(skb); - return -EINVAL; - } - } -#endif /* CONFIG_NETFILTER */ - return dst_output(skb); -} - /* * xmit an sk_buff (used by TCP) */ @@ -266,7 +253,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || ipfragok) { IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); - return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute); + return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, + dst_output); } if (net_ratelimit()) diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index a16df5b27c84..83ccedceed17 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -388,7 +388,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - e->skb->nfcache |= NFC_ALTERED; /* * Extra routing may needed on 
local out, as the QUEUE target never diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 73034511c8db..41a67cf6e33a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -401,7 +401,6 @@ ip6t_do_table(struct sk_buff **pskb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - (*pskb)->nfcache |= e->nfcache; if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, &protoff, &offset)) { struct ip6t_entry_target *t; diff --git a/net/ipv6/netfilter/ip6t_MARK.c b/net/ipv6/netfilter/ip6t_MARK.c index d09ceb05013a..81924fcc5857 100644 --- a/net/ipv6/netfilter/ip6t_MARK.c +++ b/net/ipv6/netfilter/ip6t_MARK.c @@ -28,10 +28,9 @@ target(struct sk_buff **pskb, { const struct ip6t_mark_target_info *markinfo = targinfo; - if((*pskb)->nfmark != markinfo->mark) { + if((*pskb)->nfmark != markinfo->mark) (*pskb)->nfmark = markinfo->mark; - (*pskb)->nfcache |= NFC_ALTERED; - } + return IP6T_CONTINUE; } -- cgit v1.2.3 From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:25:21 -0700 Subject: [NET]: Kill skb->list Remove the "list" member of struct sk_buff, as it is entirely redundant. All SKB list removal callers know which list the SKB is on, so storing this in sk_buff does nothing other than taking up some space. Two tricky bits were SCTP, which I took care of, and two ATM drivers which Francois Romieu fixed up. Signed-off-by: David S. 
Miller Signed-off-by: Francois Romieu --- drivers/atm/nicstar.c | 157 ++++++++++++++++++--------------------- drivers/atm/nicstar.h | 16 +++- drivers/atm/zatm.c | 8 +- drivers/bluetooth/bfusb.c | 8 +- drivers/ieee1394/ieee1394_core.c | 4 +- drivers/isdn/act2000/capi.c | 2 +- drivers/net/shaper.c | 50 +------------ drivers/net/wan/sdla_fr.c | 22 ++---- drivers/usb/net/usbnet.c | 21 +++--- include/linux/skbuff.h | 16 ++-- net/atm/ipcommon.c | 3 - net/ax25/ax25_subr.c | 2 +- net/core/skbuff.c | 57 ++++++-------- net/decnet/af_decnet.c | 2 +- net/decnet/dn_nsp_out.c | 2 +- net/econet/af_econet.c | 4 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_input.c | 29 ++++---- net/ipv4/tcp_output.c | 6 +- net/irda/irlap_frame.c | 6 -- net/lapb/lapb_subr.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 6 +- net/netrom/nr_subr.c | 2 +- net/rose/rose_subr.c | 2 +- net/sctp/socket.c | 4 +- net/sctp/ulpqueue.c | 63 +++++++++------- net/unix/garbage.c | 12 +-- net/x25/x25_subr.c | 2 +- 29 files changed, 229 insertions(+), 283 deletions(-) (limited to 'include/linux') diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index b2a7b754fd14..a0e3bd861f1c 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -214,8 +214,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev); static void __devinit ns_init_card_error(ns_dev *card, int error); static scq_info *get_scq(int size, u32 scd); static void free_scq(scq_info *scq, struct atm_vcc *vcc); -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2); +static void push_rxbufs(ns_dev *, struct sk_buff *); static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs); static int ns_open(struct atm_vcc *vcc); static void ns_close(struct atm_vcc *vcc); @@ -766,6 +765,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, 
hb); card->hbpool.count++; } @@ -786,9 +786,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); /* Due to the implementation of push_rxbufs() this is 1, not 0 */ if (j == 1) { @@ -822,9 +823,10 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } /* Test for strange behaviour which leads to crashes */ if ((bcount = ns_stat_sfbqc_get(readl(card->membase + STAT))) < card->sbnr.min) @@ -852,6 +854,7 @@ static int __devinit ns_init_card(int i, struct pci_dev *pcidev) ns_init_card_error(card, error); return error; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; } @@ -1078,12 +1081,18 @@ static void free_scq(scq_info *scq, struct atm_vcc *vcc) /* The handles passed must be pointers to the sk_buff containing the small or large buffer(s) cast to u32. 
*/ -static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, - u32 handle2, u32 addr2) +static void push_rxbufs(ns_dev *card, struct sk_buff *skb) { + struct ns_skb_cb *cb = NS_SKB_CB(skb); + u32 handle1, addr1; + u32 handle2, addr2; u32 stat; unsigned long flags; + /* *BARF* */ + handle2 = addr2 = 0; + handle1 = (u32)skb; + addr1 = (u32)virt_to_bus(skb->data); #ifdef GENERAL_DEBUG if (!addr1) @@ -1093,7 +1102,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, stat = readl(card->membase + STAT); card->sbfqc = ns_stat_sfbqc_get(stat); card->lbfqc = ns_stat_lfbqc_get(stat); - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (!addr2) { @@ -1111,7 +1120,7 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, } } } - else /* type == BUF_LG */ + else /* buf_type == BUF_LG */ { if (!addr2) { @@ -1132,26 +1141,26 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, if (addr2) { - if (type == BUF_SM) + if (cb->buf_type == BUF_SM) { if (card->sbfqc >= card->sbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->sbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } else card->sbfqc += 2; } - else /* (type == BUF_LG) */ + else /* (buf_type == BUF_LG) */ { if (card->lbfqc >= card->lbnr.max) { - skb_unlink((struct sk_buff *) handle1); + skb_unlink((struct sk_buff *) handle1, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle1); - skb_unlink((struct sk_buff *) handle2); + skb_unlink((struct sk_buff *) handle2, &card->lbpool.queue); dev_kfree_skb_any((struct sk_buff *) handle2); return; } @@ -1166,12 +1175,12 @@ static void push_rxbufs(ns_dev *card, u32 type, u32 handle1, u32 addr1, writel(handle2, card->membase + DR2); writel(addr1, card->membase + DR1); 
writel(handle1, card->membase + DR0); - writel(NS_CMD_WRITE_FREEBUFQ | (u32) type, card->membase + CMD); + writel(NS_CMD_WRITE_FREEBUFQ | cb->buf_type, card->membase + CMD); spin_unlock_irqrestore(&card->res_lock, flags); XPRINTK("nicstar%d: Pushing %s buffers at 0x%x and 0x%x.\n", card->index, - (type == BUF_SM ? "small" : "large"), addr1, addr2); + (cb->buf_type == BUF_SM ? "small" : "large"), addr1, addr2); } if (!card->efbie && card->sbfqc >= card->sbnr.min && @@ -1322,9 +1331,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) card->efbie = 0; break; } + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } card->sbfqc = i; process_rsq(card); @@ -1348,9 +1358,10 @@ static irqreturn_t ns_irq_handler(int irq, void *dev_id, struct pt_regs *regs) card->efbie = 0; break; } + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } card->lbfqc = i; process_rsq(card); @@ -2227,6 +2238,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) recycle_rx_buf(card, skb); return; } + NS_SKB_CB(iovb)->buf_type = BUF_NONE; } else if (--card->iovpool.count < card->iovnr.min) @@ -2234,6 +2246,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) struct sk_buff *new_iovb; if ((new_iovb = alloc_skb(NS_IOVBUFSIZE, GFP_ATOMIC)) != NULL) { + NS_SKB_CB(iovb)->buf_type = BUF_NONE; skb_queue_tail(&card->iovpool.queue, new_iovb); card->iovpool.count++; } @@ -2264,7 +2277,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) if (NS_SKB(iovb)->iovcnt == 1) { - if (skb->list != &card->sbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_SM) { printk("nicstar%d: Expected a small buffer, and this is not one.\n", card->index); @@ -2278,7 +2291,7 @@ 
static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) } else /* NS_SKB(iovb)->iovcnt >= 2 */ { - if (skb->list != &card->lbpool.queue) + if (NS_SKB_CB(skb)->buf_type != BUF_LG) { printk("nicstar%d: Expected a large buffer, and this is not one.\n", card->index); @@ -2322,8 +2335,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) /* skb points to a small buffer */ if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2350,8 +2362,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { if (!atm_charge(vcc, sb->truesize)) { - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2367,16 +2378,14 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); } else /* len > NS_SMBUFSIZE, the usual case */ { if (!atm_charge(vcc, skb->truesize)) { - push_rxbufs(card, BUF_LG, (u32) skb, - (u32) virt_to_bus(skb->data), 0, 0); + push_rxbufs(card, skb); atomic_inc(&vcc->stats->rx_drop); } else @@ -2394,8 +2403,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) atomic_inc(&vcc->stats->rx); } - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); } @@ -2430,6 +2438,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) card->hbpool.count++; } } + NS_SKB_CB(hb)->buf_type = BUF_NONE; } else if (--card->hbpool.count < card->hbnr.min) @@ -2437,6 +2446,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) struct sk_buff *new_hb; if ((new_hb = dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2444,6 +2454,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) { if ((new_hb 
= dev_alloc_skb(NS_HBUFSIZE)) != NULL) { + NS_SKB_CB(new_hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, new_hb); card->hbpool.count++; } @@ -2473,8 +2484,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) remaining = len - iov->iov_len; iov++; /* Free the small buffer */ - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), - 0, 0); + push_rxbufs(card, sb); /* Copy all large buffers to the huge buffer and free them */ for (j = 1; j < NS_SKB(iovb)->iovcnt; j++) @@ -2485,8 +2495,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb_put(hb, tocopy); iov++; remaining -= tocopy; - push_rxbufs(card, BUF_LG, (u32) lb, - (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } #ifdef EXTRA_DEBUG if (remaining != 0 || hb->len != len) @@ -2527,9 +2536,10 @@ static void ns_sb_destructor(struct sk_buff *sb) sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) break; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } while (card->sbfqc < card->sbnr.min); } @@ -2550,9 +2560,10 @@ static void ns_lb_destructor(struct sk_buff *lb) lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) break; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } while (card->lbfqc < card->lbnr.min); } @@ -2569,6 +2580,7 @@ static void ns_hb_destructor(struct sk_buff *hb) hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) break; + NS_SKB_CB(hb)->buf_type = BUF_NONE; skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; } @@ -2577,45 +2589,25 @@ static void ns_hb_destructor(struct sk_buff *hb) #endif /* NS_USE_DESTRUCTORS */ - static void recycle_rx_buf(ns_dev *card, struct sk_buff *skb) { - if (skb->list 
== &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } -} + struct ns_skb_cb *cb = NS_SKB_CB(skb); + if (unlikely(cb->buf_type == BUF_NONE)) { + printk("nicstar%d: What kind of rx buffer is this?\n", card->index); + dev_kfree_skb_any(skb); + } else + push_rxbufs(card, skb); +} static void recycle_iovec_rx_bufs(ns_dev *card, struct iovec *iov, int count) { - struct sk_buff *skb; - - for (; count > 0; count--) - { - skb = (struct sk_buff *) (iov++)->iov_base; - if (skb->list == &card->sbpool.queue) - push_rxbufs(card, BUF_SM, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else if (skb->list == &card->lbpool.queue) - push_rxbufs(card, BUF_LG, (u32) skb, (u32) virt_to_bus(skb->data), - 0, 0); - else - { - printk("nicstar%d: What kind of rx buffer is this?\n", card->index); - dev_kfree_skb_any(skb); - } - } + while (count-- > 0) + recycle_rx_buf(card, (struct sk_buff *) (iov++)->iov_base); } - static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) { if (card->iovpool.count < card->iovnr.max) @@ -2631,7 +2623,7 @@ static void recycle_iov_buf(ns_dev *card, struct sk_buff *iovb) static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) { - skb_unlink(sb); + skb_unlink(sb, &card->sbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->sbfqc < card->sbnr.min) #else @@ -2640,10 +2632,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } if (card->sbfqc < 
card->sbnr.init) @@ -2652,10 +2644,10 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) struct sk_buff *new_sb; if ((new_sb = dev_alloc_skb(NS_SMSKBSIZE)) != NULL) { + NS_SKB_CB(new_sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, new_sb); skb_reserve(new_sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) new_sb, - (u32) virt_to_bus(new_sb->data), 0, 0); + push_rxbufs(card, new_sb); } } } @@ -2664,7 +2656,7 @@ static void dequeue_sm_buf(ns_dev *card, struct sk_buff *sb) static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) { - skb_unlink(lb); + skb_unlink(lb, &card->lbpool.queue); #ifdef NS_USE_DESTRUCTORS if (card->lbfqc < card->lbnr.min) #else @@ -2673,10 +2665,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } if (card->lbfqc < card->lbnr.init) @@ -2685,10 +2677,10 @@ static void dequeue_lg_buf(ns_dev *card, struct sk_buff *lb) struct sk_buff *new_lb; if ((new_lb = dev_alloc_skb(NS_LGSKBSIZE)) != NULL) { + NS_SKB_CB(new_lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, new_lb); skb_reserve(new_lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) new_lb, - (u32) virt_to_bus(new_lb->data), 0, 0); + push_rxbufs(card, new_lb); } } } @@ -2880,9 +2872,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) sb = __dev_alloc_skb(NS_SMSKBSIZE, GFP_KERNEL); if (sb == NULL) return -ENOMEM; + NS_SKB_CB(sb)->buf_type = BUF_SM; skb_queue_tail(&card->sbpool.queue, sb); skb_reserve(sb, NS_AAL0_HEADER); - push_rxbufs(card, BUF_SM, (u32) sb, (u32) virt_to_bus(sb->data), 0, 0); + push_rxbufs(card, sb); } break; @@ -2894,9 +2887,10 @@ static int ns_ioctl(struct atm_dev *dev, unsigned 
int cmd, void __user *arg) lb = __dev_alloc_skb(NS_LGSKBSIZE, GFP_KERNEL); if (lb == NULL) return -ENOMEM; + NS_SKB_CB(lb)->buf_type = BUF_LG; skb_queue_tail(&card->lbpool.queue, lb); skb_reserve(lb, NS_SMBUFSIZE); - push_rxbufs(card, BUF_LG, (u32) lb, (u32) virt_to_bus(lb->data), 0, 0); + push_rxbufs(card, lb); } break; @@ -2923,6 +2917,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) hb = __dev_alloc_skb(NS_HBUFSIZE, GFP_KERNEL); if (hb == NULL) return -ENOMEM; + NS_SKB_CB(hb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->hbpool.queue, hb); card->hbpool.count++; @@ -2953,6 +2948,7 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) iovb = alloc_skb(NS_IOVBUFSIZE, GFP_KERNEL); if (iovb == NULL) return -ENOMEM; + NS_SKB_CB(iovb)->buf_type = BUF_NONE; ns_grab_int_lock(card, flags); skb_queue_tail(&card->iovpool.queue, iovb); card->iovpool.count++; @@ -2979,17 +2975,12 @@ static int ns_ioctl(struct atm_dev *dev, unsigned int cmd, void __user *arg) } - static void which_list(ns_dev *card, struct sk_buff *skb) { - printk("It's a %s buffer.\n", skb->list == &card->sbpool.queue ? - "small" : skb->list == &card->lbpool.queue ? "large" : - skb->list == &card->hbpool.queue ? "huge" : - skb->list == &card->iovpool.queue ? "iovec" : "unknown"); + printk("skb buf_type: 0x%08x\n", NS_SKB_CB(skb)->buf_type); } - static void ns_poll(unsigned long arg) { int i; diff --git a/drivers/atm/nicstar.h b/drivers/atm/nicstar.h index ea83c46c8ba5..5997bcb45b59 100644 --- a/drivers/atm/nicstar.h +++ b/drivers/atm/nicstar.h @@ -103,8 +103,14 @@ #define NS_IOREMAP_SIZE 4096 -#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ -#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +/* + * BUF_XX distinguish the Rx buffers depending on their (small/large) size. + * BUG_SM and BUG_LG are both used by the driver and the device. + * BUF_NONE is only used by the driver. 
+ */ +#define BUF_SM 0x00000000 /* These two are used for push_rxbufs() */ +#define BUF_LG 0x00000001 /* CMD, Write_FreeBufQ, LBUF bit */ +#define BUF_NONE 0xffffffff /* Software only: */ #define NS_HBUFSIZE 65568 /* Size of max. AAL5 PDU */ #define NS_MAX_IOVECS (2 + (65568 - NS_SMBUFSIZE) / \ @@ -684,6 +690,12 @@ enum ns_regs /* Device driver structures ***************************************************/ +struct ns_skb_cb { + u32 buf_type; /* BUF_SM/BUF_LG/BUF_NONE */ +}; + +#define NS_SKB_CB(skb) ((struct ns_skb_cb *)((skb)->cb)) + typedef struct tsq_info { void *org; diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index a2b236a966e0..85fee9530fa9 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -417,10 +417,12 @@ printk("dummy: 0x%08lx, 0x%08lx\n",dummy[0],dummy[1]); chan = (here[3] & uPD98401_AAL5_CHAN) >> uPD98401_AAL5_CHAN_SHIFT; if (chan < zatm_dev->chans && zatm_dev->rx_map[chan]) { + int pos = ZATM_VCC(vcc)->pool; + vcc = zatm_dev->rx_map[chan]; - if (skb == zatm_dev->last_free[ZATM_VCC(vcc)->pool]) - zatm_dev->last_free[ZATM_VCC(vcc)->pool] = NULL; - skb_unlink(skb); + if (skb == zatm_dev->last_free[pos]) + zatm_dev->last_free[pos] = NULL; + skb_unlink(skb, zatm_dev->pool + pos); } else { printk(KERN_ERR DEV_LABEL "(itf %d): RX indication " diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c index c42d7e6ac1c5..e8d2a340356d 100644 --- a/drivers/bluetooth/bfusb.c +++ b/drivers/bluetooth/bfusb.c @@ -158,7 +158,7 @@ static int bfusb_send_bulk(struct bfusb *bfusb, struct sk_buff *skb) if (err) { BT_ERR("%s bulk tx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); usb_free_urb(urb); } else atomic_inc(&bfusb->pending_tx); @@ -212,7 +212,7 @@ static void bfusb_tx_complete(struct urb *urb, struct pt_regs *regs) read_lock(&bfusb->lock); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); skb_queue_tail(&bfusb->completed_q, skb); bfusb_tx_wakeup(bfusb); @@ 
-253,7 +253,7 @@ static int bfusb_rx_submit(struct bfusb *bfusb, struct urb *urb) if (err) { BT_ERR("%s bulk rx submit failed urb %p err %d", bfusb->hdev->name, urb, err); - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); usb_free_urb(urb); } @@ -398,7 +398,7 @@ static void bfusb_rx_complete(struct urb *urb, struct pt_regs *regs) buf += len; } - skb_unlink(skb); + skb_unlink(skb, &bfusb->pending_q); kfree_skb(skb); bfusb_rx_submit(bfusb, urb); diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c index b248d89de8b4..d633770fac8e 100644 --- a/drivers/ieee1394/ieee1394_core.c +++ b/drivers/ieee1394/ieee1394_core.c @@ -681,7 +681,7 @@ static void handle_packet_response(struct hpsb_host *host, int tcode, return; } - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); if (packet->state == hpsb_queued) { packet->sendtime = jiffies; @@ -989,7 +989,7 @@ void abort_timedouts(unsigned long __opaque) packet = (struct hpsb_packet *)skb->data; if (time_before(packet->sendtime + expire, jiffies)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &host->pending_packet_queue); packet->state = hpsb_complete; packet->ack_code = ACKX_TIMEOUT; queue_packet_complete(packet); diff --git a/drivers/isdn/act2000/capi.c b/drivers/isdn/act2000/capi.c index afa46681f983..6ae6eb322111 100644 --- a/drivers/isdn/act2000/capi.c +++ b/drivers/isdn/act2000/capi.c @@ -606,7 +606,7 @@ handle_ack(act2000_card *card, act2000_chan *chan, __u8 blocknr) { if ((((m->msg.data_b3_req.fakencci >> 8) & 0xff) == chan->ncci) && (m->msg.data_b3_req.blocknr == blocknr)) { /* found corresponding DATA_B3_REQ */ - skb_unlink(tmp); + skb_unlink(tmp, &card->ackq); chan->queued -= m->msg.data_b3_req.datalen; if (m->msg.data_b3_req.flags) ret = m->msg.data_b3_req.datalen; diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c index 3ad0b6751f6f..221354eea21f 100644 --- a/drivers/net/shaper.c +++ b/drivers/net/shaper.c @@ -156,52 +156,6 @@ 
static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) SHAPERCB(skb)->shapelen= shaper_clocks(shaper,skb); -#ifdef SHAPER_COMPLEX /* and broken.. */ - - while(ptr && ptr!=(struct sk_buff *)&shaper->sendq) - { - if(ptr->pripri - && jiffies - SHAPERCB(ptr)->shapeclock < SHAPER_MAXSLIP) - { - struct sk_buff *tmp=ptr->prev; - - /* - * It goes before us therefore we slip the length - * of the new frame. - */ - - SHAPERCB(ptr)->shapeclock+=SHAPERCB(skb)->shapelen; - SHAPERCB(ptr)->shapelatency+=SHAPERCB(skb)->shapelen; - - /* - * The packet may have slipped so far back it - * fell off. - */ - if(SHAPERCB(ptr)->shapelatency > SHAPER_LATENCY) - { - skb_unlink(ptr); - dev_kfree_skb(ptr); - } - ptr=tmp; - } - else - break; - } - if(ptr==NULL || ptr==(struct sk_buff *)&shaper->sendq) - skb_queue_head(&shaper->sendq,skb); - else - { - struct sk_buff *tmp; - /* - * Set the packet clock out time according to the - * frames ahead. Im sure a bit of thought could drop - * this loop. - */ - for(tmp=skb_peek(&shaper->sendq); tmp!=NULL && tmp!=ptr; tmp=tmp->next) - SHAPERCB(skb)->shapeclock+=tmp->shapelen; - skb_append(ptr,skb); - } -#else { struct sk_buff *tmp; /* @@ -220,7 +174,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev) } else skb_queue_tail(&shaper->sendq, skb); } -#endif + if(sh_debug) printk("Frame queued.\n"); if(skb_queue_len(&shaper->sendq)>SHAPER_QLEN) @@ -302,7 +256,7 @@ static void shaper_kick(struct shaper *shaper) * Pull the frame and get interrupts back on. 
*/ - skb_unlink(skb); + skb_unlink(skb, &shaper->sendq); if (shaper->recovery < SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen) shaper->recovery = SHAPERCB(skb)->shapeclock + SHAPERCB(skb)->shapelen; diff --git a/drivers/net/wan/sdla_fr.c b/drivers/net/wan/sdla_fr.c index c5f5e62aab8b..0497dbdb8631 100644 --- a/drivers/net/wan/sdla_fr.c +++ b/drivers/net/wan/sdla_fr.c @@ -445,7 +445,7 @@ void s508_s514_unlock(sdla_t *card, unsigned long *smp_flags); void s508_s514_lock(sdla_t *card, unsigned long *smp_flags); unsigned short calc_checksum (char *, int); -static int setup_fr_header(struct sk_buff** skb, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode); @@ -1372,7 +1372,7 @@ static int if_send(struct sk_buff* skb, struct net_device* dev) /* Move the if_header() code to here. By inserting frame * relay header in if_header() we would break the * tcpdump and other packet sniffers */ - chan->fr_header_len = setup_fr_header(&skb,dev,chan->common.usedby); + chan->fr_header_len = setup_fr_header(skb,dev,chan->common.usedby); if (chan->fr_header_len < 0 ){ ++chan->ifstats.tx_dropped; ++card->wandev.stats.tx_dropped; @@ -1597,8 +1597,6 @@ static int setup_for_delayed_transmit(struct net_device* dev, return 1; } - skb_unlink(skb); - chan->transmit_length = len; chan->delay_skb = skb; @@ -4871,18 +4869,15 @@ static void unconfig_fr (sdla_t *card) } } -static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, +static int setup_fr_header(struct sk_buff *skb, struct net_device* dev, char op_mode) { - struct sk_buff *skb = *skb_orig; fr_channel_t *chan=dev->priv; - if (op_mode == WANPIPE){ - + if (op_mode == WANPIPE) { chan->fr_header[0]=Q922_UI; switch (htons(skb->protocol)){ - case ETH_P_IP: chan->fr_header[1]=NLPID_IP; break; @@ -4894,16 +4889,14 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, } /* If we are in bridging mode, we must apply - * an Ethernet header */ - if (op_mode == 
BRIDGE || op_mode == BRIDGE_NODE){ - - + * an Ethernet header + */ + if (op_mode == BRIDGE || op_mode == BRIDGE_NODE) { /* Encapsulate the packet as a bridged Ethernet frame. */ #ifdef DEBUG printk(KERN_INFO "%s: encapsulating skb for frame relay\n", dev->name); #endif - chan->fr_header[0] = 0x03; chan->fr_header[1] = 0x00; chan->fr_header[2] = 0x80; @@ -4916,7 +4909,6 @@ static int setup_fr_header(struct sk_buff **skb_orig, struct net_device* dev, /* Yuck. */ skb->protocol = ETH_P_802_3; return 8; - } return 0; diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c index 4528a00c45b0..a2f67245f6da 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -2903,19 +2903,18 @@ static struct net_device_stats *usbnet_get_stats (struct net_device *net) * completion callbacks. 2.5 should have fixed those bugs... */ -static void defer_bh (struct usbnet *dev, struct sk_buff *skb) +static void defer_bh(struct usbnet *dev, struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; unsigned long flags; - spin_lock_irqsave (&list->lock, flags); - __skb_unlink (skb, list); - spin_unlock (&list->lock); - spin_lock (&dev->done.lock); - __skb_queue_tail (&dev->done, skb); + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock(&list->lock); + spin_lock(&dev->done.lock); + __skb_queue_tail(&dev->done, skb); if (dev->done.qlen == 1) - tasklet_schedule (&dev->bh); - spin_unlock_irqrestore (&dev->done.lock, flags); + tasklet_schedule(&dev->bh); + spin_unlock_irqrestore(&dev->done.lock, flags); } /* some work can't be done in tasklets, so we use keventd @@ -3120,7 +3119,7 @@ block: break; } - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->rxq); if (urb) { if (netif_running (dev->net) @@ -3490,7 +3489,7 @@ static void tx_complete (struct urb *urb, struct pt_regs *regs) urb->dev = NULL; entry->state = tx_done; - defer_bh (dev, skb); + defer_bh(dev, skb, &dev->txq); } 
/*-------------------------------------------------------------------------*/ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b929c3c1a98..76c68851474c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -204,7 +204,6 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - struct sk_buff_head *list; struct sock *sk; struct timeval stamp; struct net_device *dev; @@ -597,7 +596,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; prev = (struct sk_buff *)list; next = prev->next; @@ -622,7 +620,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; next = (struct sk_buff *)list; prev = next->prev; @@ -655,7 +652,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) next->prev = prev; prev->next = next; result->next = result->prev = NULL; - result->list = NULL; } return result; } @@ -664,7 +660,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) /* * Insert a packet on a list. */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -672,24 +668,23 @@ static inline void __skb_insert(struct sk_buff *newsk, newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; - newsk->list = list; list->qlen++; } /* * Place a packet after a given packet in a list. 
*/ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { - __skb_insert(newsk, old, old->next, old->list); + __skb_insert(newsk, old, old->next, list); } /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb); +extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -698,7 +693,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - skb->list = NULL; next->prev = prev; prev->next = next; } diff --git a/net/atm/ipcommon.c b/net/atm/ipcommon.c index 181a3002d8ad..4b1faca5013f 100644 --- a/net/atm/ipcommon.c +++ b/net/atm/ipcommon.c @@ -34,7 +34,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) { - struct sk_buff *skb; unsigned long flags; struct sk_buff *skb_from = (struct sk_buff *) from; struct sk_buff *skb_to = (struct sk_buff *) to; @@ -47,8 +46,6 @@ void skb_migrate(struct sk_buff_head *from,struct sk_buff_head *to) prev->next = skb_to; to->prev->next = from->next; to->prev = from->prev; - for (skb = from->next; skb != skb_to; skb = skb->next) - skb->list = to; to->qlen += from->qlen; spin_unlock(&to->lock); from->prev = skb_from; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 99694b57f6f5..eb7343c10a9f 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -76,7 +76,7 @@ void ax25_requeue_frames(ax25_cb *ax25) if (skb_prev == NULL) skb_queue_head(&ax25->write_queue, skb); else - skb_append(skb_prev, skb); + 
skb_append(skb_prev, skb, &ax25->write_queue); skb_prev = skb; } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 096991cb09d9..e6564b0a6839 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -281,8 +281,6 @@ void kfree_skbmem(struct sk_buff *skb) void __kfree_skb(struct sk_buff *skb) { - BUG_ON(skb->list != NULL); - dst_release(skb->dst); #ifdef CONFIG_XFRM secpath_put(skb->sp); @@ -333,7 +331,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) #define C(x) n->x = skb->x n->next = n->prev = NULL; - n->list = NULL; n->sk = NULL; C(stamp); C(dev); @@ -403,7 +400,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; - new->list = NULL; new->sk = NULL; new->dev = old->dev; new->real_dev = old->real_dev; @@ -1342,50 +1338,43 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } + /** * skb_unlink - remove a buffer from a list * @skb: buffer to remove + * @list: list to use * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls + * Remove a packet from a list. The list locks are taken and this + * function is atomic with respect to other list locked calls * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. + * You must know what list the SKB is on. 
*/ -void skb_unlink(struct sk_buff *skb) +void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff_head *list = skb->list; - - if (list) { - unsigned long flags; + unsigned long flags; - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } + spin_lock_irqsave(&list->lock, flags); + __skb_unlink(skb, list); + spin_unlock_irqrestore(&list->lock, flags); } - /** * skb_append - append a buffer * @old: buffer to insert after * @newsk: buffer to insert + * @list: list to use * * Place a packet after a given packet in a list. The list locks are taken * and this function is atomic with respect to other list locked calls. * A buffer cannot be placed on two lists at the same time. */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) +void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_append(old, newsk, list); + spin_unlock_irqrestore(&list->lock, flags); } @@ -1393,19 +1382,21 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk) * skb_insert - insert a buffer * @old: buffer to insert before * @newsk: buffer to insert + * @list: list to use + * + * Place a packet before a given packet in a list. The list locks are + * taken and this function is atomic with respect to other list locked + * calls. * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls * A buffer cannot be placed on two lists at the same time. 
*/ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) +void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { unsigned long flags; - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); + spin_lock_irqsave(&list->lock, flags); + __skb_insert(newsk, old->prev, old, list); + spin_unlock_irqrestore(&list->lock, flags); } #if 0 diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index acdd18e6adb2..0c30409fe9e5 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1763,7 +1763,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock, nskb = skb->next; if (skb->len == 0) { - skb_unlink(skb); + skb_unlink(skb, queue); kfree_skb(skb); /* * N.B. Don't refer to skb or cb after this point diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 8cce1fdbda90..e0bebf4bbcad 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -479,7 +479,7 @@ int dn_nsp_check_xmit_queue(struct sock *sk, struct sk_buff *skb, struct sk_buff xmit_count = cb2->xmit_count; segnum = cb2->segnum; /* Remove and drop ack'ed packet */ - skb_unlink(ack); + skb_unlink(ack, q); kfree_skb(ack); ack = NULL; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index de691e119e17..b807a314269e 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -869,7 +869,7 @@ static void aun_tx_ack(unsigned long seq, int result) foundit: tx_result(skb->sk, eb->cookie, result); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); spin_unlock_irqrestore(&aun_queue_lock, flags); kfree_skb(skb); } @@ -947,7 +947,7 @@ static void ab_cleanup(unsigned long h) { tx_result(skb->sk, eb->cookie, ECTYPE_TRANSMIT_NOT_PRESENT); - skb_unlink(skb); + skb_unlink(skb, &aun_queue); kfree_skb(skb); } skb = newskb; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 69b1fcf70077..d2696af46c70 100644 --- a/net/ipv4/tcp.c +++ 
b/net/ipv4/tcp.c @@ -975,7 +975,7 @@ do_fault: if (!skb->len) { if (sk->sk_send_head == skb) sk->sk_send_head = NULL; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53a8a5399f1e..ffa24025cd02 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2085,7 +2085,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt seq_rtt = now - scb->when; tcp_dec_pcount_approx(&tp->fackets_out, skb); tcp_packets_out_dec(tp, skb); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_write_queue); sk_stream_free_skb(sk, skb); } @@ -2853,7 +2853,7 @@ static void tcp_ofo_queue(struct sock *sk) if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { SOCK_DEBUG(sk, "ofo packet was already received \n"); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __kfree_skb(skb); continue; } @@ -2861,7 +2861,7 @@ static void tcp_ofo_queue(struct sock *sk) tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &tp->out_of_order_queue); __skb_queue_tail(&sk->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; if(skb->h.th->fin) @@ -3027,7 +3027,7 @@ drop: u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (seq == TCP_SKB_CB(skb1)->end_seq) { - __skb_append(skb1, skb); + __skb_append(skb1, skb, &tp->out_of_order_queue); if (!tp->rx_opt.num_sacks || tp->selective_acks[0].end_seq != seq) @@ -3071,7 +3071,7 @@ drop: tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq); break; } - __skb_unlink(skb1, skb1->list); + __skb_unlink(skb1, &tp->out_of_order_queue); tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); __kfree_skb(skb1); } @@ -3088,8 +3088,9 @@ add_sack: * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff *head, - struct sk_buff *tail, u32 start, u32 end) +tcp_collapse(struct sock *sk, struct 
sk_buff_head *list, + struct sk_buff *head, struct sk_buff *tail, + u32 start, u32 end) { struct sk_buff *skb; @@ -3099,7 +3100,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3145,7 +3146,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, nskb->mac.raw = nskb->head + (skb->mac.raw-skb->head); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, skb->list); + __skb_insert(nskb, skb->prev, skb, list); sk_stream_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -3164,7 +3165,7 @@ tcp_collapse(struct sock *sk, struct sk_buff *head, } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { struct sk_buff *next = skb->next; - __skb_unlink(skb, skb->list); + __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS_BH(LINUX_MIB_TCPRCVCOLLAPSED); skb = next; @@ -3200,7 +3201,8 @@ static void tcp_collapse_ofo_queue(struct sock *sk) if (skb == (struct sk_buff *)&tp->out_of_order_queue || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, head, skb, start, end); + tcp_collapse(sk, &tp->out_of_order_queue, + head, skb, start, end); head = skb; if (skb == (struct sk_buff *)&tp->out_of_order_queue) break; @@ -3237,7 +3239,8 @@ static int tcp_prune_queue(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss); tcp_collapse_ofo_queue(sk); - tcp_collapse(sk, sk->sk_receive_queue.next, + tcp_collapse(sk, &sk->sk_receive_queue, + sk->sk_receive_queue.next, (struct sk_buff*)&sk->sk_receive_queue, tp->copied_seq, tp->rcv_nxt); sk_stream_mem_reclaim(sk); @@ -3462,7 +3465,7 @@ static void tcp_check_urg(struct sock * sk, struct tcphdr * th) struct 
sk_buff *skb = skb_peek(&sk->sk_receive_queue); tp->copied_seq++; if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dd30dd137b74..a4d1eb9a0926 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -505,7 +505,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -893,7 +893,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* Link BUFF into the send queue. */ skb_header_release(buff); - __skb_append(skb, buff); + __skb_append(skb, buff, &sk->sk_write_queue); return 0; } @@ -1238,7 +1238,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m tcp_skb_pcount(next_skb) != 1); /* Ok. We will be able to collapse the packet. 
*/ - __skb_unlink(next_skb, next_skb->list); + __skb_unlink(next_skb, &sk->sk_write_queue); memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index 6dafbb43b529..eb65b4925b51 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -988,9 +988,6 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command) IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; @@ -1063,9 +1060,6 @@ void irlap_resend_rejected_frame(struct irlap_cb *self, int command) IRDA_DEBUG(0, "%s(), unable to copy\n", __FUNCTION__); return; } - /* Unlink tx_skb from list */ - tx_skb->next = tx_skb->prev = NULL; - tx_skb->list = NULL; /* Clear old Nr field + poll bit */ tx_skb->data[1] &= 0x0f; diff --git a/net/lapb/lapb_subr.c b/net/lapb/lapb_subr.c index 5de05a0bc0ff..8b5eefd70f03 100644 --- a/net/lapb/lapb_subr.c +++ b/net/lapb/lapb_subr.c @@ -78,7 +78,7 @@ void lapb_requeue_frames(struct lapb_cb *lapb) if (!skb_prev) skb_queue_head(&lapb->write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &lapb->write_queue); skb_prev = skb; } } diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 20b4cfebd74c..f49b82da8264 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -714,7 +714,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, if (uaddr) memcpy(uaddr, llc_ui_skb_cb(skb), sizeof(*uaddr)); msg->msg_namelen = sizeof(*uaddr); - if (!skb->list) { + if (!skb->next) { dgram_free: kfree_skb(skb); } diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index eba812a9c69c..571548619469 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -71,7 +71,11 @@ int llc_conn_state_process(struct sock *sk, struct sk_buff *skb) if (!ev->ind_prim && !ev->cfm_prim) { /* indicate or confirm not 
required */ - if (!skb->list) + /* XXX this is not very pretty, perhaps we should store + * XXX indicate/confirm-needed state in the llc_conn_state_ev + * XXX control block of the SKB instead? -DaveM + */ + if (!skb->next) goto out_kfree_skb; goto out_skb_put; } diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 0627347b14b8..252c1b3ecd78 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -77,7 +77,7 @@ void nr_requeue_frames(struct sock *sk) if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7db7e1cedc3a..ae135e27799b 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -74,7 +74,7 @@ void rose_requeue_frames(struct sock *sk) if (skb_prev == NULL) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 091a66f06a35..4454afe4727e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4892,7 +4892,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); } } @@ -4921,7 +4921,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - __skb_unlink(skb, skb->list); + __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); } } diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 8bbc279d6c99..ec2c857eae7f 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -50,9 +50,9 @@ /* Forward declarations for internal 
helpers. */ static struct sctp_ulpevent * sctp_ulpq_reasm(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *); + struct sctp_ulpevent *); static struct sctp_ulpevent * sctp_ulpq_order(struct sctp_ulpq *, - struct sctp_ulpevent *); + struct sctp_ulpevent *); /* 1st Level Abstractions */ @@ -125,7 +125,9 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, event = sctp_ulpq_order(ulpq, event); } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. + */ if (event) sctp_ulpq_tail_event(ulpq, event); @@ -158,14 +160,18 @@ static int sctp_ulpq_clear_pd(struct sctp_ulpq *ulpq) return sctp_clear_pd(ulpq->asoc->base.sk); } - - +/* If the SKB of 'event' is on a list, it is the first such member + * of that list. + */ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { struct sock *sk = ulpq->asoc->base.sk; - struct sk_buff_head *queue; + struct sk_buff_head *queue, *skb_list; + struct sk_buff *skb = sctp_event2skb(event); int clear_pd = 0; + skb_list = (struct sk_buff_head *) skb->prev; + /* If the socket is just going to throw this away, do not * even try to deliver it. */ @@ -197,10 +203,10 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) /* If we are harvesting multiple skbs they will be * collected on a list. */ - if (sctp_event2skb(event)->list) - sctp_skb_list_tail(sctp_event2skb(event)->list, queue); + if (skb_list) + sctp_skb_list_tail(skb_list, queue); else - __skb_queue_tail(queue, sctp_event2skb(event)); + __skb_queue_tail(queue, skb); /* Did we just complete partial delivery and need to get * rolling again? 
Move pending data to the receive @@ -214,10 +220,11 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) return 1; out_free: - if (sctp_event2skb(event)->list) - sctp_queue_purge_ulpevents(sctp_event2skb(event)->list); + if (skb_list) + sctp_queue_purge_ulpevents(skb_list); else sctp_ulpevent_free(event); + return 0; } @@ -269,7 +276,7 @@ static inline void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, * payload was fragmented on the way and ip had to reassemble them. * We add the rest of skb's to the first skb's fraglist. */ -static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, struct sk_buff *l_frag) +static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *queue, struct sk_buff *f_frag, struct sk_buff *l_frag) { struct sk_buff *pos; struct sctp_ulpevent *event; @@ -294,7 +301,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, skb_shinfo(f_frag)->frag_list = pos; /* Remove the first fragment from the reassembly queue. */ - __skb_unlink(f_frag, f_frag->list); + __skb_unlink(f_frag, queue); while (pos) { pnext = pos->next; @@ -304,7 +311,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff *f_frag, f_frag->data_len += pos->len; /* Remove the fragment from the reassembly queue. */ - __skb_unlink(pos, pos->list); + __skb_unlink(pos, queue); /* Break if we have reached the last fragment. */ if (pos == l_frag) @@ -375,7 +382,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_u done: return retval; found: - retval = sctp_make_reassembled_event(first_frag, pos); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, pos); if (retval) retval->msg_flags |= MSG_EOR; goto done; @@ -435,7 +442,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq * further. 
*/ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); if (retval && is_last) retval->msg_flags |= MSG_EOR; @@ -527,7 +534,7 @@ static inline struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *u * further. */ done: - retval = sctp_make_reassembled_event(first_frag, last_frag); + retval = sctp_make_reassembled_event(&ulpq->reasm, first_frag, last_frag); return retval; } @@ -537,6 +544,7 @@ done: static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) { + struct sk_buff_head *event_list; struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; struct sctp_stream *in; @@ -547,6 +555,8 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, ssn = event->ssn; in = &ulpq->asoc->ssnmap->in; + event_list = (struct sk_buff_head *) sctp_event2skb(event)->prev; + /* We are holding the chunks by stream, by SSN. */ sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; @@ -567,10 +577,10 @@ static inline void sctp_ulpq_retrieve_ordered(struct sctp_ulpq *ulpq, /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, sid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); /* Attach all gathered skbs to the event. 
*/ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(event_list, pos); } } @@ -626,7 +636,7 @@ static inline void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, - struct sctp_ulpevent *event) + struct sctp_ulpevent *event) { __u16 sid, ssn; struct sctp_stream *in; @@ -667,7 +677,7 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) { struct sk_buff *pos, *tmp; struct sctp_ulpevent *cevent; - struct sctp_ulpevent *event = NULL; + struct sctp_ulpevent *event; struct sctp_stream *in; struct sk_buff_head temp; __u16 csid, cssn; @@ -675,6 +685,8 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) in = &ulpq->asoc->ssnmap->in; /* We are holding the chunks by stream, by SSN. */ + skb_queue_head_init(&temp); + event = NULL; sctp_skb_for_each(pos, &ulpq->lobby, tmp) { cevent = (struct sctp_ulpevent *) pos->cb; csid = cevent->stream; @@ -686,19 +698,20 @@ static inline void sctp_ulpq_reap_ordered(struct sctp_ulpq *ulpq) /* Found it, so mark in the ssnmap. */ sctp_ssn_next(in, csid); - __skb_unlink(pos, pos->list); + __skb_unlink(pos, &ulpq->lobby); if (!event) { /* Create a temporary list to collect chunks on. */ event = sctp_skb2event(pos); - skb_queue_head_init(&temp); __skb_queue_tail(&temp, sctp_event2skb(event)); } else { /* Attach all gathered skbs to the event. */ - __skb_queue_tail(sctp_event2skb(event)->list, pos); + __skb_queue_tail(&temp, pos); } } - /* Send event to the ULP. */ + /* Send event to the ULP. 'event' is the sctp_ulpevent for + * very first SKB on the 'temp' list. 
+ */ if (event) sctp_ulpq_tail_event(ulpq, event); } diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 4bd95c8f5934..46252d2807bb 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -286,16 +286,16 @@ void unix_gc(void) skb = skb_peek(&s->sk_receive_queue); while (skb && skb != (struct sk_buff *)&s->sk_receive_queue) { - nextsk=skb->next; + nextsk = skb->next; /* * Do we have file descriptors ? */ - if(UNIXCB(skb).fp) - { - __skb_unlink(skb, skb->list); - __skb_queue_tail(&hitlist,skb); + if (UNIXCB(skb).fp) { + __skb_unlink(skb, + &s->sk_receive_queue); + __skb_queue_tail(&hitlist, skb); } - skb=nextsk; + skb = nextsk; } spin_unlock(&s->sk_receive_queue.lock); } diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 7fd872ad0c20..e20cfadad4d9 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -80,7 +80,7 @@ void x25_requeue_frames(struct sock *sk) if (!skb_prev) skb_queue_head(&sk->sk_write_queue, skb); else - skb_append(skb_prev, skb); + skb_append(skb_prev, skb, &sk->sk_write_queue); skb_prev = skb; } } -- cgit v1.2.3 From abc3bc58047efa72ee9c2e208cbeb73d261ad703 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:25:56 -0700 Subject: [NET]: Kill skb->tc_classid Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 --- include/net/act_api.h | 2 +- net/core/skbuff.c | 2 -- net/sched/act_api.c | 7 +------ net/sched/gact.c | 2 +- net/sched/ipt.c | 2 +- net/sched/mirred.c | 2 +- net/sched/pedit.c | 2 +- net/sched/police.c | 3 ++- net/sched/simple.c | 2 +- 10 files changed, 9 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 76c68851474c..f10a8b9628b0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ struct skb_shared_info { * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @tc_classid: traffic control classid */ struct sk_buff { @@ -275,9 +274,7 @@ struct sk_buff { __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u32 tc_verd; /* traffic control verdict */ - __u32 tc_classid; /* traffic control classid */ #endif - #endif diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a995f576..b55eb7c7f033 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -63,7 +63,7 @@ struct tc_action_ops __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; - int (*act)(struct sk_buff **, struct tc_action *); + int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e6564b0a6839..8896e6f8aa42 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -300,7 +300,6 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_index = 0; #ifdef CONFIG_NET_CLS_ACT skb->tc_verd = 0; - skb->tc_classid = 0; #endif #endif @@ -376,7 +375,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->tc_verd = 
CLR_TC_OK2MUNGE(n->tc_verd); n->tc_verd = CLR_TC_MUNGED(n->tc_verd); C(input_dev); - C(tc_classid); #endif #endif diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 249c61936ea0..c896a0118a32 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -165,7 +165,7 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, while ((a = act) != NULL) { repeat: if (a->ops && a->ops->act) { - ret = a->ops->act(&skb, a); + ret = a->ops->act(&skb, a, res); if (TC_MUNGED & skb->tc_verd) { /* copied already, allow trampling */ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -179,11 +179,6 @@ repeat: act = a->next; } exec_done: - if (skb->tc_classid > 0) { - res->classid = skb->tc_classid; - res->class = 0; - skb->tc_classid = 0; - } return ret; } diff --git a/net/sched/gact.c b/net/sched/gact.c index a811c89fef7f..d1c6d542912a 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -135,7 +135,7 @@ tcf_gact_cleanup(struct tc_action *a, int bind) } static int -tcf_gact(struct sk_buff **pskb, struct tc_action *a) +tcf_gact(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_gact *p = PRIV(a, gact); struct sk_buff *skb = *pskb; diff --git a/net/sched/ipt.c b/net/sched/ipt.c index b114d994d523..f50136eed211 100644 --- a/net/sched/ipt.c +++ b/net/sched/ipt.c @@ -201,7 +201,7 @@ tcf_ipt_cleanup(struct tc_action *a, int bind) } static int -tcf_ipt(struct sk_buff **pskb, struct tc_action *a) +tcf_ipt(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; struct tcf_ipt *p = PRIV(a, ipt); diff --git a/net/sched/mirred.c b/net/sched/mirred.c index f309ce336803..20d06916dc0b 100644 --- a/net/sched/mirred.c +++ b/net/sched/mirred.c @@ -158,7 +158,7 @@ tcf_mirred_cleanup(struct tc_action *a, int bind) } static int -tcf_mirred(struct sk_buff **pskb, struct tc_action *a) +tcf_mirred(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_mirred *p = PRIV(a, mirred); struct 
net_device *dev; diff --git a/net/sched/pedit.c b/net/sched/pedit.c index 678be6a645fb..767d24f4610e 100644 --- a/net/sched/pedit.c +++ b/net/sched/pedit.c @@ -130,7 +130,7 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) } static int -tcf_pedit(struct sk_buff **pskb, struct tc_action *a) +tcf_pedit(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct tcf_pedit *p = PRIV(a, pedit); struct sk_buff *skb = *pskb; diff --git a/net/sched/police.c b/net/sched/police.c index c03545faf523..eb39fb2f39b6 100644 --- a/net/sched/police.c +++ b/net/sched/police.c @@ -284,7 +284,8 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a) +static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a, + struct tcf_result *res) { psched_time_t now; struct sk_buff *skb = *pskb; diff --git a/net/sched/simple.c b/net/sched/simple.c index 3ab4c675ab5d..8a6ae4f491e8 100644 --- a/net/sched/simple.c +++ b/net/sched/simple.c @@ -44,7 +44,7 @@ static DEFINE_RWLOCK(simp_lock); #include #include -static int tcf_simp(struct sk_buff **pskb, struct tc_action *a) +static int tcf_simp(struct sk_buff **pskb, struct tc_action *a, struct tcf_result *res) { struct sk_buff *skb = *pskb; struct tcf_defact *p = PRIV(a, defact); -- cgit v1.2.3 From ac3247baf8ecadf168642e3898b0212c29c79715 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:28:03 -0700 Subject: [NETFILTER]: connection tracking event notifiers This adds a notifier chain based event mechanism for ip_conntrack state changes. As opposed to the previous implementations in patch-o-matic, we no longer need a field in the skb to achieve this. Thanks to the valuable input from Patrick McHardy and Rusty on the idea of a per_cpu implementation. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 144 +++++++++++++++++++++++ include/linux/netfilter_ipv4/ip_conntrack_core.h | 17 ++- net/ipv4/netfilter/Kconfig | 10 ++ net/ipv4/netfilter/ip_conntrack_core.c | 122 ++++++++++++++++++- net/ipv4/netfilter/ip_conntrack_ftp.c | 12 +- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 1 + net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 2 + net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 4 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 3 +- net/ipv4/netfilter/ip_conntrack_standalone.c | 10 ++ 10 files changed, 311 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ed720f0c4cd..ae1270c97b50 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -65,6 +65,63 @@ enum ip_conntrack_status { /* Both together */ IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), + + /* Connection is dying (removed from lists), can not be unset. 
*/ + IPS_DYING_BIT = 9, + IPS_DYING = (1 << IPS_DYING_BIT), +}; + +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), }; #ifdef __KERNEL__ @@ -280,6 +337,11 @@ static inline int is_confirmed(struct ip_conntrack *ct) return test_bit(IPS_CONFIRMED_BIT, &ct->status); } +static inline int is_dying(struct ip_conntrack *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + extern unsigned int ip_conntrack_htable_size; struct ip_conntrack_stat @@ -303,6 +365,88 @@ struct ip_conntrack_stat #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#include + +struct ip_conntrack_ecache { + struct ip_conntrack *ct; + unsigned int events; +}; +DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + 
+#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x) + +extern struct notifier_block *ip_conntrack_chain; +extern struct notifier_block *ip_conntrack_expect_chain; + +static inline int ip_conntrack_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_chain, nb); +} + +static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_chain, nb); +} + +static inline int +ip_conntrack_expect_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_expect_chain, nb); +} + +static inline int +ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); +} + +static inline void +ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { + if (net_ratelimit()) { + printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); + dump_stack(); + } + } + ecache->events |= event; +} + +extern void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); +extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); + +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) +{ + if (is_confirmed(ct) && !is_dying(ct)) + notifier_call_chain(&ip_conntrack_chain, event, ct); +} + +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) +{ + notifier_call_chain(&ip_conntrack_expect_chain, event, exp); +} +#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ +static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) {} +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) {} 
+static inline void ip_conntrack_deliver_cached_events_for( + struct ip_conntrack *ct) {} +static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) {} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + #ifdef CONFIG_IP_NF_NAT_NEEDED static inline int ip_nat_initialized(struct ip_conntrack *conntrack, enum ip_nat_manip_type manip) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 694aec9b4784..46eeea1e2733 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -38,12 +38,21 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ static inline int ip_conntrack_confirm(struct sk_buff **pskb) { - if ((*pskb)->nfct - && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) - return __ip_conntrack_confirm(pskb); - return NF_ACCEPT; + struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; + int ret = NF_ACCEPT; + + if (ct && !is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_conntrack_deliver_cached_events_for(ct); + + return ret; } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct ip_conntrack_ecache; +extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); +#endif + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 46d4cb1c06f0..ff3393eba924 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -40,6 +40,16 @@ config IP_NF_CONNTRACK_MARK of packets, but this mark value is kept in the conntrack session instead of the individual packets. 
+config IP_NF_CONNTRACK_EVENTS + bool "Connection tracking events" + depends on IP_NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + provide a notifier chain that can be used by other kernel code + to get notified about changes in the connection tracking state. + + If unsure, say `N'. + config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 04c3414361d4..caf89deae116 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -37,6 +37,7 @@ #include #include #include +#include /* ip_conntrack_lock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -49,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.1" +#define IP_CONNTRACK_VERSION "2.2" #if 0 #define DEBUGP printk @@ -76,6 +77,81 @@ unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct notifier_block *ip_conntrack_chain; +struct notifier_block *ip_conntrack_expect_chain; + +DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + +static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) + notifier_call_chain(&ip_conntrack_chain, ecache->events, + ecache->ct); + ecache->events = 0; +} + +void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) +{ + __deliver_cached_events(ecache); +} + +/* Deliver all cached events for a particular conntrack. 
This is called + * by code prior to async packet handling or freeing the skb */ +void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (!ct) + return; + + if (ecache->ct == ct) { + DEBUGP("ecache: delivering event for %p\n", ct); + __deliver_cached_events(ecache); + } else { + if (net_ratelimit()) + printk(KERN_WARNING "ecache: want to deliver for %p, " + "but cache has %p\n", ct, ecache->ct); + } + + /* signalize that events have already been delivered */ + ecache->ct = NULL; +} + +/* Deliver cached events for old pending events, if current conntrack != old */ +void ip_conntrack_event_cache_init(const struct sk_buff *skb) +{ + struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + /* take care of delivering potentially old events */ + if (ecache->ct != ct) { + enum ip_conntrack_info ctinfo; + /* we have to check, since at startup the cache is NULL */ + if (likely(ecache->ct)) { + DEBUGP("ecache: entered for different conntrack: " + "ecache->ct=%p, skb->nfct=%p. 
delivering " + "events\n", ecache->ct, ct); + __deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + } else { + DEBUGP("ecache: entered for conntrack %p, " + "cache was clean before\n", ct); + } + + /* initialize for this conntrack/packet */ + ecache->ct = ip_conntrack_get(skb, &ctinfo); + /* ecache->events cleared by __deliver_cached_events() */ + } else { + DEBUGP("ecache: re-entered for conntrack %p.\n", ct); + } +} + +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); void @@ -223,6 +299,8 @@ destroy_conntrack(struct nf_conntrack *nfct) IP_NF_ASSERT(atomic_read(&nfct->use) == 0); IP_NF_ASSERT(!timer_pending(&ct->timeout)); + set_bit(IPS_DYING_BIT, &ct->status); + /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ @@ -261,6 +339,7 @@ static void death_by_timeout(unsigned long ul_conntrack) { struct ip_conntrack *ct = (void *)ul_conntrack; + ip_conntrack_event(IPCT_DESTROY, ct); write_lock_bh(&ip_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ @@ -374,6 +453,16 @@ __ip_conntrack_confirm(struct sk_buff **pskb) set_bit(IPS_CONFIRMED_BIT, &ct->status); CONNTRACK_STAT_INC(insert); write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); +#ifdef CONFIG_IP_NF_NAT_NEEDED + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); +#endif + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; } @@ -607,7 +696,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; struct ip_conntrack_protocol *proto; - int set_reply; + int set_reply = 0; int ret; /* Previously seen (loopback or untracked)? Ignore. 
*/ @@ -666,6 +755,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, IP_NF_ASSERT((*pskb)->nfct); + ip_conntrack_event_cache_init(*pskb); + ret = proto->packet(ct, *pskb, ctinfo); if (ret < 0) { /* Invalid: inverse of the return code tells @@ -676,8 +767,8 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return -ret; } - if (set_reply) - set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_STATUS, *pskb); return ret; } @@ -824,6 +915,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) evict_oldest_expect(expect->master); ip_conntrack_expect_insert(expect); + ip_conntrack_expect_event(IPEXP_NEW, expect); ret = 0; out: write_unlock_bh(&ip_conntrack_lock); @@ -861,8 +953,10 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { - if (tuplehash_to_ctrack(i)->helper == me) + if (tuplehash_to_ctrack(i)->helper == me) { + ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; + } return 0; } @@ -924,6 +1018,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); @@ -1012,6 +1107,23 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) ip_conntrack_put(ct); } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + { + /* we need to deliver all cached events in order to drop + * the reference counts */ + int cpu; + for_each_cpu(cpu) { + struct ip_conntrack_ecache *ecache = + &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) { + __ip_ct_deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + ecache->ct = NULL; + } + } + } +#endif } /* Fast function for those 
who don't want to parse /proc (and I don't diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 7a3b773be3f9..9658896f899a 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -262,7 +262,8 @@ static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) +static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, + struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -276,10 +277,13 @@ static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir) oldest = i; } - if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - else if (oldest != NUM_SEQ_TO_REMEMBER) + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } else if (oldest != NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][oldest] = nl_seq; + ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + } } static int help(struct sk_buff **pskb, @@ -439,7 +443,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. 
*/ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info,dir); + update_nl_seq(seq, ct_ftp_info,dir, *pskb); out: spin_unlock_bh(&ip_ftp_lock); return ret; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 602c74db3252..dca1f63d6f51 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -102,6 +102,7 @@ static int icmp_packet(struct ip_conntrack *ct, ct->timeout.function((unsigned long)ct); } else { atomic_inc(&ct->proto.icmp.count); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 31d75390bf12..3d5f878a07d1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -404,6 +404,8 @@ static int sctp_packet(struct ip_conntrack *conntrack, } conntrack->proto.sctp.state = newconntrack; + if (oldsctpstate != newconntrack) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); write_unlock_bh(&sctp_lock); } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 809dfed766d4..a569ad1ee4d9 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -973,6 +973,10 @@ static int tcp_packet(struct ip_conntrack *conntrack, ? 
ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); + ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + if (new_state != old_state) + ip_conntrack_event_cache(IPCT_PROTOINFO, skb); + if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { /* If only reply is a RST, we can consider ourselves not to have an established connection: this is a fairly common diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 8c1eaba098d4..6066eaf4d825 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -73,7 +73,8 @@ static int udp_packet(struct ip_conntrack *conntrack, ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); + if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) + ip_conntrack_event_cache(IPCT_STATUS, skb); } else ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index dccd4abab7ae..f0880004115d 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -402,6 +402,7 @@ static unsigned int ip_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + ip_conntrack_event_cache_init(*pskb); /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(pskb); } @@ -419,6 +420,7 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, ct = ip_conntrack_get(*pskb, &ctinfo); if (ct && ct->helper) { unsigned int ret; + ip_conntrack_event_cache_init(*pskb); ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) return ret; @@ -889,6 +891,7 @@ static int init_or_cleanup(int init) return ret; cleanup: + synchronize_net(); #ifdef CONFIG_SYSCTL 
unregister_sysctl_table(ip_ct_sysctl_header); cleanup_localinops: @@ -971,6 +974,13 @@ void need_ip_conntrack(void) { } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +EXPORT_SYMBOL_GPL(ip_conntrack_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); +EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); +EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); +EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); +#endif EXPORT_SYMBOL(ip_conntrack_protocol_register); EXPORT_SYMBOL(ip_conntrack_protocol_unregister); EXPORT_SYMBOL(ip_ct_get_tuple); -- cgit v1.2.3 From f9e815b376dc19e6afc551cd755ac64e9e42d81f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:30:24 -0700 Subject: [NETFILTER]: Add nfnetlink layer. Introduce "nfnetlink" (netfilter netlink) layer. This layer is used as transport layer for all userspace communication of the new upcoming netfilter subsystems, such as ctnetlink, nfnetlink_queue and some day even the mythical pkttables ;) Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter/nfnetlink.h | 145 +++++++++++++++ net/Kconfig | 2 + net/Makefile | 1 + net/netfilter/Kconfig | 5 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink.c | 343 ++++++++++++++++++++++++++++++++++++ 6 files changed, 497 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink.h create mode 100644 net/netfilter/Kconfig create mode 100644 net/netfilter/Makefile create mode 100644 net/netfilter/nfnetlink.c (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h new file mode 100644 index 000000000000..8f1bfb8d650b --- /dev/null +++ b/include/linux/netfilter/nfnetlink.h @@ -0,0 +1,145 @@ +#ifndef _NFNETLINK_H +#define _NFNETLINK_H +#include + +/* nfnetlink groups: Up to 32 maximum */ +#define NF_NETLINK_CONNTRACK_NEW 0x00000001 +#define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 +#define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 +#define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 +#define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 +#define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 + +/* Generic structure for encapsulation optional netfilter information. + * It is reminiscent of sockaddr, but with sa_family replaced + * with attribute type. + * ! This should someday be put somewhere generic as now rtnetlink and + * ! nfnetlink use the same attributes methods. - J. Schulist. 
+ */ + +struct nfattr +{ + u_int16_t nfa_len; + u_int16_t nfa_type; +} __attribute__ ((packed)); + +/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time + * to put this in a generic file */ + +#define NFA_ALIGNTO 4 +#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) +#define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \ + && (nfa)->nfa_len <= (len)) +#define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \ + (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len))) +#define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len)) +#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len)) +#define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) +#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) +#define NFA_NEST(skb, type) \ +({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ + NFA_PUT(skb, type, 0, NULL); \ + __start; }) +#define NFA_NEST_END(skb, start) \ +({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ + (skb)->len; }) +#define NFA_NEST_CANCEL(skb, start) \ +({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ + -1; }) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + u_int8_t nfgen_family; /* AF_xxx */ + u_int8_t version; /* nfnetlink version */ + u_int16_t res_id; /* resource id */ +} __attribute__ ((packed)); + +#define NFNETLINK_V1 1 + +#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) +#define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg)) + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. 
+ */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +enum nfnl_subsys_id { + NFNL_SUBSYS_NONE = 0, + NFNL_SUBSYS_CTNETLINK, + NFNL_SUBSYS_CTNETLINK_EXP, + NFNL_SUBSYS_IPTNETLINK, + NFNL_SUBSYS_QUEUE, + NFNL_SUBSYS_ULOG, + NFNL_SUBSYS_COUNT, +}; + +#ifdef __KERNEL__ + +#include + +struct nfnl_callback +{ + kernel_cap_t cap_required; /* capabilities required for this msg */ + int (*call)(struct sock *nl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); +}; + +struct nfnetlink_subsystem +{ + const char *name; + __u8 subsys_id; /* nfnetlink subsystem ID */ + __u8 cb_count; /* number of callbacks */ + u_int32_t attr_count; /* number of nfattr's */ + struct nfnl_callback *cb; /* callback for individual types */ +}; + +extern void __nfa_fill(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +#define NFA_PUT(skb, attrtype, attrlen, data) \ +({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \ + __nfa_fill(skb, attrtype, attrlen, data); }) + +extern struct semaphore nfnl_sem; + +#define nfnl_shlock() down(&nfnl_sem) +#define nfnl_shlock_nowait() down_trylock(&nfnl_sem) + +#define nfnl_shunlock() do { up(&nfnl_sem); \ + if(nfnl && nfnl->sk_receive_queue.qlen) \ + nfnl->sk_data_ready(nfnl, 0); \ + } while(0) + +extern void nfnl_lock(void); +extern void nfnl_unlock(void); + +extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n); +extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n); + +extern int nfattr_parse(struct nfattr *tb[], int maxattr, + struct nfattr *nfa, int len); + +#define nfattr_parse_nested(tb, max, nfa) \ + nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa))) + +#define nfattr_bad_size(tb, max, cta_min) \ +({ int __i, __res = 0; \ + for (__i=0; __i, + * (C) 2002-2005 by Harald Welte + * (C) 2005 by Pablo Neira Ayuso + * + * Initial netfilter messages via netlink development funded and + * generally made 
possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata nfversion[] = "0.30"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static struct sock *nfnl = NULL; +static struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; +DECLARE_MUTEX(nfnl_sem); + +void nfnl_lock(void) +{ + nfnl_shlock(); +} + +void nfnl_unlock(void) +{ + nfnl_shunlock(); +} + +int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) +{ + DEBUGP("registering subsystem ID %u\n", n->subsys_id); + + /* If the netlink socket wasn't created, then fail */ + if (!nfnl) + return -1; + + nfnl_lock(); + subsys_table[n->subsys_id] = n; + nfnl_unlock(); + + return 0; +} + +int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n) +{ + DEBUGP("unregistering subsystem ID %u\n", n->subsys_id); + + nfnl_lock(); + subsys_table[n->subsys_id] = NULL; + nfnl_unlock(); + + return 0; +} + +static inline struct nfnetlink_subsystem *nfnetlink_get_subsys(u_int16_t type) +{ + u_int8_t subsys_id = NFNL_SUBSYS_ID(type); + + if (subsys_id >= NFNL_SUBSYS_COUNT + || subsys_table[subsys_id] == NULL) + return NULL; + + return subsys_table[subsys_id]; +} + +static inline struct nfnl_callback * +nfnetlink_find_client(u_int16_t type, struct nfnetlink_subsystem *ss) +{ + u_int8_t cb_id = NFNL_MSG_TYPE(type); + + if (cb_id >= ss->cb_count) { + DEBUGP("msgtype %u >= %u, returning\n", type, ss->cb_count); + return NULL; + } + + return &ss->cb[cb_id]; +} + +void __nfa_fill(struct sk_buff *skb, 
int attrtype, int attrlen, + const void *data) +{ + struct nfattr *nfa; + int size = NFA_LENGTH(attrlen); + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = attrtype; + nfa->nfa_len = size; + memcpy(NFA_DATA(nfa), data, attrlen); +} + +int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) +{ + memset(tb, 0, sizeof(struct nfattr *) * maxattr); + + while (NFA_OK(nfa, len)) { + unsigned flavor = nfa->nfa_type; + if (flavor && flavor <= maxattr) + tb[flavor-1] = nfa; + nfa = NFA_NEXT(nfa, len); + } + + return 0; +} + +/** + * nfnetlink_check_attributes - check and parse nfnetlink attributes + * + * subsys: nfnl subsystem for which this message is to be parsed + * nlmsghdr: netlink message to be checked/parsed + * cda: array of pointers, needs to be at least subsys->attr_count big + * + */ +static int +nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, + struct nlmsghdr *nlh, struct nfattr *cda[]) +{ + int min_len; + + memset(cda, 0, sizeof(struct nfattr *) * subsys->attr_count); + + /* check attribute lengths. */ + min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); + if (nlh->nlmsg_len < min_len) + return -EINVAL; + + if (nlh->nlmsg_len > min_len) { + struct nfattr *attr = NFM_NFA(NLMSG_DATA(nlh)); + int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); + + while (NFA_OK(attr, attrlen)) { + unsigned flavor = attr->nfa_type; + if (flavor) { + if (flavor > subsys->attr_count) + return -EINVAL; + cda[flavor - 1] = attr; + } + attr = NFA_NEXT(attr, attrlen); + } + } else + return -EINVAL; + + return 0; +} + +int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +{ + int allocation = in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL; + int err = 0; + + NETLINK_CB(skb).dst_groups = group; + if (echo) + atomic_inc(&skb->users); + netlink_broadcast(nfnl, skb, pid, group, allocation); + if (echo) + err = netlink_unicast(nfnl, skb, pid, MSG_DONTWAIT); + + return err; +} + +int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) +{ + return netlink_unicast(nfnl, skb, pid, flags); +} + +/* Process one complete nfnetlink message. */ +static inline int nfnetlink_rcv_msg(struct sk_buff *skb, + struct nlmsghdr *nlh, int *errp) +{ + struct nfnl_callback *nc; + struct nfnetlink_subsystem *ss; + int type, err = 0; + + DEBUGP("entered; subsys=%u, msgtype=%u\n", + NFNL_SUBSYS_ID(nlh->nlmsg_type), + NFNL_MSG_TYPE(nlh->nlmsg_type)); + + /* Only requests are handled by kernel now. */ + if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { + DEBUGP("received non-request message\n"); + return 0; + } + + /* All the messages must at least contain nfgenmsg */ + if (nlh->nlmsg_len < + NLMSG_LENGTH(NLMSG_ALIGN(sizeof(struct nfgenmsg)))) { + DEBUGP("received message was too short\n"); + return 0; + } + + type = nlh->nlmsg_type; + ss = nfnetlink_get_subsys(type); + if (!ss) + goto err_inval; + + nc = nfnetlink_find_client(type, ss); + if (!nc) { + DEBUGP("unable to find client for type %d\n", type); + goto err_inval; + } + + if (nc->cap_required && + !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { + DEBUGP("permission denied for type %d\n", type); + *errp = -EPERM; + return -1; + } + + { + struct nfattr *cda[ss->attr_count]; + + memset(cda, 0, ss->attr_count*sizeof(struct nfattr *)); + + err = nfnetlink_check_attributes(ss, nlh, cda); + if (err < 0) + goto err_inval; + + err = nc->call(nfnl, skb, nlh, cda, errp); + *errp = err; + return err; + } + +err_inval: + *errp = -EINVAL; + return -1; +} + +/* Process one packet of messages. 
*/ +static inline int nfnetlink_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr *nlh; + + while (skb->len >= NLMSG_SPACE(0)) { + u32 rlen; + + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(struct nlmsghdr) + || skb->len < nlh->nlmsg_len) + return 0; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + if (nfnetlink_rcv_msg(skb, nlh, &err)) { + if (!err) + return -1; + netlink_ack(skb, nlh, err); + } else + if (nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, 0); + skb_pull(skb, rlen); + } + + return 0; +} + +static void nfnetlink_rcv(struct sock *sk, int len) +{ + do { + struct sk_buff *skb; + + if (nfnl_shlock_nowait()) + return; + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (nfnetlink_rcv_skb(skb)) { + if (skb->len) + skb_queue_head(&sk->sk_receive_queue, + skb); + else + kfree_skb(skb); + break; + } + kfree_skb(skb); + } + + up(&nfnl_sem); + } while(nfnl && nfnl->sk_receive_queue.qlen); +} + +void __exit nfnetlink_exit(void) +{ + printk("Removing netfilter NETLINK layer.\n"); + sock_release(nfnl->sk_socket); + return; +} + +int __init nfnetlink_init(void) +{ + printk("Netfilter messages via NETLINK v%s.\n", nfversion); + + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + if (!nfnl) { + printk(KERN_ERR "cannot initialize nfnetlink!\n"); + return -1; + } + + return 0; +} + +module_init(nfnetlink_init); +module_exit(nfnetlink_exit); + +EXPORT_SYMBOL_GPL(nfnetlink_subsys_register); +EXPORT_SYMBOL_GPL(nfnetlink_subsys_unregister); +EXPORT_SYMBOL_GPL(nfnetlink_send); +EXPORT_SYMBOL_GPL(nfnetlink_unicast); +EXPORT_SYMBOL_GPL(nfattr_parse); +EXPORT_SYMBOL_GPL(__nfa_fill); -- cgit v1.2.3 From 6f1cf16582160c4839f05007c978743911aa022b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 9 Aug 2005 19:31:17 -0700 Subject: [NET]: Remove HIPPI private from skbuff.h This removes the private element from skbuff, that is only used by HIPPI. 
Instead it uses skb->cb[] to hold the additional data that is needed in the output path from hard_header to device driver. PS: The only qdisc that might potentially corrupt this cb[] is netem, if it is used over HIPPI. I will take care of that by fixing netem to use skb->stamp. I don't expect many users of netem over HIPPI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/rrunner.c | 3 ++- include/linux/hippidevice.h | 5 +++++ include/linux/skbuff.h | 6 ------ net/802/hippi.c | 4 +++- net/core/skbuff.c | 3 --- 5 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index 12a86f96d973..ec1a18d189a1 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -1429,6 +1429,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct rr_private *rrpriv = netdev_priv(dev); struct rr_regs __iomem *regs = rrpriv->regs; + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; struct ring_ctrl *txctrl; unsigned long flags; u32 index, len = skb->len; @@ -1460,7 +1461,7 @@ static int rr_start_xmit(struct sk_buff *skb, struct net_device *dev) ifield = (u32 *)skb_push(skb, 8); ifield[0] = 0; - ifield[1] = skb->private.ifield; + ifield[1] = hcb->ifield; /* * We don't need the lock before we are actually going to start diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9debe6bbe5f0..9bc3b688d2ee 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -26,6 +26,11 @@ #include #ifdef __KERNEL__ + +struct hippi_cb { + __u32 ifield; +}; + extern unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f10a8b9628b0..4aeadb102589 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -193,7 +193,6 @@ struct skb_shared_info { * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to
the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict */ @@ -265,11 +264,6 @@ struct sk_buff { struct nf_bridge_info *nf_bridge; #endif #endif /* CONFIG_NETFILTER */ -#if defined(CONFIG_HIPPI) - union { - __u32 ifield; - } private; -#endif #ifdef CONFIG_NET_SCHED __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT diff --git a/net/802/hippi.c b/net/802/hippi.c index 051e8af56a77..cb45ae1310cb 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -51,6 +51,7 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); + struct hippi_cb *hcb = (struct hippi_cb *) skb->cb; if (!len){ len = skb->len - HIPPI_HLEN; @@ -84,9 +85,10 @@ static int hippi_header(struct sk_buff *skb, struct net_device *dev, if (daddr) { memcpy(hip->le.dest_switch_addr, daddr + 3, 3); - memcpy(&skb->private.ifield, daddr + 2, 4); + memcpy(&hcb->ifield, daddr + 2, 4); return HIPPI_HLEN; } + hcb->ifield = 0; return -((int)HIPPI_HLEN); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 8896e6f8aa42..16df7bd77e78 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -365,9 +365,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) nf_bridge_get(skb->nf_bridge); #endif #endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif #ifdef CONFIG_NET_SCHED C(tc_index); #ifdef CONFIG_NET_CLS_ACT -- cgit v1.2.3 From 080774a243f56ce2195ace96fba3d18548ee48ce Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:32:58 -0700 Subject: [NETFILTER]: Add ctnetlink subsystem Add ctnetlink subsystem for userspace-access to ip_conntrack table. This allows reading and updating of existing entries, as well as creating new ones (and new expect's) via nfnetlink. 
Please note the 'strange' byte order: nfattr (tag+length) are in host byte order, while the payload is always guaranteed to be in network byte order. This allows a simple userspace process to encapsulate netlink messages into arch-independent udp packets by just processing/swapping the headers and not knowing anything about the actual payload. Signed-off-by: Harald Welte Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_conntrack.h | 123 ++ include/linux/netfilter_ipv4/ip_conntrack.h | 46 +- include/linux/netfilter_ipv4/ip_conntrack_core.h | 5 + include/linux/netfilter_ipv4/ip_conntrack_helper.h | 2 + .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 24 +- include/linux/netfilter_ipv4/ip_nat_protocol.h | 25 +- net/ipv4/netfilter/Kconfig | 7 + net/ipv4/netfilter/Makefile | 4 + net/ipv4/netfilter/ip_conntrack_core.c | 281 +++- net/ipv4/netfilter/ip_conntrack_netlink.c | 1588 ++++++++++++++++++++ net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 64 +- net/ipv4/netfilter/ip_conntrack_proto_sctp.c | 7 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 23 + net/ipv4/netfilter/ip_conntrack_proto_udp.c | 5 + net/ipv4/netfilter/ip_conntrack_standalone.c | 38 +- net/ipv4/netfilter/ip_nat_core.c | 99 +- net/ipv4/netfilter/ip_nat_proto_icmp.c | 9 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 10 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 9 +- net/ipv4/netfilter/ip_nat_proto_unknown.c | 2 +- net/ipv4/netfilter/ip_nat_standalone.c | 2 + net/netfilter/nfnetlink.c | 1 + 23 files changed, 2277 insertions(+), 100 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_conntrack.h create mode 100644 net/ipv4/netfilter/ip_conntrack_netlink.c (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 8f1bfb8d650b..ace7a7be0742 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -56,7 +56,7 
@@ struct nfgenmsg { u_int16_t res_id; /* resource id */ } __attribute__ ((packed)); -#define NFNETLINK_V1 1 +#define NFNETLINK_V0 0 #define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) @@ -81,6 +81,7 @@ enum nfnl_subsys_id { #ifdef __KERNEL__ +#include #include struct nfnl_callback diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h new file mode 100644 index 000000000000..fb528e0e3bd9 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -0,0 +1,123 @@ +#ifndef _IPCONNTRACK_NETLINK_H +#define _IPCONNTRACK_NETLINK_H +#include + +enum cntl_msg_types { + IPCTNL_MSG_CT_NEW, + IPCTNL_MSG_CT_GET, + IPCTNL_MSG_CT_DELETE, + IPCTNL_MSG_CT_GET_CTRZERO, + + IPCTNL_MSG_MAX +}; + +enum ctnl_exp_msg_types { + IPCTNL_MSG_EXP_NEW, + IPCTNL_MSG_EXP_GET, + IPCTNL_MSG_EXP_DELETE, + + IPCTNL_MSG_EXP_MAX +}; + + +enum ctattr_type { + CTA_UNSPEC, + CTA_TUPLE_ORIG, + CTA_TUPLE_REPLY, + CTA_STATUS, + CTA_PROTOINFO, + CTA_HELP, + CTA_NAT, + CTA_TIMEOUT, + CTA_MARK, + CTA_COUNTERS_ORIG, + CTA_COUNTERS_REPLY, + CTA_USE, + CTA_EXPECT, + CTA_ID, + __CTA_MAX +}; +#define CTA_MAX (__CTA_MAX - 1) + +enum ctattr_tuple { + CTA_TUPLE_UNSPEC, + CTA_TUPLE_IP, + CTA_TUPLE_PROTO, + __CTA_TUPLE_MAX +}; +#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) + +enum ctattr_ip { + CTA_IP_UNSPEC, + CTA_IP_V4_SRC, + CTA_IP_V4_DST, + CTA_IP_V6_SRC, + CTA_IP_V6_DST, + __CTA_IP_MAX +}; +#define CTA_IP_MAX (__CTA_IP_MAX - 1) + +enum ctattr_l4proto { + CTA_PROTO_UNSPEC, + CTA_PROTO_NUM, + CTA_PROTO_SRC_PORT, + CTA_PROTO_DST_PORT, + CTA_PROTO_ICMP_ID, + CTA_PROTO_ICMP_TYPE, + CTA_PROTO_ICMP_CODE, + __CTA_PROTO_MAX +}; +#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) + +enum ctattr_protoinfo { + CTA_PROTOINFO_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_MAX +}; +#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) + +enum ctattr_counters { + CTA_COUNTERS_UNSPEC, + CTA_COUNTERS_PACKETS, + CTA_COUNTERS_BYTES, + 
__CTA_COUNTERS_MAX +}; +#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) + +enum ctattr_nat { + CTA_NAT_UNSPEC, + CTA_NAT_MINIP, + CTA_NAT_MAXIP, + CTA_NAT_PROTO, + __CTA_NAT_MAX +}; +#define CTA_NAT_MAX (__CTA_NAT_MAX - 1) + +enum ctattr_protonat { + CTA_PROTONAT_UNSPEC, + CTA_PROTONAT_PORT_MIN, + CTA_PROTONAT_PORT_MAX, + __CTA_PROTONAT_MAX +}; +#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) + +enum ctattr_expect { + CTA_EXPECT_UNSPEC, + CTA_EXPECT_TUPLE, + CTA_EXPECT_MASK, + CTA_EXPECT_TIMEOUT, + CTA_EXPECT_ID, + __CTA_EXPECT_MAX +}; +#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) + +enum ctattr_help { + CTA_HELP_UNSPEC, + CTA_HELP_NAME, + __CTA_HELP_MAX +}; +#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) + +#define CTA_HELP_MAXNAMESIZE 32 + +#endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ae1270c97b50..ff2c1c6001f9 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -209,6 +209,9 @@ struct ip_conntrack /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper, if any. */ struct ip_conntrack_helper *helper; @@ -257,6 +260,9 @@ struct ip_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -296,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) } /* decrement reference count on a conntrack */ -extern void ip_conntrack_put(struct ip_conntrack *ct); +static inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} /* call to create an explicit dependency on ip_conntrack. 
*/ extern void need_ip_conntrack(void); @@ -331,6 +342,39 @@ extern void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), void *data); +extern struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *); +extern struct ip_conntrack_helper * +ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple); +extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper); + +extern struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol); +extern struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol); +extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto); + +extern void ip_ct_remove_expectations(struct ip_conntrack *ct); + +extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, + struct ip_conntrack_tuple *); + +extern void ip_conntrack_free(struct ip_conntrack *ct); + +extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); + +extern struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern void ip_conntrack_flush(void); + /* It's confirmed if it is, or has been in the hash table. 
*/ static inline int is_confirmed(struct ip_conntrack *ct) { diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 46eeea1e2733..fbf6c3e41647 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -2,6 +2,9 @@ #define _IP_CONNTRACK_CORE_H #include +#define MAX_IP_CT_PROTO 256 +extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ @@ -53,6 +56,8 @@ struct ip_conntrack_ecache; extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); #endif +extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 3692daa93dec..8d69279ccfe4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -24,6 +24,8 @@ struct ip_conntrack_helper int (*help)(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index e20b57c5e1b7..b6b99be8632a 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -2,6 +2,7 @@ #ifndef _IP_CONNTRACK_PROTOCOL_H #define _IP_CONNTRACK_PROTOCOL_H #include +#include struct seq_file; @@ -47,22 +48,22 @@ struct ip_conntrack_protocol int 
(*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum); + /* convert protoinfo to nfnetink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct ip_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct ip_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; -#define MAX_IP_CT_PROTO 256 -extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; - /* Protocol registration. */ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); - -static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) -{ - return ip_ct_protos[protocol]; -} - /* Existing built-in protocols */ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; @@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void); /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *, + const struct ip_conntrack_tuple *); +extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *); + #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index 129708c22386..ef63aa991a06 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -4,6 +4,9 @@ #include #include +#include +#include + struct iphdr; struct ip_nat_range; @@ -15,6 +18,8 @@ struct ip_nat_protocol /* Protocol number. */ unsigned int protonum; + struct module *me; + /* Translate a packet to the target according to manip type. Return true if succeeded. 
*/ int (*manip_pkt)(struct sk_buff **pskb, @@ -43,19 +48,20 @@ struct ip_nat_protocol unsigned int (*print_range)(char *buffer, const struct ip_nat_range *range); -}; -#define MAX_IP_NAT_PROTO 256 -extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + int (*range_to_nfattr)(struct sk_buff *skb, + const struct ip_nat_range *range); + + int (*nfattr_to_range)(struct nfattr *tb[], + struct ip_nat_range *range); +}; /* Protocol registration. */ extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); -static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) -{ - return ip_nat_protos[protocol]; -} +extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol); +extern void ip_nat_proto_put(struct ip_nat_protocol *proto); /* Built-in protocols. */ extern struct ip_nat_protocol ip_nat_protocol_tcp; @@ -67,4 +73,9 @@ extern int init_protocols(void) __init; extern void cleanup_protocols(void); extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); +extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range); +extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[], + struct ip_nat_range *range); + #endif /*_IP_NAT_PROTO_H*/ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index ff3393eba924..e47ba39eb657 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -702,5 +702,12 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. 
+config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + help + This option enables support for a netlink-based userspace interface + + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 45796d5924dd..abf2a7d1a584 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -9,6 +9,10 @@ iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helpe # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +# conntrack netlink interface +obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o + + # SCTP protocol connection tracking obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index caf89deae116..d9fddae8d787 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -50,7 +50,7 @@ #include #include -#define IP_CONNTRACK_VERSION "2.2" +#define IP_CONNTRACK_VERSION "2.3" #if 0 #define DEBUGP printk @@ -77,6 +77,8 @@ unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); static int ip_conntrack_vmalloc; +static unsigned int ip_conntrack_next_id = 1; +static unsigned int ip_conntrack_expect_next_id = 1; #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS struct notifier_block *ip_conntrack_chain; struct notifier_block *ip_conntrack_expect_chain; @@ -154,13 +156,6 @@ void ip_conntrack_event_cache_init(const struct sk_buff *skb) DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); -void -ip_conntrack_put(struct ip_conntrack *ct) -{ - IP_NF_ASSERT(ct); - nf_conntrack_put(&ct->ct_general); -} - static int ip_conntrack_hash_rnd_initted; static unsigned int ip_conntrack_hash_rnd; @@ -222,6 +217,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp) exp->master->expecting--; } +void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) +{ + 
unlink_expect(exp); + ip_conntrack_expect_put(exp); +} + static void expectation_timed_out(unsigned long ul_expect) { struct ip_conntrack_expect *exp = (void *)ul_expect; @@ -232,6 +233,33 @@ static void expectation_timed_out(unsigned long ul_expect) ip_conntrack_expect_put(exp); } +struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + list_for_each_entry(i, &ip_conntrack_expect_list, list) { + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { + atomic_inc(&i->use); + return i; + } + } + return NULL; +} + +/* Just find a expectation corresponding to a tuple. */ +struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *i; + + read_lock_bh(&ip_conntrack_lock); + i = __ip_conntrack_expect_find(tuple); + read_unlock_bh(&ip_conntrack_lock); + + return i; +} + /* If an expectation for this connection is found, it gets delete from * global list then returned. */ static struct ip_conntrack_expect * @@ -256,7 +284,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple) } /* delete all expectations for this conntrack */ -static void remove_expectations(struct ip_conntrack *ct) +void ip_ct_remove_expectations(struct ip_conntrack *ct) { struct ip_conntrack_expect *i, *tmp; @@ -286,7 +314,7 @@ clean_from_lists(struct ip_conntrack *ct) LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); } static void @@ -304,7 +332,7 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! 
-HW */ - proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); @@ -316,7 +344,7 @@ destroy_conntrack(struct nf_conntrack *nfct) * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - remove_expectations(ct); + ip_ct_remove_expectations(ct); /* We overload first tuple to link into unconfirmed list. */ if (!is_confirmed(ct)) { @@ -331,8 +359,7 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_put(ct->master); DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); - kmem_cache_free(ip_conntrack_cachep, ct); - atomic_dec(&ip_conntrack_count); + ip_conntrack_free(ct); } static void death_by_timeout(unsigned long ul_conntrack) @@ -359,7 +386,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, && ip_ct_tuple_equal(tuple, &i->tuple); } -static struct ip_conntrack_tuple_hash * +struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) { @@ -394,6 +421,29 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, return h; } +static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, + unsigned int hash, + unsigned int repl_hash) +{ + ct->id = ++ip_conntrack_next_id; + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); +} + +void ip_conntrack_hash_insert(struct ip_conntrack *ct) +{ + unsigned int hash, repl_hash; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + write_lock_bh(&ip_conntrack_lock); + __ip_conntrack_hash_insert(ct, hash, repl_hash); + write_unlock_bh(&ip_conntrack_lock); +} + /* Confirm a connection 
given skb; places it in hash table */ int __ip_conntrack_confirm(struct sk_buff **pskb) @@ -440,10 +490,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* Remove from unconfirmed list */ list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY]); + __ip_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ @@ -527,34 +574,84 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i, return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); } -static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +static struct ip_conntrack_helper * +__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct ip_conntrack_helper *, tuple); } -/* Allocate a new conntrack: we return -ENOMEM if classification - failed due to stress. Otherwise it really is unclassifiable. */ -static struct ip_conntrack_tuple_hash * -init_conntrack(const struct ip_conntrack_tuple *tuple, - struct ip_conntrack_protocol *protocol, - struct sk_buff *skb) +struct ip_conntrack_helper * +ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_helper *helper; + + /* need ip_conntrack_lock to assure that helper exists until + * try_module_get() is called */ + read_lock_bh(&ip_conntrack_lock); + + helper = __ip_conntrack_helper_find(tuple); + if (helper) { + /* need to increase module usage count to assure helper will + * not go away while the caller is e.g. 
busy putting a + * conntrack in the hash that uses the helper */ + if (!try_module_get(helper->me)) + helper = NULL; + } + + read_unlock_bh(&ip_conntrack_lock); + + return helper; +} + +void ip_conntrack_helper_put(struct ip_conntrack_helper *helper) +{ + module_put(helper->me); +} + +struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol) +{ + return ip_ct_protos[protocol]; +} + +/* this is guaranteed to always return a valid protocol helper, since + * it falls back to generic_protocol */ +struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + preempt_disable(); + p = __ip_conntrack_proto_find(protocol); + if (p) { + if (!try_module_get(p->me)) + p = &ip_conntrack_generic_protocol; + } + preempt_enable(); + + return p; +} + +void ip_conntrack_proto_put(struct ip_conntrack_protocol *p) +{ + module_put(p->me); +} + +struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, + struct ip_conntrack_tuple *repl) { struct ip_conntrack *conntrack; - struct ip_conntrack_tuple repl_tuple; - size_t hash; - struct ip_conntrack_expect *exp; if (!ip_conntrack_hash_rnd_initted) { get_random_bytes(&ip_conntrack_hash_rnd, 4); ip_conntrack_hash_rnd_initted = 1; } - hash = hash_conntrack(tuple); - if (ip_conntrack_max && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. 
*/ if (!early_drop(&ip_conntrack_hash[hash])) { if (net_ratelimit()) @@ -565,31 +662,58 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, } } - if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { - DEBUGP("Can't invert tuple.\n"); - return NULL; - } - conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); - return ERR_PTR(-ENOMEM); + return NULL; } memset(conntrack, 0, sizeof(*conntrack)); atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; - if (!protocol->new(conntrack, skb)) { - kmem_cache_free(ip_conntrack_cachep, conntrack); - return NULL; - } + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + atomic_inc(&ip_conntrack_count); + + return conntrack; +} + +void +ip_conntrack_free(struct ip_conntrack *conntrack) +{ + atomic_dec(&ip_conntrack_count); + kmem_cache_free(ip_conntrack_cachep, conntrack); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + * failed due to stress. 
Otherwise it really is unclassifiable */ +static struct ip_conntrack_tuple_hash * +init_conntrack(struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + struct ip_conntrack_expect *exp; + + if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + if (!(conntrack = ip_conntrack_alloc(tuple, &repl_tuple))) + return NULL; + + if (!protocol->new(conntrack, skb)) { + ip_conntrack_free(conntrack); + return NULL; + } + write_lock_bh(&ip_conntrack_lock); exp = find_expectation(tuple); @@ -610,7 +734,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); } else { - conntrack->helper = ip_ct_find_helper(&repl_tuple); + conntrack->helper = __ip_conntrack_helper_find(&repl_tuple); CONNTRACK_STAT_INC(new); } @@ -618,7 +742,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); - atomic_inc(&ip_conntrack_count); write_unlock_bh(&ip_conntrack_lock); if (exp) { @@ -729,7 +852,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, } #endif - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); + proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter @@ -777,7 +900,7 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { return ip_ct_invert_tuple(inverse, orig, - ip_ct_find_proto(orig->dst.protonum)); + __ip_conntrack_proto_find(orig->dst.protonum)); } /* Would two expected things clash? 
*/ @@ -857,6 +980,8 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; add_timer(&exp->timeout); + exp->id = ++ip_conntrack_expect_next_id; + atomic_inc(&exp->use); CONNTRACK_STAT_INC(expect_create); } @@ -936,7 +1061,7 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = ip_ct_find_helper(newreply); + conntrack->helper = __ip_conntrack_helper_find(newreply); write_unlock_bh(&ip_conntrack_lock); } @@ -950,6 +1075,19 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) return 0; } +struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *name) +{ + struct ip_conntrack_helper *h; + + list_for_each_entry(h, &helpers, list) { + if (!strcmp(h->name, name)) + return h; + } + + return NULL; +} + static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { @@ -1025,6 +1163,39 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, } } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +/* Generic function for tcp/udp/sctp/dccp and alike. 
This needs to be + * in ip_conntrack_core, since we don't want the protocols to autoload + * or depend on ctnetlink */ +int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), + &tuple->src.u.tcp.port); + NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), + &tuple->dst.u.tcp.port); + return 0; + +nfattr_failure: + return -1; +} + +int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *t) +{ + if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) + return -EINVAL; + + t->src.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); + t->dst.u.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); + + return 0; +} +#endif + /* Returns new sk_buff, or NULL */ struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) @@ -1203,16 +1374,13 @@ static void free_conntrack_hash(void) * ip_conntrack_htable_size)); } -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void ip_conntrack_cleanup(void) +void ip_conntrack_flush() { - ip_ct_attach = NULL; /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module delete... */ synchronize_net(); - + i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { @@ -1222,7 +1390,14 @@ void ip_conntrack_cleanup(void) /* wait until all references to ip_conntrack_untracked are dropped */ while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) schedule(); +} +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. 
*/ +void ip_conntrack_cleanup(void) +{ + ip_ct_attach = NULL; + ip_conntrack_flush(); kmem_cache_destroy(ip_conntrack_cachep); kmem_cache_destroy(ip_conntrack_expect_cachep); free_conntrack_hash(); diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c new file mode 100644 index 000000000000..f43ec18c9166 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -0,0 +1,1588 @@ +/* Connection tracking via netlink socket. Allows for user space + * protocol helpers and general trouble making from userspace. + * + * (C) 2001 by Jay Schulist + * (C) 2002-2005 by Harald Welte + * (C) 2003 by Patrick Mchardy + * (C) 2005 by Pablo Neira Ayuso + * + * I've reworked this stuff to use attributes instead of conntrack + * structures. 5.44 am. I need more tea. --pablo 05/07/11. + * + * Initial connection tracking via netlink development funded and + * generally made possible by Network Robots, Inc. (www.networkrobots.com) + * + * Further development of this code funded by Astaro AG (http://www.astaro.com) + * + * This software may be used and distributed according to the terms + * of the GNU General Public License, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); + +static char __initdata version[] = "0.90"; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + + +static inline int +ctnetlink_dump_tuples_proto(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_protocol *proto; + + NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + if (proto && proto->tuple_to_nfattr) + return proto->tuple_to_nfattr(skb, tuple); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_tuples(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple) +{ + struct nfattr *nest_parms; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); + NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); + NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); + ctnetlink_dump_tuples_proto(skb, tuple); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t status = htonl((u_int32_t) ct->status); + NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + long timeout_l = ct->timeout.expires - jiffies; + u_int32_t timeout; + + if (timeout_l < 0) + timeout = 0; + else + timeout = htonl(timeout_l / HZ); + + NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + + struct nfattr *nest_proto; + int ret; + + if (!proto || !proto->to_nfattr) + return 0; + + nest_proto = NFA_NEST(skb, CTA_PROTOINFO); + + ret = proto->to_nfattr(skb, nest_proto, ct); + + 
ip_conntrack_proto_put(proto); + + NFA_NEST_END(skb, nest_proto); + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + struct nfattr *nest_helper; + + if (!ct->helper) + return 0; + + nest_helper = NFA_NEST(skb, CTA_HELP); + NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name); + + if (ct->helper->to_nfattr) + ct->helper->to_nfattr(skb, ct); + + NFA_NEST_END(skb, nest_helper); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static inline int +ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, + enum ip_conntrack_dir dir) +{ + enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; + struct nfattr *nest_count = NFA_NEST(skb, type); + u_int64_t tmp; + + tmp = cpu_to_be64(ct->counters[dir].packets); + NFA_PUT(skb, CTA_COUNTERS_PACKETS, sizeof(u_int64_t), &tmp); + + tmp = cpu_to_be64(ct->counters[dir].bytes); + NFA_PUT(skb, CTA_COUNTERS_BYTES, sizeof(u_int64_t), &tmp); + + NFA_NEST_END(skb, nest_count); + + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_counters(a, b, c) (0) +#endif + +#ifdef CONFIG_IP_NF_CONNTRACK_MARK +static inline int +ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t mark = htonl(ct->mark); + + NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); + return 0; + +nfattr_failure: + return -1; +} +#else +#define ctnetlink_dump_mark(a, b) (0) +#endif + +static inline int +ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + u_int32_t id = htonl(ct->id); + NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) +{ + unsigned int use = htonl(atomic_read(&ct->ct_general.use)); + + NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); + return 0; + +nfattr_failure: + 
return -1; +} + +#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) + +static int +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, int nowait, + const struct ip_conntrack *ct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + if (ctnetlink_dump_status(skb, ct) < 0 || + ctnetlink_dump_timeout(skb, ct) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_protoinfo(skb, ct) < 0 || + ctnetlink_dump_helpinfo(skb, ct) < 0 || + ctnetlink_dump_mark(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_conntrack_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct nfattr *nest_parms; + struct ip_conntrack *ct = (struct ip_conntrack *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + unsigned int flags = 0, groups; + + /* ignore our fake conntrack entry */ + if (ct == &ip_conntrack_untracked) + return NOTIFY_DONE; + + if 
(events & IPCT_DESTROY) { + type = IPCTNL_MSG_CT_DELETE; + groups = NF_NETLINK_CONNTRACK_DESTROY; + goto alloc_skb; + } + if (events & (IPCT_NEW | IPCT_RELATED)) { + type = IPCTNL_MSG_CT_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + /* dump everything */ + events = ~0UL; + groups = NF_NETLINK_CONNTRACK_NEW; + goto alloc_skb; + } + if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { + type = IPCTNL_MSG_CT_NEW; + groups = NF_NETLINK_CONNTRACK_UPDATE; + goto alloc_skb; + } + + return NOTIFY_DONE; + +alloc_skb: + /* FIXME: Check if there are any listeners before, don't hurt performance */ + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); + if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + goto nfattr_failure; + NFA_NEST_END(skb, nest_parms); + + /* NAT stuff is now a status flag */ + if ((events & IPCT_STATUS || events & IPCT_NATINFO) + && ctnetlink_dump_status(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_REFRESH + && ctnetlink_dump_timeout(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_PROTOINFO + && ctnetlink_dump_protoinfo(skb, ct) < 0) + goto nfattr_failure; + if (events & IPCT_HELPINFO + && ctnetlink_dump_helpinfo(skb, ct) < 0) + goto nfattr_failure; + + if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + nfnetlink_send(skb, 0, groups, 0); + 
return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + +static int ctnetlink_done(struct netlink_callback *cb) +{ + DEBUGP("entered %s\n", __FUNCTION__); + return 0; +} + +static int +ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, + cb->args[0], *id); + + read_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + } + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} + +#ifdef CONFIG_IP_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&ip_conntrack_lock); + for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; 
+ + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + +static const int cta_min_ip[CTA_IP_MAX] = { + [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), + [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +}; + +static inline int +ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_IP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_IP_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_IP_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) + return -EINVAL; + + if (!tb[CTA_IP_V4_SRC-1]) + return -EINVAL; + tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); + + if (!tb[CTA_IP_V4_DST-1]) + return -EINVAL; + tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +static const int cta_min_proto[CTA_PROTO_MAX] = { + [CTA_PROTO_NUM-1] = sizeof(u_int16_t), + [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), + [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), + [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t), +}; + +static inline int +ctnetlink_parse_tuple_proto(struct nfattr *attr, + struct ip_conntrack_tuple *tuple) +{ + struct nfattr *tb[CTA_PROTO_MAX]; + struct ip_conntrack_protocol *proto; + int ret = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_PROTO_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_PROTO_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + return -EINVAL; + + if (!tb[CTA_PROTO_NUM-1]) + return -EINVAL; + tuple->dst.protonum = *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); + + proto = ip_conntrack_proto_find_get(tuple->dst.protonum); + + if (likely(proto && 
proto->nfattr_to_tuple)) { + ret = proto->nfattr_to_tuple(tb, tuple); + ip_conntrack_proto_put(proto); + } + + return ret; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, + enum ctattr_tuple type) +{ + struct nfattr *tb[CTA_TUPLE_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_TUPLE_MAX * sizeof(tb)); + memset(tuple, 0, sizeof(*tuple)); + + if (nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]) < 0) + goto nfattr_failure; + + if (!tb[CTA_TUPLE_IP-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); + if (err < 0) + return err; + + if (!tb[CTA_TUPLE_PROTO-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); + if (err < 0) + return err; + + /* orig and expect tuples get DIR_ORIGINAL */ + if (type == CTA_TUPLE_REPLY) + tuple->dst.dir = IP_CT_DIR_REPLY; + else + tuple->dst.dir = IP_CT_DIR_ORIGINAL; + + DUMP_TUPLE(tuple); + + DEBUGP("leaving\n"); + + return 0; + +nfattr_failure: + return -1; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +static const int cta_min_protonat[CTA_PROTONAT_MAX] = { + [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), + [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), +}; + +static int ctnetlink_parse_nat_proto(struct nfattr *attr, + const struct ip_conntrack *ct, + struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_PROTONAT_MAX]; + struct ip_nat_protocol *npt; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_PROTONAT_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr) < 0) + goto nfattr_failure; + + if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) + goto nfattr_failure; + + npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); + if (!npt) + return 0; + + if (!npt->nfattr_to_range) { + ip_nat_proto_put(npt); + return 0; + } + + /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. 
on error */ + if (npt->nfattr_to_range(tb, range) > 0) + range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + + ip_nat_proto_put(npt); + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_parse_nat(struct nfattr *cda[], + const struct ip_conntrack *ct, struct ip_nat_range *range) +{ + struct nfattr *tb[CTA_NAT_MAX]; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + memset(tb, 0, CTA_NAT_MAX * sizeof(tb)); + memset(range, 0, sizeof(*range)); + + if (nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]) < 0) + goto nfattr_failure; + + if (tb[CTA_NAT_MINIP-1]) + range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); + + if (!tb[CTA_NAT_MAXIP-1]) + range->max_ip = range->min_ip; + else + range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO-1]) + return 0; + + err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); + if (err < 0) + return err; + + DEBUGP("leaving\n"); + return 0; + +nfattr_failure: + return -1; +} +#endif + +static inline int +ctnetlink_parse_help(struct nfattr *attr, char **helper_name) +{ + struct nfattr *tb[CTA_HELP_MAX]; + + DEBUGP("entered %s\n", __FUNCTION__); + memset(tb, 0, CTA_HELP_MAX * sizeof(tb)); + + if (nfattr_parse_nested(tb, CTA_HELP_MAX, attr) < 0) + goto nfattr_failure; + + if (!tb[CTA_HELP_NAME-1]) + return -EINVAL; + + *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, 
CTA_TUPLE_REPLY); + else { + /* Flush the whole table */ + ip_conntrack_flush(); + return 0; + } + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash\n"); + return -ENOENT; + } + + ct = tuplehash_to_ctrack(h); + + if (cda[CTA_ID-1]) { + u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); + if (ct->id != id) { + ip_conntrack_put(ct); + return -ENOENT; + } + } + if (del_timer(&ct->timeout)) { + ip_conntrack_put(ct); + ct->timeout.function((unsigned long)ct); + return 0; + } + ip_conntrack_put(ct); + DEBUGP("leaving\n"); + + return 0; +} + +static int +ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + struct ip_conntrack *ct; + struct sk_buff *skb2 = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_IP_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else + return -ENOTSUPP; +#endif + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + } + + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + DEBUGP("tuple not found in conntrack hash"); + return -ENOENT; + } + 
DEBUGP("tuple found\n"); + ct = tuplehash_to_ctrack(h); + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb2) { + ip_conntrack_put(ct); + return -ENOMEM; + } + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, 1, ct); + ip_conntrack_put(ct); + if (err <= 0) + goto out; + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto out; + + DEBUGP("leaving\n"); + return 0; + +out: + if (skb2) + kfree_skb(skb2); + return -1; +} + +static inline int +ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + unsigned long d, status = *(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]); + d = ct->status ^ status; + + if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) + /* unchangeable */ + return -EINVAL; + + if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) + /* SEEN_REPLY bit can only be set */ + return -EINVAL; + + + if (d & IPS_ASSURED && !(status & IPS_ASSURED)) + /* ASSURED bit can only be set */ + return -EINVAL; + + if (cda[CTA_NAT-1]) { +#ifndef CONFIG_IP_NF_NAT_NEEDED + return -EINVAL; +#else + unsigned int hooknum; + struct ip_nat_range range; + + if (ctnetlink_parse_nat(cda, ct, &range) < 0) + return -EINVAL; + + DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", + NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), + htons(range.min.all), htons(range.max.all)); + + /* This is tricky but it works. 
ip_nat_setup_info needs the + * hook number as parameter, so let's do the correct + * conversion and run away */ + if (status & IPS_SRC_NAT_DONE) + hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ + else if (status & IPS_DST_NAT_DONE) + hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ + else + return -EINVAL; /* Missing NAT flags */ + + DEBUGP("NAT status: %lu\n", + status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); + + if (ip_nat_initialized(ct, hooknum)) + return -EEXIST; + ip_nat_setup_info(ct, &range, hooknum); + + DEBUGP("NAT status after setup_info: %lu\n", + ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); +#endif + } + + /* Be careful here, modifying NAT bits can screw up things, + * so don't let users modify them directly if they don't pass + * ip_nat_range. */ + ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); + return 0; +} + + +static inline int +ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + struct ip_conntrack_helper *helper; + char *helpname; + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + /* don't change helper of sibling connections */ + if (ct->master) + return -EINVAL; + + err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); + if (err < 0) + return err; + + helper = __ip_conntrack_helper_find_byname(helpname); + if (!helper) { + if (!strcmp(helpname, "")) + helper = NULL; + else + return -EINVAL; + } + + if (ct->helper) { + if (!helper) { + /* we had a helper before ... 
*/ + ip_ct_remove_expectations(ct); + ct->helper = NULL; + } else { + /* need to zero data of old helper */ + memset(&ct->help, 0, sizeof(ct->help)); + } + } + + ct->helper = helper; + + return 0; +} + +static inline int +ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + if (!del_timer(&ct->timeout)) + return -ETIME; + + ct->timeout.expires = jiffies + timeout * HZ; + add_timer(&ct->timeout); + + return 0; +} + +static int +ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) +{ + int err; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_HELP-1]) { + err = ctnetlink_change_helper(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_TIMEOUT-1]) { + err = ctnetlink_change_timeout(ct, cda); + if (err < 0) + return err; + } + + if (cda[CTA_STATUS-1]) { + err = ctnetlink_change_status(ct, cda); + if (err < 0) + return err; + } + + DEBUGP("all done\n"); + return 0; +} + +static int +ctnetlink_create_conntrack(struct nfattr *cda[], + struct ip_conntrack_tuple *otuple, + struct ip_conntrack_tuple *rtuple) +{ + struct ip_conntrack *ct; + int err = -EINVAL; + + DEBUGP("entered %s\n", __FUNCTION__); + + ct = ip_conntrack_alloc(otuple, rtuple); + if (ct == NULL || IS_ERR(ct)) + return -ENOMEM; + + if (!cda[CTA_TIMEOUT-1]) + goto err; + ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); + + ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->status |= IPS_CONFIRMED; + + err = ctnetlink_change_status(ct, cda); + if (err < 0) + goto err; + + ct->helper = ip_conntrack_helper_find_get(rtuple); + + add_timer(&ct->timeout); + ip_conntrack_hash_insert(ct); + + if (ct->helper) + ip_conntrack_helper_put(ct->helper); + + DEBUGP("conntrack with id %u inserted\n", ct->id); + return 0; + +err: + ip_conntrack_free(ct); + return err; +} + +static int +ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, + struct 
nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple otuple, rtuple; + struct ip_conntrack_tuple_hash *h = NULL; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (cda[CTA_TUPLE_ORIG-1]) { + err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); + if (err < 0) + return err; + } + + if (cda[CTA_TUPLE_REPLY-1]) { + err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY); + if (err < 0) + return err; + } + + write_lock_bh(&ip_conntrack_lock); + if (cda[CTA_TUPLE_ORIG-1]) + h = __ip_conntrack_find(&otuple, NULL); + else if (cda[CTA_TUPLE_REPLY-1]) + h = __ip_conntrack_find(&rtuple, NULL); + + if (h == NULL) { + write_unlock_bh(&ip_conntrack_lock); + DEBUGP("no such conntrack, create new\n"); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); + goto out_unlock; + } else { + /* we only allow nat config for new conntracks */ + if (cda[CTA_NAT-1]) { + err = -EINVAL; + goto out_unlock; + } + } + + /* We manipulate the conntrack inside the global conntrack table lock, + * so there's no need to increase the refcount */ + DEBUGP("conntrack found\n"); + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda); + +out_unlock: + write_unlock_bh(&ip_conntrack_lock); + return err; +} + +/*********************************************************************** + * EXPECT + ***********************************************************************/ + +static inline int +ctnetlink_exp_dump_tuple(struct sk_buff *skb, + const struct ip_conntrack_tuple *tuple, + enum ctattr_expect type) +{ + struct nfattr *nest_parms = NFA_NEST(skb, type); + + if (ctnetlink_dump_tuples(skb, tuple) < 0) + goto nfattr_failure; + + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static inline int +ctnetlink_exp_dump_expect(struct sk_buff *skb, + const struct ip_conntrack_expect *exp) +{ + u_int32_t timeout 
= htonl((exp->timeout.expires - jiffies) / HZ); + u_int32_t id = htonl(exp->id); + struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT); + + if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) + goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) + goto nfattr_failure; + + NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); + NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); + NFA_NEST_END(skb, nest_parms); + + return 0; + +nfattr_failure: + return -1; +} + +static int +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, + int event, + int nowait, + const struct ip_conntrack_expect *exp) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned char *b; + + b = skb->tail; + + event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; + nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +nfattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static int ctnetlink_expect_event(struct notifier_block *this, + unsigned long events, void *ptr) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr; + struct sk_buff *skb; + unsigned int type; + unsigned char *b; + int flags = 0; + u16 proto; + + if (events & IPEXP_NEW) { + type = IPCTNL_MSG_EXP_NEW; + flags = NLM_F_CREATE|NLM_F_EXCL; + } else + return NOTIFY_DONE; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + if (!skb) + return NOTIFY_DONE; + + b = skb->tail; + + type |= NFNL_SUBSYS_CTNETLINK << 8; + nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + + nlh->nlmsg_flags = 
flags; + nfmsg->nfgen_family = AF_INET; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + if (ctnetlink_exp_dump_expect(skb, exp) < 0) + goto nfattr_failure; + + nlh->nlmsg_len = skb->tail - b; + proto = exp->tuple.dst.protonum; + nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + return NOTIFY_DONE; + +nlmsg_failure: +nfattr_failure: + kfree_skb(skb); + return NOTIFY_DONE; +} +#endif + +static int +ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack_expect *exp = NULL; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[0]; + + DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); + + read_lock_bh(&ip_conntrack_lock); + list_for_each(i, &ip_conntrack_expect_list) { + exp = (struct ip_conntrack_expect *) i; + if (exp->id <= *id) + continue; + if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_EXP_NEW, + 1, exp) < 0) + goto out; + *id = exp->id; + } +out: + read_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last id=%llu\n", *id); + + return skb->len; +} + +static int +ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + struct sk_buff *skb2; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct nfgenmsg *msg = NLMSG_DATA(nlh); + u32 rlen; + + if (msg->nfgen_family != AF_INET) + return -EAFNOSUPPORT; + + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_exp_dump_table, + ctnetlink_done)) != 0) + return -EINVAL; + rlen = NLMSG_ALIGN(nlh->nlmsg_len); + if (rlen > skb->len) + rlen = skb->len; + skb_pull(skb, rlen); + return 0; + } + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else + return -EINVAL; + + if 
(err < 0) + return err; + + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + err = -ENOMEM; + skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb2) + goto out; + NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; + + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, + 1, exp); + if (err <= 0) + goto out; + + ip_conntrack_expect_put(exp); + + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + if (err < 0) + goto free; + + return err; + +out: + ip_conntrack_expect_put(exp); +free: + if (skb2) + kfree_skb(skb2); + return err; +} + +static int +ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_expect *exp, *tmp; + struct ip_conntrack_tuple tuple; + struct ip_conntrack_helper *h; + int err; + + /* delete by tuple needs either orig or reply tuple */ + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + else if (cda[CTA_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_HELP_NAME-1]); + + /* delete all expectations for this helper */ + write_lock_bh(&ip_conntrack_lock); + h = __ip_conntrack_helper_find_byname(name); + if (!h) { + write_unlock_bh(&ip_conntrack_lock); + return -EINVAL; + } + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (exp->master->helper == h + && del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock(&ip_conntrack_lock); + return 0; + } else { + /* This basically means we have to flush everything*/ + write_lock_bh(&ip_conntrack_lock); + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, + list) { + if (del_timer(&exp->timeout)) + __ip_ct_expect_unlink_destroy(exp); + } + write_unlock_bh(&ip_conntrack_lock); + return 0; + } + + if (err < 0) + return err; + + /* bump usage 
count to 2 */ + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + ip_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + ip_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. after this line usage count == 0 */ + ip_conntrack_expect_put(exp); + + return 0; +} +static int +ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[]) +{ + return -EOPNOTSUPP; +} + +static int +ctnetlink_create_expect(struct nfattr *cda[]) +{ + struct ip_conntrack_tuple tuple, mask, master_tuple; + struct ip_conntrack_tuple_hash *h = NULL; + struct ip_conntrack_expect *exp; + struct ip_conntrack *ct; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASK); + if (err < 0) + return err; + + if (cda[CTA_TUPLE_ORIG-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_TUPLE_ORIG); + else if (cda[CTA_TUPLE_REPLY-1]) + err = ctnetlink_parse_tuple(cda, &master_tuple, + CTA_TUPLE_REPLY); + else + return -EINVAL; + + if (err < 0) + return err; + + /* Look for master conntrack of this expectation */ + h = ip_conntrack_find_get(&master_tuple, NULL); + if (!h) + return -ENOENT; + ct = tuplehash_to_ctrack(h); + + if (!ct->helper) { + /* such conntrack hasn't got any helper, abort */ + err = -EINVAL; + goto out; + } + + exp = ip_conntrack_expect_alloc(ct); + if (!exp) { + err = -ENOMEM; + goto out; + } + + exp->expectfn = NULL; + exp->master = ct; + memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); + memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); + + err = ip_conntrack_expect_related(exp); + ip_conntrack_expect_put(exp); + +out: + ip_conntrack_put(tuplehash_to_ctrack(h)); + return 
err; +} + +static int +ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_expect *exp; + int err = 0; + + DEBUGP("entered %s\n", __FUNCTION__); + + if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1]) + return -EINVAL; + + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + + write_lock_bh(&ip_conntrack_lock); + exp = __ip_conntrack_expect_find(&tuple); + + if (!exp) { + write_unlock_bh(&ip_conntrack_lock); + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) + err = ctnetlink_create_expect(cda); + return err; + } + + err = -EEXIST; + if (!(nlh->nlmsg_flags & NLM_F_EXCL)) + err = ctnetlink_change_expect(exp, cda); + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving\n"); + + return err; +} + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +static struct notifier_block ctnl_notifier = { + .notifier_call = ctnetlink_conntrack_event, +}; + +static struct notifier_block ctnl_notifier_exp = { + .notifier_call = ctnetlink_expect_event, +}; +#endif + +static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { + [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .cap_required = CAP_NET_ADMIN }, + [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem ctnl_subsys = { + .name = "conntrack", + 
.subsys_id = NFNL_SUBSYS_CTNETLINK, + .cb_count = IPCTNL_MSG_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_cb, +}; + +static struct nfnetlink_subsystem ctnl_exp_subsys = { + .name = "conntrack_expect", + .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, + .cb_count = IPCTNL_MSG_EXP_MAX, + .attr_count = CTA_MAX, + .cb = ctnl_exp_cb, +}; + +static int __init ctnetlink_init(void) +{ + int ret; + + printk("ctnetlink v%s: registering with nfnetlink.\n", version); + ret = nfnetlink_subsys_register(&ctnl_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register with nfnetlink.\n"); + goto err_out; + } + + ret = nfnetlink_subsys_register(&ctnl_exp_subsys); + if (ret < 0) { + printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); + goto err_unreg_subsys; + } + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ret = ip_conntrack_register_notifier(&ctnl_notifier); + if (ret < 0) { + printk("ctnetlink_init: cannot register notifier.\n"); + goto err_unreg_exp_subsys; + } + + ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp); + if (ret < 0) { + printk("ctnetlink_init: cannot expect register notifier.\n"); + goto err_unreg_notifier; + } +#endif + + return 0; + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +err_unreg_notifier: + ip_conntrack_unregister_notifier(&ctnl_notifier); +err_unreg_exp_subsys: + nfnetlink_subsys_unregister(&ctnl_exp_subsys); +#endif +err_unreg_subsys: + nfnetlink_subsys_unregister(&ctnl_subsys); +err_out: + return ret; +} + +static void __exit ctnetlink_exit(void) +{ + printk("ctnetlink: unregistering from nfnetlink.\n"); + +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS + ip_conntrack_unregister_notifier(&ctnl_notifier_exp); + ip_conntrack_unregister_notifier(&ctnl_notifier); +#endif + + nfnetlink_subsys_unregister(&ctnl_exp_subsys); + nfnetlink_subsys_unregister(&ctnl_subsys); + return; +} + +module_init(ctnetlink_init); +module_exit(ctnetlink_exit); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 
dca1f63d6f51..3f90cb9979ac 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -109,16 +109,17 @@ static int icmp_packet(struct ip_conntrack *ct, return NF_ACCEPT; } +static u_int8_t valid_new[] = { + [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 +}; + /* Called when a new connection for this protocol found. */ static int icmp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) { - static u_int8_t valid_new[] - = { [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ @@ -159,11 +160,12 @@ icmp_error_message(struct sk_buff *skb, return NF_ACCEPT; } - innerproto = ip_ct_find_proto(inside->ip.protocol); + innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; /* Are they talking about one of our connections? */ if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } @@ -171,8 +173,10 @@ icmp_error_message(struct sk_buff *skb, been preserved inside the ICMP. 
*/ if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { DEBUGP("icmp_error_track: Can't invert tuple\n"); + ip_conntrack_proto_put(innerproto); return NF_ACCEPT; } + ip_conntrack_proto_put(innerproto); *ctinfo = IP_CT_RELATED; @@ -266,6 +270,47 @@ checksum_skipped: return icmp_error_message(skb, ctinfo, hooknum); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int icmp_tuple_to_nfattr(struct sk_buff *skb, + const struct ip_conntrack_tuple *t) +{ + NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), + &t->src.u.icmp.id); + NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), + &t->dst.u.icmp.type); + NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), + &t->dst.u.icmp.code); + + if (t->dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[t->dst.u.icmp.type]) + return -EINVAL; + + return 0; + +nfattr_failure: + return -1; +} + +static int icmp_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *tuple) +{ + if (!tb[CTA_PROTO_ICMP_TYPE-1] + || !tb[CTA_PROTO_ICMP_CODE-1] + || !tb[CTA_PROTO_ICMP_ID-1]) + return -1; + + tuple->dst.u.icmp.type = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); + tuple->dst.u.icmp.code = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); + tuple->src.u.icmp.id = + *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + + return 0; +} +#endif + struct ip_conntrack_protocol ip_conntrack_protocol_icmp = { .proto = IPPROTO_ICMP, @@ -277,4 +322,9 @@ struct ip_conntrack_protocol ip_conntrack_protocol_icmp = .packet = icmp_packet, .new = icmp_new, .error = icmp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = icmp_tuple_to_nfattr, + .nfattr_to_tuple = icmp_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 3d5f878a07d1..a875f35e576d 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ 
b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -505,7 +505,12 @@ static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { .packet = sctp_packet, .new = sctp_new, .destroy = NULL, - .me = THIS_MODULE + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index a569ad1ee4d9..c2bce22d4031 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -336,6 +336,23 @@ static int tcp_print_conntrack(struct seq_file *s, return seq_printf(s, "%s ", tcp_conntrack_names[state]); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct) +{ + read_lock_bh(&tcp_lock); + NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), + &ct->proto.tcp.state); + read_unlock_bh(&tcp_lock); + + return 0; + +nfattr_failure: + return -1; +} +#endif + static unsigned int get_conntrack_index(const struct tcphdr *tcph) { if (tcph->rst) return TCP_RST_SET; @@ -1100,4 +1117,10 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .packet = tcp_packet, .new = tcp_new, .error = tcp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .to_nfattr = tcp_to_nfattr, + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 6066eaf4d825..14130169cbfd 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -145,4 +145,9 @@ struct 
ip_conntrack_protocol ip_conntrack_protocol_udp = .packet = udp_packet, .new = udp_new, .error = udp_error, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index f0880004115d..ca97c3ac2f2a 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -5,7 +5,7 @@ */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team + * (C) 2002-2005 Netfilter Core Team * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -147,8 +147,7 @@ static int ct_seq_show(struct seq_file *s, void *v) if (DIRECTION(hash)) return 0; - proto = ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); + proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); IP_NF_ASSERT(proto); if (seq_printf(s, "%-8s %u %ld ", @@ -283,7 +282,7 @@ static int exp_seq_show(struct seq_file *s, void *v) seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, - ip_ct_find_proto(expect->tuple.dst.protonum)); + __ip_conntrack_proto_find(expect->tuple.dst.protonum)); return seq_putc(s, '\n'); } @@ -992,12 +991,16 @@ EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); EXPORT_SYMBOL(ip_ct_refresh_acct); -EXPORT_SYMBOL(ip_ct_protos); -EXPORT_SYMBOL(ip_ct_find_proto); + EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); 
+EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); + EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); EXPORT_SYMBOL(ip_conntrack_htable_size); @@ -1005,7 +1008,28 @@ EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_put); #ifdef CONFIG_IP_NF_NAT_NEEDED EXPORT_SYMBOL(ip_conntrack_tcp_update); #endif + +EXPORT_SYMBOL_GPL(ip_conntrack_flush); +EXPORT_SYMBOL_GPL(__ip_conntrack_find); + +EXPORT_SYMBOL_GPL(ip_conntrack_alloc); +EXPORT_SYMBOL_GPL(ip_conntrack_free); +EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert); + +EXPORT_SYMBOL_GPL(ip_ct_remove_expectations); + +EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_helper_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); + +EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); +EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); +EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple); +#endif diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index ed4d731880f7..567c802fecf0 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -47,8 +47,39 @@ DEFINE_RWLOCK(ip_nat_lock); static unsigned int ip_nat_htable_size; static struct list_head *bysource; + +#define MAX_IP_NAT_PROTO 256 struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; +static inline struct ip_nat_protocol * +__ip_nat_proto_find(u_int8_t protonum) +{ + return ip_nat_protos[protonum]; +} + +struct ip_nat_protocol * +ip_nat_proto_find_get(u_int8_t protonum) +{ + struct ip_nat_protocol *p; + + /* we need to disable preemption to make sure 'p' 
doesn't get + * removed until we've grabbed the reference */ + preempt_disable(); + p = __ip_nat_proto_find(protonum); + if (p) { + if (!try_module_get(p->me)) + p = &ip_nat_unknown_protocol; + } + preempt_enable(); + + return p; +} + +void +ip_nat_proto_put(struct ip_nat_protocol *p) +{ + module_put(p->me); +} /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int @@ -103,7 +134,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = ip_nat_find_proto(tuple->dst.protonum); + struct ip_nat_protocol *proto = + __ip_nat_proto_find(tuple->dst.protonum); /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -216,8 +248,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, struct ip_conntrack *conntrack, enum ip_nat_manip_type maniptype) { - struct ip_nat_protocol *proto - = ip_nat_find_proto(orig_tuple->dst.protonum); + struct ip_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, and that same mapping gives a unique tuple within the given @@ -242,14 +273,20 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ + proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ip_nat_proto_put(proto); return; + } /* Last change: get protocol to try to obtain unique tuple. 
*/ proto->unique_tuple(tuple, range, maniptype, conntrack); + + ip_nat_proto_put(proto); } unsigned int @@ -320,6 +357,7 @@ manip_pkt(u_int16_t proto, enum ip_nat_manip_type maniptype) { struct iphdr *iph; + struct ip_nat_protocol *p; if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; @@ -327,9 +365,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - target, maniptype)) + p = ip_nat_proto_find_get(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { + ip_nat_proto_put(p); return 0; + } + ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; @@ -425,7 +466,8 @@ int icmp_reply_translation(struct sk_buff **pskb, if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, ip_ct_find_proto(inside->ip.protocol))) + &inner, + __ip_conntrack_proto_find(inside->ip.protocol))) return 0; /* Change inner back to look like incoming packet. 
We do the @@ -495,6 +537,49 @@ void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) synchronize_net(); } +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) +int +ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range) +{ + NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), + &range->min.tcp.port); + NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), + &range->max.tcp.port); + + return 0; + +nfattr_failure: + return -1; +} + +int +ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) +{ + int ret = 0; + + /* we have to return whether we actually parsed something or not */ + + if (tb[CTA_PROTONAT_PORT_MIN-1]) { + ret = 1; + range->min.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); + } + + if (!tb[CTA_PROTONAT_PORT_MAX-1]) { + if (ret) + range->max.tcp.port = range->min.tcp.port; + } else { + ret = 1; + range->max.tcp.port = + *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); + } + + return ret; +} +#endif + int __init ip_nat_init(void) { size_t i; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 6596c9ee1655..38fdfc2093c4 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -107,10 +107,15 @@ icmp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_icmp -= { "ICMP", IPPROTO_ICMP, += { "ICMP", IPPROTO_ICMP, THIS_MODULE, icmp_manip_pkt, icmp_in_range, icmp_unique_tuple, icmp_print, - icmp_print_range + icmp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index a98e36d2b3c6..f03cd0f0c2bf 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ 
b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -170,10 +171,15 @@ tcp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_tcp -= { "TCP", IPPROTO_TCP, += { "TCP", IPPROTO_TCP, THIS_MODULE, tcp_manip_pkt, tcp_in_range, tcp_unique_tuple, tcp_print, - tcp_print_range + tcp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 9f66e5625664..7a4e66ecbc0a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -157,10 +157,15 @@ udp_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_protocol_udp -= { "UDP", IPPROTO_UDP, += { "UDP", IPPROTO_UDP, THIS_MODULE, udp_manip_pkt, udp_in_range, udp_unique_tuple, udp_print, - udp_print_range + udp_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + ip_nat_port_range_to_nfattr, + ip_nat_port_nfattr_to_range, +#endif }; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index f5525bd58d16..512d8f2fb824 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -61,7 +61,7 @@ unknown_print_range(char *buffer, const struct ip_nat_range *range) } struct ip_nat_protocol ip_nat_unknown_protocol = { - "unknown", 0, + "unknown", 0, THIS_MODULE, unknown_manip_pkt, unknown_in_range, unknown_unique_tuple, diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 9ecba979033a..89db052add81 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -394,6 +394,8 @@ 
module_exit(fini); EXPORT_SYMBOL(ip_nat_setup_info); EXPORT_SYMBOL(ip_nat_protocol_register); EXPORT_SYMBOL(ip_nat_protocol_unregister); +EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); +EXPORT_SYMBOL_GPL(ip_nat_proto_put); EXPORT_SYMBOL(ip_nat_cheat_check); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_mangle_udp_packet); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 710acd77cc4c..b0ed57981847 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -121,6 +121,7 @@ void __nfa_fill(struct sk_buff *skb, int attrtype, int attrlen, nfa->nfa_type = attrtype; nfa->nfa_len = size; memcpy(NFA_DATA(nfa), data, attrlen); + memset(NFA_DATA(nfa) + attrlen, 0, NFA_ALIGN(size) - size); } int nfattr_parse(struct nfattr *tb[], int maxattr, struct nfattr *nfa, int len) -- cgit v1.2.3 From b6b99eb5409d75ae35390057cd28f3aedfbd4cf4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:33:51 -0700 Subject: [NET]: Reduce tc_index/tc_verd to u16 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4aeadb102589..af4f02e98243 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -265,9 +265,9 @@ struct sk_buff { #endif #endif /* CONFIG_NETFILTER */ #ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ + __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT - __u32 tc_verd; /* traffic control verdict */ + __u16 tc_verd; /* traffic control verdict */ #endif #endif -- cgit v1.2.3 From f2ccd8fa06c8e302116e71df372f5c1f83432e03 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 9 Aug 2005 19:34:12 -0700 Subject: [NET]: Kill skb->real_dev Bonding just wants the device before the skb_bond() decapsulation occurs, so simply pass that original device into packet_type->func() as an argument. It remains to be seen whether we can use this same exact thing to get rid of skb->input_dev as well. Signed-off-by: David S. Miller --- drivers/block/aoe/aoenet.c | 2 +- drivers/net/bonding/bond_3ad.c | 11 ++++------- drivers/net/bonding/bond_3ad.h | 2 +- drivers/net/bonding/bond_alb.c | 5 ++--- drivers/net/hamradio/bpqether.c | 4 ++-- drivers/net/pppoe.c | 6 ++++-- drivers/net/wan/hdlc_generic.c | 2 +- drivers/net/wan/lapbether.c | 2 +- drivers/net/wan/syncppp.c | 2 +- include/linux/if_vlan.h | 1 - include/linux/netdevice.h | 10 ++++++---- include/linux/skbuff.h | 2 -- include/net/arp.h | 2 +- include/net/ax25.h | 2 +- include/net/datalink.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 3 ++- include/net/llc.h | 8 +++++--- include/net/p8022.h | 3 ++- include/net/psnap.h | 2 +- include/net/x25.h | 2 +- net/802/p8022.c | 3 ++- net/802/psnap.c | 7 ++++--- net/8021q/vlan.h | 2 +- net/8021q/vlan_dev.c | 2 +- net/appletalk/aarp.c | 2 +- net/appletalk/ddp.c | 6 +++--- net/ax25/ax25_in.c | 8 ++++---- net/core/dev.c | 35 +++++++++++++++++++---------------- net/core/skbuff.c | 2 -- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 ++-- net/ipv4/ip_input.c | 2 +- net/ipv4/ipconfig.c | 8 ++++---- net/ipv6/ip6_input.c | 2 +- net/ipx/af_ipx.c | 2 +- net/irda/irlap_frame.c | 2 +- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 3 ++- net/llc/llc_input.c | 4 ++-- net/netrom/nr_dev.c | 2 +- net/packet/af_packet.c | 6 +++--- net/x25/x25_dev.c | 2 +- 45 files changed, 96 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/aoe/aoenet.c b/drivers/block/aoe/aoenet.c index 9e6f51c528b0..4be976940f69 100644 --- a/drivers/block/aoe/aoenet.c +++ 
b/drivers/block/aoe/aoenet.c @@ -120,7 +120,7 @@ aoenet_xmit(struct sk_buff *sl) * (1) len doesn't include the header by default. I want this. */ static int -aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt) +aoenet_rcv(struct sk_buff *skb, struct net_device *ifp, struct packet_type *pt, struct net_device *orig_dev) { struct aoe_hdr *h; u32 n; diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index a2e8dda5afac..d2f34d5a8083 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2419,22 +2419,19 @@ out: return 0; } -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype) +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct net_device *orig_dev) { struct bonding *bond = dev->priv; struct slave *slave = NULL; int ret = NET_RX_DROP; - if (!(dev->flags & IFF_MASTER)) { + if (!(dev->flags & IFF_MASTER)) goto out; - } read_lock(&bond->lock); - slave = bond_get_slave_by_dev((struct bonding *)dev->priv, - skb->real_dev); - if (slave == NULL) { + slave = bond_get_slave_by_dev((struct bonding *)dev->priv, orig_dev); + if (!slave) goto out_unlock; - } bond_3ad_rx_indication((struct lacpdu *) skb->data, slave, skb->len); diff --git a/drivers/net/bonding/bond_3ad.h b/drivers/net/bonding/bond_3ad.h index f46823894187..673a30af5660 100644 --- a/drivers/net/bonding/bond_3ad.h +++ b/drivers/net/bonding/bond_3ad.h @@ -295,6 +295,6 @@ void bond_3ad_adapter_duplex_changed(struct slave *slave); void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); -int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype); +int bond_3ad_lacpdu_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type* ptype, struct 
net_device *orig_dev); #endif //__BOND_3AD_H__ diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 19e829b567d0..f8fce3961197 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -354,15 +354,14 @@ static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp) _unlock_rx_hashtbl(bond); } -static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype) +static int rlb_arp_recv(struct sk_buff *skb, struct net_device *bond_dev, struct packet_type *ptype, struct net_device *orig_dev) { struct bonding *bond = bond_dev->priv; struct arp_pkt *arp = (struct arp_pkt *)skb->data; int res = NET_RX_DROP; - if (!(bond_dev->flags & IFF_MASTER)) { + if (!(bond_dev->flags & IFF_MASTER)) goto out; - } if (!arp) { dprintk("Packet has no ARP data\n"); diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index ba9f0580e1f9..2946e037a9b1 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -98,7 +98,7 @@ static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF}; static char bpq_eth_addr[6]; -static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static int bpq_device_event(struct notifier_block *, unsigned long, void *); static const char *bpq_print_ethaddr(const unsigned char *); @@ -165,7 +165,7 @@ static inline int dev_is_ethdev(struct net_device *dev) /* * Receive an AX.25 frame via an ethernet interface. 
*/ -static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int bpq_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len; char * ptr; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index ce1a9bf7b9a7..82f236cc3b9b 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -377,7 +377,8 @@ abort_kfree: ***********************************************************************/ static int pppoe_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; @@ -426,7 +427,8 @@ out: ***********************************************************************/ static int pppoe_disc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, + struct net_device *orig_dev) { struct pppoe_hdr *ph; diff --git a/drivers/net/wan/hdlc_generic.c b/drivers/net/wan/hdlc_generic.c index a63f6a2cc4f7..cdd4c09c2d90 100644 --- a/drivers/net/wan/hdlc_generic.c +++ b/drivers/net/wan/hdlc_generic.c @@ -61,7 +61,7 @@ static struct net_device_stats *hdlc_get_stats(struct net_device *dev) static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *p) + struct packet_type *p, struct net_device *orig_dev) { hdlc_device *hdlc = dev_to_hdlc(dev); if (hdlc->proto.netif_rx) diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 7f2e3653c5e5..6c302e9dbca2 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -86,7 +86,7 @@ static __inline__ int dev_is_ethdev(struct net_device *dev) /* * Receive a LAPB frame via an ethernet interface. 
*/ -static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype) +static int lapbeth_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *ptype, struct net_device *orig_dev) { int len, err; struct lapbethdev *lapbeth; diff --git a/drivers/net/wan/syncppp.c b/drivers/net/wan/syncppp.c index 84b65c60c799..f58c794a963a 100644 --- a/drivers/net/wan/syncppp.c +++ b/drivers/net/wan/syncppp.c @@ -1447,7 +1447,7 @@ static void sppp_print_bytes (u_char *p, u16 len) * after interrupt servicing to process frames queued via netif_rx. */ -static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p) +static int sppp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) return NET_RX_DROP; diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 62a9d89dfbe2..17d0c0d40b0e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; - skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a0ed7f9e801..296cf93a65e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,10 +497,12 @@ static inline void *netdev_priv(struct net_device *dev) #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - int (*func) (struct sk_buff *, struct net_device *, - struct packet_type *); + __be16 type; /* This is really htons(ether_type). 
*/ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, + struct net_device *); void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f02e98243..60b32151f76a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -164,7 +164,6 @@ struct skb_shared_info { * @stamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on - * @real_dev: The real device we are using * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -206,7 +205,6 @@ struct sk_buff { struct timeval stamp; struct net_device *dev; struct net_device *input_dev; - struct net_device *real_dev; union { struct tcphdr *th; diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a52..a13e30c35f42 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 3696f988a9f1..926eed543023 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git 
a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb5..deb7ca75db48 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143f..2570b536c8f4 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b3..533fc074ed90 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e2..71769a5aeef3 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + 
struct packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c3581..223f8fa9ffca 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,8 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); #endif diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b36..b2e01cc3fc8a 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e8..fee62ff8c194 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); 
+extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); diff --git a/net/802/p8022.c b/net/802/p8022.c index 5ae63416df6d..b24817c63ca8 100644 --- a/net/802/p8022.c +++ b/net/802/p8022.c @@ -35,7 +35,8 @@ static int p8022_request(struct datalink_proto *dl, struct sk_buff *skb, struct datalink_proto *register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct datalink_proto *proto; diff --git a/net/802/psnap.c b/net/802/psnap.c index 1053821ddf93..ab80b1fab53c 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -47,7 +47,7 @@ static struct datalink_proto *find_snap_client(unsigned char *desc) * A SNAP packet has arrived */ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { int rc = 1; struct datalink_proto *proto; @@ -61,7 +61,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, /* Pass the frame on. 
*/ skb->h.raw += 5; skb_pull(skb, 5); - rc = proto->rcvfunc(skb, dev, &snap_packet_type); + rc = proto->rcvfunc(skb, dev, &snap_packet_type, orig_dev); } else { skb->sk = NULL; kfree_skb(skb); @@ -118,7 +118,8 @@ module_exit(snap_exit); struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *)) + struct packet_type *, + struct net_device *)) { struct datalink_proto *proto = NULL; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 508b1fa14546..9ae3a14dd016 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -51,7 +51,7 @@ struct net_device *__find_vlan_dev(struct net_device* real_dev, /* found in vlan_dev.c */ int vlan_dev_rebuild_header(struct sk_buff *skb); int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype); + struct packet_type *ptype, struct net_device *orig_dev); int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, void *daddr, void *saddr, unsigned len); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 49c487413518..145f5cde96cf 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -113,7 +113,7 @@ static inline struct sk_buff *vlan_check_reorder_header(struct sk_buff *skb) * */ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, - struct packet_type* ptype) + struct packet_type* ptype, struct net_device *orig_dev) { unsigned char *rawp = NULL; struct vlan_hdr *vhdr = (struct vlan_hdr *)(skb->data); diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index c34614ea5fce..7076097debc2 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -698,7 +698,7 @@ static void __aarp_resolved(struct aarp_entry **list, struct aarp_entry *a, * frame. We currently only support Ethernet. 
*/ static int aarp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct elapaarp *ea = aarp_hdr(skb); int hash, ret = 0; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 192b529f86a4..ffde33cd09ba 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1390,7 +1390,7 @@ free_it: * [ie ARPHRD_ETHERTALK] */ static int atalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct ddpehdr *ddp; struct sock *sock; @@ -1482,7 +1482,7 @@ freeit: * header and append a long one. */ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { /* Expand any short form frames */ if (skb->mac.raw[2] == 1) { @@ -1528,7 +1528,7 @@ static int ltalk_rcv(struct sk_buff *skb, struct net_device *dev, } skb->h.raw = skb->data; - return atalk_rcv(skb, dev, pt); + return atalk_rcv(skb, dev, pt, orig_dev); freeit: kfree_skb(skb); return 0; diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 3dc808fde33f..124eec8216d7 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -132,7 +132,7 @@ int ax25_rx_iframe(ax25_cb *ax25, struct sk_buff *skb) skb->dev = ax25->ax25_dev->dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, skb->dev, NULL); /* Wrong ptype */ + ip_rcv(skb, skb->dev, NULL, skb->dev); /* Wrong ptype */ return 1; } #endif @@ -258,7 +258,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_IP); - ip_rcv(skb, dev, ptype); /* Note ptype here is the wrong one, fix me later */ + ip_rcv(skb, dev, ptype, dev); /* Note ptype here is the wrong one, fix me later */ break; case AX25_P_ARP: @@ -268,7 +268,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; 
skb->pkt_type = PACKET_HOST; skb->protocol = htons(ETH_P_ARP); - arp_rcv(skb, dev, ptype); /* Note ptype here is wrong... */ + arp_rcv(skb, dev, ptype, dev); /* Note ptype here is wrong... */ break; #endif case AX25_P_TEXT: @@ -454,7 +454,7 @@ static int ax25_rcv(struct sk_buff *skb, struct net_device *dev, * Receive an AX.25 frame via a SLIP interface. */ int ax25_kiss_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { skb->sk = NULL; /* Initially we don't know who it's for */ skb->destructor = NULL; /* Who initializes this, dammit?! */ diff --git a/net/core/dev.c b/net/core/dev.c index faf59b02c4bf..e1cc162bf295 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1058,7 +1058,7 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) skb2->h.raw = skb2->nh.raw; skb2->pkt_type = PACKET_OUTGOING; - ptype->func(skb2, skb->dev, ptype); + ptype->func(skb2, skb->dev, ptype, skb->dev); } } rcu_read_unlock(); @@ -1425,14 +1425,14 @@ int netif_rx_ni(struct sk_buff *skb) EXPORT_SYMBOL(netif_rx_ni); -static __inline__ void skb_bond(struct sk_buff *skb) +static inline struct net_device *skb_bond(struct sk_buff *skb) { struct net_device *dev = skb->dev; - if (dev->master) { - skb->real_dev = skb->dev; + if (dev->master) skb->dev = dev->master; - } + + return dev; } static void net_tx_action(struct softirq_action *h) @@ -1482,10 +1482,11 @@ static void net_tx_action(struct softirq_action *h) } static __inline__ int deliver_skb(struct sk_buff *skb, - struct packet_type *pt_prev) + struct packet_type *pt_prev, + struct net_device *orig_dev) { atomic_inc(&skb->users); - return pt_prev->func(skb, skb->dev, pt_prev); + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) @@ -1496,7 +1497,8 @@ struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, void (*br_fdb_put_hook)(struct net_bridge_fdb_entry 
*ent); static __inline__ int handle_bridge(struct sk_buff **pskb, - struct packet_type **pt_prev, int *ret) + struct packet_type **pt_prev, int *ret, + struct net_device *orig_dev) { struct net_bridge_port *port; @@ -1505,14 +1507,14 @@ static __inline__ int handle_bridge(struct sk_buff **pskb, return 0; if (*pt_prev) { - *ret = deliver_skb(*pskb, *pt_prev); + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); *pt_prev = NULL; } return br_handle_frame_hook(port, pskb); } #else -#define handle_bridge(skb, pt_prev, ret) (0) +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) #endif #ifdef CONFIG_NET_CLS_ACT @@ -1559,6 +1561,7 @@ static int ing_filter(struct sk_buff *skb) int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; + struct net_device *orig_dev; int ret = NET_RX_DROP; unsigned short type; @@ -1569,7 +1572,7 @@ int netif_receive_skb(struct sk_buff *skb) if (!skb->stamp.tv_sec) net_timestamp(&skb->stamp); - skb_bond(skb); + orig_dev = skb_bond(skb); __get_cpu_var(netdev_rx_stat).total++; @@ -1590,14 +1593,14 @@ int netif_receive_skb(struct sk_buff *skb) list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } #ifdef CONFIG_NET_CLS_ACT if (pt_prev) { - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; /* noone else should process this after*/ } else { skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); @@ -1616,7 +1619,7 @@ ncls: handle_diverter(skb); - if (handle_bridge(&skb, &pt_prev, &ret)) + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) goto out; type = skb->protocol; @@ -1624,13 +1627,13 @@ ncls: if (ptype->type == type && (!ptype->dev || ptype->dev == skb->dev)) { if (pt_prev) - ret = deliver_skb(skb, pt_prev); + ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = ptype; } } if (pt_prev) { - ret = pt_prev->func(skb, skb->dev, 
pt_prev); + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { kfree_skb(skb); /* Jamal, now you will not able to escape explaining diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 16df7bd77e78..ef498cb9f786 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -333,7 +333,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->sk = NULL; C(stamp); C(dev); - C(real_dev); C(h); C(nh); C(mac); @@ -397,7 +396,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->sk = NULL; new->dev = old->dev; - new->real_dev = old->real_dev; new->priority = old->priority; new->protocol = old->protocol; new->dst = dst_clone(old->dst); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 0c30409fe9e5..bd49dd97a09c 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2064,7 +2064,7 @@ static struct notifier_block dn_dev_notifier = { .notifier_call = dn_device_event, }; -extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { .type = __constant_htons(ETH_P_DNA_RT), diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 2399fa8a3f86..2c915f305be3 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -572,7 +572,7 @@ static int dn_route_ptp_hello(struct sk_buff *skb) return NET_RX_SUCCESS; } -int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct dn_skb_cb *cb; unsigned char flags = 0; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index b807a314269e..8f0639905558 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1009,7 +1009,7 @@ release: * Receive an Econet frame from a device. 
*/ -static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int econet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ec_framehdr *hdr; struct sock *sk; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index a642fd612853..6eb9c549d643 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -700,7 +700,7 @@ void arp_send(int type, int ptype, u32 dest_ip, static void parp_redo(struct sk_buff *skb) { nf_reset(skb); - arp_rcv(skb, skb->dev, NULL); + arp_rcv(skb, skb->dev, NULL, skb->dev); } /* @@ -927,7 +927,7 @@ out: * Receive an arp request from the device layer. */ -int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *arp; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index c703528e0bcd..d603247bdfe9 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -358,7 +358,7 @@ drop: /* * Main IP Receive routine. */ -int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct iphdr *iph; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d2bf8e1930a3..63e106605f28 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -393,7 +393,7 @@ static int __init ic_defaults(void) #ifdef IPCONFIG_RARP -static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { .type = __constant_htons(ETH_P_RARP), @@ -414,7 +414,7 @@ static inline void ic_rarp_cleanup(void) * Process received RARP packet. 
*/ static int __init -ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct arphdr *rarp; unsigned char *rarp_ptr; @@ -555,7 +555,7 @@ struct bootp_pkt { /* BOOTP packet format */ #define DHCPRELEASE 7 #define DHCPINFORM 8 -static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt); +static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { .type = __constant_htons(ETH_P_IP), @@ -823,7 +823,7 @@ static void __init ic_do_bootp_ext(u8 *ext) /* * Receive BOOTP reply. */ -static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct bootp_pkt *b; struct iphdr *h; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 10fbb50daea4..ab51c0369e15 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -56,7 +56,7 @@ static inline int ip6_rcv_finish( struct sk_buff *skb) return dst_input(skb); } -int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct ipv6hdr *hdr; u32 pkt_len; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5a27e5df5886..3a13c5d1d4d2 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1627,7 +1627,7 @@ out: return rc; } -static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { /* NULL here for pt means the packet was looped back */ struct ipx_interface 
*intrfc; diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index eb65b4925b51..3e9a06abbdd0 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -1303,7 +1303,7 @@ static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, * Jean II */ int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct irlap_info info; struct irlap_cb *self; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 6ffaed4544e9..634901dd156f 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -54,7 +54,7 @@ extern int irsock_init(void); extern void irsock_cleanup(void); /* irlap_frame.c */ extern int irlap_driver_rcv(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); /* * Module parameters diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 5ff02c080a0b..9727455bf0e7 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -103,7 +103,8 @@ out: struct llc_sap *llc_sap_open(unsigned char lsap, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)) + struct packet_type *pt, + struct net_device *orig_dev)) { struct llc_sap *sap = llc_sap_find(lsap); diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index 4da6976efc9c..13b46240b7a1 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -132,7 +132,7 @@ static inline int llc_fixup_skb(struct sk_buff *skb) * data now), it queues this frame in the connection's backlog. 
*/ int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt) + struct packet_type *pt, struct net_device *orig_dev) { struct llc_sap *sap; struct llc_pdu_sn *pdu; @@ -165,7 +165,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, * LLC functionality */ if (sap->rcv_func) { - sap->rcv_func(skb, dev, pt); + sap->rcv_func(skb, dev, pt, orig_dev); goto out; } dest = llc_pdu_type(skb); diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 220bf7494f71..83eb41d9b937 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -64,7 +64,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) skb->nh.raw = skb->data; skb->pkt_type = PACKET_HOST; - ip_rcv(skb, skb->dev, NULL); + ip_rcv(skb, skb->dev, NULL, skb->dev); return 1; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index c9d5980aa4de..deb5f6f7f858 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -241,7 +241,7 @@ static struct proto_ops packet_ops; #ifdef CONFIG_SOCK_PACKET static struct proto_ops packet_ops_spkt; -static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_pkt *spkt; @@ -441,7 +441,7 @@ static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned we will not harm anyone. 
*/ -static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct sockaddr_ll *sll; @@ -546,7 +546,7 @@ drop: } #ifdef CONFIG_PACKET_MMAP -static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt) +static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; struct packet_sock *po; diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c index 36fc3bf6d882..adfe7b8df355 100644 --- a/net/x25/x25_dev.c +++ b/net/x25/x25_dev.c @@ -81,7 +81,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb) } int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype) + struct packet_type *ptype, struct net_device *orig_dev) { struct sk_buff *nskb; struct x25_neigh *nb; -- cgit v1.2.3 From 089af26c706d1473f641c909fee7c878d29c1f1a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:37:23 -0700 Subject: [NETFILTER]: Rename skb_ip_make_writable() to skb_make_writable() There is nothing IPv4-specific in it. In fact, it was already used by IPv6, too... Upcoming nfnetlink_queue code will use it for any kind of packet. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 5 +++++ include/linux/netfilter_ipv4.h | 5 ----- net/core/netfilter.c | 6 +++--- net/ipv4/netfilter/ip_nat_core.c | 4 ++-- net/ipv4/netfilter/ip_nat_helper.c | 8 ++++---- net/ipv4/netfilter/ip_nat_proto_icmp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_tcp.c | 2 +- net/ipv4/netfilter/ip_nat_proto_udp.c | 2 +- net/ipv4/netfilter/ip_nat_snmp_basic.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_DSCP.c | 2 +- net/ipv4/netfilter/ipt_ECN.c | 4 ++-- net/ipv4/netfilter/ipt_TCPMSS.c | 2 +- net/ipv4/netfilter/ipt_TOS.c | 2 +- net/ipv6/netfilter/ip6_queue.c | 2 +- 15 files changed, 25 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ec60856408fd..54b97a1baba5 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -193,6 +193,11 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); /* FIXME: Before cache is ever used, this must be implemented for real. */ extern void nf_invalidate_cache(int pf); +/* Call this before modifying an existing packet: ensures it is + modifiable and linear to the point you care about (writable_len). + Returns true or false. */ +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 552815b8193e..fdc4a9527343 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -80,11 +80,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); -/* Call this before modifying an existing IP packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. 
*/ -extern int skb_ip_make_writable(struct sk_buff **pskb, - unsigned int writable_len); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 076c156d5eda..bbf9081a6804 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -512,8 +512,9 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); +#endif /*CONFIG_INET*/ -int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { struct sk_buff *nskb; @@ -540,8 +541,7 @@ copy_skb: *pskb = nskb; return 1; } -EXPORT_SYMBOL(skb_ip_make_writable); -#endif /*CONFIG_INET*/ +EXPORT_SYMBOL(skb_make_writable); /* Internal logging interface, which relies on the real LOG target modules */ diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 567c802fecf0..1adedb743f60 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -359,7 +359,7 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct ip_nat_protocol *p; - if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; @@ -431,7 +431,7 @@ int icmp_reply_translation(struct sk_buff **pskb, struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; - if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 158f34f32c04..d2dd5d313556 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -168,7 +168,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, struct tcphdr *tcph; int datalen; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if 
(!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -228,7 +228,7 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, match_offset + match_len) return 0; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len @@ -315,7 +315,7 @@ ip_nat_sack_adjust(struct sk_buff **pskb, optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - if (!skb_ip_make_writable(pskb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); @@ -363,7 +363,7 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 7ed2fdb53457..938719043999 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -62,7 +62,7 @@ icmp_manip_pkt(struct sk_buff **pskb, struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index 6113a16af867..1d381bf68574 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -103,7 +103,7 @@ tcp_manip_pkt(struct sk_buff **pskb, if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) + if (!skb_make_writable(pskb, hdroff + hdrsize)) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git 
a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 689478e637a7..c4906e1aa24a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -94,7 +94,7 @@ udp_manip_pkt(struct sk_buff **pskb, u32 oldip, newip; u16 *portptr, newport; - if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; iph = (struct iphdr *)((*pskb)->data + iphdroff); diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index 2a48b6e635ae..93b2c5111bb2 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -1275,7 +1275,7 @@ static int help(struct sk_buff **pskb, return NF_DROP; } - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; spin_lock_bh(&snmp_lock); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index bc0af8d8e910..ae975ac59c6a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -388,7 +388,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c index 975476fef27a..6e319570a28c 100644 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ b/net/ipv4/netfilter/ipt_DSCP.c @@ -39,7 +39,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 
f63a9bc0e4d2..a1319693f648 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -31,7 +31,7 @@ set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; @@ -66,7 +66,7 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) tcph->cwr == einfo->proto.tcp.cwr))) return 1; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) + if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 949288319ca8..8db70d6908c3 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -58,7 +58,7 @@ ipt_tcpmss_target(struct sk_buff **pskb, unsigned int i; u_int8_t *opt; - if (!skb_ip_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; if ((*pskb)->ip_summed == CHECKSUM_HW && diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c index 49abb7eef0a4..deadb36d4428 100644 --- a/net/ipv4/netfilter/ipt_TOS.c +++ b/net/ipv4/netfilter/ipt_TOS.c @@ -33,7 +33,7 @@ target(struct sk_buff **pskb, if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { u_int16_t diffs[2]; - if (!skb_ip_make_writable(pskb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 83ccedceed17..7130603a32c5 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -384,7 +384,7 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) } skb_put(e->skb, 
diff); } - if (!skb_ip_make_writable(&e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; -- cgit v1.2.3 From 4fdb3bb723db469717c6d38fda667d8b0fa86ebd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:40:55 -0700 Subject: [NETLINK]: Add properly module refcounting for kernel netlink sockets. - Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- drivers/w1/w1_int.c | 4 +- include/linux/net.h | 3 ++ include/linux/netlink.h | 2 +- kernel/audit.c | 3 +- lib/kobject_uevent.c | 3 +- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/rtnetlink.c | 2 +- net/decnet/netfilter/dn_rtmsg.c | 4 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/netfilter/ip_queue.c | 3 +- net/ipv4/netfilter/ipt_ULOG.c | 3 +- net/ipv4/tcp_diag.c | 3 +- net/ipv6/netfilter/ip6_queue.c | 2 +- net/netfilter/nfnetlink.c | 5 +- net/netlink/af_netlink.c | 108 ++++++++++++++++++++++++++++++++-------- net/xfrm/xfrm_user.c | 4 +- security/selinux/netlink.c | 2 +- 17 files changed, 119 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index b5a5e04b6d37..8809788dac26 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 23; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL); + dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); @@ -225,3 +225,5 @@ void w1_remove_master_device(struct w1_bus_master 
*bm) EXPORT_SYMBOL(w1_add_master_device); EXPORT_SYMBOL(w1_remove_master_device); + +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_W1); diff --git a/include/linux/net.h b/include/linux/net.h index 20cb226b2268..39906619b9d7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6552b71bfa73..1c50fea8995b 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -117,7 +117,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index ef35166fdc29..ed4019563d56 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,8 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? 
"enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive); + audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 8e49d21057e4..88f4d746aa05 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -153,7 +153,8 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL); + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + THIS_MODULE); if (!uevent_sock) { printk(KERN_ERR diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 01af4fcef26d..561d75c8ed5a 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,7 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 4b1bb30e6381..9b3c61f1a37d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,7 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv); + rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 284a9998e53d..3068fddb2da3 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,7 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = 
netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, + THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; @@ -162,6 +163,7 @@ static void __exit fini(void) MODULE_DESCRIPTION("DECnet Routing Message Grabulator"); MODULE_AUTHOR("Steven Whitehouse "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_DNRTMSG); module_init(init); module_exit(fini); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index e5722084239b..b5e2f1550c91 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -567,7 +567,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input); + netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index ae975ac59c6a..b237f7fcad92 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -692,7 +692,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 52a0076302a7..4ea8371ab270 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -62,6 +62,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of 
nlgroups */ @@ -372,7 +373,7 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL); + nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f66945cb158f..f79bd11a4701 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -774,7 +774,8 @@ static void tcpdiag_rcv(struct sock *sk, int len) static int __init tcpdiag_init(void) { - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, + THIS_MODULE); if (tcpnl == NULL) return -ENOMEM; return 0; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7130603a32c5..1c3d247a22cc 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -685,7 +685,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b0ed57981847..6210ca42166c 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -38,6 +38,8 @@ #include MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; @@ -324,7 +326,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, + THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c 
index ff774a06c89d..5d487cd69c8c 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -13,7 +13,12 @@ * added netlink_proto_exit * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo * use nlk_sk, as sk->protinfo is on a diet 8) - * + * Fri Jul 22 19:51:12 MEST 2005 Harald Welte + * - inc module use count of module that owns + * the kernel socket in case userspace opens + * socket of same protocol + * - remove all module support, since netlink is + * mandatory if CONFIG_NET=y these days */ #include @@ -92,6 +97,7 @@ struct netlink_table { struct nl_pid_hash hash; struct hlist_head mc_list; unsigned int nl_nonroot; + struct proto_ops *p_ops; }; static struct netlink_table *nl_table; @@ -341,7 +347,21 @@ static int netlink_create(struct socket *sock, int protocol) if (protocol<0 || protocol >= MAX_LINKS) return -EPROTONOSUPPORT; - sock->ops = &netlink_ops; + netlink_table_grab(); + if (!nl_table[protocol].hash.entries) { +#ifdef CONFIG_KMOD + /* We do 'best effort'. If we find a matching module, + * it is loaded. If not, we don't return an error to + * allow pure userspace<->userspace communication. 
-HW + */ + netlink_table_ungrab(); + request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); + netlink_table_grab(); +#endif + } + netlink_table_ungrab(); + + sock->ops = nl_table[protocol].p_ops; sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1); if (!sk) @@ -394,6 +414,22 @@ static int netlink_release(struct socket *sock) }; notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); } + + /* When this is a kernel socket, we need to remove the owner pointer, + * since we don't know whether the module will be dying at any given + * point - HW + */ + if (!nlk->pid) { + struct proto_ops *p_tmp; + + netlink_table_grab(); + p_tmp = nl_table[sk->sk_protocol].p_ops; + if (p_tmp != &netlink_ops) { + nl_table[sk->sk_protocol].p_ops = &netlink_ops; + kfree(p_tmp); + } + netlink_table_ungrab(); + } sock_put(sk); return 0; @@ -1023,8 +1059,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) +netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) { + struct proto_ops *p_ops; struct socket *sock; struct sock *sk; @@ -1034,22 +1071,63 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)) if (unit<0 || unit>=MAX_LINKS) return NULL; + /* Do a quick check, to make us not go down to netlink_insert() + * if protocol already has kernel socket. + */ + sk = netlink_lookup(unit, 0); + if (unlikely(sk)) { + sock_put(sk); + return NULL; + } + if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock)) return NULL; + sk = NULL; + if (module) { + /* Every registering protocol implemented in a module needs + * it's own p_ops, since the socket code cannot deal with + * module refcounting otherwise. 
-HW + */ + p_ops = kmalloc(sizeof(*p_ops), GFP_KERNEL); + if (!p_ops) + goto out_sock_release; + + memcpy(p_ops, &netlink_ops, sizeof(*p_ops)); + p_ops->owner = module; + } else + p_ops = &netlink_ops; + + netlink_table_grab(); + nl_table[unit].p_ops = p_ops; + netlink_table_ungrab(); + if (netlink_create(sock, unit) < 0) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; } + sk = sock->sk; sk->sk_data_ready = netlink_data_ready; if (input) nlk_sk(sk)->data_ready = input; if (netlink_insert(sk, 0)) { - sock_release(sock); - return NULL; + sk = NULL; + goto out_kfree_p_ops; + } + + return sk; + +out_kfree_p_ops: + netlink_table_grab(); + if (nl_table[unit].p_ops != &netlink_ops) { + kfree(nl_table[unit].p_ops); + nl_table[unit].p_ops = &netlink_ops; } + netlink_table_ungrab(); +out_sock_release: + sock_release(sock); return sk; } @@ -1413,6 +1491,8 @@ enomem: for (i = 0; i < MAX_LINKS; i++) { struct nl_pid_hash *hash = &nl_table[i].hash; + nl_table[i].p_ops = &netlink_ops; + hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) @@ -1438,21 +1518,7 @@ out: return err; } -static void __exit netlink_proto_exit(void) -{ - sock_unregister(PF_NETLINK); - proc_net_remove("netlink"); - kfree(nl_table); - nl_table = NULL; - proto_unregister(&netlink_proto); -} - core_initcall(netlink_proto_init); -module_exit(netlink_proto_exit); - -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_NETPROTO(PF_NETLINK); EXPORT_SYMBOL(netlink_ack); EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8da3e25b2c4c..33ceeea783b1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1519,7 +1519,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, + THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; @@ 
-1537,3 +1538,4 @@ static void __exit xfrm_user_exit(void) module_init(xfrm_user_init); module_exit(xfrm_user_exit); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_XFRM); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 18d08acafa78..341dbe2579be 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -103,7 +103,7 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL); + selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); -- cgit v1.2.3 From 2cc7d5730957c4a3f3659d17d2ba5e06d5581c1f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:42:34 -0700 Subject: [NETFILTER]: Move reroute-after-queue code up to the nf_queue layer. The rerouting functionality is required by the core, therefore it has to be implemented by the core and not in individual queue handlers. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++ include/linux/netfilter_ipv6.h | 3 ++ net/core/netfilter.c | 66 ++++++++++++++++++++++++++++++++++-------- net/ipv4/netfilter.c | 64 ++++++++++++++++++++++++++++++++++++++-- net/ipv4/netfilter/ip_queue.c | 27 ----------------- net/ipv6/af_inet6.c | 7 +++++ net/ipv6/netfilter.c | 62 +++++++++++++++++++++++++++++++++++++++ net/ipv6/netfilter/ip6_queue.c | 24 --------------- 8 files changed, 199 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 54b97a1baba5..d163e20ca8d9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -198,6 +198,17 @@ extern void nf_invalidate_cache(int pf); Returns true or false. 
*/ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +struct nf_queue_rerouter { + void (*save)(const struct sk_buff *skb, struct nf_info *info); + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); + int rer_size; +}; + +#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) + +extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); +extern int nf_unregister_queue_rerouter(int pf); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 20c069a5e4ac..5d204ee7a312 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,4 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 9849357f6129..1ed4f3110421 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -53,6 +53,9 @@ static struct nf_queue_handler_t { nf_queue_outfn_t outfn; void *data; } queue_handler[NPROTO]; + +static struct nf_queue_rerouter *queue_rerouter; + static DEFINE_RWLOCK(queue_handler_lock); int nf_register_hook(struct nf_hook_ops *reg) @@ -260,11 +263,34 @@ int nf_unregister_queue_handler(int pf) return 0; } +int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memcpy(&queue_rerouter[pf], rer, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + + return 0; +} + +int nf_unregister_queue_rerouter(int pf) +{ + if (pf >= NPROTO) + return -EINVAL; + + write_lock_bh(&queue_handler_lock); + memset(&queue_rerouter[pf], 0, sizeof(queue_rerouter[pf])); + write_unlock_bh(&queue_handler_lock); + return 0; 
+} + /* * Any packet that leaves via this function must come back * through nf_reinject(). */ -static int nf_queue(struct sk_buff *skb, +static int nf_queue(struct sk_buff **skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, @@ -282,17 +308,17 @@ static int nf_queue(struct sk_buff *skb, read_lock(&queue_handler_lock); if (!queue_handler[pf].outfn) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } - info = kmalloc(sizeof(*info), GFP_ATOMIC); + info = kmalloc(sizeof(*info)+queue_rerouter[pf].rer_size, GFP_ATOMIC); if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - skb); + *skb); read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -311,15 +337,21 @@ static int nf_queue(struct sk_buff *skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = (*skb)->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif + if (queue_rerouter[pf].save) + queue_rerouter[pf].save(*skb, info); + + status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + + if (status >= 0 && queue_rerouter[pf].reroute) + status = queue_rerouter[pf].reroute(skb, info); - status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); read_unlock(&queue_handler_lock); if (status < 0) { @@ -332,9 +364,11 @@ static int nf_queue(struct sk_buff *skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(skb); + kfree_skb(*skb); + return 1; } + return 1; } @@ -365,7 +399,7 @@ next_hook: ret = -EPERM; } else if (verdict == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) goto next_hook; } 
unlock: @@ -428,7 +462,7 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, break; case NF_QUEUE: - if (!nf_queue(skb, elem, info->pf, info->hook, + if (!nf_queue(&skb, elem, info->pf, info->hook, info->indev, info->outdev, info->okfn)) goto next_hook; break; @@ -555,6 +589,12 @@ void __init netfilter_init(void) { int i, h; + queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), + GFP_KERNEL); + if (!queue_rerouter) + panic("netfilter: cannot allocate queue rerouter array\n"); + memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); + for (i = 0; i < NPROTO; i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); @@ -573,4 +613,6 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); +EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index 6594d1c9697e..ae0779d82c5d 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,10 +1,11 @@ -#include +/* IPv4 specific functions of netfilter core */ +#include #ifdef CONFIG_NETFILTER -/* IPv4 specific functions of netfilter core */ #include #include +#include #include #include @@ -76,4 +77,63 @@ int ip_route_me_harder(struct sk_buff **pskb) return 0; } EXPORT_SYMBOL(ip_route_me_harder); + +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. 
+ */ + +struct ip_rt_info { + u_int32_t daddr; + u_int32_t saddr; + u_int8_t tos; +}; + +static void queue_save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + const struct iphdr *iph = skb->nh.iph; + + rt_info->tos = iph->tos; + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int queue_reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + const struct ip_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP_LOCAL_OUT) { + struct iphdr *iph = (*pskb)->nh.iph; + + if (!(iph->tos == rt_info->tos + && iph->daddr == rt_info->daddr + && iph->saddr == rt_info->saddr)) + return ip_route_me_harder(pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip_reroute = { + .rer_size = sizeof(struct ip_rt_info), + .save = queue_save, + .reroute = queue_reroute, +}; + +static int init(void) +{ + return nf_register_queue_rerouter(PF_INET, &ip_reroute); +} + +static void fini(void) +{ + nf_unregister_queue_rerouter(PF_INET); +} + +module_init(init); +module_exit(fini); + #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index b237f7fcad92..78892980f42c 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -43,17 +43,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip_queue_maxlen" -struct ipq_rt_info { - __u8 tos; - __u32 daddr; - __u32 saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -305,14 +298,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = skb->nh.iph; - - entry->rt_info.tos = iph->tos; - entry->rt_info.daddr = iph->daddr; - 
entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -393,18 +378,6 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct iphdr *iph = e->skb->nh.iph; - - if (!(iph->tos == e->rt_info.tos - && iph->daddr == e->rt_info.daddr - && iph->saddr == e->rt_info.saddr)) - return ip_route_me_harder(&e->skb); - } return 0; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 28d9bcab0970..574047353628 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -757,6 +758,9 @@ static int __init inet6_init(void) err = igmp6_init(&inet6_family_ops); if (err) goto igmp_fail; + err = ipv6_netfilter_init(); + if (err) + goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -813,6 +817,8 @@ proc_tcp6_fail: raw6_proc_exit(); proc_raw6_fail: #endif + ipv6_netfilter_fini(); +netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); @@ -852,6 +858,7 @@ static void __exit inet6_exit(void) ip6_route_cleanup(); ipv6_packet_cleanup(); igmp6_cleanup(); + ipv6_netfilter_fini(); ndisc_cleanup(); icmpv6_cleanup(); #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 5656d0959aba..c8daef97cf56 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -5,6 +5,8 @@ #include #include +#include +#include #include #include #include @@ -40,4 +42,64 @@ int ip6_route_me_harder(struct sk_buff *skb) } EXPORT_SYMBOL(ip6_route_me_harder); +/* + * Extra routing may needed on local out, as the QUEUE target never + * returns control to the table. 
+ */ + +struct ip6_rt_info { + struct in6_addr daddr; + struct in6_addr saddr; +}; + +static void save(const struct sk_buff *skb, struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = skb->nh.ipv6h; + + rt_info->daddr = iph->daddr; + rt_info->saddr = iph->saddr; + } +} + +static int reroute(struct sk_buff **pskb, const struct nf_info *info) +{ + struct ip6_rt_info *rt_info = nf_info_reroute(info); + + if (info->hook == NF_IP6_LOCAL_OUT) { + struct ipv6hdr *iph = (*pskb)->nh.ipv6h; + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) + return ip6_route_me_harder(*pskb); + } + return 0; +} + +static struct nf_queue_rerouter ip6_reroute = { + .rer_size = sizeof(struct ip6_rt_info), + .save = &save, + .reroute = &reroute, +}; + +int __init ipv6_netfilter_init(void) +{ + return nf_register_queue_rerouter(PF_INET6, &ip6_reroute); +} + +void ipv6_netfilter_fini(void) +{ + nf_unregister_queue_rerouter(PF_INET6); +} + +#else /* CONFIG_NETFILTER */ +int __init ipv6_netfilter_init(void) +{ + return 0; +} + +void ipv6_netfilter_fini(void) +{ +} #endif /* CONFIG_NETFILTER */ diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 1c3d247a22cc..c45d8f8815de 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -47,16 +47,10 @@ #define NET_IPQ_QMAX 2088 #define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" -struct ipq_rt_info { - struct in6_addr daddr; - struct in6_addr saddr; -}; - struct ipq_queue_entry { struct list_head list; struct nf_info *info; struct sk_buff *skb; - struct ipq_rt_info rt_info; }; typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); @@ -302,13 +296,6 @@ ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) entry->info = info; entry->skb = skb; - if (entry->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = skb->nh.ipv6h; - - 
entry->rt_info.daddr = iph->daddr; - entry->rt_info.saddr = iph->saddr; - } - nskb = ipq_build_packet_message(entry, &status); if (nskb == NULL) goto err_out_free; @@ -389,17 +376,6 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) memcpy(e->skb->data, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; - /* - * Extra routing may needed on local out, as the QUEUE target never - * returns control to the table. - * Not a nice way to cmp, but works - */ - if (e->info->hook == NF_IP_LOCAL_OUT) { - struct ipv6hdr *iph = e->skb->nh.ipv6h; - if (!ipv6_addr_equal(&iph->daddr, &e->rt_info.daddr) || - !ipv6_addr_equal(&iph->saddr, &e->rt_info.saddr)) - return ip6_route_me_harder(e->skb); - } return 0; } -- cgit v1.2.3 From 0ab43f84995f2c2fcc5cc58a9accaa1095e1317f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:43:44 -0700 Subject: [NETFILTER]: Core changes required by upcoming nfnetlink_queue code - split netfilter verdict in 16bit verdict and 16bit queue number - add 'queuenum' argument to nf_queue_outfn_t and its users ip[6]_queue - move NFNL_SUBSYS_ definitions from enum to #define - introduce autoloading for nfnetlink subsystem modules - add MODULE_ALIAS_NFNL_SUBSYS macro - add nf_unregister_queue_handlers() to unregister all handlers for a given nf_queue_outfn_t - add more verbose DEBUGP macro definition to nfnetlink.c - make nfnetlink_subsys_register fail if subsys already exists - add some more comments and debug statements to nfnetlink.c Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter.h | 14 ++++++++++++- include/linux/netfilter/nfnetlink.h | 20 ++++++++++--------- net/core/netfilter.c | 40 ++++++++++++++++++++++++++++++------- net/ipv4/netfilter/ip_queue.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 28 ++++++++++++++++++++------ 6 files changed, 83 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d163e20ca8d9..711e05f33d68 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,6 +21,16 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* we overload the higher bits for encoding auxiliary data such as the queue + * number. Not nice, but better than additional function arguments. */ +#define NF_VERDICT_MASK 0x0000ffff +#define NF_VERDICT_BITS 16 + +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. 
@@ -179,10 +189,12 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, /* Packet queuing */ typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, void *data); + struct nf_info *info, + unsigned int queuenum, void *data); extern int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data); extern int nf_unregister_queue_handler(int pf); +extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ace7a7be0742..561f9df28808 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -69,15 +69,14 @@ struct nfgenmsg { #define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) #define NFNL_MSG_TYPE(x) (x & 0x00ff) -enum nfnl_subsys_id { - NFNL_SUBSYS_NONE = 0, - NFNL_SUBSYS_CTNETLINK, - NFNL_SUBSYS_CTNETLINK_EXP, - NFNL_SUBSYS_IPTNETLINK, - NFNL_SUBSYS_QUEUE, - NFNL_SUBSYS_ULOG, - NFNL_SUBSYS_COUNT, -}; +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_COUNT 5 #ifdef __KERNEL__ @@ -142,5 +141,8 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ + MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) + #endif /* __KERNEL__ */ #endif /* _NFNETLINK_H */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 1ed4f3110421..3e38084ac2bd 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -221,7 +221,8 @@ static unsigned int nf_iterate(struct list_head *head, verdict = elem->hook(hook, skb, indev, outdev, okfn); 
if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG - if (unlikely(verdict > NF_MAX_VERDICT)) { + if (unlikely((verdict & NF_VERDICT_MASK) + > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", elem->hook, hook); continue; @@ -239,6 +240,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) { int ret; + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); if (queue_handler[pf].outfn) ret = -EBUSY; @@ -255,6 +259,9 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) /* The caller must flush their queue before this */ int nf_unregister_queue_handler(int pf) { + if (pf >= NPROTO) + return -EINVAL; + write_lock_bh(&queue_handler_lock); queue_handler[pf].outfn = NULL; queue_handler[pf].data = NULL; @@ -286,6 +293,20 @@ int nf_unregister_queue_rerouter(int pf) return 0; } +void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +{ + int pf; + + write_lock_bh(&queue_handler_lock); + for (pf = 0; pf < NPROTO; pf++) { + if (queue_handler[pf].outfn == outfn) { + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + } + } + write_unlock_bh(&queue_handler_lock); +} + /* * Any packet that leaves via this function must come back * through nf_reinject(). 
@@ -295,7 +316,8 @@ static int nf_queue(struct sk_buff **skb, int pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sk_buff *)) + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -347,7 +369,8 @@ static int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queue_handler[pf].data); + status = queue_handler[pf].outfn(*skb, info, queuenum, + queue_handler[pf].data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -397,9 +420,10 @@ next_hook: } else if (verdict == NF_DROP) { kfree_skb(*pskb); ret = -EPERM; - } else if (verdict == NF_QUEUE) { + } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn)) + if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; } unlock: @@ -456,14 +480,15 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, info->okfn, INT_MIN); } - switch (verdict) { + switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: info->okfn(skb); break; case NF_QUEUE: if (!nf_queue(&skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn)) + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; } @@ -613,6 +638,7 @@ EXPORT_SYMBOL(nf_reinject); EXPORT_SYMBOL(nf_setsockopt); EXPORT_SYMBOL(nf_unregister_hook); EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); EXPORT_SYMBOL_GPL(nf_register_queue_rerouter); EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); EXPORT_SYMBOL(nf_unregister_sockopt); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 78892980f42c..cfc886f382ac 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -280,7 
+280,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index c45d8f8815de..5af4cee93d9b 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -278,7 +278,8 @@ nlmsg_failure: } static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) +ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) { int status = -EINVAL; struct sk_buff *nskb; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 6210ca42166c..30b25f47f7cc 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,7 +44,9 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER); static char __initdata nfversion[] = "0.30"; #if 0 -#define DEBUGP printk +#define DEBUGP(format, args...) \ + printk(KERN_DEBUG "%s(%d):%s(): " format, __FILE__, \ + __LINE__, __FUNCTION__, ## args) #else #define DEBUGP(format, args...) 
#endif @@ -67,11 +69,11 @@ int nfnetlink_subsys_register(struct nfnetlink_subsystem *n) { DEBUGP("registering subsystem ID %u\n", n->subsys_id); - /* If the netlink socket wasn't created, then fail */ - if (!nfnl) - return -1; - nfnl_lock(); + if (subsys_table[n->subsys_id]) { + nfnl_unlock(); + return -EBUSY; + } subsys_table[n->subsys_id] = n; nfnl_unlock(); @@ -227,8 +229,18 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, type = nlh->nlmsg_type; ss = nfnetlink_get_subsys(type); - if (!ss) + if (!ss) { +#ifdef CONFIG_KMOD + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); + if (!ss) +#endif goto err_inval; + } nc = nfnetlink_find_client(type, ss); if (!nc) { @@ -252,12 +264,14 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, if (err < 0) goto err_inval; + DEBUGP("calling handler\n"); err = nc->call(nfnl, skb, nlh, cda, errp); *errp = err; return err; } err_inval: + DEBUGP("returning -EINVAL\n"); *errp = -EINVAL; return -1; } @@ -311,6 +325,8 @@ static void nfnetlink_rcv(struct sock *sk, int len) kfree_skb(skb); } + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ up(&nfnl_sem); } while(nfnl && nfnl->sk_receive_queue.qlen); } -- cgit v1.2.3 From 7af4cc3fa158ff1dda6e7451c7e6afa6b0bb85cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:44:15 -0700 Subject: [NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink - Add new nfnetlink_queue module - Add new ipt_NFQUEUE and ip6t_NFQUEUE modules to access queue numbers 1-65535 - Mark ip_queue and ip6_queue Kconfig options as OBSOLETE - Update feature-removal-schedule to remove ip[6]_queue in December Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- Documentation/feature-removal-schedule.txt | 12 + include/linux/netfilter/nfnetlink_queue.h | 85 +++ include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 + net/ipv4/netfilter/Kconfig | 6 +- net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_NFQUEUE.c | 70 +++ net/ipv6/netfilter/Kconfig | 11 +- net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NFQUEUE.c | 70 +++ net/netfilter/Kconfig | 8 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink_queue.c | 877 +++++++++++++++++++++++++++++ 12 files changed, 1153 insertions(+), 5 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_queue.h create mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h create mode 100644 net/ipv4/netfilter/ipt_NFQUEUE.c create mode 100644 net/ipv6/netfilter/ip6t_NFQUEUE.c create mode 100644 net/netfilter/nfnetlink_queue.c (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8b1430b46655..0665cb12bd66 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -135,3 +135,15 @@ Why: With the 16-bit PCMCIA subsystem now behaving (almost) like a pcmciautils package available at http://kernel.org/pub/linux/utils/kernel/pcmcia/ Who: Dominik Brodowski + +--------------------------- + +What: ip_queue and ip6_queue (old ipv4-only and ipv6-only netfilter queue) +When: December 2005 +Why: This interface has been obsoleted by the new layer3-independent + "nfnetlink_queue". The Kernel interface is compatible, so the old + ip[6]tables "QUEUE" targets still work and will transparently handle + all packets into nfnetlink queue number 0. Userspace users will have + to link against API-compatible library on top of libnfnetlink_queue + instead of the current 'libipq'. 
+Who: Harald Welte diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 index 000000000000..edb463a87eb4 --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_QUEUE_H +#define _NFNETLINK_QUEUE_H + +#include + +enum nfqnl_msg_types { + NFQNL_MSG_PACKET, /* packet from kernel to userspace */ + NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ + NFQNL_MSG_CONFIG, /* connect to a particular queue */ + + NFQNL_MSG_MAX +}; + +struct nfqnl_msg_packet_hdr { + u_int32_t packet_id; /* unique ID of packet in queue */ + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ + NFQA_MARK, /* u_int32_t nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ + NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + + __NFQA_MAX +}; +#define NFQA_MAX (__NFQA_MAX - 1) + +struct nfqnl_msg_verdict_hdr { + u_int32_t verdict; + u_int32_t id; +} __attribute__ ((packed)); + + +enum nfqnl_msg_config_cmds { + NFQNL_CFG_CMD_NONE, + NFQNL_CFG_CMD_BIND, + NFQNL_CFG_CMD_UNBIND, + NFQNL_CFG_CMD_PF_BIND, + NFQNL_CFG_CMD_PF_UNBIND, +}; + +struct nfqnl_msg_config_cmd { + u_int8_t command; /* nfqnl_msg_config_cmds */ + u_int8_t _pad; + u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ +} __attribute__ ((packed)); + +enum nfqnl_config_mode { + NFQNL_COPY_NONE, + NFQNL_COPY_META, + NFQNL_COPY_PACKET, +}; + +struct nfqnl_msg_config_params { + u_int32_t copy_range; + 
u_int8_t copy_mode; /* enum nfqnl_config_mode */ +} __attribute__ ((packed)); + + +enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ + __NFQA_CFG_MAX +}; + +#endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 index 000000000000..b5b2943b0c66 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h @@ -0,0 +1,16 @@ +/* iptables module for using NFQUEUE mechanism + * + * (C) 2005 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * +*/ +#ifndef _IPT_NFQ_TARGET_H +#define _IPT_NFQ_TARGET_H + +/* target info */ +struct ipt_NFQ_info { + u_int16_t queuenum; +}; + +#endif /* _IPT_DSCP_TARGET_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e47ba39eb657..2fa26a41fa47 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -110,11 +110,15 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. config IP_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help Netfilter has the ability to queue packets to user space: the netlink device can be used to access them using this driver. + This option enables the old IPv4-only "ip_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). + To compile it as a module, choose M here. If unsure, say N. 
config IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index abf2a7d1a584..c2ae663b723f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -91,3 +91,4 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ipt_NFQUEUE.o diff --git a/net/ipv4/netfilter/ipt_NFQUEUE.c b/net/ipv4/netfilter/ipt_NFQUEUE.c new file mode 100644 index 000000000000..3cedc9be8807 --- /dev/null +++ b/net/ipv4/netfilter/ipt_NFQUEUE.c @@ -0,0 +1,70 @@ +/* iptables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ + 
ipt_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 77ec704c9ee3..cd1551983c63 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -10,13 +10,16 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" # dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK #fi config IP6_NF_QUEUE - tristate "Userspace queueing via NETLINK" + tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- This option adds a queue handler to the kernel for IPv6 - packets which lets us to receive the filtered packets - with QUEUE target using libiptc as we can do with - the IPv4 now. + packets which enables users to receive the filtered packets + with QUEUE target using libipq. + + This option enables the old IPv6-only "ip6_queue" implementation + which has been obsoleted by the new "nfnetlink_queue" code (see + CONFIG_NETFILTER_NETLINK_QUEUE). (C) Fernando Anton 2001 IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. 
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2e51714953b6..847651dbcd2a 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,3 +24,4 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_NFQUEUE.c b/net/ipv6/netfilter/ip6t_NFQUEUE.c new file mode 100644 index 000000000000..c6e3730e7409 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NFQUEUE.c @@ -0,0 +1,70 @@ +/* ip6tables module for using new netfilter netlink queue + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("ip6tables NFQUEUE target"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_NFQ_info *tinfo = targinfo; + + return NF_QUEUE_NR(tinfo->queuenum); +} + +static int +checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_NFQ_info))) { + printk(KERN_WARNING "NFQUEUE: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ipt_NFQ_info))); + return 0; + } + + return 1; +} + +static struct ip6t_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_target(&ipt_NFQ_reg); +} + +static void __exit fini(void) +{ 
+ ip6t_unregister_target(&ipt_NFQ_reg); +} + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3629d3d1776d..f0eb23e5c5f1 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -3,3 +3,11 @@ config NETFILTER_NETLINK help If this option is enabled, the kernel will include support for the new netfilter netlink interface. + +config NETFILTER_NETLINK_QUEUE + tristate "Netfilter NFQUEUE over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for queueing packets via NFNETLINK. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 02e67d371941..14a0b187e75e 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c new file mode 100644 index 000000000000..24032610c425 --- /dev/null +++ b/net/netfilter/nfnetlink_queue.c @@ -0,0 +1,877 @@ +/* + * This is a module which is used for queueing packets and communicating with + * userspace via nfnetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ip_queue.c: + * (C) 2000-2002 James Morris + * (C) 2003-2005 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFQNL_QMAX_DEFAULT 1024 + +#if 0 +#define QDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define QDEBUG(x, ...) 
+#endif + +struct nfqnl_queue_entry { + struct list_head list; + struct nf_info *info; + struct sk_buff *skb; + unsigned int id; +}; + +struct nfqnl_instance { + struct hlist_node hlist; /* global list of queues */ + + int peer_pid; + unsigned int queue_maxlen; + unsigned int copy_range; + unsigned int queue_total; + unsigned int queue_dropped; + unsigned int queue_user_dropped; + + atomic_t id_sequence; /* 'sequence' of pkt ids */ + + u_int16_t queue_num; /* number of this queue */ + u_int8_t copy_mode; + + spinlock_t lock; + + struct list_head queue_list; /* packets in queue */ +}; + +typedef int (*nfqnl_cmpfn)(struct nfqnl_queue_entry *, unsigned long); + +static DEFINE_RWLOCK(instances_lock); + +u_int64_t htonll(u_int64_t in) +{ + u_int64_t out; + int i; + + for (i = 0; i < sizeof(u_int64_t); i++) + ((u_int8_t *)&out)[sizeof(u_int64_t)-1] = ((u_int8_t *)&in)[i]; + + return out; +} + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; + +static inline u_int8_t instance_hashfn(u_int16_t queue_num) +{ + return ((queue_num >> 8) | queue_num) % INSTANCE_BUCKETS; +} + +static struct nfqnl_instance * +__instance_lookup(u_int16_t queue_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfqnl_instance *inst; + + head = &instance_table[instance_hashfn(queue_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->queue_num == queue_num) + return inst; + } + return NULL; +} + +static struct nfqnl_instance * +instance_lookup(u_int16_t queue_num) +{ + struct nfqnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(queue_num); + read_unlock_bh(&instances_lock); + + return inst; +} + +static struct nfqnl_instance * +instance_create(u_int16_t queue_num, int pid) +{ + struct nfqnl_instance *inst; + + QDEBUG("entering for queue_num=%u, pid=%d\n", queue_num, pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(queue_num)) { + inst = NULL; + QDEBUG("aborting, instance already 
exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + inst->queue_num = queue_num; + inst->peer_pid = pid; + inst->queue_maxlen = NFQNL_QMAX_DEFAULT; + inst->copy_range = 0xfffff; + inst->copy_mode = NFQNL_COPY_NONE; + atomic_set(&inst->id_sequence, 0); + inst->lock = SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&inst->queue_list); + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(queue_num)]); + + write_unlock_bh(&instances_lock); + + QDEBUG("successfully created new instance\n"); + + return inst; + +out_free: + kfree(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static void nfqnl_flush(struct nfqnl_instance *queue, int verdict); + +static void +_instance_destroy2(struct nfqnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + QDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->queue_num); + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending skbs from the queue */ + nfqnl_flush(inst, NF_DROP); + + /* and finally free the data structure */ + kfree(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfqnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + + + +static void +issue_verdict(struct nfqnl_queue_entry *entry, int verdict) +{ + QDEBUG("entering for entry %p, verdict %u\n", entry, verdict); + + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. 
We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); + nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + + kfree(entry); +} + +static inline void +__enqueue_entry(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry) +{ + list_add(&entry->list, &queue->queue_list); + queue->queue_total++; +} + +/* + * Find and return a queued entry matched by cmpfn, or return the last + * entry if cmpfn is NULL. + */ +static inline struct nfqnl_queue_entry * +__find_entry(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, + unsigned long data) +{ + struct list_head *p; + + list_for_each_prev(p, &queue->queue_list) { + struct nfqnl_queue_entry *entry = (struct nfqnl_queue_entry *)p; + + if (!cmpfn || cmpfn(entry, data)) + return entry; + } + return NULL; +} + +static inline void +__dequeue_entry(struct nfqnl_instance *q, struct nfqnl_queue_entry *entry) +{ + list_del(&entry->list); + q->queue_total--; +} + +static inline struct nfqnl_queue_entry * +__find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + entry = __find_entry(queue, cmpfn, data); + if (entry == NULL) + return NULL; + + __dequeue_entry(queue, entry); + return entry; +} + + +static inline void +__nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + struct nfqnl_queue_entry *entry; + + while ((entry = __find_dequeue_entry(queue, NULL, 0))) + issue_verdict(entry, verdict); +} + +static inline int +__nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status = 0; + + switch (mode) { + case NFQNL_COPY_NONE: + case NFQNL_COPY_META: + queue->copy_mode = mode; + queue->copy_range = 0; + break; + + case NFQNL_COPY_PACKET: + queue->copy_mode = mode; + /* we're using struct nfattr which has 16bit nfa_len */ + if (range > 0xffff) + queue->copy_range = 0xffff; + else + queue->copy_range = range; + break; + + default: + status = -EINVAL; + + } + return 
status; +} + +static struct nfqnl_queue_entry * +find_dequeue_entry(struct nfqnl_instance *queue, + nfqnl_cmpfn cmpfn, unsigned long data) +{ + struct nfqnl_queue_entry *entry; + + spin_lock_bh(&queue->lock); + entry = __find_dequeue_entry(queue, cmpfn, data); + spin_unlock_bh(&queue->lock); + + return entry; +} + +static void +nfqnl_flush(struct nfqnl_instance *queue, int verdict) +{ + spin_lock_bh(&queue->lock); + __nfqnl_flush(queue, verdict); + spin_unlock_bh(&queue->lock); +} + +static struct sk_buff * +nfqnl_build_packet_message(struct nfqnl_instance *queue, + struct nfqnl_queue_entry *entry, int *errp) +{ + unsigned char *old_tail; + size_t size; + size_t data_len = 0; + struct sk_buff *skb; + struct nfqnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int tmp_uint; + + QDEBUG("entered\n"); + + /* all macros expand to constant values at compile time */ + size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); + + spin_lock_bh(&queue->lock); + + switch (queue->copy_mode) { + case NFQNL_COPY_META: + case NFQNL_COPY_NONE: + data_len = 0; + break; + + case NFQNL_COPY_PACKET: + if (queue->copy_range == 0 + || queue->copy_range > entry->skb->len) + data_len = entry->skb->len; + else + data_len = queue->copy_range; + + size += NLMSG_SPACE(data_len); + break; + + default: + *errp = -EINVAL; + spin_unlock_bh(&queue->lock); + return NULL; + } + + spin_unlock_bh(&queue->lock); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + goto nlmsg_failure; + + old_tail= skb->tail; + nlh = NLMSG_PUT(skb, 0, 0, + NFNL_SUBSYS_QUEUE << 8 | NFQNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = entry->info->pf; + nfmsg->version = NFNETLINK_V0; + 
nfmsg->res_id = htons(queue->queue_num); + + pmsg.packet_id = htonl(entry->id); + pmsg.hw_protocol = htons(entry->skb->protocol); + pmsg.hook = entry->info->hook; + + NFA_PUT(skb, NFQA_PACKET_HDR, sizeof(pmsg), &pmsg); + + if (entry->info->indev) { + tmp_uint = htonl(entry->info->indev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->info->outdev) { + tmp_uint = htonl(entry->info->outdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); + } + + if (entry->skb->nfmark) { + tmp_uint = htonl(entry->skb->nfmark); + NFA_PUT(skb, NFQA_MARK, sizeof(u_int32_t), &tmp_uint); + } + + if (entry->info->indev && entry->skb->dev + && entry->skb->dev->hard_header_parse) { + struct nfqnl_msg_packet_hw phw; + + phw.hw_addrlen = + entry->skb->dev->hard_header_parse(entry->skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); + } + + if (entry->skb->stamp.tv_sec) { + struct nfqnl_msg_packet_timestamp ts; + + ts.sec = htonll(entry->skb->stamp.tv_sec); + ts.usec = htonll(entry->skb->stamp.tv_usec); + + NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nf_queue: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(skb, NFA_ALIGN(size)); + nfa->nfa_type = NFQA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(entry->skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = skb->tail - old_tail; + return skb; + +nlmsg_failure: +nfattr_failure: + if (skb) + kfree_skb(skb); + *errp = -EINVAL; + if (net_ratelimit()) + printk(KERN_ERR "nf_queue: error creating packet message\n"); + return NULL; +} + +static int +nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data) +{ + int status = -EINVAL; + struct sk_buff *nskb; + struct 
nfqnl_instance *queue; + struct nfqnl_queue_entry *entry; + + QDEBUG("entered\n"); + + queue = instance_lookup(queuenum); + if (!queue) { + QDEBUG("no queue instance matching\n"); + return -EINVAL; + } + + if (queue->copy_mode == NFQNL_COPY_NONE) { + QDEBUG("mode COPY_NONE, aborting\n"); + return -EAGAIN; + } + + entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + if (entry == NULL) { + if (net_ratelimit()) + printk(KERN_ERR + "nf_queue: OOM in nfqnl_enqueue_packet()\n"); + return -ENOMEM; + } + + entry->info = info; + entry->skb = skb; + entry->id = atomic_inc_return(&queue->id_sequence); + + nskb = nfqnl_build_packet_message(queue, entry, &status); + if (nskb == NULL) + goto err_out_free; + + spin_lock_bh(&queue->lock); + + if (!queue->peer_pid) + goto err_out_free_nskb; + + if (queue->queue_total >= queue->queue_maxlen) { + queue->queue_dropped++; + status = -ENOSPC; + if (net_ratelimit()) + printk(KERN_WARNING "ip_queue: full at %d entries, " + "dropping packets(s). Dropped: %d\n", + queue->queue_total, queue->queue_dropped); + goto err_out_free_nskb; + } + + /* nfnetlink_unicast will either free the nskb or add it to a socket */ + status = nfnetlink_unicast(nskb, queue->peer_pid, MSG_DONTWAIT); + if (status < 0) { + queue->queue_user_dropped++; + goto err_out_unlock; + } + + __enqueue_entry(queue, entry); + + spin_unlock_bh(&queue->lock); + return status; + +err_out_free_nskb: + kfree_skb(nskb); + +err_out_unlock: + spin_unlock_bh(&queue->lock); + +err_out_free: + kfree(entry); + return status; +} + +static int +nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) +{ + int diff; + + diff = data_len - e->skb->len; + if (diff < 0) + skb_trim(e->skb, data_len); + else if (diff > 0) { + if (data_len > 0xFFFF) + return -EINVAL; + if (diff > skb_tailroom(e->skb)) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "ip_queue: OOM " + "in mangle, 
dropping packet\n"); + return -ENOMEM; + } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; + } + skb_put(e->skb, diff); + } + if (!skb_make_writable(&e->skb, data_len)) + return -ENOMEM; + memcpy(e->skb->data, data, data_len); + + return 0; +} + +static inline int +id_cmp(struct nfqnl_queue_entry *e, unsigned long id) +{ + return (id == e->id); +} + +static int +nfqnl_set_mode(struct nfqnl_instance *queue, + unsigned char mode, unsigned int range) +{ + int status; + + spin_lock_bh(&queue->lock); + status = __nfqnl_set_mode(queue, mode, range); + spin_unlock_bh(&queue->lock); + + return status; +} + +static int +dev_cmp(struct nfqnl_queue_entry *entry, unsigned long ifindex) +{ + if (entry->info->indev) + if (entry->info->indev->ifindex == ifindex) + return 1; + + if (entry->info->outdev) + if (entry->info->outdev->ifindex == ifindex) + return 1; + + return 0; +} + +/* drop all packets with either indev or outdev == ifindex from all queue + * instances */ +static void +nfqnl_dev_drop(int ifindex) +{ + int i; + + QDEBUG("entering for ifindex %u\n", ifindex); + + /* this only looks like we have to hold the readlock for a way too long + * time, issue_verdict(), nf_reinject(), ... 
- but we always only + * issue NF_DROP, which is processed directly in nf_reinject() */ + read_lock_bh(&instances_lock); + + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry(inst, tmp, head, hlist) { + struct nfqnl_queue_entry *entry; + while ((entry = find_dequeue_entry(inst, dev_cmp, + ifindex)) != NULL) + issue_verdict(entry, NF_DROP); + } + } + + read_unlock_bh(&instances_lock); +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static int +nfqnl_rcv_dev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; + + /* Drop any packets associated with the downed device */ + if (event == NETDEV_DOWN) + nfqnl_dev_drop(dev->ifindex); + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_dev_notifier = { + .notifier_call = nfqnl_rcv_dev_event, +}; + +static int +nfqnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfqnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfqnl_rtnl_notifier = { + .notifier_call = nfqnl_rcv_nl_event, +}; + +static int +nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + + struct nfqnl_msg_verdict_hdr *vhdr; + struct 
nfqnl_instance *queue; + unsigned int verdict; + struct nfqnl_queue_entry *entry; + + queue = instance_lookup(queue_num); + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + if (!nfqa[NFQA_VERDICT_HDR-1]) + return -EINVAL; + + vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); + verdict = ntohl(vhdr->verdict); + + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) + return -EINVAL; + + entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); + if (entry == NULL) + return -ENOENT; + + if (nfqa[NFQA_PAYLOAD-1]) { + if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), + NFA_PAYLOAD(nfqa[NFQA_PAYLOAD-1]), entry) < 0) + verdict = NF_DROP; + } + + if (nfqa[NFQA_MARK-1]) + skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); + + issue_verdict(entry, verdict); + return 0; +} + +static int +nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static int +nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t queue_num = ntohs(nfmsg->res_id); + struct nfqnl_instance *queue; + + QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + queue = instance_lookup(queue_num); + if (nfqa[NFQA_CFG_CMD-1]) { + struct nfqnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); + QDEBUG("found CFG_CMD\n"); + + switch (cmd->command) { + case NFQNL_CFG_CMD_BIND: + if (queue) + return -EBUSY; + + queue = instance_create(queue_num, NETLINK_CB(skb).pid); + if (!queue) + return -EINVAL; + break; + case NFQNL_CFG_CMD_UNBIND: + if (!queue) + return -ENODEV; + + if (queue->peer_pid != NETLINK_CB(skb).pid) + return -EPERM; + + instance_destroy(queue); + break; + case NFQNL_CFG_CMD_PF_BIND: + QDEBUG("registering queue handler for pf=%u\n", + ntohs(cmd->pf)); + return nf_register_queue_handler(ntohs(cmd->pf), + 
nfqnl_enqueue_packet, + NULL); + + break; + case NFQNL_CFG_CMD_PF_UNBIND: + QDEBUG("unregistering queue handler for pf=%u\n", + ntohs(cmd->pf)); + /* This is a bug and a feature. We can unregister + * other handlers(!) */ + return nf_unregister_queue_handler(ntohs(cmd->pf)); + break; + default: + return -EINVAL; + } + } else { + if (!queue) { + QDEBUG("no config command, and no instance ENOENT\n"); + return -ENOENT; + } + + if (queue->peer_pid != NETLINK_CB(skb).pid) { + QDEBUG("no config command, and wrong pid\n"); + return -EPERM; + } + } + + if (nfqa[NFQA_CFG_PARAMS-1]) { + struct nfqnl_msg_config_params *params; + params = NFA_DATA(nfqa[NFQA_CFG_PARAMS-1]); + + nfqnl_set_mode(queue, params->copy_mode, + ntohl(params->copy_range)); + } + + return 0; +} + +static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { + [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .cap_required = CAP_NET_ADMIN }, + [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfqnl_subsys = { + .name = "nf_queue", + .subsys_id = NFNL_SUBSYS_QUEUE, + .cb_count = NFQNL_MSG_MAX, + .attr_count = NFQA_MAX, + .cb = nfqnl_cb, +}; + +static int +init_or_cleanup(int init) +{ + int status = -ENOMEM; + + if (!init) + goto cleanup; + + netlink_register_notifier(&nfqnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfqnl_subsys); + if (status < 0) { + printk(KERN_ERR "nf_queue: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; + +cleanup: + nf_unregister_queue_handlers(nfqnl_enqueue_packet); + unregister_netdevice_notifier(&nfqnl_dev_notifier); + nfnetlink_subsys_unregister(&nfqnl_subsys); + +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfqnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return 
init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter packet queue handler"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE); + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From 32519f11d38ea8f4f60896763bacec7db1760f9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:50:02 -0700 Subject: [INET]: Introduce inet_sk_rebuild_header From tcp_v4_rebuild_header, that already was pretty generic, I only needed to use sk->sk_protocol instead of the hardcoded IPPROTO_TCP and establish the requirement that INET transport layer protocols that want to use this function map TCP_SYN_SENT to its equivalent state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 + include/net/tcp.h | 2 - net/ipv4/af_inet.c | 113 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_output.c | 5 --- net/ipv4/tcp_ipv4.c | 98 +------------------------------------------- net/ipv6/tcp_ipv6.c | 2 +- 6 files changed, 117 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 31e7cedd9f84..33e8a19a1a0f 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #endif #endif +extern int inet_sk_rebuild_header(struct sock *sk); + struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/net/tcp.h b/include/net/tcp.h index d95661a3aeeb..0c769adb0463 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -799,8 +799,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_rebuild_header(struct sock *sk); - extern int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 
9e83d7773d8f..7137e6420d66 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -951,6 +951,119 @@ void inet_unregister_protosw(struct inet_protosw *p) } } +/* + * Shall we try to damage output packets if routing dev changes? + */ + +int sysctl_ip_dynaddr; + +static int inet_sk_reselect_saddr(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + int err; + struct rtable *rt; + __u32 old_saddr = inet->saddr; + __u32 new_saddr; + __u32 daddr = inet->daddr; + + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; + + /* Query new route. */ + err = ip_route_connect(&rt, daddr, 0, + RT_CONN_FLAGS(sk), + sk->sk_bound_dev_if, + sk->sk_protocol, + inet->sport, inet->dport, sk); + if (err) + return err; + + sk_setup_caps(sk, &rt->u.dst); + + new_saddr = rt->rt_src; + + if (new_saddr == old_saddr) + return 0; + + if (sysctl_ip_dynaddr > 1) { + printk(KERN_INFO "%s(): shifting inet->" + "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", + __FUNCTION__, + NIPQUAD(old_saddr), + NIPQUAD(new_saddr)); + } + + inet->saddr = inet->rcv_saddr = new_saddr; + + /* + * XXX The only one ugly spot where we need to + * XXX really change the sockets identity after + * XXX it has entered the hashes. -DaveM + * + * Besides that, it does not check for connection + * uniqueness. Wait for troubles. + */ + __sk_prot_rehash(sk); + return 0; +} + +int inet_sk_rebuild_header(struct sock *sk) +{ + struct inet_sock *inet = inet_sk(sk); + struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); + u32 daddr; + int err; + + /* Route is OK, nothing to do. */ + if (rt) + return 0; + + /* Reroute. 
*/ + daddr = inet->daddr; + if (inet->opt && inet->opt->srr) + daddr = inet->opt->faddr; +{ + struct flowi fl = { + .oif = sk->sk_bound_dev_if, + .nl_u = { + .ip4_u = { + .daddr = daddr, + .saddr = inet->saddr, + .tos = RT_CONN_FLAGS(sk), + }, + }, + .proto = sk->sk_protocol, + .uli_u = { + .ports = { + .sport = inet->sport, + .dport = inet->dport, + }, + }, + }; + + err = ip_route_output_flow(&rt, &fl, sk, 0); +} + if (!err) + sk_setup_caps(sk, &rt->u.dst); + else { + /* Routing failed... */ + sk->sk_route_caps = 0; + /* + * Other protocols have to map its equivalent state to TCP_SYN_SENT. + * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme + */ + if (!sysctl_ip_dynaddr || + sk->sk_state != TCP_SYN_SENT || + (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || + (err = inet_sk_reselect_saddr(sk)) != 0) + sk->sk_err_soft = -err; + } + + return err; +} + +EXPORT_SYMBOL(inet_sk_rebuild_header); + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c72fc878f06d..dd568b0b7062 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -83,11 +83,6 @@ #include #include -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr; int sysctl_ip_default_ttl = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4a5daecbd2ac..ae6fad99a9a9 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1832,101 +1832,6 @@ do_time_wait: goto discard_it; } -static int tcp_v4_reselect_saddr(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - int err; - struct rtable *rt; - __u32 old_saddr = inet->saddr; - __u32 new_saddr; - __u32 daddr = inet->daddr; - - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - /* Query new route. 
*/ - err = ip_route_connect(&rt, daddr, 0, - RT_CONN_FLAGS(sk), - sk->sk_bound_dev_if, - IPPROTO_TCP, - inet->sport, inet->dport, sk); - if (err) - return err; - - sk_setup_caps(sk, &rt->u.dst); - - new_saddr = rt->rt_src; - - if (new_saddr == old_saddr) - return 0; - - if (sysctl_ip_dynaddr > 1) { - printk(KERN_INFO "tcp_v4_rebuild_header(): shifting inet->" - "saddr from %d.%d.%d.%d to %d.%d.%d.%d\n", - NIPQUAD(old_saddr), - NIPQUAD(new_saddr)); - } - - inet->saddr = new_saddr; - inet->rcv_saddr = new_saddr; - - /* XXX The only one ugly spot where we need to - * XXX really change the sockets identity after - * XXX it has entered the hashes. -DaveM - * - * Besides that, it does not check for connection - * uniqueness. Wait for troubles. - */ - __sk_prot_rehash(sk); - return 0; -} - -int tcp_v4_rebuild_header(struct sock *sk) -{ - struct inet_sock *inet = inet_sk(sk); - struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; - int err; - - /* Route is OK, nothing to do. */ - if (rt) - return 0; - - /* Reroute. */ - daddr = inet->daddr; - if (inet->opt && inet->opt->srr) - daddr = inet->opt->faddr; - - { - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = daddr, - .saddr = inet->saddr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = inet->dport } } }; - - err = ip_route_output_flow(&rt, &fl, sk, 0); - } - if (!err) { - sk_setup_caps(sk, &rt->u.dst); - return 0; - } - - /* Routing failed... 
*/ - sk->sk_route_caps = 0; - - if (!sysctl_ip_dynaddr || - sk->sk_state != TCP_SYN_SENT || - (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || - (err = tcp_v4_reselect_saddr(sk)) != 0) - sk->sk_err_soft = -err; - - return err; -} - static void v4_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) { struct sockaddr_in *sin = (struct sockaddr_in *) uaddr; @@ -1998,7 +1903,7 @@ int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) struct tcp_func ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, @@ -2630,7 +2535,6 @@ EXPORT_SYMBOL(tcp_unhash); EXPORT_SYMBOL(tcp_v4_conn_request); EXPORT_SYMBOL(tcp_v4_connect); EXPORT_SYMBOL(tcp_v4_do_rcv); -EXPORT_SYMBOL(tcp_v4_rebuild_header); EXPORT_SYMBOL(tcp_v4_remember_stamp); EXPORT_SYMBOL(tcp_v4_send_check); EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 885e05bd99f6..4e32a8496be3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1983,7 +1983,7 @@ static struct tcp_func ipv6_specific = { static struct tcp_func ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, - .rebuild_header = tcp_v4_rebuild_header, + .rebuild_header = inet_sk_rebuild_header, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .remember_stamp = tcp_v4_remember_stamp, -- cgit v1.2.3 From 838ab6364956d9bdcefe84712de1621cf20a40b3 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:50:45 -0700 Subject: [NETFILTER]: Add refcounting and /proc/net/netfilter interface to nfnetlink_queue Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink_queue.h | 1 + net/netfilter/nfnetlink_queue.c | 248 ++++++++++++++++++++++++++---- 2 files changed, 221 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index edb463a87eb4..e142b0ff7c08 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -81,5 +81,6 @@ enum nfqnl_attr_config { NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ __NFQA_CFG_MAX }; +#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1) #endif /* _NFNETLINK_QUEUE_H */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 24032610c425..eab309e3d42e 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ struct nfqnl_queue_entry { struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ + atomic_t use; int peer_pid; unsigned int queue_maxlen; @@ -105,17 +107,28 @@ __instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_lookup(u_int16_t queue_num) +instance_lookup_get(u_int16_t queue_num) { struct nfqnl_instance *inst; read_lock_bh(&instances_lock); inst = __instance_lookup(queue_num); + if (inst) + atomic_inc(&inst->use); read_unlock_bh(&instances_lock); return inst; } +static void +instance_put(struct nfqnl_instance *inst) +{ + if (inst && atomic_dec_and_test(&inst->use)) { + QDEBUG("kfree(inst=%p)\n", inst); + kfree(inst); + } +} + static struct nfqnl_instance * instance_create(u_int16_t queue_num, int pid) { @@ -141,6 +154,8 @@ instance_create(u_int16_t queue_num, int pid) inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; atomic_set(&inst->id_sequence, 0); + /* needs to be two, since we _put() after creation */ + atomic_set(&inst->use, 2); inst->lock = SPIN_LOCK_UNLOCKED; 
INIT_LIST_HEAD(&inst->queue_list); @@ -182,8 +197,8 @@ _instance_destroy2(struct nfqnl_instance *inst, int lock) /* then flush all pending skbs from the queue */ nfqnl_flush(inst, NF_DROP); - /* and finally free the data structure */ - kfree(inst); + /* and finally put the refcount */ + instance_put(inst); module_put(THIS_MODULE); } @@ -471,7 +486,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, QDEBUG("entered\n"); - queue = instance_lookup(queuenum); + queue = instance_lookup_get(queuenum); if (!queue) { QDEBUG("no queue instance matching\n"); return -EINVAL; @@ -479,7 +494,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, if (queue->copy_mode == NFQNL_COPY_NONE) { QDEBUG("mode COPY_NONE, aborting\n"); - return -EAGAIN; + status = -EAGAIN; + goto err_out_put; } entry = kmalloc(sizeof(*entry), GFP_ATOMIC); @@ -487,7 +503,8 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, if (net_ratelimit()) printk(KERN_ERR "nf_queue: OOM in nfqnl_enqueue_packet()\n"); - return -ENOMEM; + status = -ENOMEM; + goto err_out_put; } entry->info = info; @@ -523,6 +540,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, __enqueue_entry(queue, entry); spin_unlock_bh(&queue->lock); + instance_put(queue); return status; err_out_free_nskb: @@ -533,6 +551,8 @@ err_out_unlock: err_out_free: kfree(entry); +err_out_put: + instance_put(queue); return status; } @@ -685,6 +705,12 @@ static struct notifier_block nfqnl_rtnl_notifier = { .notifier_call = nfqnl_rcv_nl_event, }; +static const int nfqa_verdict_min[NFQA_MAX] = { + [NFQA_VERDICT_HDR-1] = sizeof(struct nfqnl_msg_verdict_hdr), + [NFQA_MARK-1] = sizeof(u_int32_t), + [NFQA_PAYLOAD-1] = 0, +}; + static int nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -696,26 +722,40 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, struct nfqnl_instance *queue; unsigned int verdict; struct 
nfqnl_queue_entry *entry; + int err; - queue = instance_lookup(queue_num); + if (nfattr_bad_size(nfqa, NFQA_MAX, nfqa_verdict_min)) { + QDEBUG("bad attribute size\n"); + return -EINVAL; + } + + queue = instance_lookup_get(queue_num); if (!queue) return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) - return -EPERM; + if (queue->peer_pid != NETLINK_CB(skb).pid) { + err = -EPERM; + goto err_out_put; + } - if (!nfqa[NFQA_VERDICT_HDR-1]) - return -EINVAL; + if (!nfqa[NFQA_VERDICT_HDR-1]) { + err = -EINVAL; + goto err_out_put; + } vhdr = NFA_DATA(nfqa[NFQA_VERDICT_HDR-1]); verdict = ntohl(vhdr->verdict); - if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) - return -EINVAL; + if ((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT) { + err = -EINVAL; + goto err_out_put; + } entry = find_dequeue_entry(queue, id_cmp, ntohl(vhdr->id)); - if (entry == NULL) - return -ENOENT; + if (entry == NULL) { + err = -ENOENT; + goto err_out_put; + } if (nfqa[NFQA_PAYLOAD-1]) { if (nfqnl_mangle(NFA_DATA(nfqa[NFQA_PAYLOAD-1]), @@ -727,7 +767,12 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, skb->nfmark = ntohl(*(u_int32_t *)NFA_DATA(nfqa[NFQA_MARK-1])); issue_verdict(entry, verdict); + instance_put(queue); return 0; + +err_out_put: + instance_put(queue); + return err; } static int @@ -737,6 +782,11 @@ nfqnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, return -ENOTSUPP; } +static const int nfqa_cfg_min[NFQA_CFG_MAX] = { + [NFQA_CFG_CMD-1] = sizeof(struct nfqnl_msg_config_cmd), + [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), +}; + static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -744,10 +794,16 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); u_int16_t queue_num = ntohs(nfmsg->res_id); struct nfqnl_instance *queue; + int ret = 0; QDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); - queue = 
instance_lookup(queue_num); + if (nfattr_bad_size(nfqa, NFQA_CFG_MAX, nfqa_cfg_min)) { + QDEBUG("bad attribute size\n"); + return -EINVAL; + } + + queue = instance_lookup_get(queue_num); if (nfqa[NFQA_CFG_CMD-1]) { struct nfqnl_msg_config_cmd *cmd; cmd = NFA_DATA(nfqa[NFQA_CFG_CMD-1]); @@ -766,17 +822,19 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, if (!queue) return -ENODEV; - if (queue->peer_pid != NETLINK_CB(skb).pid) - return -EPERM; + if (queue->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto out_put; + } instance_destroy(queue); break; case NFQNL_CFG_CMD_PF_BIND: QDEBUG("registering queue handler for pf=%u\n", ntohs(cmd->pf)); - return nf_register_queue_handler(ntohs(cmd->pf), - nfqnl_enqueue_packet, - NULL); + ret = nf_register_queue_handler(ntohs(cmd->pf), + nfqnl_enqueue_packet, + NULL); break; case NFQNL_CFG_CMD_PF_UNBIND: @@ -784,20 +842,23 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ntohs(cmd->pf)); /* This is a bug and a feature. We can unregister * other handlers(!) 
*/ - return nf_unregister_queue_handler(ntohs(cmd->pf)); + ret = nf_unregister_queue_handler(ntohs(cmd->pf)); break; default: - return -EINVAL; + ret = -EINVAL; + break; } } else { if (!queue) { QDEBUG("no config command, and no instance ENOENT\n"); - return -ENOENT; + ret = -ENOENT; + goto out_put; } if (queue->peer_pid != NETLINK_CB(skb).pid) { QDEBUG("no config command, and wrong pid\n"); - return -EPERM; + ret = -EPERM; + goto out_put; } } @@ -809,7 +870,9 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ntohl(params->copy_range)); } - return 0; +out_put: + instance_put(queue); + return ret; } static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { @@ -829,14 +892,132 @@ static struct nfnetlink_subsystem nfqnl_subsys = { .cb = nfqnl_cb, }; +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if (!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? 
NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfqnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + inst->queue_num, + inst->peer_pid, inst->queue_total, + inst->copy_mode, inst->copy_range, + inst->queue_dropped, inst->queue_user_dropped, + atomic_read(&inst->id_sequence), + atomic_read(&inst->use)); +} + +static struct seq_operations nfqnl_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqnl_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct iter_state *is; + int ret; + + is = kmalloc(sizeof(*is), GFP_KERNEL); + if (!is) + return -ENOMEM; + memset(is, 0, sizeof(*is)); + ret = seq_open(file, &nfqnl_seq_ops); + if (ret < 0) + goto out_free; + seq = file->private_data; + seq->private = is; + return ret; +out_free: + kfree(is); + return ret; +} + +static struct file_operations nfqnl_file_ops = { + .owner = THIS_MODULE, + .open = nfqnl_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + static int init_or_cleanup(int init) { - int status = -ENOMEM; + int i, status = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nfqueue; +#endif if (!init) goto cleanup; + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + netlink_register_notifier(&nfqnl_rtnl_notifier); status = nfnetlink_subsys_register(&nfqnl_subsys); if (status < 0) { @@ -844,14 +1025,25 @@ init_or_cleanup(int init) goto cleanup_netlink_notifier; } +#ifdef CONFIG_PROC_FS + proc_nfqueue = 
create_proc_entry("nfnetlink_queue", 0440, + proc_net_netfilter); + if (!proc_nfqueue) + goto cleanup_subsys; + proc_nfqueue->proc_fops = &nfqnl_file_ops; +#endif + register_netdevice_notifier(&nfqnl_dev_notifier); + return status; cleanup: nf_unregister_queue_handlers(nfqnl_enqueue_packet); unregister_netdevice_notifier(&nfqnl_dev_notifier); +#ifdef CONFIG_PROC_FS +cleanup_subsys: +#endif nfnetlink_subsys_unregister(&nfqnl_subsys); - cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); return status; -- cgit v1.2.3 From 608c8e4f7b6e61cc783283e9dff8a465a5ad59bb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:27 -0700 Subject: [NETFILTER]: Extend netfilter logging API This patch is in preparation to nfnetlink_log: - loggers now have to register struct nf_logger instead of nf_logfn - nf_log_unregister() replaced by nf_log_unregister_pf() and nf_log_unregister_logger() - add comment to ip[6]t_LOG.h to assure nobody redefines flags - add /proc/net/netfilter/nf_log to tell user which logger is currently registered for which address family - if user has configured logging, but no logging backend (logger) is available, always spit a message to syslog, not just the first time. - split ip[6]t_LOG.c into two parts: Backend: Always try to register as logger for the respective address family Frontend: Always log via nf_log_packet() API - modify all users of nf_log_packet() to accommodate additional argument Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter.h | 48 +++++++++- include/linux/netfilter_ipv4/ipt_LOG.h | 1 + include/linux/netfilter_ipv6/ip6t_LOG.h | 1 + net/core/netfilter.c | 127 +++++++++++++++++++++++---- net/ipv4/netfilter/ip_conntrack_proto_icmp.c | 8 +- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 21 ++--- net/ipv4/netfilter/ip_conntrack_proto_udp.c | 6 +- net/ipv4/netfilter/ipt_LOG.c | 86 ++++++++++-------- net/ipv4/netfilter/ipt_ULOG.c | 33 +++++-- net/ipv6/netfilter/ip6t_LOG.c | 93 +++++++++++--------- 10 files changed, 299 insertions(+), 125 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 711e05f33d68..815583af06c2 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -typedef void nf_logfn(unsigned int hooknum, +/* those NF_LOG_* defines and struct nf_loginfo are legacy definitions that will + * disappear once iptables is replaced with pkttables. Please DO NOT use them + * for any new code! */ +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_MASK 0x0f + +#define NF_LOG_TYPE_LOG 0x01 +#define NF_LOG_TYPE_ULOG 0x02 + +struct nf_loginfo { + u_int8_t type; + union { + struct { + u_int32_t copy_len; + u_int16_t group; + u_int16_t qthreshold; + } ulog; + struct { + u_int8_t level; + u_int8_t logflags; + } log; + } u; +}; + +typedef void nf_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix); +struct nf_logger { + struct module *me; + nf_logfn *logfn; + char *name; +}; + /* Function to register/unregister log function.
*/ -int nf_log_register(int pf, nf_logfn *logfn); -void nf_log_unregister(int pf, nf_logfn *logfn); +int nf_log_register(int pf, struct nf_logger *logger); +void nf_log_unregister_pf(int pf); +void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ void nf_log_packet(int pf, @@ -130,6 +166,7 @@ void nf_log_packet(int pf, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *li, const char *fmt, ...); /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,6 +258,11 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#ifdef CONFIG_PROC_FS +#include +extern struct proc_dir_entry *proc_net_netfilter; +#endif + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index d25f782e57d1..22d16177319b 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,7 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 42996a43bb39..9008ff5c40ae 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,7 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) 
*/ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/net/core/netfilter.c b/net/core/netfilter.c index 3e38084ac2bd..98cc61e79fea 100644 --- a/net/core/netfilter.c +++ b/net/core/netfilter.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* In this code, we can be waiting indefinitely for userspace to @@ -535,11 +536,10 @@ EXPORT_SYMBOL(skb_make_writable); #define NF_LOG_PREFIXLEN 128 -static nf_logfn *nf_logging[NPROTO]; /* = NULL */ -static int reported = 0; +static struct nf_logger *nf_logging[NPROTO]; /* = NULL */ static DEFINE_SPINLOCK(nf_log_lock); -int nf_log_register(int pf, nf_logfn *logfn) +int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; @@ -547,54 +547,134 @@ int nf_log_register(int pf, nf_logfn *logfn) * substituting pointer. */ spin_lock(&nf_log_lock); if (!nf_logging[pf]) { - rcu_assign_pointer(nf_logging[pf], logfn); + rcu_assign_pointer(nf_logging[pf], logger); ret = 0; } spin_unlock(&nf_log_lock); return ret; } -void nf_log_unregister(int pf, nf_logfn *logfn) +void nf_log_unregister_pf(int pf) { spin_lock(&nf_log_lock); - if (nf_logging[pf] == logfn) - nf_logging[pf] = NULL; + nf_logging[pf] = NULL; spin_unlock(&nf_log_lock); /* Give time to concurrent readers. */ synchronize_net(); -} +} + +void nf_log_unregister_logger(struct nf_logger *logger) +{ + int i; + + spin_lock(&nf_log_lock); + for (i = 0; i < NPROTO; i++) { + if (nf_logging[i] == logger) + nf_logging[i] = NULL; + } + spin_unlock(&nf_log_lock); + + synchronize_net(); +} void nf_log_packet(int pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *loginfo, const char *fmt, ...) 
{ va_list args; char prefix[NF_LOG_PREFIXLEN]; - nf_logfn *logfn; + struct nf_logger *logger; rcu_read_lock(); - logfn = rcu_dereference(nf_logging[pf]); - if (logfn) { + logger = rcu_dereference(nf_logging[pf]); + if (logger) { va_start(args, fmt); vsnprintf(prefix, sizeof(prefix), fmt, args); va_end(args); /* We must read logging before nf_logfn[pf] */ - logfn(hooknum, skb, in, out, prefix); - } else if (!reported) { - printk(KERN_WARNING "nf_log_packet: can\'t log yet, " - "no backend logging module loaded in!\n"); - reported++; + logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix); + } else if (net_ratelimit()) { + printk(KERN_WARNING "nf_log_packet: can\'t log since " + "no backend logging module loaded in! Please either " + "load one, or disable logging explicitly\n"); } rcu_read_unlock(); } EXPORT_SYMBOL(nf_log_register); -EXPORT_SYMBOL(nf_log_unregister); +EXPORT_SYMBOL(nf_log_unregister_pf); +EXPORT_SYMBOL(nf_log_unregister_logger); EXPORT_SYMBOL(nf_log_packet); +#ifdef CONFIG_PROC_FS +struct proc_dir_entry *proc_net_netfilter; +EXPORT_SYMBOL(proc_net_netfilter); + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + rcu_read_lock(); + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + rcu_read_unlock(); +} + +static int seq_show(struct seq_file *s, void *v) +{ + loff_t *pos = v; + const struct nf_logger *logger; + + logger = rcu_dereference(nf_logging[*pos]); + + if (!logger) + return seq_printf(s, "%2lld NONE\n", *pos); + + return seq_printf(s, "%2lld %s\n", *pos, logger->name); +} + +static struct seq_operations nflog_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nflog_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nflog_seq_ops); +} + +static 
struct file_operations nflog_file_ops = { + .owner = THIS_MODULE, + .open = nflog_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +#endif /* PROC_FS */ + + /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated with it. */ @@ -613,6 +693,9 @@ void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) void __init netfilter_init(void) { int i, h; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); @@ -624,6 +707,16 @@ void __init netfilter_init(void) for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } + +#ifdef CONFIG_PROC_FS + proc_net_netfilter = proc_mkdir("netfilter", proc_net); + if (!proc_net_netfilter) + panic("cannot create netfilter proc entry"); + pde = create_proc_entry("nf_log", S_IRUGO, proc_net_netfilter); + if (!pde) + panic("cannot create /proc/net/netfilter/nf_log"); + pde->proc_fops = &nflog_file_ops; +#endif } EXPORT_SYMBOL(ip_ct_attach); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 3f90cb9979ac..838d1d69b36e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); if (icmph == NULL) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: short packet "); return -NF_ACCEPT; } @@ -231,13 +231,13 @@ icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, if (!(u16)csum_fold(skb->csum)) break; if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 
0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: bad ICMP checksum "); return -NF_ACCEPT; } @@ -254,7 +254,7 @@ checksum_skipped: */ if (icmph->type > NR_ICMP_TYPES) { if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index c2bce22d4031..f23ef1f88c46 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -716,7 +716,7 @@ static int tcp_in_window(struct ip_ct_tcp *state, res = 1; } else { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? after(end, sender->td_end - receiver->td_maxwin - 1) ? @@ -815,7 +815,7 @@ static int tcp_error(struct sk_buff *skb, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: short packet "); return -NF_ACCEPT; } @@ -823,7 +823,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -840,7 +840,7 @@ static int tcp_error(struct sk_buff *skb, skb->ip_summed == CHECKSUM_HW ? 
skb->csum : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; } @@ -849,7 +849,7 @@ static int tcp_error(struct sk_buff *skb, tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); if (!tcp_valid_flags[tcpflags]) { if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; } @@ -897,8 +897,9 @@ static int tcp_packet(struct ip_conntrack *conntrack, */ write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: killing out of sync session "); + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + NULL, "ip_ct_tcp: " + "killing out of sync session "); if (del_timer(&conntrack->timeout)) conntrack->timeout.function((unsigned long) conntrack); @@ -912,7 +913,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid packet ignored "); return NF_ACCEPT; case TCP_CONNTRACK_MAX: @@ -922,7 +923,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, old_state); write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_tcp: invalid state "); return -NF_ACCEPT; case TCP_CONNTRACK_SYN_SENT: @@ -943,7 +944,7 @@ static int tcp_packet(struct ip_conntrack *conntrack, write_unlock_bh(&tcp_lock); if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, - "ip_ct_tcp: invalid SYN"); + NULL, "ip_ct_tcp: invalid SYN"); return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c 
b/net/ipv4/netfilter/ip_conntrack_proto_udp.c index 14130169cbfd..f2dcac7c7660 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -98,7 +98,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); if (hdr == NULL) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: short packet "); return -NF_ACCEPT; } @@ -106,7 +106,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; } @@ -126,7 +126,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, skb->ip_summed == CHECKSUM_HW ? 
skb->csum : skb_checksum(skb, iph->ihl*4, udplen, 0))) { if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, + nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "ip_ct_udp: bad UDP checksum "); return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index ef08733d26da..92ed050fac69 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -27,10 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); -static unsigned int nflog = 1; -module_param(nflog, int, 0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - #if 0 #define DEBUGP printk #else @@ -41,11 +37,17 @@ MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ipt_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int iphoff) { struct iphdr _iph, *ih; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { @@ -76,7 +78,7 @@ static void dump_packet(const struct ipt_log_info *info, if (ntohs(ih->frag_off) & IP_OFFSET) printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((info->logflags & IPT_LOG_IPOPT) + if ((logflags & IPT_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; unsigned int i, optsize; @@ -119,7 +121,7 @@ static void dump_packet(const struct ipt_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IPT_LOG_TCPSEQ) + if (logflags & IPT_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 
13 "WINDOW=65535 " */ @@ -146,7 +148,7 @@ static void dump_packet(const struct ipt_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IPT_LOG_TCPOPT) + if ((logflags & IPT_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; unsigned char *op; @@ -328,7 +330,7 @@ static void dump_packet(const struct ipt_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IPT_LOG_UID) && !iphoff && skb->sk) { + if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -349,19 +351,31 @@ static void dump_packet(const struct ipt_log_info *info, /* maxlen = 230+ 91 + 230 + 252 = 803 */ } +struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ipt_log_packet(unsigned int hooknum, +ipt_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ipt_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? 
out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER @@ -405,28 +419,15 @@ ipt_log_target(struct sk_buff **pskb, void *userinfo) { const struct ipt_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; - level_string[1] = '0' + (loginfo->level % 8); - ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; - return IPT_CONTINUE; -} + nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); -static void -ipt_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ipt_log_info loginfo = { - .level = 0, - .logflags = IPT_LOG_MASK, - .prefix = "" - }; - - ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); + return IPT_CONTINUE; } static int ipt_log_checkentry(const char *tablename, @@ -464,20 +465,29 @@ static struct ipt_target ipt_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_log_logger ={ + .name = "ipt_LOG", + .logfn = &ipt_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ipt_register_target(&ipt_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { + printk(KERN_WARNING "ipt_LOG: not logging via system console " + "since somebody else already registered for PF_INET\n"); + /* we cannot make module load fail here, since otherwise + * iptables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_log_logger); ipt_unregister_target(&ipt_log_reg); } diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 4ea8371ab270..b86f06ec9762 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -304,18 +304,27 @@ static unsigned int 
ipt_ulog_target(struct sk_buff **pskb, return IPT_CONTINUE; } -static void ipt_logfn(unsigned int hooknum, +static void ipt_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix) { - struct ipt_ulog_info loginfo = { - .nl_group = ULOG_DEFAULT_NLGROUP, - .copy_range = 0, - .qthreshold = ULOG_DEFAULT_QTHRESHOLD, - .prefix = "" - }; + struct ipt_ulog_info loginfo; + + if (!li || li->type != NF_LOG_TYPE_ULOG) { + loginfo.nl_group = ULOG_DEFAULT_NLGROUP; + loginfo.copy_range = 0; + loginfo.qthreshold = ULOG_DEFAULT_QTHRESHOLD; + loginfo.prefix[0] = '\0'; + } else { + loginfo.nl_group = li->u.ulog.group; + loginfo.copy_range = li->u.ulog.copy_len; + loginfo.qthreshold = li->u.ulog.qthreshold; + strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); + } ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } @@ -355,6 +364,12 @@ static struct ipt_target ipt_ulog_reg = { .me = THIS_MODULE, }; +static struct nf_logger ipt_ulog_logger = { + .name = "ipt_ULOG", + .logfn = &ipt_logfn, + .me = THIS_MODULE, +}; + static int __init init(void) { int i; @@ -382,7 +397,7 @@ static int __init init(void) return -EINVAL; } if (nflog) - nf_log_register(PF_INET, &ipt_logfn); + nf_log_register(PF_INET, &ipt_ulog_logger); return 0; } @@ -395,7 +410,7 @@ static void __exit fini(void) DEBUGP("ipt_ULOG: cleanup_module\n"); if (nflog) - nf_log_unregister(PF_INET, &ipt_logfn); + nf_log_unregister_logger(&ipt_ulog_logger); ipt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index a692e26a4fa3..0cd1d1bd9033 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -26,10 +26,6 @@ MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); -static unsigned int nflog = 1; -module_param(nflog, int, 
0400); -MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); - struct in_device; #include #include @@ -44,7 +40,7 @@ struct in_device; static DEFINE_SPINLOCK(log_lock); /* One level of recursion won't kill us */ -static void dump_packet(const struct ip6t_log_info *info, +static void dump_packet(const struct nf_loginfo *info, const struct sk_buff *skb, unsigned int ip6hoff, int recurse) { @@ -53,6 +49,12 @@ static void dump_packet(const struct ip6t_log_info *info, struct ipv6hdr _ip6h, *ih; unsigned int ptr; unsigned int hdrlen = 0; + unsigned int logflags; + + if (info->type == NF_LOG_TYPE_LOG) + logflags = info->u.log.logflags; + else + logflags = NF_LOG_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { @@ -84,7 +86,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 48 "OPT (...) " */ - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk("OPT ( "); switch (currenthdr) { @@ -119,7 +121,7 @@ static void dump_packet(const struct ip6t_log_info *info, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(")"); return; } @@ -127,7 +129,7 @@ static void dump_packet(const struct ip6t_log_info *info, break; /* Max Length */ case IPPROTO_AH: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_auth_hdr _ahdr, *ah; /* Max length: 3 "AH " */ @@ -158,7 +160,7 @@ static void dump_packet(const struct ip6t_log_info *info, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (info->logflags & IP6T_LOG_IPOPT) { + if (logflags & IP6T_LOG_IPOPT) { struct ip_esp_hdr _esph, *eh; /* Max length: 4 "ESP " */ @@ -190,7 +192,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("Unknown Ext Hdr %u", currenthdr); return; } - if (info->logflags & IP6T_LOG_IPOPT) + if (logflags & IP6T_LOG_IPOPT) printk(") "); currenthdr = hp->nexthdr; @@ -218,7 
+220,7 @@ static void dump_packet(const struct ip6t_log_info *info, printk("SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (info->logflags & IP6T_LOG_TCPSEQ) + if (logflags & IP6T_LOG_TCPSEQ) printk("SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); /* Max length: 13 "WINDOW=65535 " */ @@ -245,7 +247,7 @@ static void dump_packet(const struct ip6t_log_info *info, /* Max length: 11 "URGP=65535 " */ printk("URGP=%u ", ntohs(th->urg_ptr)); - if ((info->logflags & IP6T_LOG_TCPOPT) + if ((logflags & IP6T_LOG_TCPOPT) && th->doff * 4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)], *op; unsigned int i; @@ -349,7 +351,7 @@ static void dump_packet(const struct ip6t_log_info *info, } /* Max length: 15 "UID=4294967295 " */ - if ((info->logflags & IP6T_LOG_UID) && recurse && skb->sk) { + if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { read_lock_bh(&skb->sk->sk_callback_lock); if (skb->sk->sk_socket && skb->sk->sk_socket->file) printk("UID=%u ", skb->sk->sk_socket->file->f_uid); @@ -357,19 +359,31 @@ static void dump_packet(const struct ip6t_log_info *info, } } +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 0, + .logflags = NF_LOG_MASK, + }, + }, +}; + static void -ip6t_log_packet(unsigned int hooknum, +ip6t_log_packet(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - const struct ip6t_log_info *loginfo, - const char *level_string, + const struct nf_loginfo *loginfo, const char *prefix) { + if (!loginfo) + loginfo = &default_loginfo; + spin_lock_bh(&log_lock); - printk(level_string); - printk("%sIN=%s OUT=%s ", - prefix == NULL ? loginfo->prefix : prefix, + printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, + prefix, in ? in->name : "", out ? 
out->name : ""); if (in && !out) { @@ -416,29 +430,17 @@ ip6t_log_target(struct sk_buff **pskb, void *userinfo) { const struct ip6t_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct nf_loginfo li; + + li.type = NF_LOG_TYPE_LOG; + li.u.log.level = loginfo->level; + li.u.log.logflags = loginfo->logflags; - level_string[1] = '0' + (loginfo->level % 8); - ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); + nf_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, loginfo->prefix); return IP6T_CONTINUE; } -static void -ip6t_logfn(unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const char *prefix) -{ - struct ip6t_log_info loginfo = { - .level = 0, - .logflags = IP6T_LOG_MASK, - .prefix = "" - }; - - ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); -} static int ip6t_log_checkentry(const char *tablename, const struct ip6t_entry *e, @@ -475,20 +477,29 @@ static struct ip6t_target ip6t_log_reg = { .me = THIS_MODULE, }; +static struct nf_logger ip6t_logger = { + .name = "ip6t_LOG", + .logfn = &ip6t_log_packet, + .me = THIS_MODULE, +}; + static int __init init(void) { if (ip6t_register_target(&ip6t_log_reg)) return -EINVAL; - if (nflog) - nf_log_register(PF_INET6, &ip6t_logfn); + if (nf_log_register(PF_INET6, &ip6t_logger) < 0) { + printk(KERN_WARNING "ip6t_LOG: not logging via system console " + "since somebody else already registered for PF_INET6\n"); + /* we cannot make module load fail here, since otherwise + * ip6tables userspace would abort */ + } return 0; } static void __exit fini(void) { - if (nflog) - nf_log_unregister(PF_INET6, &ip6t_logfn); + nf_log_unregister_logger(&ip6t_logger); ip6t_unregister_target(&ip6t_log_reg); } -- cgit v1.2.3 From 0597f2680d666a3bcf101ac0c771ba7e50016bbd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:39 -0700 Subject: [NETFILTER]: Add new "nfnetlink_log" userspace packet logging 
facility This is a generic (layer3 independent) version of what ipt_ULOG is already doing for IPv4 today. ipt_ULOG, ebt_ulog and finally also ip[6]t_LOG will be deprecated by this mechanism in the long term. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 85 +++ net/netfilter/Kconfig | 11 + net/netfilter/Makefile | 1 + net/netfilter/nfnetlink_log.c | 995 ++++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue.c | 1 + 5 files changed, 1093 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink_log.h create mode 100644 net/netfilter/nfnetlink_log.c (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h new file mode 100644 index 000000000000..420ff4625cbf --- /dev/null +++ b/include/linux/netfilter/nfnetlink_log.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_LOG_H +#define _NFNETLINK_LOG_H + +/* This file describes the netlink messages (i.e. 'protocol packets'), + * and not any kind of function definitions. It is shared between kernel and + * userspace. 
Don't put kernel specific stuff in here */ + +#include + +enum nfulnl_msg_types { + NFULNL_MSG_PACKET, /* packet from kernel to userspace */ + NFULNL_MSG_CONFIG, /* connect to a particular queue */ + + NFULNL_MSG_MAX +}; + +struct nfulnl_msg_packet_hdr { + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ + u_int8_t _pad; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +#define NFULNL_PREFIXLEN 30 /* just like old log target */ + +enum nfulnl_attr_type { + NFULA_UNSPEC, + NFULA_PACKET_HDR, + NFULA_MARK, /* u_int32_t nfmark */ + NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ + NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_HWADDR, /* nfulnl_msg_packet_hw */ + NFULA_PAYLOAD, /* opaque data payload */ + NFULA_PREFIX, /* string prefix */ + NFULA_UID, /* user id of socket */ + + __NFULA_MAX +}; +#define NFULA_MAX (__NFULA_MAX - 1) + +enum nfulnl_msg_config_cmds { + NFULNL_CFG_CMD_NONE, + NFULNL_CFG_CMD_BIND, + NFULNL_CFG_CMD_UNBIND, + NFULNL_CFG_CMD_PF_BIND, + NFULNL_CFG_CMD_PF_UNBIND, +}; + +struct nfulnl_msg_config_cmd { + u_int8_t command; /* nfulnl_msg_config_cmds */ +} __attribute__ ((packed)); + +struct nfulnl_msg_config_mode { + u_int32_t copy_range; + u_int8_t copy_mode; + u_int8_t _pad; +} __attribute__ ((packed)); + +enum nfulnl_attr_config { + NFULA_CFG_UNSPEC, + NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */ + NFULA_CFG_MODE, /* nfulnl_msg_config_mode */ + NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */ + NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */ + NFULA_CFG_QTHRESH, /* u_int32_t */ + __NFULA_CFG_MAX +}; +#define NFULA_CFG_MAX (__NFULA_CFG_MAX -1) + +#define NFULNL_COPY_NONE 0x00 +#define NFULNL_COPY_META 0x01 +#define NFULNL_COPY_PACKET 0x02 + 
+#endif /* _NFNETLINK_LOG_H */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index f0eb23e5c5f1..8296b38bf270 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -11,3 +11,14 @@ config NETFILTER_NETLINK_QUEUE If this option isenabled, the kernel will include support for queueing packets via NFNETLINK. +config NETFILTER_NETLINK_LOG + tristate "Netfilter LOG over NFNETLINK interface" + depends on NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for logging packets via NFNETLINK. + + This obsoletes the existing ipt_ULOG and ebt_ulog mechanisms, + and is also scheduled to replace the old syslog-based ipt_LOG + and ip6t_LOG modules. + diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 14a0b187e75e..c41caebc4a7c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o +obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c new file mode 100644 index 000000000000..f41045e385ae --- /dev/null +++ b/net/netfilter/nfnetlink_log.c @@ -0,0 +1,995 @@ +/* + * This is a module which is used for logging packets to userspace via + * nfnetlink. + * + * (C) 2005 by Harald Welte + * + * Based on the old ipv4-only ipt_ULOG.c: + * (C) 2000-2004 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation.
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define NFULNL_NLBUFSIZ_DEFAULT 4096 +#define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ +#define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ + +#define PRINTR(x, args...) do { if (net_ratelimit()) \ + printk(x, ## args); } while (0); + +#if 0 +#define UDEBUG(x, args ...) printk(KERN_DEBUG "%s(%d):%s(): " x, \ + __FILE__, __LINE__, __FUNCTION__, \ + ## args) +#else +#define UDEBUG(x, ...) +#endif + +struct nfulnl_instance { + struct hlist_node hlist; /* global list of instances */ + spinlock_t lock; + atomic_t use; /* use count */ + + unsigned int qlen; /* number of nlmsgs in skb */ + struct sk_buff *skb; /* pre-allocatd skb */ + struct nlmsghdr *lastnlh; /* netlink header of last msg in skb */ + struct timer_list timer; + int peer_pid; /* PID of the peer process */ + + /* configurable parameters */ + unsigned int flushtimeout; /* timeout until queue flush */ + unsigned int nlbufsiz; /* netlink buffer allocation size */ + unsigned int qthreshold; /* threshold of the queue */ + u_int32_t copy_range; + u_int16_t group_num; /* number of this queue */ + u_int8_t copy_mode; +}; + +static DEFINE_RWLOCK(instances_lock); + +#define INSTANCE_BUCKETS 16 +static struct hlist_head instance_table[INSTANCE_BUCKETS]; +static unsigned int hash_init; + +static inline u_int8_t instance_hashfn(u_int16_t group_num) +{ + return ((group_num & 0xff) % INSTANCE_BUCKETS); +} + +static struct nfulnl_instance * +__instance_lookup(u_int16_t group_num) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct nfulnl_instance *inst; + + UDEBUG("entering (group_num=%u)\n", group_num); + + head = &instance_table[instance_hashfn(group_num)]; + hlist_for_each_entry(inst, pos, head, hlist) { + if (inst->group_num == group_num) + return inst; + } + return NULL; +} + +static inline void 
+instance_get(struct nfulnl_instance *inst) +{ + atomic_inc(&inst->use); +} + +static struct nfulnl_instance * +instance_lookup_get(u_int16_t group_num) +{ + struct nfulnl_instance *inst; + + read_lock_bh(&instances_lock); + inst = __instance_lookup(group_num); + if (inst) + instance_get(inst); + read_unlock_bh(&instances_lock); + + return inst; +} + +static void +instance_put(struct nfulnl_instance *inst) +{ + if (inst && atomic_dec_and_test(&inst->use)) { + UDEBUG("kfree(inst=%p)\n", inst); + kfree(inst); + } +} + +static void nfulnl_timer(unsigned long data); + +static struct nfulnl_instance * +instance_create(u_int16_t group_num, int pid) +{ + struct nfulnl_instance *inst; + + UDEBUG("entering (group_num=%u, pid=%d)\n", group_num, + pid); + + write_lock_bh(&instances_lock); + if (__instance_lookup(group_num)) { + inst = NULL; + UDEBUG("aborting, instance already exists\n"); + goto out_unlock; + } + + inst = kmalloc(sizeof(*inst), GFP_ATOMIC); + if (!inst) + goto out_unlock; + + memset(inst, 0, sizeof(*inst)); + INIT_HLIST_NODE(&inst->hlist); + inst->lock = SPIN_LOCK_UNLOCKED; + /* needs to be two, since we _put() after creation */ + atomic_set(&inst->use, 2); + + init_timer(&inst->timer); + inst->timer.function = nfulnl_timer; + inst->timer.data = (unsigned long)inst; + /* don't start timer yet. 
(re)start it with every packet */ + + inst->peer_pid = pid; + inst->group_num = group_num; + + inst->qthreshold = NFULNL_QTHRESH_DEFAULT; + inst->flushtimeout = NFULNL_TIMEOUT_DEFAULT; + inst->nlbufsiz = NFULNL_NLBUFSIZ_DEFAULT; + inst->copy_mode = NFULNL_COPY_PACKET; + inst->copy_range = 0xffff; + + if (!try_module_get(THIS_MODULE)) + goto out_free; + + hlist_add_head(&inst->hlist, + &instance_table[instance_hashfn(group_num)]); + + UDEBUG("newly added node: %p, next=%p\n", &inst->hlist, + inst->hlist.next); + + write_unlock_bh(&instances_lock); + + return inst; + +out_free: + instance_put(inst); +out_unlock: + write_unlock_bh(&instances_lock); + return NULL; +} + +static int __nfulnl_send(struct nfulnl_instance *inst); + +static void +_instance_destroy2(struct nfulnl_instance *inst, int lock) +{ + /* first pull it out of the global list */ + if (lock) + write_lock_bh(&instances_lock); + + UDEBUG("removing instance %p (queuenum=%u) from hash\n", + inst, inst->group_num); + + hlist_del(&inst->hlist); + + if (lock) + write_unlock_bh(&instances_lock); + + /* then flush all pending packets from skb */ + + spin_lock_bh(&inst->lock); + if (inst->skb) { + if (inst->qlen) + __nfulnl_send(inst); + if (inst->skb) { + kfree_skb(inst->skb); + inst->skb = NULL; + } + } + spin_unlock_bh(&inst->lock); + + /* and finally put the refcount */ + instance_put(inst); + + module_put(THIS_MODULE); +} + +static inline void +__instance_destroy(struct nfulnl_instance *inst) +{ + _instance_destroy2(inst, 0); +} + +static inline void +instance_destroy(struct nfulnl_instance *inst) +{ + _instance_destroy2(inst, 1); +} + +static int +nfulnl_set_mode(struct nfulnl_instance *inst, u_int8_t mode, + unsigned int range) +{ + int status = 0; + + spin_lock_bh(&inst->lock); + + switch (mode) { + case NFULNL_COPY_NONE: + case NFULNL_COPY_META: + inst->copy_mode = mode; + inst->copy_range = 0; + break; + + case NFULNL_COPY_PACKET: + inst->copy_mode = mode; + /* we're using struct nfattr which has 16bit 
nfa_len */ + if (range > 0xffff) + inst->copy_range = 0xffff; + else + inst->copy_range = range; + break; + + default: + status = -EINVAL; + break; + } + + spin_unlock_bh(&inst->lock); + + return status; +} + +static int +nfulnl_set_nlbufsiz(struct nfulnl_instance *inst, u_int32_t nlbufsiz) +{ + int status; + + spin_lock_bh(&inst->lock); + if (nlbufsiz < NFULNL_NLBUFSIZ_DEFAULT) + status = -ERANGE; + else if (nlbufsiz > 131072) + status = -ERANGE; + else { + inst->nlbufsiz = nlbufsiz; + status = 0; + } + spin_unlock_bh(&inst->lock); + + return status; +} + +static int +nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout) +{ + spin_lock_bh(&inst->lock); + inst->flushtimeout = timeout; + spin_unlock_bh(&inst->lock); + + return 0; +} + +static int +nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh) +{ + spin_lock_bh(&inst->lock); + inst->qthreshold = qthresh; + spin_unlock_bh(&inst->lock); + + return 0; +} + +static struct sk_buff *nfulnl_alloc_skb(unsigned int inst_size, + unsigned int pkt_size) +{ + struct sk_buff *skb; + + UDEBUG("entered (%u, %u)\n", inst_size, pkt_size); + + /* alloc skb which should be big enough for a whole multipart + * message. 
WARNING: has to be <= 128k due to slab restrictions */ + + skb = alloc_skb(inst_size, GFP_ATOMIC); + if (!skb) { + PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n", + inst_size); + + /* try to allocate only as much as we need for current + * packet */ + + skb = alloc_skb(pkt_size, GFP_ATOMIC); + if (!skb) + PRINTR("nfnetlink_log: can't even alloc %u bytes\n", + pkt_size); + } + + return skb; +} + +static int +__nfulnl_send(struct nfulnl_instance *inst) +{ + int status; + + if (timer_pending(&inst->timer)) + del_timer(&inst->timer); + + if (inst->qlen > 1) + inst->lastnlh->nlmsg_type = NLMSG_DONE; + + status = nfnetlink_unicast(inst->skb, inst->peer_pid, MSG_DONTWAIT); + if (status < 0) { + UDEBUG("netlink_unicast() failed\n"); + /* FIXME: statistics */ + } + + inst->qlen = 0; + inst->skb = NULL; + inst->lastnlh = NULL; + + return status; +} + +static void nfulnl_timer(unsigned long data) +{ + struct nfulnl_instance *inst = (struct nfulnl_instance *)data; + + UDEBUG("timer function called, flushing buffer\n"); + + spin_lock_bh(&inst->lock); + __nfulnl_send(inst); + instance_put(inst); + spin_unlock_bh(&inst->lock); +} + +static inline int +__build_packet_message(struct nfulnl_instance *inst, + const struct sk_buff *skb, + unsigned int data_len, + unsigned int pf, + unsigned int hooknum, + const struct net_device *indev, + const struct net_device *outdev, + const struct nf_loginfo *li, + const char *prefix) +{ + unsigned char *old_tail; + struct nfulnl_msg_packet_hdr pmsg; + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + u_int32_t tmp_uint; + + UDEBUG("entered\n"); + + old_tail = inst->skb->tail; + nlh = NLMSG_PUT(inst->skb, 0, 0, + NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, + sizeof(struct nfgenmsg)); + nfmsg = NLMSG_DATA(nlh); + nfmsg->nfgen_family = pf; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = htons(inst->group_num); + + pmsg.hw_protocol = htons(skb->protocol); + pmsg.hook = hooknum; + + NFA_PUT(inst->skb, NFULA_PACKET_HDR, sizeof(pmsg), 
&pmsg); + + if (prefix) { + int slen = strlen(prefix); + if (slen > NFULNL_PREFIXLEN) + slen = NFULNL_PREFIXLEN; + NFA_PUT(inst->skb, NFULA_PREFIX, slen, prefix); + } + + if (indev) { + tmp_uint = htonl(indev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + } + + if (outdev) { + tmp_uint = htonl(outdev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + } + + if (skb->nfmark) { + tmp_uint = htonl(skb->nfmark); + NFA_PUT(inst->skb, NFULA_MARK, sizeof(tmp_uint), &tmp_uint); + } + + if (indev && skb->dev && skb->dev->hard_header_parse) { + struct nfulnl_msg_packet_hw phw; + + phw.hw_addrlen = + skb->dev->hard_header_parse((struct sk_buff *)skb, + phw.hw_addr); + phw.hw_addrlen = htons(phw.hw_addrlen); + NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); + } + + if (skb->stamp.tv_sec) { + struct nfulnl_msg_packet_timestamp ts; + + ts.sec = cpu_to_be64(skb->stamp.tv_sec); + ts.usec = cpu_to_be64(skb->stamp.tv_usec); + + NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); + } + + /* UID */ + if (skb->sk) { + read_lock_bh(&skb->sk->sk_callback_lock); + if (skb->sk->sk_socket && skb->sk->sk_socket->file) { + u_int32_t uid = htonl(skb->sk->sk_socket->file->f_uid); + /* need to unlock here since NFA_PUT may goto */ + read_unlock_bh(&skb->sk->sk_callback_lock); + NFA_PUT(inst->skb, NFULA_UID, sizeof(uid), &uid); + } else + read_unlock_bh(&skb->sk->sk_callback_lock); + } + + if (data_len) { + struct nfattr *nfa; + int size = NFA_LENGTH(data_len); + + if (skb_tailroom(inst->skb) < (int)NFA_SPACE(data_len)) { + printk(KERN_WARNING "nfnetlink_log: no tailroom!\n"); + goto nlmsg_failure; + } + + nfa = (struct nfattr *)skb_put(inst->skb, NFA_ALIGN(size)); + nfa->nfa_type = NFULA_PAYLOAD; + nfa->nfa_len = size; + + if (skb_copy_bits(skb, 0, NFA_DATA(nfa), data_len)) + BUG(); + } + + nlh->nlmsg_len = inst->skb->tail - old_tail; + return 0; + +nlmsg_failure: + UDEBUG("nlmsg_failure\n"); +nfattr_failure: + 
PRINTR(KERN_ERR "nfnetlink_log: error creating log nlmsg\n"); + return -1; +} + +#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) + +static struct nf_loginfo default_loginfo = { + .type = NF_LOG_TYPE_ULOG, + .u = { + .ulog = { + .copy_len = 0xffff, + .group = 0, + .qthreshold = 1, + }, + }, +}; + +/* log handler for internal netfilter logging api */ +static void +nfulnl_log_packet(unsigned int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct nf_loginfo *li_user, + const char *prefix) +{ + unsigned int size, data_len; + struct nfulnl_instance *inst; + const struct nf_loginfo *li; + unsigned int qthreshold; + unsigned int nlbufsiz; + + if (li_user && li_user->type == NF_LOG_TYPE_ULOG) + li = li_user; + else + li = &default_loginfo; + + inst = instance_lookup_get(li->u.ulog.group); + if (!inst) + inst = instance_lookup_get(0); + if (!inst) { + PRINTR("nfnetlink_log: trying to log packet, " + "but no instance for group %u\n", li->u.ulog.group); + return; + } + + /* all macros expand to constant values at compile time */ + /* FIXME: do we want to make the size calculation conditional based on + * what is actually present? way more branches and checks, but more + * memory efficient... 
*/ + size = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* mark */ + + NFA_SPACE(sizeof(u_int32_t)) /* uid */ + + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */ + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hw)) + + NFA_SPACE(sizeof(struct nfulnl_msg_packet_timestamp)); + + UDEBUG("initial size=%u\n", size); + + spin_lock_bh(&inst->lock); + + qthreshold = inst->qthreshold; + /* per-rule qthreshold overrides per-instance */ + if (qthreshold > li->u.ulog.qthreshold) + qthreshold = li->u.ulog.qthreshold; + + switch (inst->copy_mode) { + case NFULNL_COPY_META: + case NFULNL_COPY_NONE: + data_len = 0; + break; + + case NFULNL_COPY_PACKET: + if (inst->copy_range == 0 + || inst->copy_range > skb->len) + data_len = skb->len; + else + data_len = inst->copy_range; + + size += NFA_SPACE(data_len); + UDEBUG("copy_packet, therefore size now %u\n", size); + break; + + default: + spin_unlock_bh(&inst->lock); + instance_put(inst); + return; + } + + if (size > inst->nlbufsiz) + nlbufsiz = size; + else + nlbufsiz = inst->nlbufsiz; + + if (!inst->skb) { + if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) { + UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n", + inst->nlbufsiz, size); + goto alloc_failure; + } + } else if (inst->qlen >= qthreshold || + size > skb_tailroom(inst->skb)) { + /* either the queue len is too high or we don't have + * enough room in the skb left. flush to userspace. 
*/ + UDEBUG("flushing old skb\n"); + + __nfulnl_send(inst); + + if (!(inst->skb = nfulnl_alloc_skb(nlbufsiz, size))) { + UDEBUG("error in nfulnl_alloc_skb(%u, %u)\n", + inst->nlbufsiz, size); + goto alloc_failure; + } + } + + UDEBUG("qlen %d, qthreshold %d\n", inst->qlen, qthreshold); + inst->qlen++; + + __build_packet_message(inst, skb, data_len, pf, + hooknum, in, out, li, prefix); + + /* timer_pending always called within inst->lock, so there + * is no chance of a race here */ + if (!timer_pending(&inst->timer)) { + instance_get(inst); + inst->timer.expires = jiffies + (inst->flushtimeout*HZ/100); + add_timer(&inst->timer); + } + spin_unlock_bh(&inst->lock); + + return; + +alloc_failure: + spin_unlock_bh(&inst->lock); + instance_put(inst); + UDEBUG("error allocating skb\n"); + /* FIXME: statistics */ +} + +static int +nfulnl_rcv_nl_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct netlink_notify *n = ptr; + + if (event == NETLINK_URELEASE && + n->protocol == NETLINK_NETFILTER && n->pid) { + int i; + + /* destroy all instances for this pid */ + write_lock_bh(&instances_lock); + for (i = 0; i < INSTANCE_BUCKETS; i++) { + struct hlist_node *tmp, *t2; + struct nfulnl_instance *inst; + struct hlist_head *head = &instance_table[i]; + + hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { + UDEBUG("node = %p\n", inst); + if (n->pid == inst->peer_pid) + __instance_destroy(inst); + } + } + write_unlock_bh(&instances_lock); + } + return NOTIFY_DONE; +} + +static struct notifier_block nfulnl_rtnl_notifier = { + .notifier_call = nfulnl_rcv_nl_event, +}; + +static int +nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) +{ + return -ENOTSUPP; +} + +static struct nf_logger nfulnl_logger = { + .name = "nfnetlink_log", + .logfn = &nfulnl_log_packet, + .me = THIS_MODULE, +}; + +static const int nfula_min[NFULA_MAX] = { + [NFULA_PACKET_HDR-1] = sizeof(struct nfulnl_msg_packet_hdr), 
+ [NFULA_MARK-1] = sizeof(u_int32_t), + [NFULA_TIMESTAMP-1] = sizeof(struct nfulnl_msg_packet_timestamp), + [NFULA_IFINDEX_INDEV-1] = sizeof(u_int32_t), + [NFULA_IFINDEX_OUTDEV-1]= sizeof(u_int32_t), + [NFULA_HWADDR-1] = sizeof(struct nfulnl_msg_packet_hw), + [NFULA_PAYLOAD-1] = 0, + [NFULA_PREFIX-1] = 0, + [NFULA_UID-1] = sizeof(u_int32_t), +}; + +static const int nfula_cfg_min[NFULA_CFG_MAX] = { + [NFULA_CFG_CMD-1] = sizeof(struct nfulnl_msg_config_cmd), + [NFULA_CFG_MODE-1] = sizeof(struct nfulnl_msg_config_mode), + [NFULA_CFG_TIMEOUT-1] = sizeof(u_int32_t), + [NFULA_CFG_QTHRESH-1] = sizeof(u_int32_t), + [NFULA_CFG_NLBUFSIZ-1] = sizeof(u_int32_t), +}; + +static int +nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *nfula[], int *errp) +{ + struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + u_int16_t group_num = ntohs(nfmsg->res_id); + struct nfulnl_instance *inst; + int ret = 0; + + UDEBUG("entering for msg %u\n", NFNL_MSG_TYPE(nlh->nlmsg_type)); + + if (nfattr_bad_size(nfula, NFULA_CFG_MAX, nfula_cfg_min)) { + UDEBUG("bad attribute size\n"); + return -EINVAL; + } + + inst = instance_lookup_get(group_num); + if (nfula[NFULA_CFG_CMD-1]) { + u_int8_t pf = nfmsg->nfgen_family; + struct nfulnl_msg_config_cmd *cmd; + cmd = NFA_DATA(nfula[NFULA_CFG_CMD-1]); + UDEBUG("found CFG_CMD for\n"); + + switch (cmd->command) { + case NFULNL_CFG_CMD_BIND: + if (inst) { + ret = -EBUSY; + goto out_put; + } + + inst = instance_create(group_num, + NETLINK_CB(skb).pid); + if (!inst) { + ret = -EINVAL; + goto out_put; + } + break; + case NFULNL_CFG_CMD_UNBIND: + if (!inst) { + ret = -ENODEV; + goto out_put; + } + + if (inst->peer_pid != NETLINK_CB(skb).pid) { + ret = -EPERM; + goto out_put; + } + + instance_destroy(inst); + break; + case NFULNL_CFG_CMD_PF_BIND: + UDEBUG("registering log handler for pf=%u\n", pf); + ret = nf_log_register(pf, &nfulnl_logger); + break; + case NFULNL_CFG_CMD_PF_UNBIND: + UDEBUG("unregistering log handler for 
pf=%u\n", pf); + /* This is a bug and a feature. We cannot unregister + * other handlers, like nfnetlink_inst can */ + nf_log_unregister_pf(pf); + break; + default: + ret = -EINVAL; + break; + } + } else { + if (!inst) { + UDEBUG("no config command, and no instance for " + "group=%u pid=%u =>ENOENT\n", + group_num, NETLINK_CB(skb).pid); + ret = -ENOENT; + goto out_put; + } + + if (inst->peer_pid != NETLINK_CB(skb).pid) { + UDEBUG("no config command, and wrong pid\n"); + ret = -EPERM; + goto out_put; + } + } + + if (nfula[NFULA_CFG_MODE-1]) { + struct nfulnl_msg_config_mode *params; + params = NFA_DATA(nfula[NFULA_CFG_MODE-1]); + + nfulnl_set_mode(inst, params->copy_mode, + ntohs(params->copy_range)); + } + + if (nfula[NFULA_CFG_TIMEOUT-1]) { + u_int32_t timeout = + *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_TIMEOUT-1]); + + nfulnl_set_timeout(inst, ntohl(timeout)); + } + + if (nfula[NFULA_CFG_NLBUFSIZ-1]) { + u_int32_t nlbufsiz = + *(u_int32_t *)NFA_DATA(nfula[NFULA_CFG_NLBUFSIZ-1]); + + nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz)); + } + + if (nfula[NFULA_CFG_QTHRESH-1]) { + u_int32_t qthresh = + *(u_int16_t *)NFA_DATA(nfula[NFULA_CFG_QTHRESH-1]); + + nfulnl_set_qthresh(inst, ntohl(qthresh)); + } + +out_put: + instance_put(inst); + return ret; +} + +static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { + [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, + .cap_required = CAP_NET_ADMIN }, + [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, + .cap_required = CAP_NET_ADMIN }, +}; + +static struct nfnetlink_subsystem nfulnl_subsys = { + .name = "log", + .subsys_id = NFNL_SUBSYS_ULOG, + .cb_count = NFULNL_MSG_MAX, + .attr_count = NFULA_MAX, + .cb = nfulnl_cb, +}; + +#ifdef CONFIG_PROC_FS +struct iter_state { + unsigned int bucket; +}; + +static struct hlist_node *get_first(struct seq_file *seq) +{ + struct iter_state *st = seq->private; + + if (!st) + return NULL; + + for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) { + if 
(!hlist_empty(&instance_table[st->bucket])) + return instance_table[st->bucket].first; + } + return NULL; +} + +static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h) +{ + struct iter_state *st = seq->private; + + h = h->next; + while (!h) { + if (++st->bucket >= INSTANCE_BUCKETS) + return NULL; + + h = instance_table[st->bucket].first; + } + return h; +} + +static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head; + head = get_first(seq); + + if (head) + while (pos && (head = get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + read_lock_bh(&instances_lock); + return get_idx(seq, *pos); +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return get_next(s, v); +} + +static void seq_stop(struct seq_file *s, void *v) +{ + read_unlock_bh(&instances_lock); +} + +static int seq_show(struct seq_file *s, void *v) +{ + const struct nfulnl_instance *inst = v; + + return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", + inst->group_num, + inst->peer_pid, inst->qlen, + inst->copy_mode, inst->copy_range, + inst->flushtimeout, atomic_read(&inst->use)); +} + +static struct seq_operations nful_seq_ops = { + .start = seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nful_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + struct iter_state *is; + int ret; + + is = kmalloc(sizeof(*is), GFP_KERNEL); + if (!is) + return -ENOMEM; + memset(is, 0, sizeof(*is)); + ret = seq_open(file, &nful_seq_ops); + if (ret < 0) + goto out_free; + seq = file->private_data; + seq->private = is; + return ret; +out_free: + kfree(is); + return ret; +} + +static struct file_operations nful_file_ops = { + .owner = THIS_MODULE, + .open = nful_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; + +#endif /* PROC_FS */ + +static int 
+init_or_cleanup(int init) +{ + int i, status = -ENOMEM; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc_nful; +#endif + + if (!init) + goto cleanup; + + for (i = 0; i < INSTANCE_BUCKETS; i++) + INIT_HLIST_HEAD(&instance_table[i]); + + /* it's not really all that important to have a random value, so + * we can do this from the init function, even if there hasn't + * been that much entropy yet */ + get_random_bytes(&hash_init, sizeof(hash_init)); + + netlink_register_notifier(&nfulnl_rtnl_notifier); + status = nfnetlink_subsys_register(&nfulnl_subsys); + if (status < 0) { + printk(KERN_ERR "log: failed to create netlink socket\n"); + goto cleanup_netlink_notifier; + } + +#ifdef CONFIG_PROC_FS + proc_nful = create_proc_entry("nfnetlink_log", 0440, + proc_net_netfilter); + if (!proc_nful) + goto cleanup_subsys; + proc_nful->proc_fops = &nful_file_ops; +#endif + + return status; + +cleanup: + nf_log_unregister_logger(&nfulnl_logger); +#ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_log", proc_net_netfilter); +cleanup_subsys: +#endif + nfnetlink_subsys_unregister(&nfulnl_subsys); +cleanup_netlink_notifier: + netlink_unregister_notifier(&nfulnl_rtnl_notifier); + return status; +} + +static int __init init(void) +{ + + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +MODULE_DESCRIPTION("netfilter userspace logging"); +MODULE_AUTHOR("Harald Welte "); +MODULE_LICENSE("GPL"); + +module_init(init); +module_exit(fini); diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index eab309e3d42e..d7b0330d64b4 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1041,6 +1041,7 @@ cleanup: nf_unregister_queue_handlers(nfqnl_enqueue_packet); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS + remove_proc_entry("nfnetlink_queue", proc_net_netfilter); cleanup_subsys: #endif nfnetlink_subsys_unregister(&nfqnl_subsys); -- cgit v1.2.3 From 
0f7ff9274e72fd254fbd1ab117bbc1db6e7cdb34 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:44 -0700 Subject: [INET]: Just rename the TCP hashtable functions/structs to inet_ This is to break down the complexity of the series of patches, making it very clear that this one just does: 1. Renames tcp_-prefixed hashtable functions and data structures that were already mostly generic to inet_ to share them with DCCP and other INET transport protocols. 2. Removes unused functions (__tb_head & tb_head) 3. Removes some leftover prototypes in the headers (tcp_bucket_unlock & tcp_v4_build_header) Next changesets will move tcp_sk(sk)->bind_hash to inet_sock so that we can make functions such as tcp_inherit_port, __tcp_inherit_port, tcp_v4_get_port, __tcp_put_port, generic and get others like tcp_destroy_sock closer to generic (tcp_orphan_count will go to sk->sk_prot to allow this). Eventually most of these functions will be used passing the transport protocol inet_hashinfo structure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 93 +++++++++++++++++++---------------------- net/ipv4/tcp.c | 15 ++++--- net/ipv4/tcp_diag.c | 4 +- net/ipv4/tcp_ipv4.c | 106 +++++++++++++++++++++++++---------------------- net/ipv4/tcp_minisocks.c | 16 +++---- net/ipv6/tcp_ipv6.c | 42 +++++++++---------- 7 files changed, 139 insertions(+), 139 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e4fd82e42104..ec580a560e8c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct tcp_bind_bucket *bind_hash; + struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c769adb0463..6c9f6f7cab5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,13 +44,13 @@ * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */ -struct tcp_ehash_bucket { +struct inet_ehash_bucket { rwlock_t lock; struct hlist_head chain; } __attribute__((__aligned__(8))); /* This is for listening sockets, thus all sockets which possess wildcards. */ -#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ /* There are a few simple rules, which allow for local port reuse by * an application. In essence: @@ -83,31 +83,22 @@ struct tcp_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. 
;-) -DaveM */ -struct tcp_bind_bucket { +struct inet_bind_bucket { unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; -#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) -struct tcp_bind_hashbucket { +struct inet_bind_hashbucket { spinlock_t lock; struct hlist_head chain; }; -static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); -} - -static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_empty(&head->chain) ? NULL : __tb_head(head); -} - -extern struct tcp_hashinfo { +struct inet_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * @@ -116,21 +107,21 @@ extern struct tcp_hashinfo { * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ - struct tcp_ehash_bucket *__tcp_ehash; + struct inet_ehash_bucket *ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ - struct tcp_bind_hashbucket *__tcp_bhash; + struct inet_bind_hashbucket *bhash; - int __tcp_bhash_size; - int __tcp_ehash_size; + int bhash_size; + int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ - struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -138,36 +129,39 @@ extern struct tcp_hashinfo { * Now align to a new cache line as all the following members * are often dirty. 
*/ - rwlock_t __tcp_lhash_lock ____cacheline_aligned; - atomic_t __tcp_lhash_users; - wait_queue_head_t __tcp_lhash_wait; - spinlock_t __tcp_portalloc_lock; -} tcp_hashinfo; - -#define tcp_ehash (tcp_hashinfo.__tcp_ehash) -#define tcp_bhash (tcp_hashinfo.__tcp_bhash) -#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) -#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) -#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; + +extern struct inet_hashinfo tcp_hashinfo; +#define tcp_ehash (tcp_hashinfo.ehash) +#define tcp_bhash (tcp_hashinfo.bhash) +#define tcp_ehash_size (tcp_hashinfo.ehash_size) +#define tcp_bhash_size (tcp_hashinfo.bhash_size) +#define tcp_listening_hash (tcp_hashinfo.listening_hash) +#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) +#define tcp_lhash_users (tcp_hashinfo.lhash_users) +#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) +#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum); -extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); -extern void tcp_bucket_unlock(struct sock *sk); +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); extern int tcp_port_rover; /* These are AF independent. 
*/ -static __inline__ int tcp_bhashfn(__u16 lport) +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) { - return (lport & (tcp_bhash_size - 1)); + return lport & (bhash_size - 1); } -extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, +extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); #if (BITS_PER_LONG == 64) @@ -212,7 +206,7 @@ struct tcp_tw_bucket { __u32 tw_ts_recent; long tw_ts_recent_stamp; unsigned long tw_ttd; - struct tcp_bind_bucket *tw_tb; + struct inet_bind_bucket *tw_tb; struct hlist_node tw_death_node; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; @@ -366,14 +360,14 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) /* These can have wildcards, don't try too hard. */ -static __inline__ int tcp_lhashfn(unsigned short num) +static inline int inet_lhashfn(const unsigned short num) { - return num & (TCP_LHTABLE_SIZE - 1); + return num & (INET_LHTABLE_SIZE - 1); } -static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) +static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return tcp_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(inet_sk(sk)->num); } #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -799,9 +793,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_build_header(struct sock *sk, - struct sk_buff *skb); - extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 20159a3dafb3..1ec03db7dcd9 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -272,6 +272,9 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); kmem_cache_t *tcp_bucket_cachep; + +EXPORT_SYMBOL_GPL(tcp_bucket_cachep); + kmem_cache_t *tcp_timewait_cachep; atomic_t 
tcp_orphan_count = ATOMIC_INIT(0); @@ -2259,7 +2262,7 @@ void __init tcp_init(void) sizeof(skb->cb)); tcp_bucket_cachep = kmem_cache_create("tcp_bind_bucket", - sizeof(struct tcp_bind_bucket), + sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); if (!tcp_bucket_cachep) @@ -2277,9 +2280,9 @@ void __init tcp_init(void) * * The methodology is similar to that of the buffer cache. */ - tcp_ehash = (struct tcp_ehash_bucket *) + tcp_ehash = alloc_large_system_hash("TCP established", - sizeof(struct tcp_ehash_bucket), + sizeof(struct inet_ehash_bucket), thash_entries, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : @@ -2294,9 +2297,9 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_ehash[i].chain); } - tcp_bhash = (struct tcp_bind_hashbucket *) + tcp_bhash = alloc_large_system_hash("TCP bind", - sizeof(struct tcp_bind_hashbucket), + sizeof(struct inet_bind_hashbucket), tcp_ehash_size, (num_physpages >= 128 * 1024) ? (25 - PAGE_SHIFT) : @@ -2315,7 +2318,7 @@ void __init tcp_init(void) * on available memory. 
*/ for (order = 0; ((1 << order) << PAGE_SHIFT) < - (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); + (tcp_bhash_size * sizeof(struct inet_bind_hashbucket)); order++) ; if (order >= 4) { diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index f79bd11a4701..5bb6a0f1c77b 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -590,7 +590,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; tcp_listen_lock(); - for (i = s_i; i < TCP_LHTABLE_SIZE; i++) { + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; @@ -646,7 +646,7 @@ skip_listen_ht: return skb->len; for (i = s_i; i < tcp_ehash_size; i++) { - struct tcp_ehash_bucket *head = &tcp_ehash[i]; + struct inet_ehash_bucket *head = &tcp_ehash[i]; struct sock *sk; struct hlist_node *node; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c03d7e9688c8..4138630556e3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -89,12 +89,11 @@ static struct socket *tcp_socket; void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { - .__tcp_lhash_lock = RW_LOCK_UNLOCKED, - .__tcp_lhash_users = ATOMIC_INIT(0), - .__tcp_lhash_wait - = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.__tcp_lhash_wait), - .__tcp_portalloc_lock = SPIN_LOCK_UNLOCKED +struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(tcp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, }; /* @@ -105,14 +104,14 @@ struct tcp_hashinfo __cacheline_aligned tcp_hashinfo = { int sysctl_local_port_range[2] = { 1024, 4999 }; int tcp_port_rover = 1024 - 1; -/* Allocate and initialize a new TCP local port bind bucket. +/* Allocate and initialize a new local port bind bucket. 
* The bindhash mutex for snum's hash chain must be held here. */ -struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum) +struct inet_bind_bucket *inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum) { - struct tcp_bind_bucket *tb = kmem_cache_alloc(tcp_bucket_cachep, - SLAB_ATOMIC); + struct inet_bind_bucket *tb = kmem_cache_alloc(cachep, SLAB_ATOMIC); if (tb) { tb->port = snum; tb->fastreuse = 0; @@ -123,20 +122,21 @@ struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, } /* Caller must hold hashbucket lock for this tb with local BH disabled */ -void tcp_bucket_destroy(struct tcp_bind_bucket *tb) +void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) { if (hlist_empty(&tb->owners)) { __hlist_del(&tb->node); - kmem_cache_free(tcp_bucket_cachep, tb); + kmem_cache_free(cachep, tb); } } /* Caller must disable local BH processing. */ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) { - struct tcp_bind_hashbucket *head = - &tcp_bhash[tcp_bhashfn(inet_sk(child)->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = + &tcp_bhash[inet_bhashfn(inet_sk(child)->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; @@ -152,15 +152,15 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) local_bh_enable(); } -void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, - unsigned short snum) +void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum) { inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); tcp_sk(sk)->bind_hash = tb; } -static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) +static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); struct sock *sk2; @@ 
-190,9 +190,9 @@ static inline int tcp_bind_conflict(struct sock *sk, struct tcp_bind_bucket *tb) */ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; + struct inet_bind_hashbucket *head; struct hlist_node *node; - struct tcp_bind_bucket *tb; + struct inet_bind_bucket *tb; int ret; local_bh_disable(); @@ -211,9 +211,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -238,9 +238,9 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -261,7 +261,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -290,15 +290,16 @@ fail: static void __tcp_put_port(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_bind_hashbucket *head = &tcp_bhash[tcp_bhashfn(inet->num)]; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head = &tcp_bhash[inet_bhashfn(inet->num, + tcp_bhash_size)]; + struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = tcp_sk(sk)->bind_hash; __sk_del_bind_node(sk); tcp_sk(sk)->bind_hash = NULL; inet->num = 0; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); } @@ -344,7 +345,7 @@ static __inline__ void 
__tcp_v4_hash(struct sock *sk, const int listen_possible) BUG_TRAP(sk_unhashed(sk)); if (listen_possible && sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -381,7 +382,7 @@ void tcp_unhash(struct sock *sk) tcp_listen_wlock(); lock = &tcp_lhash_lock; } else { - struct tcp_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; + struct inet_ehash_bucket *head = &tcp_ehash[sk->sk_hashent]; lock = &head->lock; write_lock_bh(&head->lock); } @@ -401,8 +402,10 @@ void tcp_unhash(struct sock *sk) * connection. So always assume those are both wildcarded * during the search since they can never be otherwise. */ -static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, - unsigned short hnum, int dif) +static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; struct hlist_node *node; @@ -438,14 +441,15 @@ static struct sock *__tcp_v4_lookup_listener(struct hlist_head *head, u32 daddr, } /* Optimize the common listener case. */ -static inline struct sock *tcp_v4_lookup_listener(u32 daddr, - unsigned short hnum, int dif) +static inline struct sock *tcp_v4_lookup_listener(const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *sk = NULL; struct hlist_head *head; read_lock(&tcp_lhash_lock); - head = &tcp_listening_hash[tcp_lhashfn(hnum)]; + head = &tcp_listening_hash[inet_lhashfn(hnum)]; if (!hlist_empty(head)) { struct inet_sock *inet = inet_sk((sk = __sk_head(head))); @@ -470,11 +474,13 @@ sherry_cache: * Local BH must be disabled here. 
*/ -static inline struct sock *__tcp_v4_lookup_established(u32 saddr, u16 sport, - u32 daddr, u16 hnum, - int dif) +static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, + const u16 sport, + const u32 daddr, + const u16 hnum, + const int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(sport, hnum); struct sock *sk; @@ -546,7 +552,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_ehash_size); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -639,9 +645,9 @@ static inline u32 connect_port_offset(const struct sock *sk) */ static inline int tcp_v4_hash_connect(struct sock *sk) { - unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -658,14 +664,14 @@ static inline int tcp_v4_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. 
*/ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -678,7 +684,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -713,7 +719,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -2055,7 +2061,7 @@ start_req: } read_unlock_bh(&tp->accept_queue.syn_wait_lock); } - if (++st->bucket < TCP_LHTABLE_SIZE) { + if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_listening_hash[st->bucket]); goto get_sk; } @@ -2506,7 +2512,7 @@ void __init tcp_v4_init(struct net_proto_family *ops) EXPORT_SYMBOL(ipv4_specific); EXPORT_SYMBOL(tcp_bind_hash); -EXPORT_SYMBOL(tcp_bucket_create); +EXPORT_SYMBOL(inet_bind_bucket_create); EXPORT_SYMBOL(tcp_hashinfo); EXPORT_SYMBOL(tcp_inherit_port); EXPORT_SYMBOL(tcp_listen_wlock); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 7c46a553c4af..1df6cd46066b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -60,9 +60,9 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead; - struct tcp_bind_hashbucket *bhead; - struct tcp_bind_bucket *tb; + struct inet_ehash_bucket *ehead; + struct inet_bind_hashbucket *bhead; + struct inet_bind_bucket *tb; /* Unlink from established hashes. */ ehead = &tcp_ehash[tw->tw_hashent]; @@ -76,12 +76,12 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) write_unlock(&ehead->lock); /* Disassociate with bind bucket. 
*/ - bhead = &tcp_bhash[tcp_bhashfn(tw->tw_num)]; + bhead = &tcp_bhash[inet_bhashfn(tw->tw_num, tcp_bhash_size)]; spin_lock(&bhead->lock); tb = tw->tw_tb; __hlist_del(&tw->tw_bind_node); tw->tw_tb = NULL; - tcp_bucket_destroy(tb); + inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&bhead->lock); #ifdef SOCK_REFCNT_DEBUG @@ -296,14 +296,14 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { - struct tcp_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; - struct tcp_bind_hashbucket *bhead; + struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; + struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in binding cache, even if it is closed. */ - bhead = &tcp_bhash[tcp_bhashfn(inet_sk(sk)->num)]; + bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; spin_lock(&bhead->lock); tw->tw_tb = tcp_sk(sk)->bind_hash; BUG_TRAP(tcp_sk(sk)->bind_hash); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 4e32a8496be3..31f50fb29ffb 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -98,11 +98,11 @@ static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) return tcp_v6_hashfn(laddr, lport, faddr, fport); } -static inline int tcp_v6_bind_conflict(struct sock *sk, - struct tcp_bind_bucket *tb) +static inline int tcp_v6_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { - struct sock *sk2; - struct hlist_node *node; + const struct sock *sk2; + const struct hlist_node *node; /* We must walk the whole port owner list in this case. 
-DaveM */ sk_for_each_bound(sk2, node, &tb->owners) { @@ -126,8 +126,8 @@ static inline int tcp_v6_bind_conflict(struct sock *sk, */ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) { - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; struct hlist_node *node; int ret; @@ -146,9 +146,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) do { rover++; if (rover > high) rover = low; - head = &tcp_bhash[tcp_bhashfn(rover)]; + head = &tcp_bhash[inet_bhashfn(rover, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) goto next; break; @@ -171,9 +171,9 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum) /* OK, here is the one we will use. */ snum = rover; } else { - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; spin_lock(&head->lock); - tb_for_each(tb, node, &head->chain) + inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) goto tb_found; } @@ -192,7 +192,7 @@ tb_found: } tb_not_found: ret = 1; - if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(tcp_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -224,7 +224,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); if (sk->sk_state == TCP_LISTEN) { - list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)]; + list = &tcp_listening_hash[inet_sk_listen_hashfn(sk)]; lock = &tcp_lhash_lock; tcp_listen_wlock(); } else { @@ -264,7 +264,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor hiscore=0; read_lock(&tcp_lhash_lock); - sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) { + sk_for_each(sk, node, 
&tcp_listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -305,7 +305,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct tcp_ehash_bucket *head; + struct inet_ehash_bucket *head; struct sock *sk; struct hlist_node *node; __u32 ports = TCP_COMBINED_PORTS(sport, hnum); @@ -461,7 +461,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, int dif = sk->sk_bound_dev_if; u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); - struct tcp_ehash_bucket *head = &tcp_ehash[hash]; + struct inet_ehash_bucket *head = &tcp_ehash[hash]; struct sock *sk2; struct hlist_node *node; struct tcp_tw_bucket *tw; @@ -540,8 +540,8 @@ static inline u32 tcpv6_port_offset(const struct sock *sk) static int tcp_v6_hash_connect(struct sock *sk) { unsigned short snum = inet_sk(sk)->num; - struct tcp_bind_hashbucket *head; - struct tcp_bind_bucket *tb; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; int ret; if (!snum) { @@ -558,14 +558,14 @@ static int tcp_v6_hash_connect(struct sock *sk) local_bh_disable(); for (i = 1; i <= range; i++) { port = low + (i + offset) % range; - head = &tcp_bhash[tcp_bhashfn(port)]; + head = &tcp_bhash[inet_bhashfn(port, tcp_bhash_size)]; spin_lock(&head->lock); /* Does not bother with rcv_saddr checks, * because the established check is already * unique enough. 
*/ - tb_for_each(tb, node, &head->chain) { + inet_bind_bucket_for_each(tb, node, &head->chain) { if (tb->port == port) { BUG_TRAP(!hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) @@ -578,7 +578,7 @@ static int tcp_v6_hash_connect(struct sock *sk) } } - tb = tcp_bucket_create(head, port); + tb = inet_bind_bucket_create(tcp_bucket_cachep, head, port); if (!tb) { spin_unlock(&head->lock); break; @@ -613,7 +613,7 @@ ok: goto out; } - head = &tcp_bhash[tcp_bhashfn(snum)]; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; tb = tcp_sk(sk)->bind_hash; spin_lock_bh(&head->lock); -- cgit v1.2.3 From a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:01:14 -0700 Subject: [INET]: Move bind_hash from tcp_sk to inet_sk This should really be in an inet_connection_sock, but I'm leaving it for a later optimization, when some more fields common to INET transport protocols now in tcp_sk or inet_sk will be chunked out into inet_connection_sock; for now it's better to concentrate on getting the changes in the core merged to leave the DCCP tree with only DCCP-specific code. Next changesets will take advantage of this move to generalise things like tcp_bind_hash, tcp_put_port, tcp_inherit_port, making the latter receive an inet_hashinfo parameter, and even __tcp_tw_hashdance, etc. in the future, when tcp_tw_bucket gets transformed into the struct timewait_sock hierarchy. tcp_destroy_sock also is eligible as soon as tcp_orphan_count gets moved to sk_prot. A cascade of incremental changes will ultimately make the tcp_lookup functions fully generic. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ip.h | 2 ++ include/linux/tcp.h | 1 - include/net/tcp.h | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_ipv4.c | 21 +++++++++++---------- net/ipv4/tcp_minisocks.c | 15 ++++++++------- net/ipv6/tcp_ipv6.c | 8 ++++---- 7 files changed, 28 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0f..2c54bbd3da76 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,6 +128,7 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -157,6 +158,7 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ + struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec580a560e8c..e70ab19652db 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ff5d30ac2b06..6c6c879e7e87 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1266,7 +1266,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (tcp_sk(sk)->bind_hash && + if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) tcp_put_port(sk); /* fall through */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 
1ec03db7dcd9..e54a410ca701 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1575,7 +1575,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || tcp_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); sk->sk_prot->destroy(sk); @@ -1802,7 +1802,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || tp->bind_hash); + BUG_TRAP(!inet->num || inet->bind_hash); sk->sk_error_report(sk); return err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 58e36ed88f25..10a9b3ae3442 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -113,9 +113,9 @@ static __inline__ void __tcp_inherit_port(struct sock *sk, struct sock *child) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; sk_add_bind_node(child, &tb->owners); - tcp_sk(child)->bind_hash = tb; + inet_sk(child)->bind_hash = tb; spin_unlock(&head->lock); } @@ -129,9 +129,10 @@ inline void tcp_inherit_port(struct sock *sk, struct sock *child) void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - inet_sk(sk)->num = snum; + struct inet_sock *inet = inet_sk(sk); + inet->num = snum; sk_add_bind_node(sk, &tb->owners); - tcp_sk(sk)->bind_hash = tb; + inet->bind_hash = tb; } static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) @@ -246,9 +247,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -269,9 +270,9 @@ static void __tcp_put_port(struct sock *sk) struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = tcp_sk(sk)->bind_hash; + tb = 
inet->bind_hash; __sk_del_bind_node(sk); - tcp_sk(sk)->bind_hash = NULL; + inet->bind_hash = NULL; inet->num = 0; inet_bind_bucket_destroy(tcp_bucket_cachep, tb); spin_unlock(&head->lock); @@ -694,7 +695,7 @@ ok: } head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __tcp_v4_hash(sk, 0); @@ -1940,7 +1941,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. */ - if (tp->bind_hash) + if (inet_sk(sk)->bind_hash) tcp_put_port(sk); /* diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 1df6cd46066b..267cea1087e5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -296,17 +296,17 @@ kill: */ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) { + const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_ehash[sk->sk_hashent]; struct inet_bind_hashbucket *bhead; - /* Step 1: Put TW into bind hash. Original socket stays there too. - Note, that any socket with inet_sk(sk)->num != 0 MUST be bound in + Note, that any socket with inet->num != 0 MUST be bound in binding cache, even if it is closed. 
*/ - bhead = &tcp_bhash[inet_bhashfn(inet_sk(sk)->num, tcp_bhash_size)]; + bhead = &tcp_bhash[inet_bhashfn(inet->num, tcp_bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = tcp_sk(sk)->bind_hash; - BUG_TRAP(tcp_sk(sk)->bind_hash); + tw->tw_tb = inet->bind_hash; + BUG_TRAP(inet->bind_hash); tw_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); @@ -694,6 +694,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, if(newsk != NULL) { struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); + struct inet_sock *newinet = inet_sk(newsk); struct tcp_sock *newtp; struct sk_filter *filter; @@ -702,10 +703,10 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, /* SANITY */ sk_node_init(&newsk->sk_node); - tcp_sk(newsk)->bind_hash = NULL; + newinet->bind_hash = NULL; /* Clone the TCP header template */ - inet_sk(newsk)->dport = ireq->rmt_port; + newinet->dport = ireq->rmt_port; sock_lock_init(newsk); bh_lock_sock(newsk); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 31f50fb29ffb..a8ca7ba06c1c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -204,9 +204,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!tcp_sk(sk)->bind_hash) + if (!inet_sk(sk)->bind_hash) tcp_bind_hash(sk, tb, snum); - BUG_TRAP(tcp_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_sk(sk)->bind_hash == tb); ret = 0; fail_unlock: @@ -613,8 +613,8 @@ ok: goto out; } - head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; - tb = tcp_sk(sk)->bind_hash; + head = &tcp_bhash[inet_bhashfn(snum, tcp_bhash_size)]; + tb = inet_sk(sk)->bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { -- cgit v1.2.3 From a86888b925299330053d20e0eba03ac4d2648c4b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:02:13 -0700 Subject: [NETFILTER]: Fix multiple problems with the conntrack event cache refcnt underflow: the reference 
count is decremented when a conntrack entry is removed from the hash but it is not incremented when entering new entries. missing protection of process context against softirq context: all cache operations need to locally disable softirqs to avoid races. Additionally the event cache can't be initialized when a packet enters the conntrack code but needs to be initialized whenever we cache an event and the stored conntrack entry doesn't match the current one. incorrect flushing of the event cache in ip_ct_iterate_cleanup: without real locking we can't flush the cache for different CPUs without incurring races. The cache for different CPUs can only be flushed when no packets are going through the code. ip_ct_iterate_cleanup doesn't need to drop all references, so flushing is moved to the cleanup path. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 29 +++---- include/linux/netfilter_ipv4/ip_conntrack_core.h | 14 ++- net/ipv4/netfilter/ip_conntrack_core.c | 105 ++++++++--------------- net/ipv4/netfilter/ip_conntrack_standalone.c | 3 +- 4 files changed, 57 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ff2c1c6001f9..088742befe49 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -411,6 +411,7 @@ struct ip_conntrack_stat #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS #include +#include struct ip_conntrack_ecache { struct ip_conntrack *ct; @@ -445,26 +446,24 @@ ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); } +extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct); +extern void __ip_ct_event_cache_init(struct ip_conntrack *ct); + static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) { -
struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); - - if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { - if (net_ratelimit()) { - printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); - dump_stack(); - } - } + struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; + struct ip_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ct != ecache->ct) + __ip_ct_event_cache_init(ct); ecache->events |= event; + local_bh_enable(); } -extern void -ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); -extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); - static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) { @@ -483,9 +482,7 @@ static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) {} static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) {} -static inline void ip_conntrack_deliver_cached_events_for( - struct ip_conntrack *ct) {} -static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {} static inline void ip_conntrack_expect_event(enum ip_conntrack_expect_events event, struct ip_conntrack_expect *exp) {} diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index fbf6c3e41647..dc4d2a0575de 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -44,18 +44,14 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; int ret = NF_ACCEPT; - if (ct && !is_confirmed(ct)) - ret = __ip_conntrack_confirm(pskb); - ip_conntrack_deliver_cached_events_for(ct); - + if (ct) { + if (!is_confirmed(ct)) + ret = 
__ip_conntrack_confirm(pskb); + ip_ct_deliver_cached_events(ct); + } return ret; } -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -struct ip_conntrack_ecache; -extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); -#endif - extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); extern struct list_head *ip_conntrack_hash; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index d9fddae8d787..5c3f16eae2d8 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -85,73 +85,62 @@ struct notifier_block *ip_conntrack_expect_chain; DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); -static inline void __deliver_cached_events(struct ip_conntrack_ecache *ecache) +/* deliver cached events and clear cache entry - must be called with locally + * disabled softirqs */ +static inline void +__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) { + DEBUGP("ecache: delivering events for %p\n", ecache->ct); if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) notifier_call_chain(&ip_conntrack_chain, ecache->events, ecache->ct); ecache->events = 0; -} - -void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) -{ - __deliver_cached_events(ecache); + ip_conntrack_put(ecache->ct); + ecache->ct = NULL; } /* Deliver all cached events for a particular conntrack. 
This is called * by code prior to async packet handling or freeing the skb */ -void -ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct) +void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) { - struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); - - if (!ct) - return; + struct ip_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ecache->ct == ct) + __ip_ct_deliver_cached_events(ecache); + local_bh_enable(); +} - if (ecache->ct == ct) { - DEBUGP("ecache: delivering event for %p\n", ct); - __deliver_cached_events(ecache); - } else { - if (net_ratelimit()) - printk(KERN_WARNING "ecache: want to deliver for %p, " - "but cache has %p\n", ct, ecache->ct); - } +void __ip_ct_event_cache_init(struct ip_conntrack *ct) +{ + struct ip_conntrack_ecache *ecache; - /* signalize that events have already been delivered */ - ecache->ct = NULL; + /* take care of delivering potentially old events */ + ecache = &__get_cpu_var(ip_conntrack_ecache); + BUG_ON(ecache->ct == ct); + if (ecache->ct) + __ip_ct_deliver_cached_events(ecache); + /* initialize for this conntrack/packet */ + ecache->ct = ct; + nf_conntrack_get(&ct->ct_general); } -/* Deliver cached events for old pending events, if current conntrack != old */ -void ip_conntrack_event_cache_init(const struct sk_buff *skb) +/* flush the event cache - touches other CPU's data and must not be called while + * packets are still passing through the code */ +static void ip_ct_event_cache_flush(void) { - struct ip_conntrack *ct = (struct ip_conntrack *) skb->nfct; - struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); + struct ip_conntrack_ecache *ecache; + int cpu; - /* take care of delivering potentially old events */ - if (ecache->ct != ct) { - enum ip_conntrack_info ctinfo; - /* we have to check, since at startup the cache is NULL */ - if (likely(ecache->ct)) { - DEBUGP("ecache: entered for different 
conntrack: " - "ecache->ct=%p, skb->nfct=%p. delivering " - "events\n", ecache->ct, ct); - __deliver_cached_events(ecache); + for_each_cpu(cpu) { + ecache = &per_cpu(ip_conntrack_ecache, cpu); + if (ecache->ct) ip_conntrack_put(ecache->ct); - } else { - DEBUGP("ecache: entered for conntrack %p, " - "cache was clean before\n", ct); - } - - /* initialize for this conntrack/packet */ - ecache->ct = ip_conntrack_get(skb, &ctinfo); - /* ecache->events cleared by __deliver_cached_devents() */ - } else { - DEBUGP("ecache: re-entered for conntrack %p.\n", ct); } } - +#else +static inline void ip_ct_event_cache_flush(void) {} #endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); @@ -878,8 +867,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, IP_NF_ASSERT((*pskb)->nfct); - ip_conntrack_event_cache_init(*pskb); - ret = proto->packet(ct, *pskb, ctinfo); if (ret < 0) { /* Invalid: inverse of the return code tells @@ -1278,23 +1265,6 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) ip_conntrack_put(ct); } - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - { - /* we need to deliver all cached events in order to drop - * the reference counts */ - int cpu; - for_each_cpu(cpu) { - struct ip_conntrack_ecache *ecache = - &per_cpu(ip_conntrack_ecache, cpu); - if (ecache->ct) { - __ip_ct_deliver_cached_events(ecache); - ip_conntrack_put(ecache->ct); - ecache->ct = NULL; - } - } - } -#endif } /* Fast function for those who don't want to parse /proc (and I don't @@ -1381,6 +1351,7 @@ void ip_conntrack_flush() delete... 
*/ synchronize_net(); + ip_ct_event_cache_flush(); i_see_dead_people: ip_ct_iterate_cleanup(kill_all, NULL); if (atomic_read(&ip_conntrack_count) != 0) { diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ca97c3ac2f2a..ee5895afd0c3 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -401,7 +401,6 @@ static unsigned int ip_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - ip_conntrack_event_cache_init(*pskb); /* We've seen it coming out the other side: confirm it */ return ip_conntrack_confirm(pskb); } @@ -419,7 +418,6 @@ static unsigned int ip_conntrack_help(unsigned int hooknum, ct = ip_conntrack_get(*pskb, &ctinfo); if (ct && ct->helper) { unsigned int ret; - ip_conntrack_event_cache_init(*pskb); ret = ct->helper->help(pskb, ct, ctinfo); if (ret != NF_ACCEPT) return ret; @@ -978,6 +976,7 @@ EXPORT_SYMBOL_GPL(ip_conntrack_chain); EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); +EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); #endif EXPORT_SYMBOL(ip_conntrack_protocol_register); -- cgit v1.2.3 From 927ccbcc28dceee29dad876982768cca29738564 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:03:40 -0700 Subject: [NETFILTER]: attribute count is an attribute of message type, not subsystem Prior to this patch, every nfnetlink subsystem had to specify its attribute count. However, in reality the attribute count depends on the message type within the subsystem, not the subsystem itself. This patch moves 'attr_count' from 'struct nfnetlink_subsystem' into nfnl_callback to fix this. Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- net/ipv4/netfilter/ip_conntrack_netlink.c | 9 +++++++-- net/netfilter/nfnetlink.c | 20 ++++++++++++++++---- net/netfilter/nfnetlink_log.c | 5 +++-- net/netfilter/nfnetlink_queue.c | 4 +++- 5 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 561f9df28808..b0feb2374079 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -85,9 +85,10 @@ struct nfgenmsg { struct nfnl_callback { - kernel_cap_t cap_required; /* capabilities required for this msg */ int (*call)(struct sock *nl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); + kernel_cap_t cap_required; /* capabilities required for this msg */ + u_int16_t attr_count; /* number of nfattr's */ }; struct nfnetlink_subsystem @@ -95,7 +96,6 @@ struct nfnetlink_subsystem const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ - u_int32_t attr_count; /* number of nfattr's */ struct nfnl_callback *cb; /* callback for individual types */ }; diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 23f18f6a5535..53d98974dcf0 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1484,21 +1484,28 @@ static struct notifier_block ctnl_notifier_exp = { static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, + .attr_count = CTA_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, + .attr_count = CTA_MAX, 
.cap_required = CAP_NET_ADMIN }, }; static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, + .attr_count = CTA_EXPECT_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -1506,7 +1513,6 @@ static struct nfnetlink_subsystem ctnl_subsys = { .name = "conntrack", .subsys_id = NFNL_SUBSYS_CTNETLINK, .cb_count = IPCTNL_MSG_MAX, - .attr_count = CTA_MAX, .cb = ctnl_cb, }; @@ -1514,7 +1520,6 @@ static struct nfnetlink_subsystem ctnl_exp_subsys = { .name = "conntrack_expect", .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, .cb_count = IPCTNL_MSG_EXP_MAX, - .attr_count = CTA_MAX, .cb = ctnl_exp_cb, }; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 30b25f47f7cc..578e4fe40945 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -155,8 +155,18 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, struct nlmsghdr *nlh, struct nfattr *cda[]) { int min_len; + u_int16_t attr_count; + u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - memset(cda, 0, sizeof(struct nfattr *) * subsys->attr_count); + if (unlikely(cb_id >= subsys->cb_count)) { + DEBUGP("msgtype %u >= %u, returning\n", + cb_id, subsys->cb_count); + return -EINVAL; + } + + attr_count = subsys->cb[cb_id].attr_count; + + memset(cda, 0, sizeof(struct nfattr *) * attr_count); /* check attribute lengths. 
*/ min_len = NLMSG_ALIGN(sizeof(struct nfgenmsg)); @@ -170,7 +180,7 @@ nfnetlink_check_attributes(struct nfnetlink_subsystem *subsys, while (NFA_OK(attr, attrlen)) { unsigned flavor = attr->nfa_type; if (flavor) { - if (flavor > subsys->attr_count) + if (flavor > attr_count) return -EINVAL; cda[flavor - 1] = attr; } @@ -256,9 +266,11 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, } { - struct nfattr *cda[ss->attr_count]; + u_int16_t attr_count = + ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; + struct nfattr *cda[attr_count]; - memset(cda, 0, ss->attr_count*sizeof(struct nfattr *)); + memset(cda, 0, sizeof(struct nfattr *) * attr_count); err = nfnetlink_check_attributes(ss, nlh, cda); if (err < 0) diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index f41045e385ae..1750f0d6e4de 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -805,8 +805,10 @@ out_put: static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, - .cap_required = CAP_NET_ADMIN }, + .attr_count = NFULA_MAX, + .cap_required = CAP_NET_ADMIN, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, + .attr_count = NFULA_CFG_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -814,7 +816,6 @@ static struct nfnetlink_subsystem nfulnl_subsys = { .name = "log", .subsys_id = NFNL_SUBSYS_ULOG, .cb_count = NFULNL_MSG_MAX, - .attr_count = NFULA_MAX, .cb = nfulnl_cb, }; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index d7b0330d64b4..04323ee1eb8d 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -877,10 +877,13 @@ out_put: static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, + .attr_count = NFQA_MAX, .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, + .attr_count = NFQA_MAX, .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_CONFIG] = { .call = 
nfqnl_recv_config, + .attr_count = NFQA_CFG_MAX, .cap_required = CAP_NET_ADMIN }, }; @@ -888,7 +891,6 @@ static struct nfnetlink_subsystem nfqnl_subsys = { .name = "nf_queue", .subsys_id = NFNL_SUBSYS_QUEUE, .cb_count = NFQNL_MSG_MAX, - .attr_count = NFQA_MAX, .cb = nfqnl_cb, }; -- cgit v1.2.3 From 1444fc559b01aa5d4fedf4ee4f306a9e9cd56f95 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:04:07 -0700 Subject: [NETFILTER]: don't use nested attributes for conntrack_expect We used to use nested nfattr structures for ip_conntrack_expect. This is bogus, since ip_conntrack and ip_conntrack_expect are communicated in different netlink message types. both should be encoded at the top level attributes, no extra nesting required. This patch addresses the issue. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 3 +- net/ipv4/netfilter/ip_conntrack_netlink.c | 85 ++++++++++++--------------- 2 files changed, 41 insertions(+), 47 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb528e0e3bd9..5c55751c78e4 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -33,7 +33,6 @@ enum ctattr_type { CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY, CTA_USE, - CTA_EXPECT, CTA_ID, __CTA_MAX }; @@ -103,10 +102,12 @@ enum ctattr_protonat { enum ctattr_expect { CTA_EXPECT_UNSPEC, + CTA_EXPECT_MASTER, CTA_EXPECT_TUPLE, CTA_EXPECT_MASK, CTA_EXPECT_TIMEOUT, CTA_EXPECT_ID, + CTA_EXPECT_HELP_NAME, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 53d98974dcf0..f5bda82c2875 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1100,18 +1100,21 @@ static inline int ctnetlink_exp_dump_expect(struct 
sk_buff *skb, const struct ip_conntrack_expect *exp) { + struct ip_conntrack *master = exp->master; u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); u_int32_t id = htonl(exp->id); - struct nfattr *nest_parms = NFA_NEST(skb, CTA_EXPECT); if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) goto nfattr_failure; if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) goto nfattr_failure; + if (ctnetlink_exp_dump_tuple(skb, + &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + CTA_EXPECT_MASTER) < 0) + goto nfattr_failure; NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); - NFA_NEST_END(skb, nest_parms); return 0; @@ -1259,10 +1262,8 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, return 0; } - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); + if (cda[CTA_EXPECT_MASTER-1]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER); else return -EINVAL; @@ -1310,13 +1311,33 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct ip_conntrack_helper *h; int err; - /* delete by tuple needs either orig or reply tuple */ - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); - else if (cda[CTA_HELP_NAME-1]) { - char *name = NFA_DATA(cda[CTA_HELP_NAME-1]); + if (cda[CTA_EXPECT_TUPLE-1]) { + /* delete a single expect by tuple */ + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); + if (err < 0) + return err; + + /* bump usage count to 2 */ + exp = ip_conntrack_expect_find_get(&tuple); + if (!exp) + return -ENOENT; + + if (cda[CTA_EXPECT_ID-1]) { + u_int32_t id = + *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); + if (exp->id != ntohl(id)) { + 
ip_conntrack_expect_put(exp); + return -ENOENT; + } + } + + /* after list removal, usage count == 1 */ + ip_conntrack_unexpect_related(exp); + /* have to put what we 'get' above. + * after this line usage count == 0 */ + ip_conntrack_expect_put(exp); + } else if (cda[CTA_EXPECT_HELP_NAME-1]) { + char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); /* delete all expectations for this helper */ write_lock_bh(&ip_conntrack_lock); @@ -1332,7 +1353,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, __ip_ct_expect_unlink_destroy(exp); } write_unlock(&ip_conntrack_lock); - return 0; } else { /* This basically means we have to flush everything*/ write_lock_bh(&ip_conntrack_lock); @@ -1342,30 +1362,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, __ip_ct_expect_unlink_destroy(exp); } write_unlock_bh(&ip_conntrack_lock); - return 0; } - if (err < 0) - return err; - - /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); - if (!exp) - return -ENOENT; - - if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { - ip_conntrack_expect_put(exp); - return -ENOENT; - } - } - - /* after list removal, usage count == 1 */ - ip_conntrack_unexpect_related(exp); - /* have to put what we 'get' above. 
after this line usage count == 0 */ - ip_conntrack_expect_put(exp); - return 0; } static int @@ -1385,21 +1383,14 @@ ctnetlink_create_expect(struct nfattr *cda[]) DEBUGP("entered %s\n", __FUNCTION__); + /* caller guarantees that those three CTA_EXPECT_* exist */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); if (err < 0) return err; err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK); if (err < 0) return err; - - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &master_tuple, - CTA_TUPLE_REPLY); - else - return -EINVAL; - + err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER); if (err < 0) return err; @@ -1444,7 +1435,9 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1]) + if (!cda[CTA_EXPECT_TUPLE-1] + || !cda[CTA_EXPECT_MASK-1] + || !cda[CTA_EXPECT_MASTER-1]) return -EINVAL; err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); -- cgit v1.2.3 From c752f0739f09b803aed191c4765a3b6650a08653 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:28 -0700 Subject: [TCP]: Move the tcp sock states to net/tcp_states.h Lots of places just need the states, not even linux/tcp.h, where this enum was, needs it. This speeds up development of the refactorings as fewer sources are rebuilt when things get moved from net/tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- fs/smbfs/sock.c | 2 +- include/linux/tcp.h | 18 ------------------ include/net/dn.h | 1 + include/net/inet_hashtables.h | 2 +- include/net/ip6_route.h | 1 - include/net/ip_vs.h | 1 - include/net/sctp/constants.h | 2 +- include/net/tcp.h | 2 ++ include/net/tcp_states.h | 34 ++++++++++++++++++++++++++++++++++ net/appletalk/ddp.c | 2 +- net/ax25/af_ax25.c | 2 +- net/ax25/ax25_ds_in.c | 2 +- net/ax25/ax25_ds_timer.c | 2 +- net/ax25/ax25_in.c | 2 +- net/ax25/ax25_std_in.c | 2 +- net/ax25/ax25_std_timer.c | 2 +- net/ax25/ax25_subr.c | 2 +- net/core/datagram.c | 6 +++--- net/decnet/af_decnet.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/ipvs/ip_vs_app.c | 1 + net/ipv4/protocol.c | 1 - net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 3 ++- net/ipv6/datagram.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/udp.c | 1 + net/ipx/af_ipx.c | 2 +- net/ipx/ipx_proc.c | 2 +- net/irda/af_irda.c | 2 +- net/llc/af_llc.c | 2 +- net/llc/llc_conn.c | 2 +- net/llc/llc_if.c | 2 +- net/llc/llc_sap.c | 2 +- net/netrom/af_netrom.c | 2 +- net/netrom/nr_in.c | 2 +- net/netrom/nr_subr.c | 2 +- net/netrom/nr_timer.c | 2 +- net/rose/af_rose.c | 2 +- net/rose/rose_in.c | 2 +- net/rose/rose_route.c | 2 +- net/rose/rose_subr.c | 2 +- net/rose/rose_timer.c | 2 +- net/sctp/ipv6.c | 2 +- net/sunrpc/svcsock.c | 3 +-- net/unix/af_unix.c | 2 +- net/unix/garbage.c | 2 +- net/wanrouter/af_wanpipe.c | 2 +- net/x25/af_x25.c | 2 +- net/x25/x25_in.c | 2 +- net/x25/x25_subr.c | 2 +- net/x25/x25_timer.c | 2 +- 53 files changed, 86 insertions(+), 66 deletions(-) create mode 100644 include/net/tcp_states.h (limited to 'include/linux') diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c index 93f3cd22a2e9..6815b1b12b68 100644 --- a/fs/smbfs/sock.c +++ b/fs/smbfs/sock.c @@ -15,12 +15,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include #include diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e70ab19652db..b88fe05fdcbf 100644 --- 
a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,24 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; - -enum { - TCP_ESTABLISHED = 1, - TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, /* now a valid state */ - - TCP_MAX_STATES /* Leave at the end! */ -}; - -#define TCP_STATE_MASK 0xF #define TCP_ACTION_FIN (1 << 7) enum { diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db397..c1dbbd222793 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,6 +3,7 @@ #include #include +#include #include typedef unsigned short dn_address; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f5d65121f7b7..c816708fa556 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,11 +19,11 @@ #include #include #include -#include /* only for TCP_LISTEN, damn :-( */ #include #include #include +#include #include diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526b..1f2e428ca364 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617a..7a3c43711a17 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -255,7 +255,6 @@ struct ip_vs_daemon_user { #include /* for struct atomic_t */ #include /* for struct neighbour */ #include /* for struct dst_entry */ -#include #include #include diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bbf..c51541ee0247 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -47,10 +47,10 @@ #ifndef __sctp_constants_h__ #define __sctp_constants_h__ -#include /* For TCP states used in sctp_sock_state_t */ #include #include /* For ipv6hdr. 
*/ #include +#include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. */ enum { SCTP_MAX_STREAM = 0xffff }; diff --git a/include/net/tcp.h b/include/net/tcp.h index bc110cc7022b..9d026d81d8c8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -36,6 +36,8 @@ #include #include #include +#include + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) #include #endif diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 000000000000..b9d4176b2d15 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! 
*/ +}; + +#define TCP_STATE_MASK 0xF + +#endif /* _LINUX_TCP_STATES_H */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ffde33cd09ba..1d31b3a3f1e5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -53,12 +53,12 @@ #include #include -#include #include #include /* For TIOCOUTQ/INQ */ #include #include #include +#include #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index a5c94f11547c..ea43dfb774e2 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/ax25/ax25_ds_in.c b/net/ax25/ax25_ds_in.c index 8adc0022cf58..5d0f8fb9d7aa 100644 --- a/net/ax25/ax25_ds_in.c +++ b/net/ax25/ax25_ds_in.c @@ -23,7 +23,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 3a8b67316fc3..061083efc1dc 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_in.c b/net/ax25/ax25_in.c index 124eec8216d7..0357705d575d 100644 --- a/net/ax25/ax25_in.c +++ b/net/ax25/ax25_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include /* For arp_rcv */ #include #include diff --git a/net/ax25/ax25_std_in.c b/net/ax25/ax25_std_in.c index 7131873322c4..83a33387c061 100644 --- a/net/ax25/ax25_std_in.c +++ b/net/ax25/ax25_std_in.c @@ -30,7 +30,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 066897bc0749..a29c480a4dc1 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 
eb7343c10a9f..c41dbe5fadee 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/datagram.c b/net/core/datagram.c index fcee054b6f75..da9bf71421a7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -51,9 +50,10 @@ #include #include -#include -#include +#include +#include +#include /* * Is a socket 'connection oriented' ? diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index bd49dd97a09c..621680f127af 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -118,7 +118,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 202dbde9850d..369f25b60f3f 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b1db561f2542..3fd49f4282ac 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/ipv4/ipvs/ip_vs_app.c index d9212addd193..6e092dadb388 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/ipv4/ipvs/ip_vs_app.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 0db405a869f2..291831e792af 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index e222c5c26b32..304bb0a1d4f0 100644 --- a/net/ipv4/raw.c +++ 
b/net/ipv4/raw.c @@ -59,7 +59,6 @@ #include #include #include -#include #include #include #include @@ -71,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index dc4d07357e3a..a8135e1f528c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -95,7 +95,8 @@ #include #include #include -#include +#include +#include #include #include #include diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 5229365cd8b4..761984f3bd9c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a082646e6f16..766e1c7179a2 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eff050ac7049..2ffe34cc2ef8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 3a13c5d1d4d2..39d5939ccd6c 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include @@ -52,6 +51,7 @@ #include #include #include +#include #include diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index b6761913445a..1f73d9ea434d 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include static __inline__ struct ipx_interface *ipx_get_interface_idx(loff_t pos) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 92c6e8d4e731..6f92f9c62990 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -56,7 +56,7 @@ #include #include -#include +#include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index f49b82da8264..66f55e514b56 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -23,13 +23,13 @@ #include #include #include -#include 
#include #include #include #include #include #include +#include /* remember: uninitialized global data is zeroed because its in .bss */ static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START; diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 571548619469..4c644bc70eae 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c index 0f9fc48aeaf9..0f84f66018e4 100644 --- a/net/llc/llc_if.c +++ b/net/llc/llc_if.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include u8 llc_mac_null_var[IFHWADDRLEN]; diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c index 965c94eb4bbc..34228ef14985 100644 --- a/net/llc/llc_sap.c +++ b/net/llc/llc_sap.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include /** diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 162a85fed150..9aa8b14a81ab 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 9c44b3794126..2fcba9e24b2e 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include /* For ip_rcv */ #include #include diff --git a/net/netrom/nr_subr.c b/net/netrom/nr_subr.c index 252c1b3ecd78..587bed2674bf 100644 --- a/net/netrom/nr_subr.c +++ b/net/netrom/nr_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index faabda8088be..75b72d389ba9 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 
5480caf8ccc2..c6e59f84c3ae 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index ef475a1bb1ba..a52417bd0a16 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -27,7 +27,7 @@ #include #include #include /* For ip_rcv */ -#include +#include #include #include #include diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 25da6f699fd0..4510cd7613ec 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index ae135e27799b..a29a3a960fd6 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index 84dd4403f792..50ae0371dab8 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 4a6421a9fcab..fa3be2b8fb5f 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -66,8 +66,8 @@ #include #include -#include #include +#include #include #include #include diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d0c3120d0233..e750cb685cb6 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -34,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d403e34088ad..bc4c44552c1f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -105,7 +105,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 
46252d2807bb..6ffc64e1712d 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -76,11 +76,11 @@ #include #include #include -#include #include #include #include +#include /* Internal data structures and random procedures: */ diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c index d93b19faaab7..596cb96e5f47 100644 --- a/net/wanrouter/af_wanpipe.c +++ b/net/wanrouter/af_wanpipe.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 04bec047fa9a..020d73cc8414 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* For TIOCINQ/OUTQ */ diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index b0197c70a9fc..26146874b839 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include static int x25_queue_rx_frame(struct sock *sk, struct sk_buff *skb, int more) diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index e20cfadad4d9..8be9b8fbc24d 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index d6a21a3ad80e..0a92e1da3922 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include static void x25_heartbeat_expiry(unsigned long); -- cgit v1.2.3 From 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:30 -0700 Subject: [INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets This paves the way to generalise the rest of the sock ID lookup routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro kernels (where IPv6 is always built as a module): [root@qemu ~]# grep tw_sock /proc/slabinfo tw_sock_TCPv6 0 0 128 31 1 tw_sock_TCP 
0 0 96 41 1 [root@qemu ~]# Now if a protocol wants to use the TIME_WAIT generic infrastructure it only has to set the sk_prot->twsk_obj_size field with the size of its inet_timewait_sock derived sock and proto_register will create sk_prot->twsk_slab, for now its only for INET sockets, but we can introduce timewait_sock later if some non INET transport protocolo wants to use this stuff. Next changesets will take advantage of this new infrastructure to generalise even more TCP code. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o /tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o [acme@toy net-2.6.14]$ Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1 (qemu host)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 52 +++++++++- include/linux/tcp.h | 15 +++ include/net/inet_hashtables.h | 41 ++++++++ include/net/inet_timewait_sock.h | 142 +++++++++++++++++++++++++++ include/net/sock.h | 17 ++-- include/net/tcp.h | 202 +-------------------------------------- net/core/sock.c | 35 ++++++- net/ipv4/tcp.c | 10 -- net/ipv4/tcp_diag.c | 10 +- net/ipv4/tcp_ipv4.c | 107 +++++++++++---------- net/ipv4/tcp_minisocks.c | 142 ++++++++++++++------------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 100 ++++++++++--------- 13 files changed, 484 insertions(+), 391 deletions(-) create mode 100644 include/net/inet_timewait_sock.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade24..98fa32316e40 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -308,6 +308,41 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) + +#include + +struct tcp6_timewait_sock { + struct tcp_timewait_sock tw_v6_sk; + struct 
in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + +static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +{ + return (struct tcp6_timewait_sock *)sk; +} + +static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; +} + +static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +{ + return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; +} + +static inline int tcp_twsk_ipv6only(const struct sock *sk) +{ + return inet_twsk(sk)->tw_ipv6only; +} + +static inline int tcp_v6_ipv6only(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); +} #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 @@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#endif +#define __tcp_v6_rcv_saddr(__sk) NULL +#define tcp_v6_rcv_saddr(__sk) NULL +#define tcp_twsk_ipv6only(__sk) 0 +#define tcp_v6_ipv6only(__sk) 0 +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#endif +#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ + ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ + ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif +#endif /* __KERNEL__ */ + +#endif /* _IPV6_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b88fe05fdcbf..5d295b1b3de7 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -179,6 +179,7 @@ struct tcp_info #include #include #include +#include /* This defines a selective acknowledgement block. 
*/ struct tcp_sack_block { @@ -387,6 +388,20 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) return (struct tcp_sock *)sk; } +struct tcp_timewait_sock { + struct inet_timewait_sock tw_sk; + __u32 tw_rcv_nxt; + __u32 tw_snd_nxt; + __u32 tw_rcv_wnd; + __u32 tw_ts_recent; + long tw_ts_recent_stamp; +}; + +static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) +{ + return (struct tcp_timewait_sock *)sk; +} + static inline void *tcp_ca(const struct tcp_sock *tp) { return (void *) tp->ca_priv; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1c4fa0065a8e..c38c637e0734 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,6 +14,8 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include + #include #include #include @@ -310,4 +312,43 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } + +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport) << 16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport) << 16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 
*)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 000000000000..ce117048f2fd --- /dev/null +++ b/include/net/inet_timewait_sock.h @@ -0,0 +1,142 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for a generic INET TIMEWAIT sock + * + * From code originally in net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_TIMEWAIT_SOCK_ +#define _INET_TIMEWAIT_SOCK_ + +#include + +#include +#include + +#include +#include + +#include + +#if (BITS_PER_LONG == 64) +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 +#else +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 +#endif + +struct inet_bind_bucket; + +/* + * This is a TIME_WAIT sock. 
It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct inet_timewait_sock { + /* + * Now struct sock also uses sock_common, so please just + * don't add nothing before this first member (__tw_common) --acme + */ + struct sock_common __tw_common; +#define tw_family __tw_common.skc_family +#define tw_state __tw_common.skc_state +#define tw_reuse __tw_common.skc_reuse +#define tw_bound_dev_if __tw_common.skc_bound_dev_if +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node +#define tw_refcnt __tw_common.skc_refcnt +#define tw_prot __tw_common.skc_prot + volatile unsigned char tw_substate; + /* 3 bits hole, try to pack */ + unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_sock */ + __u16 tw_sport; + __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __u32 tw_rcv_saddr; + __u16 tw_dport; + __u16 tw_num; + /* And these are ours. 
*/ + __u8 tw_ipv6only:1; + /* 31 bits hole, try to pack */ + int tw_hashent; + int tw_timeout; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; +}; + +static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + __hlist_del(&tw->tw_death_node); + inet_twsk_dead_node_init(tw); +} + +static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + if (inet_twsk_dead_hashed(tw)) { + __inet_twsk_del_dead_node(tw); + return 1; + } + return 0; +} + +#define inet_twsk_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define inet_twsk_for_each_inmate(tw, node, jail) \ + hlist_for_each_entry(tw, node, jail, tw_death_node) + +#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + +static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) +{ + return (struct inet_timewait_sock *)sk; +} + +static inline u32 inet_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? 
+ inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +} + +static inline void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) { +#ifdef SOCK_REFCNT_DEBUG + printk(KERN_DEBUG "%s timewait_sock %p released\n", + tw->tw_prot->name, tw); +#endif + kmem_cache_free(tw->tw_prot->twsk_slab, tw); + } +} +#endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index 391d00b5b7b4..c902c57bf2b7 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ } while(0) struct sock; +struct proto; /** * struct sock_common - minimal network layer representation of sockets @@ -98,10 +99,11 @@ struct sock; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header - * for struct sock and struct tcp_tw_bucket. - */ + * for struct sock and struct inet_timewait_sock. 
+ */ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; @@ -110,11 +112,12 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + struct proto *skc_prot; }; /** * struct sock - network layer representation of sockets - * @__sk_common: shared layout with tcp_tw_bucket + * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer @@ -140,7 +143,6 @@ struct sock_common { * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used - * @sk_prot: protocol handlers inside a network family * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' @@ -173,7 +175,7 @@ struct sock_common { */ struct sock { /* - * Now struct tcp_tw_bucket also uses sock_common, so please just + * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; @@ -184,6 +186,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -218,7 +221,6 @@ struct sock { struct sk_buff *tail; } sk_backlog; struct sk_buff_head sk_error_queue; - struct proto *sk_prot; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, @@ -557,6 +559,9 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + struct request_sock_ops *rsk_prot; struct module *owner; 
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c8..cf8e664176ad 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -38,207 +38,14 @@ #include #include -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -#include -#endif #include extern struct inet_hashinfo tcp_hashinfo; -#if (BITS_PER_LONG == 64) -#define TCP_ADDRCMP_ALIGN_BYTES 8 -#else -#define TCP_ADDRCMP_ALIGN_BYTES 4 -#endif - -/* This is a TIME_WAIT bucket. It works around the memory consumption - * problems of sockets in such a state on heavily loaded servers, but - * without violating the protocol specification. - */ -struct tcp_tw_bucket { - /* - * Now struct sock also uses sock_common, so please just - * don't add nothing before this first member (__tw_common) --acme - */ - struct sock_common __tw_common; -#define tw_family __tw_common.skc_family -#define tw_state __tw_common.skc_state -#define tw_reuse __tw_common.skc_reuse -#define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node -#define tw_bind_node __tw_common.skc_bind_node -#define tw_refcnt __tw_common.skc_refcnt - volatile unsigned char tw_substate; - unsigned char tw_rcv_wscale; - __u16 tw_sport; - /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ - __u32 tw_daddr - __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; - __u16 tw_num; - /* And these are ours. 
*/ - int tw_hashent; - int tw_timeout; - __u32 tw_rcv_nxt; - __u32 tw_snd_nxt; - __u32 tw_rcv_wnd; - __u32 tw_ts_recent; - long tw_ts_recent_stamp; - unsigned long tw_ttd; - struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; - int tw_v6_ipv6only; -#endif -}; - -static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_node, list); -} - -static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) -{ - return tw->tw_death_node.pprev != NULL; -} - -static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - __hlist_del(&tw->tw_death_node); - tw_dead_node_init(tw); -} - -static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - if (tw_dead_hashed(tw)) { - __tw_del_dead_node(tw); - return 1; - } - return 0; -} - -#define tw_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) - -#define tw_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) - -#define tw_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) - -#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) - -static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; -} - -#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) - -static inline int tcp_v6_ipv6only(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); -} -#else -# define __tcp_v6_rcv_saddr(__sk) NULL -# define tcp_v6_rcv_saddr(__sk) NULL -# define tcptw_sk_ipv6only(__sk) 0 -# define tcp_v6_ipv6only(__sk) 0 -#endif - -extern kmem_cache_t *tcp_timewait_cachep; - -static inline void tcp_tw_put(struct tcp_tw_bucket *tw) -{ - if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "tw_bucket %p released\n", tw); -#endif - kmem_cache_free(tcp_timewait_cachep, tw); - } -} - extern atomic_t tcp_orphan_count; extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); - - -/* Socket demux engine toys. 
*/ -#ifdef __BIG_ENDIAN -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport)<<16) | (__u32)(__dport)) -#else /* __LITTLE_ENDIAN */ -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport)<<16) | (__u32)(__sport)) -#endif - -#if (BITS_PER_LONG == 64) -#ifdef __BIG_ENDIAN -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); -#else /* __LITTLE_ENDIAN */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); -#endif /* __BIG_ENDIAN */ -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#else /* 32-bit arch */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ - (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif /* 64-bit arch */ - -#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - 
((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); extern int tcp_v4_remember_stamp(struct sock *sk); -extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); @@ -616,10 +423,9 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, diff --git a/net/core/sock.c b/net/core/sock.c index a1a23be10aa3..aba31fedf2ac 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1378,7 +1378,8 @@ static LIST_HEAD(proto_list); int proto_register(struct proto *prot, int alloc_slab) { - char *request_sock_slab_name; + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; int rc = -ENOBUFS; if (alloc_slab) { @@ -1409,6 +1410,23 @@ int proto_register(struct proto *prot, int alloc_slab) goto out_free_request_sock_slab_name; } } + + if (prot->twsk_obj_size) { + static const char mask[] = "tw_sock_%s"; + + timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL); + + if (timewait_sock_slab_name == NULL) + goto out_free_request_sock_slab; + + sprintf(timewait_sock_slab_name, mask, prot->name); + prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name, + prot->twsk_obj_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if 
(prot->twsk_slab == NULL) + goto out_free_timewait_sock_slab_name; + } } write_lock(&proto_list_lock); @@ -1417,6 +1435,13 @@ int proto_register(struct proto *prot, int alloc_slab) rc = 0; out: return rc; +out_free_timewait_sock_slab_name: + kfree(timewait_sock_slab_name); +out_free_request_sock_slab: + if (prot->rsk_prot && prot->rsk_prot->slab) { + kmem_cache_destroy(prot->rsk_prot->slab); + prot->rsk_prot->slab = NULL; + } out_free_request_sock_slab_name: kfree(request_sock_slab_name); out_free_sock_slab: @@ -1444,6 +1469,14 @@ void proto_unregister(struct proto *prot) prot->rsk_prot->slab = NULL; } + if (prot->twsk_slab != NULL) { + const char *name = kmem_cache_name(prot->twsk_slab); + + kmem_cache_destroy(prot->twsk_slab); + kfree(name); + prot->twsk_slab = NULL; + } + list_del(&prot->node); write_unlock(&proto_list_lock); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2f4b1a374bb7..f1a708bf7a97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -271,8 +271,6 @@ int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics); -kmem_cache_t *tcp_timewait_cachep; - atomic_t tcp_orphan_count = ATOMIC_INIT(0); int sysctl_tcp_mem[3]; @@ -2264,13 +2262,6 @@ void __init tcp_init(void) if (!tcp_hashinfo.bind_bucket_cachep) panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); - tcp_timewait_cachep = kmem_cache_create("tcp_tw_bucket", - sizeof(struct tcp_tw_bucket), - 0, SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!tcp_timewait_cachep) - panic("tcp_init: Cannot alloc tcp_tw_bucket cache."); - /* Size and allocate the main established and bind bucket * hash tables. 
* @@ -2363,4 +2354,3 @@ EXPORT_SYMBOL(tcp_sendpage); EXPORT_SYMBOL(tcp_setsockopt); EXPORT_SYMBOL(tcp_shutdown); EXPORT_SYMBOL(tcp_statistics); -EXPORT_SYMBOL(tcp_timewait_cachep); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 1a89a03c449b..6f2d6f2276b9 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -81,7 +81,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); if (r->tcpdiag_state == TCP_TIME_WAIT) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); long tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; @@ -99,10 +99,12 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_inode = 0; #ifdef CONFIG_IP_TCPDIAG_IPV6 if (r->tcpdiag_family == AF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, - &tw->tw_v6_rcv_saddr); + &tcp6tw->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, - &tw->tw_v6_daddr); + &tcp6tw->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -239,7 +241,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) out: if (sk) { if (sk->sk_state == TCP_TIME_WAIT) - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); else sock_put(sk); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a678709b36f6..ce423e48ebe0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -106,7 +106,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = tcp_v4_rcv_saddr(sk); + const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -119,7 +119,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb 
sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; @@ -251,10 +251,10 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, const int dif) { struct inet_ehash_bucket *head; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; - struct hlist_node *node; + const struct hlist_node *node; /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -262,13 +262,13 @@ static inline struct sock *__tcp_v4_lookup_established(const u32 saddr, head = &tcp_hashinfo.ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { - if (TCP_IPV4_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - if (TCP_IPV4_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) goto hit; } sk = NULL; @@ -313,27 +313,28 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb) /* called with local bh disabled */ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); u32 daddr = inet->rcv_saddr; u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; - TCP_V4_ADDR_COOKIE(acookie, saddr, daddr) - __u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket *)sk2; + tw = inet_twsk(sk2); - if (TCP_IPV4_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); /* With PAWS, it is safe from the viewpoint @@ -350,15 +351,15 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, fall back to VJ's scheme and use initial timestamp retrieved from peer table. 
*/ - if (tw->tw_ts_recent_stamp && + if (tcptw->tw_ts_recent_stamp && (!twp || (sysctl_tcp_tw_reuse && xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { - if ((tp->write_seq = - tw->tw_snd_nxt + 65535 + 2) == 0) + tcptw->tw_ts_recent_stamp > 1))) { + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; + if (tp->write_seq == 0) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -369,7 +370,7 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport, /* And established part... */ sk_for_each(sk2, node, &head->chain) { - if (TCP_IPV4_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) goto not_unique; } @@ -392,7 +393,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -429,7 +430,7 @@ static inline int tcp_v4_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + connect_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -482,7 +483,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -757,7 +758,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) return; } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1002,12 +1003,13 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v4_send_ack(skb, tw->tw_snd_nxt, 
tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v4_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1368,7 +1370,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket *)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1557,25 +1559,25 @@ discard_and_relse: do_time_wait: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } if (skb->len < (th->doff << 2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *) sk); goto discard_it; } - switch (tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), tcp_v4_iif(skb)); if (sk2) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -1639,18 +1641,18 @@ int tcp_v4_remember_stamp(struct sock *sk) return 0; } -int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw) +int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw) { - struct inet_peer *peer = NULL; - - peer = inet_getpeer(tw->tw_daddr, 1); + struct inet_peer *peer = inet_getpeer(tw->tw_daddr, 1); if (peer) { - if ((s32)(peer->tcp_ts - tw->tw_ts_recent) <= 0 || + const struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + 
if ((s32)(peer->tcp_ts - tcptw->tw_ts_recent) <= 0 || (peer->tcp_ts_stamp + TCP_PAWS_MSL < xtime.tv_sec && - peer->tcp_ts_stamp <= tw->tw_ts_recent_stamp)) { - peer->tcp_ts_stamp = tw->tw_ts_recent_stamp; - peer->tcp_ts = tw->tw_ts_recent; + peer->tcp_ts_stamp <= tcptw->tw_ts_recent_stamp)) { + peer->tcp_ts_stamp = tcptw->tw_ts_recent_stamp; + peer->tcp_ts = tcptw->tw_ts_recent; } inet_putpeer(peer); return 1; @@ -1758,13 +1760,13 @@ EXPORT_SYMBOL(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ -static inline struct tcp_tw_bucket *tw_head(struct hlist_head *head) +static inline struct inet_timewait_sock *tw_head(struct hlist_head *head) { return hlist_empty(head) ? NULL : - list_entry(head->first, struct tcp_tw_bucket, tw_node); + list_entry(head->first, struct inet_timewait_sock, tw_node); } -static inline struct tcp_tw_bucket *tw_next(struct tcp_tw_bucket *tw) +static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) { return tw->tw_node.next ? 
hlist_entry(tw->tw_node.next, typeof(*tw), tw_node) : NULL; @@ -1860,7 +1862,7 @@ static void *established_get_first(struct seq_file *seq) for (st->bucket = 0; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) { struct sock *sk; struct hlist_node *node; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; /* We can reschedule _before_ having picked the target: */ cond_resched_softirq(); @@ -1874,8 +1876,8 @@ static void *established_get_first(struct seq_file *seq) goto out; } st->state = TCP_SEQ_STATE_TIME_WAIT; - tw_for_each(tw, node, - &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { + inet_twsk_for_each(tw, node, + &tcp_hashinfo.ehash[st->bucket + tcp_hashinfo.ehash_size].chain) { if (tw->tw_family != st->family) { continue; } @@ -1892,7 +1894,7 @@ out: static void *established_get_next(struct seq_file *seq, void *cur) { struct sock *sk = cur; - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; struct tcp_iter_state* st = seq->private; @@ -2159,7 +2161,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->snd_ssthresh >= 0xFFFF ? 
-1 : tp->snd_ssthresh); } -static void get_timewait4_sock(struct tcp_tw_bucket *tw, char *tmpbuf, int i) +static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { unsigned int dest, src; __u16 destp, srcp; @@ -2261,6 +2263,7 @@ struct proto tcp_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), + .twsk_obj_size = sizeof(struct tcp_timewait_sock), .rsk_prot = &tcp_request_sock_ops, }; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f29e2f6ebe1b..5b5a49335fbb 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -41,7 +41,7 @@ int sysctl_tcp_max_tw_buckets = NR_FILE*2; int sysctl_tcp_syncookies = SYNC_INIT; int sysctl_tcp_abort_on_overflow; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo); +static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { @@ -58,7 +58,7 @@ int tcp_tw_count; /* Must be called with locally disabled BHs. */ -static void tcp_timewait_kill(struct tcp_tw_bucket *tw) +static void tcp_timewait_kill(struct inet_timewait_sock *tw) { struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; @@ -85,11 +85,11 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) #ifdef SOCK_REFCNT_DEBUG if (atomic_read(&tw->tw_refcnt) != 1) { - printk(KERN_DEBUG "tw_bucket %p refcnt=%d\n", tw, - atomic_read(&tw->tw_refcnt)); + printk(KERN_DEBUG "%s timewait_sock %p refcnt=%d\n", + tw->tw_prot->name, tw, atomic_read(&tw->tw_refcnt)); } #endif - tcp_tw_put(tw); + inet_twsk_put(tw); } /* @@ -121,19 +121,20 @@ static void tcp_timewait_kill(struct tcp_tw_bucket *tw) * to avoid misread sequence numbers, states etc. 
--ANK */ enum tcp_tw_status -tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, - struct tcphdr *th, unsigned len) +tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, + const struct tcphdr *th) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); struct tcp_options_received tmp_opt; int paws_reject = 0; tmp_opt.saw_tstamp = 0; - if (th->doff > (sizeof(struct tcphdr) >> 2) && tw->tw_ts_recent_stamp) { + if (th->doff > (sizeof(*th) >> 2) && tcptw->tw_ts_recent_stamp) { tcp_parse_options(skb, &tmp_opt, 0); if (tmp_opt.saw_tstamp) { - tmp_opt.ts_recent = tw->tw_ts_recent; - tmp_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tmp_opt.ts_recent = tcptw->tw_ts_recent; + tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; paws_reject = tcp_paws_check(&tmp_opt, th->rst); } } @@ -144,20 +145,20 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, /* Out of window, send ACK */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tw->tw_rcv_nxt, - tw->tw_rcv_nxt + tw->tw_rcv_wnd)) + tcptw->tw_rcv_nxt, + tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) return TCP_TW_ACK; if (th->rst) goto kill; - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt)) + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt)) goto kill_with_rst; /* Dup ACK? */ - if (!after(TCP_SKB_CB(skb)->end_seq, tw->tw_rcv_nxt) || + if (!after(TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt) || TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq) { - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -165,19 +166,19 @@ tcp_timewait_state_process(struct tcp_tw_bucket *tw, struct sk_buff *skb, * reset. */ if (!th->fin || - TCP_SKB_CB(skb)->end_seq != tw->tw_rcv_nxt + 1) { + TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_RST; } /* FIN arrived, enter true time-wait state. 
*/ - tw->tw_substate = TCP_TIME_WAIT; - tw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tw->tw_substate = TCP_TIME_WAIT; + tcptw->tw_rcv_nxt = TCP_SKB_CB(skb)->end_seq; if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent_stamp = xtime.tv_sec; - tw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; } /* I am shamed, but failed to make it more elegant. @@ -186,7 +187,7 @@ kill_with_rst: * do not undertsnad recycling in any case, it not * a big problem in practice. --ANK */ if (tw->tw_family == AF_INET && - sysctl_tcp_tw_recycle && tw->tw_ts_recent_stamp && + sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_v4_tw_remember_stamp(tw)) tcp_tw_schedule(tw, tw->tw_timeout); else @@ -212,7 +213,7 @@ kill_with_rst: */ if (!paws_reject && - (TCP_SKB_CB(skb)->seq == tw->tw_rcv_nxt && + (TCP_SKB_CB(skb)->seq == tcptw->tw_rcv_nxt && (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq || th->rst))) { /* In window segment, it may be only reset or bare ack. 
*/ @@ -224,18 +225,18 @@ kill_with_rst: if (sysctl_tcp_rfc1337 == 0) { kill: tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } } tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { - tw->tw_ts_recent = tmp_opt.rcv_tsval; - tw->tw_ts_recent_stamp = xtime.tv_sec; + tcptw->tw_ts_recent = tmp_opt.rcv_tsval; + tcptw->tw_ts_recent_stamp = xtime.tv_sec; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -257,9 +258,10 @@ kill: */ if (th->syn && !th->rst && !th->ack && !paws_reject && - (after(TCP_SKB_CB(skb)->seq, tw->tw_rcv_nxt) || - (tmp_opt.saw_tstamp && (s32)(tw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { - u32 isn = tw->tw_snd_nxt + 65535 + 2; + (after(TCP_SKB_CB(skb)->seq, tcptw->tw_rcv_nxt) || + (tmp_opt.saw_tstamp && + (s32)(tcptw->tw_ts_recent - tmp_opt.rcv_tsval) < 0))) { + u32 isn = tcptw->tw_snd_nxt + 65535 + 2; if (isn == 0) isn++; TCP_SKB_CB(skb)->when = isn; @@ -284,7 +286,7 @@ kill: */ return TCP_TW_ACK; } - tcp_tw_put(tw); + inet_twsk_put(tw); return TCP_TW_SUCCESS; } @@ -293,7 +295,7 @@ kill: * relevant info into it from the SK, and mess with hash chains * and list linkage. */ -static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) +static void __tcp_tw_hashdance(struct sock *sk, struct inet_timewait_sock *tw) { const struct inet_sock *inet = inet_sk(sk); struct inet_ehash_bucket *ehead = &tcp_hashinfo.ehash[sk->sk_hashent]; @@ -306,7 +308,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) spin_lock(&bhead->lock); tw->tw_tb = inet->bind_hash; BUG_TRAP(inet->bind_hash); - tw_add_bind_node(tw, &tw->tw_tb->owners); + inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); write_lock(&ehead->lock); @@ -316,7 +318,7 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) sock_prot_dec_use(sk->sk_prot); /* Step 3: Hash TW into TIMEWAIT half of established hash table. 
*/ - tw_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); + inet_twsk_add_node(tw, &(ehead + tcp_hashinfo.ehash_size)->chain); atomic_inc(&tw->tw_refcnt); write_unlock(&ehead->lock); @@ -327,19 +329,23 @@ static void __tcp_tw_hashdance(struct sock *sk, struct tcp_tw_bucket *tw) */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct tcp_tw_bucket *tw = NULL; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw = NULL; + const struct tcp_sock *tp = tcp_sk(sk); int recycle_ok = 0; if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tp->af_specific->remember_stamp(sk); if (tcp_tw_count < sysctl_tcp_max_tw_buckets) - tw = kmem_cache_alloc(tcp_timewait_cachep, SLAB_ATOMIC); + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, SLAB_ATOMIC); + + if (tw != NULL) { + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + const struct inet_sock *inet = inet_sk(sk); + const int rto = (tp->rto << 2) - (tp->rto >> 1); - if(tw != NULL) { - struct inet_sock *inet = inet_sk(sk); - int rto = (tp->rto<<2) - (tp->rto>>1); + /* Remember our protocol */ + tw->tw_prot = sk->sk_prot_creator; /* Give us an identity. 
*/ tw->tw_daddr = inet->daddr; @@ -356,25 +362,23 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) atomic_set(&tw->tw_refcnt, 1); tw->tw_hashent = sk->sk_hashent; - tw->tw_rcv_nxt = tp->rcv_nxt; - tw->tw_snd_nxt = tp->snd_nxt; - tw->tw_rcv_wnd = tcp_receive_window(tp); - tw->tw_ts_recent = tp->rx_opt.ts_recent; - tw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; - tw_dead_node_init(tw); + tcptw->tw_rcv_nxt = tp->rcv_nxt; + tcptw->tw_snd_nxt = tp->snd_nxt; + tcptw->tw_rcv_wnd = tcp_receive_window(tp); + tcptw->tw_ts_recent = tp->rx_opt.ts_recent; + tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; + inet_twsk_dead_node_init(tw); #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) if (tw->tw_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); - ipv6_addr_copy(&tw->tw_v6_daddr, &np->daddr); - ipv6_addr_copy(&tw->tw_v6_rcv_saddr, &np->rcv_saddr); - tw->tw_v6_ipv6only = np->ipv6only; - } else { - memset(&tw->tw_v6_daddr, 0, sizeof(tw->tw_v6_daddr)); - memset(&tw->tw_v6_rcv_saddr, 0, sizeof(tw->tw_v6_rcv_saddr)); - tw->tw_v6_ipv6only = 0; - } + ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr); + ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr); + tw->tw_ipv6only = np->ipv6only; + } else + tw->tw_ipv6only = 0; #endif /* Linkage updates. */ __tcp_tw_hashdance(sk, tw); @@ -392,7 +396,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) } tcp_tw_schedule(tw, timeo); - tcp_tw_put(tw); + inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than @@ -427,7 +431,7 @@ static u32 twkill_thread_slots; /* Returns non-zero if quota exceeded. 
*/ static int tcp_do_twkill_work(int slot, unsigned int quota) { - struct tcp_tw_bucket *tw; + struct inet_timewait_sock *tw; struct hlist_node *node; unsigned int killed; int ret; @@ -441,11 +445,11 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) killed = 0; ret = 0; rescan: - tw_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { - __tw_del_dead_node(tw); + inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { + __inet_twsk_del_dead_node(tw); spin_unlock(&tw_death_lock); tcp_timewait_kill(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); killed++; spin_lock(&tw_death_lock); if (killed > quota) { @@ -531,11 +535,11 @@ static void twkill_work(void *dummy) */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void tcp_tw_deschedule(struct tcp_tw_bucket *tw) +void tcp_tw_deschedule(struct inet_timewait_sock *tw) { spin_lock(&tw_death_lock); - if (tw_del_dead_node(tw)) { - tcp_tw_put(tw); + if (inet_twsk_del_dead_node(tw)) { + inet_twsk_put(tw); if (--tcp_tw_count == 0) del_timer(&tcp_tw_timer); } @@ -552,7 +556,7 @@ static struct timer_list tcp_twcal_timer = TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; -static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) +static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) { struct hlist_head *list; int slot; @@ -586,7 +590,7 @@ static void tcp_tw_schedule(struct tcp_tw_bucket *tw, int timeo) spin_lock(&tw_death_lock); /* Unlink it, if it was scheduled */ - if (tw_del_dead_node(tw)) + if (inet_twsk_del_dead_node(tw)) tcp_tw_count--; else atomic_inc(&tw->tw_refcnt); @@ -644,13 +648,13 @@ void tcp_twcal_tick(unsigned long dummy) for (n=0; nrcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2); u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = tcp_v4_rcv_saddr(sk2); + u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = 
tcp_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 93a66b9a76e1..af8ad5bb273b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -308,33 +308,32 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u struct in6_addr *daddr, u16 hnum, int dif) { - struct inet_ehash_bucket *head; struct sock *sk; - struct hlist_node *node; - __u32 ports = TCP_COMBINED_PORTS(sport, hnum); - int hash; - + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ - hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - head = &tcp_hashinfo.ehash[hash]; + const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); + struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. */ - if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { - /* FIXME: acme: check this... 
*/ - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { - if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && - (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) goto hit; } } @@ -455,43 +454,46 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } static int __tcp_v6_check_established(struct sock *sk, __u16 lport, - struct tcp_tw_bucket **twp) + struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *daddr = &np->rcv_saddr; struct in6_addr *saddr = &np->daddr; int dif = sk->sk_bound_dev_if; - u32 ports = TCP_COMBINED_PORTS(inet->dport, lport); - int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; - struct hlist_node *node; - struct tcp_tw_bucket *tw; + const struct hlist_node *node; + struct inet_timewait_sock *tw; write_lock(&head->lock); /* Check TIME-WAIT sockets first. 
*/ sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) { - tw = (struct tcp_tw_bucket*)sk2; + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2); + + tw = inet_twsk(sk2); if(*((__u32 *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && - ipv6_addr_equal(&tw->tw_v6_daddr, saddr) && - ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if) { + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2); struct tcp_sock *tp = tcp_sk(sk); - if (tw->tw_ts_recent_stamp && - (!twp || (sysctl_tcp_tw_reuse && - xtime.tv_sec - - tw->tw_ts_recent_stamp > 1))) { + if (tcptw->tw_ts_recent_stamp && + (!twp || + (sysctl_tcp_tw_reuse && + xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) { /* See comment in tcp_ipv4.c */ - tp->write_seq = tw->tw_snd_nxt + 65535 + 2; + tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2; if (!tp->write_seq) tp->write_seq = 1; - tp->rx_opt.ts_recent = tw->tw_ts_recent; - tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp; + tp->rx_opt.ts_recent = tcptw->tw_ts_recent; + tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; sock_hold(sk2); goto unique; } else @@ -502,7 +504,7 @@ static int __tcp_v6_check_established(struct sock *sk, __u16 lport, /* And established part... 
*/ sk_for_each(sk2, node, &head->chain) { - if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif)) + if (INET6_MATCH(sk2, saddr, daddr, ports, dif)) goto not_unique; } @@ -521,7 +523,7 @@ unique: tcp_tw_deschedule(tw); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); - tcp_tw_put(tw); + inet_twsk_put(tw); } return 0; @@ -556,7 +558,7 @@ static int tcp_v6_hash_connect(struct sock *sk) static u32 hint; u32 offset = hint + tcpv6_port_offset(sk); struct hlist_node *node; - struct tcp_tw_bucket *tw = NULL; + struct inet_timewait_sock *tw = NULL; local_bh_disable(); for (i = 1; i <= range; i++) { @@ -609,7 +611,7 @@ ok: if (tw) { tcp_tw_deschedule(tw); - tcp_tw_put(tw); + inet_twsk_put(tw); } ret = 0; @@ -845,7 +847,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } if (sk->sk_state == TCP_TIME_WAIT) { - tcp_tw_put((struct tcp_tw_bucket*)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); return; } @@ -1223,12 +1225,14 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { - struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk; + struct inet_timewait_sock *tw = inet_twsk(sk); + const struct tcp_timewait_sock *tcptw = tcp_twsk(sk); - tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt, - tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent); + tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, + tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, + tcptw->tw_ts_recent); - tcp_tw_put(tw); + inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) @@ -1261,7 +1265,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) bh_lock_sock(nsk); return nsk; } - tcp_tw_put((struct tcp_tw_bucket*)nsk); + inet_twsk_put((struct inet_timewait_sock *)nsk); return NULL; } @@ -1798,26 +1802,26 @@ discard_and_relse: do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - tcp_tw_put((struct 
tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { TCP_INC_STATS_BH(TCP_MIB_INERRS); - tcp_tw_put((struct tcp_tw_bucket *) sk); + inet_twsk_put((struct inet_timewait_sock *)sk); goto discard_it; } - switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk, - skb, th, skb->len)) { + switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk, + skb, th)) { case TCP_TW_SYN: { struct sock *sk2; sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); if (sk2 != NULL) { - tcp_tw_deschedule((struct tcp_tw_bucket *)sk); - tcp_tw_put((struct tcp_tw_bucket *)sk); + tcp_tw_deschedule((struct inet_timewait_sock *)sk); + inet_twsk_put((struct inet_timewait_sock *)sk); sk = sk2; goto process; } @@ -2137,17 +2141,18 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) } static void get_timewait6_sock(struct seq_file *seq, - struct tcp_tw_bucket *tw, int i) + struct inet_timewait_sock *tw, int i) { struct in6_addr *dest, *src; __u16 destp, srcp; + struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) ttd = 0; - dest = &tw->tw_v6_daddr; - src = &tw->tw_v6_rcv_saddr; + dest = &tcp6tw->tw_v6_daddr; + src = &tcp6tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); @@ -2244,6 +2249,7 @@ struct proto tcpv6_prot = { .sysctl_rmem = sysctl_tcp_rmem, .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), + .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .rsk_prot = &tcp6_request_sock_ops, }; -- cgit v1.2.3 From 463c84b97f24010a67cd871746d6a7e4c925a5f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:42 -0700 Subject: [NET]: Introduce inet_connection_sock This creates struct inet_connection_sock, moving members out of struct tcp_sock that are shareable with other INET connection oriented 
protocols, such as DCCP, that in my private tree already uses most of these members. The functions that operate on these members were renamed, using a inet_csk_ prefix while not being moved yet to a new file, so as to ease the review of these changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ip.h | 2 - include/linux/ipv6.h | 8 +- include/linux/tcp.h | 39 +----- include/net/inet_connection_sock.h | 86 ++++++++++++ include/net/inet_hashtables.h | 6 +- include/net/request_sock.h | 6 +- include/net/sock.h | 3 - include/net/tcp.h | 222 +++++++++++++++---------------- include/net/tcp_ecn.h | 2 +- net/ipv4/inet_hashtables.c | 15 +-- net/ipv4/inet_timewait_sock.c | 5 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 90 +++++++------ net/ipv4/tcp_diag.c | 21 +-- net/ipv4/tcp_input.c | 266 +++++++++++++++++++------------------ net/ipv4/tcp_ipv4.c | 158 ++++++++++++---------- net/ipv4/tcp_minisocks.c | 28 ++-- net/ipv4/tcp_output.c | 86 ++++++------ net/ipv4/tcp_timer.c | 165 ++++++++++++----------- net/ipv6/addrconf.c | 2 +- net/ipv6/tcp_ipv6.c | 54 ++++---- 21 files changed, 685 insertions(+), 581 deletions(-) create mode 100644 include/net/inet_connection_sock.h (limited to 'include/linux') diff --git a/include/linux/ip.h b/include/linux/ip.h index 2c54bbd3da76..33e8a19a1a0f 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,7 +128,6 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } -struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -158,7 +157,6 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ - struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 98fa32316e40..88591913c94f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,15 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int tcp_twsk_ipv6only(const struct sock *sk) +static inline int inet_twsk_ipv6only(const struct sock *sk) { return inet_twsk(sk)->tw_ipv6only; } -static inline int tcp_v6_ipv6only(const struct sock *sk) +static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); } #else #define __ipv6_only_sock(sk) 0 @@ -360,7 +360,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define __tcp_v6_rcv_saddr(__sk) NULL #define tcp_v6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 -#define tcp_v6_ipv6only(__sk) 0 +#define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5d295b1b3de7..800930fac388 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -177,8 +177,8 @@ struct tcp_info #include #include -#include #include +#include #include /* This defines a selective acknowledgement block. 
*/ @@ -219,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) } struct tcp_sock { - /* inet_sock has to be the first member of tcp_sock */ - struct inet_sock inet; + /* inet_connection_sock has to be the first member of tcp_sock */ + struct inet_connection_sock inet_conn; int tcp_header_len; /* Bytes of tcp header to send */ /* @@ -241,18 +241,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - /* Delayed ACK control data */ - struct { - __u8 pending; /* ACK is pending */ - __u8 quick; /* Scheduled number of quick acks */ - __u8 pingpong; /* The session is interactive */ - __u8 blocked; /* Delayed ACK was blocked by socket lock*/ - __u32 ato; /* Predicted tick of soft clock */ - unsigned long timeout; /* Currently scheduled timeout */ - __u32 lrcvtime; /* timestamp of last received data packet*/ - __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ - } ack; /* Data for direct copy to user */ struct { @@ -271,8 +259,8 @@ struct tcp_sock { __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ - __u8 retransmits; /* Number of unrecovered RTO timeouts. */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ @@ -281,7 +269,7 @@ struct tcp_sock { __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 unused; + __u8 nonagle; /* Disable Nagle algorithm? 
*/ __u8 defer_accept; /* User waits for some data after accept() */ /* RTT measurement */ @@ -290,19 +278,13 @@ struct tcp_sock { __u32 mdev_max; /* maximal mdev for the last rtt period */ __u32 rttvar; /* smoothed mdev_max */ __u32 rtt_seq; /* sequence number to update rttvar */ - __u32 rto; /* retransmit timeout */ __u32 packets_out; /* Packets which are "in flight" */ __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - __u8 backoff; /* backoff */ /* * Options received (usually on last packet, some only on SYN packets). */ - __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - - __u8 probes_out; /* unanswered 0 window probes */ struct tcp_options_received rx_opt; /* @@ -315,11 +297,6 @@ struct tcp_sock { __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; - /* Two commonly used timers in both sender and receiver paths. */ - unsigned long timeout; - struct timer_list retransmit_timer; /* Resend (no ack) */ - struct timer_list delack_timer; /* Ack delay */ - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ @@ -334,7 +311,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 syn_retries; /* num of allowed syn retries */ + __u8 probes_out; /* unanswered 0 window probes */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ @@ -349,14 +326,12 @@ struct tcp_sock { int undo_retrans; /* number of undoable retransmissions. */ __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ - __u8 pending; /* Scheduled timer event */ __u8 urg_mode; /* In urgent mode */ + /* ONE BYTE HOLE, TRY TO PACK! 
*/ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ - struct request_sock_queue accept_queue; /* FIFO of established children */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 000000000000..ef609396e41b --- /dev/null +++ b/include/net/inet_connection_sock.h @@ -0,0 +1,86 @@ +/* + * NET Generic infrastructure for INET connection oriented protocols. + * + * Definitions for inet_connection_sock + * + * Authors: Many people, see the TCP sources + * + * From code originally in TCP + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_CONNECTION_SOCK_H +#define _INET_CONNECTION_SOCK_H + +#include +#include +#include + +struct inet_bind_bucket; +struct inet_hashinfo; + +/** inet_connection_sock - INET connection oriented sock + * + * @icsk_accept_queue: FIFO of established children + * @icsk_bind_hash: Bind node + * @icsk_timeout: Timeout + * @icsk_retransmit_timer: Resend (no ack) + * @icsk_rto: Retransmit timeout + * @icsk_retransmits: Number of unrecovered [RTO] timeouts + * @icsk_pending: Scheduled timer event + * @icsk_backoff: Backoff + * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_ack: Delayed ACK control data + */ +struct inet_connection_sock { + /* inet_sock has to be the first member! 
*/ + struct inet_sock icsk_inet; + struct request_sock_queue icsk_accept_queue; + struct inet_bind_bucket *icsk_bind_hash; + unsigned long icsk_timeout; + struct timer_list icsk_retransmit_timer; + struct timer_list icsk_delack_timer; + __u32 icsk_rto; + __u8 icsk_retransmits; + __u8 icsk_pending; + __u8 icsk_backoff; + __u8 icsk_syn_retries; + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } icsk_ack; +}; + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +extern void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)); +extern void inet_csk_clear_xmit_timers(struct sock *sk); + +extern struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const __u32 raddr, + const __u32 laddr); +extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum); + +extern struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req); + +#endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b5c0d64ea741..f0c21c07f894 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include #include #include @@ -185,9 +185,9 @@ static 
inline void __inet_inherit_port(struct inet_hashinfo *table, struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; + inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 334717bf9ef6..b7c7eecbe64d 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -224,17 +224,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) return prev_qlen; } -static inline int reqsk_queue_len(struct request_sock_queue *queue) +static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; } -static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return queue->listen_opt->qlen_young; } -static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) { return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; } diff --git a/include/net/sock.h b/include/net/sock.h index 828dc082fcb7..48cc337a6566 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,9 +493,6 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; -/* Here is the right place to enable sock refcounting debugging */ -//#define SOCK_REFCNT_DEBUG - /* Networking protocol blocks we attach to sockets. 
* socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto diff --git a/include/net/tcp.h b/include/net/tcp.h index cf8e664176ad..a943c79c88b0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,10 +19,11 @@ #define _TCP_H #define TCP_DEBUG 1 +#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 /* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMERS +#undef INET_CSK_CLEAR_TIMERS #include #include @@ -205,10 +206,10 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define TCP_TIME_RETRANS 1 /* Retransmit timer */ -#define TCP_TIME_DACK 2 /* Delayed ack timer */ -#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ -#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -257,9 +258,9 @@ extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define TCP_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) 1 #endif /* @@ -372,41 +373,42 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum tcp_ack_state_t -{ - TCP_ACK_SCHED = 1, - TCP_ACK_TIMER = 2, - TCP_ACK_PUSHED= 4 +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 }; -static inline void tcp_schedule_ack(struct tcp_sock *tp) +static inline void inet_csk_schedule_ack(struct sock *sk) { - tp->ack.pending |= TCP_ACK_SCHED; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; } -static inline int 
tcp_ack_scheduled(struct tcp_sock *tp) +static inline int inet_csk_ack_scheduled(const struct sock *sk) { - return tp->ack.pending&TCP_ACK_SCHED; + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; } -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk, + const unsigned int pkts) { - if (tp->ack.quick) { - if (pkts >= tp->ack.quick) { - tp->ack.quick = 0; + struct inet_connection_sock *icsk = inet_csk(sk); + if (icsk->icsk_ack.quick) { + if (pkts >= icsk->icsk_ack.quick) { + icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.ato = TCP_ATO_MIN; } else - tp->ack.quick -= pkts; + icsk->icsk_ack.quick -= pkts; } } -extern void tcp_enter_quickack_mode(struct tcp_sock *tp); +extern void tcp_enter_quickack_mode(struct sock *sk); -static __inline__ void tcp_delack_init(struct tcp_sock *tp) +static inline void inet_csk_delack_init(struct sock *sk) { - memset(&tp->ack, 0, sizeof(tp->ack)); + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -440,7 +442,7 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); +extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -534,15 +536,18 @@ extern void tcp_cwnd_application_limited(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -extern void tcp_clear_xmit_timers(struct sock *); +static inline void tcp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} -extern void tcp_delete_keepalive_timer(struct sock *); -extern void 
tcp_reset_keepalive_timer(struct sock *, unsigned long); +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef TCP_DEBUG -extern const char tcp_timer_bug_msg[]; +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; #endif /* tcp_diag.c */ @@ -554,70 +559,58 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void tcp_clear_xmit_timer(struct sock *sk, int what) +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->retransmit_timer); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif - break; - case TCP_TIME_DACK: - tp->ack.blocked = 0; - tp->ack.pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->delack_timer); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif - break; - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; - } /* * Reset the retransmission timer */ -static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when) { - struct tcp_sock *tp = tcp_sk(sk); + 
struct inet_connection_sock *icsk = inet_csk(sk); if (when > TCP_RTO_MAX) { -#ifdef TCP_DEBUG - printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); #endif when = TCP_RTO_MAX; } - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = what; - tp->timeout = jiffies+when; - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); - break; - - case TCP_TIME_DACK: - tp->ack.pending |= TCP_ACK_TIMER; - tp->ack.timeout = jiffies+when; - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); - break; - - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; } /* Initialize RCV_MSS value. 
@@ -637,7 +630,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); - tp->ack.rcv_mss = hint; + inet_csk(sk)->icsk_ack.rcv_mss = hint; } static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -772,7 +765,7 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -939,8 +932,9 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tp->packets_out && !tp->pending) - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); + const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1021,8 +1015,9 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); - if (!tcp_ack_scheduled(tp)) - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4); } return 1; } @@ -1055,7 +1050,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (inet_sk(sk)->bind_hash && + if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(&tcp_hashinfo, sk); /* fall through */ @@ -1186,51 +1181,55 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void 
tcp_acceptq_queue(struct sock *sk, struct request_sock *req, - struct sock *child) +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { - reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); } -static inline void -tcp_synq_removed(struct sock *sk, struct request_sock *req) +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) { - if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) - tcp_delete_keepalive_timer(sk); + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); } -static inline void tcp_synq_added(struct sock *sk) +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) { - if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) - tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); } -static inline int tcp_synq_len(struct sock *sk) +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) { - return reqsk_queue_len(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_young(struct sock *sk) +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) { - return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_is_full(struct sock *sk) +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { - return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + 
struct request_sock *req, + struct request_sock **prev) { - reqsk_queue_unlink(&tp->accept_queue, req, prev); + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); } -static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) { - tcp_synq_unlink(tcp_sk(sk), req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); reqsk_free(req); } @@ -1265,12 +1264,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } -static inline int tcp_fin_time(const struct tcp_sock *tp) +static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + const int rto = inet_csk(sk)->icsk_rto; - if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) - fin_timeout = (tp->rto<<2) - (tp->rto>>1); + if (fin_timeout < (rto << 2) - (rto >> 1)) + fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92a..c6b84397448d 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) * it is surely retransmit. It is not in ECN RFC, * but Linux follows this rule. 
*/ else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode((struct sock *)tp); } } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d94e962958a4..e8d29fe736d2 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -19,6 +19,7 @@ #include #include +#include #include /* @@ -56,10 +57,9 @@ void inet_bind_bucket_destroy(kmem_cache_t *cachep, struct inet_bind_bucket *tb) void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { - struct inet_sock *inet = inet_sk(sk); - inet->num = snum; + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); - inet->bind_hash = tb; + inet_csk(sk)->icsk_bind_hash = tb; } EXPORT_SYMBOL(inet_bind_hash); @@ -69,16 +69,15 @@ EXPORT_SYMBOL(inet_bind_hash); */ static void __inet_put_port(struct inet_hashinfo *hashinfo, struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); - const int bhash = inet_bhashfn(inet->num, hashinfo->bhash_size); + const int bhash = inet_bhashfn(inet_sk(sk)->num, hashinfo->bhash_size); struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); - inet->bind_hash = NULL; - inet->num = 0; + inet_csk(sk)->icsk_bind_hash = NULL; + inet_sk(sk)->num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); spin_unlock(&head->lock); } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ceb577c74237..5cba59b869fe 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -56,6 +56,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo) { const struct inet_sock *inet = inet_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = &hashinfo->ehash[sk->sk_hashent]; struct 
inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. @@ -64,8 +65,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, */ bhead = &hashinfo->bhash[inet_bhashfn(inet->num, hashinfo->bhash_size)]; spin_lock(&bhead->lock); - tw->tw_tb = inet->bind_hash; - BUG_TRAP(inet->bind_hash); + tw->tw_tb = icsk->icsk_bind_hash; + BUG_TRAP(icsk->icsk_bind_hash); inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 72d014442185..8692cb9d4bdb 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -180,7 +180,7 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, child = tp->af_specific->syn_recv_sock(sk, skb, req, dst); if (child) - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); else reqsk_free(req); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f1a708bf7a97..8177b86570db 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -313,7 +313,7 @@ EXPORT_SYMBOL(tcp_enter_memory_pressure); static __inline__ unsigned int tcp_listen_poll(struct sock *sk, poll_table *wait) { - return !reqsk_queue_empty(&tcp_sk(sk)->accept_queue) ? (POLLIN | POLLRDNORM) : 0; + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? 
(POLLIN | POLLRDNORM) : 0; } /* @@ -458,15 +458,15 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) int tcp_listen_start(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); - int rc = reqsk_queue_alloc(&tp->accept_queue, TCP_SYNQ_HSIZE); + struct inet_connection_sock *icsk = inet_csk(sk); + int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, TCP_SYNQ_HSIZE); if (rc != 0) return rc; sk->sk_max_ack_backlog = 0; sk->sk_ack_backlog = 0; - tcp_delack_init(tp); + inet_csk_delack_init(sk); /* There is race window here: we announce ourselves listening, * but this transition is still not validated by get_port(). @@ -484,7 +484,7 @@ int tcp_listen_start(struct sock *sk) } sk->sk_state = TCP_CLOSE; - __reqsk_queue_destroy(&tp->accept_queue); + __reqsk_queue_destroy(&icsk->icsk_accept_queue); return -EADDRINUSE; } @@ -495,14 +495,14 @@ int tcp_listen_start(struct sock *sk) static void tcp_listen_stop (struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; struct request_sock *req; - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&tp->accept_queue); + acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -512,7 +512,7 @@ static void tcp_listen_stop (struct sock *sk) * To be honest, we are not able to make either * of the variants now. 
--ANK */ - reqsk_queue_destroy(&tp->accept_queue); + reqsk_queue_destroy(&icsk->icsk_accept_queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -1039,20 +1039,21 @@ static void cleanup_rbuf(struct sock *sk, int copied) BUG_TRAP(!skb || before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)); #endif - if (tcp_ack_scheduled(tp)) { + if (inet_csk_ack_scheduled(sk)) { + const struct inet_connection_sock *icsk = inet_csk(sk); /* Delayed ACKs frequently hit locked sockets during bulk * receive. */ - if (tp->ack.blocked || + if (icsk->icsk_ack.blocked || /* Once-per-two-segments ACK was not sent by tcp_input.c */ - tp->rcv_nxt - tp->rcv_wup > tp->ack.rcv_mss || + tp->rcv_nxt - tp->rcv_wup > icsk->icsk_ack.rcv_mss || /* * If this read emptied read buffer, we send ACK, if * connection is not bidirectional, user drained * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (tp->ack.pending & TCP_ACK_PUSHED) && - !tp->ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -1569,7 +1570,7 @@ void tcp_destroy_sock(struct sock *sk) BUG_TRAP(sk_unhashed(sk)); /* If it has not 0 inet_sk(sk)->num, it must be bound */ - BUG_TRAP(!inet_sk(sk)->num || inet_sk(sk)->bind_hash); + BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash); sk->sk_prot->destroy(sk); @@ -1698,10 +1699,10 @@ adjudge_to_death: tcp_send_active_reset(sk, GFP_ATOMIC); NET_INC_STATS_BH(LINUX_MIB_TCPABORTONLINGER); } else { - int tmo = tcp_fin_time(tp); + const int tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tcp_fin_time(tp)); + inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); } else { atomic_inc(&tcp_orphan_count); tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -1746,6 +1747,7 @@ static inline int tcp_need_reset(int state) int tcp_disconnect(struct sock *sk, int flags) { struct 
inet_sock *inet = inet_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err = 0; int old_state = sk->sk_state; @@ -1782,7 +1784,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->srtt = 0; if ((tp->write_seq += tp->max_window + 2) == 0) tp->write_seq = 1; - tp->backoff = 0; + icsk->icsk_backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; tp->packets_out = 0; @@ -1790,13 +1792,13 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); tcp_clear_retrans(tp); - tcp_delack_init(tp); + inet_csk_delack_init(sk); sk->sk_send_head = NULL; tp->rx_opt.saw_tstamp = 0; tcp_sack_reset(&tp->rx_opt); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || inet->bind_hash); + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -1808,7 +1810,7 @@ int tcp_disconnect(struct sock *sk, int flags) */ static int wait_for_connect(struct sock *sk, long timeo) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); DEFINE_WAIT(wait); int err; @@ -1830,11 +1832,11 @@ static int wait_for_connect(struct sock *sk, long timeo) prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); release_sock(sk); - if (reqsk_queue_empty(&tp->accept_queue)) + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); lock_sock(sk); err = 0; - if (!reqsk_queue_empty(&tp->accept_queue)) + if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) break; err = -EINVAL; if (sk->sk_state != TCP_LISTEN) @@ -1854,9 +1856,9 @@ static int wait_for_connect(struct sock *sk, long timeo) * This will accept the next outstanding connection. 
*/ -struct sock *tcp_accept(struct sock *sk, int flags, int *err) +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct sock *newsk; int error; @@ -1870,7 +1872,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&tp->accept_queue)) { + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -1883,7 +1885,7 @@ struct sock *tcp_accept(struct sock *sk, int flags, int *err) goto out_err; } - newsk = reqsk_queue_get_child(&tp->accept_queue, sk); + newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); out: release_sock(sk); @@ -1901,6 +1903,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int val; int err = 0; @@ -1999,7 +2002,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, elapsed = tp->keepalive_time - elapsed; else elapsed = 0; - tcp_reset_keepalive_timer(sk, elapsed); + inet_csk_reset_keepalive_timer(sk, elapsed); } } break; @@ -2019,7 +2022,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, if (val < 1 || val > MAX_TCP_SYNCNT) err = -EINVAL; else - tp->syn_retries = val; + icsk->icsk_syn_retries = val; break; case TCP_LINGER2: @@ -2058,16 +2061,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, case TCP_QUICKACK: if (!val) { - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } else { - tp->ack.pingpong = 0; + icsk->icsk_ack.pingpong = 0; if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) && - tcp_ack_scheduled(tp)) { - tp->ack.pending |= TCP_ACK_PUSHED; + 
inet_csk_ack_scheduled(sk)) { + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; cleanup_rbuf(sk, 1); if (!(val & 1)) - tp->ack.pingpong = 1; + icsk->icsk_ack.pingpong = 1; } } break; @@ -2084,15 +2087,16 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, void tcp_get_info(struct sock *sk, struct tcp_info *info) { struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; info->tcpi_ca_state = tp->ca_state; - info->tcpi_retransmits = tp->retransmits; + info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = tp->probes_out; - info->tcpi_backoff = tp->backoff; + info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) info->tcpi_options |= TCPI_OPT_TIMESTAMPS; @@ -2107,10 +2111,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->ecn_flags&TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; - info->tcpi_rto = jiffies_to_usecs(tp->rto); - info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); + info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); + info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); info->tcpi_snd_mss = tp->mss_cache; - info->tcpi_rcv_mss = tp->ack.rcv_mss; + info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; info->tcpi_unacked = tp->packets_out; info->tcpi_sacked = tp->sacked_out; @@ -2119,7 +2123,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); - info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); + info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime); info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = tp->pmtu_cookie; @@ -2179,7 +2183,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = tp->keepalive_probes ? 
: sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: - val = tp->syn_retries ? : sysctl_tcp_syn_retries; + val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; @@ -2209,7 +2213,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return 0; } case TCP_QUICKACK: - val = !tp->ack.pingpong; + val = !inet_csk(sk)->icsk_ack.pingpong; break; case TCP_CONGESTION: @@ -2340,7 +2344,7 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); } -EXPORT_SYMBOL(tcp_accept); +EXPORT_SYMBOL(inet_csk_accept); EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 60c6a797cc50..5f4c74f45e82 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -48,8 +48,9 @@ static struct sock *tcpnl; static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; struct tcp_info *info = NULL; @@ -129,14 +130,14 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { r->tcpdiag_timer = 1; - r->tcpdiag_retrans = tp->retransmits; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); - } else if (tp->pending == TCP_TIME_PROBE0) { + r->tcpdiag_retrans = icsk->icsk_retransmits; + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; r->tcpdiag_retrans = tp->probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout); + r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if 
(timer_pending(&sk->sk_timer)) { r->tcpdiag_timer = 2; r->tcpdiag_retrans = tp->probes_out; @@ -497,7 +498,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, { struct tcpdiag_entry entry; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; struct rtattr *bc = NULL; struct inet_sock *inet = inet_sk(sk); @@ -513,9 +514,9 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.family = sk->sk_family; - read_lock_bh(&tp->accept_queue.syn_wait_lock); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - lopt = tp->accept_queue.listen_opt; + lopt = icsk->icsk_accept_queue.listen_opt; if (!lopt || !lopt->qlen) goto out; @@ -572,7 +573,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, } out: - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); return err; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ffa24025cd02..8a8c5c2d90cb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -114,20 +114,21 @@ int sysctl_tcp_moderate_rcvbuf = 1; /* Adapt the MSS value used to make delayed ack decision to the * real world. */ -static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, - struct sk_buff *skb) +static inline void tcp_measure_rcv_mss(struct sock *sk, + const struct sk_buff *skb) { - unsigned int len, lss; + struct inet_connection_sock *icsk = inet_csk(sk); + const unsigned int lss = icsk->icsk_ack.last_seg_size; + unsigned int len; - lss = tp->ack.last_seg_size; - tp->ack.last_seg_size = 0; + icsk->icsk_ack.last_seg_size = 0; /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. 
*/ len = skb->len; - if (len >= tp->ack.rcv_mss) { - tp->ack.rcv_mss = len; + if (len >= icsk->icsk_ack.rcv_mss) { + icsk->icsk_ack.rcv_mss = len; } else { /* Otherwise, we make more careful check taking into account, * that SACKs block is variable. @@ -147,41 +148,44 @@ static inline void tcp_measure_rcv_mss(struct tcp_sock *tp, * tcp header plus fixed timestamp option length. * Resulting "len" is MSS free of SACK jitter. */ - len -= tp->tcp_header_len; - tp->ack.last_seg_size = len; + len -= tcp_sk(sk)->tcp_header_len; + icsk->icsk_ack.last_seg_size = len; if (len == lss) { - tp->ack.rcv_mss = len; + icsk->icsk_ack.rcv_mss = len; return; } } - tp->ack.pending |= TCP_ACK_PUSHED; + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } -static void tcp_incr_quickack(struct tcp_sock *tp) +static void tcp_incr_quickack(struct sock *sk) { - unsigned quickacks = tp->rcv_wnd/(2*tp->ack.rcv_mss); + struct inet_connection_sock *icsk = inet_csk(sk); + unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss); if (quickacks==0) quickacks=2; - if (quickacks > tp->ack.quick) - tp->ack.quick = min(quickacks, TCP_MAX_QUICKACKS); + if (quickacks > icsk->icsk_ack.quick) + icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -void tcp_enter_quickack_mode(struct tcp_sock *tp) +void tcp_enter_quickack_mode(struct sock *sk) { - tcp_incr_quickack(tp); - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + struct inet_connection_sock *icsk = inet_csk(sk); + tcp_incr_quickack(sk); + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } /* Send ACKs quickly, if "quick" count is not exhausted * and the session is not interactive. 
*/ -static __inline__ int tcp_in_quickack_mode(struct tcp_sock *tp) +static inline int tcp_in_quickack_mode(const struct sock *sk) { - return (tp->ack.quick && !tp->ack.pingpong); + const struct inet_connection_sock *icsk = inet_csk(sk); + return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong; } /* Buffer size and advertised window tuning. @@ -224,8 +228,8 @@ static void tcp_fixup_sndbuf(struct sock *sk) */ /* Slow part of check#2. */ -static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, - struct sk_buff *skb) +static int __tcp_grow_window(const struct sock *sk, struct tcp_sock *tp, + const struct sk_buff *skb) { /* Optimize this! */ int truesize = tcp_win_from_space(skb->truesize)/2; @@ -233,7 +237,7 @@ static int __tcp_grow_window(struct sock *sk, struct tcp_sock *tp, while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) - return 2*tp->ack.rcv_mss; + return 2 * inet_csk(sk)->icsk_ack.rcv_mss; truesize >>= 1; window >>= 1; @@ -260,7 +264,7 @@ static inline void tcp_grow_window(struct sock *sk, struct tcp_sock *tp, if (incr) { tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp); - tp->ack.quick |= 1; + inet_csk(sk)->icsk_ack.quick |= 1; } } } @@ -325,7 +329,7 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - tp->ack.quick = 0; + inet_csk(sk)->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -346,8 +350,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > tp->ack.rcv_mss) - app_win -= tp->ack.rcv_mss; + if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) + app_win -= inet_csk(sk)->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -415,11 +419,12 @@ new_measure: tp->rcv_rtt_est.time = tcp_time_stamp; } -static inline void tcp_rcv_rtt_measure_ts(struct 
tcp_sock *tp, struct sk_buff *skb) +static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); if (tp->rx_opt.rcv_tsecr && (TCP_SKB_CB(skb)->end_seq - - TCP_SKB_CB(skb)->seq >= tp->ack.rcv_mss)) + TCP_SKB_CB(skb)->seq >= inet_csk(sk)->icsk_ack.rcv_mss)) tcp_rcv_rtt_update(tp, tcp_time_stamp - tp->rx_opt.rcv_tsecr, 0); } @@ -492,41 +497,42 @@ new_measure: */ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); u32 now; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); - tcp_measure_rcv_mss(tp, skb); + tcp_measure_rcv_mss(sk, skb); tcp_rcv_rtt_measure(tp); now = tcp_time_stamp; - if (!tp->ack.ato) { + if (!icsk->icsk_ack.ato) { /* The _first_ data packet received, initialize * delayed ACK engine. */ - tcp_incr_quickack(tp); - tp->ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + icsk->icsk_ack.ato = TCP_ATO_MIN; } else { - int m = now - tp->ack.lrcvtime; + int m = now - icsk->icsk_ack.lrcvtime; if (m <= TCP_ATO_MIN/2) { /* The fastest case is the first. */ - tp->ack.ato = (tp->ack.ato>>1) + TCP_ATO_MIN/2; - } else if (m < tp->ack.ato) { - tp->ack.ato = (tp->ack.ato>>1) + m; - if (tp->ack.ato > tp->rto) - tp->ack.ato = tp->rto; - } else if (m > tp->rto) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2; + } else if (m < icsk->icsk_ack.ato) { + icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + m; + if (icsk->icsk_ack.ato > icsk->icsk_rto) + icsk->icsk_ack.ato = icsk->icsk_rto; + } else if (m > icsk->icsk_rto) { /* Too long gap. Apparently sender falled to * restart window, so that we send ACKs quickly. 
*/ - tcp_incr_quickack(tp); + tcp_incr_quickack(sk); sk_stream_mem_reclaim(sk); } } - tp->ack.lrcvtime = now; + icsk->icsk_ack.lrcvtime = now; TCP_ECN_check_ce(tp, skb); @@ -611,8 +617,9 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ -static inline void tcp_set_rto(struct tcp_sock *tp) +static inline void tcp_set_rto(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); /* Old crap is replaced with new one. 8) * * More seriously: @@ -623,7 +630,7 @@ static inline void tcp_set_rto(struct tcp_sock *tp) * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some curcumstances. */ - tp->rto = (tp->srtt >> 3) + tp->rttvar; + inet_csk(sk)->icsk_rto = (tp->srtt >> 3) + tp->rttvar; /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, @@ -635,10 +642,10 @@ static inline void tcp_set_rto(struct tcp_sock *tp) /* NOTE: clamping at TCP_RTO_MIN is not required, current algo * guarantees that rto is higher. */ -static inline void tcp_bound_rto(struct tcp_sock *tp) +static inline void tcp_bound_rto(struct sock *sk) { - if (tp->rto > TCP_RTO_MAX) - tp->rto = TCP_RTO_MAX; + if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) + inet_csk(sk)->icsk_rto = TCP_RTO_MAX; } /* Save metrics learned by this TCP session. @@ -658,7 +665,7 @@ void tcp_update_metrics(struct sock *sk) if (dst && (dst->flags&DST_HOST)) { int m; - if (tp->backoff || !tp->srtt) { + if (inet_csk(sk)->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? * Probably, no packets returned in time. * Reset our results. 
@@ -801,9 +808,9 @@ static void tcp_init_metrics(struct sock *sk) tp->mdev = dst_metric(dst, RTAX_RTTVAR); tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN); } - tcp_set_rto(tp); - tcp_bound_rto(tp); - if (tp->rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) + tcp_set_rto(sk); + tcp_bound_rto(sk); + if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp) goto reset; tp->snd_cwnd = tcp_init_cwnd(tp, dst); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -817,7 +824,7 @@ reset: if (!tp->rx_opt.saw_tstamp && tp->srtt) { tp->srtt = 0; tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT; - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; } } @@ -1118,7 +1125,7 @@ void tcp_enter_frto(struct sock *sk) if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_FRTO); @@ -1214,7 +1221,7 @@ void tcp_enter_loss(struct sock *sk, int how) /* Reduce ssthresh if it has not yet been made inside this window. 
*/ if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !tp->retransmits)) { + (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(tp); tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); tcp_ca_event(tp, CA_EVENT_LOSS); @@ -1253,7 +1260,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_ECN_queue_cwr(tp); } -static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) +static int tcp_check_sack_reneging(struct sock *sk) { struct sk_buff *skb; @@ -1268,9 +1275,10 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - tp->retransmits++; + inet_csk(sk)->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); return 1; } return 0; @@ -1281,15 +1289,15 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; } -static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) { - return (tcp_time_stamp - TCP_SKB_CB(skb)->when > tp->rto); + return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); } static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) { return tp->packets_out && - tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); + tcp_skb_timedout(sk, skb_peek(&sk->sk_write_queue)); } /* Linux NewReno/SACK/FACK/ECN state machine. 
@@ -1509,7 +1517,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { - if (tcp_skb_timedout(tp, skb) && + if (tcp_skb_timedout(sk, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -1676,7 +1684,7 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) tp->left_out = tp->sacked_out; tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) tcp_set_ca_state(tp, TCP_CA_Open); @@ -1750,7 +1758,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + if (tp->sacked_out && tcp_check_sack_reneging(sk)) return; /* C. Process data loss notification, provided it is valid. */ @@ -1774,7 +1782,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { case TCP_CA_Loss: - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1824,7 +1832,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - tp->retransmits = 0; + inet_csk(sk)->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); @@ -1881,10 +1889,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Read draft-ietf-tcplw-high-performance before mucking * with this code. 
(Superceeds RFC1323) */ -static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) +static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) { - __u32 seq_rtt; - /* RTTM Rule: A TSecr value received in a segment is used to * update the averaged RTT measurement only if the segment * acknowledges some new data, i.e., only if it advances the @@ -1900,14 +1906,15 @@ static void tcp_ack_saw_tstamp(struct tcp_sock *tp, u32 *usrtt, int flag) * answer arrives rto becomes 120 seconds! If at least one of segments * in window is lost... Voila. --ANK (010210) */ - seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; + struct tcp_sock *tp = tcp_sk(sk); + const __u32 seq_rtt = tcp_time_stamp - tp->rx_opt.rcv_tsecr; tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int flag) +static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag) { /* We don't have a timestamp. Can only use * packets that are not retransmitted to determine @@ -1921,20 +1928,21 @@ static void tcp_ack_no_tstamp(struct tcp_sock *tp, u32 seq_rtt, u32 *usrtt, int if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tp, seq_rtt, usrtt); - tcp_set_rto(tp); - tp->backoff = 0; - tcp_bound_rto(tp); + tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_set_rto(sk); + inet_csk(sk)->icsk_backoff = 0; + tcp_bound_rto(sk); } -static inline void tcp_ack_update_rtt(struct tcp_sock *tp, - int flag, s32 seq_rtt, u32 *usrtt) +static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, + const s32 seq_rtt, u32 *usrtt) { + const struct tcp_sock *tp = tcp_sk(sk); /* Note that peer MAY send zero echo. In this case it is ignored. 
(rfc1323) */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) - tcp_ack_saw_tstamp(tp, usrtt, flag); + tcp_ack_saw_tstamp(sk, usrtt, flag); else if (seq_rtt >= 0) - tcp_ack_no_tstamp(tp, seq_rtt, usrtt, flag); + tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, @@ -1951,9 +1959,9 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { if (!tp->packets_out) { - tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } } @@ -2090,7 +2098,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { - tcp_ack_update_rtt(tp, acked, seq_rtt, seq_usrtt); + tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); if (tp->ca_ops->pkts_acked) @@ -2125,20 +2133,21 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt static void tcp_ack_probe(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); /* Was it a usable window open? */ if (!after(TCP_SKB_CB(sk->sk_send_head)->end_seq, tp->snd_una + tp->snd_wnd)) { - tp->backoff = 0; - tcp_clear_xmit_timer(sk, TCP_TIME_PROBE0); + icsk->icsk_backoff = 0; + inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). * This function is not for random using! 
*/ } else { - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } } @@ -2157,8 +2166,8 @@ static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ -static inline int tcp_may_update_window(struct tcp_sock *tp, u32 ack, - u32 ack_seq, u32 nwin) +static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, + const u32 ack_seq, const u32 nwin) { return (after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || @@ -2500,8 +2509,9 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * up to bandwidth of 18Gigabit/sec. 8) ] */ -static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) +static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { + struct tcp_sock *tp = tcp_sk(sk); struct tcphdr *th = skb->h.th; u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -2516,14 +2526,15 @@ static int tcp_disordered_ack(struct tcp_sock *tp, struct sk_buff *skb) !tcp_may_update_window(tp, ack, seq, ntohs(th->window) << tp->rx_opt.snd_wscale) && /* 4. ... and sits in replay window. */ - (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (tp->rto*1024)/HZ); + (s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ); } -static inline int tcp_paws_discard(struct tcp_sock *tp, struct sk_buff *skb) +static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { + const struct tcp_sock *tp = tcp_sk(sk); return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && xtime.tv_sec < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(tp, skb)); + !tcp_disordered_ack(sk, skb)); } /* Check segment sequence number for validity. 
@@ -2586,7 +2597,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); sk->sk_shutdown |= RCV_SHUTDOWN; sock_set_flag(sk, SOCK_DONE); @@ -2596,7 +2607,7 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) case TCP_ESTABLISHED: /* Move to CLOSE_WAIT */ tcp_set_state(sk, TCP_CLOSE_WAIT); - tp->ack.pingpong = 1; + inet_csk(sk)->icsk_ack.pingpong = 1; break; case TCP_CLOSE_WAIT: @@ -2694,7 +2705,7 @@ static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq && before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOST); - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (tp->rx_opt.sack_ok && sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -2942,7 +2953,7 @@ queue_and_out: * gap in queue is filled. */ if (skb_queue_empty(&tp->out_of_order_queue)) - tp->ack.pingpong = 0; + inet_csk(sk)->icsk_ack.pingpong = 0; } if (tp->rx_opt.num_sacks) @@ -2963,8 +2974,8 @@ queue_and_out: tcp_dsack_set(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); out_of_window: - tcp_enter_quickack_mode(tp); - tcp_schedule_ack(tp); + tcp_enter_quickack_mode(sk); + inet_csk_schedule_ack(sk); drop: __kfree_skb(skb); return; @@ -2974,7 +2985,7 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) goto out_of_window; - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode(sk); if (before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { /* Partial packet, seq < rcv_next < end_seq */ @@ -3003,7 +3014,7 @@ drop: /* Disable header prediction. 
*/ tp->pred_flags = 0; - tcp_schedule_ack(tp); + inet_csk_schedule_ack(sk); SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); @@ -3373,13 +3384,13 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) struct tcp_sock *tp = tcp_sk(sk); /* More than one full frame received... */ - if (((tp->rcv_nxt - tp->rcv_wup) > tp->ack.rcv_mss + if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). Or... */ && __tcp_select_window(sk) >= tp->rcv_wnd) || /* We ACK each frame or... */ - tcp_in_quickack_mode(tp) || + tcp_in_quickack_mode(sk) || /* We have out of order data. */ (ofo_possible && skb_peek(&tp->out_of_order_queue))) { @@ -3393,8 +3404,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) static __inline__ void tcp_ack_snd_check(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_ack_scheduled(tp)) { + if (!inet_csk_ack_scheduled(sk)) { /* We sent a data segment already. */ return; } @@ -3648,7 +3658,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* We know that such packets are checksummed * on entry. 
@@ -3681,7 +3691,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); __skb_pull(skb, tcp_header_len); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; @@ -3702,7 +3712,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, tp->rcv_nxt == tp->rcv_wup) tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; @@ -3722,7 +3732,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Well, only one small jumplet in fast path... */ tcp_ack(sk, skb, FLAG_DATA); tcp_data_snd_check(sk, tp); - if (!tcp_ack_scheduled(tp)) + if (!inet_csk_ack_scheduled(sk)) goto no_ack; } @@ -3744,7 +3754,7 @@ slow_path: * RFC1323: H1. Apply PAWS check first. */ if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -3791,7 +3801,7 @@ step5: if(th->ack) tcp_ack(sk, skb, FLAG_SLOWPATH); - tcp_rcv_rtt_measure_ts(tp, skb); + tcp_rcv_rtt_measure_ts(sk, skb); /* Process urgent data. */ tcp_urg(sk, skb, th); @@ -3933,7 +3943,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_buffer_space(sk); if (sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tp)); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); if (!tp->rx_opt.snd_wscale) __tcp_fast_path_on(tp, tp->snd_wnd); @@ -3945,7 +3955,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) { + if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { /* Save one ACK. 
Data will be ready after * several ticks, if write_pending is set. * @@ -3953,12 +3963,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * look so _wonderfully_ clever, that I was not able * to stand against the temptation 8) --ANK */ - tcp_schedule_ack(tp); - tp->ack.lrcvtime = tcp_time_stamp; - tp->ack.ato = TCP_ATO_MIN; - tcp_incr_quickack(tp); - tcp_enter_quickack_mode(tp); - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + tcp_incr_quickack(sk); + tcp_enter_quickack_mode(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); discard: __kfree_skb(skb); @@ -4114,7 +4124,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(tp, skb)) { + tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED); tcp_send_dupack(sk, skb); @@ -4183,7 +4193,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(tp, 0, 0); + tcp_ack_saw_tstamp(sk, 0, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; @@ -4230,9 +4240,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 1; } - tmo = tcp_fin_time(tp); + tmo = tcp_fin_time(sk); if (tmo > TCP_TIMEWAIT_LEN) { - tcp_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); + inet_csk_reset_keepalive_timer(sk, tmo - TCP_TIMEWAIT_LEN); } else if (th->fin || sock_owned_by_user(sk)) { /* Bad case. We could lose such FIN otherwise. * It is not a big problem, but it looks confusing @@ -4240,7 +4250,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * if it spins in bh_lock_sock(), but it is really * marginal case. 
*/ - tcp_reset_keepalive_timer(sk, tmo); + inet_csk_reset_keepalive_timer(sk, tmo); } else { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); goto discard; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e7e91e60ac74..2cd41265d17f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,7 +104,7 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -113,7 +113,7 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb sk_for_each_bound(sk2, node, &tb->owners) { if (sk != sk2 && - !tcp_v6_ipv6only(sk2) && + !inet_v6_ipv6only(sk2) && (!sk->sk_bound_dev_if || !sk2->sk_bound_dev_if || sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { @@ -132,7 +132,8 @@ static inline int tcp_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. 
*/ -static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -146,16 +147,16 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) int remaining = (high - low) + 1; int rover; - spin_lock(&tcp_hashinfo.portalloc_lock); - if (tcp_hashinfo.port_rover < low) + spin_lock(&hashinfo->portalloc_lock); + if (hashinfo->port_rover < low) rover = low; else - rover = tcp_hashinfo.port_rover; + rover = hashinfo->port_rover; do { rover++; if (rover > high) rover = low; - head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == rover) @@ -164,8 +165,8 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) next: spin_unlock(&head->lock); } while (--remaining > 0); - tcp_hashinfo.port_rover = rover; - spin_unlock(&tcp_hashinfo.portalloc_lock); + hashinfo->port_rover = rover; + spin_unlock(&hashinfo->portalloc_lock); /* Exhausted local port range during search? 
It is not * possible for us to be holding one of the bind hash @@ -182,7 +183,7 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) */ snum = rover; } else { - head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; + head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) if (tb->port == snum) @@ -199,13 +200,13 @@ tb_found: goto success; } else { ret = 1; - if (tcp_bind_conflict(sk, tb)) + if (inet_csk_bind_conflict(sk, tb)) goto fail_unlock; } } tb_not_found: ret = 1; - if (!tb && (tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum)) == NULL) + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) goto fail_unlock; if (hlist_empty(&tb->owners)) { if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) @@ -216,9 +217,9 @@ tb_not_found: (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -228,6 +229,11 @@ fail: return ret; } +static int tcp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return inet_csk_get_port(&tcp_hashinfo, sk, snum); +} + static void tcp_v4_hash(struct sock *sk) { inet_hash(&tcp_hashinfo, sk); @@ -426,7 +432,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { __inet_hash(&tcp_hashinfo, sk, 0); @@ -557,25 +563,28 @@ failure: return err; } -static __inline__ int tcp_v4_iif(struct sk_buff *skb) +static inline int inet_iif(const struct sk_buff *skb) { return ((struct rtable *)skb->dst)->rt_iif; } -static __inline__ u32 tcp_v4_synq_hash(u32 raddr, u16 rport, 
u32 rnd) +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) { - return (jhash_2words(raddr, (u32) rport, rnd) & (TCP_SYNQ_HSIZE - 1)); + return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); } -static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, - struct request_sock ***prevp, - __u16 rport, - __u32 raddr, __u32 laddr) +struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, const __u32 raddr, + const __u32 laddr) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; - for (prev = &lopt->syn_table[tcp_v4_synq_hash(raddr, rport, lopt->hash_rnd)]; + for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; (req = *prev) != NULL; prev = &req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -583,7 +592,7 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, if (ireq->rmt_port == rport && ireq->rmt_addr == raddr && ireq->loc_addr == laddr && - TCP_INET_FAMILY(req->rsk_ops->family)) { + AF_INET_FAMILY(req->rsk_ops->family)) { BUG_TRAP(!req->sk); *prevp = prev; break; @@ -595,12 +604,13 @@ static struct request_sock *tcp_v4_search_req(struct tcp_sock *tp, static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v4_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + lopt->hash_rnd, lopt->nr_table_entries); - reqsk_queue_hash_req(&tp->accept_queue, h, 
req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -687,7 +697,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) } sk = inet_lookup(&tcp_hashinfo, iph->daddr, th->dest, iph->saddr, - th->source, tcp_v4_iif(skb)); + th->source, inet_iif(skb)); if (!sk) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; @@ -747,8 +757,8 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) if (sock_owned_by_user(sk)) goto out; - req = tcp_v4_search_req(tp, &prev, th->dest, - iph->daddr, iph->saddr); + req = inet_csk_search_req(sk, &prev, th->dest, + iph->daddr, iph->saddr); if (!req) goto out; @@ -768,7 +778,7 @@ void tcp_v4_err(struct sk_buff *skb, u32 info) * created socket, and POSIX does not want network * errors returned from accept(). */ - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -953,8 +963,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) req->ts_recent); } -static struct dst_entry* tcp_v4_route_req(struct sock *sk, - struct request_sock *req) +struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req) { struct rtable *rt; const struct inet_request_sock *ireq = inet_rsk(req); @@ -966,7 +976,7 @@ static struct dst_entry* tcp_v4_route_req(struct sock *sk, ireq->rmt_addr), .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, - .proto = IPPROTO_TCP, + .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; @@ -996,7 +1006,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, struct sk_buff * skb; /* First, grab a route. 
*/ - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto out; skb = tcp_make_synack(sk, dst, req); @@ -1098,7 +1108,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * limitations, they conserve resources and peer is * evidently real one. */ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { #ifdef CONFIG_SYN_COOKIES if (sysctl_tcp_syncookies) { want_cookie = 1; @@ -1112,7 +1122,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * clogging syn queue with openreqs with exponentially increasing * timeout. */ - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp_request_sock_ops); @@ -1169,7 +1179,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) */ if (tmp_opt.saw_tstamp && sysctl_tcp_tw_recycle && - (dst = tcp_v4_route_req(sk, req)) != NULL && + (dst = inet_csk_route_req(sk, req)) != NULL && (peer = rt_get_peer((struct rtable *)dst)) != NULL && peer->v4daddr == saddr) { if (xtime.tv_sec < peer->tcp_ts_stamp + TCP_PAWS_MSL && @@ -1182,7 +1192,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } /* Kill the following clause, if you dislike this way. 
*/ else if (!sysctl_tcp_syncookies && - (sysctl_max_syn_backlog - tcp_synq_len(sk) < + (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && (!peer || !peer->tcp_ts_stamp) && (!dst || !dst_metric(dst, RTAX_RTT))) { @@ -1240,7 +1250,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL) + if (!dst && (dst = inet_csk_route_req(sk, req)) == NULL) goto exit; newsk = tcp_create_openreq_child(sk, req, skb); @@ -1257,7 +1267,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newinet->saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; - newinet->mc_index = tcp_v4_iif(skb); + newinet->mc_index = inet_iif(skb); newinet->mc_ttl = skb->nh.iph->ttl; newtp->ext_header_len = 0; if (newinet->opt) @@ -1285,18 +1295,17 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = skb->h.th; struct iphdr *iph = skb->nh.iph; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. 
*/ - struct request_sock *req = tcp_v4_search_req(tp, &prev, th->source, - iph->saddr, iph->daddr); + struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, + iph->saddr, iph->daddr); if (req) return tcp_check_req(sk, skb, req, prev); nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, - ntohs(th->dest), tcp_v4_iif(skb)); + ntohs(th->dest), inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1440,7 +1449,7 @@ int tcp_v4_rcv(struct sk_buff *skb) sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1507,7 +1516,7 @@ do_time_wait: struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, ntohs(th->dest), - tcp_v4_iif(skb)); + inet_iif(skb)); if (sk2) { tcp_tw_deschedule((struct inet_timewait_sock *)sk); inet_twsk_put((struct inet_timewait_sock *)sk); @@ -1619,7 +1628,7 @@ static int tcp_v4_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1672,7 +1681,7 @@ int tcp_v4_destroy_sock(struct sock *sk) __skb_queue_purge(&tp->ucopy.prequeue); /* Clean up a referenced TCP bind bucket. 
*/ - if (inet_sk(sk)->bind_hash) + if (inet_csk(sk)->icsk_bind_hash) inet_put_port(&tcp_hashinfo, sk); /* @@ -1707,7 +1716,7 @@ static inline struct inet_timewait_sock *tw_next(struct inet_timewait_sock *tw) static void *listening_get_next(struct seq_file *seq, void *cur) { - struct tcp_sock *tp; + struct inet_connection_sock *icsk; struct hlist_node *node; struct sock *sk = cur; struct tcp_iter_state* st = seq->private; @@ -1723,7 +1732,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (st->state == TCP_SEQ_STATE_OPENREQ) { struct request_sock *req = cur; - tp = tcp_sk(st->syn_wait_sk); + icsk = inet_csk(st->syn_wait_sk); req = req->dl_next; while (1) { while (req) { @@ -1736,17 +1745,17 @@ static void *listening_get_next(struct seq_file *seq, void *cur) if (++st->sbucket >= TCP_SYNQ_HSIZE) break; get_req: - req = tp->accept_queue.listen_opt->syn_table[st->sbucket]; + req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket]; } sk = sk_next(st->syn_wait_sk); st->state = TCP_SEQ_STATE_LISTENING; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } else { - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) goto start_req; - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); sk = sk_next(sk); } get_sk: @@ -1755,9 +1764,9 @@ get_sk: cur = sk; goto out; } - tp = tcp_sk(sk); - read_lock_bh(&tp->accept_queue.syn_wait_lock); - if (reqsk_queue_len(&tp->accept_queue)) { + icsk = inet_csk(sk); + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + if (reqsk_queue_len(&icsk->icsk_accept_queue)) { start_req: st->uid = sock_i_uid(sk); st->syn_wait_sk = sk; @@ -1765,7 +1774,7 @@ start_req: st->sbucket = 0; goto get_req; } - 
read_unlock_bh(&tp->accept_queue.syn_wait_lock); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } if (++st->bucket < INET_LHTABLE_SIZE) { sk = sk_head(&tcp_hashinfo.listening_hash[st->bucket]); @@ -1951,8 +1960,8 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) switch (st->state) { case TCP_SEQ_STATE_OPENREQ: if (v) { - struct tcp_sock *tp = tcp_sk(st->syn_wait_sk); - read_unlock_bh(&tp->accept_queue.syn_wait_lock); + struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk); + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); } case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) @@ -2058,18 +2067,19 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) int timer_active; unsigned long timer_expires; struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct inet_sock *inet = inet_sk(sp); unsigned int dest = inet->daddr; unsigned int src = inet->rcv_saddr; __u16 destp = ntohs(inet->dport); __u16 srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2084,12 +2094,14 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick << 1) | tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, 
tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh); } @@ -2174,7 +2186,7 @@ struct proto tcp_prot = { .close = tcp_close, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v4_init_sock, .destroy = tcp_v4_destroy_sock, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 8b6cd8d80662..56823704eb7d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -271,7 +271,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) if (tw != NULL) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); - const int rto = (tp->rto << 2) - (tp->rto >> 1); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); tw->tw_rcv_wscale = tp->rx_opt.rcv_wscale; tcptw->tw_rcv_nxt = tp->rcv_nxt; @@ -605,10 +606,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct inet_request_sock *ireq = inet_rsk(req); struct tcp_request_sock *treq = tcp_rsk(req); struct inet_sock *newinet = inet_sk(newsk); + struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp; newsk->sk_state = TCP_SYN_RECV; - newinet->bind_hash = NULL; + newicsk->icsk_bind_hash = NULL; /* Clone the TCP header template */ newinet->dport = ireq->rmt_port; @@ -624,11 +626,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); - newtp->retransmits = 0; - newtp->backoff = 0; + newicsk->icsk_retransmits = 0; + newicsk->icsk_backoff = 0; newtp->srtt = 0; newtp->mdev = TCP_TIMEOUT_INIT; - newtp->rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; newtp->left_out = 0; @@ -667,11 +669,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; /* Deinitialize accept_queue to trap 
illegal accesses. */ - memset(&newtp->accept_queue, 0, sizeof(newtp->accept_queue)); + memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); if (sock_flag(newsk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(newsk, - keepalive_time_when(newtp)); + inet_csk_reset_keepalive_timer(newsk, + keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; if((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { @@ -701,7 +703,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->tcp_header_len = sizeof(struct tcphdr); } if (skb->len >= TCP_MIN_RCVMSS+newtp->tcp_header_len) - newtp->ack.last_seg_size = skb->len-newtp->tcp_header_len; + newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); if (newtp->ecn_flags&TCP_ECN_OK) @@ -881,10 +883,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (child == NULL) goto listen_overflow; - tcp_synq_unlink(tp, req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); - tcp_acceptq_queue(sk, req, child); + inet_csk_reqsk_queue_add(sk, req, child); return child; listen_overflow: @@ -898,7 +900,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_reset(skb); - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a4d1eb9a0926..6f0a7e30ceac 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -105,8 +105,9 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. 
*/ -static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) { + struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; @@ -116,7 +117,7 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) tp->snd_ssthresh = tcp_current_ssthresh(tp); restart_cwnd = min(restart_cwnd, cwnd); - while ((delta -= tp->rto) > 0 && cwnd > restart_cwnd) + while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; @@ -126,26 +127,25 @@ static void tcp_cwnd_restart(struct tcp_sock *tp, struct dst_entry *dst) static inline void tcp_event_data_sent(struct tcp_sock *tp, struct sk_buff *skb, struct sock *sk) { - u32 now = tcp_time_stamp; + struct inet_connection_sock *icsk = inet_csk(sk); + const u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) - tcp_cwnd_restart(tp, __sk_dst_get(sk)); + if (!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto) + tcp_cwnd_restart(sk, __sk_dst_get(sk)); tp->lsndtime = now; /* If it is a reply for ato after last received * packet, enter pingpong mode. */ - if ((u32)(now - tp->ack.lrcvtime) < tp->ack.ato) - tp->ack.pingpong = 1; + if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) + icsk->icsk_ack.pingpong = 1; } static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { - struct tcp_sock *tp = tcp_sk(sk); - - tcp_dec_quickack_mode(tp, pkts); - tcp_clear_xmit_timer(sk, TCP_TIME_DACK); + tcp_dec_quickack_mode(sk, pkts); + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); } /* Determine a window scaling and initial window to offer. 
@@ -696,7 +696,7 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) + if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto) tcp_cwnd_application_limited(sk); } } @@ -1147,6 +1147,7 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) */ u32 __tcp_select_window(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); /* MSS for the peer's data. Previous verions used mss_clamp * here. I don't know if the value based on our guesses @@ -1154,7 +1155,7 @@ u32 __tcp_select_window(struct sock *sk) * but may be worse for the performance because of rcv_mss * fluctuations. --SAW 1998/11/1 */ - int mss = tp->ack.rcv_mss; + int mss = icsk->icsk_ack.rcv_mss; int free_space = tcp_space(sk); int full_space = min_t(int, tp->window_clamp, tcp_full_space(sk)); int window; @@ -1163,7 +1164,7 @@ u32 __tcp_select_window(struct sock *sk) mss = full_space; if (free_space < full_space/2) { - tp->ack.quick = 0; + icsk->icsk_ack.quick = 0; if (tcp_memory_pressure) tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U*tp->advmss); @@ -1491,7 +1492,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto); } packet_cnt -= tcp_skb_pcount(skb); @@ -1544,7 +1546,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; if (skb == skb_peek(&sk->sk_write_queue)) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } @@ -1780,8 +1782,8 @@ static inline void tcp_connect_init(struct sock *sk) tp->rcv_wup = 0; tp->copied_seq = 0; - tp->rto = TCP_TIMEOUT_INIT; - tp->retransmits = 0; + 
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_retransmits = 0; tcp_clear_retrans(tp); } @@ -1824,7 +1826,7 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); return 0; } @@ -1834,20 +1836,21 @@ int tcp_connect(struct sock *sk) */ void tcp_send_delayed_ack(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - int ato = tp->ack.ato; + struct inet_connection_sock *icsk = inet_csk(sk); + int ato = icsk->icsk_ack.ato; unsigned long timeout; if (ato > TCP_DELACK_MIN) { + const struct tcp_sock *tp = tcp_sk(sk); int max_ato = HZ/2; - if (tp->ack.pingpong || (tp->ack.pending&TCP_ACK_PUSHED)) + if (icsk->icsk_ack.pingpong || (icsk->icsk_ack.pending & ICSK_ACK_PUSHED)) max_ato = TCP_DELACK_MAX; /* Slow path, intersegment interval is "high". */ /* If some rtt estimate is known, use it to bound delayed ack. - * Do not use tp->rto here, use results of rtt measurements + * Do not use inet_csk(sk)->icsk_rto here, use results of rtt measurements * directly. */ if (tp->srtt) { @@ -1864,21 +1867,22 @@ void tcp_send_delayed_ack(struct sock *sk) timeout = jiffies + ato; /* Use new timeout only if there wasn't a older one earlier. */ - if (tp->ack.pending&TCP_ACK_TIMER) { + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { /* If delack timer was blocked or is about to expire, * send ACK now. 
*/ - if (tp->ack.blocked || time_before_eq(tp->ack.timeout, jiffies+(ato>>2))) { + if (icsk->icsk_ack.blocked || + time_before_eq(icsk->icsk_ack.timeout, jiffies + (ato >> 2))) { tcp_send_ack(sk); return; } - if (!time_before(timeout, tp->ack.timeout)) - timeout = tp->ack.timeout; + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; } - tp->ack.pending |= TCP_ACK_SCHED|TCP_ACK_TIMER; - tp->ack.timeout = timeout; - sk_reset_timer(sk, &tp->delack_timer, timeout); + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); } /* This routine sends an ack and also updates the window. */ @@ -1895,9 +1899,9 @@ void tcp_send_ack(struct sock *sk) */ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (buff == NULL) { - tcp_schedule_ack(tp); - tp->ack.ato = TCP_ATO_MIN; - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX); + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); return; } @@ -2011,6 +2015,7 @@ int tcp_write_wakeup(struct sock *sk) */ void tcp_send_probe0(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int err; @@ -2019,16 +2024,16 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. 
*/ tp->probes_out = 0; - tp->backoff = 0; + icsk->icsk_backoff = 0; return; } if (err <= 0) { - if (tp->backoff < sysctl_tcp_retries2) - tp->backoff++; + if (icsk->icsk_backoff < sysctl_tcp_retries2) + icsk->icsk_backoff++; tp->probes_out++; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RTO_MAX)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); } else { /* If packet was not sent due to local congestion, * do not backoff and do not remember probes_out. @@ -2038,8 +2043,9 @@ void tcp_send_probe0(struct sock *sk) */ if (!tp->probes_out) tp->probes_out=1; - tcp_reset_xmit_timer (sk, TCP_TIME_PROBE0, - min(tp->rto << tp->backoff, TCP_RESOURCE_PROBE_INTERVAL)); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + min(icsk->icsk_rto << icsk->icsk_backoff, + TCP_RESOURCE_PROBE_INTERVAL)); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0084227438c2..0b71380ee42f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,9 +36,9 @@ static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); static void tcp_keepalive_timer (unsigned long data); -#ifdef TCP_DEBUG -const char tcp_timer_bug_msg[] = KERN_DEBUG "tcpbug: unknown timer value\n"; -EXPORT_SYMBOL(tcp_timer_bug_msg); +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* @@ -46,40 +46,45 @@ EXPORT_SYMBOL(tcp_timer_bug_msg); * We may wish use just one timer maintaining a list of expire jiffies * to optimize. 
*/ - -void tcp_init_xmit_timers(struct sock *sk) +void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - init_timer(&tp->retransmit_timer); - tp->retransmit_timer.function=&tcp_write_timer; - tp->retransmit_timer.data = (unsigned long) sk; - tp->pending = 0; + init_timer(&icsk->icsk_retransmit_timer); + init_timer(&icsk->icsk_delack_timer); + init_timer(&sk->sk_timer); - init_timer(&tp->delack_timer); - tp->delack_timer.function=&tcp_delack_timer; - tp->delack_timer.data = (unsigned long) sk; - tp->ack.pending = 0; + icsk->icsk_retransmit_timer.function = retransmit_handler; + icsk->icsk_delack_timer.function = delack_handler; + sk->sk_timer.function = keepalive_handler; - init_timer(&sk->sk_timer); - sk->sk_timer.function = &tcp_keepalive_timer; - sk->sk_timer.data = (unsigned long)sk; + icsk->icsk_retransmit_timer.data = + icsk->icsk_delack_timer.data = + sk->sk_timer.data = (unsigned long)sk; + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; } -void tcp_clear_xmit_timers(struct sock *sk) +void inet_csk_clear_xmit_timers(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - tp->pending = 0; - sk_stop_timer(sk, &tp->retransmit_timer); - - tp->ack.pending = 0; - tp->ack.blocked = 0; - sk_stop_timer(sk, &tp->delack_timer); + icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &icsk->icsk_delack_timer); sk_stop_timer(sk, &sk->sk_timer); } +void tcp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, + &tcp_keepalive_timer); +} + static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? 
: ETIMEDOUT; @@ -155,15 +160,15 @@ static int tcp_orphan_retries(struct sock *sk, int alive) /* A write timeout has occurred. Process the after effects. */ static int tcp_write_timeout(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int retry_until; if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { - if (tp->retransmits) + if (icsk->icsk_retransmits) dst_negative_advice(&sk->sk_dst_cache); - retry_until = tp->syn_retries ? : sysctl_tcp_syn_retries; + retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (tp->retransmits >= sysctl_tcp_retries1) { + if (icsk->icsk_retransmits >= sysctl_tcp_retries1) { /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black hole detection. :-( @@ -189,16 +194,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = (tp->rto < TCP_RTO_MAX); + const int alive = (icsk->icsk_rto < TCP_RTO_MAX); retry_until = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->retransmits < retry_until)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_retransmits < retry_until)) return 1; } } - if (tp->retransmits >= retry_until) { + if (icsk->icsk_retransmits >= retry_until) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -210,26 +215,27 @@ static void tcp_delack_timer(unsigned long data) { struct sock *sk = (struct sock*)data; struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. 
*/ - tp->ack.blocked = 1; + icsk->icsk_ack.blocked = 1; NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); - sk_reset_timer(sk, &tp->delack_timer, jiffies + TCP_DELACK_MIN); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); goto out_unlock; } sk_stream_mem_reclaim(sk); - if (sk->sk_state == TCP_CLOSE || !(tp->ack.pending & TCP_ACK_TIMER)) + if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) goto out; - if (time_after(tp->ack.timeout, jiffies)) { - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); goto out; } - tp->ack.pending &= ~TCP_ACK_TIMER; + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; if (!skb_queue_empty(&tp->ucopy.prequeue)) { struct sk_buff *skb; @@ -242,16 +248,16 @@ static void tcp_delack_timer(unsigned long data) tp->ucopy.memory = 0; } - if (tcp_ack_scheduled(tp)) { - if (!tp->ack.pingpong) { + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ - tp->ack.ato = min(tp->ack.ato << 1, tp->rto); + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); } else { /* Delayed ACK missed: leave pingpong mode and * deflate ATO. 
*/ - tp->ack.pingpong = 0; - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; } tcp_send_ack(sk); NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); @@ -294,7 +300,8 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - int alive = ((tp->rto<backoff) < TCP_RTO_MAX); + const struct inet_connection_sock *icsk = inet_csk(sk); + const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); @@ -317,6 +324,7 @@ static void tcp_probe_timer(struct sock *sk) static void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out) goto out; @@ -351,7 +359,7 @@ static void tcp_retransmit_timer(struct sock *sk) if (tcp_write_timeout(sk)) goto out; - if (tp->retransmits == 0) { + if (icsk->icsk_retransmits == 0) { if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { if (tp->ca_state == TCP_CA_Recovery) @@ -381,10 +389,10 @@ static void tcp_retransmit_timer(struct sock *sk) /* Retransmission failed because of local congestion, * do not backoff. */ - if (!tp->retransmits) - tp->retransmits=1; - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, - min(tp->rto, TCP_RESOURCE_PROBE_INTERVAL)); + if (!icsk->icsk_retransmits) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); goto out; } @@ -403,13 +411,13 @@ static void tcp_retransmit_timer(struct sock *sk) * implemented ftp to mars will work nicely. We will have to fix * the 120 second clamps though! 
*/ - tp->backoff++; - tp->retransmits++; + icsk->icsk_backoff++; + icsk->icsk_retransmits++; out_reset_timer: - tp->rto = min(tp->rto << 1, TCP_RTO_MAX); - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); - if (tp->retransmits > sysctl_tcp_retries1) + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); + if (icsk->icsk_retransmits > sysctl_tcp_retries1) __sk_dst_reset(sk); out:; @@ -418,32 +426,32 @@ out:; static void tcp_write_timer(unsigned long data) { struct sock *sk = (struct sock*)data; - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); int event; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later */ - sk_reset_timer(sk, &tp->retransmit_timer, jiffies + (HZ / 20)); + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); goto out_unlock; } - if (sk->sk_state == TCP_CLOSE || !tp->pending) + if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) goto out; - if (time_after(tp->timeout, jiffies)) { - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); goto out; } - event = tp->pending; - tp->pending = 0; + event = icsk->icsk_pending; + icsk->icsk_pending = 0; switch (event) { - case TCP_TIME_RETRANS: + case ICSK_TIME_RETRANS: tcp_retransmit_timer(sk); break; - case TCP_TIME_PROBE0: + case ICSK_TIME_PROBE0: tcp_probe_timer(sk); break; } @@ -463,8 +471,9 @@ out_unlock: static void tcp_synack_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - int max_retries = tp->syn_retries ? : sysctl_tcp_synack_retries; + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + int max_retries = icsk->icsk_syn_retries ? 
: sysctl_tcp_synack_retries; int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; @@ -526,8 +535,8 @@ static void tcp_synack_timer(struct sock *sk) } /* Drop this request */ - tcp_synq_unlink(tp, req, reqp); - reqsk_queue_removed(&tp->accept_queue, req); + inet_csk_reqsk_queue_unlink(sk, req, reqp); + reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_free(req); continue; } @@ -541,15 +550,15 @@ static void tcp_synack_timer(struct sock *sk) lopt->clock_hand = i; if (lopt->qlen) - tcp_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); + inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); } -void tcp_delete_keepalive_timer (struct sock *sk) +void inet_csk_delete_keepalive_timer(struct sock *sk) { sk_stop_timer(sk, &sk->sk_timer); } -void tcp_reset_keepalive_timer (struct sock *sk, unsigned long len) +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) { sk_reset_timer(sk, &sk->sk_timer, jiffies + len); } @@ -560,9 +569,9 @@ void tcp_set_keepalive(struct sock *sk, int val) return; if (val && !sock_flag(sk, SOCK_KEEPOPEN)) - tcp_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); + inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tcp_sk(sk))); else if (!val) - tcp_delete_keepalive_timer(sk); + inet_csk_delete_keepalive_timer(sk); } @@ -576,7 +585,7 @@ static void tcp_keepalive_timer (unsigned long data) bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* Try again later. 
*/ - tcp_reset_keepalive_timer (sk, HZ/20); + inet_csk_reset_keepalive_timer (sk, HZ/20); goto out; } @@ -587,7 +596,7 @@ static void tcp_keepalive_timer (unsigned long data) if (sk->sk_state == TCP_FIN_WAIT2 && sock_flag(sk, SOCK_DEAD)) { if (tp->linger2 >= 0) { - int tmo = tcp_fin_time(tp) - TCP_TIMEWAIT_LEN; + const int tmo = tcp_fin_time(sk) - TCP_TIMEWAIT_LEN; if (tmo > 0) { tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); @@ -634,7 +643,7 @@ static void tcp_keepalive_timer (unsigned long data) sk_stream_mem_reclaim(sk); resched: - tcp_reset_keepalive_timer (sk, elapsed); + inet_csk_reset_keepalive_timer (sk, elapsed); goto out; death: @@ -645,7 +654,7 @@ out: sock_put(sk); } -EXPORT_SYMBOL(tcp_clear_xmit_timers); -EXPORT_SYMBOL(tcp_delete_keepalive_timer); +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(tcp_reset_keepalive_timer); +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4582d9cf4bbe..b9c3da349492 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1043,7 +1043,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = tcp_v6_ipv6only(sk2); + int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? 
ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index af8ad5bb273b..b9c7003b7f8b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -207,9 +207,9 @@ tb_not_found: tb->fastreuse = 0; success: - if (!inet_sk(sk)->bind_hash) + if (!inet_csk(sk)->icsk_bind_hash) inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_sk(sk)->bind_hash == tb); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); ret = 0; fail_unlock: @@ -381,7 +381,7 @@ EXPORT_SYMBOL_GPL(tcp_v6_lookup); * Open request hash tables. */ -static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) +static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd) { u32 a, b, c; @@ -401,14 +401,15 @@ static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd) return c & (TCP_SYNQ_HSIZE - 1); } -static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp, +static struct request_sock *tcp_v6_search_req(const struct sock *sk, struct request_sock ***prevp, __u16 rport, struct in6_addr *raddr, struct in6_addr *laddr, int iif) { - struct listen_sock *lopt = tp->accept_queue.listen_opt; + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req, **prev; for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)]; @@ -619,7 +620,7 @@ ok: } head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)]; - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { @@ -925,7 +926,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sock_owned_by_user(sk)) goto out; - req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr, + req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, &hdr->saddr, tcp_v6_iif(skb)); if (!req) goto out; @@ -940,7 +941,7 @@ static void 
tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; } - tcp_synq_drop(sk, req, prev); + inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case TCP_SYN_SENT: @@ -1245,11 +1246,10 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; struct tcphdr *th = skb->h.th; - struct tcp_sock *tp = tcp_sk(sk); struct sock *nsk; /* Find possible connection requests. */ - req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr, + req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); @@ -1278,12 +1278,12 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req) { - struct tcp_sock *tp = tcp_sk(sk); - struct listen_sock *lopt = tp->accept_queue.listen_opt; - u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd); - reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT); - tcp_synq_added(sk); + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); + inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); } @@ -1308,13 +1308,13 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* * There are no SYN attacks on IPv6, yet... 
*/ - if (tcp_synq_is_full(sk) && !isn) { + if (inet_csk_reqsk_queue_is_full(sk) && !isn) { if (net_ratelimit()) printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n"); goto drop; } - if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1) + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) goto drop; req = reqsk_alloc(&tcp6_request_sock_ops); @@ -2015,7 +2015,7 @@ static int tcp_v6_init_sock(struct sock *sk) tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - tp->rto = TCP_TIMEOUT_INIT; + inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2098,18 +2098,20 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) unsigned long timer_expires; struct inet_sock *inet = inet_sk(sp); struct tcp_sock *tp = tcp_sk(sp); + const struct inet_connection_sock *icsk = inet_csk(sp); struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; destp = ntohs(inet->dport); srcp = ntohs(inet->sport); - if (tp->pending == TCP_TIME_RETRANS) { + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { timer_active = 1; - timer_expires = tp->timeout; - } else if (tp->pending == TCP_TIME_PROBE0) { + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = tp->timeout; + timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; @@ -2130,12 +2132,14 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq, timer_active, jiffies_to_clock_t(timer_expires - jiffies), - tp->retransmits, + icsk->icsk_retransmits, sock_i_uid(sp), tp->probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, - tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong, + icsk->icsk_rto, + icsk->icsk_ack.ato, + (icsk->icsk_ack.quick << 1 ) | 
icsk->icsk_ack.pingpong, tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh ); } @@ -2227,7 +2231,7 @@ struct proto tcpv6_prot = { .close = tcp_close, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, - .accept = tcp_accept, + .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, .destroy = tcp_v6_destroy_sock, -- cgit v1.2.3 From 3f421baa4720b708022f8bcc52a61e5cd6f10bf8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:08 -0700 Subject: [NET]: Just move the inet_connection_sock function from tcp sources Completing the previous changeset, this also generalises tcp_v4_synq_add, renaming it to inet_csk_reqsk_queue_hash_add, already being used in the DCCP tree, which I plan to merge RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 +- include/net/inet_connection_sock.h | 152 ++++++++++++++ include/net/tcp.h | 160 +-------------- net/ipv4/Makefile | 2 +- net/ipv4/inet_connection_sock.c | 401 +++++++++++++++++++++++++++++++++++++ net/ipv4/tcp.c | 93 --------- net/ipv4/tcp_input.c | 10 +- net/ipv4/tcp_ipv4.c | 210 +------------------ net/ipv4/tcp_output.c | 19 +- net/ipv4/tcp_timer.c | 65 +----- 10 files changed, 588 insertions(+), 531 deletions(-) create mode 100644 net/ipv4/inet_connection_sock.c (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 88591913c94f..777339b68464 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int inet_twsk_ipv6only(const struct sock *sk) -{ - return inet_twsk(sk)->tw_ipv6only; -} - static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; } #else #define __ipv6_only_sock(sk) 0 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ef609396e41b..97e002001c1a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,15 @@ #define _INET_CONNECTION_SOCK_H #include +#include #include #include +#define INET_CSK_DEBUG 1 + +/* Cancel timers, when they are not required. */ +#undef INET_CSK_CLEAR_TIMERS + struct inet_bind_bucket; struct inet_hashinfo; @@ -61,17 +67,107 @@ struct inet_connection_sock { } icsk_ack; }; +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ + static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { return (struct inet_connection_sock *)sk; } +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 +}; + extern void inet_csk_init_xmit_timers(struct sock *sk, void (*retransmit_handler)(unsigned long), void (*delack_handler)(unsigned long), void (*keepalive_handler)(unsigned long)); extern void inet_csk_clear_xmit_timers(struct sock *sk); +static inline void inet_csk_schedule_ack(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; +} + +static inline int inet_csk_ack_scheduled(const struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; +} + +static inline void inet_csk_delack_init(struct sock *sk) +{ + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); +} + +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); + +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; +#endif + +static inline void inet_csk_clear_xmit_timer(struct sock 
*sk, const int what) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +#endif + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); +#endif + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } +#endif +} + +/* + * Reset the retransmission timer + */ +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when, + const unsigned long max_when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (when > max_when) { +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); +#endif + when = max_when; + } + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } +#endif +} + +extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); + extern struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __u16 rport, @@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, extern struct dst_entry* inet_csk_route_req(struct sock *sk, const struct request_sock *req); +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) +{ + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); +} + 
+extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned timeout); + +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) +{ + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); +} + +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) +{ + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); +} + +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) +{ + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) +{ + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) +{ + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); +} + +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); +} + +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + reqsk_free(req); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index a943c79c88b0..dd9a5a288f88 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,18 +19,16 @@ #define _TCP_H #define TCP_DEBUG 1 -#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 -/* Cancel timers, when they are not required. 
*/ -#undef INET_CSK_CLEAR_TIMERS - #include #include #include #include #include #include + +#include #include #include #include @@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -#define ICSK_TIME_DACK 2 /* Delayed ack timer */ -#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ - /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ @@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define AF_INET_FAMILY(fam) ((fam) == AF_INET) -#else -#define AF_INET_FAMILY(fam) 1 -#endif - /* * Pointers to address related TCP functions * (i.e. things that depend on the address family) @@ -373,22 +360,6 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum inet_csk_ack_state_t { - ICSK_ACK_SCHED = 1, - ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4 -}; - -static inline void inet_csk_schedule_ack(struct sock *sk) -{ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; -} - -static inline int inet_csk_ack_scheduled(const struct sock *sk) -{ - return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; -} - static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { @@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void inet_csk_delack_init(struct sock *sk) -{ - memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); -} - static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; @@ -442,7 
+408,6 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) inet_csk_clear_xmit_timers(sk); } -extern void inet_csk_delete_keepalive_timer(struct sock *sk); -extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef INET_CSK_DEBUG -extern const char inet_csk_timer_bug_msg[]; -#endif - /* tcp_diag.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); @@ -559,60 +518,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); -#endif - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_delack_timer); -#endif - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - -/* - * Reset the retransmission timer - */ -static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, - unsigned long when) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (when > TCP_RTO_MAX) { -#ifdef INET_CSK_DEBUG - pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", - sk, what, when, 
current_text_addr()); -#endif - when = TCP_RTO_MAX; - } - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = what; - icsk->icsk_timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending |= ICSK_ACK_TIMER; - icsk->icsk_ack.timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. @@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + icsk->icsk_rto, TCP_RTO_MAX); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) wake_up_interruptible(sk->sk_sleep); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4); + (3 * TCP_RTO_MIN) / 4, + TCP_RTO_MAX); } return 1; } @@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void inet_csk_reqsk_queue_add(struct sock *sk, - struct request_sock *req, - struct sock *child) -{ - 
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); -} - -static inline void inet_csk_reqsk_queue_removed(struct sock *sk, - struct request_sock *req) -{ - if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) - inet_csk_delete_keepalive_timer(sk); -} - -static inline void inet_csk_reqsk_queue_added(struct sock *sk, - const unsigned long timeout) -{ - if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) - inet_csk_reset_keepalive_timer(sk, timeout); -} - -static inline int inet_csk_reqsk_queue_len(const struct sock *sk) -{ - return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) -{ - return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); -} - -static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); -} - -static inline void inet_csk_reqsk_queue_drop(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); - reqsk_free(req); -} - static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 6650d18e400f..ea0e1d87dc7e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -5,7 +5,7 @@ obj-y := route.o inetpeer.o protocol.o \ ip_input.o ip_fragment.o ip_forward.o ip_options.o \ ip_output.o ip_sockglue.o inet_hashtables.o \ - inet_timewait_sock.o \ + inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \ diff 
--git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c new file mode 100644 index 000000000000..2712400a8bb8 --- /dev/null +++ b/net/ipv4/inet_connection_sock.c @@ -0,0 +1,401 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Support for INET connection oriented protocols. + * + * Authors: See the TCP sources + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or(at your option) any later version. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef INET_CSK_DEBUG +const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; +EXPORT_SYMBOL(inet_csk_timer_bug_msg); +#endif + +/* + * This array holds the first and last local port number. + * For high-usage systems, use sysctl to change this to + * 32768-61000 + */ +int sysctl_local_port_range[2] = { 1024, 4999 }; + +static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +{ + const u32 sk_rcv_saddr = inet_rcv_saddr(sk); + struct sock *sk2; + struct hlist_node *node; + int reuse = sk->sk_reuse; + + sk_for_each_bound(sk2, node, &tb->owners) { + if (sk != sk2 && + !inet_v6_ipv6only(sk2) && + (!sk->sk_bound_dev_if || + !sk2->sk_bound_dev_if || + sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { + if (!reuse || !sk2->sk_reuse || + sk2->sk_state == TCP_LISTEN) { + const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + if (!sk2_rcv_saddr || !sk_rcv_saddr || + sk2_rcv_saddr == sk_rcv_saddr) + break; + } + } + } + return node != NULL; +} + +/* Obtain a reference to a local port for the given sock, + * if snum is zero it means select any available local port. 
+ */ +int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum) +{ + struct inet_bind_hashbucket *head; + struct hlist_node *node; + struct inet_bind_bucket *tb; + int ret; + + local_bh_disable(); + if (!snum) { + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + int rover; + + spin_lock(&hashinfo->portalloc_lock); + if (hashinfo->port_rover < low) + rover = low; + else + rover = hashinfo->port_rover; + do { + rover++; + if (rover > high) + rover = low; + head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) + if (tb->port == rover) + goto next; + break; + next: + spin_unlock(&head->lock); + } while (--remaining > 0); + hashinfo->port_rover = rover; + spin_unlock(&hashinfo->portalloc_lock); + + /* Exhausted local port range during search? It is not + * possible for us to be holding one of the bind hash + * locks if this test triggers, because if 'remaining' + * drops to zero, we broke out of the do/while loop at + * the top level, not from the 'break;' statement. + */ + ret = 1; + if (remaining <= 0) + goto fail; + + /* OK, here is the one we will use. HEAD is + * non-NULL and we hold it's mutex. 
+ */ + snum = rover; + } else { + head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; + spin_lock(&head->lock); + inet_bind_bucket_for_each(tb, node, &head->chain) + if (tb->port == snum) + goto tb_found; + } + tb = NULL; + goto tb_not_found; +tb_found: + if (!hlist_empty(&tb->owners)) { + if (sk->sk_reuse > 1) + goto success; + if (tb->fastreuse > 0 && + sk->sk_reuse && sk->sk_state != TCP_LISTEN) { + goto success; + } else { + ret = 1; + if (inet_csk_bind_conflict(sk, tb)) + goto fail_unlock; + } + } +tb_not_found: + ret = 1; + if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) + goto fail_unlock; + if (hlist_empty(&tb->owners)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) + tb->fastreuse = 1; + else + tb->fastreuse = 0; + } else if (tb->fastreuse && + (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) + tb->fastreuse = 0; +success: + if (!inet_csk(sk)->icsk_bind_hash) + inet_bind_hash(sk, tb, snum); + BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); + ret = 0; + +fail_unlock: + spin_unlock(&head->lock); +fail: + local_bh_enable(); + return ret; +} + +EXPORT_SYMBOL_GPL(inet_csk_get_port); + +/* + * Wait for an incoming connection, avoid race conditions. This must be called + * with the socket locked. + */ +static int inet_csk_wait_for_connect(struct sock *sk, long timeo) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + DEFINE_WAIT(wait); + int err; + + /* + * True wake-one mechanism for incoming connections: only + * one process gets woken up, not the 'whole herd'. + * Since we do not 'race & poll' for established sockets + * anymore, the common case will execute the loop only once. + * + * Subtle issue: "add_wait_queue_exclusive()" will be added + * after any current non-exclusive waiters, and we know that + * it will always _stay_ after any new non-exclusive waiters + * because all non-exclusive waiters are added at the + * beginning of the wait-queue. 
As such, it's ok to "drop" + * our exclusiveness temporarily when we get woken up without + * having to remove and re-insert us on the wait queue. + */ + for (;;) { + prepare_to_wait_exclusive(sk->sk_sleep, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) + timeo = schedule_timeout(timeo); + lock_sock(sk); + err = 0; + if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) + break; + err = -EINVAL; + if (sk->sk_state != TCP_LISTEN) + break; + err = sock_intr_errno(timeo); + if (signal_pending(current)) + break; + err = -EAGAIN; + if (!timeo) + break; + } + finish_wait(sk->sk_sleep, &wait); + return err; +} + +/* + * This will accept the next outstanding connection. + */ +struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct sock *newsk; + int error; + + lock_sock(sk); + + /* We need to make sure that this socket is listening, + * and that it has something pending. + */ + error = -EINVAL; + if (sk->sk_state != TCP_LISTEN) + goto out_err; + + /* Find already established connection */ + if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { + long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + + /* If this is a non blocking socket don't sleep */ + error = -EAGAIN; + if (!timeo) + goto out_err; + + error = inet_csk_wait_for_connect(sk, timeo); + if (error) + goto out_err; + } + + newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); + BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); +out: + release_sock(sk); + return newsk; +out_err: + newsk = NULL; + *err = error; + goto out; +} + +EXPORT_SYMBOL(inet_csk_accept); + +/* + * Using different timers for retransmit, delayed acks and probes + * We may wish use just one timer maintaining a list of expire jiffies + * to optimize. 
+ */ +void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + init_timer(&icsk->icsk_retransmit_timer); + init_timer(&icsk->icsk_delack_timer); + init_timer(&sk->sk_timer); + + icsk->icsk_retransmit_timer.function = retransmit_handler; + icsk->icsk_delack_timer.function = delack_handler; + sk->sk_timer.function = keepalive_handler; + + icsk->icsk_retransmit_timer.data = + icsk->icsk_delack_timer.data = + sk->sk_timer.data = (unsigned long)sk; + + icsk->icsk_pending = icsk->icsk_ack.pending = 0; +} + +EXPORT_SYMBOL(inet_csk_init_xmit_timers); + +void inet_csk_clear_xmit_timers(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; + + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); + sk_stop_timer(sk, &icsk->icsk_delack_timer); + sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_clear_xmit_timers); + +void inet_csk_delete_keepalive_timer(struct sock *sk) +{ + sk_stop_timer(sk, &sk->sk_timer); +} + +EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); + +void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) +{ + sk_reset_timer(sk, &sk->sk_timer, jiffies + len); +} + +EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); + +struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req) +{ + struct rtable *rt; + const struct inet_request_sock *ireq = inet_rsk(req); + struct ip_options *opt = inet_rsk(req)->opt; + struct flowi fl = { .oif = sk->sk_bound_dev_if, + .nl_u = { .ip4_u = + { .daddr = ((opt && opt->srr) ? 
+ opt->faddr : + ireq->rmt_addr), + .saddr = ireq->loc_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = inet_sk(sk)->sport, + .dport = ireq->rmt_port } } }; + + if (ip_route_output_flow(&rt, &fl, sk, 0)) { + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { + ip_rt_put(rt); + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + return &rt->u.dst; +} + +EXPORT_SYMBOL_GPL(inet_csk_route_req); + +static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, + const u32 rnd, const u16 synq_hsize) +{ + return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) +#else +#define AF_INET_FAMILY(fam) 1 +#endif + +struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, const __u32 raddr, + const __u32 laddr) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + struct request_sock *req, **prev; + + for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries)]; + (req = *prev) != NULL; + prev = &req->dl_next) { + const struct inet_request_sock *ireq = inet_rsk(req); + + if (ireq->rmt_port == rport && + ireq->rmt_addr == raddr && + ireq->loc_addr == laddr && + AF_INET_FAMILY(req->rsk_ops->family)) { + BUG_TRAP(!req->sk); + *prevp = prev; + break; + } + } + + return req; +} + +EXPORT_SYMBOL_GPL(inet_csk_search_req); + +void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, + const unsigned timeout) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; + const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, + lopt->hash_rnd, 
lopt->nr_table_entries); + + reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); + inet_csk_reqsk_queue_added(sk, timeout); +} + +EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 8177b86570db..581016a6a93f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1804,98 +1804,6 @@ int tcp_disconnect(struct sock *sk, int flags) return err; } -/* - * Wait for an incoming connection, avoid race - * conditions. This must be called with the socket locked. - */ -static int wait_for_connect(struct sock *sk, long timeo) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - DEFINE_WAIT(wait); - int err; - - /* - * True wake-one mechanism for incoming connections: only - * one process gets woken up, not the 'whole herd'. - * Since we do not 'race & poll' for established sockets - * anymore, the common case will execute the loop only once. - * - * Subtle issue: "add_wait_queue_exclusive()" will be added - * after any current non-exclusive waiters, and we know that - * it will always _stay_ after any new non-exclusive waiters - * because all non-exclusive waiters are added at the - * beginning of the wait-queue. As such, it's ok to "drop" - * our exclusiveness temporarily when we get woken up without - * having to remove and re-insert us on the wait queue. - */ - for (;;) { - prepare_to_wait_exclusive(sk->sk_sleep, &wait, - TASK_INTERRUPTIBLE); - release_sock(sk); - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) - timeo = schedule_timeout(timeo); - lock_sock(sk); - err = 0; - if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) - break; - err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - break; - err = sock_intr_errno(timeo); - if (signal_pending(current)) - break; - err = -EAGAIN; - if (!timeo) - break; - } - finish_wait(sk->sk_sleep, &wait); - return err; -} - -/* - * This will accept the next outstanding connection. 
- */ - -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *newsk; - int error; - - lock_sock(sk); - - /* We need to make sure that this socket is listening, - * and that it has something pending. - */ - error = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - goto out_err; - - /* Find already established connection */ - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); - - /* If this is a non blocking socket don't sleep */ - error = -EAGAIN; - if (!timeo) - goto out_err; - - error = wait_for_connect(sk, timeo); - if (error) - goto out_err; - } - - newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - BUG_TRAP(newsk->sk_state != TCP_SYN_RECV); -out: - release_sock(sk); - return newsk; -out_err: - newsk = NULL; - *err = error; - goto out; -} - /* * Socket option code for TCP. */ @@ -2344,7 +2252,6 @@ void __init tcp_init(void) tcp_register_congestion_control(&tcp_reno); } -EXPORT_SYMBOL(inet_csk_accept); EXPORT_SYMBOL(tcp_close); EXPORT_SYMBOL(tcp_destroy_sock); EXPORT_SYMBOL(tcp_disconnect); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a8c5c2d90cb..b35badf53aa5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1278,7 +1278,7 @@ static int tcp_check_sack_reneging(struct sock *sk) inet_csk(sk)->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto); + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1961,7 +1961,7 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); } else { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } } @@ -2147,7 +2147,8 @@ static void 
tcp_ack_probe(struct sock *sk) */ } else { inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), + TCP_RTO_MAX); } } @@ -3968,7 +3969,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; tcp_incr_quickack(sk); tcp_enter_quickack_mode(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, TCP_RTO_MAX); discard: __kfree_skb(skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cd41265d17f..2f605b9e6b67 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,138 +97,6 @@ struct inet_hashinfo __cacheline_aligned tcp_hashinfo = { .port_rover = 1024 - 1, }; -/* - * This array holds the first and last local port number. - * For high-usage systems, use sysctl to change this to - * 32768-61000 - */ -int sysctl_local_port_range[2] = { 1024, 4999 }; - -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) -{ - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); - struct sock *sk2; - struct hlist_node *node; - int reuse = sk->sk_reuse; - - sk_for_each_bound(sk2, node, &tb->owners) { - if (sk != sk2 && - !inet_v6_ipv6only(sk2) && - (!sk->sk_bound_dev_if || - !sk2->sk_bound_dev_if || - sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { - if (!reuse || !sk2->sk_reuse || - sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - if (!sk2_rcv_saddr || !sk_rcv_saddr || - sk2_rcv_saddr == sk_rcv_saddr) - break; - } - } - } - return node != NULL; -} - -/* Obtain a reference to a local port for the given sock, - * if snum is zero it means select any available local port. 
- */ -int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum) -{ - struct inet_bind_hashbucket *head; - struct hlist_node *node; - struct inet_bind_bucket *tb; - int ret; - - local_bh_disable(); - if (!snum) { - int low = sysctl_local_port_range[0]; - int high = sysctl_local_port_range[1]; - int remaining = (high - low) + 1; - int rover; - - spin_lock(&hashinfo->portalloc_lock); - if (hashinfo->port_rover < low) - rover = low; - else - rover = hashinfo->port_rover; - do { - rover++; - if (rover > high) - rover = low; - head = &hashinfo->bhash[inet_bhashfn(rover, hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == rover) - goto next; - break; - next: - spin_unlock(&head->lock); - } while (--remaining > 0); - hashinfo->port_rover = rover; - spin_unlock(&hashinfo->portalloc_lock); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (unlikely(remaining <= 0)) - goto fail; - - /* OK, here is the one we will use. HEAD is - * non-NULL and we hold it's mutex. 
- */ - snum = rover; - } else { - head = &hashinfo->bhash[inet_bhashfn(snum, hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, node, &head->chain) - if (tb->port == snum) - goto tb_found; - } - tb = NULL; - goto tb_not_found; -tb_found: - if (!hlist_empty(&tb->owners)) { - if (sk->sk_reuse > 1) - goto success; - if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) { - goto success; - } else { - ret = 1; - if (inet_csk_bind_conflict(sk, tb)) - goto fail_unlock; - } - } -tb_not_found: - ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, head, snum)) == NULL) - goto fail_unlock; - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else - tb->fastreuse = 0; - } else if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; -success: - if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); - BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb); - ret = 0; - -fail_unlock: - spin_unlock(&head->lock); -fail: - local_bh_enable(); - return ret; -} - static int tcp_v4_get_port(struct sock *sk, unsigned short snum) { return inet_csk_get_port(&tcp_hashinfo, sk, snum); @@ -568,52 +436,6 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, - const u32 rnd, const u16 synq_hsize) -{ - return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); -} - -struct request_sock *inet_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, - const __u16 rport, const __u32 raddr, - const __u32 laddr) -{ - const struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; - - for (prev = &lopt->syn_table[inet_synq_hash(raddr, rport, lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - 
prev = &req->dl_next) { - const struct inet_request_sock *ireq = inet_rsk(req); - - if (ireq->rmt_port == rport && - ireq->rmt_addr == raddr && - ireq->loc_addr == laddr && - AF_INET_FAMILY(req->rsk_ops->family)) { - BUG_TRAP(!req->sk); - *prevp = prev; - break; - } - } - - return req; -} - -static void tcp_v4_synq_add(struct sock *sk, struct request_sock *req) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - const u32 h = inet_synq_hash(inet_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, - lopt->hash_rnd, lopt->nr_table_entries); - - reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT); - inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT); -} - - /* * This routine does path mtu discovery as defined in RFC1191. */ @@ -963,36 +785,6 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) req->ts_recent); } -struct dst_entry* inet_csk_route_req(struct sock *sk, - const struct request_sock *req) -{ - struct rtable *rt; - const struct inet_request_sock *ireq = inet_rsk(req); - struct ip_options *opt = inet_rsk(req)->opt; - struct flowi fl = { .oif = sk->sk_bound_dev_if, - .nl_u = { .ip4_u = - { .daddr = ((opt && opt->srr) ? - opt->faddr : - ireq->rmt_addr), - .saddr = ireq->loc_addr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = inet_sk(sk)->sport, - .dport = ireq->rmt_port } } }; - - if (ip_route_output_flow(&rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway) { - ip_rt_put(rt); - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return NULL; - } - return &rt->u.dst; -} - /* * Send a SYN-ACK after having received an ACK. 
* This still operates on a request_sock only, not on a big @@ -1222,7 +1014,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (want_cookie) { reqsk_free(req); } else { - tcp_v4_synq_add(sk, req); + inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); } return 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 6f0a7e30ceac..f458eacb5ef2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1493,7 +1493,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (skb == skb_peek(&sk->sk_write_queue)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto); + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); } packet_cnt -= tcp_skb_pcount(skb); @@ -1546,7 +1547,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) break; if (skb == skb_peek(&sk->sk_write_queue)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, + TCP_RTO_MAX); NET_INC_STATS_BH(LINUX_MIB_TCPFORWARDRETRANS); } @@ -1826,7 +1829,8 @@ int tcp_connect(struct sock *sk) TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. 
*/ - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); return 0; } @@ -1901,7 +1905,8 @@ void tcp_send_ack(struct sock *sk) if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MAX, TCP_RTO_MAX); return; } @@ -2033,7 +2038,8 @@ void tcp_send_probe0(struct sock *sk) icsk->icsk_backoff++; tp->probes_out++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, - min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX)); + min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), + TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, * do not backoff and do not remember probes_out. @@ -2045,7 +2051,8 @@ void tcp_send_probe0(struct sock *sk) tp->probes_out=1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, - TCP_RESOURCE_PROBE_INTERVAL)); + TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); } } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0b71380ee42f..c03930c48f42 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -36,55 +36,14 @@ static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); static void tcp_keepalive_timer (unsigned long data); -#ifdef INET_CSK_DEBUG -const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; -EXPORT_SYMBOL(inet_csk_timer_bug_msg); -#endif - -/* - * Using different timers for retransmit, delayed acks and probes - * We may wish use just one timer maintaining a list of expire jiffies - * to optimize. 
- */ -void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - init_timer(&icsk->icsk_retransmit_timer); - init_timer(&icsk->icsk_delack_timer); - init_timer(&sk->sk_timer); - - icsk->icsk_retransmit_timer.function = retransmit_handler; - icsk->icsk_delack_timer.function = delack_handler; - sk->sk_timer.function = keepalive_handler; - - icsk->icsk_retransmit_timer.data = - icsk->icsk_delack_timer.data = - sk->sk_timer.data = (unsigned long)sk; - - icsk->icsk_pending = icsk->icsk_ack.pending = 0; -} - -void inet_csk_clear_xmit_timers(struct sock *sk) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - icsk->icsk_pending = icsk->icsk_ack.pending = icsk->icsk_ack.blocked = 0; - - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); - sk_stop_timer(sk, &icsk->icsk_delack_timer); - sk_stop_timer(sk, &sk->sk_timer); -} - void tcp_init_xmit_timers(struct sock *sk) { inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, &tcp_keepalive_timer); } +EXPORT_SYMBOL(tcp_init_xmit_timers); + static void tcp_write_err(struct sock *sk) { sk->sk_err = sk->sk_err_soft ? 
: ETIMEDOUT; @@ -392,7 +351,8 @@ static void tcp_retransmit_timer(struct sock *sk) if (!icsk->icsk_retransmits) icsk->icsk_retransmits = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL)); + min(icsk->icsk_rto, TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); goto out; } @@ -416,7 +376,7 @@ static void tcp_retransmit_timer(struct sock *sk) out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (icsk->icsk_retransmits > sysctl_tcp_retries1) __sk_dst_reset(sk); @@ -553,16 +513,6 @@ static void tcp_synack_timer(struct sock *sk) inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); } -void inet_csk_delete_keepalive_timer(struct sock *sk) -{ - sk_stop_timer(sk, &sk->sk_timer); -} - -void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len) -{ - sk_reset_timer(sk, &sk->sk_timer, jiffies + len); -} - void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) @@ -653,8 +603,3 @@ out: bh_unlock_sock(sk); sock_put(sk); } - -EXPORT_SYMBOL(inet_csk_clear_xmit_timers); -EXPORT_SYMBOL(inet_csk_delete_keepalive_timer); -EXPORT_SYMBOL(tcp_init_xmit_timers); -EXPORT_SYMBOL(inet_csk_reset_keepalive_timer); -- cgit v1.2.3 From 295f7324ff8d9ea58b4d3ec93b1aaa1d80e048a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:56 -0700 Subject: [ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer With this we're very close to getting all of the current TCP refactorings in my dccp-2.6 tree merged, next changeset will export some functions needed by the current DCCP code and then dccp-2.6.git will be born! Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 +- include/net/inet_connection_sock.h | 2 ++ include/net/request_sock.h | 7 ++++++ include/net/tcp.h | 3 ++- net/core/request_sock.c | 1 + net/ipv4/af_inet.c | 1 + net/ipv4/tcp.c | 21 ++++++++--------- net/ipv4/tcp_input.c | 11 ++++++--- net/ipv4/tcp_minisocks.c | 10 +++++---- net/ipv4/tcp_timer.c | 46 +++++++++++++++++++++++--------------- 10 files changed, 67 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 800930fac388..620096840744 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -270,7 +270,7 @@ struct tcp_sock { __u8 frto_counter; /* Number of new acks after RTO */ __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 defer_accept; /* User waits for some data after accept() */ + /* ONE BYTE HOLE, TRY TO PACK */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index a50f4a4b7b4b..692825fc8135 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, reqsk_free(req); } +extern void inet_csk_listen_stop(struct sock *sk); + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b7c7eecbe64d..447d287a38fd 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -97,6 +97,7 @@ struct listen_sock { * * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children + * @rskq_defer_accept - User waits for some data after accept() * @syn_wait_lock - serializer * * %syn_wait_lock is necessary only to avoid proc interface having to grab the main @@ -112,6 +113,8 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; + u8 rskq_defer_accept; + 
/* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; }; @@ -255,4 +258,8 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, write_unlock(&queue->syn_wait_lock); } +extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, + const unsigned long interval, const unsigned long timeout, + const unsigned long max_rto, int max_retries); + #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 68f1ec1c583a..2423f059b62b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int tcp_listen_start(struct sock *sk); +extern int inet_csk_listen_start(struct sock *sk, + const int nr_table_entries); extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 98f0fc923f91..b8203de5ff07 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -52,6 +52,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue, get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd)); rwlock_init(&queue->syn_wait_lock); queue->rskq_accept_head = queue->rskq_accept_head = NULL; + queue->rskq_defer_accept = 0; lopt->nr_table_entries = nr_table_entries; write_lock_bh(&queue->syn_wait_lock); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f691058cf599..52f5ecc58c46 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -99,6 +99,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a1f812159ced..a4e9eec44895 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -495,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); * This routine closes sockets which have been at least partially * opened, but not yet accepted. 
*/ -static void inet_csk_listen_stop(struct sock *sk) +void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock *acc_req; @@ -1947,15 +1947,15 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, break; case TCP_DEFER_ACCEPT: - tp->defer_accept = 0; + icsk->icsk_accept_queue.rskq_defer_accept = 0; if (val > 0) { /* Translate value in seconds to number of * retransmits */ - while (tp->defer_accept < 32 && + while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && val > ((TCP_TIMEOUT_INIT / HZ) << - tp->defer_accept)) - tp->defer_accept++; - tp->defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept)) + icsk->icsk_accept_queue.rskq_defer_accept++; + icsk->icsk_accept_queue.rskq_defer_accept++; } break; @@ -2058,6 +2058,7 @@ EXPORT_SYMBOL_GPL(tcp_get_info); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int val, len; @@ -2095,7 +2096,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = tp->keepalive_probes ? : sysctl_tcp_keepalive_probes; break; case TCP_SYNCNT: - val = inet_csk(sk)->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; @@ -2103,8 +2104,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !tp->defer_accept ? 0 : ((TCP_TIMEOUT_INIT / HZ) << - (tp->defer_accept - 1)); + val = !icsk->icsk_accept_queue.rskq_defer_accept ? 
0 : + ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; @@ -2125,7 +2126,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, return 0; } case TCP_QUICKACK: - val = !inet_csk(sk)->icsk_ack.pingpong; + val = !icsk->icsk_ack.pingpong; break; case TCP_CONGESTION: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b35badf53aa5..71d456148de7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3831,6 +3831,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_parse_options(skb, &tp->rx_opt, 0); if (th->ack) { + struct inet_connection_sock *icsk; /* rfc793: * "If the state is SYN-SENT then * first check the ACK bit @@ -3956,7 +3957,11 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, sk_wake_async(sk, 0, POLL_OUT); } - if (sk->sk_write_pending || tp->defer_accept || inet_csk(sk)->icsk_ack.pingpong) { + icsk = inet_csk(sk); + + if (sk->sk_write_pending || + icsk->icsk_accept_queue.rskq_defer_accept || + icsk->icsk_ack.pingpong) { /* Save one ACK. Data will be ready after * several ticks, if write_pending is set. 
* @@ -3965,8 +3970,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * to stand against the temptation 8) --ANK */ inet_csk_schedule_ack(sk); - inet_csk(sk)->icsk_ack.lrcvtime = tcp_time_stamp; - inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.lrcvtime = tcp_time_stamp; + icsk->icsk_ack.ato = TCP_ATO_MIN; tcp_incr_quickack(sk); tcp_enter_quickack_mode(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4cfbe1d1c920..2d95afe5b393 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -787,9 +787,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, does sequence test, SYN is truncated, and thus we consider it a bare ACK. - If tp->defer_accept, we silently drop this bare ACK. Otherwise, - we create an established connection. Both ends (listening sockets) - accept the new incoming connection and try to talk to each other. 8-) + If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this + bare ACK. Otherwise, we create an established connection. Both + ends (listening sockets) accept the new incoming connection and try + to talk to each other. 8-) Note: This case is both harmless, and rare. Possibility is about the same as us discovering intelligent life on another plant tomorrow. @@ -856,7 +857,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb, return NULL; /* If TCP_DEFER_ACCEPT is set, drop bare ACK. 
*/ - if (tp->defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { + if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c03930c48f42..b614ad4d30c9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -424,16 +424,12 @@ out_unlock: sock_put(sk); } -/* - * Timer for listening sockets - */ - -static void tcp_synack_timer(struct sock *sk) +void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, + const unsigned long interval, const unsigned long timeout, + const unsigned long max_rto, int max_retries) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + struct inet_connection_sock *icsk = inet_csk(parent); + struct listen_sock *lopt = queue->listen_opt; int thresh = max_retries; unsigned long now = jiffies; struct request_sock **reqp, *req; @@ -470,10 +466,10 @@ static void tcp_synack_timer(struct sock *sk) } } - if (tp->defer_accept) - max_retries = tp->defer_accept; + if (queue->rskq_defer_accept) + max_retries = queue->rskq_defer_accept; - budget = 2*(TCP_SYNQ_HSIZE/(TCP_TIMEOUT_INIT/TCP_SYNQ_INTERVAL)); + budget = 2 * (lopt->nr_table_entries / (timeout / interval)); i = lopt->clock_hand; do { @@ -482,20 +478,19 @@ static void tcp_synack_timer(struct sock *sk) if (time_after_eq(now, req->expires)) { if ((req->retrans < thresh || (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(sk, req, NULL)) { + && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) { unsigned long timeo; if (req->retrans++ == 0) lopt->qlen_young--; - timeo = min((TCP_TIMEOUT_INIT << req->retrans), - TCP_RTO_MAX); + timeo = min((timeout << req->retrans), max_rto); 
req->expires = now + timeo; reqp = &req->dl_next; continue; } /* Drop this request */ - inet_csk_reqsk_queue_unlink(sk, req, reqp); + inet_csk_reqsk_queue_unlink(parent, req, reqp); reqsk_queue_removed(&icsk->icsk_accept_queue, req); reqsk_free(req); continue; @@ -503,14 +498,29 @@ static void tcp_synack_timer(struct sock *sk) reqp = &req->dl_next; } - i = (i+1)&(TCP_SYNQ_HSIZE-1); + i = (i + 1) & (lopt->nr_table_entries - 1); } while (--budget > 0); lopt->clock_hand = i; if (lopt->qlen) - inet_csk_reset_keepalive_timer(sk, TCP_SYNQ_INTERVAL); + inet_csk_reset_keepalive_timer(parent, interval); +} + +EXPORT_SYMBOL_GPL(reqsk_queue_prune); + +/* + * Timer for listening sockets + */ + +static void tcp_synack_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, + TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries); } void tcp_set_keepalive(struct sock *sk, int val) -- cgit v1.2.3 From c4365c9235f80128c3c3d5993074173941b1c1f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:12:30 -0700 Subject: [RANDOM]: Introduce secure_dccp_sequence_number Code contributed by Stephen Hemminger. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- drivers/char/random.c | 34 ++++++++++++++++++++++++++++++++++ include/linux/random.h | 2 ++ 2 files changed, 36 insertions(+) (limited to 'include/linux') diff --git a/drivers/char/random.c b/drivers/char/random.c index 6b11d6b2129f..7999da25fe40 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1589,6 +1589,40 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp EXPORT_SYMBOL(secure_tcpv6_port_ephemeral); #endif +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +/* Similar to secure_tcp_sequence_number but generate a 48 bit value + * bit's 32-47 increase every key exchange + * 0-31 hash(source, dest) + */ +u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, + __u16 sport, __u16 dport) +{ + struct timeval tv; + u64 seq; + __u32 hash[4]; + struct keydata *keyptr = get_keyptr(); + + hash[0] = saddr; + hash[1] = daddr; + hash[2] = (sport << 16) + dport; + hash[3] = keyptr->secret[11]; + + seq = half_md4_transform(hash, keyptr->secret); + seq |= ((u64)keyptr->count) << (32 - HASH_BITS); + + do_gettimeofday(&tv); + seq += tv.tv_usec + tv.tv_sec * 1000000; + seq &= (1ull << 48) - 1; +#if 0 + printk("dccp init_seq(%lx, %lx, %d, %d) = %d\n", + saddr, daddr, sport, dport, seq); +#endif + return seq; +} + +EXPORT_SYMBOL(secure_dccp_sequence_number); +#endif + #endif /* CONFIG_INET */ diff --git a/include/linux/random.h b/include/linux/random.h index cc6703449916..7b2adb3322d5 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, __u16 sport, __u16 dport); +extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, + __u16 sport, __u16 dport); #ifndef MODULE extern struct file_operations random_fops, urandom_fops; -- cgit v1.2.3 From 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c Mon Sep 17 00:00:00 2001 From: 
Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:14:34 -0700 Subject: [DCCP]: Initial implementation Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 432 ++++++++++ include/linux/in.h | 1 + include/linux/net.h | 1 + include/linux/socket.h | 1 + net/Kconfig | 1 + net/Makefile | 1 + net/dccp/Kconfig | 24 + net/dccp/Makefile | 5 + net/dccp/ccid.c | 139 +++ net/dccp/ccid.h | 156 ++++ net/dccp/ccids/Kconfig | 25 + net/dccp/ccids/Makefile | 3 + net/dccp/ccids/ccid3.c | 2164 +++++++++++++++++++++++++++++++++++++++++++++++ net/dccp/ccids/ccid3.h | 137 +++ net/dccp/dccp.h | 422 +++++++++ net/dccp/input.c | 510 +++++++++++ net/dccp/ipv4.c | 1289 ++++++++++++++++++++++++++++ net/dccp/minisocks.c | 199 +++++ net/dccp/options.c | 763 +++++++++++++++++ net/dccp/output.c | 406 +++++++++ net/dccp/proto.c | 818 ++++++++++++++++++ net/dccp/timer.c | 249 ++++++ 22 files changed, 7746 insertions(+) create mode 100644 include/linux/dccp.h create mode 100644 net/dccp/Kconfig create mode 100644 net/dccp/Makefile create mode 100644 net/dccp/ccid.c create mode 100644 net/dccp/ccid.h create mode 100644 net/dccp/ccids/Kconfig create mode 100644 net/dccp/ccids/Makefile create mode 100644 net/dccp/ccids/ccid3.c create mode 100644 net/dccp/ccids/ccid3.h create mode 100644 net/dccp/dccp.h create mode 100644 net/dccp/input.c create mode 100644 net/dccp/ipv4.c create mode 100644 net/dccp/minisocks.c create mode 100644 net/dccp/options.c create mode 100644 net/dccp/output.c create mode 100644 net/dccp/proto.c create mode 100644 net/dccp/timer.c (limited to 'include/linux') diff 
--git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index 000000000000..e3b4bf7346bb --- /dev/null +++ b/include/linux/dccp.h @@ -0,0 +1,432 @@ +#ifndef _LINUX_DCCP_H +#define _LINUX_DCCP_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* FIXME: this is utterly wrong */ +struct sockaddr_dccp { + struct sockaddr_in in; + unsigned int service; +}; + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +/** + * struct dccp_hdr - generic part of DCCP packet header + * + * @dccph_sport - Relevant port on the endpoint that sent this packet + * @dccph_dport - Relevant port on the other endpoint + * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words + * @dccph_ccval - Used by the HC-Sender CCID + * @dccph_cscov - Parts of the packet that are covered by the Checksum field + * @dccph_checksum - Internet checksum, depends on dccph_cscov + * @dccph_x - 0 = 24 bit sequence number, 1 = 48 + * @dccph_type - packet type, see DCCP_PKT_ prefixed macros + * @dccph_seq - sequence number high or 
low order 24 bits, depends on dccph_x + */ +struct dccp_hdr { + __u16 dccph_sport, + dccph_dport; + __u8 dccph_doff; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 dccph_cscov:4, + dccph_ccval:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 dccph_ccval:4, + dccph_cscov:4; +#else +#error "Adjust your defines" +#endif + __u16 dccph_checksum; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 dccph_x:1, + dccph_type:4, + dccph_reserved:3, + dccph_seq:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 dccph_reserved:3, + dccph_type:4, + dccph_x:1, + dccph_seq:24; +#else +#error "Adjust your defines" +#endif +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +/** + * struct dccp_hdr_ext - the low bits of a 48 bit seq packet + * + * @dccph_seq_low - low 24 bits of a 48 bit seq packet + */ +struct dccp_hdr_ext { + __u32 dccph_seq_low; +}; + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +/** + * struct dccp_hdr_request - Conection initiation request header + * + * @dccph_req_service - Service to which the client app wants to connect + * @dccph_req_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_request { + __u32 dccph_req_service; +}; + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets + * + * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + */ +struct dccp_hdr_ack_bits { + __u32 dccph_reserved1:8, + dccph_ack_nr_high:24; + __u32 dccph_ack_nr_low; +}; + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +/** + * struct dccp_hdr_response - Conection initiation response header + * + * @dccph_resp_ack_nr_high - 48 bit 
ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request + * @dccph_resp_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_response { + struct dccp_hdr_ack_bits dccph_resp_ack; + __u32 dccph_resp_service; +}; + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_reset - Unconditionally shut down a connection + * + * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request + * @dccph_reset_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_reset { + struct dccp_hdr_ack_bits dccph_reset_ack; + __u8 dccph_reset_code, + dccph_reset_data[3]; +}; + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +enum dccp_pkt_type { + DCCP_PKT_REQUEST = 0, + DCCP_PKT_RESPONSE, + DCCP_PKT_DATA, + DCCP_PKT_ACK, + DCCP_PKT_DATAACK, + DCCP_PKT_CLOSEREQ, + DCCP_PKT_CLOSE, + DCCP_PKT_RESET, + DCCP_PKT_SYNC, + DCCP_PKT_SYNCACK, + DCCP_PKT_INVALID, +}; + +#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID + +static inline unsigned int dccp_packet_hdr_len(const __u8 type) +{ + if (type == DCCP_PKT_DATA) + return 0; + if (type == DCCP_PKT_DATAACK || + type == DCCP_PKT_ACK || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ) + return sizeof(struct dccp_hdr_ack_bits); + if (type == DCCP_PKT_REQUEST) + return sizeof(struct dccp_hdr_request); + if (type == DCCP_PKT_RESPONSE) + return sizeof(struct dccp_hdr_response); + return sizeof(struct dccp_hdr_reset); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + 
dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + +enum dccp_reset_codes { + DCCP_RESET_CODE_UNSPECIFIED = 0, + DCCP_RESET_CODE_CLOSED, + DCCP_RESET_CODE_ABORTED, + DCCP_RESET_CODE_NO_CONNECTION, + DCCP_RESET_CODE_PACKET_ERROR, + DCCP_RESET_CODE_OPTION_ERROR, + DCCP_RESET_CODE_MANDATORY_ERROR, + DCCP_RESET_CODE_CONNECTION_REFUSED, + DCCP_RESET_CODE_BAD_SERVICE_CODE, + DCCP_RESET_CODE_TOO_BUSY, + DCCP_RESET_CODE_BAD_INIT_COOKIE, + DCCP_RESET_CODE_AGGRESSION_PENALTY, +}; + +/* DCCP options */ +enum { + DCCPO_PADDING = 0, + DCCPO_MANDATORY = 1, + DCCPO_MIN_RESERVED = 3, + DCCPO_MAX_RESERVED = 31, + DCCPO_NDP_COUNT = 37, + DCCPO_ACK_VECTOR_0 = 38, + DCCPO_ACK_VECTOR_1 = 39, + DCCPO_TIMESTAMP = 41, + DCCPO_TIMESTAMP_ECHO = 42, + DCCPO_ELAPSED_TIME = 43, + DCCPO_MAX = 45, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, +}; + +/* DCCP features */ +enum { + DCCPF_RESERVED = 0, + DCCPF_SEQUENCE_WINDOW = 3, + DCCPF_SEND_ACK_VECTOR = 6, + DCCPF_SEND_NDP_COUNT = 7, + /* 10-127 reserved */ + DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_MAX_CCID_SPECIFIC = 255, +}; + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +/* FIXME: for now we're using CCID 3 (TFRC) */ +#define DCCPF_INITIAL_CCID 3 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 0 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +#define DCCP_NDP_LIMIT 0xFFFFFF + +/** + * struct dccp_options - option values for a DCCP connection + * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpo_ccid - Congestion Control Id (CCID) (section 10) + * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2) + */ +struct dccp_options { + __u64 dccpo_sequence_window; + __u8 dccpo_ccid; + __u8 dccpo_send_ack_vector; + __u8 dccpo_send_ndp_count; +}; + +extern void __dccp_options_init(struct dccp_options *dccpo); +extern void dccp_options_init(struct 
dccp_options *dccpo); +extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); + +struct dccp_request_sock { + struct inet_request_sock dreq_inet_rsk; + __u64 dreq_iss; + __u64 dreq_isr; + __u32 dreq_service; +}; + +static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) +{ + return (struct dccp_request_sock *)req; +} + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACK_VECTOR_LEN 253 + +struct dccp_options_received { + u32 dccpor_ndp:24, + dccpor_ack_vector_len:8; + u32 dccpor_ack_vector_idx:10; + /* 22 bits hole, try to pack */ + u32 dccpor_timestamp; + u32 dccpor_timestamp_echo; + u32 dccpor_elapsed_time; +}; + +struct ccid; + +enum dccp_role { + DCCP_ROLE_UNDEFINED, + DCCP_ROLE_LISTEN, + DCCP_ROLE_CLIENT, + DCCP_ROLE_SERVER, +}; + +/** + * struct dccp_sock - DCCP socket state + * + * @dccps_swl - sequence number window low + * @dccps_swh - sequence number window high + * @dccps_awl - acknowledgement number window low + * @dccps_awh - acknowledgement number window high + * @dccps_iss - initial sequence number sent + * @dccps_isr - initial sequence number received + * @dccps_osr - first OPEN sequence number received + * @dccps_gss - greatest sequence number sent + * @dccps_gsr - greatest valid sequence number received + * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss + * @dccps_timestamp_time - time of latest TIMESTAMP option + * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option + * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) + * @dccps_pmtu_cookie - Last pmtu seen by socket + * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_role - Role of this sock, one of %dccp_role + * @dccps_ndp_count - number of Non Data Packets since last data packet + * @dccps_hc_rx_ackpkts - receiver half connection acked packets + */ +struct dccp_sock { + /* 
inet_connection_sock has to be the first member of dccp_sock */ + struct inet_connection_sock dccps_inet_connection; + __u64 dccps_swl; + __u64 dccps_swh; + __u64 dccps_awl; + __u64 dccps_awh; + __u64 dccps_iss; + __u64 dccps_isr; + __u64 dccps_osr; + __u64 dccps_gss; + __u64 dccps_gsr; + __u64 dccps_gar; + unsigned long dccps_service; + unsigned long dccps_timestamp_time; + __u32 dccps_timestamp_echo; + __u32 dccps_avg_packet_size; + unsigned long dccps_ndp_count; + __u16 dccps_ext_header_len; + __u32 dccps_pmtu_cookie; + __u32 dccps_mss_cache; + struct dccp_options dccps_options; + struct dccp_ackpkts *dccps_hc_rx_ackpkts; + void *dccps_hc_rx_ccid_private; + void *dccps_hc_tx_ccid_private; + struct ccid *dccps_hc_rx_ccid; + struct ccid *dccps_hc_tx_ccid; + struct dccp_options_received dccps_options_received; + enum dccp_role dccps_role:2; +}; + +static inline struct dccp_sock *dccp_sk(const struct sock *sk) +{ + return (struct dccp_sock *)sk; +} + +static inline const char *dccp_role(const struct sock *sk) +{ + switch (dccp_sk(sk)->dccps_role) { + case DCCP_ROLE_UNDEFINED: return "undefined"; + case DCCP_ROLE_LISTEN: return "listen"; + case DCCP_ROLE_SERVER: return "server"; + case DCCP_ROLE_CLIENT: return "client"; + } + return NULL; +} + +#endif /* _LINUX_DCCP_H */ diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748d..ba355384016a 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -32,6 +32,7 @@ enum { IPPROTO_PUP = 12, /* PUP protocol */ IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */ + IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ IPPROTO_RSVP = 46, /* RSVP protocol */ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ diff --git a/include/linux/net.h b/include/linux/net.h index 39906619b9d7..5f8b632ff653 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -84,6 +84,7 @@ enum sock_type { SOCK_RAW = 3, SOCK_RDM = 4, SOCK_SEQPACKET = 5, + SOCK_DCCP = 6, 
SOCK_PACKET = 10, }; diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2e..ddf22559f484 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -271,6 +271,7 @@ struct ucred { #define SOL_IRDA 266 #define SOL_NETBEUI 267 #define SOL_LLC 268 +#define SOL_DCCP 269 /* IPX options */ #define IPX_TYPE 1 diff --git a/net/Kconfig b/net/Kconfig index 02877ac0f2f4..c07aafb59a0f 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig" endif +source "net/dccp/Kconfig" source "net/sctp/Kconfig" source "net/atm/Kconfig" source "net/bridge/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4a01be8d3e1e..7e6eff206c81 100644 --- a/net/Makefile +++ b/net/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_VLAN_8021Q) += 8021q/ +obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ ifeq ($(CONFIG_NET),y) diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig new file mode 100644 index 000000000000..90460bc629b3 --- /dev/null +++ b/net/dccp/Kconfig @@ -0,0 +1,24 @@ +menu "DCCP Configuration (EXPERIMENTAL)" + depends on INET && EXPERIMENTAL + +config IP_DCCP + tristate "The DCCP Protocol (EXPERIMENTAL)" + ---help--- + Datagram Congestion Control Protocol + + From draft-ietf-dccp-spec-11 . + + The Datagram Congestion Control Protocol (DCCP) is a transport + protocol that implements bidirectional, unicast connections of + congestion-controlled, unreliable datagrams. It should be suitable + for use by applications such as streaming media, Internet telephony, + and on-line games + + To compile this protocol support as a module, choose M here: the + module will be called dccp. + + If in doubt, say N. 
+ +source "net/dccp/ccids/Kconfig" + +endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile new file mode 100644 index 000000000000..c6e6ba55c36b --- /dev/null +++ b/net/dccp/Makefile @@ -0,0 +1,5 @@ +obj-$(CONFIG_IP_DCCP) += dccp.o + +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o + +obj-y += ccids/ diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c new file mode 100644 index 000000000000..9d8fc0e289ea --- /dev/null +++ b/net/dccp/ccid.c @@ -0,0 +1,139 @@ +/* + * net/dccp/ccid.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "ccid.h" + +static struct ccid *ccids[CCID_MAX]; +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +static atomic_t ccids_lockct = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(ccids_lock); + +/* + * The strategy is: modifications ccids vector are short, do not sleep and + * veeery rare, but read access should be free of any exclusive locks. 
+ */ +static void ccids_write_lock(void) +{ + spin_lock(&ccids_lock); + while (atomic_read(&ccids_lockct) != 0) { + spin_unlock(&ccids_lock); + yield(); + spin_lock(&ccids_lock); + } +} + +static inline void ccids_write_unlock(void) +{ + spin_unlock(&ccids_lock); +} + +static inline void ccids_read_lock(void) +{ + atomic_inc(&ccids_lockct); + spin_unlock_wait(&ccids_lock); +} + +static inline void ccids_read_unlock(void) +{ + atomic_dec(&ccids_lockct); +} + +#else +#define ccids_write_lock() do { } while(0) +#define ccids_write_unlock() do { } while(0) +#define ccids_read_lock() do { } while(0) +#define ccids_read_unlock() do { } while(0) +#endif + +int ccid_register(struct ccid *ccid) +{ + int err; + + if (ccid->ccid_init == NULL) + return -1; + + ccids_write_lock(); + err = -EEXIST; + if (ccids[ccid->ccid_id] == NULL) { + ccids[ccid->ccid_id] = ccid; + err = 0; + } + ccids_write_unlock(); + if (err == 0) + pr_info("CCID: Registered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return err; +} + +EXPORT_SYMBOL_GPL(ccid_register); + +int ccid_unregister(struct ccid *ccid) +{ + ccids_write_lock(); + ccids[ccid->ccid_id] = NULL; + ccids_write_unlock(); + pr_info("CCID: Unregistered CCID %d (%s)\n", + ccid->ccid_id, ccid->ccid_name); + return 0; +} + +EXPORT_SYMBOL_GPL(ccid_unregister); + +struct ccid *ccid_init(unsigned char id, struct sock *sk) +{ + struct ccid *ccid; + +#ifdef CONFIG_KMOD + if (ccids[id] == NULL) + request_module("net-dccp-ccid-%d", id); +#endif + ccids_read_lock(); + + ccid = ccids[id]; + if (ccid == NULL) + goto out; + + if (!try_module_get(ccid->ccid_owner)) + goto out_err; + + if (ccid->ccid_init(sk) != 0) + goto out_module_put; +out: + ccids_read_unlock(); + return ccid; +out_module_put: + module_put(ccid->ccid_owner); +out_err: + ccid = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(ccid_init); + +void ccid_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid == NULL) + return; + + ccids_read_lock(); + + if (ccids[ccid->ccid_id] != NULL) 
{ + if (ccid->ccid_exit != NULL) + ccid->ccid_exit(sk); + module_put(ccid->ccid_owner); + } + + ccids_read_unlock(); +} + +EXPORT_SYMBOL_GPL(ccid_exit); diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h new file mode 100644 index 000000000000..06105b2a613c --- /dev/null +++ b/net/dccp/ccid.h @@ -0,0 +1,156 @@ +#ifndef _CCID_H +#define _CCID_H +/* + * net/dccp/ccid.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * CCID infrastructure + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define CCID_MAX 255 + +struct ccid { + unsigned char ccid_id; + const char *ccid_name; + struct module *ccid_owner; + int (*ccid_init)(struct sock *sk); + void (*ccid_exit)(struct sock *sk); + int (*ccid_hc_rx_init)(struct sock *sk); + int (*ccid_hc_tx_init)(struct sock *sk); + void (*ccid_hc_rx_exit)(struct sock *sk); + void (*ccid_hc_tx_exit)(struct sock *sk); + void (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_rx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + void (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb); + void (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb); + int (*ccid_hc_tx_parse_options)(struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value); + int (*ccid_hc_tx_send_packet)(struct sock *sk, + struct sk_buff *skb, int len, + long *delay); + void (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len); +}; + +extern int ccid_register(struct ccid *ccid); +extern int ccid_unregister(struct ccid *ccid); + +extern struct ccid *ccid_init(unsigned char id, struct sock *sk); +extern void ccid_exit(struct 
ccid *ccid, struct sock *sk); + +static inline void __ccid_get(struct ccid *ccid) +{ + __module_get(ccid->ccid_owner); +} + +static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb, int len, + long *delay) +{ + int rc = 0; + if (ccid->ccid_hc_tx_send_packet != NULL) + rc = ccid->ccid_hc_tx_send_packet(sk, skb, len, delay); + return rc; +} + +static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, + int more, int len) +{ + if (ccid->ccid_hc_tx_packet_sent != NULL) + ccid->ccid_hc_tx_packet_sent(sk, more, len); +} + +static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_rx_init != NULL) + rc = ccid->ccid_hc_rx_init(sk); + return rc; +} + +static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk) +{ + int rc = 0; + if (ccid->ccid_hc_tx_init != NULL) + rc = ccid->ccid_hc_tx_init(sk); + return rc; +} + +static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_rx_exit != NULL) + ccid->ccid_hc_rx_exit(sk); +} + +static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk) +{ + if (ccid->ccid_hc_tx_exit != NULL) + ccid->ccid_hc_tx_exit(sk); +} + +static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_packet_recv != NULL) + ccid->ccid_hc_rx_packet_recv(sk, skb); +} + +static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_packet_recv != NULL) + ccid->ccid_hc_tx_packet_recv(sk, skb); +} + +static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_tx_parse_options != NULL) + rc = ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct 
sock *sk, + unsigned char option, + unsigned char len, u16 idx, + unsigned char* value) +{ + int rc = 0; + if (ccid->ccid_hc_rx_parse_options != NULL) + rc = ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value); + return rc; +} + +static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_tx_insert_options != NULL) + ccid->ccid_hc_tx_insert_options(sk, skb); +} + +static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, + struct sk_buff *skb) +{ + if (ccid->ccid_hc_rx_insert_options != NULL) + ccid->ccid_hc_rx_insert_options(sk, skb); +} +#endif /* _CCID_H */ diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig new file mode 100644 index 000000000000..67f9c06bd179 --- /dev/null +++ b/net/dccp/ccids/Kconfig @@ -0,0 +1,25 @@ +menu "DCCP CCIDs Configuration (EXPERIMENTAL)" + depends on IP_DCCP && EXPERIMENTAL + +config IP_DCCP_CCID3 + tristate "CCID3 (TFRC) (EXPERIMENTAL)" + depends on IP_DCCP + ---help--- + CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based + rate-controlled congestion control mechanism. TFRC is designed to + be reasonably fair when competing for bandwidth with TCP-like flows, + where a flow is "reasonably fair" if its sending rate is generally + within a factor of two of the sending rate of a TCP flow under the + same conditions. However, TFRC has a much lower variation of + throughput over time compared with TCP, which makes CCID 3 more + suitable than CCID 2 for applications such streaming media where a + relatively smooth sending rate is of importance. + + CCID 3 is further described in [CCID 3 PROFILE]. The TFRC + congestion control algorithms were initially described in RFC 3448. + + This text was extracted from draft-ietf-dccp-spec-11.txt. + + If in doubt, say M. 
+ +endmenu diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile new file mode 100644 index 000000000000..1c720131c5db --- /dev/null +++ b/net/dccp/ccids/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o + +dccp_ccid3-y := ccid3.o diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c new file mode 100644 index 000000000000..4f45902cb55e --- /dev/null +++ b/net/dccp/ccids/ccid3.c @@ -0,0 +1,2164 @@ +/* + * net/dccp/ccids/ccid3.c + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include "../ccid.h" +#include "../dccp.h" +#include "ccid3.h" + +#ifdef CCID3_DEBUG +extern int ccid3_debug; + +#define ccid3_pr_debug(format, a...) \ + do { if (ccid3_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ + } while (0) +#else +#define ccid3_pr_debug(format, a...) +#endif + +#define TFRC_MIN_PACKET_SIZE 16 +#define TFRC_STD_PACKET_SIZE 256 +#define TFRC_MAX_PACKET_SIZE 65535 + +#define USEC_IN_SEC 1000000 + +#define TFRC_INITIAL_TIMEOUT (2 * USEC_IN_SEC) +/* two seconds as per CCID3 spec 11 */ + +#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_IN_SEC / (2 * HZ)) +/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */ + +#define TFRC_WIN_COUNT_PER_RTT 4 +#define TFRC_WIN_COUNT_LIMIT 16 + +#define TFRC_MAX_BACK_OFF_TIME 64 +/* above is in seconds */ + +#define TFRC_SMALLEST_P 40 + +#define TFRC_RECV_IVAL_F_LENGTH 8 /* length(w[]) */ + +/* Number of later packets received before one is considered lost */ +#define TFRC_RECV_NUM_LATE_LOSS 3 + +enum ccid3_options { + TFRC_OPT_LOSS_EVENT_RATE = 192, + TFRC_OPT_LOSS_INTERVALS = 193, + TFRC_OPT_RECEIVE_RATE = 194, +}; + +static int ccid3_debug; + +static kmem_cache_t *ccid3_tx_hist_slab; +static kmem_cache_t *ccid3_rx_hist_slab; +static kmem_cache_t *ccid3_loss_interval_hist_slab; + +static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) +{ + struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); + + if (entry != NULL) + entry->ccid3htx_sent = 0; + + return entry; +} + +static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_tx_hist_slab, entry); +} + +static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, + struct sk_buff *skb, + int prio) +{ + struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->ccid3hrx_seqno = 
DCCP_SKB_CB(skb)->dccpd_seq; + entry->ccid3hrx_win_count = dh->dccph_ccval; + entry->ccid3hrx_type = dh->dccph_type; + entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; + do_gettimeofday(&(entry->ccid3hrx_tstamp)); + } + + return entry; +} + +static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_rx_hist_slab, entry); +} + +static void ccid3_rx_history_delete(struct list_head *hist) +{ + struct ccid3_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { + list_del_init(&entry->ccid3hrx_node); + kmem_cache_free(ccid3_rx_hist_slab, entry); + } +} + +static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) +{ + return kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio); +} + +static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); +} + +static void ccid3_loss_interval_history_delete(struct list_head *hist) +{ + struct ccid3_loss_interval_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, hist, ccid3lih_node) { + list_del_init(&entry->ccid3lih_node); + kmem_cache_free(ccid3_loss_interval_hist_slab, entry); + } +} + +static int ccid3_init(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + return 0; +} + +static void ccid3_exit(struct sock *sk) +{ + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); +} + +/* TFRC sender states */ +enum ccid3_hc_tx_states { + TFRC_SSTATE_NO_SENT = 1, + TFRC_SSTATE_NO_FBACK, + TFRC_SSTATE_FBACK, + TFRC_SSTATE_TERM, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state) +{ + static char *ccid3_state_names[] = { + [TFRC_SSTATE_NO_SENT] = "NO_SENT", + [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", + [TFRC_SSTATE_FBACK] = "FBACK", + [TFRC_SSTATE_TERM] = "TERM", + }; + + 
return ccid3_state_names[state]; +} +#endif + +static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_tx_state_name(oldstate), ccid3_tx_state_name(state)); + WARN_ON(state == oldstate); + hctx->ccid3hctx_state = state; +} + +static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result) { + + result->tv_sec = large.tv_sec-small.tv_sec; + if (large.tv_usec < small.tv_usec) { + (result->tv_sec)--; + result->tv_usec = USEC_IN_SEC+large.tv_usec-small.tv_usec; + } else + result->tv_usec = large.tv_usec-small.tv_usec; +} + +static inline void timeval_fix(struct timeval *tv) { + if (tv->tv_usec >= USEC_IN_SEC) { + tv->tv_sec++; + tv->tv_usec -= USEC_IN_SEC; + } +} + +/* returns the difference in usecs between timeval passed in and current time */ +static inline u32 now_delta(struct timeval tv) { + struct timeval now; + + do_gettimeofday(&now); + return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); +} + +#define CALCX_ARRSIZE 500 + +#define CALCX_SPLIT 50000 +/* equivalent to 0.05 */ + +static const u32 calcx_lookup[CALCX_ARRSIZE][2] = { + { 37172 , 8172 }, + { 53499 , 11567 }, + { 66664 , 14180 }, + { 78298 , 16388 }, + { 89021 , 18339 }, + { 99147 , 20108 }, + { 108858 , 21738 }, + { 118273 , 23260 }, + { 127474 , 24693 }, + { 136520 , 26052 }, + { 145456 , 27348 }, + { 154316 , 28589 }, + { 163130 , 29783 }, + { 171919 , 30935 }, + { 180704 , 32049 }, + { 189502 , 33130 }, + { 198328 , 34180 }, + { 207194 , 35202 }, + { 216114 , 36198 }, + { 225097 , 37172 }, + { 234153 , 38123 }, + { 243294 , 39055 }, + { 252527 , 39968 }, + { 261861 , 40864 }, + { 271305 , 41743 }, + { 280866 , 42607 }, + { 290553 , 43457 }, + { 300372 , 44293 }, + { 310333 , 45117 }, + { 320441 , 
45929 }, + { 330705 , 46729 }, + { 341131 , 47518 }, + { 351728 , 48297 }, + { 362501 , 49066 }, + { 373460 , 49826 }, + { 384609 , 50577 }, + { 395958 , 51320 }, + { 407513 , 52054 }, + { 419281 , 52780 }, + { 431270 , 53499 }, + { 443487 , 54211 }, + { 455940 , 54916 }, + { 468635 , 55614 }, + { 481581 , 56306 }, + { 494785 , 56991 }, + { 508254 , 57671 }, + { 521996 , 58345 }, + { 536019 , 59014 }, + { 550331 , 59677 }, + { 564939 , 60335 }, + { 579851 , 60988 }, + { 595075 , 61636 }, + { 610619 , 62279 }, + { 626491 , 62918 }, + { 642700 , 63553 }, + { 659253 , 64183 }, + { 676158 , 64809 }, + { 693424 , 65431 }, + { 711060 , 66050 }, + { 729073 , 66664 }, + { 747472 , 67275 }, + { 766266 , 67882 }, + { 785464 , 68486 }, + { 805073 , 69087 }, + { 825103 , 69684 }, + { 845562 , 70278 }, + { 866460 , 70868 }, + { 887805 , 71456 }, + { 909606 , 72041 }, + { 931873 , 72623 }, + { 954614 , 73202 }, + { 977839 , 73778 }, + { 1001557 , 74352 }, + { 1025777 , 74923 }, + { 1050508 , 75492 }, + { 1075761 , 76058 }, + { 1101544 , 76621 }, + { 1127867 , 77183 }, + { 1154739 , 77741 }, + { 1182172 , 78298 }, + { 1210173 , 78852 }, + { 1238753 , 79405 }, + { 1267922 , 79955 }, + { 1297689 , 80503 }, + { 1328066 , 81049 }, + { 1359060 , 81593 }, + { 1390684 , 82135 }, + { 1422947 , 82675 }, + { 1455859 , 83213 }, + { 1489430 , 83750 }, + { 1523671 , 84284 }, + { 1558593 , 84817 }, + { 1594205 , 85348 }, + { 1630518 , 85878 }, + { 1667543 , 86406 }, + { 1705290 , 86932 }, + { 1743770 , 87457 }, + { 1782994 , 87980 }, + { 1822973 , 88501 }, + { 1863717 , 89021 }, + { 1905237 , 89540 }, + { 1947545 , 90057 }, + { 1990650 , 90573 }, + { 2034566 , 91087 }, + { 2079301 , 91600 }, + { 2124869 , 92111 }, + { 2171279 , 92622 }, + { 2218543 , 93131 }, + { 2266673 , 93639 }, + { 2315680 , 94145 }, + { 2365575 , 94650 }, + { 2416371 , 95154 }, + { 2468077 , 95657 }, + { 2520707 , 96159 }, + { 2574271 , 96660 }, + { 2628782 , 97159 }, + { 2684250 , 97658 }, + { 2740689 , 98155 }, + { 
2798110 , 98651 }, + { 2856524 , 99147 }, + { 2915944 , 99641 }, + { 2976382 , 100134 }, + { 3037850 , 100626 }, + { 3100360 , 101117 }, + { 3163924 , 101608 }, + { 3228554 , 102097 }, + { 3294263 , 102586 }, + { 3361063 , 103073 }, + { 3428966 , 103560 }, + { 3497984 , 104045 }, + { 3568131 , 104530 }, + { 3639419 , 105014 }, + { 3711860 , 105498 }, + { 3785467 , 105980 }, + { 3860253 , 106462 }, + { 3936229 , 106942 }, + { 4013410 , 107422 }, + { 4091808 , 107902 }, + { 4171435 , 108380 }, + { 4252306 , 108858 }, + { 4334431 , 109335 }, + { 4417825 , 109811 }, + { 4502501 , 110287 }, + { 4588472 , 110762 }, + { 4675750 , 111236 }, + { 4764349 , 111709 }, + { 4854283 , 112182 }, + { 4945564 , 112654 }, + { 5038206 , 113126 }, + { 5132223 , 113597 }, + { 5227627 , 114067 }, + { 5324432 , 114537 }, + { 5422652 , 115006 }, + { 5522299 , 115474 }, + { 5623389 , 115942 }, + { 5725934 , 116409 }, + { 5829948 , 116876 }, + { 5935446 , 117342 }, + { 6042439 , 117808 }, + { 6150943 , 118273 }, + { 6260972 , 118738 }, + { 6372538 , 119202 }, + { 6485657 , 119665 }, + { 6600342 , 120128 }, + { 6716607 , 120591 }, + { 6834467 , 121053 }, + { 6953935 , 121514 }, + { 7075025 , 121976 }, + { 7197752 , 122436 }, + { 7322131 , 122896 }, + { 7448175 , 123356 }, + { 7575898 , 123815 }, + { 7705316 , 124274 }, + { 7836442 , 124733 }, + { 7969291 , 125191 }, + { 8103877 , 125648 }, + { 8240216 , 126105 }, + { 8378321 , 126562 }, + { 8518208 , 127018 }, + { 8659890 , 127474 }, + { 8803384 , 127930 }, + { 8948702 , 128385 }, + { 9095861 , 128840 }, + { 9244875 , 129294 }, + { 9395760 , 129748 }, + { 9548529 , 130202 }, + { 9703198 , 130655 }, + { 9859782 , 131108 }, + { 10018296 , 131561 }, + { 10178755 , 132014 }, + { 10341174 , 132466 }, + { 10505569 , 132917 }, + { 10671954 , 133369 }, + { 10840345 , 133820 }, + { 11010757 , 134271 }, + { 11183206 , 134721 }, + { 11357706 , 135171 }, + { 11534274 , 135621 }, + { 11712924 , 136071 }, + { 11893673 , 136520 }, + { 12076536 , 136969 }, + 
{ 12261527 , 137418 }, + { 12448664 , 137867 }, + { 12637961 , 138315 }, + { 12829435 , 138763 }, + { 13023101 , 139211 }, + { 13218974 , 139658 }, + { 13417071 , 140106 }, + { 13617407 , 140553 }, + { 13819999 , 140999 }, + { 14024862 , 141446 }, + { 14232012 , 141892 }, + { 14441465 , 142339 }, + { 14653238 , 142785 }, + { 14867346 , 143230 }, + { 15083805 , 143676 }, + { 15302632 , 144121 }, + { 15523842 , 144566 }, + { 15747453 , 145011 }, + { 15973479 , 145456 }, + { 16201939 , 145900 }, + { 16432847 , 146345 }, + { 16666221 , 146789 }, + { 16902076 , 147233 }, + { 17140429 , 147677 }, + { 17381297 , 148121 }, + { 17624696 , 148564 }, + { 17870643 , 149007 }, + { 18119154 , 149451 }, + { 18370247 , 149894 }, + { 18623936 , 150336 }, + { 18880241 , 150779 }, + { 19139176 , 151222 }, + { 19400759 , 151664 }, + { 19665007 , 152107 }, + { 19931936 , 152549 }, + { 20201564 , 152991 }, + { 20473907 , 153433 }, + { 20748982 , 153875 }, + { 21026807 , 154316 }, + { 21307399 , 154758 }, + { 21590773 , 155199 }, + { 21876949 , 155641 }, + { 22165941 , 156082 }, + { 22457769 , 156523 }, + { 22752449 , 156964 }, + { 23049999 , 157405 }, + { 23350435 , 157846 }, + { 23653774 , 158287 }, + { 23960036 , 158727 }, + { 24269236 , 159168 }, + { 24581392 , 159608 }, + { 24896521 , 160049 }, + { 25214642 , 160489 }, + { 25535772 , 160929 }, + { 25859927 , 161370 }, + { 26187127 , 161810 }, + { 26517388 , 162250 }, + { 26850728 , 162690 }, + { 27187165 , 163130 }, + { 27526716 , 163569 }, + { 27869400 , 164009 }, + { 28215234 , 164449 }, + { 28564236 , 164889 }, + { 28916423 , 165328 }, + { 29271815 , 165768 }, + { 29630428 , 166208 }, + { 29992281 , 166647 }, + { 30357392 , 167087 }, + { 30725779 , 167526 }, + { 31097459 , 167965 }, + { 31472452 , 168405 }, + { 31850774 , 168844 }, + { 32232445 , 169283 }, + { 32617482 , 169723 }, + { 33005904 , 170162 }, + { 33397730 , 170601 }, + { 33792976 , 171041 }, + { 34191663 , 171480 }, + { 34593807 , 171919 }, + { 34999428 , 172358 }, + 
{ 35408544 , 172797 }, + { 35821174 , 173237 }, + { 36237335 , 173676 }, + { 36657047 , 174115 }, + { 37080329 , 174554 }, + { 37507197 , 174993 }, + { 37937673 , 175433 }, + { 38371773 , 175872 }, + { 38809517 , 176311 }, + { 39250924 , 176750 }, + { 39696012 , 177190 }, + { 40144800 , 177629 }, + { 40597308 , 178068 }, + { 41053553 , 178507 }, + { 41513554 , 178947 }, + { 41977332 , 179386 }, + { 42444904 , 179825 }, + { 42916290 , 180265 }, + { 43391509 , 180704 }, + { 43870579 , 181144 }, + { 44353520 , 181583 }, + { 44840352 , 182023 }, + { 45331092 , 182462 }, + { 45825761 , 182902 }, + { 46324378 , 183342 }, + { 46826961 , 183781 }, + { 47333531 , 184221 }, + { 47844106 , 184661 }, + { 48358706 , 185101 }, + { 48877350 , 185541 }, + { 49400058 , 185981 }, + { 49926849 , 186421 }, + { 50457743 , 186861 }, + { 50992759 , 187301 }, + { 51531916 , 187741 }, + { 52075235 , 188181 }, + { 52622735 , 188622 }, + { 53174435 , 189062 }, + { 53730355 , 189502 }, + { 54290515 , 189943 }, + { 54854935 , 190383 }, + { 55423634 , 190824 }, + { 55996633 , 191265 }, + { 56573950 , 191706 }, + { 57155606 , 192146 }, + { 57741621 , 192587 }, + { 58332014 , 193028 }, + { 58926806 , 193470 }, + { 59526017 , 193911 }, + { 60129666 , 194352 }, + { 60737774 , 194793 }, + { 61350361 , 195235 }, + { 61967446 , 195677 }, + { 62589050 , 196118 }, + { 63215194 , 196560 }, + { 63845897 , 197002 }, + { 64481179 , 197444 }, + { 65121061 , 197886 }, + { 65765563 , 198328 }, + { 66414705 , 198770 }, + { 67068508 , 199213 }, + { 67726992 , 199655 }, + { 68390177 , 200098 }, + { 69058085 , 200540 }, + { 69730735 , 200983 }, + { 70408147 , 201426 }, + { 71090343 , 201869 }, + { 71777343 , 202312 }, + { 72469168 , 202755 }, + { 73165837 , 203199 }, + { 73867373 , 203642 }, + { 74573795 , 204086 }, + { 75285124 , 204529 }, + { 76001380 , 204973 }, + { 76722586 , 205417 }, + { 77448761 , 205861 }, + { 78179926 , 206306 }, + { 78916102 , 206750 }, + { 79657310 , 207194 }, + { 80403571 , 207639 }, + 
{ 81154906 , 208084 }, + { 81911335 , 208529 }, + { 82672880 , 208974 }, + { 83439562 , 209419 }, + { 84211402 , 209864 }, + { 84988421 , 210309 }, + { 85770640 , 210755 }, + { 86558080 , 211201 }, + { 87350762 , 211647 }, + { 88148708 , 212093 }, + { 88951938 , 212539 }, + { 89760475 , 212985 }, + { 90574339 , 213432 }, + { 91393551 , 213878 }, + { 92218133 , 214325 }, + { 93048107 , 214772 }, + { 93883493 , 215219 }, + { 94724314 , 215666 }, + { 95570590 , 216114 }, + { 96422343 , 216561 }, + { 97279594 , 217009 }, + { 98142366 , 217457 }, + { 99010679 , 217905 }, + { 99884556 , 218353 }, + { 100764018 , 218801 }, + { 101649086 , 219250 }, + { 102539782 , 219698 }, + { 103436128 , 220147 }, + { 104338146 , 220596 }, + { 105245857 , 221046 }, + { 106159284 , 221495 }, + { 107078448 , 221945 }, + { 108003370 , 222394 }, + { 108934074 , 222844 }, + { 109870580 , 223294 }, + { 110812910 , 223745 }, + { 111761087 , 224195 }, + { 112715133 , 224646 }, + { 113675069 , 225097 }, + { 114640918 , 225548 }, + { 115612702 , 225999 }, + { 116590442 , 226450 }, + { 117574162 , 226902 }, + { 118563882 , 227353 }, + { 119559626 , 227805 }, + { 120561415 , 228258 }, + { 121569272 , 228710 }, + { 122583219 , 229162 }, + { 123603278 , 229615 }, + { 124629471 , 230068 }, + { 125661822 , 230521 }, + { 126700352 , 230974 }, + { 127745083 , 231428 }, + { 128796039 , 231882 }, + { 129853241 , 232336 }, + { 130916713 , 232790 }, + { 131986475 , 233244 }, + { 133062553 , 233699 }, + { 134144966 , 234153 }, + { 135233739 , 234608 }, + { 136328894 , 235064 }, + { 137430453 , 235519 }, + { 138538440 , 235975 }, + { 139652876 , 236430 }, + { 140773786 , 236886 }, + { 141901190 , 237343 }, + { 143035113 , 237799 }, + { 144175576 , 238256 }, + { 145322604 , 238713 }, + { 146476218 , 239170 }, + { 147636442 , 239627 }, + { 148803298 , 240085 }, + { 149976809 , 240542 }, + { 151156999 , 241000 }, + { 152343890 , 241459 }, + { 153537506 , 241917 }, + { 154737869 , 242376 }, + { 155945002 , 242835 
}, + { 157158929 , 243294 }, + { 158379673 , 243753 }, + { 159607257 , 244213 }, + { 160841704 , 244673 }, + { 162083037 , 245133 }, + { 163331279 , 245593 }, + { 164586455 , 246054 }, + { 165848586 , 246514 }, + { 167117696 , 246975 }, + { 168393810 , 247437 }, + { 169676949 , 247898 }, + { 170967138 , 248360 }, + { 172264399 , 248822 }, + { 173568757 , 249284 }, + { 174880235 , 249747 }, + { 176198856 , 250209 }, + { 177524643 , 250672 }, + { 178857621 , 251136 }, + { 180197813 , 251599 }, + { 181545242 , 252063 }, + { 182899933 , 252527 }, + { 184261908 , 252991 }, + { 185631191 , 253456 }, + { 187007807 , 253920 }, + { 188391778 , 254385 }, + { 189783129 , 254851 }, + { 191181884 , 255316 }, + { 192588065 , 255782 }, + { 194001698 , 256248 }, + { 195422805 , 256714 }, + { 196851411 , 257181 }, + { 198287540 , 257648 }, + { 199731215 , 258115 }, + { 201182461 , 258582 }, + { 202641302 , 259050 }, + { 204107760 , 259518 }, + { 205581862 , 259986 }, + { 207063630 , 260454 }, + { 208553088 , 260923 }, + { 210050262 , 261392 }, + { 211555174 , 261861 }, + { 213067849 , 262331 }, + { 214588312 , 262800 }, + { 216116586 , 263270 }, + { 217652696 , 263741 }, + { 219196666 , 264211 }, + { 220748520 , 264682 }, + { 222308282 , 265153 }, + { 223875978 , 265625 }, + { 225451630 , 266097 }, + { 227035265 , 266569 }, + { 228626905 , 267041 }, + { 230226576 , 267514 }, + { 231834302 , 267986 }, + { 233450107 , 268460 }, + { 235074016 , 268933 }, + { 236706054 , 269407 }, + { 238346244 , 269881 }, + { 239994613 , 270355 }, + { 241651183 , 270830 }, + { 243315981 , 271305 } +}; + +/* Calculate the send rate as per section 3.1 of RFC3448 + +Returns send rate in bytes per second + +Integer maths and lookups are used as not allowed floating point in kernel + +The function for Xcalc as per section 3.1 of RFC3448 is: + +X = s + ------------------------------------------------------------- + R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2))) + +where +X is the trasmit 
rate in bytes/second +s is the packet size in bytes +R is the round trip time in seconds +p is the loss event rate, between 0 and 1.0, of the number of loss events + as a fraction of the number of packets transmitted +t_RTO is the TCP retransmission timeout value in seconds +b is the number of packets acknowledged by a single TCP acknowledgement + +we can assume that b = 1 and t_RTO is 4 * R. With this the equation becomes: + +X = s + ----------------------------------------------------------------------- + R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2))) + + +which we can break down into: + +X = s + -------- + R * f(p) + +where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p)) + +Function parameters: +s - bytes +R - RTT in usecs +p - loss rate (decimal fraction multiplied by 1,000,000) + +Returns Xcalc in bytes per second + +DON'T alter this code unless you run test cases against it as the code +has been manipulated to stop underflow/overflow.
+ +*/ +static u32 ccid3_calc_x(u16 s, u32 R, u32 p) +{ + int index; + u32 f; + u64 tmp1, tmp2; + + if (p < CALCX_SPLIT) + index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1; + else + index = (p / (1000000 / CALCX_ARRSIZE)) - 1; + + if (index < 0) + /* p should be 0 unless there is a bug in my code */ + index = 0; + + if (R == 0) + R = 1; /* RTT can't be zero or else divide by zero */ + + BUG_ON(index >= CALCX_ARRSIZE); + + if (p >= CALCX_SPLIT) + f = calcx_lookup[index][0]; + else + f = calcx_lookup[index][1]; + + tmp1 = ((u64)s * 100000000); + tmp2 = ((u64)R * (u64)f); + do_div(tmp2,10000); + do_div(tmp1,tmp2); + /* don't alter above math unless you test due to overflow on 32 bit */ + + return (u32)tmp1; +} + +/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */ +static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx) +{ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) + return; + /* if no feedback spec says t_ipi is 1 second (set elsewhere and then + * doubles after every no feedback timer (separate function) */ + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10); + /* reason for above maths with 10 in there is to avoid 32 bit + * overflow for jumbo packets */ + +} + +/* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ +static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx) +{ + hctx->ccid3hctx_delta = min_t(u32, hctx->ccid3hctx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); + +} + +/* + * Update X by + * If (p > 0) + * x_calc = calcX(s, R, p); + * X = max(min(X_calc, 2 * X_recv), s / t_mbi); + * Else + * If (now - tld >= R) + * X = max(min(2 * X, 2 * X_recv), s / R); + * tld = now; + */ +static void ccid3_hc_tx_update_x(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if 
(hctx->ccid3hctx_p >= TFRC_SMALLEST_P) { /* to avoid large error in calcX */ + hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s, + hctx->ccid3hctx_rtt, + hctx->ccid3hctx_p); + hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv), + hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME); + } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { + u32 rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) { + rtt = 10; + } /* avoid divide by zero below */ + + hctx->ccid3hctx_x = max_t(u32, min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x), + (hctx->ccid3hctx_s * 100000) / (rtt / 10)); + /* Using 100000 and 10 to avoid 32 bit overflow for jumbo frames */ + do_gettimeofday(&hctx->ccid3hctx_t_ld); + } + + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 1; + } +} + +static void ccid3_hc_tx_no_feedback_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct dccp_sock *dp = dccp_sk(sk); + unsigned long next_tmout = 0; + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + u32 rtt; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. 
*/ + /* XXX: set some sensible MIB */ + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5); + goto out; + } + + ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_TERM: + goto out; + case TFRC_SSTATE_NO_FBACK: + /* Halve send rate */ + hctx->ccid3hctx_x /= 2; + if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME)) + hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME; + + ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n", + dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state), + hctx->ccid3hctx_x); + next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) + / (hctx->ccid3hctx_x / 10), TFRC_INITIAL_TIMEOUT); + /* do above maths with 100000 and 10 to prevent overflow on 32 bit */ + /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11 + * should adjust tx_t_ipi and double that to achieve it really */ + break; + case TFRC_SSTATE_FBACK: + /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */ + rtt = hctx->ccid3hctx_rtt; + if (rtt < 10) + rtt = 10; + /* stop divide by zero below */ + if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >= + 4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) { + ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk, + ccid3_tx_state_name(hctx->ccid3hctx_state)); + /* Halve sending rate */ + + /* If (X_calc > 2 * X_recv) + * X_recv = max(X_recv / 2, s / (2 * t_mbi)); + * Else + * X_recv = X_calc / 4; + */ + BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0); + + /* check also if p is zero -> x_calc is infinity? 
*/ + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P || + hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv) + hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2, + hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME)); + else + hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4; + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + } + if (hctx->ccid3hctx_x == 0) { + ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n"); + hctx->ccid3hctx_x = 10; + } + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max_t(u32, inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x / 10)); + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + goto out; + } + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout))); + hctx->ccid3hctx_idle = 1; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, + int len, long *delay) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *new_packet = NULL; + struct timeval now; + int rc = -ENOTCONN; + +// ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len); + /* + * check if pure ACK or Terminating */ + /* XXX: We only call this function for DATA and DATAACK, on, these packets can have + * zero length, but why the comment about "pure ACK"? 
+ */ + if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM) + goto out; + + /* See if last packet allocated was not sent */ + if (!list_empty(&hctx->ccid3hctx_hist)) + new_packet = list_entry(hctx->ccid3hctx_hist.next, + struct ccid3_tx_hist_entry, ccid3htx_node); + + if (new_packet == NULL || new_packet->ccid3htx_sent) { + new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + + rc = -ENOBUFS; + if (new_packet == NULL) { + ccid3_pr_debug("%s, sk=%p, not enough mem to add " + "to history, send refused\n", dccp_role(sk), sk); + goto out; + } + + list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + } + + do_gettimeofday(&now); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk, + dp->dccps_gss); + + hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer; + hctx->ccid3hctx_no_feedback_timer.data = (unsigned long)sk; + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)); + hctx->ccid3hctx_last_win_count = 0; + hctx->ccid3hctx_t_last_win_count = now; + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); + hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT; + + /* Set nominal send time for initial packet */ + hctx->ccid3hctx_t_nom = now; + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + rc = 0; + break; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta); + ccid3_pr_debug("send_packet delay=%ld\n",*delay); + *delay /= -1000; + /* divide by -1000 is to convert to ms and get sign right */ + rc = *delay > 0 ? -EAGAIN : 0; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + rc = -EINVAL; + break; + } + + /* Can we send? 
if so add options and add to packet history */ + if (rc == 0) + new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +out: + return rc; +} + +static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *packet = NULL; + struct timeval now; + +// ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); + BUG_ON(hctx == NULL); + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n", + dccp_role(sk), sk); + return; + } + + do_gettimeofday(&now); + + /* check if we have sent a data packet */ + if (len > 0) { + unsigned long quarter_rtt; + + if (list_empty(&hctx->ccid3hctx_hist)) { + printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); + return; + } + packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); + if (packet->ccid3htx_sent) { + printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); + return; + } + packet->ccid3htx_tstamp = now; + packet->ccid3htx_seqno = dp->dccps_gss; + // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); + + /* + * Check if win_count have changed */ + /* COMPLIANCE_BEGIN + * Algorithm in "8.1. 
Window Counter Valuer" in draft-ietf-dccp-ccid3-11.txt + */ + quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4); + if (quarter_rtt > 0) { + hctx->ccid3hctx_t_last_win_count = now; + hctx->ccid3hctx_last_win_count = (hctx->ccid3hctx_last_win_count + + min_t(unsigned long, quarter_rtt, 5)) % 16; + ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", + dccp_role(sk), sk, + packet->ccid3htx_win_count, + hctx->ccid3hctx_last_win_count); + } + /* COMPLIANCE_END */ +#if 0 + ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", + dccp_role(sk), sk, + packet->ccid3htx_seqno, + packet->ccid3htx_win_count); +#endif + hctx->ccid3hctx_idle = 0; + packet->ccid3htx_sent = 1; + } else + ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", + dccp_role(sk), sk, dp->dccps_gss); + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* if first wasn't pure ack */ + if (len != 0) + printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n", + __FUNCTION__, dccp_role(sk)); + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + if (len > 0) { + hctx->ccid3hctx_t_nom = now; + ccid3_calc_new_t_ipi(hctx); + ccid3_calc_new_delta(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + struct ccid3_tx_hist_entry *entry, *next, *packet; + unsigned long next_tmout; + u16 t_elapsed; + u32 pinv; + u32 x_recv; + u32 r_sample; +#if 0 + ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n", + dccp_role(sk), sk, dccp_state_name(sk->sk_state), + skb, 
dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); +#endif + if (hctx == NULL) + return; + + if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) { + ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk); + return; + } + + /* we are only interested in ACKs */ + if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || + DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) + return; + + opt_recv = &hctx->ccid3hctx_options_received; + + t_elapsed = dp->dccps_options_received.dccpor_elapsed_time; + x_recv = opt_recv->ccid3or_receive_rate; + pinv = opt_recv->ccid3or_loss_event_rate; + + switch (hctx->ccid3hctx_state) { + case TFRC_SSTATE_NO_SENT: + /* FIXME: what to do here? */ + return; + case TFRC_SSTATE_NO_FBACK: + case TFRC_SSTATE_FBACK: + /* Calculate new round trip sample by + * R_sample = (now - t_recvdata) - t_delay */ + /* get t_recvdata from history */ + packet = NULL; + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) + if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { + packet = entry; + break; + } + + if (packet == NULL) { + ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", + dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, + dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type)); + return; + } + + /* Update RTT */ + r_sample = now_delta(packet->ccid3htx_tstamp); + /* FIXME: */ + // r_sample -= usecs_to_jiffies(t_elapsed * 10); + + /* Update RTT estimate by + * If (No feedback recv) + * R = R_sample; + * Else + * R = q * R + (1 - q) * R_sample; + * + * q is a constant, RFC 3448 recomments 0.9 + */ + if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) { + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK); + hctx->ccid3hctx_rtt = r_sample; + } else + hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10; + + /* + * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent + * implemention of the new window count. 
+ */ + if (hctx->ccid3hctx_rtt < 4) + hctx->ccid3hctx_rtt = 4; + + ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n", + dccp_role(sk), sk, + hctx->ccid3hctx_rtt, + r_sample); + + /* Update timeout interval */ + inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC); + + /* Update receive rate */ + hctx->ccid3hctx_x_recv = x_recv; /* x_recv in bytes per second */ + + /* Update loss event rate */ + if (pinv == ~0 || pinv == 0) + hctx->ccid3hctx_p = 0; + else { + hctx->ccid3hctx_p = 1000000 / pinv; + + if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) { + hctx->ccid3hctx_p = TFRC_SMALLEST_P; + ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk); + } + } + + /* unschedule no feedback timer */ + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Update sending rate */ + ccid3_hc_tx_update_x(sk); + + /* Update next send time */ + if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) { + (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC; + (hctx->ccid3hctx_t_nom).tv_sec--; + } + /* FIXME - if no feedback then t_ipi can go > 1 second */ + (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi; + ccid3_calc_new_t_ipi(hctx); + (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi; + timeval_fix(&(hctx->ccid3hctx_t_nom)); + ccid3_calc_new_delta(hctx); + + /* remove all packets older than the one acked from history */ +#if 0 + FIXME! 
+ list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } +#endif + if (hctx->ccid3hctx_x < 10) { + ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); + hctx->ccid3hctx_x = 10; + } + /* to prevent divide by zero below */ + + /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ + next_tmout = max(inet_csk(sk)->icsk_rto, + 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + /* maths with 100000 and 10 is to prevent overflow with 32 bit */ + + ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", + dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); + + sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, + jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout))); + + /* set idle flag */ + hctx->ccid3hctx_idle = 1; + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state); + dump_stack(); + break; + } +} + +static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + + if (hctx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; +} + +static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, + unsigned char len, u16 idx, unsigned char *value) +{ + int rc = 0; + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_options_received *opt_recv; + + if (hctx == NULL) + return 0; + + opt_recv = &hctx->ccid3hctx_options_received; + + if (opt_recv->ccid3or_seqno != dp->dccps_gsr) { + opt_recv->ccid3or_seqno = dp->dccps_gsr; + opt_recv->ccid3or_loss_event_rate = ~0; + 
opt_recv->ccid3or_loss_intervals_idx = 0; + opt_recv->ccid3or_loss_intervals_len = 0; + opt_recv->ccid3or_receive_rate = 0; + } + + switch (option) { + case TFRC_OPT_LOSS_EVENT_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_loss_event_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_event_rate); + } + break; + case TFRC_OPT_LOSS_INTERVALS: + opt_recv->ccid3or_loss_intervals_idx = idx; + opt_recv->ccid3or_loss_intervals_len = len; + ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n", + dccp_role(sk), sk, + opt_recv->ccid3or_loss_intervals_idx, + opt_recv->ccid3or_loss_intervals_len); + break; + case TFRC_OPT_RECEIVE_RATE: + if (len != 4) { + ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n", + dccp_role(sk), sk); + rc = -EINVAL; + } else { + opt_recv->ccid3or_receive_rate = ntohl(*(u32 *)value); + ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n", + dccp_role(sk), sk, + opt_recv->ccid3or_receive_rate); + } + break; + } + + return rc; +} + +static int ccid3_hc_tx_init(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any()); + if (hctx == NULL) + return -ENOMEM; + + memset(hctx, 0, sizeof(*hctx)); + + if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + else + hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; + + hctx->ccid3hctx_x = hctx->ccid3hctx_s; /* set transmission rate to 1 packet per second */ + hctx->ccid3hctx_rtt = 4; /* See ccid3_hc_tx_packet_sent win_count calculatation */ + inet_csk(sk)->icsk_rto = USEC_IN_SEC; + hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT; + 
INIT_LIST_HEAD(&hctx->ccid3hctx_hist); + init_timer(&hctx->ccid3hctx_no_feedback_timer); + + return 0; +} + +static void ccid3_hc_tx_exit(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; + struct ccid3_tx_hist_entry *entry, *next; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + BUG_ON(hctx == NULL); + + ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM); + sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); + + /* Empty packet history */ + list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { + list_del_init(&entry->ccid3htx_node); + ccid3_tx_hist_entry_delete(entry); + } + + kfree(dp->dccps_hc_tx_ccid_private); + dp->dccps_hc_tx_ccid_private = NULL; +} + +/* + * RX Half Connection methods + */ + +/* TFRC receiver states */ +enum ccid3_hc_rx_states { + TFRC_RSTATE_NO_DATA = 1, + TFRC_RSTATE_DATA, + TFRC_RSTATE_TERM = 127, +}; + +#ifdef CCID3_DEBUG +static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) +{ + static char *ccid3_rx_state_names[] = { + [TFRC_RSTATE_NO_DATA] = "NO_DATA", + [TFRC_RSTATE_DATA] = "DATA", + [TFRC_RSTATE_TERM] = "TERM", + }; + + return ccid3_rx_state_names[state]; +} +#endif + +static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state; + + ccid3_pr_debug("%s(%p) %-8.8s -> %s\n", + dccp_role(sk), sk, ccid3_rx_state_name(oldstate), ccid3_rx_state_name(state)); + WARN_ON(state == oldstate); + hcrx->ccid3hcrx_state = state; +} + +static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *next; + u8 num_later = 0; + + if (list_empty(&hcrx->ccid3hcrx_hist)) + 
list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + u64 seqno = packet->ccid3hrx_seqno; + struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, + struct ccid3_rx_hist_entry, + ccid3hrx_node); + if (after48(seqno, iter->ccid3hrx_seqno)) + list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + else { + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later = 1; + + list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (after48(seqno, iter->ccid3hrx_seqno)) { + list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + goto trim_history; + } + + if (iter->ccid3hrx_type == DCCP_PKT_DATA || + iter->ccid3hrx_type == DCCP_PKT_DATAACK) + num_later++; + + if (num_later == TFRC_RECV_NUM_LATE_LOSS) { + ccid3_rx_hist_entry_delete(packet); + ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", + dccp_role(sk), sk, seqno); + return 1; + } + } + + if (num_later < TFRC_RECV_NUM_LATE_LOSS) + list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + /* FIXME: else what? should we destroy the packet like above? */ + } + } + +trim_history: + /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */ + num_later = TFRC_RECV_NUM_LATE_LOSS + 1; + + if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } else { + int step = 0; + u8 win_count = 0; /* Not needed, but lets shut up gcc */ + int tmp; + /* + * We have no loss interval history so we need at least one + * rtt:s of data packets to approximate rtt. 
+ */ + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + if (num_later == 0) { + switch (step) { + case 0: + step = 1; + /* OK, find next data packet */ + num_later = 1; + break; + case 1: + step = 2; + /* OK, find next data packet */ + num_later = 1; + win_count = entry->ccid3hrx_win_count; + break; + case 2: + tmp = win_count - entry->ccid3hrx_win_count; + if (tmp < 0) + tmp += TFRC_WIN_COUNT_LIMIT; + if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { + /* we have found a packet older than one rtt + * remove the rest */ + step = 3; + } else /* OK, find next data packet */ + num_later = 1; + break; + case 3: + list_del_init(&entry->ccid3hrx_node); + ccid3_rx_hist_entry_delete(entry); + break; + } + } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) + --num_later; + } + } + + return 0; +} + +static void ccid3_hc_rx_send_feedback(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + struct ccid3_rx_hist_entry *entry, *packet; + + ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); + + switch (hcrx->ccid3hcrx_state) { + case TFRC_RSTATE_NO_DATA: + hcrx->ccid3hcrx_x_recv = 0; + break; + case TFRC_RSTATE_DATA: { + u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback); + + if (delta == 0) + delta = 1; /* to prevent divide by zero */ + hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC) / delta; + } + break; + default: + printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n", + __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state); + dump_stack(); + return; + } + + packet = NULL; + list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) + if (entry->ccid3hrx_type == DCCP_PKT_DATA || + entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + if (packet == NULL) { + printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", + __FUNCTION__, dccp_role(sk), sk); + dump_stack(); + 
return; + } + + do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); + hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_bytes_recv = 0; + + /* Convert to multiples of 10us */ + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + if (hcrx->ccid3hcrx_p == 0) + hcrx->ccid3hcrx_pinv = ~0; + else + hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dccp_send_ack(sk); +} + +static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_sock *dp = dccp_sk(sk); + struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; + + if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) + return; + + if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb)) + dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) { + const u32 x_recv = htonl(hcrx->ccid3hcrx_x_recv); + const u32 pinv = htonl(hcrx->ccid3hcrx_pinv); + + dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE, &pinv, sizeof(pinv)); + dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE, &x_recv, sizeof(x_recv)); + } + + DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter; +} + +/* Weights used to calculate loss event rate */ +/* + * These are integers as per section 8 of RFC3448. We can then divide by 4 * + * when we use it. 
+ */
+const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, };
+
+/*
+ * args: fvalue - function value to match
+ * returns: p closest to that value
+ *
+ * both fvalue and p are multiplied by 1,000,000 to use ints
+ */
+u32 calcx_reverse_lookup(u32 fvalue) {
+	int ctr = 0;
+	int small;
+
+	if (fvalue < calcx_lookup[0][1])
+		return 0;
+	if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1])
+		small = 1;
+	else if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0])
+		return 1000000;
+	else
+		small = 0;
+	while (fvalue > calcx_lookup[ctr][small])
+		ctr++;
+	if (small)
+		return (CALCX_SPLIT * ctr / CALCX_ARRSIZE);
+	else
+		return (1000000 * ctr / CALCX_ARRSIZE) ;
+}
+
+/*
+ * calculate first loss interval
+ *
+ * Approximates the RTT from the window counters of the data packets in the
+ * rx history, computes the receive rate since the last feedback and maps
+ * s / (x_recv * rtt) through calcx_reverse_lookup() to an implied loss rate p.
+ *
+ * returns 1000000 / p, or ~0 when no estimate can be made (no data packet in
+ * the history, x_recv * rtt too small to divide by, or p == 0)
+ */
+
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_rx_hist_entry *entry, *next, *tail = NULL;
+	u32 rtt, delta, x_recv, fval, p, tmp2;
+	struct timeval tstamp, tmp_tv;
+	int interval = 0;
+	int win_count = 0;
+	int step = 0;
+	u64 tmp1;
+
+	/*
+	 * Walk the data packets from the head of the history: remember the
+	 * first one, then stop at the first one whose window counter is more
+	 * than 4 behind it. tail always points at the last data packet seen.
+	 */
+	list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+		if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+		    entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
+			tail = entry;
+
+			switch (step) {
+			case 0:
+				tstamp = entry->ccid3hrx_tstamp;
+				win_count = entry->ccid3hrx_win_count;
+				step = 1;
+				break;
+			case 1:
+				interval = win_count - entry->ccid3hrx_win_count;
+				if (interval < 0)
+					interval += TFRC_WIN_COUNT_LIMIT;
+				if (interval > 4)
+					goto found;
+				break;
+			}
+		}
+	}
+
+	if (step == 0) {
+		printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n",
+		       __FUNCTION__, dccp_role(sk), sk);
+		return ~0;
+	}
+
+	if (interval == 0) {
+		ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n",
+			       dccp_role(sk), sk);
+		interval = 1;
+	}
+found:
+	timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv);
+	rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval;
+	ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
+		       dccp_role(sk), sk, rtt);
+	if (rtt == 0)
+		rtt = 1;
+
+	delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
+	if (delta == 0)
+		delta = 1;
+
+	/*
+	 * Scale in 64 bits: bytes_recv * USEC_IN_SEC does not fit in a u32
+	 * once a few KB have been received (assuming USEC_IN_SEC is 10^6).
+	 */
+	tmp1 = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC;
+	do_div(tmp1, delta);
+	x_recv = (u32)tmp1;
+
+	tmp1 = (u64)x_recv * (u64)rtt;
+	do_div(tmp1,10000000);
+	tmp2 = (u32)tmp1;
+	if (tmp2 == 0) {
+		/* x_recv * rtt < 10000000: nothing to divide by, give up */
+		ccid3_pr_debug("%s, sk=%p, x_recv=%u, rtt=%u too small to estimate first loss interval\n",
+			       dccp_role(sk), sk, x_recv, rtt);
+		return ~0;
+	}
+	fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
+	/* do not alter order above or you will get overflow on 32 bit */
+	p = calcx_reverse_lookup(fval);
+	ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",\
+		       dccp_role(sk), sk, x_recv, p);
+
+	if (p == 0)
+		return ~0;
+	else
+		return 1000000 / p;
+}
+
+/*
+ * Record a detected loss. seq_loss == DCCP_MAX_SEQNO + 1 means "no loss".
+ * On the first loss the loss interval history is created with
+ * TFRC_RECV_IVAL_F_LENGTH + 1 entries; the newest entry gets the loss
+ * seqno/window counter and the oldest one the estimated first interval.
+ */
+static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_loss_interval_hist_entry *li_entry;
+
+	if (seq_loss != DCCP_MAX_SEQNO + 1) {
+		ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n",
+			       dccp_role(sk), sk, seq_loss, win_loss);
+
+		if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+			struct ccid3_loss_interval_hist_entry *li_tail = NULL;
+			int i;
+
+			ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk);
+			for (i = 0; i <= TFRC_RECV_IVAL_F_LENGTH; ++i) {
+				li_entry = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC);
+				if (li_entry == NULL) {
+					ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
+					ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n",
+						       dccp_role(sk), sk);
+					return;
+				}
+				/* list_add() inserts at the head, so the first entry ends up last */
+				if (li_tail == NULL)
+					li_tail = li_entry;
+				list_add(&li_entry->ccid3lih_node, &hcrx->ccid3hcrx_loss_interval_hist);
+			}
+
+			li_entry->ccid3lih_seqno = seq_loss;
+			li_entry->ccid3lih_win_count = win_loss;
+
+			li_tail->ccid3lih_interval = ccid3_hc_rx_calc_first_li(sk);
+		}
+	}
+	/* FIXME: find end of interval */
+}
+
+/*
+ * Look for a lost data packet in the rx history and hand the result to
+ * ccid3_hc_rx_update_li().
+ *
+ * NOTE: both list_for_each_entry_safe_continue() walks are compiled out
+ * (#if 0), so a_loss is never set: the function either reports "no loss"
+ * via out_update_li or returns early.
+ */
+static void ccid3_hc_rx_detect_loss(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet;
+	struct ccid3_rx_hist_entry *a_loss = NULL;
+	struct ccid3_rx_hist_entry *b_loss = NULL;
+	u64 seq_loss = DCCP_MAX_SEQNO + 1;
+	u8 win_loss = 0;
+	u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
+
+	list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+		if (num_later == 0) {
+			b_loss = entry;
+			break;
+		} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+			   entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+			--num_later;
+	}
+
+	if (b_loss == NULL)
+		goto out_update_li;
+
+	a_next = b_next;
+	num_later = 1;
+#if 0
+	FIXME MERGE GIT!
+	list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+		if (num_later == 0) {
+			a_loss = entry;
+			break;
+		} else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+			   entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+			--num_later;
+	}
+#endif
+
+	if (a_loss == NULL) {
+		if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+			/* no loss event has occurred yet */
+			ccid3_pr_debug("%s, sk=%p, TODO: find a lost data "
+				       "packet by comparing to initial seqno\n",
+				       dccp_role(sk), sk);
+			goto out_update_li;
+		} else {
+			pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history\n",
+				__FUNCTION__, dccp_role(sk), sk);
+			return;
+		}
+	}
+
+	/* Locate a lost data packet */
+	entry = packet = b_loss;
+#if 0
+	FIXME MERGE GIT!
+	list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+		u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno);
+
+		if (delta != 0) {
+			if (packet->ccid3hrx_type == DCCP_PKT_DATA ||
+			    packet->ccid3hrx_type == DCCP_PKT_DATAACK)
+				--delta;
+			/*
+			 * FIXME: check this, probably this % usage is because
+			 * in earlier drafts the ndp count was just 8 bits
+			 * long, but now it can be up to 24 bits long.
+			 */
+#if 0
+			if (delta % DCCP_NDP_LIMIT !=
+			    (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT)
+#endif
+			if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) {
+				seq_loss = entry->ccid3hrx_seqno;
+				dccp_inc_seqno(&seq_loss);
+			}
+		}
+		packet = entry;
+		if (packet == a_loss)
+			break;
+	}
+#endif
+
+	if (seq_loss != DCCP_MAX_SEQNO + 1)
+		win_loss = a_loss->ccid3hrx_win_count;
+
+out_update_li:
+	ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+}
+
+static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_loss_interval_hist_entry *li_entry, *li_next;
+	int i = 0;
+	u32 i_tot;
+	u32 i_tot0 = 0;
+	u32 i_tot1 = 0;
+	u32 w_tot = 0;
+
+	list_for_each_entry_safe(li_entry, li_next, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) {
+		if (i < TFRC_RECV_IVAL_F_LENGTH) {
+			i_tot0 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i];
+			w_tot += ccid3_hc_rx_w[i];
+		}
+
+		if (i != 0)
+			i_tot1 += li_entry->ccid3lih_interval * ccid3_hc_rx_w[i - 1];
+
+		if (++i > TFRC_RECV_IVAL_F_LENGTH)
+			break;
+	}
+
+	if (i != TFRC_RECV_IVAL_F_LENGTH) {
+		pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n",
+			__FUNCTION__, dccp_role(sk), sk);
+		return 0;
+	}
+
+	i_tot = max(i_tot0, i_tot1);
+
+	/* FIXME: Why do we do this? 
-Ian McDonald */
+	if (i_tot * 4 < w_tot)
+		i_tot = w_tot * 4;
+
+	return i_tot * 4 / w_tot;
+}
+
+/*
+ * Receiver side handling of an incoming packet: the packet is added to the
+ * rx history; for data packets the first one triggers the initial feedback,
+ * later ones trigger feedback once per RTT, and a packet reported as already
+ * lost by ccid3_hc_rx_add_hist() starts loss detection and, if the loss
+ * event rate went up, immediate feedback.
+ */
+static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_rx_hist_entry *packet;
+	struct timeval now;
+	u32 p_prev;
+	int ins;
+#if 0
+	ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
+		       dccp_role(sk), sk, dccp_state_name(sk->sk_state),
+		       skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+#endif
+	if (hcrx == NULL)
+		return;
+
+	BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
+		 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+
+	switch (DCCP_SKB_CB(skb)->dccpd_type) {
+	case DCCP_PKT_ACK:
+		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
+			return;
+		/* fallthrough */
+	case DCCP_PKT_DATAACK:
+		if (dp->dccps_options_received.dccpor_timestamp_echo == 0)
+			break;
+		p_prev = hcrx->ccid3hcrx_rtt;
+		do_gettimeofday(&now);
+		/* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo -
+					 usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10);
+		FIXME - I think above code is broken - have to look at options more, will also need
+		to fix pr_debug below */
+		if (p_prev != hcrx->ccid3hcrx_rtt)
+			ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n",
+				       dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
+				       dp->dccps_options_received.dccpor_timestamp_echo,
+				       dp->dccps_options_received.dccpor_elapsed_time);
+		break;
+	case DCCP_PKT_DATA:
+		break;
+	default:
+		ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
+			       dccp_role(sk), sk,
+			       dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+		return;
+	}
+
+	packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC);
+	if (packet == NULL) {
+		ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!\n",
+			       dccp_role(sk), sk);
+		return;
+	}
+
+	/* ins != 0: the packet was too late, it was dropped and counts as lost */
+	ins = ccid3_hc_rx_add_hist(sk, packet);
+
+	if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
+		return;
+
+	switch (hcrx->ccid3hcrx_state) {
+	case TFRC_RSTATE_NO_DATA:
+		ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n",
+			       dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
+		ccid3_hc_rx_send_feedback(sk);
+		ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+		return;
+	case TFRC_RSTATE_DATA:
+		hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4;
+		if (ins == 0) {
+			do_gettimeofday(&now);
+			if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) {
+				hcrx->ccid3hcrx_tstamp_last_ack = now;
+				ccid3_hc_rx_send_feedback(sk);
+			}
+			return;
+		}
+		break;
+	default:
+		printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+		       __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+		dump_stack();
+		return;
+	}
+
+	/* Dealing with packet loss */
+	ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n",
+		       dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
+
+	ccid3_hc_rx_detect_loss(sk);
+	p_prev = hcrx->ccid3hcrx_p;
+
+	/* Calculate loss event rate */
+	if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+		const u32 i_mean = ccid3_hc_rx_calc_i_mean(sk);
+
+		/*
+		 * ccid3_hc_rx_calc_i_mean() returns 0 when the interval
+		 * history is incomplete; keep the previous p in that case
+		 * instead of dividing by zero.
+		 */
+		if (i_mean != 0)
+			/* Scaling up by 1000000 as fixed decimal */
+			hcrx->ccid3hcrx_p = 1000000 / i_mean;
+	}
+
+	if (hcrx->ccid3hcrx_p > p_prev)
+		ccid3_hc_rx_send_feedback(sk);
+}
+
+/* Allocate and zero the receiver half connection state of @sk */
+static int ccid3_hc_rx_init(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
+	if (hcrx == NULL)
+		return -ENOMEM;
+
+	memset(hcrx, 0, sizeof(*hcrx));
+
+	if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE &&
+	    dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE)
+		hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size;
+	else
+		hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+
+	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
+	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
+	INIT_LIST_HEAD(&hcrx->ccid3hcrx_loss_interval_hist);
+
+	return 0;
+}
+
+/* Free both histories and the receiver half connection state of @sk */
+static void ccid3_hc_rx_exit(struct sock *sk)
+{
+	struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+
+	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+	if (hcrx == NULL)
+		return;
+
+	ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
+
+	/* Empty packet history */
+	ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist);
+
+	/* Empty loss interval history */
+	ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist);
+
+	kfree(dp->dccps_hc_rx_ccid_private);
+	dp->dccps_hc_rx_ccid_private = NULL;
+}
+
+static struct ccid ccid3 = {
+	.ccid_id = 3,
+	.ccid_name = "ccid3",
+	.ccid_owner = THIS_MODULE,
+	.ccid_init = ccid3_init,
+	.ccid_exit = ccid3_exit,
+	.ccid_hc_tx_init = ccid3_hc_tx_init,
+	.ccid_hc_tx_exit = ccid3_hc_tx_exit,
+	.ccid_hc_tx_send_packet = ccid3_hc_tx_send_packet,
+	.ccid_hc_tx_packet_sent = ccid3_hc_tx_packet_sent,
+	.ccid_hc_tx_packet_recv = ccid3_hc_tx_packet_recv,
+	.ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
+	.ccid_hc_tx_parse_options = ccid3_hc_tx_parse_options,
+	.ccid_hc_rx_init = ccid3_hc_rx_init,
+	.ccid_hc_rx_exit = ccid3_hc_rx_exit,
+	.ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
+	.ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv,
+};
+
+module_param(ccid3_debug, int, 0444);
+MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+
+/*
+ * Create the three history slab caches and register the CCID; on failure
+ * the caches created so far are destroyed again in reverse order.
+ */
+static __init int ccid3_module_init(void)
+{
+	int rc = -ENOMEM;
+
+	ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history",
+					       sizeof(struct ccid3_tx_hist_entry), 0,
+					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (ccid3_tx_hist_slab == NULL)
+		goto out;
+
+	ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history",
+					       sizeof(struct ccid3_rx_hist_entry), 0,
+					       SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (ccid3_rx_hist_slab == NULL)
+		goto out_free_tx_history;
+
+	ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history",
+							  sizeof(struct ccid3_loss_interval_hist_entry), 0,
+							  SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (ccid3_loss_interval_hist_slab == NULL)
+		goto out_free_rx_history;
+
+	rc = ccid_register(&ccid3);
+	if (rc != 0)
+		goto out_free_loss_interval_history;
+
+out:
+	return rc;
+out_free_loss_interval_history:
+	kmem_cache_destroy(ccid3_loss_interval_hist_slab);
+	ccid3_loss_interval_hist_slab = NULL;
+out_free_rx_history:
+	kmem_cache_destroy(ccid3_rx_hist_slab);
+	ccid3_rx_hist_slab = NULL;
+out_free_tx_history:
+	kmem_cache_destroy(ccid3_tx_hist_slab);
+	ccid3_tx_hist_slab = NULL;
+	goto out;
+}
+module_init(ccid3_module_init);
+
+/* Unregister the CCID and destroy whichever slab caches exist */
+static __exit void ccid3_module_exit(void)
+{
+	ccid_unregister(&ccid3);
+
+	if (ccid3_tx_hist_slab != NULL) {
+		kmem_cache_destroy(ccid3_tx_hist_slab);
+		ccid3_tx_hist_slab = NULL;
+	}
+	if (ccid3_rx_hist_slab != NULL) {
+		kmem_cache_destroy(ccid3_rx_hist_slab);
+		ccid3_rx_hist_slab = NULL;
+	}
+	if (ccid3_loss_interval_hist_slab != NULL) {
+		kmem_cache_destroy(ccid3_loss_interval_hist_slab);
+		ccid3_loss_interval_hist_slab = NULL;
+	}
+}
+module_exit(ccid3_module_exit);
+
+MODULE_AUTHOR("Ian McDonald & Arnaldo Carvalho de Melo ");
+MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644
index 000000000000..5d6b623e64da
--- /dev/null
+++ b/net/dccp/ccids/ccid3.h
@@ -0,0 +1,137 @@
+/*
+ * net/dccp/ccids/ccid3.h
+ *
+ * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ * An implementation of the DCCP protocol
+ *
+ * This code has been developed by the University of Waikato WAND
+ * research group. 
For further information please see http://www.wand.net.nz/
+ * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ * This code also uses code from Lulea University, rereleased as GPL by its
+ * authors:
+ * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ * Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ * and to make it work as a loadable module in the DCCP stack written by
+ * Arnaldo Carvalho de Melo .
+ *
+ * Copyright (c) 2005 Arnaldo Carvalho de Melo
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _DCCP_CCID3_H_
+#define _DCCP_CCID3_H_
+
+#include
+#include
+#include
+
+struct ccid3_tx_hist_entry {	/* one entry of the sender's packet history */
+	struct list_head ccid3htx_node;	/* links the entry into ccid3hctx_hist */
+	u64 ccid3htx_seqno:48,	/* dccps_gss at the time the packet was sent */
+	    ccid3htx_win_count:8,	/* window counter (ccval) put on the packet */
+	    ccid3htx_sent:1;	/* set to 1 by ccid3_hc_tx_packet_sent() */
+	struct timeval ccid3htx_tstamp;	/* time of sending, from do_gettimeofday() */
+};
+
+struct ccid3_options_received {	/* CCID3 options parsed by ccid3_hc_tx_parse_options() */
+	u64 ccid3or_seqno:48,	/* dccps_gsr the values below were recorded for */
+	    ccid3or_loss_intervals_idx:16;	/* idx passed in with TFRC_OPT_LOSS_INTERVALS */
+	u16 ccid3or_loss_intervals_len;	/* len of the TFRC_OPT_LOSS_INTERVALS option */
+	u32 ccid3or_loss_event_rate;	/* TFRC_OPT_LOSS_EVENT_RATE value, reset to ~0 per packet */
+	u32 ccid3or_receive_rate;	/* TFRC_OPT_RECEIVE_RATE value, reset to 0 per packet */
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
+ *
+ * @ccid3hctx_state - Sender state
+ * @ccid3hctx_x - Current sending rate
+ * @ccid3hctx_x_recv - Receive rate
+ * @ccid3hctx_x_calc - Calculated send (?) 
rate + * @ccid3hctx_s - Packet size + * @ccid3hctx_rtt - Estimate of current round trip time in usecs + * @@ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000 + * @ccid3hctx_last_win_count - Last window counter sent + * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent + * @ccid3hctx_no_feedback_timer - Handle to no feedback timer + * @ccid3hctx_idle - FIXME + * @ccid3hctx_t_ld - Time last doubled during slow start + * @ccid3hctx_t_nom - Nominal send time of next packet + * @ccid3hctx_t_ipi - Interpacket (send) interval + * @ccid3hctx_delta - Send timer delta + * @ccid3hctx_hist - Packet history + */ +struct ccid3_hc_tx_sock { + u32 ccid3hctx_x; + u32 ccid3hctx_x_recv; + u32 ccid3hctx_x_calc; + u16 ccid3hctx_s; + u32 ccid3hctx_rtt; + u32 ccid3hctx_p; + u8 ccid3hctx_state; + u8 ccid3hctx_last_win_count; + u8 ccid3hctx_idle; + struct timeval ccid3hctx_t_last_win_count; + struct timer_list ccid3hctx_no_feedback_timer; + struct timeval ccid3hctx_t_ld; + struct timeval ccid3hctx_t_nom; + u32 ccid3hctx_t_ipi; + u32 ccid3hctx_delta; + struct list_head ccid3hctx_hist; + struct ccid3_options_received ccid3hctx_options_received; +}; + +struct ccid3_loss_interval_hist_entry { + struct list_head ccid3lih_node; + u64 ccid3lih_seqno:48, + ccid3lih_win_count:4; + u32 ccid3lih_interval; +}; + +struct ccid3_rx_hist_entry { + struct list_head ccid3hrx_node; + u64 ccid3hrx_seqno:48, + ccid3hrx_win_count:4, + ccid3hrx_type:4; + u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval ccid3hrx_tstamp; +}; + +struct ccid3_hc_rx_sock { + u64 ccid3hcrx_seqno_last_counter:48, + ccid3hcrx_state:8, + ccid3hcrx_last_counter:4; + unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_p; + u32 ccid3hcrx_bytes_recv; + struct timeval ccid3hcrx_tstamp_last_feedback; + struct timeval ccid3hcrx_tstamp_last_ack; + struct list_head ccid3hcrx_hist; + struct list_head ccid3hcrx_loss_interval_hist; + u16 ccid3hcrx_s; + u32 ccid3hcrx_pinv; + u32 
ccid3hcrx_elapsed_time; + u32 ccid3hcrx_x_recv; +}; + +#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field) + +#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \ + ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field) + +#endif /* _DCCP_CCID3_H_ */ diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h new file mode 100644 index 000000000000..fb83454102c1 --- /dev/null +++ b/net/dccp/dccp.h @@ -0,0 +1,422 @@ +#ifndef _DCCP_H +#define _DCCP_H +/* + * net/dccp/dccp.h + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#define DCCP_DEBUG + +#ifdef DCCP_DEBUG +extern int dccp_debug; + +#define dccp_pr_debug(format, a...) \ + do { if (dccp_debug) \ + printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \ + } while (0) +#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0) +#else +#define dccp_pr_debug(format, a...) +#define dccp_pr_debug_cat(format, a...) 
+#endif + +extern struct inet_hashinfo dccp_hashinfo; + +extern atomic_t dccp_orphan_count; +extern int dccp_tw_count; +extern void dccp_tw_deschedule(struct inet_timewait_sock *tw); + +extern void dccp_time_wait(struct sock *sk, int state, int timeo); + +/* FIXME: Right size this */ +#define DCCP_MAX_OPT_LEN 128 + +#define DCCP_MAX_PACKET_HDR 32 + +#define MAX_DCCP_HEADER (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER) + +#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT + * state, about 60 seconds */ + +/* draft-ietf-dccp-spec-11.txt initial RTO value */ +#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ)) + +/* Maximal interval between probes for local resources. */ +#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U)) + +#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */ + +extern struct proto dccp_v4_prot; + +/* is seq1 < seq2 ? */ +static inline const int before48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq1 << 16) - (seq2 << 16)) < 0; +} + +/* is seq1 > seq2 ? */ +static inline const int after48(const u64 seq1, const u64 seq2) +{ + return (const s64)((seq2 << 16) - (seq1 << 16)) < 0; +} + +/* is seq2 <= seq1 <= seq3 ? */ +static inline const int between48(const u64 seq1, const u64 seq2, const u64 seq3) +{ + return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16); +} + +static inline u64 max48(const u64 seq1, const u64 seq2) +{ + return after48(seq1, seq2) ? 
seq1 : seq2; +} + +enum { + DCCP_MIB_NUM = 0, + DCCP_MIB_ACTIVEOPENS, /* ActiveOpens */ + DCCP_MIB_ESTABRESETS, /* EstabResets */ + DCCP_MIB_CURRESTAB, /* CurrEstab */ + DCCP_MIB_OUTSEGS, /* OutSegs */ + DCCP_MIB_OUTRSTS, + DCCP_MIB_ABORTONTIMEOUT, + DCCP_MIB_TIMEOUTS, + DCCP_MIB_ABORTFAILED, + DCCP_MIB_PASSIVEOPENS, + DCCP_MIB_ATTEMPTFAILS, + DCCP_MIB_OUTDATAGRAMS, + DCCP_MIB_INERRS, + DCCP_MIB_OPTMANDATORYERROR, + DCCP_MIB_INVALIDOPT, + __DCCP_MIB_MAX +}; + +#define DCCP_MIB_MAX __DCCP_MIB_MAX +struct dccp_mib { + unsigned long mibs[DCCP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + +DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); +#define DCCP_INC_STATS(field) SNMP_INC_STATS(dccp_statistics, field) +#define DCCP_INC_STATS_BH(field) SNMP_INC_STATS_BH(dccp_statistics, field) +#define DCCP_INC_STATS_USER(field) SNMP_INC_STATS_USER(dccp_statistics, field) +#define DCCP_DEC_STATS(field) SNMP_DEC_STATS(dccp_statistics, field) +#define DCCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(dccp_statistics, field, val) +#define DCCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(dccp_statistics, field, val) + +extern int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb); +extern int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb); + +extern int dccp_send_response(struct sock *sk); +extern void dccp_send_ack(struct sock *sk); +extern void dccp_send_delayed_ack(struct sock *sk); +extern void dccp_send_sync(struct sock *sk, u64 seq); + +extern void dccp_init_xmit_timers(struct sock *sk); +static inline void dccp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} + +extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); + +extern const char *dccp_packet_name(const int type); +extern const char *dccp_state_name(const int state); + +static inline void dccp_set_state(struct sock *sk, const int state) +{ + const int oldstate = sk->sk_state; + + dccp_pr_debug("%s(%p) %-10.10s -> %s\n", + dccp_role(sk), sk, + dccp_state_name(oldstate), 
dccp_state_name(state)); + WARN_ON(state == oldstate); + + switch (state) { + case DCCP_OPEN: + if (oldstate != DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_CURRESTAB); + break; + + case DCCP_CLOSED: + if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN) + DCCP_INC_STATS(DCCP_MIB_ESTABRESETS); + + sk->sk_prot->unhash(sk); + if (inet_csk(sk)->icsk_bind_hash != NULL && + !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) + inet_put_port(&dccp_hashinfo, sk); + /* fall through */ + default: + if (oldstate == DCCP_OPEN) + DCCP_DEC_STATS(DCCP_MIB_CURRESTAB); + } + + /* Change state AFTER socket is unhashed to avoid closed + * socket sitting in hash tables. + */ + sk->sk_state = state; +} + +static inline void dccp_done(struct sock *sk) +{ + dccp_set_state(sk, DCCP_CLOSED); + dccp_clear_xmit_timers(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + else + inet_csk_destroy_sock(sk); +} + +static inline void dccp_openreq_init(struct request_sock *req, + struct dccp_sock *dp, + struct sk_buff *skb) +{ + /* + * FIXME: fill in the other req fields from the DCCP options + * received + */ + inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->acked = 0; + req->rcv_wnd = 0; +} + +extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, + struct sk_buff *skb); +extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb); + +extern struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb); + +extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); + +extern void dccp_v4_err(struct sk_buff *skb, u32); + +extern int dccp_v4_rcv(struct sk_buff *skb); + +extern struct sock *dccp_v4_request_recv_sock(struct sock *sk, + struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst); +extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev); + 
+extern int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb); +extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len); +extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len); + +extern void dccp_close(struct sock *sk, long timeout); +extern struct sk_buff *dccp_make_response(struct sock *sk, + struct dst_entry *dst, + struct request_sock *req); + +extern int dccp_connect(struct sock *sk); +extern int dccp_disconnect(struct sock *sk, int flags); +extern int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen); +extern int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg); +extern int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t size); +extern int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int nonblock, + int flags, int *addr_len); +extern int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen); +extern void dccp_shutdown(struct sock *sk, int how); + +extern int dccp_v4_checksum(struct sk_buff *skb); + +extern int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code); +extern void dccp_send_close(struct sock *sk); + +struct dccp_skb_cb { + __u8 dccpd_type; + __u8 dccpd_reset_code; + __u8 dccpd_service; + __u8 dccpd_ccval; + __u64 dccpd_seq; + __u64 dccpd_ack_seq; + int dccpd_opt_len; +}; + +#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) + +static inline int dccp_non_data_packet(const struct sk_buff *skb) +{ + const __u8 type = DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_ACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ || + type == DCCP_PKT_RESET || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK; +} + +static inline int dccp_packet_without_ack(const struct sk_buff *skb) +{ + const __u8 type = 
DCCP_SKB_CB(skb)->dccpd_type; + + return type == DCCP_PKT_DATA || type == DCCP_PKT_REQUEST; +} + +#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1) +#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2) + +static inline void dccp_set_seqno(u64 *seqno, u64 value) +{ + if (value > DCCP_MAX_SEQNO) + value -= DCCP_MAX_SEQNO + 1; + *seqno = value; +} + +static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2) +{ + return ((seqno2 << 16) - (seqno1 << 16)) >> 16; +} + +static inline void dccp_inc_seqno(u64 *seqno) +{ + if (++*seqno > DCCP_MAX_SEQNO) + *seqno = 0; +} + +static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss) +{ + struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh)); + +#if defined(__LITTLE_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dh->dccph_seq = htonl((gss >> 32)); +#else +#error "Adjust your defines" +#endif + dhx->dccph_seq_low = htonl(gss & 0xffffffff); +} + +static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr) +{ +#if defined(__LITTLE_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8; +#elif defined(__BIG_ENDIAN_BITFIELD) + dhack->dccph_ack_nr_high = htonl((gsr >> 32)); +#else +#error "Adjust your defines" +#endif + dhack->dccph_ack_nr_low = htonl(gsr & 0xffffffff); +} + +static inline void dccp_update_gsr(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gsr; + + dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4)); + dp->dccps_gsr = seq; + dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr)); + dccp_set_seqno(&dp->dccps_swh, + dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4); +} + +static inline void dccp_update_gss(struct sock *sk, u64 seq) +{ + struct dccp_sock *dp = dccp_sk(sk); + u64 tmp_gss; + + dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1); + dp->dccps_awl = 
max48(tmp_gss, dp->dccps_iss); + dp->dccps_awh = dp->dccps_gss = seq; +} + +extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); +extern void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time); +extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + unsigned char option, + const void *value, unsigned char len); + +extern struct socket *dccp_ctl_socket; + +#define DCCP_ACKPKTS_STATE_RECEIVED 0 +#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackpkts - acknowledgeable packets + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpap_buf_head - circular buffer head + * @dccpap_buf_tail - circular buffer tail + * @dccpap_buf_ackno - ack # of the most recent packet acknoldgeable in the buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpap_ack_seqno - the Sequence Number used for the packet (HC-Receiver seqno) + * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet (HC-Sender seqno) + * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 
+ * + * @dccpap_buf_len - circular buffer length + * @dccpap_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackpkts { + unsigned int dccpap_buf_head; + unsigned int dccpap_buf_tail; + u64 dccpap_buf_ackno; + u64 dccpap_ack_seqno; + u64 dccpap_ack_ackno; + unsigned int dccpap_ack_ptr; + unsigned int dccpap_buf_vector_len; + unsigned int dccpap_ack_vector_len; + unsigned int dccpap_buf_len; + unsigned long dccpap_time; + u8 dccpap_buf_nonce; + u8 dccpap_ack_nonce; + u8 dccpap_buf[0]; +}; + +extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority); +extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno); + +#ifdef DCCP_DEBUG +extern void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, int len); +extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); +#else +static inline void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, + int len) { } +static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } +#endif + +#endif /* _DCCP_H */ diff --git a/net/dccp/input.c b/net/dccp/input.c new file mode 100644 index 000000000000..622e976a51fe --- /dev/null +++ b/net/dccp/input.c @@ -0,0 +1,510 @@ +/* + * net/dccp/input.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_fin(struct sock *sk, struct sk_buff *skb) +{ + sk->sk_shutdown |= RCV_SHUTDOWN; + sock_set_flag(sk, SOCK_DONE); + __skb_pull(skb, dccp_hdr(skb)->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); +} + +static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) +{ + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED); + dccp_fin(sk, skb); + dccp_set_state(sk, DCCP_CLOSED); + break; + } +} + +static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) +{ + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return; + } + + switch (sk->sk_state) { + case DCCP_PARTOPEN: + case DCCP_OPEN: + dccp_set_state(sk, DCCP_CLOSING); + dccp_send_close(sk); + break; + } +} + +static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dp->dccps_options.dccpo_send_ack_vector) + dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); +} + +static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + struct dccp_sock *dp = dccp_sk(sk); + u64 lswl = dp->dccps_swl; + u64 lawl = dp->dccps_awl; + + /* + * Step 5: Prepare sequence numbers for Sync + * If P.type == Sync or P.type == SyncAck, + * If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL, + * / * P is valid, so update sequence number variables + * accordingly. After this update, P will pass the tests + * in Step 6. 
A SyncAck is generated if necessary in + * Step 15 * / + * Update S.GSR, S.SWL, S.SWH + * Otherwise, + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_SYNC || + dh->dccph_type == DCCP_PKT_SYNCACK) { + if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) && + !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl)) + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + else + return -1; + /* + * Step 6: Check sequence numbers + * Let LSWL = S.SWL and LAWL = S.AWL + * If P.type == CloseReq or P.type == Close or P.type == Reset, + * LSWL := S.GSR + 1, LAWL := S.GAR + * If LSWL <= P.seqno <= S.SWH + * and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH), + * Update S.GSR, S.SWL, S.SWH + * If P.type != Sync, + * Update S.GAR + * Otherwise, + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ || + dh->dccph_type == DCCP_PKT_CLOSE || + dh->dccph_type == DCCP_PKT_RESET) { + lswl = dp->dccps_gsr; + dccp_inc_seqno(&lswl); + lawl = dp->dccps_gar; + } + + if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) && + (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ || + between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) { + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (dh->dccph_type != DCCP_PKT_SYNC && + DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq; + } else { + dccp_pr_debug("Step 6 failed, sending SYNC...\n"); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + return -1; + } + + return 0; +} + +int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, + const struct dccp_hdr *dh, const unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + + if (dccp_check_seqno(sk, skb)) + goto discard; + + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + /* + 
* FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) { + LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n")); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + goto discard; + } + + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (!inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX); + } + } + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + switch (dccp_hdr(skb)->dccph_type) { + case DCCP_PKT_DATAACK: + case DCCP_PKT_DATA: + /* + * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option + * if it is. 
+ */ + __skb_pull(skb, dh->dccph_doff * 4); + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk, 0); + return 0; + case DCCP_PKT_ACK: + goto discard; + case DCCP_PKT_RESET: + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + case DCCP_PKT_CLOSEREQ: + dccp_rcv_closereq(sk, skb); + goto discard; + case DCCP_PKT_CLOSE: + dccp_rcv_close(sk, skb); + return 0; + case DCCP_PKT_REQUEST: + /* Step 7 + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state >= OPEN and P.type == Request + * and P.seqno >= S.OSR) + * or (S.state >= OPEN and P.type == Response + * and P.seqno >= S.OSR) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + if (dp->dccps_role != DCCP_ROLE_LISTEN) + goto send_sync; + goto check_seq; + case DCCP_PKT_RESPONSE: + if (dp->dccps_role != DCCP_ROLE_CLIENT) + goto send_sync; +check_seq: + if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) { +send_sync: + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + } + break; + } + + DCCP_INC_STATS_BH(DCCP_MIB_INERRS); +discard: + __kfree_skb(skb); + return 0; +} + +static int dccp_rcv_request_sent_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + /* + * Step 4: Prepare sequence numbers in REQUEST + * If S.state == REQUEST, + * If (P.type == Response or P.type == Reset) + * and S.AWL <= P.ackno <= S.AWH, + * / * Set sequence number variables corresponding to the + * other endpoint, so P will pass the tests in Step 6 * / + * Set S.GSR, S.ISR, S.SWL, S.SWH + * / * Response processing continues in Step 10; Reset + * processing continues in Step 9 * / + */ + if (dh->dccph_type == DCCP_PKT_RESPONSE) { + const 
struct inet_connection_sock *icsk = inet_csk(sk); + struct dccp_sock *dp = dccp_sk(sk); + + /* Stop the REQUEST timer */ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); + BUG_TRAP(sk->sk_send_head != NULL); + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + + if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) { + dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n", + dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh); + goto out_invalid_packet; + } + + dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq); + + if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 || + ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) { + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + /* FIXME: send appropriate RESET code */ + goto out_invalid_packet; + } + + dccp_sync_mss(sk, dp->dccps_pmtu_cookie); + + /* + * Step 10: Process REQUEST state (second part) + * If S.state == REQUEST, + * / * If we get here, P is a valid Response from the server (see + * Step 4), and we should move to PARTOPEN state. PARTOPEN + * means send an Ack, don't send Data packets, retransmit + * Acks periodically, and always include any Init Cookie from + * the Response * / + * S.state := PARTOPEN + * Set PARTOPEN timer + * Continue with S.state == PARTOPEN + * / * Step 12 will send the Ack completing the three-way + * handshake * / + */ + dccp_set_state(sk, DCCP_PARTOPEN); + + /* Make sure socket is routed, for correct metrics. */ + inet_sk_rebuild_header(sk); + + if (!sock_flag(sk, SOCK_DEAD)) { + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + } + + if (sk->sk_write_pending || icsk->icsk_ack.pingpong || + icsk->icsk_accept_queue.rskq_defer_accept) { + /* Save one ACK. Data will be ready after + * several ticks, if write_pending is set. 
+ * + * It may be deleted, but with this feature tcpdumps + * look so _wonderfully_ clever, that I was not able + * to stand against the temptation 8) --ANK + */ + /* + * OK, in DCCP we can as well do a similar trick, its + * even in the draft, but there is no need for us to + * schedule an ack here, as dccp_sendmsg does this for + * us, also stated in the draft. -acme + */ + __kfree_skb(skb); + return 0; + } + dccp_send_ack(sk); + return -1; + } + +out_invalid_packet: + return 1; /* dccp_v4_do_rcv will send a reset, but... + FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR */ +} + +static int dccp_rcv_respond_partopen_state_process(struct sock *sk, + struct sk_buff *skb, + const struct dccp_hdr *dh, + const unsigned len) +{ + int queued = 0; + + switch (dh->dccph_type) { + case DCCP_PKT_RESET: + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + break; + case DCCP_PKT_DATAACK: + case DCCP_PKT_ACK: + /* + * FIXME: we should be reseting the PARTOPEN (DELACK) timer here, + * but only if we haven't used the DELACK timer for something else, + * like sending a delayed ack for a TIMESTAMP echo, etc, for now + * were not clearing it, sending an extra ACK when there is nothing + * else to do in DELACK is not a big deal after all. 
+ */ + + /* Stop the PARTOPEN timer */ + if (sk->sk_state == DCCP_PARTOPEN) + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); + + dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq; + dccp_set_state(sk, DCCP_OPEN); + + if (dh->dccph_type == DCCP_PKT_DATAACK) { + dccp_rcv_established(sk, skb, dh, len); + queued = 1; /* packet was queued (by dccp_rcv_established) */ + } + break; + } + + return queued; +} + +int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, + struct dccp_hdr *dh, unsigned len) +{ + struct dccp_sock *dp = dccp_sk(sk); + const int old_state = sk->sk_state; + int queued = 0; + + if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) { + if (dccp_check_seqno(sk, skb)) + goto discard; + + /* + * Step 8: Process options and mark acknowledgeable + */ + if (dccp_parse_options(sk, skb)) + goto discard; + + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + dccp_event_ack_recv(sk, skb); + + ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); + + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) + goto discard; + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 && + !inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX); + } + } + } + + /* + * Step 9: Process Reset + * If P.type == Reset, + * Tear down connection + * S.state := TIMEWAIT + * Set TIMEWAIT timer + * Drop packet and return + */ + if (dh->dccph_type == DCCP_PKT_RESET) { + /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the 
loop */ + dccp_fin(sk, skb); + dccp_time_wait(sk, DCCP_TIME_WAIT, 0); + return 0; + /* + * Step 7: Check for unexpected packet types + * If (S.is_server and P.type == CloseReq) + * or (S.is_server and P.type == Response) + * or (S.is_client and P.type == Request) + * or (S.state == RESPOND and P.type == Data), + * Send Sync packet acknowledging P.seqno + * Drop packet and return + */ + } else if ((dp->dccps_role != DCCP_ROLE_CLIENT && + (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) || + (dp->dccps_role == DCCP_ROLE_CLIENT && + dh->dccph_type == DCCP_PKT_REQUEST) || + (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq); + goto discard; + } + + switch (sk->sk_state) { + case DCCP_CLOSED: + return 1; + + case DCCP_LISTEN: + if (dh->dccph_type == DCCP_PKT_ACK || + dh->dccph_type == DCCP_PKT_DATAACK) + return 1; + + if (dh->dccph_type == DCCP_PKT_RESET) + goto discard; + + if (dh->dccph_type == DCCP_PKT_REQUEST) { + if (dccp_v4_conn_request(sk, skb) < 0) + return 1; + + /* FIXME: do congestion control initialization */ + goto discard; + } + goto discard; + + case DCCP_REQUESTING: + /* FIXME: do congestion control initialization */ + + queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len); + if (queued >= 0) + return queued; + + __kfree_skb(skb); + return 0; + + case DCCP_RESPOND: + case DCCP_PARTOPEN: + queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len); + break; + } + + if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) { + switch (old_state) { + case DCCP_PARTOPEN: + sk->sk_state_change(sk); + sk_wake_async(sk, 0, POLL_OUT); + break; + } + } + + if (!queued) { +discard: + __kfree_skb(skb); + } + return 0; +} diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c new file mode 100644 index 000000000000..083bacaecb3b --- /dev/null +++ b/net/dccp/ipv4.c @@ -0,0 +1,1289 @@ +/* + * net/dccp/ipv4.c + * + * An implementation of the DCCP 
protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { + .lhash_lock = RW_LOCK_UNLOCKED, + .lhash_users = ATOMIC_INIT(0), + .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), + .portalloc_lock = SPIN_LOCK_UNLOCKED, + .port_rover = 1024 - 1, +}; + +static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) +{ + return inet_csk_get_port(&dccp_hashinfo, sk, snum); +} + +static void dccp_v4_hash(struct sock *sk) +{ + inet_hash(&dccp_hashinfo, sk); +} + +static void dccp_v4_unhash(struct sock *sk) +{ + inet_unhash(&dccp_hashinfo, sk); +} + +/* called with local bh disabled */ +static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_sock *inet = inet_sk(sk); + const u32 daddr = inet->rcv_saddr; + const u32 saddr = inet->daddr; + const int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size); + struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash]; + const struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + tw = NULL; + + /* And established part... 
*/ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ + inet->num = lport; + inet->sport = htons(lport); + sk->sk_hashent = hash; + BUG_TRAP(sk_unhashed(sk)); + __sk_add_node(sk, &head->chain); + sock_prot_inc_use(sk->sk_prot); + write_unlock(&head->lock); + + if (twp != NULL) { + *twp = tw; + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + } else if (tw != NULL) { + /* Silly. Should hash-dance instead... */ + dccp_tw_deschedule(tw); + NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); + + inet_twsk_put(tw); + } + + return 0; + +not_unique: + write_unlock(&head->lock); + return -EADDRNOTAVAIL; +} + +/* + * Bind a port for a connect operation and hash it. + */ +static int dccp_v4_hash_connect(struct sock *sk) +{ + const unsigned short snum = inet_sk(sk)->num; + struct inet_bind_hashbucket *head; + struct inet_bind_bucket *tb; + int ret; + + if (snum == 0) { + int rover; + int low = sysctl_local_port_range[0]; + int high = sysctl_local_port_range[1]; + int remaining = (high - low) + 1; + struct hlist_node *node; + struct inet_timewait_sock *tw = NULL; + + local_bh_disable(); + + /* TODO. Actually it is not so bad idea to remove + * dccp_hashinfo.portalloc_lock before next submission to Linus. + * As soon as we touch this place at all it is time to think. + * + * Now it protects single _advisory_ variable dccp_hashinfo.port_rover, + * hence it is mostly useless. + * Code will work nicely if we just delete it, but + * I am afraid in contented case it will work not better or + * even worse: another cpu just will hit the same bucket + * and spin there. + * So some cpu salt could remove both contention and + * memory pingpong. Any ideas how to do this in a nice way? 
+ */ + spin_lock(&dccp_hashinfo.portalloc_lock); + rover = dccp_hashinfo.port_rover; + + do { + rover++; + if ((rover < low) || (rover > high)) + rover = low; + head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)]; + spin_lock(&head->lock); + + /* Does not bother with rcv_saddr checks, + * because the established check is already + * unique enough. + */ + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (tb->port == rover) { + BUG_TRAP(!hlist_empty(&tb->owners)); + if (tb->fastreuse >= 0) + goto next_port; + if (!__dccp_v4_check_established(sk, + rover, + &tw)) + goto ok; + goto next_port; + } + } + + tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover); + if (tb == NULL) { + spin_unlock(&head->lock); + break; + } + tb->fastreuse = -1; + goto ok; + + next_port: + spin_unlock(&head->lock); + } while (--remaining > 0); + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + local_bh_enable(); + + return -EADDRNOTAVAIL; + +ok: + /* All locks still held and bhs disabled */ + dccp_hashinfo.port_rover = rover; + spin_unlock(&dccp_hashinfo.portalloc_lock); + + inet_bind_hash(sk, tb, rover); + if (sk_unhashed(sk)) { + inet_sk(sk)->sport = htons(rover); + __inet_hash(&dccp_hashinfo, sk, 0); + } + spin_unlock(&head->lock); + + if (tw != NULL) { + dccp_tw_deschedule(tw); + inet_twsk_put(tw); + } + + ret = 0; + goto out; + } + + head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)]; + tb = inet_csk(sk)->icsk_bind_hash; + spin_lock_bh(&head->lock); + if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) { + __inet_hash(&dccp_hashinfo, sk, 0); + spin_unlock_bh(&head->lock); + return 0; + } else { + spin_unlock(&head->lock); + /* No definite answer... 
Walk to established hash table */ + ret = __dccp_v4_check_established(sk, snum, NULL); +out: + local_bh_enable(); + return ret; + } +} + +static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct rtable *rt; + u32 daddr, nexthop; + int tmp; + int err; + + dp->dccps_role = DCCP_ROLE_CLIENT; + + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + if (usin->sin_family != AF_INET) + return -EAFNOSUPPORT; + + nexthop = daddr = usin->sin_addr.s_addr; + if (inet->opt != NULL && inet->opt->srr) { + if (daddr == 0) + return -EINVAL; + nexthop = inet->opt->faddr; + } + + tmp = ip_route_connect(&rt, nexthop, inet->saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + inet->sport, usin->sin_port, sk); + if (tmp < 0) + return tmp; + + if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { + ip_rt_put(rt); + return -ENETUNREACH; + } + + if (inet->opt == NULL || !inet->opt->srr) + daddr = rt->rt_dst; + + if (inet->saddr == 0) + inet->saddr = rt->rt_src; + inet->rcv_saddr = inet->saddr; + + inet->dport = usin->sin_port; + inet->daddr = daddr; + + dp->dccps_ext_header_len = 0; + if (inet->opt != NULL) + dp->dccps_ext_header_len = inet->opt->optlen; + /* + * Socket identity is still unknown (sport may be zero). + * However we set state to DCCP_REQUESTING and not releasing socket + * lock select source port, enter ourselves into the hash tables and + * complete initialization after this. + */ + dccp_set_state(sk, DCCP_REQUESTING); + err = dccp_v4_hash_connect(sk); + if (err != 0) + goto failure; + + err = ip_route_newports(&rt, inet->sport, inet->dport, sk); + if (err != 0) + goto failure; + + /* OK, now commit destination to socket. 
*/ + sk_setup_caps(sk, &rt->u.dst); + + dp->dccps_gar = + dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, + inet->daddr, + inet->sport, + usin->sin_port); + dccp_update_gss(sk, dp->dccps_iss); + + inet->id = dp->dccps_iss ^ jiffies; + + err = dccp_connect(sk); + rt = NULL; + if (err != 0) + goto failure; +out: + return err; +failure: + /* This unhashes the socket and releases the local port, if necessary. */ + dccp_set_state(sk, DCCP_CLOSED); + ip_rt_put(rt); + sk->sk_route_caps = 0; + inet->dport = 0; + goto out; +} + +/* + * This routine does path mtu discovery as defined in RFC1191. + */ +static inline void dccp_do_pmtu_discovery(struct sock *sk, + const struct iphdr *iph, + u32 mtu) +{ + struct dst_entry *dst; + const struct inet_sock *inet = inet_sk(sk); + const struct dccp_sock *dp = dccp_sk(sk); + + /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs + * send out by Linux are always < 576bytes so they should go through + * unfragmented). + */ + if (sk->sk_state == DCCP_LISTEN) + return; + + /* We don't check in the destentry if pmtu discovery is forbidden + * on this route. We just assume that no packet_to_big packets + * are send back when pmtu discovery is not active. + * There is a small race when the user changes this flag in the + * route, but I think that's acceptable. + */ + if ((dst = __sk_dst_check(sk, 0)) == NULL) + return; + + dst->ops->update_pmtu(dst, mtu); + + /* Something is about to be wrong... Remember soft error + * for the case, if this connection will not able to recover. + */ + if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) + sk->sk_err_soft = EMSGSIZE; + + mtu = dst_mtu(dst); + + if (inet->pmtudisc != IP_PMTUDISC_DONT && + dp->dccps_pmtu_cookie > mtu) { + dccp_sync_mss(sk, mtu); + + /* + * From: draft-ietf-dccp-spec-11.txt + * + * DCCP-Sync packets are the best choice for upward probing, + * since DCCP-Sync probes do not risk application data loss. 
+		 */
+		dccp_send_sync(sk, dp->dccps_gsr);
+	} /* else let the usual retransmit timer handle it */
+}
+
+/*
+ * Answer rxskb with a bare DCCP-Ack sent through the control socket.
+ *
+ * The reply swaps rxskb's ports, uses rxskb's acknowledgement number as its
+ * own sequence number and acknowledges rxskb's sequence number.  Extended
+ * sequence numbers are always used (dccph_x = 1).
+ *
+ * Nothing is sent when rxskb was not routed as RTN_LOCAL or when the skb
+ * allocation fails; there is no return value, so callers cannot tell.
+ */
+static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
+{
+	int err;
+	struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+	const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+				     sizeof(struct dccp_hdr_ext) +
+				     sizeof(struct dccp_hdr_ack_bits);
+	struct sk_buff *skb;
+
+	if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+		return;
+
+	skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+	if (skb == NULL)
+		return;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, MAX_DCCP_HEADER);
+
+	skb->dst = dst_clone(rxskb->dst);
+
+	skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
+	dh = dccp_hdr(skb);
+	memset(dh, 0, dccp_hdr_ack_len);
+
+	/* Build DCCP header and checksum it. */
+	dh->dccph_type = DCCP_PKT_ACK;
+	dh->dccph_sport = rxdh->dccph_dport;
+	dh->dccph_dport = rxdh->dccph_sport;
+	dh->dccph_doff = dccp_hdr_ack_len / 4;
+	dh->dccph_x = 1;
+
+	dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+	dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+	/*
+	 * The checksum must be filled in last, once every other header field
+	 * is final (the field itself is still zero from the memset above).
+	 * Without it the peer's header checksum verification rejects the Ack;
+	 * dccp_v4_ctl_send_reset() does the same for Resets.
+	 */
+	dh->dccph_checksum = dccp_v4_checksum(skb);
+
+	bh_lock_sock(dccp_ctl_socket->sk);
+	err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+				    rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
+	bh_unlock_sock(dccp_ctl_socket->sk);
+
+	/* An Ack is an outgoing segment but not a Reset: only count OUTSEGS. */
+	if (err == NET_XMIT_CN || err == 0)
+		DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+}
+
+/* request_sock_ops .send_ack hook: req is unused, the Ack is built from skb. */
+static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
+{
+	dccp_v4_ctl_send_ack(skb);
+}
+
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+				 struct dst_entry *dst)
+{
+	int err = -1;
+	struct sk_buff *skb;
+
+	/* First, grab a route.
*/ + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + + err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, + ireq->rmt_addr, + ireq->opt); + if (err == NET_XMIT_CN) + err = 0; + } + +out: + dst_release(dst); + return err; +} + +/* + * This routine is called by the ICMP module when it gets some sort of error + * condition. If err < 0 then the socket should be closed and the error + * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. + * After adjustment header points to the first 8 bytes of the tcp header. We + * need to find the appropriate port. + * + * The locking strategy used here is very "optimistic". When someone else + * accesses the socket the ICMP is just dropped and for some paths there is no + * check at all. A more general error queue to queue errors for later handling + * is probably better. + */ +void dccp_v4_err(struct sk_buff *skb, u32 info) +{ + const struct iphdr *iph = (struct iphdr *)skb->data; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); + struct dccp_sock *dp; + struct inet_sock *inet; + const int type = skb->h.icmph->type; + const int code = skb->h.icmph->code; + struct sock *sk; + __u64 seq; + int err; + + if (skb->len < (iph->ihl << 2) + 8) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, + iph->saddr, dh->dccph_sport, inet_iif(skb)); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; + } + + if (sk->sk_state == DCCP_TIME_WAIT) { + inet_twsk_put((struct inet_timewait_sock *)sk); + return; + } + + bh_lock_sock(sk); + /* If too many ICMPs get dropped on busy + * servers this needs to be solved differently. 
+ */ + if (sock_owned_by_user(sk)) + NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + + if (sk->sk_state == DCCP_CLOSED) + goto out; + + dp = dccp_sk(sk); + seq = dccp_hdr_seq(skb); + if (sk->sk_state != DCCP_LISTEN && + !between48(seq, dp->dccps_swl, dp->dccps_swh)) { + NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + + switch (type) { + case ICMP_SOURCE_QUENCH: + /* Just silently ignore these. */ + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + break; + case ICMP_DEST_UNREACH: + if (code > NR_ICMP_UNREACH) + goto out; + + if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ + if (!sock_owned_by_user(sk)) + dccp_do_pmtu_discovery(sk, iph, info); + goto out; + } + + err = icmp_err_convert[code].errno; + break; + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + goto out; + } + + switch (sk->sk_state) { + struct request_sock *req , **prev; + case DCCP_LISTEN: + if (sock_owned_by_user(sk)) + goto out; + req = inet_csk_search_req(sk, &prev, dh->dccph_dport, + iph->daddr, iph->saddr); + if (!req) + goto out; + + /* + * ICMPs are not backlogged, hence we cannot get an established + * socket here. + */ + BUG_TRAP(!req->sk); + + if (seq != dccp_rsk(req)->dreq_iss) { + NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + goto out; + } + /* + * Still in RESPOND, just remove it silently. + * There is no good way to pass the error to the newly + * created socket, and POSIX does not want network + * errors returned from accept(). + */ + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; + + case DCCP_REQUESTING: + case DCCP_RESPOND: + if (!sock_owned_by_user(sk)) { + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + sk->sk_err = err; + + sk->sk_error_report(sk); + + dccp_done(sk); + } else + sk->sk_err_soft = err; + goto out; + } + + /* If we've already connected we will keep trying + * until we time out, or the user gives up. 
+ * + * rfc1122 4.2.3.9 allows to consider as hard errors + * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, + * but it is obsoleted by pmtu discovery). + * + * Note, that in modern internet, where routing is unreliable + * and in each dark corner broken firewalls sit, sending random + * errors ordered by their masters even this two messages finally lose + * their original sense (even Linux sends invalid PORT_UNREACHs) + * + * Now we are in compliance with RFCs. + * --ANK (980905) + */ + + inet = inet_sk(sk); + if (!sock_owned_by_user(sk) && inet->recverr) { + sk->sk_err = err; + sk->sk_error_report(sk); + } else /* Only an error on timeout */ + sk->sk_err_soft = err; +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code); + +int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) +{ + struct sk_buff *skb; + /* + * FIXME: what if rebuild_header fails? + * Should we be doing a rebuild_header here? 
+ */ + int err = inet_sk_rebuild_header(sk); + + if (err != 0) + return err; + + skb = dccp_make_reset(sk, sk->sk_dst_cache, code); + if (skb != NULL) { + const struct dccp_sock *dp = dccp_sk(sk); + const struct inet_sock *inet = inet_sk(sk); + + err = ip_build_and_send_pkt(skb, sk, + inet->saddr, inet->daddr, NULL); + if (err == NET_XMIT_CN) + err = 0; + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); + } + + return err; +} + +static inline u64 dccp_v4_init_sequence(const struct sock *sk, + const struct sk_buff *skb) +{ + return secure_dccp_sequence_number(skb->nh.iph->daddr, + skb->nh.iph->saddr, + dccp_hdr(skb)->dccph_dport, + dccp_hdr(skb)->dccph_sport); +} + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct dccp_sock dp; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __u32 saddr = skb->nh.iph->saddr; + const __u32 daddr = skb->nh.iph->daddr; + struct dst_entry *dst = NULL; + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (((struct rtable *)skb->dst)->rt_flags & + (RTCF_BROADCAST | RTCF_MULTICAST)) + goto drop; + + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. 
+ */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = reqsk_alloc(sk->sk_prot->rsk_prot); + if (req == NULL) + goto drop; + + /* FIXME: process options */ + + dccp_openreq_init(req, &dp, skb); + + ireq = inet_rsk(req); + ireq->loc_addr = daddr; + ireq->rmt_addr = saddr; + /* FIXME: Merge Aristeu's option parsing code when ready */ + req->rcv_wnd = 100; /* Fake, option parsing will get the right value */ + ireq->opt = NULL; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * In fact we defer setting S.GSR, S.SWL, S.SWH to + * dccp_create_openreq_child. + */ + dreq = dccp_rsk(req); + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; + + if (dccp_v4_send_response(sk, req, dst)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + /* + * FIXME: should be reqsk_free after implementing req->rsk_ops + */ + __reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + return -1; +} + +/* + * The three way handshake has completed - we got a valid ACK or DATAACK - + * now create the new socket. 
+ * + * This is the equivalent of TCP's tcp_v4_syn_recv_sock + */ +struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) +{ + struct inet_request_sock *ireq; + struct inet_sock *newinet; + struct dccp_sock *newdp; + struct sock *newsk; + + if (sk_acceptq_is_full(sk)) + goto exit_overflow; + + if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) + goto exit; + + newsk = dccp_create_openreq_child(sk, req, skb); + if (newsk == NULL) + goto exit; + + sk_setup_caps(newsk, dst); + + newdp = dccp_sk(newsk); + newinet = inet_sk(newsk); + ireq = inet_rsk(req); + newinet->daddr = ireq->rmt_addr; + newinet->rcv_saddr = ireq->loc_addr; + newinet->saddr = ireq->loc_addr; + newinet->opt = ireq->opt; + ireq->opt = NULL; + newinet->mc_index = inet_iif(skb); + newinet->mc_ttl = skb->nh.iph->ttl; + newinet->id = jiffies; + + dccp_sync_mss(newsk, dst_mtu(dst)); + + __inet_hash(&dccp_hashinfo, newsk, 0); + __inet_inherit_port(&dccp_hashinfo, sk, newsk); + + return newsk; + +exit_overflow: + NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); +exit: + NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + dst_release(dst); + return NULL; +} + +static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + const struct iphdr *iph = skb->nh.iph; + struct sock *nsk; + struct request_sock **prev; + /* Find possible connection requests. 
*/ + struct request_sock *req = inet_csk_search_req(sk, &prev, + dh->dccph_sport, + iph->saddr, iph->daddr); + if (req != NULL) + return dccp_check_req(sk, skb, req, prev); + + nsk = __inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + if (nsk != NULL) { + if (nsk->sk_state != DCCP_TIME_WAIT) { + bh_lock_sock(nsk); + return nsk; + } + inet_twsk_put((struct inet_timewait_sock *)nsk); + return NULL; + } + + return sk; +} + +int dccp_v4_checksum(struct sk_buff *skb) +{ + struct dccp_hdr* dh = dccp_hdr(skb); + int checksum_len; + u32 tmp; + + if (dh->dccph_cscov == 0) + checksum_len = skb->len; + else { + checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); + checksum_len = checksum_len < skb->len ? checksum_len : skb->len; + } + + tmp = csum_partial((unsigned char *)dh, checksum_len, 0); + return csum_fold(tmp); +} + +static int dccp_v4_verify_checksum(struct sk_buff *skb) +{ + struct dccp_hdr *th = dccp_hdr(skb); + const u16 remote_checksum = th->dccph_checksum; + u16 local_checksum; + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = 0; /* zero it for computation */ + + local_checksum = dccp_v4_checksum(skb); + + /* FIXME: don't mess with skb payload */ + th->dccph_checksum = remote_checksum; /* put it back */ + + return remote_checksum == local_checksum ? 
0 : -1; +} + +static struct dst_entry* dccp_v4_route_skb(struct sock *sk, + struct sk_buff *skb) +{ + struct rtable *rt; + struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, + .nl_u = { .ip4_u = + { .daddr = skb->nh.iph->saddr, + .saddr = skb->nh.iph->daddr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = dccp_hdr(skb)->dccph_dport, + .dport = dccp_hdr(skb)->dccph_sport } } }; + + if (ip_route_output_flow(&rt, &fl, sk, 0)) { + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + return NULL; + } + + return &rt->u.dst; +} + +void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +{ + int err; + struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; + const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb; + struct dst_entry *dst; + + /* Never send a reset in response to a reset. */ + if (rxdh->dccph_type == DCCP_PKT_RESET) + return; + + if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + return; + + dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); + if (dst == NULL) + return; + + skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + if (skb == NULL) + goto out; + + /* Reserve space for headers. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->dst = dst_clone(dst); + + skb->h.raw = skb_push(skb, dccp_hdr_reset_len); + dh = dccp_hdr(skb); + memset(dh, 0, dccp_hdr_reset_len); + + /* Build DCCP header and checksum it. 
*/ + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_sport = rxdh->dccph_dport; + dh->dccph_dport = rxdh->dccph_sport; + dh->dccph_doff = dccp_hdr_reset_len / 4; + dh->dccph_x = 1; + dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code; + + dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + bh_lock_sock(dccp_ctl_socket->sk); + err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, + rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL); + bh_unlock_sock(dccp_ctl_socket->sk); + + if (err == NET_XMIT_CN || err == 0) { + DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); + DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); + } +out: + dst_release(dst); +} + +int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_hdr *dh = dccp_hdr(skb); + + if (sk->sk_state == DCCP_OPEN) { /* Fast path */ + if (dccp_rcv_established(sk, skb, dh, skb->len)) + goto reset; + return 0; + } + + /* + * Step 3: Process LISTEN state + * If S.state == LISTEN, + * If P.type == Request or P contains a valid Init Cookie option, + * * Must scan the packet's options to check for an Init + * Cookie. Only the Init Cookie is processed here, + * however; other options are processed in Step 8. 
This + * scan need only be performed if the endpoint uses Init + * Cookies * + * * Generate a new socket and switch to that socket * + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * Continue with S.state == RESPOND + * * A Response packet will be generated in Step 11 * + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + * + * NOTE: the check for the packet types is done in dccp_rcv_state_process + */ + if (sk->sk_state == DCCP_LISTEN) { + struct sock *nsk = dccp_v4_hnd_req(sk, skb); + + if (nsk == NULL) + goto discard; + + if (nsk != sk) { + if (dccp_child_process(sk, nsk, skb)) + goto reset; + return 0; + } + } + + if (dccp_rcv_state_process(sk, skb, dh, skb->len)) + goto reset; + return 0; + +reset: + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); +discard: + kfree_skb(skb); + return 0; +} + +static inline int dccp_invalid_packet(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + + if (skb->pkt_type != PACKET_HOST) + return 1; + + if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { + dccp_pr_debug("pskb_may_pull failed\n"); + return 1; + } + + dh = dccp_hdr(skb); + + /* If the packet type is not understood, drop packet and return */ + if (dh->dccph_type >= DCCP_PKT_INVALID) { + dccp_pr_debug("invalid packet type\n"); + return 1; + } + + /* + * If P.Data Offset is too small for packet type, or too large for + * packet, drop packet and return + */ + if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { + dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff); + return 1; + } + + if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { + dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff); + return 1; + } + + dh = dccp_hdr(skb); + + /* + * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet + * 
has short sequence numbers), drop packet and return + */ + if (dh->dccph_x == 0 && + dh->dccph_type != DCCP_PKT_DATA && + dh->dccph_type != DCCP_PKT_ACK && + dh->dccph_type != DCCP_PKT_DATAACK) { + dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n", + dccp_packet_name(dh->dccph_type)); + return 1; + } + + /* If the header checksum is incorrect, drop packet and return */ + if (dccp_v4_verify_checksum(skb) < 0) { + dccp_pr_debug("header checksum is incorrect\n"); + return 1; + } + + return 0; +} + +/* this is called when real data arrives */ +int dccp_v4_rcv(struct sk_buff *skb) +{ + const struct dccp_hdr *dh; + struct sock *sk; + int rc; + + /* Step 1: Check header basics: */ + + if (dccp_invalid_packet(skb)) + goto discard_it; + + dh = dccp_hdr(skb); +#if 0 + /* + * Use something like this to simulate some DATA/DATAACK loss to test + * dccp_ackpkts_add, you'll get something like this on a session that + * sends 10 DATA/DATAACK packets: + * + * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * + * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state + * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet + * + * So... 
+ * + * 281473596467422 was received + * 281473596467421 was not received + * 281473596467420 was received + * 281473596467419 was not received + * 281473596467418 was received + * 281473596467417 was not received + * 281473596467416 was received + * 281473596467415 was not received + * 281473596467414 was received + * 281473596467413 was received (this one was the 3way handshake RESPONSE) + * + */ + if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) { + static int discard = 0; + + if (discard) { + discard = 0; + goto discard_it; + } + discard = 1; + } +#endif + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; + + dccp_pr_debug("%8.8s " + "src=%u.%u.%u.%u@%-5d " + "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_packet_name(dh->dccph_type), + NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), + NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + DCCP_SKB_CB(skb)->dccpd_seq); + + if (dccp_packet_without_ack(skb)) { + DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ; + dccp_pr_debug_cat("\n"); + } else { + DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); + dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq); + } + + /* Step 2: + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup(&dccp_hashinfo, + skb->nh.iph->saddr, dh->dccph_sport, + skb->nh.iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); + + /* + * Step 2: + * If no socket ... + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (sk == NULL) { + dccp_pr_debug("failed to look up flow ID in table and " + "get corresponding socket\n"); + goto no_dccp_socket; + } + + /* + * Step 2: + * ... 
or S.state == TIMEWAIT, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + + if (sk->sk_state == DCCP_TIME_WAIT) { + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); + goto discard_and_relse; + } + + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { + dccp_pr_debug("xfrm4_policy_check failed\n"); + goto discard_and_relse; + } + + if (sk_filter(sk, skb, 0)) { + dccp_pr_debug("sk_filter failed\n"); + goto discard_and_relse; + } + + skb->dev = NULL; + + bh_lock_sock(sk); + rc = 0; + if (!sock_owned_by_user(sk)) + rc = dccp_v4_do_rcv(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + + sock_put(sk); + return rc; + +no_dccp_socket: + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto discard_it; + /* + * Step 2: + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return + */ + if (dh->dccph_type != DCCP_PKT_RESET) { + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + dccp_v4_ctl_send_reset(skb); + } + +discard_it: + /* Discard frame. */ + kfree_skb(skb); + return 0; + +discard_and_relse: + sock_put(sk); + goto discard_it; +} + +static int dccp_v4_init_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + static int dccp_ctl_socket_init = 1; + + dccp_options_init(&dp->dccps_options); + + if (dp->dccps_options.dccpo_send_ack_vector) { + dp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); + + if (dp->dccps_hc_rx_ackpkts == NULL) + return -ENOMEM; + } + + /* + * FIXME: We're hardcoding the CCID, and doing this at this point makes + * the listening (master) sock get CCID control blocks, which is not + * necessary, but for now, to not mess with the test userspace apps, + * lets leave it here, later the real solution is to do this in a + * setsockopt(CCIDs-I-want/accept). 
-acme + */ + if (likely(!dccp_ctl_socket_init)) { + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); + if (dp->dccps_hc_rx_ccid == NULL || + dp->dccps_hc_tx_ccid == NULL) { + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + return -ENOMEM; + } + } else + dccp_ctl_socket_init = 0; + + dccp_init_xmit_timers(sk); + sk->sk_state = DCCP_CLOSED; + dp->dccps_mss_cache = 536; + dp->dccps_role = DCCP_ROLE_UNDEFINED; + + return 0; +} + +int dccp_v4_destroy_sock(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + /* + * DCCP doesn't use sk_qrite_queue, just sk_send_head + * for retransmissions + */ + if (sk->sk_send_head != NULL) { + kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + /* Clean up a referenced DCCP bind bucket. */ + if (inet_csk(sk)->icsk_bind_hash != NULL) + inet_put_port(&dccp_hashinfo, sk); + + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; + ccid_exit(dp->dccps_hc_rx_ccid, sk); + ccid_exit(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + + return 0; +} + +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + kfree(inet_rsk(req)->opt); +} + +static struct request_sock_ops dccp_request_sock_ops = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_v4_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, +}; + +struct proto dccp_v4_prot = { + .name = "DCCP", + .owner = THIS_MODULE, + .close = dccp_close, + .connect = dccp_v4_connect, + .disconnect = dccp_disconnect, + .ioctl = dccp_ioctl, + .init = dccp_v4_init_sock, + .setsockopt = dccp_setsockopt, + .getsockopt = dccp_getsockopt, + .sendmsg 
= dccp_sendmsg, + .recvmsg = dccp_recvmsg, + .backlog_rcv = dccp_v4_do_rcv, + .hash = dccp_v4_hash, + .unhash = dccp_v4_unhash, + .accept = inet_csk_accept, + .get_port = dccp_v4_get_port, + .shutdown = dccp_shutdown, + .destroy = dccp_v4_destroy_sock, + .orphan_count = &dccp_orphan_count, + .max_header = MAX_DCCP_HEADER, + .obj_size = sizeof(struct dccp_sock), + .rsk_prot = &dccp_request_sock_ops, + .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */ +}; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c new file mode 100644 index 000000000000..810f0c293b85 --- /dev/null +++ b/net/dccp/minisocks.c @@ -0,0 +1,199 @@ +/* + * net/dccp/minisocks.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +void dccp_time_wait(struct sock *sk, int state, int timeo) +{ + /* FIXME: Implement */ + dccp_pr_debug("Want to help? Start here\n"); + dccp_set_state(sk, state); +} + +/* This is for handling early-kills of TIME_WAIT sockets. */ +void dccp_tw_deschedule(struct inet_timewait_sock *tw) +{ + dccp_pr_debug("Want to help? 
Start here\n"); + __inet_twsk_kill(tw, &dccp_hashinfo); +} + +struct sock *dccp_create_openreq_child(struct sock *sk, + const struct request_sock *req, + const struct sk_buff *skb) +{ + /* + * Step 3: Process LISTEN state + * + * // Generate a new socket and switch to that socket + * Set S := new socket for this port pair + */ + struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC); + + if (newsk != NULL) { + const struct dccp_request_sock *dreq = dccp_rsk(req); + /* Take the child's connection sock: the RTO set below belongs to newsk, not to the parent sk. */ + struct inet_connection_sock *newicsk = inet_csk(newsk); + struct dccp_sock *newdp = dccp_sk(newsk); + + newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_role = DCCP_ROLE_SERVER; + newicsk->icsk_rto = TCP_TIMEOUT_INIT; + + if (newdp->dccps_options.dccpo_send_ack_vector) { + newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); + /* + * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone + * copied the master sock and left the CCID pointers for this child, + * that is why we do the __ccid_get calls.
+ */ + if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) + goto out_free; + } + + if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 || + ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { + dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); + ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); + ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); +out_free: + /* It is still raw copy of parent, so invalidate + * destructor and make plain sk_free() */ + newsk->sk_destruct = NULL; + sk_free(newsk); + return NULL; + } + + __ccid_get(newdp->dccps_hc_rx_ccid); + __ccid_get(newdp->dccps_hc_tx_ccid); + + /* + * Step 3: Process LISTEN state + * + * Choose S.ISS (initial seqno) or set from Init Cookie + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + */ + + /* See dccp_v4_conn_request */ + newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd; + + newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr; + dccp_update_gsr(newsk, dreq->dreq_isr); + + newdp->dccps_iss = dreq->dreq_iss; + dccp_update_gss(newsk, dreq->dreq_iss); + + dccp_init_xmit_timers(newsk); + + DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS); + } + return newsk; +} + +/* + * Process an incoming packet for RESPOND sockets represented + * as an request_sock. 
+ */ +struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct request_sock **prev) +{ + struct sock *child = NULL; + + /* Check for retransmitted REQUEST */ + if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) { + if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dccp_rsk(req)->dreq_isr)) { + struct dccp_request_sock *dreq = dccp_rsk(req); + + dccp_pr_debug("Retransmitted REQUEST\n"); + /* Send another RESPONSE packet */ + dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1); + dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq); + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + } + /* Network Duplicate, discard packet */ + return NULL; + } + + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; + + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_ACK && + dccp_hdr(skb)->dccph_type != DCCP_PKT_DATAACK) + goto drop; + + /* Invalid ACK */ + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dccp_rsk(req)->dreq_iss) { + dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n", + DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss); + goto drop; + } + + child = dccp_v4_request_recv_sock(sk, skb, req, NULL); + if (child == NULL) + goto listen_overflow; + + /* FIXME: deal with options */ + + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_add(sk, req, child); +out: + return child; +listen_overflow: + dccp_pr_debug("listen_overflow!\n"); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; +drop: + if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET) + req->rsk_ops->send_reset(skb); + + inet_csk_reqsk_queue_drop(sk, req, prev); + goto out; +} + +/* + * Queue segment on the new socket if the new socket is active, + * otherwise we just shortcircuit this and continue with + * the new socket. 
+ */ +int dccp_child_process(struct sock *parent, struct sock *child, + struct sk_buff *skb) +{ + int ret = 0; + const int state = child->sk_state; + + if (!sock_owned_by_user(child)) { + ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len); + + /* Wakeup parent, send SIGIO */ + if (state == DCCP_RESPOND && child->sk_state != state) + parent->sk_data_ready(parent, 0); + } else { + /* Alas, it is possible again, because we do lookup + * in main socket hash table and lock on listening + * socket does not protect us more. + */ + sk_add_backlog(child, skb); + } + + bh_unlock_sock(child); + sock_put(child); + return ret; +} diff --git a/net/dccp/options.c b/net/dccp/options.c new file mode 100644 index 000000000000..e1867767946c --- /dev/null +++ b/net/dccp/options.c @@ -0,0 +1,763 @@ +/* + * net/dccp/options.c + * + * An implementation of the DCCP protocol + * Aristeu Sergio Rozanski Filho + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, + const u64 ackno, + const unsigned char len, + const unsigned char *vector); + +/* stores the default values for new connection. 
may be changed with sysctl */ +static const struct dccp_options dccpo_default_values = { + .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, + .dccpo_ccid = DCCPF_INITIAL_CCID, + .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, + .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, +}; + +void dccp_options_init(struct dccp_options *dccpo) +{ + memcpy(dccpo, &dccpo_default_values, sizeof(*dccpo)); +} + +static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len) +{ + u32 value = 0; + + if (len > 3) + value += *bf++ << 24; + if (len > 2) + value += *bf++ << 16; + if (len > 1) + value += *bf++ << 8; + if (len > 0) + value += *bf; + + return value; +} + +int dccp_parse_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " : + "server rx opt: "; +#endif + const struct dccp_hdr *dh = dccp_hdr(skb); + const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type; + unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb); + unsigned char *opt_ptr = options; + const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4); + struct dccp_options_received *opt_recv = &dp->dccps_options_received; + unsigned char opt, len; + unsigned char *value; + + memset(opt_recv, 0, sizeof(*opt_recv)); + + while (opt_ptr != opt_end) { + opt = *opt_ptr++; + len = 0; + value = NULL; + + /* Check if this isn't a single byte option */ + if (opt > DCCPO_MAX_RESERVED) { + if (opt_ptr == opt_end) + goto out_invalid_option; + + len = *opt_ptr++; + if (len < 3) + goto out_invalid_option; + /* + * Remove the type and len fields, leaving + * just the value size + */ + len -= 2; + value = opt_ptr; + opt_ptr += len; + + if (opt_ptr > opt_end) + goto out_invalid_option; + } + + switch (opt) { + case DCCPO_PADDING: + break; + case DCCPO_NDP_COUNT: + if (len > 3) + goto out_invalid_option; + + opt_recv->dccpor_ndp = 
dccp_decode_value_var(value, len); + dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp); + break; + case DCCPO_ACK_VECTOR_0: + if (len > DCCP_MAX_ACK_VECTOR_LEN) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + + opt_recv->dccpor_ack_vector_len = len; + opt_recv->dccpor_ack_vector_idx = value - options; + + dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", + debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, + value, len); + /* + * dccps_hc_rx_ackpkts stays NULL when no ack vector state was + * allocated for this socket and the callee dereferences it, so + * a peer sending this option must not reach it in that case. + */ + if (dp->dccps_hc_rx_ackpkts != NULL) + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); + break; + case DCCPO_TIMESTAMP: + if (len != 4) + goto out_invalid_option; + + opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); + + dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; + dp->dccps_timestamp_time = jiffies; + + dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", + debug_prefix, opt_recv->dccpor_timestamp, + DCCP_SKB_CB(skb)->dccpd_ack_seq); + break; + case DCCPO_TIMESTAMP_ECHO: + if (len < 4 || len > 8) + goto out_invalid_option; + + opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n", + debug_prefix, opt_recv->dccpor_timestamp_echo, + len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq, + tcp_time_stamp - opt_recv->dccpor_timestamp_echo); + + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4); + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + case DCCPO_ELAPSED_TIME: + if (len > 4) + goto out_invalid_option; + + if (pkt_type == DCCP_PKT_DATA) + continue; + opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, + opt_recv->dccpor_elapsed_time); + break; + /* + * From draft-ietf-dccp-spec-11.txt: + * + * Option numbers 128 through 191 are for options sent from the HC- + * Sender to the HC-Receiver;
option numbers 192 through 255 are for + * options sent from the HC-Receiver to the HC-Sender. + */ + case 128 ... 191: { + const u16 idx = value - options; + + if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + case 192 ... 255: { + const u16 idx = value - options; + + if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0) + goto out_invalid_option; + } + break; + default: + pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n", + sk, opt, len); + break; + } + } + + return 0; + +out_invalid_option: + DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; + pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len); + return -1; +} + +static void dccp_encode_value_var(const u32 value, unsigned char *to, + const unsigned int len) +{ + if (len > 3) + *to++ = (value & 0xFF000000) >> 24; + if (len > 2) + *to++ = (value & 0xFF0000) >> 16; + if (len > 1) + *to++ = (value & 0xFF00) >> 8; + if (len > 0) + *to++ = (value & 0xFF); +} + +static inline int dccp_ndp_len(const int ndp) +{ + return likely(ndp <= 0xFF) ? 1 : ndp <= 0xFFFF ? 
2 : 3; +} + +void dccp_insert_option(struct sock *sk, struct sk_buff *skb, + const unsigned char option, + const void *value, const unsigned char len) +{ + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len + 2 > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option)); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len + 2; + + to = skb_push(skb, len + 2); + *to++ = option; + *to++ = len + 2; + + memcpy(to, value, len); +} + +EXPORT_SYMBOL_GPL(dccp_insert_option); + +static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + int ndp = dp->dccps_ndp_count; + + if (dccp_non_data_packet(skb)) + ++dp->dccps_ndp_count; + else + dp->dccps_ndp_count = 0; + + if (ndp > 0) { + unsigned char *ptr; + const int ndp_len = dccp_ndp_len(ndp); + const int len = ndp_len + 2; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + ptr = skb_push(skb, len); + *ptr++ = DCCPO_NDP_COUNT; + *ptr++ = len; + dccp_encode_value_var(ndp, ptr, ndp_len); + } +} + +static inline int dccp_elapsed_time_len(const u32 elapsed_time) +{ + return elapsed_time == 0 ? 0 : + elapsed_time <= 0xFF ? 1 : + elapsed_time <= 0xFFFF ? 2 : + elapsed_time <= 0xFFFFFF ? 3 : 4; +} + +void dccp_insert_option_elapsed_time(struct sock *sk, + struct sk_buff *skb, + u32 elapsed_time) +{ +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 2 + elapsed_time_len; + unsigned char *to; + + /* If elapsed_time == 0... 
*/ + /* dccp_elapsed_time_len() returns 0 only for elapsed_time == 0: nothing to encode, send no option. */ + if (elapsed_time_len == 0) + return; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ELAPSED_TIME; + *to++ = len; + + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", + debug_prefix, elapsed_time, + len, DCCP_SKB_CB(skb)->dccpd_seq); +} + +EXPORT_SYMBOL(dccp_insert_option_elapsed_time); + +static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : + "server TX opt: "; +#endif + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + int len = ap->dccpap_buf_vector_len + 2; + const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + unsigned char *to, *from; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n")); + return; + } + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now.
+ */ + if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = ap->dccpap_buf_vector_len; + from = ap->dccpap_buf + ap->dccpap_buf_head; + + /* Check if buf_head wraps */ + if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { + const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head; + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = ap->dccpap_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal + * buf_nonce. + * + * This implemention uses just one ack record for now. + */ + ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + ap->dccpap_ack_ptr = ap->dccpap_buf_head; + ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; + ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; + ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n", + debug_prefix, ap->dccpap_ack_vector_len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); +} + +static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) +{ + const u32 now = htonl(tcp_time_stamp); + dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); +} + +static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? 
"CLIENT TX opt: " : + "server TX opt: "; +#endif + u32 tstamp_echo; + const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10; + const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + const int len = 6 + elapsed_time_len; + unsigned char *to; + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n")); + return; + } + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_TIMESTAMP_ECHO; + *to++ = len; + + tstamp_echo = htonl(dp->dccps_timestamp_echo); + memcpy(to, &tstamp_echo, 4); + to += 4; + dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", + debug_prefix, dp->dccps_timestamp_echo, + len, DCCP_SKB_CB(skb)->dccpd_seq); + + dp->dccps_timestamp_echo = 0; + dp->dccps_timestamp_time = 0; +} + +void dccp_insert_options(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); + + DCCP_SKB_CB(skb)->dccpd_opt_len = 0; + + if (dp->dccps_options.dccpo_send_ndp_count) + dccp_insert_option_ndp(sk, skb); + + if (!dccp_packet_without_ack(skb)) { + if (dp->dccps_options.dccpo_send_ack_vector && + dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1) + dccp_insert_option_ack_vector(sk, skb); + + dccp_insert_option_timestamp(sk, skb); + if (dp->dccps_timestamp_echo != 0) + dccp_insert_option_timestamp_echo(sk, skb); + } + + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + + /* XXX: insert other options when appropriate */ + + if (DCCP_SKB_CB(skb)->dccpd_opt_len != 0) { + /* The length of all options has to be a multiple of 4 */ + int padding = DCCP_SKB_CB(skb)->dccpd_opt_len % 4; + + if (padding != 0) { + padding = 4 - padding; + memset(skb_push(skb, padding), 0, padding); + DCCP_SKB_CB(skb)->dccpd_opt_len += padding; + } + } +} + +struct 
dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority) +{ + struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); + + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap->dccpap_buf, 0xFF, len); +#endif + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + /* Clear both nonces: kmalloc() memory is not zeroed, so the ack record nonce must be set too. */ + ap->dccpap_buf_nonce = ap->dccpap_ack_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time = 0; + ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; + } + + return ap; +} + +void dccp_ackpkts_free(struct dccp_ackpkts *ap) +{ + if (ap != NULL) { +#ifdef DCCP_DEBUG + memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); +#endif + kfree(ap); + } +} + +static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; +} + +static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives.
+ */ +static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) + return -ENOBUFS; + + gap = packets - 1; + /* + * Subtract in signed arithmetic: packets is unsigned, so an unsigned + * difference would never test negative where long is wider than int. + */ + new_head = (signed long)ap->dccpap_buf_head - (signed long)packets; + + if (new_head < 0) { + if (gap > 0) { + memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, + gap + new_head + 1); + /* + * The bytes below the old head are now filled; only + * -new_head - 1 gap bytes remain above the wrapped head + * (the head byte itself receives the new state), so one + * more would write past the end of dccpap_buf. + */ + gap = -new_head - 1; + } + new_head += ap->dccpap_buf_len; + } + + ap->dccpap_buf_head = new_head; + + if (gap > 0) + memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, + DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); + + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpap_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the HC- + * Sender is sending data at a very high rate, when the HC-Receiver's + * acknowledgements are not reaching the HC-Sender, or when the HC- + * Sender is forgetting to acknowledge those acks (so the HC-Receiver + * is unable to clean up old state). In this case, the HC-Receiver + * should either compress the buffer (by increasing run lengths when + * possible), transfer its state to a larger buffer, or, as a last + * resort, drop all received packets, without processing them + * whatsoever, until its buffer shrinks again.
+ */ + + /* See if this is the first ackno being inserted */ + if (ap->dccpap_buf_vector_len == 0) { + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len = 1; + } else if (after48(ackno, ap->dccpap_buf_ackno)) { + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno); + + /* + * Look if the state of this packet is the same as the previous ackno + * and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && + dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK) + ap->dccpap_buf[ap->dccpap_buf_head]++; + else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and S <= buf_ackno, + * the HC-Receiver will scan the table for the byte corresponding to S. + * (Indexing structures could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); + unsigned int index = ap->dccpap_buf_head; + + while (1) { + const u8 len = dccp_ackpkts_len(ap, index); + /* + * Named cur_state so it does not shadow the state argument: + * the reserved slot below must be overwritten with the state + * passed in by the caller, not with its current contents. + */ + const u8 cur_state = dccp_ackpkts_state(ap, index); + /* + * valid packets not yet in dccpap_buf have a reserved entry, with + * a len equal to 0 + */ + if (cur_state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our reserved seat!
*/ + dccp_pr_debug("Found %llu reserved seat!\n", ackno); + ap->dccpap_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == ap->dccpap_buf_len) + index = 0; + } + } + + ap->dccpap_buf_ackno = ackno; + ap->dccpap_time = jiffies; +out: + dccp_pr_debug(""); + dccp_ackpkts_print(ap); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno); + return -EILSEQ; +} + +#ifdef DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackpkts_print(const struct dccp_ackpkts *ap) +{ + dccp_ackvector_print(ap->dccpap_buf_ackno, + ap->dccpap_buf + ap->dccpap_buf_head, + ap->dccpap_buf_vector_len); +} +#endif + +static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) +{ + /* + * As we're keeping track of the ack vector size + * (dccpap_buf_vector_len) and the sent ack vector size + * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but + * keep this code here as in the future we'll implement a vector of ack + * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. 
-acme + */ +#if 0 + ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; + if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) + ap->dccpap_buf_tail -= ap->dccpap_buf_len; +#endif + ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; +} + +void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, + u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == ap->dccpap_ack_seqno) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK vector + * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); + if (before48(ackno, ap->dccpap_ack_seqno)) { + // dccp_pr_debug_cat("yes\n"); + return; + } + // dccp_pr_debug_cat("no\n"); + + i = len; + while (i--) { + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + // dccp_pr_debug("is %llu <= %llu <= %llu? 
", ackno_end_rl, ap->dccpap_ack_seqno, ackno); + if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + // dccp_pr_debug_cat("yes\n"); + + if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { +#ifdef DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " : + "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n", + debug_prefix, len, + ap->dccpap_ack_seqno, ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + } + /* + * If dccpap_ack_seqno was not received, no problem we'll + * send another ACK vector. + */ + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + // dccp_pr_debug_cat("no\n"); + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} diff --git a/net/dccp/output.c b/net/dccp/output.c new file mode 100644 index 000000000000..22ca2910d4f2 --- /dev/null +++ b/net/dccp/output.c @@ -0,0 +1,406 @@ +/* + * net/dccp/output.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include + +#include + +#include "ccid.h" +#include "dccp.h" + +static inline void dccp_event_ack_sent(struct sock *sk) +{ + inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); +} + +/* + * All SKB's seen here are completely headerless. It is our + * job to build the DCCP header, and pass the packet down to + * IP so it can do the same plus pass the packet off to the + * device. 
+ */ +int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (likely(skb != NULL)) { + const struct inet_sock *inet = inet_sk(sk); + struct dccp_sock *dp = dccp_sk(sk); + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + struct dccp_hdr *dh; + /* XXX For now we're using only 48 bits sequence numbers */ + const int dccp_header_size = sizeof(*dh) + + sizeof(struct dccp_hdr_ext) + + dccp_packet_hdr_len(dcb->dccpd_type); + int err, set_ack = 1; + u64 ackno = dp->dccps_gsr; + + /* + * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing + * to do here... + */ + dccp_inc_seqno(&dp->dccps_gss); + + dcb->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + switch (dcb->dccpd_type) { + case DCCP_PKT_DATA: + set_ack = 0; + break; + case DCCP_PKT_SYNC: + case DCCP_PKT_SYNCACK: + ackno = dcb->dccpd_seq; + break; + } + + skb->h.raw = skb_push(skb, dccp_header_size); + dh = dccp_hdr(skb); + /* Data packets are not cloned as they are never retransmitted */ + if (skb_cloned(skb)) + skb_set_owner_w(skb, sk); + + /* Build DCCP header and checksum it. 
*/ + memset(dh, 0, dccp_header_size); + dh->dccph_type = dcb->dccpd_type; + dh->dccph_sport = inet->sport; + dh->dccph_dport = inet->dport; + dh->dccph_doff = (dccp_header_size + dcb->dccpd_opt_len) / 4; + dh->dccph_ccval = dcb->dccpd_ccval; + /* XXX For now we're using only 48 bits sequence numbers */ + dh->dccph_x = 1; + + dp->dccps_awh = dp->dccps_gss; + dccp_hdr_set_seq(dh, dp->dccps_gss); + if (set_ack) + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno); + + switch (dcb->dccpd_type) { + case DCCP_PKT_REQUEST: + dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service; + break; + case DCCP_PKT_RESET: + dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code; + break; + } + + dh->dccph_checksum = dccp_v4_checksum(skb); + + if (dcb->dccpd_type == DCCP_PKT_ACK || + dcb->dccpd_type == DCCP_PKT_DATAACK) + dccp_event_ack_sent(sk); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + + err = ip_queue_xmit(skb, 0); + if (err <= 0) + return err; + + /* NET_XMIT_CN is special. It does not guarantee, + * that this packet is lost. It tells that device + * is about to start to drop packets or already + * drops some packets of the same priority and + * invokes us to send less aggressively. + */ + return err == NET_XMIT_CN ? 0 : err; + } + return -ENOBUFS; +} + +unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) +{ + struct dccp_sock *dp = dccp_sk(sk); + int mss_now; + + /* + * FIXME: we really should be using the af_specific thing to support IPv6. 
+ * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + */ + mss_now = pmtu - sizeof(struct iphdr) - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext); + + /* Now subtract optional transport overhead */ + mss_now -= dp->dccps_ext_header_len; + + /* + * FIXME: this should come from the CCID infrastructure, where, say, + * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets + * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED + * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to + * make it a multiple of 4 + */ + + mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4; + + /* And store cached results */ + dp->dccps_pmtu_cookie = pmtu; + dp->dccps_mss_cache = mss_now; + + return mss_now; +} + +int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) +{ + if (inet_sk_rebuild_header(sk) != 0) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + return dccp_transmit_skb(sk, (skb_cloned(skb) ? + pskb_copy(skb, GFP_ATOMIC): + skb_clone(skb, GFP_ATOMIC))); +} + +struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, + struct request_sock *req) +{ + struct dccp_hdr *dh; + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_response); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; + DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_rsk(req)->rmt_port; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESPONSE; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, + const enum dccp_reset_codes code) + +{ + struct dccp_hdr *dh; + struct dccp_sock *dp = dccp_sk(sk); + const int dccp_header_size = sizeof(struct dccp_hdr) + + sizeof(struct dccp_hdr_ext) + + sizeof(struct dccp_hdr_reset); + struct sk_buff *skb = sock_wmalloc(sk, MAX_HEADER + DCCP_MAX_OPT_LEN + + dccp_header_size, 1, + GFP_ATOMIC); + if (skb == NULL) + return NULL; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size); + + skb->dst = dst_clone(dst); + skb->csum = 0; + + dccp_inc_seqno(&dp->dccps_gss); + + DCCP_SKB_CB(skb)->dccpd_reset_code = code; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESET; + DCCP_SKB_CB(skb)->dccpd_seq = dp->dccps_gss; + dccp_insert_options(sk, skb); + + skb->h.raw = skb_push(skb, dccp_header_size); + + dh = dccp_hdr(skb); + memset(dh, 0, dccp_header_size); + + dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_dport = inet_sk(sk)->dport; + dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; + dh->dccph_type = DCCP_PKT_RESET; + dh->dccph_x = 1; + dccp_hdr_set_seq(dh, dp->dccps_gss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr); + + dccp_hdr_reset(skb)->dccph_reset_code = code; + + dh->dccph_checksum = dccp_v4_checksum(skb); + + DCCP_INC_STATS(DCCP_MIB_OUTSEGS); + return skb; +} + +/* + * Do all connect socket setups that can be done AF independent. + */ +static inline void dccp_connect_init(struct sock *sk) +{ + struct dst_entry *dst = __sk_dst_get(sk); + struct inet_connection_sock *icsk = inet_csk(sk); + + sk->sk_err = 0; + sock_reset_flag(sk, SOCK_DONE); + + dccp_sync_mss(sk, dst_mtu(dst)); + + /* + * FIXME: set dp->{dccps_swh,dccps_swl}, with + * something like dccp_inc_seq + */ + + icsk->icsk_retransmits = 0; +} + +int dccp_connect(struct sock *sk) +{ + struct sk_buff *skb; + struct inet_connection_sock *icsk = inet_csk(sk); + + dccp_connect_init(sk); + + skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation); + if (unlikely(skb == NULL)) + return -ENOBUFS; + + /* Reserve space for headers. 
*/ + skb_reserve(skb, MAX_DCCP_HEADER); + + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; + /* FIXME: set service to something meaningful, coming + * from userspace*/ + DCCP_SKB_CB(skb)->dccpd_service = 0; + skb->csum = 0; + skb_set_owner_w(skb, sk); + + BUG_TRAP(sk->sk_send_head == NULL); + sk->sk_send_head = skb; + dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL)); + DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); + + /* Timer for repeating the REQUEST until an answer. */ + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + return 0; +} + +void dccp_send_ack(struct sock *sk) +{ + /* If we have been reset, we may not send again. */ + if (sk->sk_state != DCCP_CLOSED) { + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) { + inet_csk_schedule_ack(sk); + inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX); + return; + } + + /* Reserve space for headers */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK; + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + } +} + +EXPORT_SYMBOL_GPL(dccp_send_ack); + +void dccp_send_delayed_ack(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + /* + * FIXME: tune this timer. elapsed time fixes the skew, so no problem + * with using 2s, and active senders also piggyback the ACK into a + * DATAACK packet, so this is really for quiescent senders. + */ + unsigned long timeout = jiffies + 2 * HZ; + + /* Use new timeout only if there wasn't a older one earlier. */ + if (icsk->icsk_ack.pending & ICSK_ACK_TIMER) { + /* If delack timer was blocked or is about to expire, + * send ACK now. 
+ * + * FIXME: check the "about to expire" part + */ + if (icsk->icsk_ack.blocked) { + dccp_send_ack(sk); + return; + } + + if (!time_before(timeout, icsk->icsk_ack.timeout)) + timeout = icsk->icsk_ack.timeout; + } + icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = timeout; + sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout); +} + +void dccp_send_sync(struct sock *sk, u64 seq) +{ + /* + * We are not putting this on the write queue, so + * dccp_transmit_skb() will set the ownership to this + * sock. + */ + struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC); + + if (skb == NULL) + /* FIXME: how to make sure the sync is sent? */ + return; + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, MAX_DCCP_HEADER); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_SYNC; + DCCP_SKB_CB(skb)->dccpd_seq = seq; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); +} + +/* Send a DCCP_PKT_CLOSE/CLOSEREQ. The caller locks the socket for us. This cannot be + * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances. + */ +void dccp_send_close(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + struct sk_buff *skb; + + /* Socket is locked, keep trying until memory is available. */ + for (;;) { + skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL); + if (skb != NULL) + break; + yield(); + } + + /* Reserve space for headers and prepare control bits. */ + skb_reserve(skb, sk->sk_prot->max_header); + skb->csum = 0; + DCCP_SKB_CB(skb)->dccpd_type = dp->dccps_role == DCCP_ROLE_CLIENT ? 
DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; + + skb_set_owner_w(skb, sk); + dccp_transmit_skb(sk, skb); + + ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); + ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); +} diff --git a/net/dccp/proto.c b/net/dccp/proto.c new file mode 100644 index 000000000000..70284e6afe05 --- /dev/null +++ b/net/dccp/proto.c @@ -0,0 +1,818 @@ +/* + * net/dccp/proto.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "ccid.h" +#include "dccp.h" + +DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics); + +atomic_t dccp_orphan_count = ATOMIC_INIT(0); + +static struct net_protocol dccp_protocol = { + .handler = dccp_v4_rcv, + .err_handler = dccp_v4_err, +}; + +const char *dccp_packet_name(const int type) +{ + static const char *dccp_packet_names[] = { + [DCCP_PKT_REQUEST] = "REQUEST", + [DCCP_PKT_RESPONSE] = "RESPONSE", + [DCCP_PKT_DATA] = "DATA", + [DCCP_PKT_ACK] = "ACK", + [DCCP_PKT_DATAACK] = "DATAACK", + [DCCP_PKT_CLOSEREQ] = "CLOSEREQ", + [DCCP_PKT_CLOSE] = "CLOSE", + [DCCP_PKT_RESET] = "RESET", + [DCCP_PKT_SYNC] = "SYNC", + [DCCP_PKT_SYNCACK] = "SYNCACK", + }; + + if (type >= DCCP_NR_PKT_TYPES) + return "INVALID"; + else + return dccp_packet_names[type]; +} + +EXPORT_SYMBOL_GPL(dccp_packet_name); + +const char *dccp_state_name(const int state) +{ + static char *dccp_state_names[] = { + [DCCP_OPEN] = "OPEN", + [DCCP_REQUESTING] = "REQUESTING", + [DCCP_PARTOPEN] = "PARTOPEN", + [DCCP_LISTEN] = "LISTEN", + [DCCP_RESPOND] = "RESPOND", + [DCCP_CLOSING] = "CLOSING", + [DCCP_TIME_WAIT] = 
"TIME_WAIT", + [DCCP_CLOSED] = "CLOSED", + }; + + if (state >= DCCP_MAX_STATES) + return "INVALID STATE!"; + else + return dccp_state_names[state]; +} + +EXPORT_SYMBOL_GPL(dccp_state_name); + +static inline int dccp_listen_start(struct sock *sk) +{ + dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; + return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); +} + +int dccp_disconnect(struct sock *sk, int flags) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_sock *inet = inet_sk(sk); + int err = 0; + const int old_state = sk->sk_state; + + if (old_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + /* ABORT function of RFC793 */ + if (old_state == DCCP_LISTEN) { + inet_csk_listen_stop(sk); + /* FIXME: do the active reset thing */ + } else if (old_state == DCCP_REQUESTING) + sk->sk_err = ECONNRESET; + + dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + if (sk->sk_send_head != NULL) { + __kfree_skb(sk->sk_send_head); + sk->sk_send_head = NULL; + } + + inet->dport = 0; + + if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) + inet_reset_saddr(sk); + + sk->sk_shutdown = 0; + sock_reset_flag(sk, SOCK_DONE); + + icsk->icsk_backoff = 0; + inet_csk_delack_init(sk); + __sk_dst_reset(sk); + + BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + + sk->sk_error_report(sk); + return err; +} + +int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + dccp_pr_debug("entry\n"); + return -ENOIOCTLCMD; +} + +int dccp_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_setsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_getsockopt(struct sock *sk, int level, int optname, + char *optval, int *optlen) +{ + dccp_pr_debug("entry\n"); + + if (level != SOL_DCCP) + return ip_getsockopt(sk, level, optname, optval, optlen); + + return -EOPNOTSUPP; +} + +int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, 
+ size_t len) +{ + const struct dccp_sock *dp = dccp_sk(sk); + const int flags = msg->msg_flags; + const int noblock = flags & MSG_DONTWAIT; + struct sk_buff *skb; + int rc, size; + long timeo; + + if (len > dp->dccps_mss_cache) + return -EMSGSIZE; + + lock_sock(sk); + + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + + /* + * We have to use sk_stream_wait_connect here to set sk_write_pending, + * so that the trick in dccp_rcv_request_sent_state_process. + */ + /* Wait for a connection to finish. */ + if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING)) + if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0) + goto out_err; + + size = sk->sk_prot->max_header + len; + release_sock(sk); + skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); + + if (skb == NULL) + goto out_release; + + skb_reserve(skb, sk->sk_prot->max_header); + rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (rc == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + long delay; + + /* + * XXX: This is just to match the Waikato tree CA interaction + * points, after the CCID3 code is stable and I have a better + * understanding of behaviour I'll change this to look more like + * TCP. + */ + while (1) { + rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, + skb, len, &delay); + if (rc == 0) + break; + if (rc != -EAGAIN) + goto out_discard; + if (delay > timeo) + goto out_discard; + release_sock(sk); + delay = schedule_timeout(delay); + lock_sock(sk); + timeo -= delay; + if (signal_pending(current)) + goto out_interrupted; + rc = -EPIPE; + if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN)) + goto out_discard; + } + + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. 
Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + dcb->dccpd_type = DCCP_PKT_DATAACK; + /* FIXME: we really should have a dccps_ack_pending or use icsk */ + } else if (inet_csk_ack_scheduled(sk) || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } else { +out_discard: + kfree_skb(skb); + } +out_release: + release_sock(sk); + return rc ? : len; +out_err: + rc = sk_stream_error(sk, flags, rc); + goto out_release; +out_interrupted: + rc = sock_intr_errno(timeo); + goto out_discard; +} + +EXPORT_SYMBOL(dccp_sendmsg); + +int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int nonblock, int flags, int *addr_len) +{ + const struct dccp_hdr *dh; + int copied = 0; + unsigned long used; + int err; + int target; /* Read at least this many bytes */ + long timeo; + + lock_sock(sk); + + err = -ENOTCONN; + if (sk->sk_state == DCCP_LISTEN) + goto out; + + timeo = sock_rcvtimeo(sk, nonblock); + + /* Urgent data needs to be handled specially. */ + if (flags & MSG_OOB) + goto recv_urg; + + /* FIXME */ +#if 0 + seq = &tp->copied_seq; + if (flags & MSG_PEEK) { + peek_seq = tp->copied_seq; + seq = &peek_seq; + } +#endif + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, len); + + do { + struct sk_buff *skb; + u32 offset; + + /* FIXME */ +#if 0 + /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ + if (tp->urg_data && tp->urg_seq == *seq) { + if (copied) + break; + if (signal_pending(current)) { + copied = timeo ? sock_intr_errno(timeo) : -EAGAIN; + break; + } + } +#endif + + /* Next get a buffer. 
*/ + + skb = skb_peek(&sk->sk_receive_queue); + do { + if (!skb) + break; + + offset = 0; + dh = dccp_hdr(skb); + + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) + goto found_ok_skb; + + if (dh->dccph_type == DCCP_PKT_RESET || + dh->dccph_type == DCCP_PKT_CLOSE) { + dccp_pr_debug("found fin ok!\n"); + goto found_fin_ok; + } + dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); + BUG_TRAP(flags & MSG_PEEK); + skb = skb->next; + } while (skb != (struct sk_buff *)&sk->sk_receive_queue); + + /* Well, if we have backlog, try to process it now yet. */ + if (copied >= target && !sk->sk_backlog.tail) + break; + + if (copied) { + if (sk->sk_err || + sk->sk_state == DCCP_CLOSED || + (sk->sk_shutdown & RCV_SHUTDOWN) || + !timeo || + signal_pending(current) || + (flags & MSG_PEEK)) + break; + } else { + if (sock_flag(sk, SOCK_DONE)) + break; + + if (sk->sk_err) { + copied = sock_error(sk); + break; + } + + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + if (sk->sk_state == DCCP_CLOSED) { + if (!sock_flag(sk, SOCK_DONE)) { + /* This occurs when user tries to read + * from never connected socket. + */ + copied = -ENOTCONN; + break; + } + break; + } + + if (!timeo) { + copied = -EAGAIN; + break; + } + + if (signal_pending(current)) { + copied = sock_intr_errno(timeo); + break; + } + } + + /* FIXME: cleanup_rbuf(sk, copied); */ + + if (copied >= target) { + /* Do not sleep, just process backlog. */ + release_sock(sk); + lock_sock(sk); + } else + sk_wait_data(sk, &timeo); + + continue; + + found_ok_skb: + /* Ok so how much can we use? */ + used = skb->len - offset; + if (len < used) + used = len; + + if (!(flags & MSG_TRUNC)) { + err = skb_copy_datagram_iovec(skb, offset, + msg->msg_iov, used); + if (err) { + /* Exception. Bailout! 
*/ + if (!copied) + copied = -EFAULT; + break; + } + } + + copied += used; + len -= used; + + /* FIXME: tcp_rcv_space_adjust(sk); */ + +//skip_copy: + if (used + offset < skb->len) + continue; + + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + continue; + found_fin_ok: + if (!(flags & MSG_PEEK)) + sk_eat_skb(sk, skb); + break; + + } while (len > 0); + + /* According to UNIX98, msg_name/msg_namelen are ignored + * on connected socket. I was just happy when found this 8) --ANK + */ + + /* Clean up data we have read: This will do ACK frames. */ + /* FIXME: cleanup_rbuf(sk, copied); */ + + release_sock(sk); + return copied; + +out: + release_sock(sk); + return err; + +recv_urg: + /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */ + goto out; +} + +static int inet_dccp_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + unsigned char old_state; + int err; + + lock_sock(sk); + + err = -EINVAL; + if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP) + goto out; + + old_state = sk->sk_state; + if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) + goto out; + + /* Really, if the socket is already in listen state + * we can only allow the backlog to be adjusted. 
+ */ + if (old_state != DCCP_LISTEN) { + /* + * FIXME: here it probably should be sk->sk_prot->listen_start + * see tcp_listen_start + */ + err = dccp_listen_start(sk); + if (err) + goto out; + } + sk->sk_max_ack_backlog = backlog; + err = 0; + +out: + release_sock(sk); + return err; +} + +static const unsigned char dccp_new_state[] = { + /* current state: new state: action: */ + [0] = DCCP_CLOSED, + [DCCP_OPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_REQUESTING] = DCCP_CLOSED, + [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN, + [DCCP_LISTEN] = DCCP_CLOSED, + [DCCP_RESPOND] = DCCP_CLOSED, + [DCCP_CLOSING] = DCCP_CLOSED, + [DCCP_TIME_WAIT] = DCCP_CLOSED, + [DCCP_CLOSED] = DCCP_CLOSED, +}; + +static int dccp_close_state(struct sock *sk) +{ + const int next = dccp_new_state[sk->sk_state]; + const int ns = next & DCCP_STATE_MASK; + + if (ns != sk->sk_state) + dccp_set_state(sk, ns); + + return next & DCCP_ACTION_FIN; +} + +void dccp_close(struct sock *sk, long timeout) +{ + struct sk_buff *skb; + + lock_sock(sk); + + sk->sk_shutdown = SHUTDOWN_MASK; + + if (sk->sk_state == DCCP_LISTEN) { + dccp_set_state(sk, DCCP_CLOSED); + + /* Special case. */ + inet_csk_listen_stop(sk); + + goto adjudge_to_death; + } + + /* + * We need to flush the recv. buffs. We do this only on the + * descriptor close, not protocol-sourced closes, because the + *reader process may not have drained the data yet! + */ + /* FIXME: check for unread data */ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + __kfree_skb(skb); + } + + if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { + /* Check zero linger _after_ checking for unread data. */ + sk->sk_prot->disconnect(sk, 0); + } else if (dccp_close_state(sk)) { + dccp_send_close(sk); + } + + sk_stream_wait_close(sk, timeout); + +adjudge_to_death: + release_sock(sk); + /* + * Now socket is owned by kernel and we acquire BH lock + * to finish close. No need to check for user refs. 
+ */ + local_bh_disable(); + bh_lock_sock(sk); + BUG_TRAP(!sock_owned_by_user(sk)); + + sock_hold(sk); + sock_orphan(sk); + + if (sk->sk_state != DCCP_CLOSED) + dccp_set_state(sk, DCCP_CLOSED); + + atomic_inc(&dccp_orphan_count); + if (sk->sk_state == DCCP_CLOSED) + inet_csk_destroy_sock(sk); + + /* Otherwise, socket is reprieved until protocol close. */ + + bh_unlock_sock(sk); + local_bh_enable(); + sock_put(sk); +} + +void dccp_shutdown(struct sock *sk, int how) +{ + dccp_pr_debug("entry\n"); +} + +struct proto_ops inet_dccp_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + .poll = sock_no_poll, + .ioctl = inet_ioctl, + .listen = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +extern struct net_proto_family inet_family_ops; + +static struct inet_protosw dccp_v4_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = &dccp_v4_prot, + .ops = &inet_dccp_ops, + .capability = -1, + .no_check = 0, + .flags = 0, +}; + +/* + * This is the global socket data structure used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. 
+ */ +struct socket *dccp_ctl_socket; + +static char dccp_ctl_socket_err_msg[] __initdata = + KERN_ERR "DCCP: Failed to create the control socket.\n"; + +static int __init dccp_ctl_sock_init(void) +{ + int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP, + &dccp_ctl_socket); + if (rc < 0) + printk(dccp_ctl_socket_err_msg); + else { + dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC; + inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1; + + /* Unhash it so that IP input processing does not even + * see it, we do not wish this socket to see incoming + * packets. + */ + dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk); + } + + return rc; +} + +static void __exit dccp_ctl_sock_exit(void) +{ + if (dccp_ctl_socket != NULL) + sock_release(dccp_ctl_socket); +} + +static int __init init_dccp_v4_mibs(void) +{ + int rc = -ENOMEM; + + dccp_statistics[0] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[0] == NULL) + goto out; + + dccp_statistics[1] = alloc_percpu(struct dccp_mib); + if (dccp_statistics[1] == NULL) + goto out_free_one; + + rc = 0; +out: + return rc; +out_free_one: + free_percpu(dccp_statistics[0]); + dccp_statistics[0] = NULL; + goto out; + +} + +static int thash_entries; +module_param(thash_entries, int, 0444); +MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); + +int dccp_debug; +module_param(dccp_debug, int, 0444); +MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); + +static int __init dccp_init(void) +{ + unsigned long goal; + int ehash_order, bhash_order, i; + int rc = proto_register(&dccp_v4_prot, 1); + + if (rc) + goto out; + + dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", + sizeof(struct inet_bind_bucket), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!dccp_hashinfo.bind_bucket_cachep) + goto out_proto_unregister; + + /* + * Size and allocate the main established and bind bucket + * hash tables. + * + * The methodology is similar to that of the buffer cache. 
+ */ + if (num_physpages >= (128 * 1024)) + goal = num_physpages >> (21 - PAGE_SHIFT); + else + goal = num_physpages >> (23 - PAGE_SHIFT); + + if (thash_entries) + goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT; + for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) + ; + do { + dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + sizeof(struct inet_ehash_bucket); + dccp_hashinfo.ehash_size >>= 1; + while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1)) + dccp_hashinfo.ehash_size--; + dccp_hashinfo.ehash = (struct inet_ehash_bucket *) + __get_free_pages(GFP_ATOMIC, ehash_order); + } while (!dccp_hashinfo.ehash && --ehash_order > 0); + + if (!dccp_hashinfo.ehash) { + printk(KERN_CRIT "Failed to allocate DCCP " + "established hash table\n"); + goto out_free_bind_bucket_cachep; + } + + for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) { + rwlock_init(&dccp_hashinfo.ehash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); + } + + bhash_order = ehash_order; + + do { + dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE / + sizeof(struct inet_bind_hashbucket); + if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0) + continue; + dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) + __get_free_pages(GFP_ATOMIC, bhash_order); + } while (!dccp_hashinfo.bhash && --bhash_order >= 0); + + if (!dccp_hashinfo.bhash) { + printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n"); + goto out_free_dccp_ehash; + } + + for (i = 0; i < dccp_hashinfo.bhash_size; i++) { + spin_lock_init(&dccp_hashinfo.bhash[i].lock); + INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain); + } + + if (init_dccp_v4_mibs()) + goto out_free_dccp_bhash; + + rc = -EAGAIN; + if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP)) + goto out_free_dccp_v4_mibs; + + inet_register_protosw(&dccp_v4_protosw); + + rc = dccp_ctl_sock_init(); + if (rc) + goto out_unregister_protosw; +out: + return rc; +out_unregister_protosw: + 
inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_protocol, IPPROTO_DCCP); +out_free_dccp_v4_mibs: + free_percpu(dccp_statistics[0]); + free_percpu(dccp_statistics[1]); + dccp_statistics[0] = dccp_statistics[1] = NULL; +out_free_dccp_bhash: + free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); + dccp_hashinfo.bhash = NULL; +out_free_dccp_ehash: + free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); + dccp_hashinfo.ehash = NULL; +out_free_bind_bucket_cachep: + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); + dccp_hashinfo.bind_bucket_cachep = NULL; +out_proto_unregister: + proto_unregister(&dccp_v4_prot); + goto out; +} + +static const char dccp_del_proto_err_msg[] __exitdata = + KERN_ERR "can't remove dccp net_protocol\n"; + +static void __exit dccp_fini(void) +{ + dccp_ctl_sock_exit(); + + inet_unregister_protosw(&dccp_v4_protosw); + + if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0) + printk(dccp_del_proto_err_msg); + + /* Free the control endpoint. */ + sock_release(dccp_ctl_socket); + + proto_unregister(&dccp_v4_prot); + + kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +} + +module_init(dccp_init); +module_exit(dccp_fini); + +/* __stringify doesn't likes enums, so use SOCK_DCCP (6) value directly */ +MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); diff --git a/net/dccp/timer.c b/net/dccp/timer.c new file mode 100644 index 000000000000..8c396ee01aac --- /dev/null +++ b/net/dccp/timer.c @@ -0,0 +1,249 @@ +/* + * net/dccp/timer.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#include +#include +#include + +#include "dccp.h" + +static void dccp_write_timer(unsigned long data); +static void dccp_keepalive_timer(unsigned long data); +static void dccp_delack_timer(unsigned long data); +/* Hand the DCCP write, delayed-ACK and keepalive callbacks to inet_csk_init_xmit_timers() for this socket. */ +void dccp_init_xmit_timers(struct sock *sk) +{ + inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, + &dccp_keepalive_timer); +} +/* Give up on the connection: report sk_err_soft (ETIMEDOUT if none is set), send a RESET with code ABORTED, finish the socket with dccp_done() and count the abort in DCCP_MIB_ABORTONTIMEOUT. */ +static void dccp_write_err(struct sock *sk) +{ + sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; + sk->sk_error_report(sk); + + dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED); + dccp_done(sk); + DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT); +} + +/* A write timeout has occurred. Process the after effects. Returns 1 when icsk_retransmits has reached the retry limit (the socket is then aborted through dccp_write_err()), 0 when retransmission should go on. */ +static int dccp_write_timeout(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + int retry_until; + + if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) { + if (icsk->icsk_retransmits != 0) + dst_negative_advice(&sk->sk_dst_cache); + retry_until = icsk->icsk_syn_retries ? : /* FIXME! */ 3 /* FIXME! sysctl_tcp_syn_retries */; + } else { + if (icsk->icsk_retransmits >= /* FIXME! sysctl_tcp_retries1 */ 5 /* FIXME! */) { + /* NOTE. draft-ietf-tcpimpl-pmtud-01.txt requires pmtu black + hole detection. :-( + + It is place to make it. It is not made. I do not want + to make it. It is disgusting. It does not work in any + case. Let me to cite the same draft, which requires for + us to implement this: + + "The one security concern raised by this memo is that ICMP black holes + are often caused by over-zealous security administrators who block + all ICMP messages. It is vitally important that those who design and + deploy security systems understand the impact of strict filtering on + upper-layer protocols. The safest web site in the world is worthless + if most TCP implementations cannot transfer data from it. It would + be far nicer to have all of the black holes fixed rather than fixing + all of the TCP implementations." + + Golden words :-). 
+ */ + + dst_negative_advice(&sk->sk_dst_cache); + } + + retry_until = /* FIXME! */ 15 /* FIXME! sysctl_tcp_retries2 */; + /* + * FIXME: see tcp_write_timeout and tcp_out_of_resources + */ + } + + if (icsk->icsk_retransmits >= retry_until) { + /* Has it gone just too far? */ + dccp_write_err(sk); + return 1; + } + return 0; +} + +/* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff: while user context owns the socket the ACK is marked blocked and the timer re-armed TCP_DELACK_MIN jiffies out; otherwise a still-scheduled ACK is sent once icsk_ack.timeout has passed */ +static void dccp_delack_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + icsk->icsk_ack.blocked = 1; + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED); + sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) + goto out; + if (time_after(icsk->icsk_ack.timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + goto out; + } + + icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; + + if (inet_csk_ack_scheduled(sk)) { + if (!icsk->icsk_ack.pingpong) { + /* Delayed ACK missed: inflate ATO. */ + icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto); + } else { + /* Delayed ACK missed: leave pingpong mode and + * deflate ATO. + */ + icsk->icsk_ack.pingpong = 0; + icsk->icsk_ack.ato = TCP_ATO_MIN; + } + dccp_send_ack(sk); + NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS); + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * The DCCP retransmit timer: unless dccp_write_timeout() has given up on the socket, resends sk->sk_send_head and, when that succeeds, doubles icsk_rto (capped at DCCP_RTO_MAX) before re-arming. + */ +static void dccp_retransmit_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + /* + * sk->sk_send_head has to have one skb with + * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP + * packet types (REQUEST, RESPONSE, the ACK in the 3way handshake + * (PARTOPEN timer), etc). 
+ */ + BUG_TRAP(sk->sk_send_head != NULL); + + /* + * More than 4MSL (8 minutes) has passed, a RESET(aborted) was + * sent, no need to retransmit, this sock is dead. + */ + if (dccp_write_timeout(sk)) + goto out; + + /* + * We want to know the number of packets retransmitted, not the + * total number of retransmissions of clones of original packets. + */ + if (icsk->icsk_retransmits == 0) + DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS); + + if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) { + /* + * Retransmission failed because of local congestion, + * do not backoff. + */ + if (icsk->icsk_retransmits == 0) + icsk->icsk_retransmits = 1; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + min(icsk->icsk_rto, + TCP_RESOURCE_PROBE_INTERVAL), + TCP_RTO_MAX); + goto out; + } + + icsk->icsk_backoff++; + icsk->icsk_retransmits++; + + icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); + if (icsk->icsk_retransmits > 3 /* FIXME: sysctl_dccp_retries1 */) + __sk_dst_reset(sk); +out:; +} +/* Write timer callback: retried HZ / 20 later while user context owns the socket, re-armed if icsk_timeout is still in the future, otherwise dispatches the pending event (only ICSK_TIME_RETRANS is handled). */ +static void dccp_write_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = inet_csk(sk); + int event = 0; + + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later */ + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); + goto out; + } + + if (sk->sk_state == DCCP_CLOSED || !icsk->icsk_pending) + goto out; + + if (time_after(icsk->icsk_timeout, jiffies)) { + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + goto out; + } + + event = icsk->icsk_pending; + icsk->icsk_pending = 0; + + switch (event) { + case ICSK_TIME_RETRANS: + dccp_retransmit_timer(sk); + break; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} + +/* + * Timer for listening sockets: hands the accept queue to reqsk_queue_prune() with a retry limit of icsk_syn_retries, or TCP_SYNACK_RETRIES when that is zero. + */ +static void dccp_response_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const int max_retries = 
icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */; + + reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL, + DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries); +} +/* Keepalive timer callback: retried HZ / 20 later while user context owns the socket; only DCCP_LISTEN sockets do any work here, via dccp_response_timer(). */ +static void dccp_keepalive_timer(unsigned long data) +{ + struct sock *sk = (struct sock *)data; + + /* Only process if socket is not in use. */ + bh_lock_sock(sk); + if (sock_owned_by_user(sk)) { + /* Try again later. */ + inet_csk_reset_keepalive_timer(sk, HZ / 20); + goto out; + } + + if (sk->sk_state == DCCP_LISTEN) { + dccp_response_timer(sk); + goto out; + } +out: + bh_unlock_sock(sk); + sock_put(sk); +} -- cgit v1.2.3 From 74459dc7bacda04d14626d239c8f5c4dac22560d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:51 -0700 Subject: [LIST]: Introduce list_for_each_entry_safe_continue Used in the dccp CCID3 code, that is going to be submitted RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/list.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index aab2db21b013..597094e0fdb5 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -418,6 +418,19 @@ static inline void list_splice_init(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * list_for_each_entry_safe_continue - iterate over list of given type + * continuing after existing point safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage; the walk resumes at the entry @n points to on entry, so it must already be set + * @head: the head for your list. + * @member: the name of the list_struct within the struct. 
+ */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + /** * list_for_each_rcu - iterate over an rcu-protected list * @pos: the &struct list_head to use as a loop counter. -- cgit v1.2.3 From fbcd923c3e0c8ec9e4ed64f5a4e5766807b32729 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:22:10 -0700 Subject: [NETFILTER]: add correct bridging support to nfnetlink_{queue,log} This patch adds support for passing the real 'physical' device ifindex down to userspace via nfnetlink_log and nfnetlink_queue. This feature basically obsoletes net/bridge/netfilter/ebt_ulog.c, and it is likely ebt_ulog.c will die with one of the next couple of patches. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 2 ++ include/linux/netfilter/nfnetlink_queue.h | 2 ++ net/netfilter/nfnetlink_log.c | 58 +++++++++++++++++++++++++++++++ net/netfilter/nfnetlink_queue.c | 58 +++++++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index 420ff4625cbf..a61836a083e7 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -40,6 +40,8 @@ enum nfulnl_attr_type { NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFULA_HWADDR, /* nfulnl_msg_packet_hw */ NFULA_PAYLOAD, /* opaque data payload */ NFULA_PREFIX, /* string prefix */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index e142b0ff7c08..2d8d2b2cfcaa 100644 --- 
a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -36,6 +36,8 @@ enum nfqnl_attr_type { NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 11584289c262..464c9fa2934b 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -33,6 +33,10 @@ #include +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + #define NFULNL_NLBUFSIZ_DEFAULT 4096 #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ @@ -412,14 +416,64 @@ __build_packet_message(struct nfulnl_instance *inst, if (indev) { tmp_uint = htonl(indev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (pf == PF_BRIDGE) { + /* Case 1: outdev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(indev->br_port->br->dev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + sizeof(tmp_uint), &tmp_uint); + } else { + /* Case 2: indev is bridge group, we need to look for + * physical device (when called from ipv4) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_INDEV, + sizeof(tmp_uint), &tmp_uint); + if (skb->nf_bridge && skb->nf_bridge->physindev) { + tmp_uint = + htonl(skb->nf_bridge->physindev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (outdev) { tmp_uint = htonl(outdev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER 
NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(outdev->br_port->br->dev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + sizeof(tmp_uint), &tmp_uint); + } else { + /* Case 2: indev is a bridge group, we need to look + * for physical device (when called from ipv4) */ + NFA_PUT(inst->skb, NFULA_IFINDEX_OUTDEV, + sizeof(tmp_uint), &tmp_uint); + if (skb->nf_bridge) { + tmp_uint = + htonl(skb->nf_bridge->physoutdev->ifindex); + NFA_PUT(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (skb->nfmark) { @@ -536,6 +590,10 @@ nfulnl_log_packet(unsigned int pf, + NFA_SPACE(sizeof(struct nfulnl_msg_packet_hdr)) + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ + + NFA_SPACE(sizeof(u_int32_t)) /* ifindex */ +#endif + NFA_SPACE(sizeof(u_int32_t)) /* mark */ + NFA_SPACE(sizeof(u_int32_t)) /* uid */ + NFA_SPACE(NFULNL_PREFIXLEN) /* prefix */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 04323ee1eb8d..bf9223084b4a 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -30,6 +30,10 @@ #include +#ifdef CONFIG_BRIDGE_NETFILTER +#include "../bridge/br_private.h" +#endif + #define NFQNL_QMAX_DEFAULT 1024 #if 0 @@ -361,6 +365,10 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, size = NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hdr)) + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ +#ifdef CONFIG_BRIDGE_NETFILTER + + NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ + + 
NLMSG_SPACE(sizeof(u_int32_t)) /* ifindex */ +#endif + NLMSG_SPACE(sizeof(u_int32_t)) /* mark */ + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_hw)) + NLMSG_SPACE(sizeof(struct nfqnl_msg_packet_timestamp)); @@ -412,12 +420,62 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, if (entry->info->indev) { tmp_uint = htonl(entry->info->indev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (entry->info->pf == PF_BRIDGE) { + /* Case 1: indev is physical input device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, sizeof(tmp_uint), + &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(entry->info->indev->br_port->br->dev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + } else { + /* Case 2: indev is bridge group, we need to look for + * physical device (when called from ipv4) */ + NFA_PUT(skb, NFQA_IFINDEX_INDEV, sizeof(tmp_uint), + &tmp_uint); + if (entry->skb->nf_bridge + && entry->skb->nf_bridge->physindev) { + tmp_uint = htonl(entry->skb->nf_bridge->physindev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_PHYSINDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (entry->info->outdev) { tmp_uint = htonl(entry->info->outdev->ifindex); +#ifndef CONFIG_BRIDGE_NETFILTER NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), &tmp_uint); +#else + if (entry->info->pf == PF_BRIDGE) { + /* Case 1: outdev is physical output device, we need to + * look for bridge group (when called from + * netfilter_bridge) */ + NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, sizeof(tmp_uint), + &tmp_uint); + /* this is the bridge group "brX" */ + tmp_uint = htonl(entry->info->outdev->br_port->br->dev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + } else { + /* Case 2: outdev is bridge group, we need to look for + * physical output device (when called from ipv4) */ + NFA_PUT(skb, 
NFQA_IFINDEX_OUTDEV, sizeof(tmp_uint), + &tmp_uint); + if (entry->skb->nf_bridge + && entry->skb->nf_bridge->physoutdev) { + tmp_uint = htonl(entry->skb->nf_bridge->physoutdev->ifindex); + NFA_PUT(skb, NFQA_IFINDEX_PHYSOUTDEV, + sizeof(tmp_uint), &tmp_uint); + } + } +#endif } if (entry->skb->nfmark) { -- cgit v1.2.3 From bbd86b9fc469b7e91dc7444e6abb8930811d79cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:11 -0700 Subject: [NETFILTER]: add /proc/net/netfilter interface to nf_queue This patch adds a /proc/net/netfilter/nf_queue file, similar to the recently-added /proc/net/netfilter/nf_log. It indicates which queue handler is registered to which protocol family. This is useful since there are now multiple queue handlers in the tree (ip[6]_queue, nfnetlink_queue). Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 13 +++-- net/ipv4/netfilter/ip_queue.c | 9 +++- net/ipv6/netfilter/ip6_queue.c | 9 +++- net/netfilter/nf_log.c | 1 + net/netfilter/nf_queue.c | 106 ++++++++++++++++++++++++++++++++-------- net/netfilter/nfnetlink_queue.c | 12 +++-- 6 files changed, 116 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 815583af06c2..bf430fcbe364 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -225,13 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, int *len); /* Packet queuing */ -typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, - unsigned int queuenum, void *data); +struct nf_queue_handler { + int (*outfn)(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data); + void *data; + char *name; +}; extern int nf_register_queue_handler(int pf, - nf_queue_outfn_t outfn, void *data); + struct nf_queue_handler *qh); extern int nf_unregister_queue_handler(int pf); -extern void nf_unregister_queue_handlers(nf_queue_outfn_t
outfn); +extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 629de649f130..1c49833e00a9 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -656,6 +656,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) } #endif /* CONFIG_PROC_FS */ +static struct nf_queue_handler nfqh = { + .name = "ip_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -684,7 +689,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET, &nfqh); if (status < 0) { printk(KERN_ERR "ip_queue: failed to register queue handler\n"); goto cleanup_sysctl; @@ -692,7 +697,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 56ffec3568fa..7ecb91e24a34 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -652,6 +652,11 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) return len; } +static struct nf_queue_handler nfqh = { + .name = "ip6_queue", + .outfn = &ipq_enqueue_packet, +}; + static int init_or_cleanup(int init) { @@ -679,7 +684,7 @@ init_or_cleanup(int init) register_netdevice_notifier(&ipq_dev_notifier); ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL); + status = nf_register_queue_handler(PF_INET6, &nfqh); if (status < 0) { printk(KERN_ERR "ip6_queue: failed to register queue 
handler\n"); goto cleanup_sysctl; @@ -687,7 +692,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(&ipq_enqueue_packet); + nf_unregister_queue_handlers(&nfqh); synchronize_net(); ipq_flush(NF_DROP); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ec58c4d2c667..31a9d63921d6 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5586f843ed45..8a67bde8b640 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "nf_internals.h" @@ -14,17 +15,12 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static struct nf_queue_handler_t { - nf_queue_outfn_t outfn; - void *data; -} queue_handler[NPROTO]; - +static struct nf_queue_handler *queue_handler[NPROTO]; static struct nf_queue_rerouter *queue_rerouter; static DEFINE_RWLOCK(queue_handler_lock); - -int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) +int nf_register_queue_handler(int pf, struct nf_queue_handler *qh) { int ret; @@ -32,11 +28,10 @@ int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) return -EINVAL; write_lock_bh(&queue_handler_lock); - if (queue_handler[pf].outfn) + if (queue_handler[pf]) ret = -EBUSY; else { - queue_handler[pf].outfn = outfn; - queue_handler[pf].data = data; + queue_handler[pf] = qh; ret = 0; } write_unlock_bh(&queue_handler_lock); @@ -52,8 +47,7 @@ int nf_unregister_queue_handler(int pf) return -EINVAL; write_lock_bh(&queue_handler_lock); - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; + queue_handler[pf] = NULL; write_unlock_bh(&queue_handler_lock); return 0; @@ -85,16 +79,14 @@ int nf_unregister_queue_rerouter(int pf) } 
EXPORT_SYMBOL_GPL(nf_unregister_queue_rerouter); -void nf_unregister_queue_handlers(nf_queue_outfn_t outfn) +void nf_unregister_queue_handlers(struct nf_queue_handler *qh) { int pf; write_lock_bh(&queue_handler_lock); for (pf = 0; pf < NPROTO; pf++) { - if (queue_handler[pf].outfn == outfn) { - queue_handler[pf].outfn = NULL; - queue_handler[pf].data = NULL; - } + if (queue_handler[pf] == qh) + queue_handler[pf] = NULL; } write_unlock_bh(&queue_handler_lock); } @@ -121,7 +113,7 @@ int nf_queue(struct sk_buff **skb, /* QUEUE == DROP if noone is waiting, to be safe. */ read_lock(&queue_handler_lock); - if (!queue_handler[pf].outfn) { + if (!queue_handler[pf]->outfn) { read_unlock(&queue_handler_lock); kfree_skb(*skb); return 1; @@ -162,8 +154,8 @@ int nf_queue(struct sk_buff **skb, if (queue_rerouter[pf].save) queue_rerouter[pf].save(*skb, info); - status = queue_handler[pf].outfn(*skb, info, queuenum, - queue_handler[pf].data); + status = queue_handler[pf]->outfn(*skb, info, queuenum, + queue_handler[pf]->data); if (status >= 0 && queue_rerouter[pf].reroute) status = queue_rerouter[pf].reroute(skb, info); @@ -259,13 +251,87 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, } EXPORT_SYMBOL(nf_reinject); +#ifdef CONFIG_PROC_FS +static void *seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void *seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + + if (*pos >= NPROTO) + return NULL; + + return pos; +} + +static void seq_stop(struct seq_file *s, void *v) +{ + +} + +static int seq_show(struct seq_file *s, void *v) +{ + int ret; + loff_t *pos = v; + struct nf_queue_handler *qh; + + read_lock_bh(&queue_handler_lock); + qh = queue_handler[*pos]; + if (!qh) + ret = seq_printf(s, "%2lld NONE\n", *pos); + else + ret = seq_printf(s, "%2lld %s\n", *pos, qh->name); + read_unlock_bh(&queue_handler_lock); + + return ret; +} + +static struct seq_operations nfqueue_seq_ops = { + .start = 
seq_start, + .next = seq_next, + .stop = seq_stop, + .show = seq_show, +}; + +static int nfqueue_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &nfqueue_seq_ops); +} + +static struct file_operations nfqueue_file_ops = { + .owner = THIS_MODULE, + .open = nfqueue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif /* PROC_FS */ + + int __init netfilter_queue_init(void) { +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde; +#endif queue_rerouter = kmalloc(NPROTO * sizeof(struct nf_queue_rerouter), GFP_KERNEL); if (!queue_rerouter) return -ENOMEM; +#ifdef CONFIG_PROC_FS + pde = create_proc_entry("nf_queue", S_IRUGO, proc_net_netfilter); + if (!pde) { + kfree(queue_rerouter); + return -1; + } + pde->proc_fops = &nfqueue_file_ops; +#endif memset(queue_rerouter, 0, NPROTO * sizeof(struct nf_queue_rerouter)); return 0; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index bf9223084b4a..741686ff71d8 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -845,6 +845,11 @@ static const int nfqa_cfg_min[NFQA_CFG_MAX] = { [NFQA_CFG_PARAMS-1] = sizeof(struct nfqnl_msg_config_params), }; +static struct nf_queue_handler nfqh = { + .name = "nf_queue", + .outfn = &nfqnl_enqueue_packet, +}; + static int nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *nfqa[], int *errp) @@ -890,10 +895,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, case NFQNL_CFG_CMD_PF_BIND: QDEBUG("registering queue handler for pf=%u\n", ntohs(cmd->pf)); - ret = nf_register_queue_handler(ntohs(cmd->pf), - nfqnl_enqueue_packet, - NULL); - + ret = nf_register_queue_handler(ntohs(cmd->pf), &nfqh); break; case NFQNL_CFG_CMD_PF_UNBIND: QDEBUG("unregistering queue handler for pf=%u\n", @@ -1098,7 +1100,7 @@ init_or_cleanup(int init) return status; cleanup: - nf_unregister_queue_handlers(nfqnl_enqueue_packet); + 
nf_unregister_queue_handlers(&nfqh); unregister_netdevice_notifier(&nfqnl_dev_notifier); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", proc_net_netfilter); -- cgit v1.2.3 From 8a61fadb3908454ccfa538aaa75eb1d22def5700 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:53 -0700 Subject: [NETFILTER]: check nf_log function call arguments Check whether pf is too large in order to prevent array overflow. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- net/netfilter/nf_log.c | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf430fcbe364..ac3c61411d4b 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -157,7 +157,7 @@ struct nf_logger { /* Function to register/unregister log function. */ int nf_log_register(int pf, struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +int nf_log_unregister_pf(int pf); void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index e104760f7a67..573e76a770d9 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -24,6 +24,9 @@ int nf_log_register(int pf, struct nf_logger *logger) { int ret = -EBUSY; + if (pf >= NPROTO) + return -EINVAL; + /* Any setup of logging members must be done before * substituting pointer. */ spin_lock(&nf_log_lock); @@ -38,14 +41,19 @@ int nf_log_register(int pf, struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(int pf) +int nf_log_unregister_pf(int pf) { + if (pf >= NPROTO) + return -EINVAL; + spin_lock(&nf_log_lock); nf_logging[pf] = NULL; spin_unlock(&nf_log_lock); /* Give time to concurrent readers. 
*/ synchronize_net(); + + return 0; } EXPORT_SYMBOL(nf_log_unregister_pf); -- cgit v1.2.3 From 5a47a470e602eecb168ddd3b78841b84ceddd319 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:03 -0700 Subject: [DCCP]: make <linux/dccp.h> include-able from userspace The protocol header files in <linux/> are usually structured in a way to be included by userspace code. The top section consists of general protocol structure definitions, typedefs, enums - followed by an #ifdef __KERNEL__ section. Currently <linux/dccp.h> doesn't follow that convention and can therefore not be used from userspace. However, for example iptables' libipt_dccp.c actually needs various definitions from there. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 238 ++++++++++++++++++++++++++------------------------- 1 file changed, 121 insertions(+), 117 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index e3b4bf7346bb..add4908b8e57 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -1,16 +1,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H -#include -#include #include -#include -#include - -#include -#include -#include -#include +#include /* FIXME: this is utterly wrong */ struct sockaddr_dccp { @@ -18,40 +10,6 @@ struct sockaddr_dccp { unsigned int service; }; -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: - This mapping is horrible, but TCP has - no matching state for DCCP_PARTOPEN, - as TCP_SYN_RECV is already used by - DCCP_RESPOND, why don't stop using TCP - mapping of states?
OK, now we don't use - sk_stream_sendmsg anymore, so doesn't - seem to exist any reason for us to - do the TCP mapping here */ - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_MAX_STATES = TCP_MAX_STATES, -}; - -#define DCCP_STATE_MASK 0xf -#define DCCP_ACTION_FIN (1<<7) - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_PARTOPEN = TCPF_FIN_WAIT1, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -94,11 +52,6 @@ struct dccp_hdr { #endif }; -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb->h.raw; -} - /** * struct dccp_hdr_ext - the low bits of a 48 bit seq packet * @@ -108,34 +61,6 @@ struct dccp_hdr_ext { __u32 dccph_seq_low; }; -static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); -} - -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); -} - -static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq << 8); -#elif defined(__BIG_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq); -#else -#error "Adjust your defines" -#endif - - if (dh->dccph_x != 0) - seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); - - return seq_nr; -} - /** * struct dccp_hdr_request - Conection initiation request header * @@ -145,12 +70,6 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) struct dccp_hdr_request { __u32 dccph_req_service; }; - -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets * @@ -162,24 +81,6 @@ struct dccp_hdr_ack_bits { dccph_ack_nr_high:24; __u32 dccph_ack_nr_low; }; - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#elif defined(__BIG_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#else -#error "Adjust your defines" -#endif -} - /** * struct dccp_hdr_response - Conection initiation response header * @@ -193,11 +94,6 @@ struct dccp_hdr_response { __u32 dccph_resp_service; }; -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_reset - Unconditionally shut down a 
connection * @@ -210,11 +106,6 @@ struct dccp_hdr_reset { dccph_reset_data[3]; }; -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - enum dccp_pkt_type { DCCP_PKT_REQUEST = 0, DCCP_PKT_RESPONSE, @@ -248,13 +139,6 @@ static inline unsigned int dccp_packet_hdr_len(const __u8 type) return sizeof(struct dccp_hdr_response); return sizeof(struct dccp_hdr_reset); } - -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); -} - enum dccp_reset_codes { DCCP_RESET_CODE_UNSPECIFIED = 0, DCCP_RESET_CODE_CLOSED, @@ -298,6 +182,124 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? 
OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + + /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 /* FIXME: for now we're using CCID 3 (TFRC) */ @@ -429,4 +431,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } +#endif /* __KERNEL__ */ + #endif /* 
_LINUX_DCCP_H */ -- cgit v1.2.3 From e2e268665f6c01686b477a6b0cc5a70bab689d54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:26:28 -0700 Subject: [DCCP]: Fix struct sockaddr_dccp definition Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index add4908b8e57..fd1412ddb3ff 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,10 +4,14 @@ #include #include -/* FIXME: this is utterly wrong */ +/* Structure describing an Internet (DCCP) socket address. */ struct sockaddr_dccp { - struct sockaddr_in in; - unsigned int service; + __u16 sdccp_family; /* Address family */ + __u16 sdccp_port; /* Port number */ + __u32 sdccp_addr; /* Internet address */ + __u32 sdccp_service; /* Service */ + /* Pad to size of `struct sockaddr': 16 bytes . */ + __u32 sdccp_pad; }; /** -- cgit v1.2.3 From 1d3de414eb20d937d82c5219fd13ee4cedc499cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:55 -0700 Subject: [NETFILTER]: New iptables DCCP protocol header match Using this new iptables DCCP protocol header match, it is possible to create simplistic stateless packet filtering rules for DCCP. It permits matching of port numbers, packet type and options. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 16 ++- include/linux/netfilter_ipv4/ipt_dccp.h | 23 +++++ net/ipv4/netfilter/Kconfig | 11 ++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_dccp.c | 176 ++++++++++++++++++++++++++++++++ 5 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h create mode 100644 net/ipv4/netfilter/ipt_dccp.c (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index fd1412ddb3ff..431d58923ba9 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -242,10 +242,15 @@ static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); } +static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) +{ + return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); +} + static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); + return __dccp_basic_hdr_len(dh); } static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) @@ -297,10 +302,15 @@ static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); } +static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) +{ + return __dccp_basic_hdr_len(dh) + + dccp_packet_hdr_len(dh->dccph_type); +} + static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) { - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); + return __dccp_hdr_len(dccp_hdr(skb)); } diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h new file mode 100644 index 000000000000..3cb3a522e62b --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_dccp.h @@ -0,0 +1,23 @@ +#ifndef _IPT_DCCP_H_ +#define _IPT_DCCP_H_ + +#define IPT_DCCP_SRC_PORTS 0x01 +#define IPT_DCCP_DEST_PORTS 0x02 +#define IPT_DCCP_TYPE 0x04 +#define IPT_DCCP_OPTION 0x08 + +#define IPT_DCCP_VALID_FLAGS 0x0f + +struct ipt_dccp_info { + u_int16_t dpts[2]; /* Min, Max */ + u_int16_t spts[2]; /* Min, Max */ + + u_int16_t flags; + u_int16_t invflags; + + u_int16_t typemask; + u_int8_t option; +}; + +#endif /* _IPT_DCCP_H_ */ + diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2fa26a41fa47..9f5e1d769b5f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -354,6 +354,17 @@ config IP_NF_MATCH_SCTP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config IP_NF_MATCH_DCCP + tristate 'DCCP protocol match support' + depends on IP_NF_IPTABLES + help + With this option enabled, you will be able to use the iptables + `dccp' match in order to match on DCCP source/destination ports + and DCCP flags. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. 
+ config IP_NF_MATCH_COMMENT tristate 'comment match support' depends on IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c2ae663b723f..58aa7c616e1f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o +obj-$(CONFIG_IP_NF_MATCH_DCCP) += ipt_dccp.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o diff --git a/net/ipv4/netfilter/ipt_dccp.c b/net/ipv4/netfilter/ipt_dccp.c new file mode 100644 index 000000000000..ad3278bba6c1 --- /dev/null +++ b/net/ipv4/netfilter/ipt_dccp.c @@ -0,0 +1,176 @@ +/* + * iptables module for DCCP protocol header matching + * + * (C) 2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#define DCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \ + || (!!((invflag) & (option)) ^ (cond))) + +static unsigned char *dccp_optbuf; +static DEFINE_SPINLOCK(dccp_buflock); + +static inline int +dccp_find_option(u_int8_t option, + const struct sk_buff *skb, + const struct dccp_hdr *dh, + int *hotdrop) +{ + /* tcp.doff is only 4 bits, ie. 
max 15 * 4 bytes */ + unsigned char *op; + unsigned int optoff = __dccp_hdr_len(dh); + unsigned int optlen = dh->dccph_doff*4 - __dccp_hdr_len(dh); + unsigned int i; + + if (dh->dccph_doff * 4 < __dccp_hdr_len(dh)) { + *hotdrop = 1; + return 0; + } + + if (!optlen) + return 0; + + spin_lock_bh(&dccp_buflock); + op = skb_header_pointer(skb, + skb->nh.iph->ihl*4 + optoff, + optlen, dccp_optbuf); + if (op == NULL) { + /* If we don't have the whole header, drop packet. */ + spin_unlock_bh(&dccp_buflock); + *hotdrop = 1; + return 0; + } + + for (i = 0; i < optlen; ) { + if (op[i] == option) { + spin_unlock_bh(&dccp_buflock); + return 1; + } + + if (op[i] < 2) + i++; + else + i += op[i+1]?:1; + } + + spin_unlock_bh(&dccp_buflock); + return 0; +} + + +static inline int +match_types(const struct dccp_hdr *dh, u_int16_t typemask) +{ + return (typemask & (1 << dh->dccph_type)); +} + +static inline int +match_option(u_int8_t option, const struct sk_buff *skb, + const struct dccp_hdr *dh, int *hotdrop) +{ + return dccp_find_option(option, skb, dh, hotdrop); +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_dccp_info *info = + (const struct ipt_dccp_info *)matchinfo; + struct dccp_hdr _dh, *dh; + + if (offset) + return 0; + + dh = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_dh), &_dh); + if (dh == NULL) { + *hotdrop = 1; + return 0; + } + + return DCCHECK(((ntohs(dh->dccph_sport) >= info->spts[0]) + && (ntohs(dh->dccph_sport) <= info->spts[1])), + IPT_DCCP_SRC_PORTS, info->flags, info->invflags) + && DCCHECK(((ntohs(dh->dccph_dport) >= info->dpts[0]) + && (ntohs(dh->dccph_dport) <= info->dpts[1])), + IPT_DCCP_DEST_PORTS, info->flags, info->invflags) + && DCCHECK(match_types(dh, info->typemask), + IPT_DCCP_TYPE, info->flags, info->invflags) + && DCCHECK(match_option(info->option, skb, dh, hotdrop), + IPT_DCCP_OPTION, info->flags, 
info->invflags); +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_dccp_info *info; + + info = (const struct ipt_dccp_info *)matchinfo; + + return ip->proto == IPPROTO_DCCP + && !(ip->invflags & IPT_INV_PROTO) + && matchsize == IPT_ALIGN(sizeof(struct ipt_dccp_info)) + && !(info->flags & ~IPT_DCCP_VALID_FLAGS) + && !(info->invflags & ~IPT_DCCP_VALID_FLAGS) + && !(info->invflags & ~info->flags); +} + +static struct ipt_match dccp_match = +{ + .name = "dccp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + int ret; + + /* doff is 8 bits, so the maximum option size is (4*256). Don't put + * this in BSS since DaveM is worried about locked TLB's for kernel + * BSS. */ + dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); + if (!dccp_optbuf) + return -ENOMEM; + ret = ipt_register_match(&dccp_match); + if (ret) + kfree(dccp_optbuf); + + return ret; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&dccp_match); + kfree(dccp_optbuf); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("Match for DCCP protocol packets"); + -- cgit v1.2.3 From 64cf1e5d8b5f88d56509260e08fa0d8314277350 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:21 -0700 Subject: [DCCP]: Finish the TIMEWAIT minisock support Using most of the infrastructure TCP uses, with a dccp_death_row, etc. As per my current interpretation of the draft what we have with this changeset seems to be all we need (or very close to it 8)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 3 +++ net/dccp/ipv4.c | 15 ++++++++----- net/dccp/minisocks.c | 60 +++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 63 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 431d58923ba9..3dccdd5108b5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -194,6 +194,7 @@ enum { #include #include +#include #include #include #include @@ -354,6 +355,8 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) return (struct dccp_request_sock *)req; } +extern struct inet_timewait_death_row dccp_death_row; + /* Read about the ECN nonce to see why it is 253 */ #define DCCP_MAX_ACK_VECTOR_LEN 253 diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 6bccf4dd1e70..f6da9328221e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -97,7 +97,7 @@ static int __dccp_v4_check_established(struct sock *sk, const __u16 lport, NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); } else if (tw != NULL) { /* Silly. Should hash-dance instead... 
*/ - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); inet_twsk_put(tw); @@ -201,7 +201,7 @@ ok: spin_unlock(&head->lock); if (tw != NULL) { - dccp_tw_deschedule(tw); + inet_twsk_deschedule(tw, &dccp_death_row); inet_twsk_put(tw); } @@ -1131,8 +1131,9 @@ int dccp_v4_rcv(struct sk_buff *skb) */ if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n"); - goto discard_and_relse; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " + "do_time_wait\n"); + goto do_time_wait; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) { @@ -1179,6 +1180,10 @@ discard_it: discard_and_relse: sock_put(sk); goto discard_it; + +do_time_wait: + inet_twsk_put((struct inet_timewait_sock *)sk); + goto no_dccp_socket; } static int dccp_v4_init_sock(struct sock *sk) @@ -1290,5 +1295,5 @@ struct proto dccp_v4_prot = { .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), .rsk_prot = &dccp_request_sock_ops, - .twsk_obj_size = sizeof(struct inet_timewait_sock), /* FIXME! 
create dccp_timewait_sock */ + .twsk_obj_size = sizeof(struct inet_timewait_sock), }; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index e498e389fccc..a6a0b270fb6c 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -22,18 +22,58 @@ #include "ccid.h" #include "dccp.h" +struct inet_timewait_death_row dccp_death_row = { + .sysctl_max_tw_buckets = NR_FILE * 2, + .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, + .death_lock = SPIN_LOCK_UNLOCKED, + .hashinfo = &dccp_hashinfo, + .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, + (unsigned long)&dccp_death_row), + .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, + inet_twdr_twkill_work, + &dccp_death_row), +/* Short-time timewait calendar */ + + .twcal_hand = -1, + .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, + (unsigned long)&dccp_death_row), +}; + void dccp_time_wait(struct sock *sk, int state, int timeo) { - /* FIXME: Implement */ - dccp_pr_debug("Want to help? Start here\n"); - dccp_set_state(sk, state); -} + struct inet_timewait_sock *tw = NULL; -/* This is for handling early-kills of TIME_WAIT sockets. */ -void dccp_tw_deschedule(struct inet_timewait_sock *tw) -{ - dccp_pr_debug("Want to help? Start here\n"); - __inet_twsk_kill(tw, &dccp_hashinfo); + if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) + tw = inet_twsk_alloc(sk, state); + + if (tw != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); + const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1); + + /* Linkage updates. */ + __inet_twsk_hashdance(tw, sk, &dccp_hashinfo); + + /* Get the TIME_WAIT timeout firing. */ + if (timeo < rto) + timeo = rto; + + tw->tw_timeout = DCCP_TIMEWAIT_LEN; + if (state == DCCP_TIME_WAIT) + timeo = DCCP_TIMEWAIT_LEN; + + inet_twsk_schedule(tw, &dccp_death_row, timeo, + DCCP_TIMEWAIT_LEN); + inet_twsk_put(tw); + } else { + /* Sorry, if we're out of memory, just CLOSE this + * socket up. 
We've got bigger problems than + * non-graceful socket closings. + */ + if (net_ratelimit()) + printk(KERN_INFO "DCCP: time wait bucket table overflow\n"); + } + + dccp_done(sk); } struct sock *dccp_create_openreq_child(struct sock *sk, @@ -55,7 +95,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; - newicsk->icsk_rto = TCP_TIMEOUT_INIT; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, -- cgit v1.2.3 From 6687e988d9aeaccad6774e6a8304f681f3ec0a03 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 04:03:31 -0300 Subject: [ICSK]: Move TCP congestion avoidance members to icsk This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(), minimal renaming/moving done in this changeset to ease review. Most of it is just changes of struct tcp_sock * to struct sock * parameters. With this we move to a state closer to two interesting goals: 1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used for any INET transport protocol that has struct inet_hashinfo and are derived from struct inet_connection_sock. Keeps the userspace API, that will just not display DCCP sockets, while newer versions of tools can support DCCP. 2. INET generic transport pluggable Congestion Avoidance infrastructure, using the current TCP CA infrastructure with DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 21 +--- include/net/inet_connection_sock.h | 15 +++ include/net/tcp.h | 74 ++++++------ net/ipv4/inet_connection_sock.c | 3 +- net/ipv4/tcp.c | 12 +- net/ipv4/tcp_bic.c | 46 ++++---- net/ipv4/tcp_cong.c | 44 +++++--- net/ipv4/tcp_diag.c | 16 +-- net/ipv4/tcp_highspeed.c | 17 +-- net/ipv4/tcp_htcp.c | 53 +++++---- net/ipv4/tcp_hybla.c | 31 +++--- net/ipv4/tcp_input.c | 223 +++++++++++++++++++++---------------- net/ipv4/tcp_ipv4.c | 9 +- net/ipv4/tcp_minisocks.c | 5 +- net/ipv4/tcp_output.c | 36 +++--- net/ipv4/tcp_scalable.c | 6 +- net/ipv4/tcp_timer.c | 26 +++-- net/ipv4/tcp_vegas.c | 44 ++++---- net/ipv4/tcp_westwood.c | 58 +++++----- net/ipv6/tcp_ipv6.c | 7 +- 20 files changed, 412 insertions(+), 334 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 620096840744..ac4ca44c75ca 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,19 +258,15 @@ struct tcp_sock { __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ - __u8 ca_state; /* State of fast-retransmit machine */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 nonagle; /* Disable Nagle algorithm? 
*/ - /* ONE BYTE HOLE, TRY TO PACK */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ @@ -311,8 +307,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 probes_out; /* unanswered 0 window probes */ - __u8 ecn_flags; /* ECN status bits. */ + __u16 advmss; /* Advertised MSS */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ __u32 sacked_out; /* SACK'd packets */ @@ -327,7 +322,7 @@ struct tcp_sock { __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ __u8 urg_mode; /* In urgent mode */ - /* ONE BYTE HOLE, TRY TO PACK! */ + __u8 ecn_flags; /* ECN status bits. */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ @@ -351,11 +346,6 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; - - /* Pluggable TCP congestion control hook */ - struct tcp_congestion_ops *ca_ops; - u32 ca_priv[16]; -#define TCP_CA_PRIV_SIZE (16*sizeof(u32)) }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -static inline void *tcp_ca(const struct tcp_sock *tp) -{ - return (void *) tp->ca_priv; -} - #endif #endif /* _LINUX_TCP_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index bec19d5cff26..4d7e708c07d1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -27,6 +27,7 @@ struct inet_bind_bucket; struct inet_hashinfo; +struct tcp_congestion_ops; /** inet_connection_sock - INET connection oriented sock * @@ -35,10 +36,13 @@ struct inet_hashinfo; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: 
Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -50,10 +54,14 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! */ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -65,6 +73,8 @@ struct inet_connection_sock { __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority); diff --git a/include/net/tcp.h b/include/net/tcp.h index d489ac548e4b..0b3f7294c5c7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,29 +669,29 @@ struct tcp_congestion_ops { struct list_head list; /* initialize private data (optional) */ - void (*init)(struct tcp_sock *tp); + void (*init)(struct sock *sk); /* cleanup private data (optional) */ - void (*release)(struct tcp_sock *tp); + void (*release)(struct sock *sk); /* return slow start 
threshold (required) */ - u32 (*ssthresh)(struct tcp_sock *tp); + u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct tcp_sock *tp); + u32 (*min_cwnd)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ - void (*set_state)(struct tcp_sock *tp, u8 new_state); + void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ - void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* new value of cwnd after loss (optional) */ - u32 (*undo_cwnd)(struct tcp_sock *tp); + u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked); /* get info for tcp_diag (optional) */ - void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; @@ -700,30 +700,34 @@ struct tcp_congestion_ops { extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_init_congestion_control(struct tcp_sock *tp); -extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); -extern int 
tcp_set_congestion_control(struct tcp_sock *tp, const char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); -extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern u32 tcp_reno_min_cwnd(struct sock *sk); extern struct tcp_congestion_ops tcp_reno; -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { - if (tp->ca_ops->set_state) - tp->ca_ops->set_state(tp, ca_state); - tp->ca_state = ca_state; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; } -static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { - if (tp->ca_ops->cwnd_event) - tp->ca_ops->cwnd_event(tp, event); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); } /* This determines how many packets are "in the network" to the best @@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. 
*/ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) } /* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) +static inline void __tcp_enter_cwr(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + tp->undo_marker = 0; - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 026630a15ea0..fe3c6d3d0c91 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,7 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; - newicsk->icsk_backoff = 0; + newicsk->icsk_backoff = 0; + newicsk->icsk_probes_out = 0; /* Deinitialize accept_queue to trap illegal accesses. 
*/ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0eed64a1991d..02848e72e9c1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1671,11 +1671,11 @@ int tcp_disconnect(struct sock *sk, int flags) tp->write_seq = 1; icsk->icsk_backoff = 0; tp->snd_cwnd = 2; - tp->probes_out = 0; + icsk->icsk_probes_out = 0; tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); sk->sk_send_head = NULL; @@ -1718,7 +1718,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(tp, name); + err = tcp_set_congestion_control(sk, name); release_sock(sk); return err; } @@ -1886,9 +1886,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) memset(info, 0, sizeof(*info)); info->tcpi_state = sk->sk_state; - info->tcpi_ca_state = tp->ca_state; + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; - info->tcpi_probes = tp->probes_out; + info->tcpi_probes = icsk->icsk_probes_out; info->tcpi_backoff = icsk->icsk_backoff; if (tp->rx_opt.tstamp_ok) @@ -2016,7 +2016,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, len = min_t(unsigned int, len, TCP_CA_NAME_MAX); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, tp->ca_ops->name, len)) + if (copy_to_user(optval, icsk->icsk_ca_ops->name, len)) return -EFAULT; return 0; default: diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index ec38d45d6649..b940346de4e7 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -86,11 +86,11 @@ static inline void bictcp_reset(struct bictcp *ca) ca->delayed_ack = 2 << ACK_RATIO_SHIFT; } -static void bictcp_init(struct tcp_sock *tp) +static void bictcp_init(struct sock *sk) { - 
bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); if (initial_ssthresh) - tp->snd_ssthresh = initial_ssthresh; + tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } /* @@ -156,9 +156,10 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) /* Detect low utilization in congestion avoidance */ -static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) +static inline void bictcp_low_utilization(struct sock *sk, int flag) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); u32 dist, delay; /* No time stamp */ @@ -208,12 +209,13 @@ static inline void bictcp_low_utilization(struct tcp_sock *tp, int flag) } -static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, +static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int data_acked) { - struct bictcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); - bictcp_low_utilization(tp, data_acked); + bictcp_low_utilization(sk, data_acked); if (in_flight < tp->snd_cwnd) return; @@ -242,9 +244,10 @@ static void bictcp_cong_avoid(struct tcp_sock *tp, u32 ack, * behave like Reno until low_window is reached, * then increase congestion window slowly */ -static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 bictcp_recalc_ssthresh(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct bictcp *ca = inet_csk_ca(sk); ca->epoch_start = 0; /* end of epoch */ @@ -269,31 +272,34 @@ static u32 bictcp_recalc_ssthresh(struct tcp_sock *tp) return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct tcp_sock *tp) +static u32 bictcp_undo_cwnd(struct sock *sk) { - struct bictcp *ca = tcp_ca(tp); - + const struct tcp_sock *tp = tcp_sk(sk); + const struct bictcp *ca = inet_csk_ca(sk); return max(tp->snd_cwnd, ca->last_max_cwnd); } -static u32 bictcp_min_cwnd(struct tcp_sock *tp) +static 
u32 bictcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void bictcp_state(struct tcp_sock *tp, u8 new_state) +static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) - bictcp_reset(tcp_ca(tp)); + bictcp_reset(inet_csk_ca(sk)); } /* Track delayed acknowledgement ratio using sliding window * ratio = (15*ratio + sample) / 16 */ -static void bictcp_acked(struct tcp_sock *tp, u32 cnt) +static void bictcp_acked(struct sock *sk, u32 cnt) { - if (cnt > 0 && tp->ca_state == TCP_CA_Open) { - struct bictcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) { + struct bictcp *ca = inet_csk_ca(sk); cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; ca->delayed_ack += cnt; } @@ -314,7 +320,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUG_ON(sizeof(struct bictcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4970d10a7785..bbf2d6624e89 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -73,33 +73,36 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); /* Assign choice of congestion control. 
*/ -void tcp_init_congestion_control(struct tcp_sock *tp) +void tcp_init_congestion_control(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; - if (tp->ca_ops != &tcp_init_congestion_ops) + if (icsk->icsk_ca_ops != &tcp_init_congestion_ops) return; rcu_read_lock(); list_for_each_entry_rcu(ca, &tcp_cong_list, list) { if (try_module_get(ca->owner)) { - tp->ca_ops = ca; + icsk->icsk_ca_ops = ca; break; } } rcu_read_unlock(); - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } /* Manage refcounts on socket close. */ -void tcp_cleanup_congestion_control(struct tcp_sock *tp) +void tcp_cleanup_congestion_control(struct sock *sk) { - if (tp->ca_ops->release) - tp->ca_ops->release(tp); - module_put(tp->ca_ops->owner); + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->release) + icsk->icsk_ca_ops->release(sk); + module_put(icsk->icsk_ca_ops->owner); } /* Used by sysctl to change default congestion control */ @@ -143,14 +146,15 @@ void tcp_get_default_congestion_control(char *name) } /* Change congestion control for socket */ -int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) +int tcp_set_congestion_control(struct sock *sk, const char *name) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_congestion_ops *ca; int err = 0; rcu_read_lock(); ca = tcp_ca_find(name); - if (ca == tp->ca_ops) + if (ca == icsk->icsk_ca_ops) goto out; if (!ca) @@ -160,10 +164,10 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) err = -EBUSY; else { - tcp_cleanup_congestion_control(tp); - tp->ca_ops = ca; - if (tp->ca_ops->init) - tp->ca_ops->init(tp); + tcp_cleanup_congestion_control(sk); + icsk->icsk_ca_ops = ca; + if (icsk->icsk_ca_ops->init) + icsk->icsk_ca_ops->init(sk); } out: rcu_read_unlock(); @@ -177,9 +181,11 @@ int tcp_set_congestion_control(struct tcp_sock *tp, const char *name) /* This is 
Jacobson's slow start and congestion avoidance. * SIGCOMM '88, p. 328. */ -void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, +void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); + if (in_flight < tp->snd_cwnd) return; @@ -202,15 +208,17 @@ void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, u32 in_flight, EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); /* Slow start threshold is half the congestion window (min 2) */ -u32 tcp_reno_ssthresh(struct tcp_sock *tp) +u32 tcp_reno_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd >> 1U, 2U); } EXPORT_SYMBOL_GPL(tcp_reno_ssthresh); /* Lower bound on congestion window. */ -u32 tcp_reno_min_cwnd(struct tcp_sock *tp) +u32 tcp_reno_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh/2; } EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5f4c74f45e82..4288ecfec9a7 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -66,10 +66,10 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (ext & (1<<(TCPDIAG_CONG-1))) { - size_t len = strlen(tp->ca_ops->name); + if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), - tp->ca_ops->name); + icsk->icsk_ca_ops->name); } } r->tcpdiag_family = sk->sk_family; @@ -136,18 +136,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { r->tcpdiag_timer = 4; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) 
{ r->tcpdiag_timer = 2; - r->tcpdiag_retrans = tp->probes_out; + r->tcpdiag_retrans = icsk->icsk_probes_out; r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { r->tcpdiag_timer = 0; r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; r->tcpdiag_uid = sock_i_uid(sk); @@ -163,8 +162,9 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (info) tcp_get_info(sk, info); - if (sk->sk_state < TCP_TIME_WAIT && tp->ca_ops->get_info) - tp->ca_ops->get_info(tp, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); nlh->nlmsg_len = skb->tail - b; return skb->len; diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 36c51f8136bf..6acc04bde080 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -98,9 +98,10 @@ struct hstcp { u32 ai; }; -static void hstcp_init(struct tcp_sock *tp) +static void hstcp_init(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); ca->ai = 0; @@ -109,10 +110,11 @@ static void hstcp_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = min_t(u32, tp->snd_cwnd_clamp, 0xffffffff/128); } -static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, +static void hstcp_cong_avoid(struct sock *sk, u32 adk, u32 rtt, u32 in_flight, int good) { - struct hstcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hstcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -143,9 +145,10 @@ static void hstcp_cong_avoid(struct tcp_sock *tp, u32 adk, u32 rtt, } } -static u32 hstcp_ssthresh(struct tcp_sock *tp) +static u32 hstcp_ssthresh(struct sock *sk) { - struct hstcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct hstcp *ca = inet_csk_ca(sk); /* Do multiplicative decrease */ return 
max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); @@ -164,7 +167,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUG_ON(sizeof(struct hstcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 40168275acf9..e47b37984e95 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -55,18 +55,21 @@ static inline void htcp_reset(struct htcp *ca) ca->snd_cwnd_cnt2 = 0; } -static u32 htcp_cwnd_undo(struct tcp_sock *tp) +static u32 htcp_cwnd_undo(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); ca->ccount = ca->undo_ccount; ca->maxRTT = ca->undo_maxRTT; ca->old_maxB = ca->undo_old_maxB; return max(tp->snd_cwnd, (tp->snd_ssthresh<<7)/ca->beta); } -static inline void measure_rtt(struct tcp_sock *tp) +static inline void measure_rtt(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 srtt = tp->srtt>>3; /* keep track of minimum RTT seen so far, minRTT is zero at first */ @@ -74,7 +77,7 @@ static inline void measure_rtt(struct tcp_sock *tp) ca->minRTT = srtt; /* max RTT */ - if (tp->ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { + if (icsk->icsk_ca_state == TCP_CA_Open && tp->snd_ssthresh < 0xFFFF && ca->ccount > 3) { if (ca->maxRTT < ca->minRTT) ca->maxRTT = ca->minRTT; if (ca->maxRTT < srtt && srtt <= ca->maxRTT+HZ/50) @@ -82,13 +85,16 @@ static inline void measure_rtt(struct tcp_sock *tp) } } -static void measure_achieved_throughput(struct tcp_sock *tp, u32 pkts_acked) +static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked) { - struct htcp *ca = tcp_ca(tp); + const struct inet_connection_sock *icsk 
= inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); u32 now = tcp_time_stamp; /* achieved throughput calculations */ - if (tp->ca_state != TCP_CA_Open && tp->ca_state != TCP_CA_Disorder) { + if (icsk->icsk_ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Disorder) { ca->packetcount = 0; ca->lasttime = now; return; @@ -173,9 +179,9 @@ static inline void htcp_alpha_update(struct htcp *ca) * that point do we really have a real sense of maxRTT (the queues en route * were getting just too full now). */ -static void htcp_param_update(struct tcp_sock *tp) +static void htcp_param_update(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); u32 minRTT = ca->minRTT; u32 maxRTT = ca->maxRTT; @@ -187,17 +193,19 @@ static void htcp_param_update(struct tcp_sock *tp) ca->maxRTT = minRTT + ((maxRTT-minRTT)*95)/100; } -static u32 htcp_recalc_ssthresh(struct tcp_sock *tp) +static u32 htcp_recalc_ssthresh(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); - htcp_param_update(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct htcp *ca = inet_csk_ca(sk); + htcp_param_update(sk); return max((tp->snd_cwnd * ca->beta) >> 7, 2U); } -static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int data_acked) { - struct htcp *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct htcp *ca = inet_csk_ca(sk); if (in_flight < tp->snd_cwnd) return; @@ -207,7 +215,7 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, if (tp->snd_cwnd < tp->snd_cwnd_clamp) tp->snd_cwnd++; } else { - measure_rtt(tp); + measure_rtt(sk); /* keep track of number of round-trip times since last backoff event */ if (ca->snd_cwnd_cnt2++ > tp->snd_cwnd) { @@ -229,28 +237,29 @@ static void htcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, } /* Lower bound on congestion window. 
*/ -static u32 htcp_min_cwnd(struct tcp_sock *tp) +static u32 htcp_min_cwnd(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return tp->snd_ssthresh; } -static void htcp_init(struct tcp_sock *tp) +static void htcp_init(struct sock *sk) { - struct htcp *ca = tcp_ca(tp); + struct htcp *ca = inet_csk_ca(sk); memset(ca, 0, sizeof(struct htcp)); ca->alpha = ALPHA_BASE; ca->beta = BETA_MIN; } -static void htcp_state(struct tcp_sock *tp, u8 new_state) +static void htcp_state(struct sock *sk, u8 new_state) { switch (new_state) { case TCP_CA_CWR: case TCP_CA_Recovery: case TCP_CA_Loss: - htcp_reset(tcp_ca(tp)); + htcp_reset(inet_csk_ca(sk)); break; } } @@ -269,7 +278,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUG_ON(sizeof(struct htcp) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); if (!use_bandwidth_switch) htcp.pkts_acked = NULL; diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index 13a66342c304..77add63623df 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -33,19 +33,20 @@ MODULE_PARM_DESC(rtt0, "reference rout trip time (ms)"); /* This is called to refresh values for hybla parameters */ -static inline void hybla_recalc_param (struct tcp_sock *tp) +static inline void hybla_recalc_param (struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct hybla *ca = inet_csk_ca(sk); - ca->rho_3ls = max_t(u32, tp->srtt / msecs_to_jiffies(rtt0), 8); + ca->rho_3ls = max_t(u32, tcp_sk(sk)->srtt / msecs_to_jiffies(rtt0), 8); ca->rho = ca->rho_3ls >> 3; ca->rho2_7ls = (ca->rho_3ls * ca->rho_3ls) << 1; ca->rho2 = ca->rho2_7ls >>7; } -static void hybla_init(struct tcp_sock *tp) +static void hybla_init(struct sock *sk) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); ca->rho = 0; ca->rho2 = 0; @@ -57,17 +58,16 @@ static void hybla_init(struct tcp_sock *tp) tp->snd_cwnd_clamp = 65535; /* 
1st Rho measurement based on initial srtt */ - hybla_recalc_param(tp); + hybla_recalc_param(sk); /* set minimum rtt as this is the 1st ever seen */ ca->minrtt = tp->srtt; tp->snd_cwnd = ca->rho; } -static void hybla_state(struct tcp_sock *tp, u8 ca_state) +static void hybla_state(struct sock *sk, u8 ca_state) { - struct hybla *ca = tcp_ca(tp); - + struct hybla *ca = inet_csk_ca(sk); ca->hybla_en = (ca_state == TCP_CA_Open); } @@ -86,27 +86,28 @@ static inline u32 hybla_fraction(u32 odds) * o Give cwnd a new value based on the model proposed * o remember increments <1 */ -static void hybla_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { - struct hybla *ca = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct hybla *ca = inet_csk_ca(sk); u32 increment, odd, rho_fractions; int is_slowstart = 0; /* Recalculate rho only if this srtt is the lowest */ if (tp->srtt < ca->minrtt){ - hybla_recalc_param(tp); + hybla_recalc_param(sk); ca->minrtt = tp->srtt; } if (!ca->hybla_en) - return tcp_reno_cong_avoid(tp, ack, rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag); if (in_flight < tp->snd_cwnd) return; if (ca->rho == 0) - hybla_recalc_param(tp); + hybla_recalc_param(sk); rho_fractions = ca->rho_3ls - (ca->rho << 3); @@ -170,7 +171,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUG_ON(sizeof(struct hybla) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71d456148de7..fdd9547fb783 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -325,11 +325,12 @@ static void tcp_init_buffer_space(struct sock *sk) /* 5. Recalculate window clamp after socket hit its memory bounds. 
*/ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) { + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_buff *skb; unsigned int app_win = tp->rcv_nxt - tp->copied_seq; int ofo_win = 0; - inet_csk(sk)->icsk_ack.quick = 0; + icsk->icsk_ack.quick = 0; skb_queue_walk(&tp->out_of_order_queue, skb) { ofo_win += skb->len; @@ -350,8 +351,8 @@ static void tcp_clamp_window(struct sock *sk, struct tcp_sock *tp) app_win += ofo_win; if (atomic_read(&sk->sk_rmem_alloc) >= 2 * sk->sk_rcvbuf) app_win >>= 1; - if (app_win > inet_csk(sk)->icsk_ack.rcv_mss) - app_win -= inet_csk(sk)->icsk_ack.rcv_mss; + if (app_win > icsk->icsk_ack.rcv_mss) + app_win -= icsk->icsk_ack.rcv_mss; app_win = max(app_win, 2U*tp->advmss); if (!ofo_win) @@ -549,8 +550,10 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_sock *tp, struct sk_ * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ -static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) +static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt, u32 *usrtt) { + struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); long m = mrtt; /* RTT */ /* The following amusing code comes from Jacobson's @@ -610,8 +613,8 @@ static void tcp_rtt_estimator(struct tcp_sock *tp, __u32 mrtt, u32 *usrtt) tp->rtt_seq = tp->snd_nxt; } - if (tp->ca_ops->rtt_sample) - tp->ca_ops->rtt_sample(tp, *usrtt); + if (icsk->icsk_ca_ops->rtt_sample) + icsk->icsk_ca_ops->rtt_sample(sk, *usrtt); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -663,9 +666,10 @@ void tcp_update_metrics(struct sock *sk) dst_confirm(dst); if (dst && (dst->flags&DST_HOST)) { + const struct inet_connection_sock *icsk = inet_csk(sk); int m; - if (inet_csk(sk)->icsk_backoff || !tp->srtt) { + if (icsk->icsk_backoff || !tp->srtt) { /* This session failed to estimate rtt. Why? 
* Probably, no packets returned in time. * Reset our results. @@ -714,7 +718,7 @@ void tcp_update_metrics(struct sock *sk) tp->snd_cwnd > dst_metric(dst, RTAX_CWND)) dst->metrics[RTAX_CWND-1] = tp->snd_cwnd; } else if (tp->snd_cwnd > tp->snd_ssthresh && - tp->ca_state == TCP_CA_Open) { + icsk->icsk_ca_state == TCP_CA_Open) { /* Cong. avoidance phase, cwnd is reliable. */ if (!dst_metric_locked(dst, RTAX_SSTHRESH)) dst->metrics[RTAX_SSTHRESH-1] = @@ -828,8 +832,10 @@ reset: } } -static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) +static void tcp_update_reordering(struct sock *sk, const int metric, + const int ts) { + struct tcp_sock *tp = tcp_sk(sk); if (metric > tp->reordering) { tp->reordering = min(TCP_MAX_REORDERING, metric); @@ -844,7 +850,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) NET_INC_STATS_BH(LINUX_MIB_TCPSACKREORDER); #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", - tp->rx_opt.sack_ok, tp->ca_state, + tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, tp->fackets_out, tp->sacked_out, @@ -906,6 +912,7 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) static int tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); @@ -1071,7 +1078,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ * we have to account for reordering! Ugly, * but should help. 
*/ - if (lost_retrans && tp->ca_state == TCP_CA_Recovery) { + if (lost_retrans && icsk->icsk_ca_state == TCP_CA_Recovery) { struct sk_buff *skb; sk_stream_for_retrans_queue(skb, sk) { @@ -1100,8 +1107,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->left_out = tp->sacked_out + tp->lost_out; - if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); + if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss) + tcp_update_reordering(sk, ((tp->fackets_out + 1) - reord), 0); #if FASTRETRANS_DEBUG > 0 BUG_TRAP((int)tp->sacked_out >= 0); @@ -1118,17 +1125,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ void tcp_enter_frto(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; tp->frto_counter = 1; - if (tp->ca_state <= TCP_CA_Disorder || + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_FRTO); + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_FRTO); } /* Have to clear retransmission markers here to keep the bookkeeping @@ -1145,7 +1153,7 @@ void tcp_enter_frto(struct sock *sk) } tcp_sync_left_out(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); tp->frto_highmark = tp->snd_nxt; } @@ -1191,7 +1199,7 @@ static void tcp_enter_frto_loss(struct sock *sk) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->frto_highmark; 
TCP_ECN_queue_cwr(tp); } @@ -1215,16 +1223,17 @@ void tcp_clear_retrans(struct tcp_sock *tp) */ void tcp_enter_loss(struct sock *sk, int how) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int cnt = 0; /* Reduce ssthresh if it has not yet been made inside this window. */ - if (tp->ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || - (tp->ca_state == TCP_CA_Loss && !inet_csk(sk)->icsk_retransmits)) { - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); - tcp_ca_event(tp, CA_EVENT_LOSS); + if (icsk->icsk_ca_state <= TCP_CA_Disorder || tp->snd_una == tp->high_seq || + (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); + tcp_ca_event(sk, CA_EVENT_LOSS); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; @@ -1255,7 +1264,7 @@ void tcp_enter_loss(struct sock *sk, int how) tp->reordering = min_t(unsigned int, tp->reordering, sysctl_tcp_reordering); - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); } @@ -1272,13 +1281,14 @@ static int tcp_check_sack_reneging(struct sock *sk) */ if ((skb = skb_peek(&sk->sk_write_queue)) != NULL && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { + struct inet_connection_sock *icsk = inet_csk(sk); NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING); tcp_enter_loss(sk, 1); - inet_csk(sk)->icsk_retransmits++; + icsk->icsk_retransmits++; tcp_retransmit_skb(sk, skb_peek(&sk->sk_write_queue)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, TCP_RTO_MAX); + icsk->icsk_rto, TCP_RTO_MAX); return 1; } return 0; @@ -1431,8 +1441,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) * in assumption of absent reordering, interpret this as reordering. * The only another reason could be bug in receiver TCP. 
*/ -static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) +static void tcp_check_reno_reordering(struct sock *sk, const int addend) { + struct tcp_sock *tp = tcp_sk(sk); u32 holes; holes = max(tp->lost_out, 1U); @@ -1440,16 +1451,17 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) if ((tp->sacked_out + holes) > tp->packets_out) { tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + tcp_update_reordering(sk, tp->packets_out + addend, 0); } } /* Emulate SACKs for SACKless connection: account for a new dupack. */ -static void tcp_add_reno_sack(struct tcp_sock *tp) +static void tcp_add_reno_sack(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->sacked_out++; - tcp_check_reno_reordering(tp, 0); + tcp_check_reno_reordering(sk, 0); tcp_sync_left_out(tp); } @@ -1464,7 +1476,7 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke else tp->sacked_out -= acked-1; } - tcp_check_reno_reordering(tp, acked); + tcp_check_reno_reordering(sk, acked); tcp_sync_left_out(tp); } @@ -1538,14 +1550,16 @@ static inline void tcp_moderate_cwnd(struct tcp_sock *tp) } /* Decrease cwnd each second ack. */ -static void tcp_cwnd_down(struct tcp_sock *tp) +static void tcp_cwnd_down(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); int decr = tp->snd_cwnd_cnt + 1; tp->snd_cwnd_cnt = decr&1; decr >>= 1; - if (decr && tp->snd_cwnd > tp->ca_ops->min_cwnd(tp)) + if (decr && tp->snd_cwnd > icsk->icsk_ca_ops->min_cwnd(sk)) tp->snd_cwnd -= decr; tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1); @@ -1579,11 +1593,15 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) #define DBGUNDO(x...) 
do { } while (0) #endif -static void tcp_undo_cwr(struct tcp_sock *tp, int undo) +static void tcp_undo_cwr(struct sock *sk, const int undo) { + struct tcp_sock *tp = tcp_sk(sk); + if (tp->prior_ssthresh) { - if (tp->ca_ops->undo_cwnd) - tp->snd_cwnd = tp->ca_ops->undo_cwnd(tp); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->undo_cwnd) + tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk); else tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1); @@ -1611,9 +1629,9 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) /* Happy end! We did not retransmit anything * or our original transmission succeeded. */ - DBGUNDO(sk, tp, tp->ca_state == TCP_CA_Loss ? "loss" : "retrans"); - tcp_undo_cwr(tp, 1); - if (tp->ca_state == TCP_CA_Loss) + DBGUNDO(sk, tp, inet_csk(sk)->icsk_ca_state == TCP_CA_Loss ? "loss" : "retrans"); + tcp_undo_cwr(sk, 1); + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); else NET_INC_STATS_BH(LINUX_MIB_TCPFULLUNDO); @@ -1626,7 +1644,7 @@ static int tcp_try_undo_recovery(struct sock *sk, struct tcp_sock *tp) tcp_moderate_cwnd(tp); return 1; } - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 0; } @@ -1635,7 +1653,7 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_sock *tp) { if (tp->undo_marker && !tp->undo_retrans) { DBGUNDO(sk, tp, "D-SACK"); - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); tp->undo_marker = 0; NET_INC_STATS_BH(LINUX_MIB_TCPDSACKUNDO); } @@ -1656,10 +1674,10 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, if (tp->retrans_out == 0) tp->retrans_stamp = 0; - tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); + tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); DBGUNDO(sk, tp, "Hoe"); - tcp_undo_cwr(tp, 0); + tcp_undo_cwr(sk, 0); NET_INC_STATS_BH(LINUX_MIB_TCPPARTIALUNDO); /* So... Do not make Hoe's retransmit yet. 
@@ -1682,22 +1700,23 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) DBGUNDO(sk, tp, "partial loss"); tp->lost_out = 0; tp->left_out = tp->sacked_out; - tcp_undo_cwr(tp, 1); + tcp_undo_cwr(sk, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); inet_csk(sk)->icsk_retransmits = 0; tp->undo_marker = 0; if (!IsReno(tp)) - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); return 1; } return 0; } -static inline void tcp_complete_cwr(struct tcp_sock *tp) +static inline void tcp_complete_cwr(struct sock *sk) { + struct tcp_sock *tp = tcp_sk(sk); tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_ca_event(tp, CA_EVENT_COMPLETE_CWR); + tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) @@ -1708,21 +1727,21 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) tp->retrans_stamp = 0; if (flag&FLAG_ECE) - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); - if (tp->ca_state != TCP_CA_CWR) { + if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; if (tp->left_out || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; - if (tp->ca_state != state) { - tcp_set_ca_state(tp, state); + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); tp->high_seq = tp->snd_nxt; } tcp_moderate_cwnd(tp); } else { - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); } } @@ -1741,6 +1760,7 @@ static void tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, int prior_packets, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int is_dupack = (tp->snd_una == prior_snd_una && !(flag&FLAG_NOT_DUP)); @@ -1764,7 +1784,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* C. Process data loss notification, provided it is valid. 
*/ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && - tp->ca_state != TCP_CA_Open && + icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); @@ -1775,14 +1795,14 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* E. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ - if (tp->ca_state == TCP_CA_Open) { + if (icsk->icsk_ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) BUG_TRAP(tp->retrans_out == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Loss: - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (tcp_try_undo_recovery(sk, tp)) return; break; @@ -1791,8 +1811,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* CWR is to be held something *above* high_seq * is ACKed for CWR bit to reach receiver. */ if (tp->snd_una != tp->high_seq) { - tcp_complete_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_complete_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1803,7 +1823,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * catching for all duplicate ACKs. */ IsReno(tp) || tp->snd_una != tp->high_seq) { tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Open); + tcp_set_ca_state(sk, TCP_CA_Open); } break; @@ -1812,17 +1832,17 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tcp_reset_reno_sack(tp); if (tcp_try_undo_recovery(sk, tp)) return; - tcp_complete_cwr(tp); + tcp_complete_cwr(sk); break; } } /* F. Process state. 
*/ - switch (tp->ca_state) { + switch (icsk->icsk_ca_state) { case TCP_CA_Recovery: if (prior_snd_una == tp->snd_una) { if (IsReno(tp) && is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } else { int acked = prior_packets - tp->packets_out; if (IsReno(tp)) @@ -1832,13 +1852,13 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, break; case TCP_CA_Loss: if (flag&FLAG_DATA_ACKED) - inet_csk(sk)->icsk_retransmits = 0; + icsk->icsk_retransmits = 0; if (!tcp_try_undo_loss(sk, tp)) { tcp_moderate_cwnd(tp); tcp_xmit_retransmit_queue(sk); return; } - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return; /* Loss is undone; fall through to processing in Open state. */ default: @@ -1846,10 +1866,10 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (tp->snd_una != prior_snd_una) tcp_reset_reno_sack(tp); if (is_dupack) - tcp_add_reno_sack(tp); + tcp_add_reno_sack(sk); } - if (tp->ca_state == TCP_CA_Disorder) + if (icsk->icsk_ca_state == TCP_CA_Disorder) tcp_try_undo_dsack(sk, tp); if (!tcp_time_to_recover(sk, tp)) { @@ -1869,20 +1889,20 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->undo_marker = tp->snd_una; tp->undo_retrans = tp->retrans_out; - if (tp->ca_state < TCP_CA_CWR) { + if (icsk->icsk_ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) - tp->prior_ssthresh = tcp_current_ssthresh(tp); - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); TCP_ECN_queue_cwr(tp); } tp->snd_cwnd_cnt = 0; - tcp_set_ca_state(tp, TCP_CA_Recovery); + tcp_set_ca_state(sk, TCP_CA_Recovery); } if (is_dupack || tcp_head_timedout(sk, tp)) tcp_update_scoreboard(sk, tp); - tcp_cwnd_down(tp); + tcp_cwnd_down(sk); tcp_xmit_retransmit_queue(sk); } @@ -1908,7 +1928,7 @@ static void tcp_ack_saw_tstamp(struct sock *sk, u32 *usrtt, int flag) */ struct tcp_sock *tp = tcp_sk(sk); const __u32 seq_rtt = tcp_time_stamp - 
tp->rx_opt.rcv_tsecr; - tcp_rtt_estimator(tp, seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1928,7 +1948,7 @@ static void tcp_ack_no_tstamp(struct sock *sk, u32 seq_rtt, u32 *usrtt, int flag if (flag & FLAG_RETRANS_DATA_ACKED) return; - tcp_rtt_estimator(tcp_sk(sk), seq_rtt, usrtt); + tcp_rtt_estimator(sk, seq_rtt, usrtt); tcp_set_rto(sk); inet_csk(sk)->icsk_backoff = 0; tcp_bound_rto(sk); @@ -1945,11 +1965,12 @@ static inline void tcp_ack_update_rtt(struct sock *sk, const int flag, tcp_ack_no_tstamp(sk, seq_rtt, usrtt, flag); } -static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good) { - tp->ca_ops->cong_avoid(tp, ack, rtt, in_flight, good); - tp->snd_cwnd_stamp = tcp_time_stamp; + const struct inet_connection_sock *icsk = inet_csk(sk); + icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good); + tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp; } /* Restart timer after forward progress on connection. 
@@ -2098,11 +2119,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt } if (acked&FLAG_ACKED) { + const struct inet_connection_sock *icsk = inet_csk(sk); tcp_ack_update_rtt(sk, acked, seq_rtt, seq_usrtt); tcp_ack_packets_out(sk, tp); - if (tp->ca_ops->pkts_acked) - tp->ca_ops->pkts_acked(tp, pkts_acked); + if (icsk->icsk_ca_ops->pkts_acked) + icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked); } #if FASTRETRANS_DEBUG > 0 @@ -2110,19 +2132,20 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt BUG_TRAP((int)tp->lost_out >= 0); BUG_TRAP((int)tp->retrans_out >= 0); if (!tp->packets_out && tp->rx_opt.sack_ok) { + const struct inet_connection_sock *icsk = inet_csk(sk); if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", - tp->lost_out, tp->ca_state); + tp->lost_out, icsk->icsk_ca_state); tp->lost_out = 0; } if (tp->sacked_out) { printk(KERN_DEBUG "Leak s=%u %d\n", - tp->sacked_out, tp->ca_state); + tp->sacked_out, icsk->icsk_ca_state); tp->sacked_out = 0; } if (tp->retrans_out) { printk(KERN_DEBUG "Leak r=%u %d\n", - tp->retrans_out, tp->ca_state); + tp->retrans_out, icsk->icsk_ca_state); tp->retrans_out = 0; } } @@ -2152,16 +2175,17 @@ static void tcp_ack_probe(struct sock *sk) } } -static inline int tcp_ack_is_dubious(struct tcp_sock *tp, int flag) +static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) { return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || - tp->ca_state != TCP_CA_Open); + inet_csk(sk)->icsk_ca_state != TCP_CA_Open); } -static inline int tcp_may_raise_cwnd(struct tcp_sock *tp, int flag) +static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) { + const struct tcp_sock *tp = tcp_sk(sk); return (!(flag & FLAG_ECE) || tp->snd_cwnd < tp->snd_ssthresh) && - !((1<<tp->ca_state)&(TCPF_CA_Recovery|TCPF_CA_CWR)); + !((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_Recovery | TCPF_CA_CWR)); } /* Check that window update is acceptable. 
@@ -2251,6 +2275,7 @@ static void tcp_process_frto(struct sock *sk, u32 prior_snd_una) /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; @@ -2278,7 +2303,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; - tcp_ca_event(tp, CA_EVENT_FAST_ACK); + tcp_ca_event(sk, CA_EVENT_FAST_ACK); NET_INC_STATS_BH(LINUX_MIB_TCPHPACKS); } else { @@ -2295,7 +2320,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) if (TCP_ECN_rcv_ecn_echo(tp, skb->h.th)) flag |= FLAG_ECE; - tcp_ca_event(tp, CA_EVENT_SLOW_ACK); + tcp_ca_event(sk, CA_EVENT_SLOW_ACK); } /* We passed data and got it acked, remove any soft error @@ -2311,19 +2336,19 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, - tp->ca_ops->rtt_sample ? &seq_usrtt : NULL); + icsk->icsk_ca_ops->rtt_sample ? &seq_usrtt : NULL); if (tp->frto_counter) tcp_process_frto(sk, prior_snd_una); - if (tcp_ack_is_dubious(tp, flag)) { + if (tcp_ack_is_dubious(sk, flag)) { /* Advanve CWND, if state allows this. 
*/ - if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(tp, flag)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 0); + if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag)) + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0); tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag); } else { if ((flag & FLAG_DATA_ACKED)) - tcp_cong_avoid(tp, ack, seq_rtt, prior_in_flight, 1); + tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP)) @@ -2332,7 +2357,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) return 1; no_queue: - tp->probes_out = 0; + icsk->icsk_probes_out = 0; /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than @@ -3301,12 +3326,12 @@ void tcp_cwnd_application_limited(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (tp->ca_state == TCP_CA_Open && + if (inet_csk(sk)->icsk_ca_state == TCP_CA_Open && sk->sk_socket && !test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { /* Limited by application or receiver window. */ u32 win_used = max(tp->snd_cwnd_used, 2U); if (win_used < tp->snd_cwnd) { - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->snd_cwnd = (tp->snd_cwnd + win_used) >> 1; } tp->snd_cwnd_used = 0; @@ -3935,7 +3960,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on first data * packet. @@ -4212,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tcp_init_metrics(sk); - tcp_init_congestion_control(tp); + tcp_init_congestion_control(sk); /* Prevent spurious tcp_cwnd_restart() on * first data packet. 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 32a0ebc589d5..97bbf595230d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1409,13 +1409,14 @@ struct tcp_func ipv4_specific = { */ static int tcp_v4_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -1433,7 +1434,7 @@ static int tcp_v4_init_sock(struct sock *sk) tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_state = TCP_CLOSE; @@ -1456,7 +1457,7 @@ int tcp_v4_destroy_sock(struct sock *sk) tcp_clear_xmit_timers(sk); - tcp_cleanup_congestion_control(tp); + tcp_cleanup_congestion_control(sk); /* Cleanup up the write buffer. 
*/ sk_stream_writequeue_purge(sk); @@ -1883,7 +1884,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dc085233d512..a88db28b0af7 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,9 +384,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newtp->ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_reno; - tcp_set_ca_state(newtp, TCP_CA_Open); + tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); skb_queue_head_init(&newtp->out_of_order_queue); newtp->rcv_wup = treq->rcv_isn + 1; @@ -399,7 +399,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.dsack = 0; newtp->rx_opt.eff_sacks = 0; - newtp->probes_out = 0; newtp->rx_opt.num_sacks = 0; newtp->urg_data = 0; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f458eacb5ef2..267b0fcbfc9c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -112,9 +112,9 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) u32 restart_cwnd = tcp_init_cwnd(tp, dst); u32 cwnd = tp->snd_cwnd; - tcp_ca_event(tp, CA_EVENT_CWND_RESTART); + tcp_ca_event(sk, CA_EVENT_CWND_RESTART); - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) @@ -265,6 +265,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { if (skb != NULL) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock 
*tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -280,7 +281,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) #define SYSCTL_FLAG_SACK 0x4 /* If congestion control is doing timestamping */ - if (tp->ca_ops->rtt_sample) + if (icsk->icsk_ca_ops->rtt_sample) do_gettimeofday(&skb->stamp); sysctl_flags = 0; @@ -308,7 +309,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) } if (tcp_packets_in_flight(tp) == 0) - tcp_ca_event(tp, CA_EVENT_TX_START); + tcp_ca_event(sk, CA_EVENT_TX_START); th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; @@ -366,7 +367,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) if (err <= 0) return err; - tcp_enter_cwr(tp); + tcp_enter_cwr(sk); /* NET_XMIT_CN is special. It does not guarantee, * that this packet is lost. It tells that device @@ -905,12 +906,13 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, */ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) { + const struct inet_connection_sock *icsk = inet_csk(sk); u32 send_win, cong_win, limit, in_flight; if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) return 0; - if (tp->ca_state != TCP_CA_Open) + if (icsk->icsk_ca_state != TCP_CA_Open) return 0; in_flight = tcp_packets_in_flight(tp); @@ -1287,6 +1289,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ void tcp_simple_retransmit(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); @@ -1317,12 +1320,12 @@ void tcp_simple_retransmit(struct sock *sk) * in network, but units changed and effective * cwnd/ssthresh really reduced now. 
*/ - if (tp->ca_state != TCP_CA_Loss) { + if (icsk->icsk_ca_state != TCP_CA_Loss) { tp->high_seq = tp->snd_nxt; - tp->snd_ssthresh = tcp_current_ssthresh(tp); + tp->snd_ssthresh = tcp_current_ssthresh(sk); tp->prior_ssthresh = 0; tp->undo_marker = 0; - tcp_set_ca_state(tp, TCP_CA_Loss); + tcp_set_ca_state(sk, TCP_CA_Loss); } tcp_xmit_retransmit_queue(sk); } @@ -1462,6 +1465,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) */ void tcp_xmit_retransmit_queue(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; int packet_cnt = tp->lost_out; @@ -1485,7 +1489,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (!(sacked&(TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { if (tcp_retransmit_skb(sk, skb)) return; - if (tp->ca_state != TCP_CA_Loss) + if (icsk->icsk_ca_state != TCP_CA_Loss) NET_INC_STATS_BH(LINUX_MIB_TCPFASTRETRANS); else NET_INC_STATS_BH(LINUX_MIB_TCPSLOWSTARTRETRANS); @@ -1507,7 +1511,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ - if (tp->ca_state != TCP_CA_Recovery) + if (icsk->icsk_ca_state != TCP_CA_Recovery) return; /* No forward retransmissions in Reno are possible. */ @@ -2028,7 +2032,7 @@ void tcp_send_probe0(struct sock *sk) if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ - tp->probes_out = 0; + icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; return; } @@ -2036,19 +2040,19 @@ void tcp_send_probe0(struct sock *sk) if (err <= 0) { if (icsk->icsk_backoff < sysctl_tcp_retries2) icsk->icsk_backoff++; - tp->probes_out++; + icsk->icsk_probes_out++; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RTO_MAX), TCP_RTO_MAX); } else { /* If packet was not sent due to local congestion, - * do not backoff and do not remember probes_out. 
+ * do not backoff and do not remember icsk_probes_out. * Let local senders to fight for local resources. * * Use accumulated backoff yet. */ - if (!tp->probes_out) - tp->probes_out=1; + if (!icsk->icsk_probes_out) + icsk->icsk_probes_out = 1; inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, min(icsk->icsk_rto << icsk->icsk_backoff, TCP_RESOURCE_PROBE_INTERVAL), diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 70e108e15c71..327770bf5522 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -16,9 +16,10 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, +static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag) { + struct tcp_sock *tp = tcp_sk(sk); if (in_flight < tp->snd_cwnd) return; @@ -35,8 +36,9 @@ static void tcp_scalable_cong_avoid(struct tcp_sock *tp, u32 ack, u32 rtt, tp->snd_cwnd_stamp = tcp_time_stamp; } -static u32 tcp_scalable_ssthresh(struct tcp_sock *tp) +static u32 tcp_scalable_ssthresh(struct sock *sk) { + const struct tcp_sock *tp = tcp_sk(sk); return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 72cec6981830..415ee47ac1c5 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -233,11 +233,12 @@ out_unlock: static void tcp_probe_timer(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; if (tp->packets_out || !sk->sk_send_head) { - tp->probes_out = 0; + icsk->icsk_probes_out = 0; return; } @@ -248,7 +249,7 @@ static void tcp_probe_timer(struct sock *sk) * FIXME: We ought not to do it, Solaris 2.5 actually has fixing * this behaviour in Solaris down as a bug fix. [AC] * - * Let me to explain. probes_out is zeroed by incoming ACKs + * Let me to explain. icsk_probes_out is zeroed by incoming ACKs * even if they advertise zero window. 
Hence, connection is killed only * if we received no ACKs for normal connection timeout. It is not killed * only because window stays zero for some time, window may be zero @@ -259,16 +260,15 @@ static void tcp_probe_timer(struct sock *sk) max_probes = sysctl_tcp_retries2; if (sock_flag(sk, SOCK_DEAD)) { - const struct inet_connection_sock *icsk = inet_csk(sk); const int alive = ((icsk->icsk_rto << icsk->icsk_backoff) < TCP_RTO_MAX); max_probes = tcp_orphan_retries(sk, alive); - if (tcp_out_of_resources(sk, alive || tp->probes_out <= max_probes)) + if (tcp_out_of_resources(sk, alive || icsk->icsk_probes_out <= max_probes)) return; } - if (tp->probes_out > max_probes) { + if (icsk->icsk_probes_out > max_probes) { tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -319,19 +319,20 @@ static void tcp_retransmit_timer(struct sock *sk) goto out; if (icsk->icsk_retransmits == 0) { - if (tp->ca_state == TCP_CA_Disorder || tp->ca_state == TCP_CA_Recovery) { + if (icsk->icsk_ca_state == TCP_CA_Disorder || + icsk->icsk_ca_state == TCP_CA_Recovery) { if (tp->rx_opt.sack_ok) { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPSACKRECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPSACKFAILURES); } else { - if (tp->ca_state == TCP_CA_Recovery) + if (icsk->icsk_ca_state == TCP_CA_Recovery) NET_INC_STATS_BH(LINUX_MIB_TCPRENORECOVERYFAIL); else NET_INC_STATS_BH(LINUX_MIB_TCPRENOFAILURES); } - } else if (tp->ca_state == TCP_CA_Loss) { + } else if (icsk->icsk_ca_state == TCP_CA_Loss) { NET_INC_STATS_BH(LINUX_MIB_TCPLOSSFAILURES); } else { NET_INC_STATS_BH(LINUX_MIB_TCPTIMEOUTS); @@ -449,6 +450,7 @@ void tcp_set_keepalive(struct sock *sk, int val) static void tcp_keepalive_timer (unsigned long data) { struct sock *sk = (struct sock *) data; + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); __u32 elapsed; @@ -490,14 +492,14 @@ static void 
tcp_keepalive_timer (unsigned long data) elapsed = tcp_time_stamp - tp->rcv_tstamp; if (elapsed >= keepalive_time_when(tp)) { - if ((!tp->keepalive_probes && tp->probes_out >= sysctl_tcp_keepalive_probes) || - (tp->keepalive_probes && tp->probes_out >= tp->keepalive_probes)) { + if ((!tp->keepalive_probes && icsk->icsk_probes_out >= sysctl_tcp_keepalive_probes) || + (tp->keepalive_probes && icsk->icsk_probes_out >= tp->keepalive_probes)) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; } if (tcp_write_wakeup(sk) <= 0) { - tp->probes_out++; + icsk->icsk_probes_out++; elapsed = keepalive_intvl_when(tp); } else { /* If keepalive was lost due to local congestion, diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 9bd443db5193..054de24efee2 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -82,9 +82,10 @@ struct vegas { * Instead we must wait until the completion of an RTT during * which we actually receive ACKs. */ -static inline void vegas_enable(struct tcp_sock *tp) +static inline void vegas_enable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); /* Begin taking Vegas samples next time we send something. */ vegas->doing_vegas_now = 1; @@ -97,19 +98,19 @@ static inline void vegas_enable(struct tcp_sock *tp) } /* Stop taking Vegas samples for now. */ -static inline void vegas_disable(struct tcp_sock *tp) +static inline void vegas_disable(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->doing_vegas_now = 0; } -static void tcp_vegas_init(struct tcp_sock *tp) +static void tcp_vegas_init(struct sock *sk) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); vegas->baseRTT = 0x7fffffff; - vegas_enable(tp); + vegas_enable(sk); } /* Do RTT sampling needed for Vegas. 
@@ -120,9 +121,9 @@ static void tcp_vegas_init(struct tcp_sock *tp) * o min-filter RTT samples from a much longer window (forever for now) * to find the propagation delay (baseRTT) */ -static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) +static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt) { - struct vegas *vegas = tcp_ca(tp); + struct vegas *vegas = inet_csk_ca(sk); u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */ /* Filter to find propagation delay: */ @@ -136,13 +137,13 @@ static void tcp_vegas_rtt_calc(struct tcp_sock *tp, u32 usrtt) vegas->cntRTT++; } -static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) +static void tcp_vegas_state(struct sock *sk, u8 ca_state) { if (ca_state == TCP_CA_Open) - vegas_enable(tp); + vegas_enable(sk); else - vegas_disable(tp); + vegas_disable(sk); } /* @@ -154,20 +155,21 @@ static void tcp_vegas_state(struct tcp_sock *tp, u8 ca_state) * packets, _then_ we can make Vegas calculations * again. */ -static void tcp_vegas_cwnd_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) - tcp_vegas_init(tp); + tcp_vegas_init(sk); } -static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, +static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 seq_rtt, u32 in_flight, int flag) { - struct vegas *vegas = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct vegas *vegas = inet_csk_ca(sk); if (!vegas->doing_vegas_now) - return tcp_reno_cong_avoid(tp, ack, seq_rtt, in_flight, flag); + return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag); /* The key players are v_beg_snd_una and v_beg_snd_nxt. * @@ -219,7 +221,7 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, * but that's not too awful, since we're taking the min, * rather than averaging. 
*/ - tcp_vegas_rtt_calc(tp, seq_rtt*1000); + tcp_vegas_rtt_calc(sk, seq_rtt * 1000); /* We do the Vegas calculations only if we got enough RTT * samples that we can be reasonably sure that we got @@ -359,10 +361,10 @@ static void tcp_vegas_cong_avoid(struct tcp_sock *tp, u32 ack, } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_vegas_get_info(struct tcp_sock *tp, u32 ext, +static void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct vegas *ca = tcp_ca(tp); + const struct vegas *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct tcpvegas_info *info; @@ -393,7 +395,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUG_ON(sizeof(struct vegas) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index ef827242c940..d8a5a2b92e37 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -40,9 +40,9 @@ struct westwood { * way as soon as possible. It will reasonably happen within the first * RTT period of the connection lifetime. */ -static void tcp_westwood_init(struct tcp_sock *tp) +static void tcp_westwood_init(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); w->bk = 0; w->bw_ns_est = 0; @@ -51,7 +51,7 @@ static void tcp_westwood_init(struct tcp_sock *tp) w->cumul_ack = 0; w->rtt_min = w->rtt = TCP_WESTWOOD_INIT_RTT; w->rtt_win_sx = tcp_time_stamp; - w->snd_una = tp->snd_una; + w->snd_una = tcp_sk(sk)->snd_una; } /* @@ -74,11 +74,11 @@ static inline void westwood_filter(struct westwood *w, u32 delta) * Called after processing group of packets. * but all westwood needs is the last sample of srtt. 
*/ -static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) +static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); if (cnt > 0) - w->rtt = tp->srtt >> 3; + w->rtt = tcp_sk(sk)->srtt >> 3; } /* @@ -86,9 +86,9 @@ static void tcp_westwood_pkts_acked(struct tcp_sock *tp, u32 cnt) * It updates RTT evaluation window if it is the right moment to do * it. If so it calls filter for evaluating bandwidth. */ -static void westwood_update_window(struct tcp_sock *tp) +static void westwood_update_window(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + struct westwood *w = inet_csk_ca(sk); s32 delta = tcp_time_stamp - w->rtt_win_sx; /* @@ -114,11 +114,12 @@ static void westwood_update_window(struct tcp_sock *tp) * header prediction is successful. In such case in fact update is * straight forward and doesn't need any particular care. */ -static inline void westwood_fast_bw(struct tcp_sock *tp) +static inline void westwood_fast_bw(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); - westwood_update_window(tp); + westwood_update_window(sk); w->bk += tp->snd_una - w->snd_una; w->snd_una = tp->snd_una; @@ -130,9 +131,10 @@ static inline void westwood_fast_bw(struct tcp_sock *tp) * This function evaluates cumul_ack for evaluating bk in case of * delayed or partial acks. 
*/ -static inline u32 westwood_acked_count(struct tcp_sock *tp) +static inline u32 westwood_acked_count(struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); w->cumul_ack = tp->snd_una - w->snd_una; @@ -160,9 +162,10 @@ static inline u32 westwood_acked_count(struct tcp_sock *tp) return w->cumul_ack; } -static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) +static inline u32 westwood_bw_rttmin(const struct sock *sk) { - struct westwood *w = tcp_ca(tp); + const struct tcp_sock *tp = tcp_sk(sk); + const struct westwood *w = inet_csk_ca(sk); return max_t(u32, (w->bw_est * w->rtt_min) / tp->mss_cache, 2); } @@ -172,31 +175,32 @@ static inline u32 westwood_bw_rttmin(const struct tcp_sock *tp) * in packets we use mss_cache). Rttmin is guaranteed to be >= 2 * so avoids ever returning 0. */ -static u32 tcp_westwood_cwnd_min(struct tcp_sock *tp) +static u32 tcp_westwood_cwnd_min(struct sock *sk) { - return westwood_bw_rttmin(tp); + return westwood_bw_rttmin(sk); } -static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) +static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) { - struct westwood *w = tcp_ca(tp); + struct tcp_sock *tp = tcp_sk(sk); + struct westwood *w = inet_csk_ca(sk); switch(event) { case CA_EVENT_FAST_ACK: - westwood_fast_bw(tp); + westwood_fast_bw(sk); break; case CA_EVENT_COMPLETE_CWR: - tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_cwnd = tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_FRTO: - tp->snd_ssthresh = westwood_bw_rttmin(tp); + tp->snd_ssthresh = westwood_bw_rttmin(sk); break; case CA_EVENT_SLOW_ACK: - westwood_update_window(tp); - w->bk += westwood_acked_count(tp); + westwood_update_window(sk); + w->bk += westwood_acked_count(sk); w->rtt_min = min(w->rtt, w->rtt_min); break; @@ -208,10 +212,10 @@ static void tcp_westwood_event(struct tcp_sock *tp, enum tcp_ca_event event) /* 
Extract info for Tcp socket info provided via netlink. */ -static void tcp_westwood_info(struct tcp_sock *tp, u32 ext, +static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { - const struct westwood *ca = tcp_ca(tp); + const struct westwood *ca = inet_csk_ca(sk); if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { struct rtattr *rta; struct tcpvegas_info *info; @@ -242,7 +246,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUG_ON(sizeof(struct westwood) > TCP_CA_PRIV_SIZE); + BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 08c55b024704..3312cb8742e2 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2010,13 +2010,14 @@ static struct tcp_func ipv6_mapped = { */ static int tcp_v6_init_sock(struct sock *sk) { + struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); skb_queue_head_init(&tp->out_of_order_queue); tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); - inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT; + icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev = TCP_TIMEOUT_INIT; /* So many TCP implementations out there (incorrectly) count the @@ -2038,7 +2039,7 @@ static int tcp_v6_init_sock(struct sock *sk) sk->sk_state = TCP_CLOSE; tp->af_specific = &ipv6_specific; - tp->ca_ops = &tcp_init_congestion_ops; + icsk->icsk_ca_ops = &tcp_init_congestion_ops; sk->sk_write_space = sk_stream_write_space; sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); @@ -2135,7 +2136,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), - tp->probes_out, + icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, icsk->icsk_rto, -- cgit v1.2.3 From 540722ffc3a0d7e11d97a13e1ce6f3bc23b061c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 
2005 05:54:28 -0300 Subject: [TCPDIAG]: Implement cheapest way of supporting DCCPDIAG_GETSOCK With ugly ifdefs, etc, but this actually: 1. keeps the existing ABI, i.e. no need to recompile the iproute2 utilities if not interested in DCCP. 2. Provides all the tcp_diag functionality in DCCP, with just a small patch that makes iproute2 support DCCP. Of course I'll get this cleaned-up in time, but for now I think its OK to be this way to quickly get this functionality. iproute2-ss050808 patch at: http://vger.kernel.org/~acme/iproute2-ss050808.dccp.patch Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp_diag.h | 1 + net/dccp/ipv4.c | 2 ++ net/ipv4/tcp_diag.c | 86 ++++++++++++++++++++++++++++++++++-------------- 3 files changed, 64 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 7a5996743946..190494ebcfb8 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -3,6 +3,7 @@ /* Just some random number */ #define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 /* Socket identity */ struct tcpdiag_sockid diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f6da9328221e..d3770aed3b15 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -34,6 +34,8 @@ struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { .port_rover = 1024 - 1, }; +EXPORT_SYMBOL_GPL(dccp_hashinfo); + static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) { return inet_csk_get_port(&dccp_hashinfo, sk, snum); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 4288ecfec9a7..f5fc84aaa9b4 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -45,11 +45,15 @@ static struct sock *tcpnl; #define TCPDIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) +extern struct inet_hashinfo dccp_hashinfo; +#endif + static int tcpdiag_fill(struct sk_buff *skb, 
struct sock *sk, - int ext, u32 pid, u32 seq, u16 nlmsg_flags) + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); - struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; @@ -57,7 +61,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, struct tcpdiag_meminfo *minfo = NULL; unsigned char *b = skb->tail; - nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = nlmsg_flags; r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { @@ -147,8 +151,20 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + /* + * Ahem... for now we'll have some knowledge about TCP -acme + * But this is just one of two small exceptions, both in this + * function, so lets close our eyes for some 15 lines or so... 8) + * -acme + */ + if (sk->sk_protocol == IPPROTO_TCP) { + const struct tcp_sock *tp = tcp_sk(sk); + + r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + } else + r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; + r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = sock_i_ino(sk); @@ -159,8 +175,13 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); } - if (info) - tcp_get_info(sk, info); + /* Ahem... 
for now we'll have some knowledge about TCP -acme */ + if (info) { + if (sk->sk_protocol == IPPROTO_TCP) + tcp_get_info(sk, info); + else + memset(info, 0, sizeof(*info)); + } if (sk->sk_state < TCP_TIME_WAIT && icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) @@ -194,9 +215,13 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sock *sk; struct tcpdiagreq *req = NLMSG_DATA(nlh); struct sk_buff *rep; - + struct inet_hashinfo *hashinfo = &tcp_hashinfo; +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + if (nlh->nlmsg_type == DCCPDIAG_GETSOCK) + hashinfo = &dccp_hashinfo; +#endif if (req->tcpdiag_family == AF_INET) { - sk = inet_lookup(&tcp_hashinfo, req->id.tcpdiag_dst[0], + sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, req->id.tcpdiag_src[0], req->id.tcpdiag_sport, req->id.tcpdiag_if); } @@ -230,7 +255,7 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) if (tcpdiag_fill(rep, sk, req->tcpdiag_ext, NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0) <= 0) + nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); @@ -436,12 +461,13 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, } return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, - u32 pid, u32 seq) + u32 pid, u32 seq, + const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); @@ -450,7 +476,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r)); + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = NLM_F_MULTI; r = NLMSG_DATA(nlh); @@ -526,7 
+552,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.userlocks = sk->sk_userlocks; } - for (j = s_j; j < TCP_SYNQ_HSIZE; j++) { + for (j = s_j; j < lopt->nr_table_entries; j++) { struct request_sock *req, *head = lopt->syn_table[j]; reqnum = 0; @@ -561,7 +587,7 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = tcpdiag_fill_req(skb, sk, req, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq); + cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; cb->args[4] = reqnum; @@ -583,20 +609,26 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) int i, num; int s_i, s_num; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_hashinfo *hashinfo; s_i = cb->args[1]; s_num = num = cb->args[2]; - + hashinfo = &tcp_hashinfo; +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK) + hashinfo = &dccp_hashinfo; +#endif if (cb->args[0] == 0) { if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; - inet_listen_lock(&tcp_hashinfo); + + inet_listen_lock(hashinfo); for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct sock *sk; struct hlist_node *node; num = 0; - sk_for_each(sk, node, &tcp_hashinfo.listening_hash[i]) { + sk_for_each(sk, node, &hashinfo->listening_hash[i]) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) { @@ -614,7 +646,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) goto syn_recv; if (tcpdiag_dump_sock(skb, sk, cb) < 0) { - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); goto done; } @@ -623,7 +655,7 @@ syn_recv: goto next_listen; if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); goto done; } @@ -637,7 +669,7 @@ next_listen: cb->args[3] = 0; cb->args[4] = 0; } - inet_listen_unlock(&tcp_hashinfo); + inet_listen_unlock(hashinfo); skip_listen_ht: cb->args[0] = 1; s_i = num = s_num = 0; @@ 
-646,8 +678,8 @@ skip_listen_ht: if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) return skb->len; - for (i = s_i; i < tcp_hashinfo.ehash_size; i++) { - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[i]; + for (i = s_i; i < hashinfo->ehash_size; i++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[i]; struct sock *sk; struct hlist_node *node; @@ -679,7 +711,7 @@ next_normal: if (r->tcpdiag_states&TCPF_TIME_WAIT) { sk_for_each(sk, node, - &tcp_hashinfo.ehash[i + tcp_hashinfo.ehash_size].chain) { + &hashinfo->ehash[i + hashinfo->ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) @@ -719,7 +751,11 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; - if (nlh->nlmsg_type != TCPDIAG_GETSOCK) + if (nlh->nlmsg_type != TCPDIAG_GETSOCK +#if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE) + && nlh->nlmsg_type != DCCPDIAG_GETSOCK +#endif + ) goto err_inval; if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) -- cgit v1.2.3 From 8c60f3fab55712f23f2bd557ceedfbb00c649f37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 12:59:38 -0300 Subject: [CCID3]: Separate most of the packet history code This also changes the list_for_each_entry_safe_continue behaviour to match its kerneldoc comment, that is, to start after the pos passed. Also adds several helper functions from previously open coded fragments, making the code more clear. 
Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/list.h | 3 +- net/dccp/Makefile | 3 +- net/dccp/ccids/ccid3.c | 327 +++++++++++++++++++--------------------------- net/dccp/ccids/ccid3.h | 18 --- net/dccp/packet_history.c | 198 ++++++++++++++++++++++++++++ net/dccp/packet_history.h | 182 ++++++++++++++++++++++++++ 6 files changed, 518 insertions(+), 213 deletions(-) create mode 100644 net/dccp/packet_history.c create mode 100644 net/dccp/packet_history.h (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 597094e0fdb5..0f2435f92db3 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -427,7 +427,8 @@ static inline void list_splice_init(struct list_head *list, * @member: the name of the list_struct within the struct. */ #define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + for (pos = list_entry(pos->member.next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) diff --git a/net/dccp/Makefile b/net/dccp/Makefile index c6e6ba55c36b..25a50bdbf1bb 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_IP_DCCP) += dccp.o -dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o +dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ + timer.o packet_history.o obj-y += ccids/ diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index df4adfeaafac..15c25f622000 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -34,8 +34,10 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ +#include #include "../ccid.h" #include "../dccp.h" +#include "../packet_history.h" #include "ccid3.h" #ifdef CCID3_DEBUG @@ -82,60 +84,10 @@ enum ccid3_options { static int ccid3_debug; -static kmem_cache_t *ccid3_tx_hist_slab; -static kmem_cache_t *ccid3_rx_hist_slab; -static kmem_cache_t *ccid3_loss_interval_hist_slab; - -static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio) -{ - struct ccid3_tx_hist_entry *entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio); - - if (entry != NULL) - entry->ccid3htx_sent = 0; - - return entry; -} - -static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_tx_hist_slab, entry); -} - -static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk, - struct sk_buff *skb, - int prio) -{ - struct ccid3_rx_hist_entry *entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio); - - if (entry != NULL) { - const struct dccp_hdr *dh = dccp_hdr(skb); - - entry->ccid3hrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - entry->ccid3hrx_win_count = dh->dccph_ccval; - entry->ccid3hrx_type = dh->dccph_type; - entry->ccid3hrx_ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; - do_gettimeofday(&(entry->ccid3hrx_tstamp)); - } - - return entry; -} - -static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry) -{ - if (entry != NULL) - kmem_cache_free(ccid3_rx_hist_slab, entry); -} +struct dccp_tx_hist *ccid3_tx_hist; +struct dccp_rx_hist *ccid3_rx_hist; -static void ccid3_rx_history_delete(struct list_head *hist) -{ - struct ccid3_rx_hist_entry *entry, *next; - - list_for_each_entry_safe(entry, next, hist, ccid3hrx_node) { - list_del_init(&entry->ccid3hrx_node); - kmem_cache_free(ccid3_rx_hist_slab, entry); - } -} +static kmem_cache_t *ccid3_loss_interval_hist_slab; static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio) { @@ -982,7 +934,7 @@ static int ccid3_hc_tx_send_packet(struct 
sock *sk, { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *new_packet = NULL; + struct dccp_tx_hist_entry *new_packet; struct timeval now; long delay; int rc = -ENOTCONN; @@ -997,12 +949,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; /* See if last packet allocated was not sent */ - if (!list_empty(&hctx->ccid3hctx_hist)) - new_packet = list_entry(hctx->ccid3hctx_hist.next, - struct ccid3_tx_hist_entry, ccid3htx_node); - - if (new_packet == NULL || new_packet->ccid3htx_sent) { - new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC); + new_packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (new_packet == NULL || new_packet->dccphtx_sent) { + new_packet = dccp_tx_hist_entry_new(ccid3_tx_hist, SLAB_ATOMIC); rc = -ENOBUFS; if (new_packet == NULL) { @@ -1011,7 +960,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, goto out; } - list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist); + dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } do_gettimeofday(&now); @@ -1054,7 +1003,9 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, /* Can we send? 
if so add options and add to packet history */ if (rc == 0) - new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + new_packet->dccphtx_win_count = + DCCP_SKB_CB(skb)->dccpd_ccval = + hctx->ccid3hctx_last_win_count; out: return rc; } @@ -1063,7 +1014,6 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *packet = NULL; struct timeval now; // ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len); @@ -1080,20 +1030,23 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) /* check if we have sent a data packet */ if (len > 0) { unsigned long quarter_rtt; + struct dccp_tx_hist_entry *packet; - if (list_empty(&hctx->ccid3hctx_hist)) { + packet = dccp_tx_hist_head(&hctx->ccid3hctx_hist); + if (packet == NULL) { printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__); return; } - packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node); - if (packet->ccid3htx_sent) { + if (packet->dccphtx_sent) { printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__); return; } - packet->ccid3htx_tstamp = now; - packet->ccid3htx_seqno = dp->dccps_gss; - // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno); - + packet->dccphtx_tstamp = now; + packet->dccphtx_seqno = dp->dccps_gss; +#if 0 + ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", + dccp_role(sk), sk, packet->dccphtx_seqno); +#endif /* * Check if win_count have changed */ /* COMPLIANCE_BEGIN @@ -1106,18 +1059,18 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) min_t(unsigned long, quarter_rtt, 5)) % 16; ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n", dccp_role(sk), sk, - packet->ccid3htx_win_count, + packet->dccphtx_win_count, 
hctx->ccid3hctx_last_win_count); } /* COMPLIANCE_END */ #if 0 ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n", dccp_role(sk), sk, - packet->ccid3htx_seqno, - packet->ccid3htx_win_count); + packet->dccphtx_seqno, + packet->dccphtx_win_count); #endif hctx->ccid3hctx_idle = 0; - packet->ccid3htx_sent = 1; + packet->dccphtx_sent = 1; } else ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n", dccp_role(sk), sk, dp->dccps_gss); @@ -1152,7 +1105,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_options_received *opt_recv; - struct ccid3_tx_hist_entry *entry, *next, *packet; + struct dccp_tx_hist_entry *packet; unsigned long next_tmout; u16 t_elapsed; u32 pinv; @@ -1191,13 +1144,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Calculate new round trip sample by * R_sample = (now - t_recvdata) - t_delay */ /* get t_recvdata from history */ - packet = NULL; - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) - if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) { - packet = entry; - break; - } - + packet = dccp_tx_hist_find_entry(&hctx->ccid3hctx_hist, + DCCP_SKB_CB(skb)->dccpd_ack_seq); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n", dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq, @@ -1206,7 +1154,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = now_delta(packet->ccid3htx_tstamp); + r_sample = now_delta(packet->dccphtx_tstamp); /* FIXME: */ // r_sample -= usecs_to_jiffies(t_elapsed * 10); @@ -1273,10 +1221,9 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) ccid3_calc_new_delta(hctx); /* remove all packets older than the one acked from history */ - list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, 
ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge_older(ccid3_tx_hist, + &hctx->ccid3hctx_hist, packet); + if (hctx->ccid3hctx_x < 10) { ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n"); hctx->ccid3hctx_x = 10; @@ -1285,7 +1232,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */ next_tmout = max(inet_csk(sk)->icsk_rto, - 2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10)); + (2 * (hctx->ccid3hctx_s * 100000) / + (hctx->ccid3hctx_x / 10))); /* maths with 100000 and 10 is to prevent overflow with 32 bit */ ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n", @@ -1408,7 +1356,6 @@ static void ccid3_hc_tx_exit(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; - struct ccid3_tx_hist_entry *entry, *next; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); BUG_ON(hctx == NULL); @@ -1417,10 +1364,7 @@ static void ccid3_hc_tx_exit(struct sock *sk) sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer); /* Empty packet history */ - list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) { - list_del_init(&entry->ccid3htx_node); - ccid3_tx_hist_entry_delete(entry); - } + dccp_tx_hist_purge(ccid3_tx_hist, &hctx->ccid3hctx_hist); kfree(dp->dccps_hc_tx_ccid_private); dp->dccps_hc_tx_ccid_private = NULL; @@ -1462,39 +1406,40 @@ static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_state hcrx->ccid3hcrx_state = state; } -static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet) +static int ccid3_hc_rx_add_hist(struct sock *sk, + struct dccp_rx_hist_entry *packet) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next; + struct 
dccp_rx_hist_entry *entry, *next, *iter; u8 num_later = 0; - if (list_empty(&hcrx->ccid3hcrx_hist)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + iter = dccp_rx_hist_head(&hcrx->ccid3hcrx_hist); + if (iter == NULL) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - u64 seqno = packet->ccid3hrx_seqno; - struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next, - struct ccid3_rx_hist_entry, - ccid3hrx_node); - if (after48(seqno, iter->ccid3hrx_seqno)) - list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + const u64 seqno = packet->dccphrx_seqno; + + if (after48(seqno, iter->dccphrx_seqno)) + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, packet); else { - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later = 1; - list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (after48(seqno, iter->ccid3hrx_seqno)) { - list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node); + list_for_each_entry_continue(iter, + &hcrx->ccid3hcrx_hist, + dccphrx_node) { + if (after48(seqno, iter->dccphrx_seqno)) { + dccp_rx_hist_add_entry(&iter->dccphrx_node, + packet); goto trim_history; } - if (iter->ccid3hrx_type == DCCP_PKT_DATA || - iter->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(iter)) num_later++; if (num_later == TFRC_RECV_NUM_LATE_LOSS) { - ccid3_rx_hist_entry_delete(packet); + dccp_rx_hist_entry_delete(ccid3_rx_hist, packet); ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n", dccp_role(sk), sk, seqno); return 1; @@ -1502,7 +1447,8 @@ static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *pac } if (num_later < TFRC_RECV_NUM_LATE_LOSS) - list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist); + dccp_rx_hist_add_entry(&hcrx->ccid3hcrx_hist, + packet); /* FIXME: else what? should we destroy the packet like above? 
*/ } } @@ -1512,12 +1458,12 @@ trim_history: num_later = TFRC_RECV_NUM_LATE_LOSS + 1; if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) { - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } else { @@ -1528,7 +1474,8 @@ trim_history: * We have no loss interval history so we need at least one * rtt:s of data packets to approximate rtt. */ - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { switch (step) { case 0: @@ -1540,10 +1487,11 @@ trim_history: step = 2; /* OK, find next data packet */ num_later = 1; - win_count = entry->ccid3hrx_win_count; + win_count = entry->dccphrx_win_count; break; case 2: - tmp = win_count - entry->ccid3hrx_win_count; + tmp = (win_count - + entry->dccphrx_win_count); if (tmp < 0) tmp += TFRC_WIN_COUNT_LIMIT; if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) { @@ -1554,12 +1502,11 @@ trim_history: num_later = 1; break; case 3: - list_del_init(&entry->ccid3hrx_node); - ccid3_rx_hist_entry_delete(entry); + list_del_init(&entry->dccphrx_node); + dccp_rx_hist_entry_delete(ccid3_rx_hist, entry); break; } - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } } @@ -1571,7 +1518,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct 
ccid3_rx_hist_entry *entry, *packet; + struct dccp_rx_hist_entry *packet; ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); @@ -1594,14 +1541,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) return; } - packet = NULL; - list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { - packet = entry; - break; - } - + packet = dccp_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); if (packet == NULL) { printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n", __FUNCTION__, dccp_role(sk), sk); @@ -1610,12 +1550,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) } do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback)); - hcrx->ccid3hcrx_last_counter = packet->ccid3hrx_win_count; - hcrx->ccid3hcrx_seqno_last_counter = packet->ccid3hrx_seqno; + hcrx->ccid3hcrx_last_counter = packet->dccphrx_win_count; + hcrx->ccid3hcrx_seqno_last_counter = packet->dccphrx_seqno; hcrx->ccid3hcrx_bytes_recv = 0; /* Convert to multiples of 10us */ - hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10; + hcrx->ccid3hcrx_elapsed_time = now_delta(packet->dccphrx_tstamp) / 10; if (hcrx->ccid3hcrx_p == 0) hcrx->ccid3hcrx_pinv = ~0; else @@ -1686,7 +1626,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *next, *tail = NULL; + struct dccp_rx_hist_entry *entry, *next, *tail = NULL; u32 rtt, delta, x_recv, fval, p, tmp2; struct timeval tstamp, tmp_tv; int interval = 0; @@ -1694,19 +1634,19 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) int step = 0; u64 tmp1; - list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) 
{ + if (dccp_rx_hist_entry_data_packet(entry)) { tail = entry; switch (step) { case 0: - tstamp = entry->ccid3hrx_tstamp; - win_count = entry->ccid3hrx_win_count; + tstamp = entry->dccphrx_tstamp; + win_count = entry->dccphrx_win_count; step = 1; break; case 1: - interval = win_count - entry->ccid3hrx_win_count; + interval = win_count - entry->dccphrx_win_count; if (interval < 0) interval += TFRC_WIN_COUNT_LIMIT; if (interval > 4) @@ -1728,7 +1668,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) interval = 1; } found: - timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv); + timeval_sub(tstamp,tail->dccphrx_tstamp,&tmp_tv); rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval; ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n", dccp_role(sk), sk, rtt); @@ -1797,34 +1737,33 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet; - struct ccid3_rx_hist_entry *a_loss = NULL; - struct ccid3_rx_hist_entry *b_loss = NULL; + struct dccp_rx_hist_entry *entry, *next, *packet; + struct dccp_rx_hist_entry *a_loss = NULL; + struct dccp_rx_hist_entry *b_loss = NULL; u64 seq_loss = DCCP_MAX_SEQNO + 1; u8 win_loss = 0; u8 num_later = TFRC_RECV_NUM_LATE_LOSS; - list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) { b_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } if (b_loss == NULL) goto out_update_li; - a_next = b_next; num_later = 1; - list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { if (num_later == 0) 
{ a_loss = entry; break; - } else if (entry->ccid3hrx_type == DCCP_PKT_DATA || - entry->ccid3hrx_type == DCCP_PKT_DATAACK) + } else if (dccp_rx_hist_entry_data_packet(entry)) --num_later; } @@ -1844,12 +1783,13 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) /* Locate a lost data packet */ entry = packet = b_loss; - list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) { - u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno); + list_for_each_entry_safe_continue(entry, next, &hcrx->ccid3hcrx_hist, + dccphrx_node) { + u64 delta = dccp_delta_seqno(entry->dccphrx_seqno, + packet->dccphrx_seqno); if (delta != 0) { - if (packet->ccid3hrx_type == DCCP_PKT_DATA || - packet->ccid3hrx_type == DCCP_PKT_DATAACK) + if (dccp_rx_hist_entry_data_packet(packet)) --delta; /* * FIXME: check this, probably this % usage is because @@ -1858,10 +1798,12 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) */ #if 0 if (delta % DCCP_NDP_LIMIT != - (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT) + (packet->dccphrx_ndp - + entry->dccphrx_ndp) % DCCP_NDP_LIMIT) #endif - if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) { - seq_loss = entry->ccid3hrx_seqno; + if (delta != + packet->dccphrx_ndp - entry->dccphrx_ndp) { + seq_loss = entry->dccphrx_seqno; dccp_inc_seqno(&seq_loss); } } @@ -1871,7 +1813,7 @@ static void ccid3_hc_rx_detect_loss(struct sock *sk) } if (seq_loss != DCCP_MAX_SEQNO + 1) - win_loss = a_loss->ccid3hrx_win_count; + win_loss = a_loss->dccphrx_win_count; out_update_li: ccid3_hc_rx_update_li(sk, seq_loss, win_loss); @@ -1920,7 +1862,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private; - struct ccid3_rx_hist_entry *packet; + struct dccp_rx_hist_entry *packet; struct timeval now; u8 win_count; u32 p_prev; @@ -1964,14 +1906,16 @@ static void 
ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC); + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, + dp->dccps_options_received.dccpor_ndp, + skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!", dccp_role(sk), sk); return; } - win_count = packet->ccid3hrx_win_count; + win_count = packet->dccphrx_win_count; ins = ccid3_hc_rx_add_hist(sk, packet); @@ -2060,7 +2004,7 @@ static void ccid3_hc_rx_exit(struct sock *sk) ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM); /* Empty packet history */ - ccid3_rx_history_delete(&hcrx->ccid3hcrx_hist); + dccp_rx_hist_purge(ccid3_rx_hist, &hcrx->ccid3hcrx_hist); /* Empty loss interval history */ ccid3_loss_interval_history_delete(&hcrx->ccid3hcrx_loss_interval_hist); @@ -2093,41 +2037,38 @@ MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); static __init int ccid3_module_init(void) { - int rc = -ENOMEM; + int rc = -ENOBUFS; - ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history", - sizeof(struct ccid3_tx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_tx_hist_slab == NULL) + ccid3_rx_hist = dccp_rx_hist_new("ccid3"); + if (ccid3_rx_hist == NULL) goto out; - ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history", - sizeof(struct ccid3_rx_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); - if (ccid3_rx_hist_slab == NULL) - goto out_free_tx_history; + ccid3_tx_hist = dccp_tx_hist_new("ccid3"); + if (ccid3_tx_hist == NULL) + goto out_free_rx; - ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history", - sizeof(struct ccid3_loss_interval_hist_entry), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + ccid3_loss_interval_hist_slab = kmem_cache_create("li_hist_ccid3", + sizeof(struct ccid3_loss_interval_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); if (ccid3_loss_interval_hist_slab == NULL) - goto out_free_rx_history; + goto 
out_free_tx; rc = ccid_register(&ccid3); if (rc != 0) goto out_free_loss_interval_history; - out: return rc; + out_free_loss_interval_history: kmem_cache_destroy(ccid3_loss_interval_hist_slab); ccid3_loss_interval_hist_slab = NULL; -out_free_rx_history: - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; -out_free_tx_history: - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; +out_free_tx: + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; +out_free_rx: + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; goto out; } module_init(ccid3_module_init); @@ -2136,13 +2077,13 @@ static __exit void ccid3_module_exit(void) { ccid_unregister(&ccid3); - if (ccid3_tx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_tx_hist_slab); - ccid3_tx_hist_slab = NULL; + if (ccid3_tx_hist != NULL) { + dccp_tx_hist_delete(ccid3_tx_hist); + ccid3_tx_hist = NULL; } - if (ccid3_rx_hist_slab != NULL) { - kmem_cache_destroy(ccid3_rx_hist_slab); - ccid3_rx_hist_slab = NULL; + if (ccid3_rx_hist != NULL) { + dccp_rx_hist_delete(ccid3_rx_hist); + ccid3_rx_hist = NULL; } if (ccid3_loss_interval_hist_slab != NULL) { kmem_cache_destroy(ccid3_loss_interval_hist_slab); diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 5d6b623e64da..d2705fb74195 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -38,15 +38,6 @@ #include #include -#include - -struct ccid3_tx_hist_entry { - struct list_head ccid3htx_node; - u64 ccid3htx_seqno:48, - ccid3htx_win_count:8, - ccid3htx_sent:1; - struct timeval ccid3htx_tstamp; -}; struct ccid3_options_received { u64 ccid3or_seqno:48, @@ -102,15 +93,6 @@ struct ccid3_loss_interval_hist_entry { u32 ccid3lih_interval; }; -struct ccid3_rx_hist_entry { - struct list_head ccid3hrx_node; - u64 ccid3hrx_seqno:48, - ccid3hrx_win_count:4, - ccid3hrx_type:4; - u32 ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */ - struct timeval ccid3hrx_tstamp; -}; - struct ccid3_hc_rx_sock { u64 
ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, diff --git a/net/dccp/packet_history.c b/net/dccp/packet_history.c new file mode 100644 index 000000000000..6b414898f0c4 --- /dev/null +++ b/net/dccp/packet_history.c @@ -0,0 +1,198 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include + +#include "packet_history.h" + +struct dccp_rx_hist *dccp_rx_hist_new(const char *name) +{ + struct dccp_rx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_rx_hist_mask[] = "rx_hist_%s"; + char *slab_name; + + if (hist == NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_rx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_rx_hist_mask, name); + hist->dccprxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_rx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccprxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_new); + +void dccp_rx_hist_delete(struct dccp_rx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccprxh_slab); + + kmem_cache_destroy(hist->dccprxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_delete); + +void dccp_rx_hist_purge(struct dccp_rx_hist *hist, struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphrx_node) { + list_del_init(&entry->dccphrx_node); + kmem_cache_free(hist->dccprxh_slab, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_purge); + +struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list) +{ + struct dccp_rx_hist_entry *entry, *packet = NULL; + + list_for_each_entry(entry, list, dccphrx_node) + if (entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_rx_hist_find_data_packet); + +struct dccp_tx_hist *dccp_tx_hist_new(const char *name) +{ + struct dccp_tx_hist *hist = kmalloc(sizeof(*hist), GFP_ATOMIC); + static const char dccp_tx_hist_mask[] = "tx_hist_%s"; + char *slab_name; + + if (hist 
== NULL) + goto out; + + slab_name = kmalloc(strlen(name) + sizeof(dccp_tx_hist_mask) - 1, + GFP_ATOMIC); + if (slab_name == NULL) + goto out_free_hist; + + sprintf(slab_name, dccp_tx_hist_mask, name); + hist->dccptxh_slab = kmem_cache_create(slab_name, + sizeof(struct dccp_tx_hist_entry), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (hist->dccptxh_slab == NULL) + goto out_free_slab_name; +out: + return hist; +out_free_slab_name: + kfree(slab_name); +out_free_hist: + kfree(hist); + hist = NULL; + goto out; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_new); + +void dccp_tx_hist_delete(struct dccp_tx_hist *hist) +{ + const char* name = kmem_cache_name(hist->dccptxh_slab); + + kmem_cache_destroy(hist->dccptxh_slab); + kfree(name); + kfree(hist); +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_delete); + +struct dccp_tx_hist_entry *dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq) +{ + struct dccp_tx_hist_entry *packet = NULL, *entry; + + list_for_each_entry(entry, list, dccphtx_node) + if (entry->dccphtx_seqno == seq) { + packet = entry; + break; + } + + return packet; +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_find_entry); + +void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, struct list_head *list, + struct dccp_tx_hist_entry *packet) +{ + struct dccp_tx_hist_entry *next; + + list_for_each_entry_safe_continue(packet, next, list, dccphtx_node) { + list_del_init(&packet->dccphtx_node); + dccp_tx_hist_entry_delete(hist, packet); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge_older); + +void dccp_tx_hist_purge(struct dccp_tx_hist *hist, struct list_head *list) +{ + struct dccp_tx_hist_entry *entry, *next; + + list_for_each_entry_safe(entry, next, list, dccphtx_node) { + list_del_init(&entry->dccphtx_node); + dccp_tx_hist_entry_delete(hist, entry); + } +} + +EXPORT_SYMBOL_GPL(dccp_tx_hist_purge); diff --git a/net/dccp/packet_history.h b/net/dccp/packet_history.h new file mode 100644 index 000000000000..565dc96506e9 --- /dev/null +++ b/net/dccp/packet_history.h @@ -0,0 
+1,182 @@ +/* + * net/dccp/packet_history.h + * + * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * + * An implementation of the DCCP protocol + * + * This code has been developed by the University of Waikato WAND + * research group. For further information please see http://www.wand.net.nz/ + * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz + * + * This code also uses code from Lulea University, rereleased as GPL by its + * authors: + * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon + * + * Changes to meet Linux coding standards, to make it meet latest ccid3 draft + * and to make it work as a loadable module in the DCCP stack written by + * Arnaldo Carvalho de Melo . + * + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _DCCP_PKT_HIST_ +#define _DCCP_PKT_HIST_ + +#include +#include +#include +#include + +#include "dccp.h" + +struct dccp_tx_hist_entry { + struct list_head dccphtx_node; + u64 dccphtx_seqno:48, + dccphtx_win_count:8, + dccphtx_sent:1; + struct timeval dccphtx_tstamp; +}; + +struct dccp_rx_hist_entry { + struct list_head dccphrx_node; + u64 dccphrx_seqno:48, + dccphrx_win_count:4, + dccphrx_type:4; + u32 dccphrx_ndp; /* In fact it is from 8 to 24 bits */ + struct timeval dccphrx_tstamp; +}; + +struct dccp_tx_hist { + kmem_cache_t *dccptxh_slab; +}; + +extern struct dccp_tx_hist *dccp_tx_hist_new(const char *name); +extern void dccp_tx_hist_delete(struct dccp_tx_hist *hist); + +struct dccp_rx_hist { + kmem_cache_t *dccprxh_slab; +}; + +extern struct dccp_rx_hist *dccp_rx_hist_new(const char *name); +extern void dccp_rx_hist_delete(struct dccp_rx_hist *hist); +extern struct dccp_rx_hist_entry * + dccp_rx_hist_find_data_packet(const struct list_head *list); + +static inline struct dccp_tx_hist_entry * + dccp_tx_hist_entry_new(struct dccp_tx_hist *hist, + const int prio) +{ + struct dccp_tx_hist_entry *entry = kmem_cache_alloc(hist->dccptxh_slab, + prio); + + if (entry != NULL) + entry->dccphtx_sent = 0; + + return entry; +} + +static inline void dccp_tx_hist_entry_delete(struct dccp_tx_hist *hist, + struct dccp_tx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccptxh_slab, entry); +} + +extern struct dccp_tx_hist_entry * + dccp_tx_hist_find_entry(const struct list_head *list, + const u64 seq); + +static inline void dccp_tx_hist_add_entry(struct list_head *list, + struct dccp_tx_hist_entry *entry) +{ + list_add(&entry->dccphtx_node, list); +} + +extern void dccp_tx_hist_purge_older(struct dccp_tx_hist *hist, + struct list_head *list, + struct dccp_tx_hist_entry *next); + +extern void dccp_tx_hist_purge(struct dccp_tx_hist *hist, + struct list_head *list); + +static inline struct dccp_tx_hist_entry *dccp_tx_hist_head(struct list_head 
*list) +{ + struct dccp_tx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_tx_hist_entry, + dccphtx_node); + return head; +} + +static inline struct dccp_rx_hist_entry * + dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const u32 ndp, + const struct sk_buff *skb, + const int prio) +{ + struct dccp_rx_hist_entry *entry = kmem_cache_alloc(hist->dccprxh_slab, + prio); + + if (entry != NULL) { + const struct dccp_hdr *dh = dccp_hdr(skb); + + entry->dccphrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + entry->dccphrx_win_count = dh->dccph_ccval; + entry->dccphrx_type = dh->dccph_type; + entry->dccphrx_ndp = ndp; + do_gettimeofday(&(entry->dccphrx_tstamp)); + } + + return entry; +} + +static inline void dccp_rx_hist_entry_delete(struct dccp_rx_hist *hist, + struct dccp_rx_hist_entry *entry) +{ + if (entry != NULL) + kmem_cache_free(hist->dccprxh_slab, entry); +} + +extern void dccp_rx_hist_purge(struct dccp_rx_hist *hist, + struct list_head *list); + +static inline void dccp_rx_hist_add_entry(struct list_head *list, + struct dccp_rx_hist_entry *entry) +{ + list_add(&entry->dccphrx_node, list); +} + +static inline struct dccp_rx_hist_entry *dccp_rx_hist_head(struct list_head *list) +{ + struct dccp_rx_hist_entry *head = NULL; + + if (!list_empty(list)) + head = list_entry(list->next, struct dccp_rx_hist_entry, + dccphrx_node); + return head; +} + +static inline int + dccp_rx_hist_entry_data_packet(const struct dccp_rx_hist_entry *entry) +{ + return entry->dccphrx_type == DCCP_PKT_DATA || + entry->dccphrx_type == DCCP_PKT_DATAACK; +} + +#endif /* _DCCP_PKT_HIST_ */ -- cgit v1.2.3 From 5917ed961def82a4dba9198d11a75f79d115a8cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 11 Aug 2005 15:31:15 -0700 Subject: [NETFILTER]: Fix NF_QUEUE_NR() macro I obviously wanted to use bitwise-or, not logical or. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ac3c61411d4b..189ba67ba603 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3 From 0a242efc4fb859b2da506cdf8f3366231602e4ff Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Thu, 11 Aug 2005 15:32:53 -0700 Subject: [NET]: Deinline netif_carrier_{on,off}(). # grep -r 'netif_carrier_o[nf]' linux-2.6.12 | wc -l 246 # size vmlinux.org vmlinux.carrier text data bss dec hex filename 4339634 1054414 259296 5653344 564360 vmlinux.org 4337710 1054414 259296 5651420 563bdc vmlinux.carrier And this ain't an allyesconfig kernel! Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 14 ++------------ net/sched/sch_generic.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 296cf93a65e0..d8e52edfd526 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -699,19 +699,9 @@ static inline int netif_carrier_ok(const struct net_device *dev) extern void __netdev_watchdog_up(struct net_device *dev); -static inline void netif_carrier_on(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); -} +extern void netif_carrier_on(struct net_device *dev); -static inline void netif_carrier_off(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); -} +extern void netif_carrier_off(struct net_device *dev); /* Hot-plugging. */ static inline int netif_device_present(struct net_device *dev) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d066c965342..99ceb91f0150 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -238,6 +238,20 @@ static void dev_watchdog_down(struct net_device *dev) spin_unlock_bh(&dev->xmit_lock); } +void netif_carrier_on(struct net_device *dev) +{ + if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) + linkwatch_fire_event(dev); + if (netif_running(dev)) + __netdev_watchdog_up(dev); +} + +void netif_carrier_off(struct net_device *dev) +{ + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + linkwatch_fire_event(dev); +} + /* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper. 
@@ -600,6 +614,8 @@ void dev_shutdown(struct net_device *dev) } EXPORT_SYMBOL(__netdev_watchdog_up); +EXPORT_SYMBOL(netif_carrier_on); +EXPORT_SYMBOL(netif_carrier_off); EXPORT_SYMBOL(noop_qdisc); EXPORT_SYMBOL(noop_qdisc_ops); EXPORT_SYMBOL(qdisc_create_dflt); -- cgit v1.2.3 From b766b305d3f2d8be173e5d9853534ea1afdbabba Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 12 Aug 2005 11:36:44 -0700 Subject: [NETFILTER]: Fix gcc-3.4.x warning about implicit operator precedence Fix gcc-3.4.x warning about implicit operator precedence in NF_QUEUE_NR() Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 189ba67ba603..be365e70ee99 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) +#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3 From 505cbfc577f3fa778005e2800b869eca25727d5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:19:38 -0300 Subject: [IPV6]: Generalise the tcp_v6_lookup routines In the same way as was done with the v4 counterparts, this will be moved to inet6_hashtables.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 5 ++ include/net/inet6_hashtables.h | 26 ++++++++ net/ipv4/Kconfig | 3 - net/ipv4/tcp_diag.c | 40 +++++------- net/ipv6/tcp_ipv6.c | 139 ++++++++++++++++++++++------------------- 5 files changed, 122 insertions(+), 91 deletions(-) create mode 100644 include/net/inet6_hashtables.h (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 777339b68464..3c7dbc6a0a70 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -193,6 +193,11 @@ struct inet6_skb_parm { #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +static inline int inet6_iif(const struct sk_buff *skb) +{ + return IP6CB(skb)->iif; +} + struct tcp6_request_sock { struct tcp_request_sock req; struct in6_addr loc_addr; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 000000000000..297c2b16967a --- /dev/null +++ b/include/net/inet6_hashtables.h @@ -0,0 +1,26 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _INET6_HASHTABLES_H +#define _INET6_HASHTABLES_H + +#include + +struct in6_addr; +struct inet_hashinfo; + +extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif); +#endif /* _INET6_HASHTABLES_H */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c844954c1ad5..a79b4f9c10c5 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -425,9 +425,6 @@ config IP_TCPDIAG If unsure, say Y. -config IP_TCPDIAG_IPV6 - def_bool (IP_TCPDIAG=y && IPV6=y) || (IP_TCPDIAG=m && IPV6) - config IP_TCPDIAG_DCCP def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 8bf495c698f8..b812191b2f5c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -24,6 +24,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -102,7 +106,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = 0; r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); @@ -121,7 +125,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->id.tcpdiag_src[0] = inet->rcv_saddr; r->id.tcpdiag_dst[0] = inet->daddr; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -196,19 +200,6 @@ nlmsg_failure: return -1; } -#ifdef CONFIG_IP_TCPDIAG_IPV6 -extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif); -#else -static inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) -{ - return NULL; -} -#endif - static int 
tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { int err; @@ -225,11 +216,14 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) req->id.tcpdiag_dport, req->id.tcpdiag_src[0], req->id.tcpdiag_sport, req->id.tcpdiag_if); } -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else if (req->tcpdiag_family == AF_INET6) { - sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport, - (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport, - req->id.tcpdiag_if); + sk = inet6_lookup(hashinfo, + (struct in6_addr*)req->id.tcpdiag_dst, + req->id.tcpdiag_dport, + (struct in6_addr*)req->id.tcpdiag_src, + req->id.tcpdiag_sport, + req->id.tcpdiag_if); } #endif else { @@ -440,7 +434,7 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, struct inet_sock *inet = inet_sk(sk); entry.family = sk->sk_family; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (entry.family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -502,7 +496,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, r->tcpdiag_wqueue = 0; r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = 0; -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->tcpdiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, &tcp6_rsk(req)->loc_addr); @@ -567,13 +561,13 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (bc) { entry.saddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? tcp6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = -#ifdef CONFIG_IP_TCPDIAG_IPV6 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? 
tcp6_rsk(req)->rmt_addr.s6_addr32 : #endif diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3312cb8742e2..2bc7fafe7668 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -76,26 +76,27 @@ static struct tcp_func ipv6_mapped; static struct tcp_func ipv6_specific; /* I have no idea if this is a good hash for v6 or not. -DaveM */ -static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport, - struct in6_addr *faddr, u16 fport) +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) { int hashent = (lport ^ fport); hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); hashent ^= hashent>>16; hashent ^= hashent>>8; - return (hashent & (tcp_hashinfo.ehash_size - 1)); + return (hashent & (ehash_size - 1)); } -static __inline__ int tcp_v6_sk_hashfn(struct sock *sk) +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *laddr = &np->rcv_saddr; - struct in6_addr *faddr = &np->daddr; - __u16 lport = inet->num; - __u16 fport = inet->dport; - return tcp_v6_hashfn(laddr, lport, faddr, fport); + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); } static inline int tcp_v6_bind_conflict(const struct sock *sk, @@ -231,7 +232,7 @@ static __inline__ void __tcp_v6_hash(struct sock *sk) lock = &tcp_hashinfo.lhash_lock; inet_listen_wlock(&tcp_hashinfo); } else { - sk->sk_hashent = tcp_v6_sk_hashfn(sk); + sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size); list = &tcp_hashinfo.ehash[sk->sk_hashent].chain; lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock; 
write_lock(lock); @@ -258,7 +259,10 @@ static void tcp_v6_hash(struct sock *sk) } } -static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif) +static struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif) { struct sock *sk; struct hlist_node *node; @@ -266,8 +270,8 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor int score, hiscore; hiscore=0; - read_lock(&tcp_hashinfo.lhash_lock); - sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) { + read_lock(&hashinfo->lhash_lock); + sk_for_each(sk, node, &hashinfo->listening_hash[inet_lhashfn(hnum)]) { if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -294,7 +298,7 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor } if (result) sock_hold(result); - read_unlock(&tcp_hashinfo.lhash_lock); + read_unlock(&hashinfo->lhash_lock); return result; } @@ -304,9 +308,13 @@ static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned shor * The sockhash lock must be held as a reader here. */ -static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { struct sock *sk; const struct hlist_node *node; @@ -314,8 +322,9 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u /* Optimize here for direct hit, only listening connections can * have wildcards anyways. 
*/ - const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport); - struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; read_lock(&head->lock); sk_for_each(sk, node, &head->chain) { @@ -324,7 +333,7 @@ static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u goto hit; /* You sunk my battleship! */ } /* Must check for a TIME_WAIT'er before going to listener hash. */ - sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) { + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); if(*((__u32 *)&(tw->tw_dport)) == ports && @@ -347,34 +356,36 @@ hit: } -static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 hnum, - int dif) +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) { - struct sock *sk; - - sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif); - + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); if (sk) return sk; - return tcp_v6_lookup_listener(daddr, hnum, dif); + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); } -inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport, - struct in6_addr *daddr, u16 dport, - int dif) +inline struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif) { struct sock *sk; local_bh_disable(); - sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif); + sk = __inet6_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); local_bh_enable(); return sk; } -EXPORT_SYMBOL_GPL(tcp_v6_lookup); 
+EXPORT_SYMBOL_GPL(inet6_lookup); /* @@ -454,16 +465,17 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb) } } -static int __tcp_v6_check_established(struct sock *sk, __u16 lport, +static int __tcp_v6_check_established(struct sock *sk, const __u16 lport, struct inet_timewait_sock **twp) { struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *daddr = &np->rcv_saddr; - struct in6_addr *saddr = &np->daddr; - int dif = sk->sk_bound_dev_if; + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *daddr = &np->rcv_saddr; + const struct in6_addr *saddr = &np->daddr; + const int dif = sk->sk_bound_dev_if; const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); - const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport); + const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport, + tcp_hashinfo.ehash_size); struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash]; struct sock *sk2; const struct hlist_node *node; @@ -637,11 +649,6 @@ out: } } -static __inline__ int tcp_v6_iif(struct sk_buff *skb) -{ - return IP6CB(skb)->iif; -} - static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -833,14 +840,15 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data; - struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; __u32 seq; - sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, + th->source, skb->dev->ifindex); if (sk == NULL) { ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -927,7 +935,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto 
out; req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, tcp_v6_iif(skb)); + &hdr->saddr, inet6_iif(skb)); if (!req) goto out; @@ -1138,7 +1146,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1207,7 +1215,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 buff->csum); fl.proto = IPPROTO_TCP; - fl.oif = tcp_v6_iif(skb); + fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; @@ -1245,20 +1253,18 @@ static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req) static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) { struct request_sock *req, **prev; - struct tcphdr *th = skb->h.th; + const struct tcphdr *th = skb->h.th; struct sock *nsk; /* Find possible connection requests. */ req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, tcp_v6_iif(skb)); + &skb->nh.ipv6h->daddr, inet6_iif(skb)); if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr, - th->source, - &skb->nh.ipv6h->daddr, - ntohs(th->dest), - tcp_v6_iif(skb)); + nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr, + th->source, &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1346,7 +1352,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) - treq->iif = tcp_v6_iif(skb); + treq->iif = inet6_iif(skb); if (isn == 0) isn = tcp_v6_init_sequence(sk,skb); @@ -1411,7 +1417,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_backlog_rcv = tcp_v4_do_rcv; newnp->pktoptions = NULL; 
newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* @@ -1516,7 +1522,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, skb_set_owner_r(newnp->pktoptions, newsk); } newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); + newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = skb->nh.ipv6h->hop_limit; /* Clone native IPv6 options from listening socket (if any) @@ -1691,7 +1697,7 @@ ipv6_pktoptions: if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo) - np->mcast_oif = tcp_v6_iif(opt_skb); + np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { @@ -1746,8 +1752,9 @@ static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h); TCP_SKB_CB(skb)->sacked = 0; - sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source, - &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source, + &skb->nh.ipv6h->daddr, ntohs(th->dest), + inet6_iif(skb)); if (!sk) goto no_tcp_socket; @@ -1818,7 +1825,9 @@ do_time_wait: { struct sock *sk2; - sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb)); + sk2 = inet6_lookup_listener(&tcp_hashinfo, + &skb->nh.ipv6h->daddr, + ntohs(th->dest), inet6_iif(skb)); if (sk2 != NULL) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule(tw, &tcp_death_row); -- cgit v1.2.3 From 4f5736c4c7cf6f9bd8db82b712cfdd51c87e06b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:27:49 -0300 Subject: [TCPDIAG]: Introduce inet_diag_{register,unregister} Next changeset will rename tcp_diag to inet_diag and move the tcp_diag code out of it and into a new tcp_diag.c, 
similar to the net/dccp/diag.c introduced in this changeset, completing the transition to a generic inet_diag infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp_diag.h | 19 ++++++ net/dccp/Kconfig | 5 ++ net/dccp/Makefile | 4 ++ net/dccp/diag.c | 47 +++++++++++++++ net/ipv4/Kconfig | 3 - net/ipv4/tcp_diag.c | 153 ++++++++++++++++++++++++++++++++++------------- 6 files changed, 186 insertions(+), 45 deletions(-) create mode 100644 net/dccp/diag.c (limited to 'include/linux') diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 190494ebcfb8..910c34ba19c0 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -5,6 +5,8 @@ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 +#define INET_DIAG_GETSOCK_MAX 24 + /* Socket identity */ struct tcpdiag_sockid { @@ -125,4 +127,21 @@ struct tcpvegas_info { __u32 tcpv_minrtt; }; +#ifdef __KERNEL__ +struct sock; +struct inet_hashinfo; + +struct inet_diag_handler { + struct inet_hashinfo *idiag_hashinfo; + void (*idiag_get_info)(struct sock *sk, + struct tcpdiagmsg *r, + void *info); + __u16 idiag_info_size; + __u16 idiag_type; +}; + +extern int inet_diag_register(const struct inet_diag_handler *handler); +extern void inet_diag_unregister(const struct inet_diag_handler *handler); +#endif /* __KERNEL__ */ + #endif /* _TCP_DIAG_H_ */ diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 90460bc629b3..ff5b5459b97a 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -19,6 +19,11 @@ config IP_DCCP If in doubt, say N. 
+config IP_DCCP_DIAG + depends on IP_DCCP && IP_TCPDIAG + def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + def_tristate m + source "net/dccp/ccids/Kconfig" endmenu diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 25a50bdbf1bb..5741fffc436f 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -3,4 +3,8 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o packet_history.o +obj-$(CONFIG_IP_DCCP_DIAG) += dccp_diag.o + obj-y += ccids/ + +dccp_diag-y := diag.o diff --git a/net/dccp/diag.c b/net/dccp/diag.c new file mode 100644 index 000000000000..4d9037c56ddc --- /dev/null +++ b/net/dccp/diag.c @@ -0,0 +1,47 @@ +/* + * net/dccp/diag.c + * + * An implementation of the DCCP protocol + * Arnaldo Carvalho de Melo + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +#include +#include + +#include "dccp.h" + +static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, + void *_info) +{ + r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; +} + +static struct inet_diag_handler dccp_diag_handler = { + .idiag_hashinfo = &dccp_hashinfo, + .idiag_get_info = dccp_diag_get_info, + .idiag_type = DCCPDIAG_GETSOCK, + .idiag_info_size = 0, +}; + +static int __init dccp_diag_init(void) +{ + return inet_diag_register(&dccp_diag_handler); +} + +static void __exit dccp_diag_fini(void) +{ + inet_diag_unregister(&dccp_diag_handler); +} + +module_init(dccp_diag_init); +module_exit(dccp_diag_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo "); +MODULE_DESCRIPTION("DCCP inet_diag handler"); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 960c02faf440..1e6db2a896b9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -423,9 +423,6 @@ config IP_TCPDIAG If unsure, say Y. 
-config IP_TCPDIAG_DCCP - def_bool (IP_TCPDIAG=y && IP_DCCP=y) || (IP_TCPDIAG=m && IP_DCCP) - config TCP_CONG_ADVANCED bool "TCP: advanced congestion control" ---help--- diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b812191b2f5c..b13b71cb9ced 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -34,6 +34,8 @@ #include +static const struct inet_diag_handler **inet_diag_table; + struct tcpdiag_entry { u32 *saddr; @@ -61,18 +63,24 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, const struct inet_connection_sock *icsk = inet_csk(sk); struct tcpdiagmsg *r; struct nlmsghdr *nlh; - struct tcp_info *info = NULL; + void *info = NULL; struct tcpdiag_meminfo *minfo = NULL; unsigned char *b = skb->tail; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[unlh->nlmsg_type]; + BUG_ON(handler == NULL); nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); nlh->nlmsg_flags = nlmsg_flags; + r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { if (ext & (1<<(TCPDIAG_MEMINFO-1))) minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo)); if (ext & (1<<(TCPDIAG_INFO-1))) - info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); + info = TCPDIAG_PUT(skb, TCPDIAG_INFO, + handler->idiag_info_size); if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { size_t len = strlen(icsk->icsk_ca_ops->name); @@ -155,19 +163,6 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r->tcpdiag_expires = 0; } #undef EXPIRES_IN_MS - /* - * Ahem... for now we'll have some knowledge about TCP -acme - * But this is just one of two small exceptions, both in this - * function, so lets close our eyes for some 15 lines or so... 
8) - * -acme - */ - if (sk->sk_protocol == IPPROTO_TCP) { - const struct tcp_sock *tp = tcp_sk(sk); - - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; - } else - r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; r->tcpdiag_uid = sock_i_uid(sk); r->tcpdiag_inode = sock_i_ino(sk); @@ -179,13 +174,7 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); } - /* Ahem... for now we'll have some knowledge about TCP -acme */ - if (info) { - if (sk->sk_protocol == IPPROTO_TCP) - tcp_get_info(sk, info); - else - memset(info, 0, sizeof(*info)); - } + handler->idiag_get_info(sk, r, info); if (sk->sk_state < TCP_TIME_WAIT && icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) @@ -206,11 +195,13 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) struct sock *sk; struct tcpdiagreq *req = NLMSG_DATA(nlh); struct sk_buff *rep; - struct inet_hashinfo *hashinfo = &tcp_hashinfo; -#ifdef CONFIG_IP_TCPDIAG_DCCP - if (nlh->nlmsg_type == DCCPDIAG_GETSOCK) - hashinfo = &dccp_hashinfo; -#endif + struct inet_hashinfo *hashinfo; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + if (req->tcpdiag_family == AF_INET) { sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], req->id.tcpdiag_dport, req->id.tcpdiag_src[0], @@ -241,9 +232,10 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+ - sizeof(struct tcpdiag_meminfo)+ - sizeof(struct tcp_info)+64), GFP_KERNEL); + rep = alloc_skb(NLMSG_SPACE((sizeof(struct tcpdiagmsg) + + sizeof(struct tcpdiag_meminfo) + + handler->idiag_info_size + 64)), + GFP_KERNEL); if (!rep) goto out; @@ -603,15 +595,16 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) int i, num; int s_i, 
s_num; struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; + handler = inet_diag_table[cb->nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + s_i = cb->args[1]; s_num = num = cb->args[2]; - hashinfo = &tcp_hashinfo; -#ifdef CONFIG_IP_TCPDIAG_DCCP - if (cb->nlh->nlmsg_type == DCCPDIAG_GETSOCK) - hashinfo = &dccp_hashinfo; -#endif + if (cb->args[0] == 0) { if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) goto skip_listen_ht; @@ -745,13 +738,12 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; - if (nlh->nlmsg_type != TCPDIAG_GETSOCK -#ifdef CONFIG_IP_TCPDIAG_DCCP - && nlh->nlmsg_type != DCCPDIAG_GETSOCK -#endif - ) + if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) goto err_inval; + if (inet_diag_table[nlh->nlmsg_type] == NULL) + return -ENOENT; + if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) goto err_inval; @@ -803,18 +795,95 @@ static void tcpdiag_rcv(struct sock *sk, int len) } } +static void tcp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, + void *_info) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_info *info = _info; + + r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + if (info != NULL) + tcp_get_info(sk, info); +} + +static struct inet_diag_handler tcp_diag_handler = { + .idiag_hashinfo = &tcp_hashinfo, + .idiag_get_info = tcp_diag_get_info, + .idiag_type = TCPDIAG_GETSOCK, + .idiag_info_size = sizeof(struct tcp_info), +}; + +static DEFINE_SPINLOCK(inet_diag_register_lock); + +int inet_diag_register(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + int err = -EINVAL; + + if (type >= INET_DIAG_GETSOCK_MAX) + goto out; + + spin_lock(&inet_diag_register_lock); + err = -EEXIST; + if (inet_diag_table[type] == NULL) { + inet_diag_table[type] = h; + err = 0; + } + 
spin_unlock(&inet_diag_register_lock); +out: + return err; +} +EXPORT_SYMBOL_GPL(inet_diag_register); + +void inet_diag_unregister(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + + if (type >= INET_DIAG_GETSOCK_MAX) + return; + + spin_lock(&inet_diag_register_lock); + inet_diag_table[type] = NULL; + spin_unlock(&inet_diag_register_lock); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(inet_diag_unregister); + static int __init tcpdiag_init(void) { + const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + sizeof(struct inet_diag_handler *)); + int err = -ENOMEM; + + inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); + if (!inet_diag_table) + goto out; + + memset(inet_diag_table, 0, inet_diag_table_size); + tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, THIS_MODULE); if (tcpnl == NULL) - return -ENOMEM; - return 0; + goto out_free_table; + + err = inet_diag_register(&tcp_diag_handler); + if (err) + goto out_sock_release; +out: + return err; +out_sock_release: + sock_release(tcpnl->sk_socket); +out_free_table: + kfree(inet_diag_table); + goto out; } static void __exit tcpdiag_exit(void) { sock_release(tcpnl->sk_socket); + kfree(inet_diag_table); } module_init(tcpdiag_init); -- cgit v1.2.3 From 73c1f4a033675f168df7e98bbeeafca3c644b8a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:51:49 -0300 Subject: [TCPDIAG]: Just rename everything to inet_diag Next changeset will rename tcp_diag.[ch] to inet_diag.[ch]. I'm taking this longer route so as to easy review, making clear the changes made all along the way. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 +- include/linux/tcp_diag.h | 135 ++++++++-------- include/net/tcp.h | 2 +- net/dccp/Kconfig | 4 +- net/dccp/diag.c | 4 +- net/ipv4/Kconfig | 10 +- net/ipv4/Makefile | 2 +- net/ipv4/tcp_diag.c | 391 ++++++++++++++++++++++++----------------------- net/ipv4/tcp_vegas.c | 4 +- net/ipv4/tcp_westwood.c | 4 +- 10 files changed, 275 insertions(+), 283 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1c50fea8995b..d5e09bcd80f9 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -8,7 +8,7 @@ #define NETLINK_W1 1 /* 1-wire subsystem */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ #define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ +#define NETLINK_INET_DIAG 4 /* INET socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 910c34ba19c0..a4606e5810e5 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -1,5 +1,5 @@ -#ifndef _TCP_DIAG_H_ -#define _TCP_DIAG_H_ 1 +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 /* Just some random number */ #define TCPDIAG_GETSOCK 18 @@ -8,39 +8,36 @@ #define INET_DIAG_GETSOCK_MAX 24 /* Socket identity */ -struct tcpdiag_sockid -{ - __u16 tcpdiag_sport; - __u16 tcpdiag_dport; - __u32 tcpdiag_src[4]; - __u32 tcpdiag_dst[4]; - __u32 tcpdiag_if; - __u32 tcpdiag_cookie[2]; -#define TCPDIAG_NOCOOKIE (~0U) +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) }; /* Request structure */ -struct tcpdiagreq -{ - __u8 tcpdiag_family; /* Family of addresses. 
*/ - __u8 tcpdiag_src_len; - __u8 tcpdiag_dst_len; - __u8 tcpdiag_ext; /* Query extended information */ +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. */ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ - struct tcpdiag_sockid id; + struct inet_diag_sockid id; - __u32 tcpdiag_states; /* States to dump */ - __u32 tcpdiag_dbs; /* Tables to dump (NI) */ + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ }; -enum -{ - TCPDIAG_REQ_NONE, - TCPDIAG_REQ_BYTECODE, +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, }; -#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE /* Bytecode is sequence of 4 byte commands followed by variable arguments. * All the commands identified by "code" are conditional jumps forward: @@ -48,28 +45,25 @@ enum * length of the command and its arguments. */ -struct tcpdiag_bc_op -{ +struct inet_diag_bc_op { unsigned char code; unsigned char yes; unsigned short no; }; -enum -{ - TCPDIAG_BC_NOP, - TCPDIAG_BC_JMP, - TCPDIAG_BC_S_GE, - TCPDIAG_BC_S_LE, - TCPDIAG_BC_D_GE, - TCPDIAG_BC_D_LE, - TCPDIAG_BC_AUTO, - TCPDIAG_BC_S_COND, - TCPDIAG_BC_D_COND, +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, }; -struct tcpdiag_hostcond -{ +struct inet_diag_hostcond { __u8 family; __u8 prefix_len; int port; @@ -78,47 +72,44 @@ struct tcpdiag_hostcond /* Base info structure. It contains socket identity (addrs/ports/cookie) * and, alas, the information shown by netstat. 
*/ -struct tcpdiagmsg -{ - __u8 tcpdiag_family; - __u8 tcpdiag_state; - __u8 tcpdiag_timer; - __u8 tcpdiag_retrans; - - struct tcpdiag_sockid id; - - __u32 tcpdiag_expires; - __u32 tcpdiag_rqueue; - __u32 tcpdiag_wqueue; - __u32 tcpdiag_uid; - __u32 tcpdiag_inode; +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; + + struct inet_diag_sockid id; + + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; }; /* Extensions */ -enum -{ - TCPDIAG_NONE, - TCPDIAG_MEMINFO, - TCPDIAG_INFO, - TCPDIAG_VEGASINFO, - TCPDIAG_CONG, +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, }; -#define TCPDIAG_MAX TCPDIAG_CONG +#define INET_DIAG_MAX INET_DIAG_CONG -/* TCPDIAG_MEM */ +/* INET_DIAG_MEM */ -struct tcpdiag_meminfo -{ - __u32 tcpdiag_rmem; - __u32 tcpdiag_wmem; - __u32 tcpdiag_fmem; - __u32 tcpdiag_tmem; +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; }; -/* TCPDIAG_VEGASINFO */ +/* INET_DIAG_VEGASINFO */ struct tcpvegas_info { __u32 tcpv_enabled; @@ -134,7 +125,7 @@ struct inet_hashinfo; struct inet_diag_handler { struct inet_hashinfo *idiag_hashinfo; void (*idiag_get_info)(struct sock *sk, - struct tcpdiagmsg *r, + struct inet_diag_msg *r, void *info); __u16 idiag_info_size; __u16 idiag_type; @@ -144,4 +135,4 @@ extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* __KERNEL__ */ -#endif /* _TCP_DIAG_H_ */ +#endif /* _INET_DIAG_H_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0b3f7294c5c7..fef122782b44 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -690,7 +690,7 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked); - /* 
get info for tcp_diag (optional) */ + /* get info for inet_diag (optional) */ void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index ff5b5459b97a..efce4f346fd0 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -20,8 +20,8 @@ config IP_DCCP If in doubt, say N. config IP_DCCP_DIAG - depends on IP_DCCP && IP_TCPDIAG - def_tristate y if (IP_DCCP = y && IP_TCPDIAG = y) + depends on IP_DCCP && IP_INET_DIAG + def_tristate y if (IP_DCCP = y && IP_INET_DIAG = y) def_tristate m source "net/dccp/ccids/Kconfig" diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 4d9037c56ddc..9f07eff2e3b6 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -16,10 +16,10 @@ #include "dccp.h" -static void dccp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, +static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { - r->tcpdiag_rqueue = r->tcpdiag_wqueue = 0; + r->idiag_rqueue = r->idiag_wqueue = 0; } static struct inet_diag_handler dccp_diag_handler = { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 1e6db2a896b9..019e88d8f29e 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -413,13 +413,13 @@ config INET_TUNNEL If unsure, say Y. -config IP_TCPDIAG - tristate "IP: TCP socket monitoring interface" +config IP_INET_DIAG + tristate "IP: INET socket monitoring interface" default y ---help--- - Support for TCP socket monitoring interface used by native Linux - tools such as ss. ss is included in iproute2, currently downloadable - at . + Support for INET (TCP, DCCP, etc) socket monitoring interface used by + native Linux tools such as ss. ss is included in iproute2, currently + downloadable at . If unsure, say Y. 
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ea0e1d87dc7e..9b1c894039a9 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ -obj-$(CONFIG_IP_TCPDIAG) += tcp_diag.o +obj-$(CONFIG_IP_INET_DIAG) += tcp_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index b13b71cb9ced..24abe82e23a0 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -1,7 +1,7 @@ /* - * tcp_diag.c Module for monitoring TCP sockets. + * inet_diag.c Module for monitoring INET transport protocols sockets. * - * Version: $Id: tcp_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ + * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ * * Authors: Alexey Kuznetsov, * @@ -36,8 +36,7 @@ static const struct inet_diag_handler **inet_diag_table; -struct tcpdiag_entry -{ +struct inet_diag_entry { u32 *saddr; u32 *daddr; u16 sport; @@ -46,25 +45,21 @@ struct tcpdiag_entry u16 userlocks; }; -static struct sock *tcpnl; +static struct sock *idiagnl; -#define TCPDIAG_PUT(skb, attrtype, attrlen) \ +#define INET_DIAG_PUT(skb, attrtype, attrlen) \ RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) -#ifdef CONFIG_IP_TCPDIAG_DCCP -extern struct inet_hashinfo dccp_hashinfo; -#endif - -static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, +static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, int ext, u32 pid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcpdiagmsg *r; + struct inet_diag_msg *r; struct nlmsghdr *nlh; void *info = NULL; - struct tcpdiag_meminfo *minfo = NULL; + struct inet_diag_meminfo 
*minfo = NULL; unsigned char *b = skb->tail; const struct inet_diag_handler *handler; @@ -76,51 +71,52 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, r = NLMSG_DATA(nlh); if (sk->sk_state != TCP_TIME_WAIT) { - if (ext & (1<<(TCPDIAG_MEMINFO-1))) - minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo)); - if (ext & (1<<(TCPDIAG_INFO-1))) - info = TCPDIAG_PUT(skb, TCPDIAG_INFO, + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) + minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, + sizeof(*minfo)); + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, handler->idiag_info_size); - if ((ext & (1 << (TCPDIAG_CONG - 1))) && icsk->icsk_ca_ops) { + if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { size_t len = strlen(icsk->icsk_ca_ops->name); - strcpy(TCPDIAG_PUT(skb, TCPDIAG_CONG, len+1), + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), icsk->icsk_ca_ops->name); } } - r->tcpdiag_family = sk->sk_family; - r->tcpdiag_state = sk->sk_state; - r->tcpdiag_timer = 0; - r->tcpdiag_retrans = 0; + r->idiag_family = sk->sk_family; + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; - r->id.tcpdiag_if = sk->sk_bound_dev_if; - r->id.tcpdiag_cookie[0] = (u32)(unsigned long)sk; - r->id.tcpdiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)sk; + r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - if (r->tcpdiag_state == TCP_TIME_WAIT) { + if (r->idiag_state == TCP_TIME_WAIT) { const struct inet_timewait_sock *tw = inet_twsk(sk); long tmo = tw->tw_ttd - jiffies; if (tmo < 0) tmo = 0; - r->id.tcpdiag_sport = tw->tw_sport; - r->id.tcpdiag_dport = tw->tw_dport; - r->id.tcpdiag_src[0] = tw->tw_rcv_saddr; - r->id.tcpdiag_dst[0] = tw->tw_daddr; - r->tcpdiag_state = tw->tw_substate; - r->tcpdiag_timer = 3; - r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ; - r->tcpdiag_rqueue = 0; - r->tcpdiag_wqueue = 0; - 
r->tcpdiag_uid = 0; - r->tcpdiag_inode = 0; + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { + if (r->idiag_family == AF_INET6) { const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &tcp6tw->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &tcp6tw->tw_v6_daddr); } #endif @@ -128,18 +124,18 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, return skb->len; } - r->id.tcpdiag_sport = inet->sport; - r->id.tcpdiag_dport = inet->dport; - r->id.tcpdiag_src[0] = inet->rcv_saddr; - r->id.tcpdiag_dst[0] = inet->daddr; + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = inet->dport; + r->id.idiag_src[0] = inet->rcv_saddr; + r->id.idiag_dst[0] = inet->daddr; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { + if (r->idiag_family == AF_INET6) { struct ipv6_pinfo *np = inet6_sk(sk); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &np->rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &np->daddr); } #endif @@ -147,31 +143,31 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, #define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ if (icsk->icsk_pending == ICSK_TIME_RETRANS) { - r->tcpdiag_timer = 1; - r->tcpdiag_retrans = icsk->icsk_retransmits; - r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); 
+ r->idiag_timer = 1; + r->idiag_retrans = icsk->icsk_retransmits; + r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { - r->tcpdiag_timer = 4; - r->tcpdiag_retrans = icsk->icsk_probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + r->idiag_timer = 4; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); } else if (timer_pending(&sk->sk_timer)) { - r->tcpdiag_timer = 2; - r->tcpdiag_retrans = icsk->icsk_probes_out; - r->tcpdiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + r->idiag_timer = 2; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); } else { - r->tcpdiag_timer = 0; - r->tcpdiag_expires = 0; + r->idiag_timer = 0; + r->idiag_expires = 0; } #undef EXPIRES_IN_MS - r->tcpdiag_uid = sock_i_uid(sk); - r->tcpdiag_inode = sock_i_ino(sk); + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = sock_i_ino(sk); if (minfo) { - minfo->tcpdiag_rmem = atomic_read(&sk->sk_rmem_alloc); - minfo->tcpdiag_wmem = sk->sk_wmem_queued; - minfo->tcpdiag_fmem = sk->sk_forward_alloc; - minfo->tcpdiag_tmem = atomic_read(&sk->sk_wmem_alloc); + minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_wmem = sk->sk_wmem_queued; + minfo->idiag_fmem = sk->sk_forward_alloc; + minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); } handler->idiag_get_info(sk, r, info); @@ -189,11 +185,11 @@ nlmsg_failure: return -1; } -static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) +static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) { int err; struct sock *sk; - struct tcpdiagreq *req = NLMSG_DATA(nlh); + struct inet_diag_req *req = NLMSG_DATA(nlh); struct sk_buff *rep; struct inet_hashinfo *hashinfo; const struct inet_diag_handler *handler; @@ -202,19 +198,19 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) BUG_ON(handler == 
NULL); hashinfo = handler->idiag_hashinfo; - if (req->tcpdiag_family == AF_INET) { - sk = inet_lookup(hashinfo, req->id.tcpdiag_dst[0], - req->id.tcpdiag_dport, req->id.tcpdiag_src[0], - req->id.tcpdiag_sport, req->id.tcpdiag_if); + if (req->idiag_family == AF_INET) { + sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->tcpdiag_family == AF_INET6) { + else if (req->idiag_family == AF_INET6) { sk = inet6_lookup(hashinfo, - (struct in6_addr*)req->id.tcpdiag_dst, - req->id.tcpdiag_dport, - (struct in6_addr*)req->id.tcpdiag_src, - req->id.tcpdiag_sport, - req->id.tcpdiag_if); + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); } #endif else { @@ -225,26 +221,27 @@ static int tcpdiag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) return -ENOENT; err = -ESTALE; - if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE || - req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != req->id.tcpdiag_cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.tcpdiag_cookie[1])) + if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || + req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && + ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || + (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) goto out; err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct tcpdiagmsg) + - sizeof(struct tcpdiag_meminfo) + + rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + handler->idiag_info_size + 64)), GFP_KERNEL); if (!rep) goto out; - if (tcpdiag_fill(rep, sk, req->tcpdiag_ext, + if (inet_diag_fill(rep, sk, req->idiag_ext, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, nlh) <= 0) BUG(); - err = netlink_unicast(tcpnl, rep, 
NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); if (err > 0) err = 0; @@ -285,42 +282,42 @@ static int bitstring_match(const u32 *a1, const u32 *a2, int bits) } -static int tcpdiag_bc_run(const void *bc, int len, - const struct tcpdiag_entry *entry) +static int inet_diag_bc_run(const void *bc, int len, + const struct inet_diag_entry *entry) { while (len > 0) { int yes = 1; - const struct tcpdiag_bc_op *op = bc; + const struct inet_diag_bc_op *op = bc; switch (op->code) { - case TCPDIAG_BC_NOP: + case INET_DIAG_BC_NOP: break; - case TCPDIAG_BC_JMP: + case INET_DIAG_BC_JMP: yes = 0; break; - case TCPDIAG_BC_S_GE: + case INET_DIAG_BC_S_GE: yes = entry->sport >= op[1].no; break; - case TCPDIAG_BC_S_LE: + case INET_DIAG_BC_S_LE: yes = entry->dport <= op[1].no; break; - case TCPDIAG_BC_D_GE: + case INET_DIAG_BC_D_GE: yes = entry->dport >= op[1].no; break; - case TCPDIAG_BC_D_LE: + case INET_DIAG_BC_D_LE: yes = entry->dport <= op[1].no; break; - case TCPDIAG_BC_AUTO: + case INET_DIAG_BC_AUTO: yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); break; - case TCPDIAG_BC_S_COND: - case TCPDIAG_BC_D_COND: + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: { - struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1); + struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(op+1); u32 *addr; if (cond->port != -1 && - cond->port != (op->code == TCPDIAG_BC_S_COND ? + cond->port != (op->code == INET_DIAG_BC_S_COND ? 
entry->sport : entry->dport)) { yes = 0; break; @@ -329,7 +326,7 @@ static int tcpdiag_bc_run(const void *bc, int len, if (cond->prefix_len == 0) break; - if (op->code == TCPDIAG_BC_S_COND) + if (op->code == INET_DIAG_BC_S_COND) addr = entry->saddr; else addr = entry->daddr; @@ -362,7 +359,7 @@ static int tcpdiag_bc_run(const void *bc, int len, static int valid_cc(const void *bc, int len, int cc) { while (len >= 0) { - const struct tcpdiag_bc_op *op = bc; + const struct inet_diag_bc_op *op = bc; if (cc > len) return 0; @@ -376,33 +373,33 @@ static int valid_cc(const void *bc, int len, int cc) return 0; } -static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len) +static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) { const unsigned char *bc = bytecode; int len = bytecode_len; while (len > 0) { - struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc; + struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)bc; //printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); switch (op->code) { - case TCPDIAG_BC_AUTO: - case TCPDIAG_BC_S_COND: - case TCPDIAG_BC_D_COND: - case TCPDIAG_BC_S_GE: - case TCPDIAG_BC_S_LE: - case TCPDIAG_BC_D_GE: - case TCPDIAG_BC_D_LE: + case INET_DIAG_BC_AUTO: + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: + case INET_DIAG_BC_S_GE: + case INET_DIAG_BC_S_LE: + case INET_DIAG_BC_D_GE: + case INET_DIAG_BC_D_LE: if (op->yes < 4 || op->yes > len+4) return -EINVAL; - case TCPDIAG_BC_JMP: + case INET_DIAG_BC_JMP: if (op->no < 4 || op->no > len+4) return -EINVAL; if (op->no < len && !valid_cc(bytecode, bytecode_len, len-op->no)) return -EINVAL; break; - case TCPDIAG_BC_NOP: + case INET_DIAG_BC_NOP: if (op->yes < 4 || op->yes > len+4) return -EINVAL; break; @@ -415,13 +412,13 @@ static int tcpdiag_bc_audit(const void *bytecode, int bytecode_len) return len == 0 ? 
0 : -EINVAL; } -static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, +static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb) { - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { - struct tcpdiag_entry entry; + struct inet_diag_entry entry; struct rtattr *bc = (struct rtattr *)(r + 1); struct inet_sock *inet = inet_sk(sk); @@ -442,15 +439,15 @@ static int tcpdiag_dump_sock(struct sk_buff *skb, struct sock *sk, entry.dport = ntohs(inet->dport); entry.userlocks = sk->sk_userlocks; - if (!tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) return 0; } - return tcpdiag_fill(skb, sk, r->tcpdiag_ext, NETLINK_CB(cb->skb).pid, + return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } -static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, +static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, u32 pid, u32 seq, const struct nlmsghdr *unlh) @@ -458,7 +455,7 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, const struct inet_request_sock *ireq = inet_rsk(req); struct inet_sock *inet = inet_sk(sk); unsigned char *b = skb->tail; - struct tcpdiagmsg *r; + struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -466,33 +463,33 @@ static int tcpdiag_fill_req(struct sk_buff *skb, struct sock *sk, nlh->nlmsg_flags = NLM_F_MULTI; r = NLMSG_DATA(nlh); - r->tcpdiag_family = sk->sk_family; - r->tcpdiag_state = TCP_SYN_RECV; - r->tcpdiag_timer = 1; - r->tcpdiag_retrans = req->retrans; + r->idiag_family = sk->sk_family; + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = req->retrans; - r->id.tcpdiag_if = sk->sk_bound_dev_if; - r->id.tcpdiag_cookie[0] = (u32)(unsigned long)req; - r->id.tcpdiag_cookie[1] = 
(u32)(((unsigned long)req >> 31) >> 1); + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)req; + r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); tmo = req->expires - jiffies; if (tmo < 0) tmo = 0; - r->id.tcpdiag_sport = inet->sport; - r->id.tcpdiag_dport = ireq->rmt_port; - r->id.tcpdiag_src[0] = ireq->loc_addr; - r->id.tcpdiag_dst[0] = ireq->rmt_addr; - r->tcpdiag_expires = jiffies_to_msecs(tmo), - r->tcpdiag_rqueue = 0; - r->tcpdiag_wqueue = 0; - r->tcpdiag_uid = sock_i_uid(sk); - r->tcpdiag_inode = 0; + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = ireq->rmt_port; + r->id.idiag_src[0] = ireq->loc_addr; + r->id.idiag_dst[0] = ireq->rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->tcpdiag_family == AF_INET6) { - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_src, + if (r->idiag_family == AF_INET6) { + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, &tcp6_rsk(req)->loc_addr); - ipv6_addr_copy((struct in6_addr *)r->id.tcpdiag_dst, + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, &tcp6_rsk(req)->rmt_addr); } #endif @@ -505,11 +502,11 @@ nlmsg_failure: return -1; } -static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, +static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, struct netlink_callback *cb) { - struct tcpdiag_entry entry; - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_entry entry; + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt; struct rtattr *bc = NULL; @@ -547,8 +544,8 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, if (reqnum < s_reqnum) continue; - if (r->id.tcpdiag_dport != ireq->rmt_port && - r->id.tcpdiag_dport) + if (r->id.idiag_dport != ireq->rmt_port && + 
r->id.idiag_dport) continue; if (bc) { @@ -566,12 +563,12 @@ static int tcpdiag_dump_reqs(struct sk_buff *skb, struct sock *sk, &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); - if (!tcpdiag_bc_run(RTA_DATA(bc), + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) continue; } - err = tcpdiag_fill_req(skb, sk, req, + err = inet_diag_fill_req(skb, sk, req, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { @@ -590,11 +587,11 @@ out: return err; } -static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { int i, num; int s_i, s_num; - struct tcpdiagreq *r = NLMSG_DATA(cb->nlh); + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); const struct inet_diag_handler *handler; struct inet_hashinfo *hashinfo; @@ -606,7 +603,7 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) s_num = num = cb->args[2]; if (cb->args[0] == 0) { - if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV))) + if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) goto skip_listen_ht; inet_listen_lock(hashinfo); @@ -623,25 +620,25 @@ static int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; } - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_listen; - if (!(r->tcpdiag_states&TCPF_LISTEN) || - r->id.tcpdiag_dport || + if (!(r->idiag_states & TCPF_LISTEN) || + r->id.idiag_dport || cb->args[3] > 0) goto syn_recv; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } syn_recv: - if (!(r->tcpdiag_states&TCPF_SYN_RECV)) + if (!(r->idiag_states & TCPF_SYN_RECV)) goto next_listen; - if (tcpdiag_dump_reqs(skb, sk, cb) < 0) { + if (inet_diag_dump_reqs(skb, sk, cb) < 0) { inet_listen_unlock(hashinfo); goto done; } @@ -662,7 +659,7 @@ skip_listen_ht: s_i = num = s_num = 0; 
} - if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV))) + if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV))) return skb->len; for (i = s_i; i < hashinfo->ehash_size; i++) { @@ -681,14 +678,14 @@ skip_listen_ht: if (num < s_num) goto next_normal; - if (!(r->tcpdiag_states & (1 << sk->sk_state))) + if (!(r->idiag_states & (1 << sk->sk_state))) goto next_normal; - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_normal; - if (r->id.tcpdiag_dport != inet->dport && r->id.tcpdiag_dport) + if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) goto next_normal; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -696,20 +693,20 @@ next_normal: ++num; } - if (r->tcpdiag_states&TCPF_TIME_WAIT) { + if (r->idiag_states & TCPF_TIME_WAIT) { sk_for_each(sk, node, &hashinfo->ehash[i + hashinfo->ehash_size].chain) { struct inet_sock *inet = inet_sk(sk); if (num < s_num) goto next_dying; - if (r->id.tcpdiag_sport != inet->sport && - r->id.tcpdiag_sport) + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) goto next_dying; - if (r->id.tcpdiag_dport != inet->dport && - r->id.tcpdiag_dport) + if (r->id.idiag_dport != inet->dport && + r->id.idiag_dport) goto next_dying; - if (tcpdiag_dump_sock(skb, sk, cb) < 0) { + if (inet_diag_dump_sock(skb, sk, cb) < 0) { read_unlock_bh(&head->lock); goto done; } @@ -726,14 +723,14 @@ done: return skb->len; } -static int tcpdiag_dump_done(struct netlink_callback *cb) +static int inet_diag_dump_done(struct netlink_callback *cb) { return 0; } static __inline__ int -tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) { if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) return 0; @@ -744,24 +741,28 @@ tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (inet_diag_table[nlh->nlmsg_type] == NULL) 
return -ENOENT; - if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len) + if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) goto err_inval; if (nlh->nlmsg_flags&NLM_F_DUMP) { - if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) { - struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq)); - if (rta->rta_type != TCPDIAG_REQ_BYTECODE || + if (nlh->nlmsg_len > + (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { + struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + + sizeof(struct inet_diag_req)); + if (rta->rta_type != INET_DIAG_REQ_BYTECODE || rta->rta_len < 8 || - rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq))) + rta->rta_len > + (nlh->nlmsg_len - + NLMSG_SPACE(sizeof(struct inet_diag_req)))) goto err_inval; - if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) + if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) goto err_inval; } - return netlink_dump_start(tcpnl, skb, nlh, - tcpdiag_dump, - tcpdiag_dump_done); + return netlink_dump_start(idiagnl, skb, nlh, + inet_diag_dump, + inet_diag_dump_done); } else { - return tcpdiag_get_exact(skb, nlh); + return inet_diag_get_exact(skb, nlh); } err_inval: @@ -769,7 +770,7 @@ err_inval: } -static inline void tcpdiag_rcv_skb(struct sk_buff *skb) +static inline void inet_diag_rcv_skb(struct sk_buff *skb) { int err; struct nlmsghdr * nlh; @@ -778,31 +779,31 @@ static inline void tcpdiag_rcv_skb(struct sk_buff *skb) nlh = (struct nlmsghdr *)skb->data; if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) return; - err = tcpdiag_rcv_msg(skb, nlh); + err = inet_diag_rcv_msg(skb, nlh); if (err || nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, err); } } -static void tcpdiag_rcv(struct sock *sk, int len) +static void inet_diag_rcv(struct sock *sk, int len) { struct sk_buff *skb; unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { - tcpdiag_rcv_skb(skb); + 
inet_diag_rcv_skb(skb); kfree_skb(skb); } } -static void tcp_diag_get_info(struct sock *sk, struct tcpdiagmsg *r, +static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *_info) { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_info *info = _info; - r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->tcpdiag_wqueue = tp->write_seq - tp->snd_una; + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_wqueue = tp->write_seq - tp->snd_una; if (info != NULL) tcp_get_info(sk, info); } @@ -851,7 +852,7 @@ void inet_diag_unregister(const struct inet_diag_handler *h) } EXPORT_SYMBOL_GPL(inet_diag_unregister); -static int __init tcpdiag_init(void) +static int __init inet_diag_init(void) { const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * sizeof(struct inet_diag_handler *)); @@ -863,9 +864,9 @@ static int __init tcpdiag_init(void) memset(inet_diag_table, 0, inet_diag_table_size); - tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv, - THIS_MODULE); - if (tcpnl == NULL) + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + THIS_MODULE); + if (idiagnl == NULL) goto out_free_table; err = inet_diag_register(&tcp_diag_handler); @@ -874,18 +875,18 @@ static int __init tcpdiag_init(void) out: return err; out_sock_release: - sock_release(tcpnl->sk_socket); + sock_release(idiagnl->sk_socket); out_free_table: kfree(inet_diag_table); goto out; } -static void __exit tcpdiag_exit(void) +static void __exit inet_diag_exit(void) { - sock_release(tcpnl->sk_socket); + sock_release(idiagnl->sk_socket); kfree(inet_diag_table); } -module_init(tcpdiag_init); -module_exit(tcpdiag_exit); +module_init(inet_diag_init); +module_exit(inet_diag_exit); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 054de24efee2..8cef9dc11fb7 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -365,10 +365,10 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct 
vegas *ca = inet_csk_ca(sk); - if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct tcpvegas_info *info; - info = RTA_DATA(__RTA_PUT(skb, TCPDIAG_VEGASINFO, + info = RTA_DATA(__RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info))); info->tcpv_enabled = ca->doing_vegas_now; diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index d8a5a2b92e37..395100317875 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -216,11 +216,11 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); - if (ext & (1<<(TCPDIAG_VEGASINFO-1))) { + if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { struct rtattr *rta; struct tcpvegas_info *info; - rta = __RTA_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*info)); + rta = __RTA_PUT(skb, INET_DIAG_VEGASINFO, sizeof(*info)); info = RTA_DATA(rta); info->tcpv_enabled = 1; info->tcpv_rttcnt = 0; -- cgit v1.2.3 From a8c2190ee7da1a1dc68ff1a6b5f03feb61e523a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:56:38 -0300 Subject: [INET_DIAG]: Rename tcp_diag.[ch] to inet_diag.[ch] Next changeset will introduce net/ipv4/tcp_diag.c, moving the code that was put transitionally in inet_diag.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/inet_diag.h | 138 +++++++ include/linux/tcp_diag.h | 138 ------- net/dccp/diag.c | 2 +- net/ipv4/Makefile | 2 +- net/ipv4/inet_diag.c | 893 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/tcp_diag.c | 892 --------------------------------------------- net/ipv4/tcp_vegas.c | 2 +- net/ipv4/tcp_westwood.c | 2 +- 8 files changed, 1035 insertions(+), 1034 deletions(-) create mode 100644 include/linux/inet_diag.h delete mode 100644 include/linux/tcp_diag.h create mode 100644 net/ipv4/inet_diag.c delete mode 100644 net/ipv4/tcp_diag.c (limited to 'include/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h new file mode 100644 index 000000000000..a4606e5810e5 --- /dev/null +++ b/include/linux/inet_diag.h @@ -0,0 +1,138 @@ +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 + +/* Just some random number */ +#define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 + +#define INET_DIAG_GETSOCK_MAX 24 + +/* Socket identity */ +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) +}; + +/* Request structure */ + +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. */ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ + + struct inet_diag_sockid id; + + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ +}; + +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, +}; + +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE + +/* Bytecode is sequence of 4 byte commands followed by variable arguments. + * All the commands identified by "code" are conditional jumps forward: + * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be + * length of the command and its arguments. 
+ */ + +struct inet_diag_bc_op { + unsigned char code; + unsigned char yes; + unsigned short no; +}; + +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, +}; + +struct inet_diag_hostcond { + __u8 family; + __u8 prefix_len; + int port; + __u32 addr[0]; +}; + +/* Base info structure. It contains socket identity (addrs/ports/cookie) + * and, alas, the information shown by netstat. */ +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; + + struct inet_diag_sockid id; + + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; +}; + +/* Extensions */ + +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, +}; + +#define INET_DIAG_MAX INET_DIAG_CONG + + +/* INET_DIAG_MEM */ + +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; +}; + +/* INET_DIAG_VEGASINFO */ + +struct tcpvegas_info { + __u32 tcpv_enabled; + __u32 tcpv_rttcnt; + __u32 tcpv_rtt; + __u32 tcpv_minrtt; +}; + +#ifdef __KERNEL__ +struct sock; +struct inet_hashinfo; + +struct inet_diag_handler { + struct inet_hashinfo *idiag_hashinfo; + void (*idiag_get_info)(struct sock *sk, + struct inet_diag_msg *r, + void *info); + __u16 idiag_info_size; + __u16 idiag_type; +}; + +extern int inet_diag_register(const struct inet_diag_handler *handler); +extern void inet_diag_unregister(const struct inet_diag_handler *handler); +#endif /* __KERNEL__ */ + +#endif /* _INET_DIAG_H_ */ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h deleted file mode 100644 index a4606e5810e5..000000000000 --- a/include/linux/tcp_diag.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef _INET_DIAG_H_ -#define _INET_DIAG_H_ 1 - -/* Just some random number */ -#define TCPDIAG_GETSOCK 18 
-#define DCCPDIAG_GETSOCK 19 - -#define INET_DIAG_GETSOCK_MAX 24 - -/* Socket identity */ -struct inet_diag_sockid { - __u16 idiag_sport; - __u16 idiag_dport; - __u32 idiag_src[4]; - __u32 idiag_dst[4]; - __u32 idiag_if; - __u32 idiag_cookie[2]; -#define INET_DIAG_NOCOOKIE (~0U) -}; - -/* Request structure */ - -struct inet_diag_req { - __u8 idiag_family; /* Family of addresses. */ - __u8 idiag_src_len; - __u8 idiag_dst_len; - __u8 idiag_ext; /* Query extended information */ - - struct inet_diag_sockid id; - - __u32 idiag_states; /* States to dump */ - __u32 idiag_dbs; /* Tables to dump (NI) */ -}; - -enum { - INET_DIAG_REQ_NONE, - INET_DIAG_REQ_BYTECODE, -}; - -#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE - -/* Bytecode is sequence of 4 byte commands followed by variable arguments. - * All the commands identified by "code" are conditional jumps forward: - * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be - * length of the command and its arguments. - */ - -struct inet_diag_bc_op { - unsigned char code; - unsigned char yes; - unsigned short no; -}; - -enum { - INET_DIAG_BC_NOP, - INET_DIAG_BC_JMP, - INET_DIAG_BC_S_GE, - INET_DIAG_BC_S_LE, - INET_DIAG_BC_D_GE, - INET_DIAG_BC_D_LE, - INET_DIAG_BC_AUTO, - INET_DIAG_BC_S_COND, - INET_DIAG_BC_D_COND, -}; - -struct inet_diag_hostcond { - __u8 family; - __u8 prefix_len; - int port; - __u32 addr[0]; -}; - -/* Base info structure. It contains socket identity (addrs/ports/cookie) - * and, alas, the information shown by netstat. 
*/ -struct inet_diag_msg { - __u8 idiag_family; - __u8 idiag_state; - __u8 idiag_timer; - __u8 idiag_retrans; - - struct inet_diag_sockid id; - - __u32 idiag_expires; - __u32 idiag_rqueue; - __u32 idiag_wqueue; - __u32 idiag_uid; - __u32 idiag_inode; -}; - -/* Extensions */ - -enum { - INET_DIAG_NONE, - INET_DIAG_MEMINFO, - INET_DIAG_INFO, - INET_DIAG_VEGASINFO, - INET_DIAG_CONG, -}; - -#define INET_DIAG_MAX INET_DIAG_CONG - - -/* INET_DIAG_MEM */ - -struct inet_diag_meminfo { - __u32 idiag_rmem; - __u32 idiag_wmem; - __u32 idiag_fmem; - __u32 idiag_tmem; -}; - -/* INET_DIAG_VEGASINFO */ - -struct tcpvegas_info { - __u32 tcpv_enabled; - __u32 tcpv_rttcnt; - __u32 tcpv_rtt; - __u32 tcpv_minrtt; -}; - -#ifdef __KERNEL__ -struct sock; -struct inet_hashinfo; - -struct inet_diag_handler { - struct inet_hashinfo *idiag_hashinfo; - void (*idiag_get_info)(struct sock *sk, - struct inet_diag_msg *r, - void *info); - __u16 idiag_info_size; - __u16 idiag_type; -}; - -extern int inet_diag_register(const struct inet_diag_handler *handler); -extern void inet_diag_unregister(const struct inet_diag_handler *handler); -#endif /* __KERNEL__ */ - -#endif /* _INET_DIAG_H_ */ diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 9f07eff2e3b6..0b10c176c35a 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -12,7 +12,7 @@ #include #include -#include +#include #include "dccp.h" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 9b1c894039a9..fe5accbb56bf 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -30,7 +30,7 @@ obj-$(CONFIG_IP_ROUTE_MULTIPATH_WRANDOM) += multipath_wrandom.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_DRR) += multipath_drr.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IP_VS) += ipvs/ -obj-$(CONFIG_IP_INET_DIAG) += tcp_diag.o +obj-$(CONFIG_IP_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o diff --git a/net/ipv4/inet_diag.c 
b/net/ipv4/inet_diag.c new file mode 100644 index 000000000000..3bd510941da0 --- /dev/null +++ b/net/ipv4/inet_diag.c @@ -0,0 +1,893 @@ +/* + * inet_diag.c Module for monitoring INET transport protocols sockets. + * + * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ + * + * Authors: Alexey Kuznetsov, + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +static const struct inet_diag_handler **inet_diag_table; + +struct inet_diag_entry { + u32 *saddr; + u32 *daddr; + u16 sport; + u16 dport; + u16 family; + u16 userlocks; +}; + +static struct sock *idiagnl; + +#define INET_DIAG_PUT(skb, attrtype, attrlen) \ + RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) + +static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, + int ext, u32 pid, u32 seq, u16 nlmsg_flags, + const struct nlmsghdr *unlh) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + void *info = NULL; + struct inet_diag_meminfo *minfo = NULL; + unsigned char *b = skb->tail; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[unlh->nlmsg_type]; + BUG_ON(handler == NULL); + + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); + nlh->nlmsg_flags = nlmsg_flags; + + r = NLMSG_DATA(nlh); + if (sk->sk_state != TCP_TIME_WAIT) { + if (ext & (1 << (INET_DIAG_MEMINFO - 1))) + minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, + sizeof(*minfo)); + if (ext & (1 << (INET_DIAG_INFO - 1))) + info = INET_DIAG_PUT(skb, INET_DIAG_INFO, + handler->idiag_info_size); + + if 
((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { + size_t len = strlen(icsk->icsk_ca_ops->name); + strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), + icsk->icsk_ca_ops->name); + } + } + r->idiag_family = sk->sk_family; + r->idiag_state = sk->sk_state; + r->idiag_timer = 0; + r->idiag_retrans = 0; + + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)sk; + r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); + + if (r->idiag_state == TCP_TIME_WAIT) { + const struct inet_timewait_sock *tw = inet_twsk(sk); + long tmo = tw->tw_ttd - jiffies; + if (tmo < 0) + tmo = 0; + + r->id.idiag_sport = tw->tw_sport; + r->id.idiag_dport = tw->tw_dport; + r->id.idiag_src[0] = tw->tw_rcv_saddr; + r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; + r->idiag_timer = 3; + r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = 0; + r->idiag_inode = 0; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (r->idiag_family == AF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, + &tcp6tw->tw_v6_rcv_saddr); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, + &tcp6tw->tw_v6_daddr); + } +#endif + nlh->nlmsg_len = skb->tail - b; + return skb->len; + } + + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = inet->dport; + r->id.idiag_src[0] = inet->rcv_saddr; + r->id.idiag_dst[0] = inet->daddr; + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (r->idiag_family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, + &np->rcv_saddr); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, + &np->daddr); + } +#endif + +#define EXPIRES_IN_MS(tmo) ((tmo - jiffies) * 1000 + HZ - 1) / HZ + + if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + r->idiag_timer = 1; + r->idiag_retrans = icsk->icsk_retransmits; + 
r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + r->idiag_timer = 4; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); + } else if (timer_pending(&sk->sk_timer)) { + r->idiag_timer = 2; + r->idiag_retrans = icsk->icsk_probes_out; + r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); + } else { + r->idiag_timer = 0; + r->idiag_expires = 0; + } +#undef EXPIRES_IN_MS + + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = sock_i_ino(sk); + + if (minfo) { + minfo->idiag_rmem = atomic_read(&sk->sk_rmem_alloc); + minfo->idiag_wmem = sk->sk_wmem_queued; + minfo->idiag_fmem = sk->sk_forward_alloc; + minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); + } + + handler->idiag_get_info(sk, r, info); + + if (sk->sk_state < TCP_TIME_WAIT && + icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) + icsk->icsk_ca_ops->get_info(sk, ext, skb); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +rtattr_failure: +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) +{ + int err; + struct sock *sk; + struct inet_diag_req *req = NLMSG_DATA(nlh); + struct sk_buff *rep; + struct inet_hashinfo *hashinfo; + const struct inet_diag_handler *handler; + + handler = inet_diag_table[nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + + if (req->idiag_family == AF_INET) { + sk = inet_lookup(hashinfo, req->id.idiag_dst[0], + req->id.idiag_dport, req->id.idiag_src[0], + req->id.idiag_sport, req->id.idiag_if); + } +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + else if (req->idiag_family == AF_INET6) { + sk = inet6_lookup(hashinfo, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if); + } +#endif + else { + return -EINVAL; + } + + if (sk == NULL) + return 
-ENOENT; + + err = -ESTALE; + if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || + req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && + ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || + (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) + goto out; + + err = -ENOMEM; + rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + + sizeof(struct inet_diag_meminfo) + + handler->idiag_info_size + 64)), + GFP_KERNEL); + if (!rep) + goto out; + + if (inet_diag_fill(rep, sk, req->idiag_ext, + NETLINK_CB(in_skb).pid, + nlh->nlmsg_seq, 0, nlh) <= 0) + BUG(); + + err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, + MSG_DONTWAIT); + if (err > 0) + err = 0; + +out: + if (sk) { + if (sk->sk_state == TCP_TIME_WAIT) + inet_twsk_put((struct inet_timewait_sock *)sk); + else + sock_put(sk); + } + return err; +} + +static int bitstring_match(const u32 *a1, const u32 *a2, int bits) +{ + int words = bits >> 5; + + bits &= 0x1f; + + if (words) { + if (memcmp(a1, a2, words << 2)) + return 0; + } + if (bits) { + __u32 w1, w2; + __u32 mask; + + w1 = a1[words]; + w2 = a2[words]; + + mask = htonl((0xffffffff) << (32 - bits)); + + if ((w1 ^ w2) & mask) + return 0; + } + + return 1; +} + + +static int inet_diag_bc_run(const void *bc, int len, + const struct inet_diag_entry *entry) +{ + while (len > 0) { + int yes = 1; + const struct inet_diag_bc_op *op = bc; + + switch (op->code) { + case INET_DIAG_BC_NOP: + break; + case INET_DIAG_BC_JMP: + yes = 0; + break; + case INET_DIAG_BC_S_GE: + yes = entry->sport >= op[1].no; + break; + case INET_DIAG_BC_S_LE: + yes = entry->dport <= op[1].no; + break; + case INET_DIAG_BC_D_GE: + yes = entry->dport >= op[1].no; + break; + case INET_DIAG_BC_D_LE: + yes = entry->dport <= op[1].no; + break; + case INET_DIAG_BC_AUTO: + yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); + break; + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: { + struct inet_diag_hostcond *cond; + u32 *addr; + + cond = (struct inet_diag_hostcond 
*)(op + 1); + if (cond->port != -1 && + cond->port != (op->code == INET_DIAG_BC_S_COND ? + entry->sport : entry->dport)) { + yes = 0; + break; + } + + if (cond->prefix_len == 0) + break; + + if (op->code == INET_DIAG_BC_S_COND) + addr = entry->saddr; + else + addr = entry->daddr; + + if (bitstring_match(addr, cond->addr, cond->prefix_len)) + break; + if (entry->family == AF_INET6 && + cond->family == AF_INET) { + if (addr[0] == 0 && addr[1] == 0 && + addr[2] == htonl(0xffff) && + bitstring_match(addr + 3, cond->addr, + cond->prefix_len)) + break; + } + yes = 0; + break; + } + } + + if (yes) { + len -= op->yes; + bc += op->yes; + } else { + len -= op->no; + bc += op->no; + } + } + return (len == 0); +} + +static int valid_cc(const void *bc, int len, int cc) +{ + while (len >= 0) { + const struct inet_diag_bc_op *op = bc; + + if (cc > len) + return 0; + if (cc == len) + return 1; + if (op->yes < 4) + return 0; + len -= op->yes; + bc += op->yes; + } + return 0; +} + +static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) +{ + const unsigned char *bc = bytecode; + int len = bytecode_len; + + while (len > 0) { + struct inet_diag_bc_op *op = (struct inet_diag_bc_op *)bc; + +//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); + switch (op->code) { + case INET_DIAG_BC_AUTO: + case INET_DIAG_BC_S_COND: + case INET_DIAG_BC_D_COND: + case INET_DIAG_BC_S_GE: + case INET_DIAG_BC_S_LE: + case INET_DIAG_BC_D_GE: + case INET_DIAG_BC_D_LE: + if (op->yes < 4 || op->yes > len + 4) + return -EINVAL; + case INET_DIAG_BC_JMP: + if (op->no < 4 || op->no > len + 4) + return -EINVAL; + if (op->no < len && + !valid_cc(bytecode, bytecode_len, len - op->no)) + return -EINVAL; + break; + case INET_DIAG_BC_NOP: + if (op->yes < 4 || op->yes > len + 4) + return -EINVAL; + break; + default: + return -EINVAL; + } + bc += op->yes; + len -= op->yes; + } + return len == 0 ? 
0 : -EINVAL; +} + +static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, + struct netlink_callback *cb) +{ + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + + if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + struct inet_diag_entry entry; + struct rtattr *bc = (struct rtattr *)(r + 1); + struct inet_sock *inet = inet_sk(sk); + + entry.family = sk->sk_family; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (entry.family == AF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + entry.saddr = np->rcv_saddr.s6_addr32; + entry.daddr = np->daddr.s6_addr32; + } else +#endif + { + entry.saddr = &inet->rcv_saddr; + entry.daddr = &inet->daddr; + } + entry.sport = inet->num; + entry.dport = ntohs(inet->dport); + entry.userlocks = sk->sk_userlocks; + + if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) + return 0; + } + + return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); +} + +static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, + struct request_sock *req, + u32 pid, u32 seq, + const struct nlmsghdr *unlh) +{ + const struct inet_request_sock *ireq = inet_rsk(req); + struct inet_sock *inet = inet_sk(sk); + unsigned char *b = skb->tail; + struct inet_diag_msg *r; + struct nlmsghdr *nlh; + long tmo; + + nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); + nlh->nlmsg_flags = NLM_F_MULTI; + r = NLMSG_DATA(nlh); + + r->idiag_family = sk->sk_family; + r->idiag_state = TCP_SYN_RECV; + r->idiag_timer = 1; + r->idiag_retrans = req->retrans; + + r->id.idiag_if = sk->sk_bound_dev_if; + r->id.idiag_cookie[0] = (u32)(unsigned long)req; + r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); + + tmo = req->expires - jiffies; + if (tmo < 0) + tmo = 0; + + r->id.idiag_sport = inet->sport; + r->id.idiag_dport = ireq->rmt_port; + r->id.idiag_src[0] = ireq->loc_addr; + r->id.idiag_dst[0] = ireq->rmt_addr; + r->idiag_expires = 
jiffies_to_msecs(tmo); + r->idiag_rqueue = 0; + r->idiag_wqueue = 0; + r->idiag_uid = sock_i_uid(sk); + r->idiag_inode = 0; +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + if (r->idiag_family == AF_INET6) { + ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, + &tcp6_rsk(req)->loc_addr); + ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, + &tcp6_rsk(req)->rmt_addr); + } +#endif + nlh->nlmsg_len = skb->tail - b; + + return skb->len; + +nlmsg_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, + struct netlink_callback *cb) +{ + struct inet_diag_entry entry; + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + struct inet_connection_sock *icsk = inet_csk(sk); + struct listen_sock *lopt; + struct rtattr *bc = NULL; + struct inet_sock *inet = inet_sk(sk); + int j, s_j; + int reqnum, s_reqnum; + int err = 0; + + s_j = cb->args[3]; + s_reqnum = cb->args[4]; + + if (s_j > 0) + s_j--; + + entry.family = sk->sk_family; + + read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + + lopt = icsk->icsk_accept_queue.listen_opt; + if (!lopt || !lopt->qlen) + goto out; + + if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { + bc = (struct rtattr *)(r + 1); + entry.sport = inet->num; + entry.userlocks = sk->sk_userlocks; + } + + for (j = s_j; j < lopt->nr_table_entries; j++) { + struct request_sock *req, *head = lopt->syn_table[j]; + + reqnum = 0; + for (req = head; req; reqnum++, req = req->dl_next) { + struct inet_request_sock *ireq = inet_rsk(req); + + if (reqnum < s_reqnum) + continue; + if (r->id.idiag_dport != ireq->rmt_port && + r->id.idiag_dport) + continue; + + if (bc) { + entry.saddr = +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + (entry.family == AF_INET6) ? + tcp6_rsk(req)->loc_addr.s6_addr32 : +#endif + &ireq->loc_addr; + entry.daddr = +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) + (entry.family == AF_INET6) ? 
+ tcp6_rsk(req)->rmt_addr.s6_addr32 : +#endif + &ireq->rmt_addr; + entry.dport = ntohs(ireq->rmt_port); + + if (!inet_diag_bc_run(RTA_DATA(bc), + RTA_PAYLOAD(bc), &entry)) + continue; + } + + err = inet_diag_fill_req(skb, sk, req, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, cb->nlh); + if (err < 0) { + cb->args[3] = j + 1; + cb->args[4] = reqnum; + goto out; + } + } + + s_reqnum = 0; + } + +out: + read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); + + return err; +} + +static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int i, num; + int s_i, s_num; + struct inet_diag_req *r = NLMSG_DATA(cb->nlh); + const struct inet_diag_handler *handler; + struct inet_hashinfo *hashinfo; + + handler = inet_diag_table[cb->nlh->nlmsg_type]; + BUG_ON(handler == NULL); + hashinfo = handler->idiag_hashinfo; + + s_i = cb->args[1]; + s_num = num = cb->args[2]; + + if (cb->args[0] == 0) { + if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) + goto skip_listen_ht; + + inet_listen_lock(hashinfo); + for (i = s_i; i < INET_LHTABLE_SIZE; i++) { + struct sock *sk; + struct hlist_node *node; + + num = 0; + sk_for_each(sk, node, &hashinfo->listening_hash[i]) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) { + num++; + continue; + } + + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) + goto next_listen; + + if (!(r->idiag_states & TCPF_LISTEN) || + r->id.idiag_dport || + cb->args[3] > 0) + goto syn_recv; + + if (inet_diag_dump_sock(skb, sk, cb) < 0) { + inet_listen_unlock(hashinfo); + goto done; + } + +syn_recv: + if (!(r->idiag_states & TCPF_SYN_RECV)) + goto next_listen; + + if (inet_diag_dump_reqs(skb, sk, cb) < 0) { + inet_listen_unlock(hashinfo); + goto done; + } + +next_listen: + cb->args[3] = 0; + cb->args[4] = 0; + ++num; + } + + s_num = 0; + cb->args[3] = 0; + cb->args[4] = 0; + } + inet_listen_unlock(hashinfo); +skip_listen_ht: + cb->args[0] = 1; + s_i = num = s_num = 0; + } + + if (!(r->idiag_states & ~(TCPF_LISTEN 
| TCPF_SYN_RECV))) + return skb->len; + + for (i = s_i; i < hashinfo->ehash_size; i++) { + struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + struct sock *sk; + struct hlist_node *node; + + if (i > s_i) + s_num = 0; + + read_lock_bh(&head->lock); + + num = 0; + sk_for_each(sk, node, &head->chain) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next_normal; + if (!(r->idiag_states & (1 << sk->sk_state))) + goto next_normal; + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) + goto next_normal; + if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) + goto next_normal; + if (inet_diag_dump_sock(skb, sk, cb) < 0) { + read_unlock_bh(&head->lock); + goto done; + } +next_normal: + ++num; + } + + if (r->idiag_states & TCPF_TIME_WAIT) { + sk_for_each(sk, node, + &hashinfo->ehash[i + hashinfo->ehash_size].chain) { + struct inet_sock *inet = inet_sk(sk); + + if (num < s_num) + goto next_dying; + if (r->id.idiag_sport != inet->sport && + r->id.idiag_sport) + goto next_dying; + if (r->id.idiag_dport != inet->dport && + r->id.idiag_dport) + goto next_dying; + if (inet_diag_dump_sock(skb, sk, cb) < 0) { + read_unlock_bh(&head->lock); + goto done; + } +next_dying: + ++num; + } + } + read_unlock_bh(&head->lock); + } + +done: + cb->args[1] = i; + cb->args[2] = num; + return skb->len; +} + +static int inet_diag_dump_done(struct netlink_callback *cb) +{ + return 0; +} + + +static __inline__ int +inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) + return 0; + + if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) + goto err_inval; + + if (inet_diag_table[nlh->nlmsg_type] == NULL) + return -ENOENT; + + if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) + goto err_inval; + + if (nlh->nlmsg_flags&NLM_F_DUMP) { + if (nlh->nlmsg_len > + (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { + struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + + sizeof(struct inet_diag_req)); + if (rta->rta_type != 
INET_DIAG_REQ_BYTECODE || + rta->rta_len < 8 || + rta->rta_len > + (nlh->nlmsg_len - + NLMSG_SPACE(sizeof(struct inet_diag_req)))) + goto err_inval; + if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) + goto err_inval; + } + return netlink_dump_start(idiagnl, skb, nlh, + inet_diag_dump, + inet_diag_dump_done); + } else { + return inet_diag_get_exact(skb, nlh); + } + +err_inval: + return -EINVAL; +} + + +static inline void inet_diag_rcv_skb(struct sk_buff *skb) +{ + int err; + struct nlmsghdr * nlh; + + if (skb->len >= NLMSG_SPACE(0)) { + nlh = (struct nlmsghdr *)skb->data; + if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) + return; + err = inet_diag_rcv_msg(skb, nlh); + if (err || nlh->nlmsg_flags & NLM_F_ACK) + netlink_ack(skb, nlh, err); + } +} + +static void inet_diag_rcv(struct sock *sk, int len) +{ + struct sk_buff *skb; + unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); + + while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { + inet_diag_rcv_skb(skb); + kfree_skb(skb); + } +} + +static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, + void *_info) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_info *info = _info; + + r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; + r->idiag_wqueue = tp->write_seq - tp->snd_una; + if (info != NULL) + tcp_get_info(sk, info); +} + +static struct inet_diag_handler tcp_diag_handler = { + .idiag_hashinfo = &tcp_hashinfo, + .idiag_get_info = tcp_diag_get_info, + .idiag_type = TCPDIAG_GETSOCK, + .idiag_info_size = sizeof(struct tcp_info), +}; + +static DEFINE_SPINLOCK(inet_diag_register_lock); + +int inet_diag_register(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + int err = -EINVAL; + + if (type >= INET_DIAG_GETSOCK_MAX) + goto out; + + spin_lock(&inet_diag_register_lock); + err = -EEXIST; + if (inet_diag_table[type] == NULL) { + inet_diag_table[type] = h; + err = 0; + } + spin_unlock(&inet_diag_register_lock); +out: + return err; +} 
+EXPORT_SYMBOL_GPL(inet_diag_register); + +void inet_diag_unregister(const struct inet_diag_handler *h) +{ + const __u16 type = h->idiag_type; + + if (type >= INET_DIAG_GETSOCK_MAX) + return; + + spin_lock(&inet_diag_register_lock); + inet_diag_table[type] = NULL; + spin_unlock(&inet_diag_register_lock); + + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(inet_diag_unregister); + +static int __init inet_diag_init(void) +{ + const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * + sizeof(struct inet_diag_handler *)); + int err = -ENOMEM; + + inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); + if (!inet_diag_table) + goto out; + + memset(inet_diag_table, 0, inet_diag_table_size); + + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + THIS_MODULE); + if (idiagnl == NULL) + goto out_free_table; + + err = inet_diag_register(&tcp_diag_handler); + if (err) + goto out_sock_release; +out: + return err; +out_sock_release: + sock_release(idiagnl->sk_socket); +out_free_table: + kfree(inet_diag_table); + goto out; +} + +static void __exit inet_diag_exit(void) +{ + sock_release(idiagnl->sk_socket); + kfree(inet_diag_table); +} + +module_init(inet_diag_init); +module_exit(inet_diag_exit); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c deleted file mode 100644 index 24abe82e23a0..000000000000 --- a/net/ipv4/tcp_diag.c +++ /dev/null @@ -1,892 +0,0 @@ -/* - * inet_diag.c Module for monitoring INET transport protocols sockets. - * - * Version: $Id: inet_diag.c,v 1.3 2002/02/01 22:01:04 davem Exp $ - * - * Authors: Alexey Kuznetsov, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -static const struct inet_diag_handler **inet_diag_table; - -struct inet_diag_entry { - u32 *saddr; - u32 *daddr; - u16 sport; - u16 dport; - u16 family; - u16 userlocks; -}; - -static struct sock *idiagnl; - -#define INET_DIAG_PUT(skb, attrtype, attrlen) \ - RTA_DATA(__RTA_PUT(skb, attrtype, attrlen)) - -static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, - int ext, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - void *info = NULL; - struct inet_diag_meminfo *minfo = NULL; - unsigned char *b = skb->tail; - const struct inet_diag_handler *handler; - - handler = inet_diag_table[unlh->nlmsg_type]; - BUG_ON(handler == NULL); - - nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); - nlh->nlmsg_flags = nlmsg_flags; - - r = NLMSG_DATA(nlh); - if (sk->sk_state != TCP_TIME_WAIT) { - if (ext & (1 << (INET_DIAG_MEMINFO - 1))) - minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, - sizeof(*minfo)); - if (ext & (1 << (INET_DIAG_INFO - 1))) - info = INET_DIAG_PUT(skb, INET_DIAG_INFO, - handler->idiag_info_size); - - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) { - size_t len = strlen(icsk->icsk_ca_ops->name); - strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1), - icsk->icsk_ca_ops->name); - } - } - r->idiag_family = sk->sk_family; - r->idiag_state = sk->sk_state; - r->idiag_timer = 0; - r->idiag_retrans = 0; - - r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)sk; - r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1); - - if (r->idiag_state == TCP_TIME_WAIT) { - const struct inet_timewait_sock *tw = inet_twsk(sk); - long tmo 
= tw->tw_ttd - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = tw->tw_sport; - r->id.idiag_dport = tw->tw_dport; - r->id.idiag_src[0] = tw->tw_rcv_saddr; - r->id.idiag_dst[0] = tw->tw_daddr; - r->idiag_state = tw->tw_substate; - r->idiag_timer = 3; - r->idiag_expires = (tmo * 1000 + HZ - 1) / HZ; - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = 0; - r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->idiag_family == AF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); - - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6tw->tw_v6_rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6tw->tw_v6_daddr); - } -#endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; - } - - r->id.idiag_sport = inet->sport; - r->id.idiag_dport = inet->dport; - r->id.idiag_src[0] = inet->rcv_saddr; - r->id.idiag_dst[0] = inet->daddr; - -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->idiag_family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &np->rcv_saddr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &np->daddr); - } -#endif - -#define EXPIRES_IN_MS(tmo) ((tmo-jiffies)*1000+HZ-1)/HZ - - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { - r->idiag_timer = 1; - r->idiag_retrans = icsk->icsk_retransmits; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); - } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { - r->idiag_timer = 4; - r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(icsk->icsk_timeout); - } else if (timer_pending(&sk->sk_timer)) { - r->idiag_timer = 2; - r->idiag_retrans = icsk->icsk_probes_out; - r->idiag_expires = EXPIRES_IN_MS(sk->sk_timer.expires); - } else { - r->idiag_timer = 0; - r->idiag_expires = 0; - } -#undef EXPIRES_IN_MS - - r->idiag_uid = sock_i_uid(sk); - r->idiag_inode = sock_i_ino(sk); - - if (minfo) { - minfo->idiag_rmem = 
atomic_read(&sk->sk_rmem_alloc); - minfo->idiag_wmem = sk->sk_wmem_queued; - minfo->idiag_fmem = sk->sk_forward_alloc; - minfo->idiag_tmem = atomic_read(&sk->sk_wmem_alloc); - } - - handler->idiag_get_info(sk, r, info); - - if (sk->sk_state < TCP_TIME_WAIT && - icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) - icsk->icsk_ca_ops->get_info(sk, ext, skb); - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -rtattr_failure: -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int inet_diag_get_exact(struct sk_buff *in_skb, const struct nlmsghdr *nlh) -{ - int err; - struct sock *sk; - struct inet_diag_req *req = NLMSG_DATA(nlh); - struct sk_buff *rep; - struct inet_hashinfo *hashinfo; - const struct inet_diag_handler *handler; - - handler = inet_diag_table[nlh->nlmsg_type]; - BUG_ON(handler == NULL); - hashinfo = handler->idiag_hashinfo; - - if (req->idiag_family == AF_INET) { - sk = inet_lookup(hashinfo, req->id.idiag_dst[0], - req->id.idiag_dport, req->id.idiag_src[0], - req->id.idiag_sport, req->id.idiag_if); - } -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - else if (req->idiag_family == AF_INET6) { - sk = inet6_lookup(hashinfo, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if); - } -#endif - else { - return -EINVAL; - } - - if (sk == NULL) - return -ENOENT; - - err = -ESTALE; - if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE || - req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) && - ((u32)(unsigned long)sk != req->id.idiag_cookie[0] || - (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1])) - goto out; - - err = -ENOMEM; - rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - handler->idiag_info_size + 64)), - GFP_KERNEL); - if (!rep) - goto out; - - if (inet_diag_fill(rep, sk, req->idiag_ext, - NETLINK_CB(in_skb).pid, - nlh->nlmsg_seq, 0, nlh) <= 0) - BUG(); - - err = 
netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid, - MSG_DONTWAIT); - if (err > 0) - err = 0; - -out: - if (sk) { - if (sk->sk_state == TCP_TIME_WAIT) - inet_twsk_put((struct inet_timewait_sock *)sk); - else - sock_put(sk); - } - return err; -} - -static int bitstring_match(const u32 *a1, const u32 *a2, int bits) -{ - int words = bits >> 5; - - bits &= 0x1f; - - if (words) { - if (memcmp(a1, a2, words << 2)) - return 0; - } - if (bits) { - __u32 w1, w2; - __u32 mask; - - w1 = a1[words]; - w2 = a2[words]; - - mask = htonl((0xffffffff) << (32 - bits)); - - if ((w1 ^ w2) & mask) - return 0; - } - - return 1; -} - - -static int inet_diag_bc_run(const void *bc, int len, - const struct inet_diag_entry *entry) -{ - while (len > 0) { - int yes = 1; - const struct inet_diag_bc_op *op = bc; - - switch (op->code) { - case INET_DIAG_BC_NOP: - break; - case INET_DIAG_BC_JMP: - yes = 0; - break; - case INET_DIAG_BC_S_GE: - yes = entry->sport >= op[1].no; - break; - case INET_DIAG_BC_S_LE: - yes = entry->dport <= op[1].no; - break; - case INET_DIAG_BC_D_GE: - yes = entry->dport >= op[1].no; - break; - case INET_DIAG_BC_D_LE: - yes = entry->dport <= op[1].no; - break; - case INET_DIAG_BC_AUTO: - yes = !(entry->userlocks & SOCK_BINDPORT_LOCK); - break; - case INET_DIAG_BC_S_COND: - case INET_DIAG_BC_D_COND: - { - struct inet_diag_hostcond *cond = (struct inet_diag_hostcond*)(op+1); - u32 *addr; - - if (cond->port != -1 && - cond->port != (op->code == INET_DIAG_BC_S_COND ? 
- entry->sport : entry->dport)) { - yes = 0; - break; - } - - if (cond->prefix_len == 0) - break; - - if (op->code == INET_DIAG_BC_S_COND) - addr = entry->saddr; - else - addr = entry->daddr; - - if (bitstring_match(addr, cond->addr, cond->prefix_len)) - break; - if (entry->family == AF_INET6 && - cond->family == AF_INET) { - if (addr[0] == 0 && addr[1] == 0 && - addr[2] == htonl(0xffff) && - bitstring_match(addr+3, cond->addr, cond->prefix_len)) - break; - } - yes = 0; - break; - } - } - - if (yes) { - len -= op->yes; - bc += op->yes; - } else { - len -= op->no; - bc += op->no; - } - } - return (len == 0); -} - -static int valid_cc(const void *bc, int len, int cc) -{ - while (len >= 0) { - const struct inet_diag_bc_op *op = bc; - - if (cc > len) - return 0; - if (cc == len) - return 1; - if (op->yes < 4) - return 0; - len -= op->yes; - bc += op->yes; - } - return 0; -} - -static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) -{ - const unsigned char *bc = bytecode; - int len = bytecode_len; - - while (len > 0) { - struct inet_diag_bc_op *op = (struct inet_diag_bc_op*)bc; - -//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len); - switch (op->code) { - case INET_DIAG_BC_AUTO: - case INET_DIAG_BC_S_COND: - case INET_DIAG_BC_D_COND: - case INET_DIAG_BC_S_GE: - case INET_DIAG_BC_S_LE: - case INET_DIAG_BC_D_GE: - case INET_DIAG_BC_D_LE: - if (op->yes < 4 || op->yes > len+4) - return -EINVAL; - case INET_DIAG_BC_JMP: - if (op->no < 4 || op->no > len+4) - return -EINVAL; - if (op->no < len && - !valid_cc(bytecode, bytecode_len, len-op->no)) - return -EINVAL; - break; - case INET_DIAG_BC_NOP: - if (op->yes < 4 || op->yes > len+4) - return -EINVAL; - break; - default: - return -EINVAL; - } - bc += op->yes; - len -= op->yes; - } - return len == 0 ? 
0 : -EINVAL; -} - -static int inet_diag_dump_sock(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) -{ - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - - if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { - struct inet_diag_entry entry; - struct rtattr *bc = (struct rtattr *)(r + 1); - struct inet_sock *inet = inet_sk(sk); - - entry.family = sk->sk_family; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (entry.family == AF_INET6) { - struct ipv6_pinfo *np = inet6_sk(sk); - - entry.saddr = np->rcv_saddr.s6_addr32; - entry.daddr = np->daddr.s6_addr32; - } else -#endif - { - entry.saddr = &inet->rcv_saddr; - entry.daddr = &inet->daddr; - } - entry.sport = inet->num; - entry.dport = ntohs(inet->dport); - entry.userlocks = sk->sk_userlocks; - - if (!inet_diag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), &entry)) - return 0; - } - - return inet_diag_fill(skb, sk, r->idiag_ext, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); -} - -static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, - struct request_sock *req, - u32 pid, u32 seq, - const struct nlmsghdr *unlh) -{ - const struct inet_request_sock *ireq = inet_rsk(req); - struct inet_sock *inet = inet_sk(sk); - unsigned char *b = skb->tail; - struct inet_diag_msg *r; - struct nlmsghdr *nlh; - long tmo; - - nlh = NLMSG_PUT(skb, pid, seq, unlh->nlmsg_type, sizeof(*r)); - nlh->nlmsg_flags = NLM_F_MULTI; - r = NLMSG_DATA(nlh); - - r->idiag_family = sk->sk_family; - r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; - r->idiag_retrans = req->retrans; - - r->id.idiag_if = sk->sk_bound_dev_if; - r->id.idiag_cookie[0] = (u32)(unsigned long)req; - r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1); - - tmo = req->expires - jiffies; - if (tmo < 0) - tmo = 0; - - r->id.idiag_sport = inet->sport; - r->id.idiag_dport = ireq->rmt_port; - r->id.idiag_src[0] = ireq->loc_addr; - r->id.idiag_dst[0] = ireq->rmt_addr; - r->idiag_expires = 
jiffies_to_msecs(tmo); - r->idiag_rqueue = 0; - r->idiag_wqueue = 0; - r->idiag_uid = sock_i_uid(sk); - r->idiag_inode = 0; -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - if (r->idiag_family == AF_INET6) { - ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6_rsk(req)->loc_addr); - ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6_rsk(req)->rmt_addr); - } -#endif - nlh->nlmsg_len = skb->tail - b; - - return skb->len; - -nlmsg_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, - struct netlink_callback *cb) -{ - struct inet_diag_entry entry; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - struct inet_connection_sock *icsk = inet_csk(sk); - struct listen_sock *lopt; - struct rtattr *bc = NULL; - struct inet_sock *inet = inet_sk(sk); - int j, s_j; - int reqnum, s_reqnum; - int err = 0; - - s_j = cb->args[3]; - s_reqnum = cb->args[4]; - - if (s_j > 0) - s_j--; - - entry.family = sk->sk_family; - - read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - - lopt = icsk->icsk_accept_queue.listen_opt; - if (!lopt || !lopt->qlen) - goto out; - - if (cb->nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(*r))) { - bc = (struct rtattr *)(r + 1); - entry.sport = inet->num; - entry.userlocks = sk->sk_userlocks; - } - - for (j = s_j; j < lopt->nr_table_entries; j++) { - struct request_sock *req, *head = lopt->syn_table[j]; - - reqnum = 0; - for (req = head; req; reqnum++, req = req->dl_next) { - struct inet_request_sock *ireq = inet_rsk(req); - - if (reqnum < s_reqnum) - continue; - if (r->id.idiag_dport != ireq->rmt_port && - r->id.idiag_dport) - continue; - - if (bc) { - entry.saddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - (entry.family == AF_INET6) ? - tcp6_rsk(req)->loc_addr.s6_addr32 : -#endif - &ireq->loc_addr; - entry.daddr = -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) - (entry.family == AF_INET6) ? 
- tcp6_rsk(req)->rmt_addr.s6_addr32 : -#endif - &ireq->rmt_addr; - entry.dport = ntohs(ireq->rmt_port); - - if (!inet_diag_bc_run(RTA_DATA(bc), - RTA_PAYLOAD(bc), &entry)) - continue; - } - - err = inet_diag_fill_req(skb, sk, req, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, cb->nlh); - if (err < 0) { - cb->args[3] = j + 1; - cb->args[4] = reqnum; - goto out; - } - } - - s_reqnum = 0; - } - -out: - read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock); - - return err; -} - -static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - int i, num; - int s_i, s_num; - struct inet_diag_req *r = NLMSG_DATA(cb->nlh); - const struct inet_diag_handler *handler; - struct inet_hashinfo *hashinfo; - - handler = inet_diag_table[cb->nlh->nlmsg_type]; - BUG_ON(handler == NULL); - hashinfo = handler->idiag_hashinfo; - - s_i = cb->args[1]; - s_num = num = cb->args[2]; - - if (cb->args[0] == 0) { - if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV))) - goto skip_listen_ht; - - inet_listen_lock(hashinfo); - for (i = s_i; i < INET_LHTABLE_SIZE; i++) { - struct sock *sk; - struct hlist_node *node; - - num = 0; - sk_for_each(sk, node, &hashinfo->listening_hash[i]) { - struct inet_sock *inet = inet_sk(sk); - - if (num < s_num) { - num++; - continue; - } - - if (r->id.idiag_sport != inet->sport && - r->id.idiag_sport) - goto next_listen; - - if (!(r->idiag_states & TCPF_LISTEN) || - r->id.idiag_dport || - cb->args[3] > 0) - goto syn_recv; - - if (inet_diag_dump_sock(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); - goto done; - } - -syn_recv: - if (!(r->idiag_states & TCPF_SYN_RECV)) - goto next_listen; - - if (inet_diag_dump_reqs(skb, sk, cb) < 0) { - inet_listen_unlock(hashinfo); - goto done; - } - -next_listen: - cb->args[3] = 0; - cb->args[4] = 0; - ++num; - } - - s_num = 0; - cb->args[3] = 0; - cb->args[4] = 0; - } - inet_listen_unlock(hashinfo); -skip_listen_ht: - cb->args[0] = 1; - s_i = num = s_num = 0; - } - - if (!(r->idiag_states & ~(TCPF_LISTEN 
| TCPF_SYN_RECV))) - return skb->len; - - for (i = s_i; i < hashinfo->ehash_size; i++) { - struct inet_ehash_bucket *head = &hashinfo->ehash[i]; - struct sock *sk; - struct hlist_node *node; - - if (i > s_i) - s_num = 0; - - read_lock_bh(&head->lock); - - num = 0; - sk_for_each(sk, node, &head->chain) { - struct inet_sock *inet = inet_sk(sk); - - if (num < s_num) - goto next_normal; - if (!(r->idiag_states & (1 << sk->sk_state))) - goto next_normal; - if (r->id.idiag_sport != inet->sport && - r->id.idiag_sport) - goto next_normal; - if (r->id.idiag_dport != inet->dport && r->id.idiag_dport) - goto next_normal; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { - read_unlock_bh(&head->lock); - goto done; - } -next_normal: - ++num; - } - - if (r->idiag_states & TCPF_TIME_WAIT) { - sk_for_each(sk, node, - &hashinfo->ehash[i + hashinfo->ehash_size].chain) { - struct inet_sock *inet = inet_sk(sk); - - if (num < s_num) - goto next_dying; - if (r->id.idiag_sport != inet->sport && - r->id.idiag_sport) - goto next_dying; - if (r->id.idiag_dport != inet->dport && - r->id.idiag_dport) - goto next_dying; - if (inet_diag_dump_sock(skb, sk, cb) < 0) { - read_unlock_bh(&head->lock); - goto done; - } -next_dying: - ++num; - } - } - read_unlock_bh(&head->lock); - } - -done: - cb->args[1] = i; - cb->args[2] = num; - return skb->len; -} - -static int inet_diag_dump_done(struct netlink_callback *cb) -{ - return 0; -} - - -static __inline__ int -inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) -{ - if (!(nlh->nlmsg_flags&NLM_F_REQUEST)) - return 0; - - if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX) - goto err_inval; - - if (inet_diag_table[nlh->nlmsg_type] == NULL) - return -ENOENT; - - if (NLMSG_LENGTH(sizeof(struct inet_diag_req)) > skb->len) - goto err_inval; - - if (nlh->nlmsg_flags&NLM_F_DUMP) { - if (nlh->nlmsg_len > - (4 + NLMSG_SPACE(sizeof(struct inet_diag_req)))) { - struct rtattr *rta = (void *)(NLMSG_DATA(nlh) + - sizeof(struct inet_diag_req)); - if (rta->rta_type != 
INET_DIAG_REQ_BYTECODE || - rta->rta_len < 8 || - rta->rta_len > - (nlh->nlmsg_len - - NLMSG_SPACE(sizeof(struct inet_diag_req)))) - goto err_inval; - if (inet_diag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta))) - goto err_inval; - } - return netlink_dump_start(idiagnl, skb, nlh, - inet_diag_dump, - inet_diag_dump_done); - } else { - return inet_diag_get_exact(skb, nlh); - } - -err_inval: - return -EINVAL; -} - - -static inline void inet_diag_rcv_skb(struct sk_buff *skb) -{ - int err; - struct nlmsghdr * nlh; - - if (skb->len >= NLMSG_SPACE(0)) { - nlh = (struct nlmsghdr *)skb->data; - if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len) - return; - err = inet_diag_rcv_msg(skb, nlh); - if (err || nlh->nlmsg_flags & NLM_F_ACK) - netlink_ack(skb, nlh, err); - } -} - -static void inet_diag_rcv(struct sock *sk, int len) -{ - struct sk_buff *skb; - unsigned int qlen = skb_queue_len(&sk->sk_receive_queue); - - while (qlen-- && (skb = skb_dequeue(&sk->sk_receive_queue))) { - inet_diag_rcv_skb(skb); - kfree_skb(skb); - } -} - -static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, - void *_info) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct tcp_info *info = _info; - - r->idiag_rqueue = tp->rcv_nxt - tp->copied_seq; - r->idiag_wqueue = tp->write_seq - tp->snd_una; - if (info != NULL) - tcp_get_info(sk, info); -} - -static struct inet_diag_handler tcp_diag_handler = { - .idiag_hashinfo = &tcp_hashinfo, - .idiag_get_info = tcp_diag_get_info, - .idiag_type = TCPDIAG_GETSOCK, - .idiag_info_size = sizeof(struct tcp_info), -}; - -static DEFINE_SPINLOCK(inet_diag_register_lock); - -int inet_diag_register(const struct inet_diag_handler *h) -{ - const __u16 type = h->idiag_type; - int err = -EINVAL; - - if (type >= INET_DIAG_GETSOCK_MAX) - goto out; - - spin_lock(&inet_diag_register_lock); - err = -EEXIST; - if (inet_diag_table[type] == NULL) { - inet_diag_table[type] = h; - err = 0; - } - spin_unlock(&inet_diag_register_lock); -out: - return err; -} 
-EXPORT_SYMBOL_GPL(inet_diag_register); - -void inet_diag_unregister(const struct inet_diag_handler *h) -{ - const __u16 type = h->idiag_type; - - if (type >= INET_DIAG_GETSOCK_MAX) - return; - - spin_lock(&inet_diag_register_lock); - inet_diag_table[type] = NULL; - spin_unlock(&inet_diag_register_lock); - - synchronize_rcu(); -} -EXPORT_SYMBOL_GPL(inet_diag_unregister); - -static int __init inet_diag_init(void) -{ - const int inet_diag_table_size = (INET_DIAG_GETSOCK_MAX * - sizeof(struct inet_diag_handler *)); - int err = -ENOMEM; - - inet_diag_table = kmalloc(inet_diag_table_size, GFP_KERNEL); - if (!inet_diag_table) - goto out; - - memset(inet_diag_table, 0, inet_diag_table_size); - - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, - THIS_MODULE); - if (idiagnl == NULL) - goto out_free_table; - - err = inet_diag_register(&tcp_diag_handler); - if (err) - goto out_sock_release; -out: - return err; -out_sock_release: - sock_release(idiagnl->sk_socket); -out_free_table: - kfree(inet_diag_table); - goto out; -} - -static void __exit inet_diag_exit(void) -{ - sock_release(idiagnl->sk_socket); - kfree(inet_diag_table); -} - -module_init(inet_diag_init); -module_exit(inet_diag_exit); -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 8cef9dc11fb7..93c5f92070f9 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 395100317875..0c340c3756c2 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include /* TCP Westwood structure */ -- cgit v1.2.3 From 0ba2c6e8c0fb5cde5a23a213c2e7cb851b85c310 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:55:44 -0700 Subject: [NETFILTER]: introduce and use aligned_u64 data type As proposed by Andi Kleen, this is required esp. 
for x86_64 architecture, where 64bit code needs 8byte aligned 64bit data types, but 32bit userspace apps will only align to 4bytes. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 5 +++-- include/linux/netfilter/nfnetlink_queue.h | 5 +++-- include/linux/types.h | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a61836a083e7..b04b03880595 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -5,6 +5,7 @@ * and not any kind of function definitions. It is shared between kernel and * userspace. Don't put kernel specific stuff in here */ +#include #include enum nfulnl_msg_types { @@ -27,8 +28,8 @@ struct nfulnl_msg_packet_hw { } __attribute__ ((packed)); struct nfulnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 2d8d2b2cfcaa..9e774373244c 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -1,6 +1,7 @@ #ifndef _NFNETLINK_QUEUE_H #define _NFNETLINK_QUEUE_H +#include #include enum nfqnl_msg_types { @@ -24,8 +25,8 @@ struct nfqnl_msg_packet_hw { } __attribute__ ((packed)); struct nfqnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); enum nfqnl_attr_type { diff --git a/include/linux/types.h b/include/linux/types.h index dcb13f865df9..2b678c22ca4a 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -123,6 +123,9 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif +/* this is a special 64bit data type that is 8-byte aligned */ +#define aligned_u64 
unsigned long long __attribute__((aligned(8))) + /* * The type used for indexing onto a disc or disc partition. * If required, asm/types.h can override it and define -- cgit v1.2.3 From 9d810fd2d28a9d672eca3136476af1a54a380bb2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:56:26 -0700 Subject: [NETFILTER]: Add new iptables "connbytes" match This patch adds a new "connbytes" match that utilizes the CONFIG_NF_CT_ACCT per-connection byte and packet counters. Using it you can do things like packet classification on average packet size within a connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 25 ++++ net/ipv4/netfilter/Kconfig | 11 +- net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_connbytes.c | 166 +++++++++++++++++++++++++++ 4 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h create mode 100644 net/ipv4/netfilter/ipt_connbytes.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h new file mode 100644 index 000000000000..abaa65afd4e9 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -0,0 +1,25 @@ +#ifndef _IPT_CONNBYTES_H +#define _IPT_CONNBYTES_H + +enum ipt_connbytes_what { + IPT_CONNBYTES_WHAT_PKTS, + IPT_CONNBYTES_WHAT_BYTES, + IPT_CONNBYTES_WHAT_AVGPKT, +}; + +enum ipt_connbytes_direction { + IPT_CONNBYTES_DIR_ORIGINAL, + IPT_CONNBYTES_DIR_REPLY, + IPT_CONNBYTES_DIR_BOTH, +}; + +struct ipt_connbytes_info +{ + struct { + aligned_u64 from; /* count to be matched */ + aligned_u64 to; /* count to be matched */ + } count; + u_int8_t what; /* ipt_connbytes_what */ + u_int8_t direction; /* ipt_connbytes_direction */ +}; +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 9f5e1d769b5f..3f7e6e49cbdd 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -386,6
+386,16 @@ config IP_NF_MATCH_CONNMARK . The module will be called ipt_connmark.o. If unsure, say `N'. +config IP_NF_MATCH_CONNBYTES + tristate 'Connection byte/packet counter match support' + depends on IP_NF_CT_ACCT && IP_NF_IPTABLES + help + This option adds a `connbytes' match, which allows you to match the + number of bytes and/or packets for each direction within a connection. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + config IP_NF_MATCH_HASHLIMIT tristate 'hashlimit match support' depends on IP_NF_IPTABLES @@ -723,6 +733,5 @@ config IP_NF_CONNTRACK_NETLINK help This option enables support for a netlink-based userspace interface - endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 58aa7c616e1f..7c8ae858aa43 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o +obj-$(CONFIG_IP_NF_MATCH_CONNBYTES) += ipt_connbytes.o obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c new file mode 100644 index 000000000000..0dfb52c0e808 --- /dev/null +++ b/net/ipv4/netfilter/ipt_connbytes.c @@ -0,0 +1,166 @@ +/* Kernel module to match connection tracking byte counter. + * GPL (C) 2002 Martin Devera (devik@cdi.cz). 
+ * + * 2004-07-20 Harald Welte + * - reimplemented to use per-connection accounting counters + * - add functionality to match number of packets + * - add functionality to match average packet size + * - add support to match directions seperately + * + */ +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables match for matching number of pkts/bytes per connection"); + +/* 64bit divisor, dividend and result. dynamic precision */ +static u_int64_t div64_64(u_int64_t divisor, u_int64_t dividend) +{ + u_int64_t result = divisor; + + if (dividend > 0xffffffff) { + int first_bit = find_first_bit((unsigned long *) &dividend, sizeof(dividend)); + /* calculate number of bits to shift. shift exactly enough + * bits to make dividend fit in 32bits. */ + int num_shift = (64 - 32 - first_bit); + /* first bit has to be < 32, since dividend was > 0xffffffff */ + result = result >> num_shift; + dividend = dividend >> num_shift; + } + + do_div(divisor, dividend); + + return divisor; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_connbytes_info *sinfo = matchinfo; + enum ip_conntrack_info ctinfo; + struct ip_conntrack *ct; + u_int64_t what = 0; /* initialize to make gcc happy */ + + if (!(ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo))) + return 0; /* no match */ + + switch (sinfo->what) { + case IPT_CONNBYTES_WHAT_PKTS: + switch (sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = ct->counters[IP_CT_DIR_ORIGINAL].packets; + break; + case IPT_CONNBYTES_DIR_REPLY: + what = ct->counters[IP_CT_DIR_REPLY].packets; + break; + case IPT_CONNBYTES_DIR_BOTH: + what = ct->counters[IP_CT_DIR_ORIGINAL].packets; + what += ct->counters[IP_CT_DIR_REPLY].packets; + break; + } + break; + case IPT_CONNBYTES_WHAT_BYTES: + switch
(sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; + break; + case IPT_CONNBYTES_DIR_REPLY: + what = ct->counters[IP_CT_DIR_REPLY].bytes; + break; + case IPT_CONNBYTES_DIR_BOTH: + what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; + what += ct->counters[IP_CT_DIR_REPLY].bytes; + break; + } + break; + case IPT_CONNBYTES_WHAT_AVGPKT: + switch (sinfo->direction) { + case IPT_CONNBYTES_DIR_ORIGINAL: + what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, + ct->counters[IP_CT_DIR_ORIGINAL].packets); + break; + case IPT_CONNBYTES_DIR_REPLY: + what = div64_64(ct->counters[IP_CT_DIR_REPLY].bytes, + ct->counters[IP_CT_DIR_REPLY].packets); + break; + case IPT_CONNBYTES_DIR_BOTH: + { + u_int64_t bytes; + u_int64_t pkts; + bytes = ct->counters[IP_CT_DIR_ORIGINAL].bytes + + ct->counters[IP_CT_DIR_REPLY].bytes; + pkts = ct->counters[IP_CT_DIR_ORIGINAL].packets+ + ct->counters[IP_CT_DIR_REPLY].packets; + + /* FIXME_THEORETICAL: what to do if sum + * overflows ? 
*/ + + what = div64_64(bytes, pkts); + } + break; + } + break; + } + + if (sinfo->count.to) + return (what <= sinfo->count.to && what >= sinfo->count.from); + else + return (what >= sinfo->count.from); +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_connbytes_info *sinfo = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info))) + return 0; + + if (sinfo->what != IPT_CONNBYTES_WHAT_PKTS && + sinfo->what != IPT_CONNBYTES_WHAT_BYTES && + sinfo->what != IPT_CONNBYTES_WHAT_AVGPKT) + return 0; + + if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL && + sinfo->direction != IPT_CONNBYTES_DIR_REPLY && + sinfo->direction != IPT_CONNBYTES_DIR_BOTH) + return 0; + + return 1; +} + +static struct ipt_match state_match = { + .name = "connbytes", + .match = &match, + .checkentry = &check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&state_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&state_match); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From 25ed891019b84498c83903ecf53df7ce35e9cff6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 13 Aug 2005 13:58:21 -0700 Subject: [NETFILTER]: Nicer names for ipt_connbytes constants Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 6 +++--- net/ipv4/netfilter/ipt_connbytes.c | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h index abaa65afd4e9..9e5532f8d8ac 100644 --- a/include/linux/netfilter_ipv4/ipt_connbytes.h +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -2,9 +2,9 @@ #define _IPT_CONNBYTES_H enum ipt_connbytes_what { - IPT_CONNBYTES_WHAT_PKTS, - IPT_CONNBYTES_WHAT_BYTES, - IPT_CONNBYTES_WHAT_AVGPKT, + IPT_CONNBYTES_PKTS, + IPT_CONNBYTES_BYTES, + IPT_CONNBYTES_AVGPKT, }; enum ipt_connbytes_direction { diff --git a/net/ipv4/netfilter/ipt_connbytes.c b/net/ipv4/netfilter/ipt_connbytes.c index 47128c073d85..df4a42c6da22 100644 --- a/net/ipv4/netfilter/ipt_connbytes.c +++ b/net/ipv4/netfilter/ipt_connbytes.c @@ -54,7 +54,7 @@ match(const struct sk_buff *skb, return 0; /* no match */ switch (sinfo->what) { - case IPT_CONNBYTES_WHAT_PKTS: + case IPT_CONNBYTES_PKTS: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = ct->counters[IP_CT_DIR_ORIGINAL].packets; @@ -68,7 +68,7 @@ match(const struct sk_buff *skb, break; } break; - case IPT_CONNBYTES_WHAT_BYTES: + case IPT_CONNBYTES_BYTES: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = ct->counters[IP_CT_DIR_ORIGINAL].bytes; @@ -82,7 +82,7 @@ match(const struct sk_buff *skb, break; } break; - case IPT_CONNBYTES_WHAT_AVGPKT: + case IPT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case IPT_CONNBYTES_DIR_ORIGINAL: what = div64_64(ct->counters[IP_CT_DIR_ORIGINAL].bytes, @@ -128,9 +128,9 @@ static int check(const char *tablename, if (matchsize != IPT_ALIGN(sizeof(struct ipt_connbytes_info))) return 0; - if (sinfo->what != IPT_CONNBYTES_WHAT_PKTS && - sinfo->what != IPT_CONNBYTES_WHAT_BYTES && - sinfo->what != IPT_CONNBYTES_WHAT_AVGPKT) + if (sinfo->what != IPT_CONNBYTES_PKTS && + sinfo->what != 
IPT_CONNBYTES_BYTES && + sinfo->what != IPT_CONNBYTES_AVGPKT) return 0; if (sinfo->direction != IPT_CONNBYTES_DIR_ORIGINAL && -- cgit v1.2.3 From a61bbcf28a8cb0ba56f8193d512f7222e711a294 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 17:24:31 -0700 Subject: [NET]: Store skb->timestamp as offset to a base timestamp Reduces skb size by 8 bytes on 64-bit. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- drivers/atm/ambassador.c | 2 +- drivers/atm/atmtcp.c | 2 +- drivers/atm/eni.c | 2 +- drivers/atm/firestream.c | 2 +- drivers/atm/fore200e.c | 2 +- drivers/atm/he.c | 2 +- drivers/atm/horizon.c | 2 +- drivers/atm/idt77252.c | 8 +++---- drivers/atm/lanai.c | 2 +- drivers/atm/nicstar.c | 10 ++++----- drivers/atm/zatm.c | 2 +- include/linux/skbuff.h | 47 ++++++++++++++++++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 2 +- include/net/neighbour.h | 9 +++++++- include/net/sock.h | 13 ++++++----- net/bluetooth/hci_core.c | 2 +- net/bluetooth/hci_event.c | 2 +- net/bluetooth/hci_sock.c | 8 +++++-- net/bridge/netfilter/ebt_ulog.c | 2 +- net/core/dev.c | 28 ++++++++++++++++-------- net/core/neighbour.c | 7 +++--- net/core/skbuff.c | 8 +++++-- net/econet/af_econet.c | 2 +- net/ipv4/arp.c | 4 +++- net/ipv4/ip_fragment.c | 4 ++-- net/ipv4/netfilter/ip_queue.c | 4 ++-- net/ipv4/netfilter/ipt_ULOG.c | 8 +++---- net/ipv4/tcp_input.c | 10 ++++++--- net/ipv4/tcp_output.c | 4 ++-- net/ipv6/ndisc.c | 4 +++- net/ipv6/netfilter/ip6_queue.c | 4 ++-- net/ipv6/reassembly.c | 4 ++-- net/ipx/af_ipx.c | 4 ++-- net/netfilter/nfnetlink_log.c | 6 ++--- net/netfilter/nfnetlink_queue.c | 6 ++--- net/packet/af_packet.c | 8 +++---- net/sctp/input.c | 4 ++-- net/sctp/sm_make_chunk.c | 9 ++++---- net/sunrpc/svcsock.c | 11 ++++++---- 39 files changed, 172 insertions(+), 88 deletions(-) (limited to 'include/linux') diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 73c6b85299c1..d74a7c5e75dd 100644 --- a/drivers/atm/ambassador.c +++ 
b/drivers/atm/ambassador.c @@ -513,7 +513,7 @@ static void rx_complete (amb_dev * dev, rx_out * rx) { // VC layer stats atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability atm_vcc->push (atm_vcc, skb); return; diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c index f2f01cb82cb4..57f1810fdccd 100644 --- a/drivers/atm/atmtcp.c +++ b/drivers/atm/atmtcp.c @@ -325,7 +325,7 @@ static int atmtcp_c_send(struct atm_vcc *vcc,struct sk_buff *skb) result = -ENOBUFS; goto done; } - do_gettimeofday(&new_skb->stamp); + __net_timestamp(new_skb); memcpy(skb_put(new_skb,skb->len),skb->data,skb->len); out_vcc->push(out_vcc,new_skb); atomic_inc(&vcc->stats->tx); diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 10da36934769..c13c4d736ef5 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -537,7 +537,7 @@ static int rx_aal0(struct atm_vcc *vcc) return 0; } skb_put(skb,length); - skb->stamp = eni_vcc->timestamp; + skb_set_timestamp(skb, &eni_vcc->timestamp); DPRINTK("got len %ld\n",length); if (do_rx_dma(vcc,skb,1,length >> 2,length >> 2)) return 1; eni_vcc->rxing++; diff --git a/drivers/atm/firestream.c b/drivers/atm/firestream.c index b078fa548ebf..58219744f5db 100644 --- a/drivers/atm/firestream.c +++ b/drivers/atm/firestream.c @@ -815,7 +815,7 @@ static void process_incoming (struct fs_dev *dev, struct queue *q) skb_put (skb, qe->p1 & 0xffff); ATM_SKB(skb)->vcc = atm_vcc; atomic_inc(&atm_vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-skb: %p (pushed)\n", skb); atm_vcc->push (atm_vcc, skb); fs_dprintk (FS_DEBUG_ALLOC, "Free rec-d: %p\n", pe); diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 5f702199543a..2bf723a7b6e6 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -1176,7 +1176,7 @@ fore200e_push_rpd(struct fore200e* fore200e, struct atm_vcc* vcc, struct rpd* rp return -ENOMEM; } - 
do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #ifdef FORE200E_52BYTE_AAL0_SDU if (cell_header) { diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 28250c9b32d6..fde9334059af 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -1886,7 +1886,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) if (rx_skb_reserve > 0) skb_reserve(skb, rx_skb_reserve); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); for (iov = he_vcc->iov_head; iov < he_vcc->iov_tail; ++iov) { diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 924a2c8988bd..0cded0468003 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -1034,7 +1034,7 @@ static void rx_schedule (hrz_dev * dev, int irq) { struct atm_vcc * vcc = ATM_SKB(skb)->vcc; // VC layer stats atomic_inc(&vcc->stats->rx); - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); // end of our responsability vcc->push (vcc, skb); } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 30b7e990ed0b..b4a76cade646 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1101,7 +1101,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) cell, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); @@ -1179,7 +1179,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1201,7 +1201,7 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) skb_trim(skb, len); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); @@ -1340,7 +1340,7 @@ idt77252_rx_raw(struct idt77252_dev *card) ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); diff 
--git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index ffe3afa723b8..51ec14787293 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -1427,7 +1427,7 @@ static void vcc_rx_aal5(struct lanai_vcc *lvcc, int endptr) skb_put(skb, size); vcc_rx_memcpy(skb->data, lvcc, size); ATM_SKB(skb)->vcc = lvcc->rx.atmvcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); lvcc->rx.atmvcc->push(lvcc->rx.atmvcc, skb); atomic_inc(&lvcc->rx.atmvcc->stats->rx); out: diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index a0e3bd861f1c..c57e20dcb0f8 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -2213,7 +2213,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(sb->tail, cell, ATM_CELL_PAYLOAD); skb_put(sb, ATM_CELL_PAYLOAD); ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); cell += ATM_CELL_PAYLOAD; @@ -2346,7 +2346,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) skb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2373,7 +2373,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) sb->destructor = ns_sb_destructor; #endif /* NS_USE_DESTRUCTORS */ ATM_SKB(sb)->vcc = vcc; - do_gettimeofday(&sb->stamp); + __net_timestamp(sb); vcc->push(vcc, sb); atomic_inc(&vcc->stats->rx); } @@ -2398,7 +2398,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) memcpy(skb->data, sb->data, NS_SMBUFSIZE); skb_put(skb, len - NS_SMBUFSIZE); ATM_SKB(skb)->vcc = vcc; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); } @@ -2505,7 +2505,7 @@ static void dequeue_rx(ns_dev *card, ns_rsqe *rsqe) #ifdef NS_USE_DESTRUCTORS hb->destructor = ns_hb_destructor; #endif /* NS_USE_DESTRUCTORS */ - do_gettimeofday(&hb->stamp); + __net_timestamp(hb); vcc->push(vcc, hb); 
atomic_inc(&vcc->stats->rx); } diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index 85fee9530fa9..c4b75ecf9460 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -400,7 +400,7 @@ unsigned long *x; EVENT("error code 0x%x/0x%x\n",(here[3] & uPD98401_AAL5_ES) >> uPD98401_AAL5_ES_SHIFT,error); skb = ((struct rx_buffer_head *) bus_to_virt(here[2]))->skb; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); #if 0 printk("[-3..0] 0x%08lx 0x%08lx 0x%08lx 0x%08lx\n",((unsigned *) skb->data)[-3], ((unsigned *) skb->data)[-2],((unsigned *) skb->data)[-1], diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60b32151f76a..32635c401d4d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,13 +155,20 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) +extern struct timeval skb_tv_base; + +struct skb_timeval { + u32 off_sec; + u32 off_usec; +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @stamp: Time we arrived + * @tstamp: Time we arrived stored as offset to skb_tv_base * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -202,7 +209,7 @@ struct sk_buff { struct sk_buff *prev; struct sock *sk; - struct timeval stamp; + struct skb_timeval tstamp; struct net_device *dev; struct net_device *input_dev; @@ -1213,6 +1220,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, extern void skb_init(void); extern void skb_add_mtu(int mtu); +/** + * skb_get_timestamp - get timestamp from a skb + * @skb: skb to get stamp from + * @stamp: pointer to struct timeval to store stamp in + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts the offset back to a struct timeval and stores + * it in stamp. 
+ */ +static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + stamp->tv_sec = skb->tstamp.off_sec; + stamp->tv_usec = skb->tstamp.off_usec; + if (skb->tstamp.off_sec) { + stamp->tv_sec += skb_tv_base.tv_sec; + stamp->tv_usec += skb_tv_base.tv_usec; + } +} + +/** + * skb_set_timestamp - set timestamp of a skb + * @skb: skb to set stamp of + * @stamp: pointer to struct timeval to get stamp from + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts a struct timeval to an offset and stores + * it in the skb. + */ +static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; + skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; +} + +extern void __net_timestamp(struct sk_buff *skb); + #ifdef CONFIG_NETFILTER static inline void nf_conntrack_put(struct nf_conntrack *nfct) { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731b..7f933f302078 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) bt_cb(skb)->incoming = 1; /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5ab..34c07731933d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, return neigh_create(tbl, pkey, dev); } -#define LOCALLY_ENQUEUED -2 +struct neighbour_cb { + unsigned long sched_next; + unsigned int flags; +}; + +#define LOCALLY_ENQUEUED 0x1 + +#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index 065df67b6422..d59428877078 100644 --- a/include/net/sock.h 
+++ b/include/net/sock.h @@ -1282,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval *stamp = &skb->stamp; + struct timeval stamp; + + skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. */ - if (stamp->tv_sec == 0) - do_gettimeofday(stamp); + if (stamp.tv_sec == 0) + do_gettimeofday(&stamp); + skb_set_timestamp(skb, &stamp); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - stamp); + &stamp); } else - sk->sk_stamp = *stamp; + sk->sk_stamp = stamp; } /** diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 4f9e11b533a3..55dc42eac92c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -999,7 +999,7 @@ static int hci_send_frame(struct sk_buff *skb) if (atomic_read(&hdev->promisc)) { /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); hci_send_to_sock(hdev, skb); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 40b219560bb1..d6da0939216d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1087,7 +1087,7 @@ void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) memcpy(ev->data, data, dlen); bt_cb(skb)->incoming = 1; - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); bt_cb(skb)->pkt_type = HCI_EVENT_PKT; skb->dev = (void *) hdev; diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index eed9090d77f1..32ef7975a139 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -332,8 +332,12 @@ static inline void hci_sock_cmsg(struct sock *sk, struct msghdr *msg, struct sk_ put_cmsg(msg, SOL_HCI, HCI_CMSG_DIR, sizeof(incoming), &incoming); } - if (mask & HCI_CMSG_TSTAMP) - put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(skb->stamp), &skb->stamp); + if (mask & 
HCI_CMSG_TSTAMP) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_HCI, HCI_CMSG_TSTAMP, sizeof(tv), &tv); + } } static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 561d75c8ed5a..acb888d32587 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -162,7 +162,7 @@ static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, pm->version = EBT_ULOG_VERSION; do_gettimeofday(&pm->stamp); if (ub->qlen == 1) - ub->skb->stamp = pm->stamp; + skb_set_timestamp(ub->skb, &pm->stamp); pm->data_len = copy_len; pm->mark = skb->nfmark; pm->hook = hooknr; diff --git a/net/core/dev.c b/net/core/dev.c index 9d153eb1e8cf..a3ed53cc4af8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1009,13 +1009,22 @@ void net_disable_timestamp(void) atomic_dec(&netstamp_needed); } -static inline void net_timestamp(struct timeval *stamp) +void __net_timestamp(struct sk_buff *skb) +{ + struct timeval tv; + + do_gettimeofday(&tv); + skb_set_timestamp(skb, &tv); +} +EXPORT_SYMBOL(__net_timestamp); + +static inline void net_timestamp(struct sk_buff *skb) { if (atomic_read(&netstamp_needed)) - do_gettimeofday(stamp); + __net_timestamp(skb); else { - stamp->tv_sec = 0; - stamp->tv_usec = 0; + skb->tstamp.off_sec = 0; + skb->tstamp.off_usec = 0; } } @@ -1027,7 +1036,8 @@ static inline void net_timestamp(struct timeval *stamp) void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; - net_timestamp(&skb->stamp); + + net_timestamp(skb); rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_all, list) { @@ -1379,8 +1389,8 @@ int netif_rx(struct sk_buff *skb) if (netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); /* * The code is rearranged so that the path is the most @@ -1566,8 +1576,8 @@ int 
netif_receive_skb(struct sk_buff *skb) if (skb->dev->poll && netpoll_rx(skb)) return NET_RX_DROP; - if (!skb->stamp.tv_sec) - net_timestamp(&skb->stamp); + if (!skb->tstamp.off_sec) + net_timestamp(skb); if (!skb->input_dev) skb->input_dev = skb->dev; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 1beb782ac41b..72ee00f7b30c 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1217,7 +1217,7 @@ static void neigh_proxy_process(unsigned long arg) while (skb != (struct sk_buff *)&tbl->proxy_queue) { struct sk_buff *back = skb; - long tdif = back->stamp.tv_usec - now; + long tdif = NEIGH_CB(back)->sched_next - now; skb = skb->next; if (tdif <= 0) { @@ -1248,8 +1248,9 @@ void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, kfree_skb(skb); return; } - skb->stamp.tv_sec = LOCALLY_ENQUEUED; - skb->stamp.tv_usec = sched_next; + + NEIGH_CB(skb)->sched_next = sched_next; + NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED; spin_lock(&tbl->proxy_queue.lock); if (del_timer(&tbl->proxy_timer)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ef498cb9f786..39a161dbc16d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -70,6 +70,8 @@ static kmem_cache_t *skbuff_head_cache; +struct timeval __read_mostly skb_tv_base; + /* * Keep out-of-line to prevent kernel bloat. 
* __builtin_return_address is not used because it is not always @@ -331,7 +333,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) n->next = n->prev = NULL; n->sk = NULL; - C(stamp); + C(tstamp); C(dev); C(h); C(nh); @@ -408,7 +410,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; new->pkt_type = old->pkt_type; - new->stamp = old->stamp; + new->tstamp = old->tstamp; new->destructor = NULL; #ifdef CONFIG_NETFILTER new->nfmark = old->nfmark; @@ -1645,6 +1647,7 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); @@ -1678,3 +1681,4 @@ EXPORT_SYMBOL(skb_prepare_seq_read); EXPORT_SYMBOL(skb_seq_read); EXPORT_SYMBOL(skb_abort_seq_read); EXPORT_SYMBOL(skb_find_text); +EXPORT_SYMBOL(skb_tv_base); diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8f0639905558..4a62093eb343 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -159,7 +159,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_toiovec(msg->msg_iov, skb->data, copied); if (err) goto out_free; - sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &sk->sk_stamp); if (msg->msg_name) memcpy(msg->msg_name, skb->cb, msg->msg_namelen); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 6eb9c549d643..8bf312bdea13 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -865,7 +865,7 @@ static int arp_process(struct sk_buff *skb) if (n) neigh_release(n); - if (skb->stamp.tv_sec == LOCALLY_ENQUEUED || + if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || skb->pkt_type == PACKET_HOST || in_dev->arp_parms->proxy_delay == 0) { arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); @@ -948,6 +948,8 @@ int arp_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if ((skb = skb_share_check(skb, 
GFP_ATOMIC)) == NULL) goto out_of_mem; + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); freeskb: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 1ac64c0c5b37..9e6e683cc34d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -533,7 +533,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (skb->dev) qp->iif = skb->dev->ifindex; skb->dev = NULL; - qp->stamp = skb->stamp; + skb_get_timestamp(skb, &qp->stamp); qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) @@ -615,7 +615,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->next = NULL; head->dev = dev; - head->stamp = qp->stamp; + skb_set_timestamp(head, &qp->stamp); iph = head->nh.iph; iph->frag_off = 0; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 1c49833e00a9..7f2bcc7198fa 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -240,8 +240,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b86f06ec9762..1d8ac4595e17 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -220,13 +220,13 @@ static void ipt_ulog_packet(unsigned int hooknum, pm = NLMSG_DATA(nlh); /* We might not have a timestamp, get one */ - if (skb->stamp.tv_sec == 0) - do_gettimeofday((struct timeval *)&skb->stamp); + if (skb->tstamp.off_sec == 0) + 
__net_timestamp((struct sk_buff *)skb); /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb->stamp.tv_sec; - pm->timestamp_usec = skb->stamp.tv_usec; + pm->timestamp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + pm->timestamp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; pm->mark = skb->nfmark; pm->hook = hooknum; if (prefix != NULL) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fdd9547fb783..ebb8654e3dee 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2097,9 +2097,13 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if (seq_usrtt) - *seq_usrtt = (usnow.tv_sec - skb->stamp.tv_sec) * 1000000 - + (usnow.tv_usec - skb->stamp.tv_usec); + if (seq_usrtt) { + struct timeval tv; + + skb_get_timestamp(skb, &tv); + *seq_usrtt = (usnow.tv_sec - tv.tv_sec) * 1000000 + + (usnow.tv_usec - tv.tv_usec); + } if (sacked & TCPCB_SACKED_ACKED) tp->sacked_out -= tcp_skb_pcount(skb); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 267b0fcbfc9c..8d92ab562aed 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -282,7 +282,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) /* If congestion control is doing timestamping */ if (icsk->icsk_ca_ops->rtt_sample) - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); sysctl_flags = 0; if (tcb->flags & TCPCB_FLAG_SYN) { @@ -483,7 +483,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned * skbs, which it never sent before. 
--ANK */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; - buff->stamp = skb->stamp; + buff->tstamp = skb->tstamp; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= tcp_skb_pcount(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 7ae72d4c9bd2..a7eae30f4554 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -812,7 +812,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { - if (skb->stamp.tv_sec != LOCALLY_ENQUEUED && + if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && idev->nd_parms->proxy_delay != 0) { @@ -1487,6 +1487,8 @@ int ndisc_rcv(struct sk_buff *skb) return 0; } + memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); + switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: ndisc_recv_ns(skb); diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 7ecb91e24a34..446764545b10 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -238,8 +238,8 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) pmsg->packet_id = (unsigned long )entry; pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->stamp.tv_sec; - pmsg->timestamp_usec = entry->skb->stamp.tv_usec; + pmsg->timestamp_sec = skb_tv_base.tv_sec + entry->skb->tstamp.off_sec; + pmsg->timestamp_usec = skb_tv_base.tv_usec + entry->skb->tstamp.off_usec; pmsg->mark = entry->skb->nfmark; pmsg->hook = entry->info->hook; pmsg->hw_protocol = entry->skb->protocol; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 59e7c6317872..9d9e04344c77 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (skb->dev) fq->iif = skb->dev->ifindex; skb->dev = NULL; - fq->stamp = skb->stamp; + skb_get_timestamp(skb, &fq->stamp); fq->meat += 
skb->len; atomic_add(skb->truesize, &ip6_frag_mem); @@ -664,7 +664,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->next = NULL; head->dev = dev; - head->stamp = fq->stamp; + skb_set_timestamp(head, &fq->stamp); head->nh.ipv6h->payload_len = htons(payload_len); *skb_in = head; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 39d5939ccd6c..c54f8acc97eb 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1796,8 +1796,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, copied); if (rc) goto out_free; - if (skb->stamp.tv_sec) - sk->sk_stamp = skb->stamp; + if (skb->tstamp.off_sec) + skb_get_timestamp(skb, &sk->sk_stamp) msg->msg_namelen = sizeof(*sipx); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 464c9fa2934b..ff5601ceedcb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -491,11 +491,11 @@ __build_packet_message(struct nfulnl_instance *inst, NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw); } - if (skb->stamp.tv_sec) { + if (skb->tstamp.off_sec) { struct nfulnl_msg_packet_timestamp ts; - ts.sec = cpu_to_be64(skb->stamp.tv_sec); - ts.usec = cpu_to_be64(skb->stamp.tv_usec); + ts.sec = cpu_to_be64(skb_tv_base.tv_sec + skb->tstamp.off_sec); + ts.usec = cpu_to_be64(skb_tv_base.tv_usec + skb->tstamp.off_usec); NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 741686ff71d8..e3a5285329af 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -494,11 +494,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw); } - if (entry->skb->stamp.tv_sec) { + if (entry->skb->tstamp.off_sec) { struct nfqnl_msg_packet_timestamp ts; - ts.sec = htonll(entry->skb->stamp.tv_sec); - ts.usec = htonll(entry->skb->stamp.tv_usec); + ts.sec = htonll(skb_tv_base.tv_sec + 
entry->skb->tstamp.off_sec); + ts.usec = htonll(skb_tv_base.tv_usec + entry->skb->tstamp.off_usec); NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index deb5f6f7f858..ba997095f08f 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -635,12 +635,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe h->tp_snaplen = snaplen; h->tp_mac = macoff; h->tp_net = netoff; - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } - h->tp_sec = skb->stamp.tv_sec; - h->tp_usec = skb->stamp.tv_usec; + h->tp_sec = skb_tv_base.tv_sec + skb->tstamp.off_sec; + h->tp_usec = skb_tv_base.tv_usec + skb->tstamp.off_usec; sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); sll->sll_halen = 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index 742be9171b7d..28f32243397f 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -236,8 +236,8 @@ int sctp_rcv(struct sk_buff *skb) } /* SCTP seems to always need a timestamp right now (FIXME) */ - if (skb->stamp.tv_sec == 0) { - do_gettimeofday(&skb->stamp); + if (skb->tstamp.off_sec == 0) { + __net_timestamp(skb); sock_enable_timestamp(sk); } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 00d32b7c8266..3868a8d70cc0 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1362,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( char *key; sctp_scope_t scope; struct sk_buff *skb = chunk->skb; + struct timeval tv; headersize = sizeof(sctp_chunkhdr_t) + SCTP_SECRET_SIZE; bodysize = ntohs(chunk->chunk_hdr->length) - headersize; @@ -1434,7 +1435,8 @@ no_hmac: * an association, there is no need to check cookie's expiration * for init collision case of lost COOKIE ACK. 
*/ - if (!asoc && tv_lt(bear_cookie->expiration, skb->stamp)) { + skb_get_timestamp(skb, &tv); + if (!asoc && tv_lt(bear_cookie->expiration, tv)) { __u16 len; /* * Section 3.3.10.3 Stale Cookie Error (3) @@ -1447,10 +1449,9 @@ no_hmac: len = ntohs(chunk->chunk_hdr->length); *errp = sctp_make_op_error_space(asoc, chunk, len); if (*errp) { - suseconds_t usecs = (skb->stamp.tv_sec - + suseconds_t usecs = (tv.tv_sec - bear_cookie->expiration.tv_sec) * 1000000L + - skb->stamp.tv_usec - - bear_cookie->expiration.tv_usec; + tv.tv_usec - bear_cookie->expiration.tv_usec; usecs = htonl(usecs); sctp_init_cause(*errp, SCTP_ERROR_STALE_COOKIE, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 199d3747bd42..05fe2e735538 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -584,13 +584,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp) /* possibly an icmp error */ dprintk("svc: recvfrom returned error %d\n", -err); } - if (skb->stamp.tv_sec == 0) { - skb->stamp.tv_sec = xtime.tv_sec; - skb->stamp.tv_usec = xtime.tv_nsec / NSEC_PER_USEC; + if (skb->tstamp.off_sec == 0) { + struct timeval tv; + + tv.tv_sec = xtime.tv_sec; + tv.tv_usec = xtime.tv_nsec * 1000; + skb_set_timestamp(skb, &tv); /* Don't enable netstamp, sunrpc doesn't need that much accuracy */ } - svsk->sk_sk->sk_stamp = skb->stamp; + skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp); set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */ /* -- cgit v1.2.3 From fb13ab2849074244a51ae5147483610529a29ced Mon Sep 17 00:00:00 2001 From: Domen Puncer Date: Sun, 14 Aug 2005 17:32:05 -0700 Subject: [NETFILTER]: Remove two unused files Signed-off-by: Domen Puncer Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_logging.h | 20 -------------------- include/linux/netfilter_ipv6/ip6_logging.h | 20 -------------------- 2 files changed, 40 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ip_logging.h delete mode 100644 include/linux/netfilter_ipv6/ip6_logging.h (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h deleted file mode 100644 index 0c5c52cb6589..000000000000 --- a/include/linux/netfilter_ipv4/ip_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv4 macros for the internal logging interface. */ -#ifndef __IP_LOGGING_H -#define __IP_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip(pfh,len,fmt,args...) \ - nf_log(AF_INET,pfh,len,fmt,##args) - -#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging) -#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP_LOGGING_H*/ diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h deleted file mode 100644 index a0b2ee3043aa..000000000000 --- a/include/linux/netfilter_ipv6/ip6_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv6 macros for the nternal logging interface. */ -#ifndef __IP6_LOGGING_H -#define __IP6_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip6(pfh,len,fmt,args...) 
\ - nf_log(AF_INET6,pfh,len,fmt,##args) - -#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging) -#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP6_LOGGING_H*/ -- cgit v1.2.3 From db080529798b497eb5a37b92a25e966be5a7dd5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:26:34 -0700 Subject: [NETLINK]: Remove unused groups member from struct netlink_skb_parms Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - net/ipv4/fib_frontend.c | 1 - net/netlink/af_netlink.c | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d5e09bcd80f9..eab51f9c9c86 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -106,7 +106,6 @@ struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ __u32 pid; - __u32 groups; __u32 dst_pid; __u32 dst_groups; kernel_cap_t eff_cap; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b5e2f1550c91..75d03e37b9a8 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -558,7 +558,6 @@ static void nl_fib_input(struct sock *sk, int len) nl_fib_lookup(frn, tb); pid = nlh->nlmsg_pid; /*pid of sending process */ - NETLINK_CB(skb).groups = 0; /* not in mcast group */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_pid = pid; NETLINK_CB(skb).dst_groups = 0; /* unicast */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5d487cd69c8c..7b7b45a19597 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,7 +950,6 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; NETLINK_CB(skb).pid = nlk->pid; - NETLINK_CB(skb).groups = nlk->groups; NETLINK_CB(skb).dst_pid = dst_pid; NETLINK_CB(skb).dst_groups = dst_groups; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); -- cgit 
v1.2.3 From d629b836d151d43332492651dd841d32e57ebe3b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:27:50 -0700 Subject: [NETLINK]: Use group numbers instead of bitmasks internally Using the group number allows increasing the number of groups without being limited by the size of the bitmask. It introduces one limitation for netlink users: messages can't be broadcast to multiple groups anymore, however this feature was never used inside the kernel. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 35 ++++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index eab51f9c9c86..c724c9d4984a 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -107,7 +107,7 @@ struct netlink_skb_parms struct ucred creds; /* Skb credentials */ __u32 pid; __u32 dst_pid; - __u32 dst_groups; + __u32 dst_group; kernel_cap_t eff_cap; __u32 loginuid; /* Login (audit) uid */ }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index c41a88100fea..3c56b96b4a4b 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,7 +67,7 @@ struct netlink_sock { u32 pid; unsigned int groups; u32 dst_pid; - unsigned int dst_groups; + u32 dst_group; unsigned long state; wait_queue_head_t wait; struct netlink_callback *cb; @@ -116,6 +116,11 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); static struct notifier_block *netlink_chain; +static u32 netlink_group_mask(u32 group) +{ + return group ? 
1 << (group - 1) : 0; +} + static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) { return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; @@ -533,7 +538,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; nlk->dst_pid = 0; - nlk->dst_groups = 0; + nlk->dst_group = 0; return 0; } if (addr->sa_family != AF_NETLINK) @@ -549,7 +554,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (err == 0) { sk->sk_state = NETLINK_CONNECTED; nlk->dst_pid = nladdr->nl_pid; - nlk->dst_groups = nladdr->nl_groups; + nlk->dst_group = ffs(nladdr->nl_groups); } return err; @@ -567,10 +572,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr if (peer) { nladdr->nl_pid = nlk->dst_pid; - nladdr->nl_groups = nlk->dst_groups; + nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->pid; - nladdr->nl_groups = nlk->groups; + nladdr->nl_groups = nlk->groups; } return 0; } @@ -771,7 +776,7 @@ static inline int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) goto out; if (p->failure) { @@ -867,7 +872,7 @@ static inline int do_one_set_err(struct sock *sk, if (sk == p->exclude_sk) goto out; - if (nlk->pid == p->pid || !(nlk->groups & p->group)) + if (nlk->pid == p->pid || !(nlk->groups & netlink_group_mask(p->group))) goto out; sk->sk_err = p->code; @@ -913,7 +918,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr=msg->msg_name; u32 dst_pid; - u32 dst_groups; + u32 dst_group; struct sk_buff *skb; int err; struct scm_cookie scm; @@ -931,12 +936,12 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (addr->nl_family != AF_NETLINK) 
return -EINVAL; dst_pid = addr->nl_pid; - dst_groups = addr->nl_groups; - if (dst_groups && !netlink_capable(sock, NL_NONROOT_SEND)) + dst_group = ffs(addr->nl_groups); + if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND)) return -EPERM; } else { dst_pid = nlk->dst_pid; - dst_groups = nlk->dst_groups; + dst_group = nlk->dst_group; } if (!nlk->pid) { @@ -955,7 +960,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).pid = nlk->pid; NETLINK_CB(skb).dst_pid = dst_pid; - NETLINK_CB(skb).dst_groups = dst_groups; + NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); @@ -977,9 +982,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; } - if (dst_groups) { + if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_groups, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); } err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); @@ -1025,7 +1030,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, addr->nl_family = AF_NETLINK; addr->nl_pad = 0; addr->nl_pid = NETLINK_CB(skb).pid; - addr->nl_groups = NETLINK_CB(skb).dst_groups; + addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } -- cgit v1.2.3 From ac6d439d2097b72ea0cbc2322ce1263a38bc1fd0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:29:52 -0700 Subject: [NETLINK]: Convert netlink users to use group numbers instead of bitmasks Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/w1/w1_netlink.c | 2 +- include/linux/netfilter/nfnetlink.h | 23 ++++++++++++++++- include/linux/netfilter_decnet.h | 14 +++++++++++ include/linux/rtnetlink.h | 42 ++++++++++++++++++++++++++++--- include/linux/selinux_netlink.h | 13 +++++++++- include/linux/xfrm.h | 18 +++++++++++++ lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 4 +-- net/core/neighbour.c | 8 +++--- net/core/rtnetlink.c | 6 ++--- net/core/wireless.c | 4 +-- net/decnet/dn_dev.c | 8 +++--- net/decnet/dn_table.c | 4 +-- net/decnet/netfilter/dn_rtmsg.c | 6 ++--- net/ipv4/devinet.c | 7 +++--- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_semantics.c | 4 +-- net/ipv4/netfilter/ip_conntrack_netlink.c | 12 ++++----- net/ipv4/netfilter/ipt_ULOG.c | 8 +++--- net/ipv6/addrconf.c | 24 +++++++++--------- net/ipv6/route.c | 8 +++--- net/netfilter/nfnetlink.c | 2 +- net/sched/act_api.c | 8 +++--- net/sched/cls_api.c | 2 +- net/sched/sch_api.c | 4 +-- net/xfrm/xfrm_user.c | 35 +++++++++++--------------- security/selinux/netlink.c | 4 +-- 27 files changed, 183 insertions(+), 91 deletions(-) (limited to 'include/linux') diff --git a/drivers/w1/w1_netlink.c b/drivers/w1/w1_netlink.c index 2a82fb055c70..e7b774423dd6 100644 --- a/drivers/w1/w1_netlink.c +++ b/drivers/w1/w1_netlink.c @@ -51,7 +51,7 @@ void w1_netlink_send(struct w1_master *dev, struct w1_netlink_msg *msg) memcpy(data, msg, sizeof(struct w1_netlink_msg)); - NETLINK_CB(skb).dst_groups = dev->groups; + NETLINK_CB(skb).dst_group = dev->groups; netlink_broadcast(dev->nls, skb, 0, dev->groups, GFP_ATOMIC); nlmsg_failure: diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b0feb2374079..1d5b10ae2399 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -2,13 +2,34 @@ #define _NFNETLINK_H #include -/* nfnetlink groups: Up to 32 maximum */ +#ifndef __KERNEL__ +/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */ #define 
NF_NETLINK_CONNTRACK_NEW 0x00000001 #define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 +#endif + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) /* Generic structure for encapsulation optional netfilter information. 
* It is reminiscent of sockaddr, but with sa_family replaced diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 018979484150..6f425369ee29 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -56,7 +56,21 @@ struct nf_dn_rtmsg { #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define DNRMG_L1_GROUP 0x01 #define DNRMG_L2_GROUP 0x02 +#endif + +enum { + DNRNG_NLGRP_NONE, +#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE + DNRNG_NLGRP_L1, +#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 + DNRNG_NLGRP_L2, +#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 + __DNRNG_NLGRP_MAX +}; +#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) #endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 657c05ab8f9e..c231e9a08f0b 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -826,9 +826,8 @@ enum #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) - -/* RTnetlink multicast groups */ - +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 @@ -847,6 +846,43 @@ enum #define RTMGRP_DECnet_ROUTE 0x4000 #define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + 
RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* TC action piece */ struct tcamsg diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h index 957e6ebca4e6..bbf489decd84 100644 --- a/include/linux/selinux_netlink.h +++ b/include/linux/selinux_netlink.h @@ -20,10 +20,21 @@ enum { SELNL_MSG_MAX }; -/* Multicast groups */ +#ifndef __KERNEL__ +/* Multicast groups - backwards compatibility for userspace */ #define SELNL_GRP_NONE 0x00000000 #define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ #define SELNL_GRP_ALL 0xffffffff +#endif + +enum selinux_nlgroups { + SELNLGRP_NONE, +#define SELNLGRP_NONE SELNLGRP_NONE + SELNLGRP_AVC, +#define SELNLGRP_AVC SELNLGRP_AVC + __SELNLGRP_MAX +}; +#define SELNLGRP_MAX (__SELNLGRP_MAX - 1) /* Message structures */ struct selnl_msg_setenforce { diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index f0d423300d84..0fb077d68441 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -258,9 +258,27 @@ struct xfrm_usersa_flush { __u8 proto; }; +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#endif + +enum xfrm_nlgroups { + XFRMNLGRP_NONE, +#define XFRMNLGRP_NONE XFRMNLGRP_NONE + XFRMNLGRP_ACQUIRE, +#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE + XFRMNLGRP_EXPIRE, +#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE + XFRMNLGRP_SA, +#define XFRMNLGRP_SA XFRMNLGRP_SA + 
XFRMNLGRP_POLICY, +#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY + __XFRMNLGRP_MAX +}; +#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) #endif /* _LINUX_XFRM_H */ diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index bc000619f4f8..1ebd735d6439 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -93,7 +93,7 @@ static int send_uevent(const char *signal, const char *obj, } } - NETLINK_CB(skb).dst_groups = 1; + NETLINK_CB(skb).dst_group = 1; return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index acb888d32587..6845b5dd6d77 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -78,8 +78,8 @@ static void ulog_send(unsigned int nlgroup) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = 1 << nlgroup; - netlink_broadcast(ebtulognl, ub->skb, 0, 1 << nlgroup, GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroup + 1; + netlink_broadcast(ebtulognl, ub->skb, 0, nlgroup + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 72ee00f7b30c..39fc55edf691 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2343,8 +2343,8 @@ void neigh_app_ns(struct neighbour *n) } nlh = (struct nlmsghdr *)skb->data; nlh->nlmsg_flags = NLM_F_REQUEST; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) @@ -2361,8 +2361,8 @@ static void neigh_app_notify(struct neighbour *n) return; } nlh = (struct nlmsghdr *)skb->data; - NETLINK_CB(skb).dst_groups = RTMGRP_NEIGH; - netlink_broadcast(rtnl, skb, 0, RTMGRP_NEIGH, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD 
*/ diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9b3c61f1a37d..5f3f95b5585d 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -148,7 +148,7 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) { int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(rtnl, skb, pid, group, GFP_KERNEL); @@ -458,8 +458,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); } static int rtnetlink_done(struct netlink_callback *cb) diff --git a/net/core/wireless.c b/net/core/wireless.c index 3ff5639c0b78..19fa6a5389b3 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -1144,8 +1144,8 @@ static inline void rtmsg_iwinfo(struct net_device * dev, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_LINK; - netlink_broadcast(rtnl, skb, 0, RTMGRP_LINK, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); } #endif /* WE_EVENT_NETLINK */ diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 00233ecbc9cb..5610bb16dbf9 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -752,16 +752,16 @@ static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) skb = alloc_skb(size, GFP_KERNEL); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); return; } if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_DECnet_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_IFADDR; - netlink_broadcast(rtnl, skb, 0, 
RTMGRP_DECnet_IFADDR, GFP_KERNEL); + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 28ba5777a25a..73a88489ff3e 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -349,10 +349,10 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_DECnet_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; if (nlh->nlmsg_flags & NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_DECnet_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); if (nlh->nlmsg_flags & NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 3068fddb2da3..353fed6888f9 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -71,10 +71,10 @@ static void dnrmg_send_peer(struct sk_buff *skb) switch(flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_L1RT: - group = DNRMG_L1_GROUP; + group = DNRMG_L1_NLGRP; break; case DN_RT_PKT_L2RT: - group = DNRMG_L2_GROUP; + group = DNRMG_L2_NLGRP; break; default: return; @@ -83,7 +83,7 @@ static void dnrmg_send_peer(struct sk_buff *skb) skb2 = dnrmg_build_message(skb, &status); if (skb2 == NULL) return; - NETLINK_CB(skb2).dst_groups = group; + NETLINK_CB(skb2).dst_group = group; netlink_broadcast(dnrmg, skb2, 0, group, GFP_ATOMIC); } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d8a10e3dd77d..ba2895ae8151 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1111,13 +1111,12 @@ static void rtmsg_ifa(int event, struct in_ifaddr* ifa) struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); if (!skb) - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, ENOBUFS); + 
netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); else if (inet_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV4_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); } else { - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV4_IFADDR, GFP_KERNEL); + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); } } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 75d03e37b9a8..d4e7b578a25d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -560,7 +560,7 @@ static void nl_fib_input(struct sock *sk, int len) pid = nlh->nlmsg_pid; /*pid of sending process */ NETLINK_CB(skb).pid = 0; /* from kernel */ NETLINK_CB(skb).dst_pid = pid; - NETLINK_CB(skb).dst_groups = 0; /* unicast */ + NETLINK_CB(skb).dst_group = 0; /* unicast */ netlink_unicast(sk, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e278cb9d0075..7e4651b3caa8 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -290,10 +290,10 @@ void rtmsg_fib(int event, u32 key, struct fib_alias *fa, kfree_skb(skb); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV4_ROUTE; + NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; if (n->nlmsg_flags&NLM_F_ECHO) atomic_inc(&skb->users); - netlink_broadcast(rtnl, skb, pid, RTMGRP_IPV4_ROUTE, GFP_KERNEL); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); if (n->nlmsg_flags&NLM_F_ECHO) netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 1221a9c8bac2..a4e9278db4ed 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -297,7 +297,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, struct sk_buff *skb; unsigned int type; unsigned char *b; - unsigned int flags 
= 0, groups; + unsigned int flags = 0, group; /* ignore our fake conntrack entry */ if (ct == &ip_conntrack_untracked) @@ -305,7 +305,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (events & IPCT_DESTROY) { type = IPCTNL_MSG_CT_DELETE; - groups = NF_NETLINK_CONNTRACK_DESTROY; + group = NFNLGRP_CONNTRACK_DESTROY; goto alloc_skb; } if (events & (IPCT_NEW | IPCT_RELATED)) { @@ -313,7 +313,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, flags = NLM_F_CREATE|NLM_F_EXCL; /* dump everything */ events = ~0UL; - groups = NF_NETLINK_CONNTRACK_NEW; + group = NFNLGRP_CONNTRACK_NEW; goto alloc_skb; } if (events & (IPCT_STATUS | @@ -322,7 +322,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, IPCT_HELPINFO | IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; - groups = NF_NETLINK_CONNTRACK_UPDATE; + group = NFNLGRP_CONNTRACK_UPDATE; goto alloc_skb; } @@ -375,7 +375,7 @@ alloc_skb: goto nfattr_failure; nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, groups, 0); + nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; nlmsg_failure: @@ -1194,7 +1194,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nlh->nlmsg_len = skb->tail - b; proto = exp->tuple.dst.protonum; - nfnetlink_send(skb, 0, NF_NETLINK_CONNTRACK_EXP_NEW, 0); + nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); return NOTIFY_DONE; nlmsg_failure: diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 1d8ac4595e17..89816b83455e 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -116,10 +116,10 @@ static void ulog_send(unsigned int nlgroupnum) if (ub->qlen > 1) ub->lastnlh->nlmsg_type = NLMSG_DONE; - NETLINK_CB(ub->skb).dst_groups = (1 << nlgroupnum); - DEBUGP("ipt_ULOG: throwing %d packets to netlink mask %u\n", - ub->qlen, nlgroupnum); - netlink_broadcast(nflognl, ub->skb, 0, (1 << nlgroupnum), GFP_ATOMIC); + NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; + 
DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); + netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b9c3da349492..493abf94bcfc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2858,16 +2858,16 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); return; } if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFADDR, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFADDR; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFADDR, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -2994,16 +2994,16 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) skb = alloc_skb(size, GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); return; } if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_IFINFO, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_IFINFO; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_IFINFO, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, @@ -3054,16 +3054,16 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, skb = alloc_skb(size, 
GFP_ATOMIC); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); return; } if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_PREFIX, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_PREFIX; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_PREFIX, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 878789b3122d..6ea494ab4e02 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1850,16 +1850,16 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, skb = alloc_skb(size, gfp_any()); if (!skb) { - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); return; } if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { kfree_skb(skb); - netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL); + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); return; } - NETLINK_CB(skb).dst_groups = RTMGRP_IPV6_ROUTE; - netlink_broadcast(rtnl, skb, 0, RTMGRP_IPV6_ROUTE, gfp_any()); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); } /* diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 84efffdbade3..36a4c5fbb7d7 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -198,7 +198,7 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) int allocation = in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL; int err = 0; - NETLINK_CB(skb).dst_groups = group; + NETLINK_CB(skb).dst_group = group; if (echo) atomic_inc(&skb->users); netlink_broadcast(nfnl, skb, pid, group, allocation); diff --git a/net/sched/act_api.c b/net/sched/act_api.c index c896a0118a32..8aebe8f6d271 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -593,7 +593,7 @@ static int tca_action_flush(struct rtattr *rta, struct nlmsghdr *n, u32 pid) nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (err > 0) return 0; @@ -656,7 +656,7 @@ tca_action_gd(struct rtattr *rta, struct nlmsghdr *n, u32 pid, int event) /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, pid, RTMGRP_TC, + ret = rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); if (ret > 0) return 0; @@ -698,9 +698,9 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event, x->rta_len = skb->tail - (u8*)x; nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = RTMGRP_TC; + NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, pid, RTMGRP_TC, flags&NLM_F_ECHO); + err = rtnetlink_send(skb, pid, RTNLGRP_TC, flags&NLM_F_ECHO); if (err > 0) err = 0; return err; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3b5714ef4d1a..b4d89fbb3782 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct tcf_dump_args diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index b9a069af4a02..737681cb9a92 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -816,7 +816,7 @@ static int qdisc_notify(struct 
sk_buff *oskb, struct nlmsghdr *n, } if (skb->len) - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); err_out: kfree_skb(skb); @@ -1040,7 +1040,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, return -EINVAL; } - return rtnetlink_send(skb, pid, RTMGRP_TC, n->nlmsg_flags&NLM_F_ECHO); + return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); } struct qdisc_dump_args diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 4d553a1d2169..0579d209af27 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1125,9 +1125,8 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (build_expire(skb, x, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_sa_flush(struct km_event *c) @@ -1152,9 +1151,8 @@ static int xfrm_notify_sa_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); @@ -1228,9 +1226,8 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_SA; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_SA, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_SA; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_SA, GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1308,9 +1305,8 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, if (build_acquire(skb, x, xt, xp, dir) < 0) BUG(); - 
NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_ACQUIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_ACQUIRE, GFP_ATOMIC); } /* User gives us xfrm_user_policy_info followed by an array of 0 @@ -1409,9 +1405,8 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve if (build_polexpire(skb, xp, dir, c->data.hard) < 0) BUG(); - NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_EXPIRE; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event *c) @@ -1459,9 +1454,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: rtattr_failure: @@ -1486,9 +1480,8 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh->nlmsg_len = skb->tail - b; - NETLINK_CB(skb).dst_groups = XFRMGRP_POLICY; - - return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_POLICY, GFP_ATOMIC); + NETLINK_CB(skb).dst_group = XFRMNLGRP_POLICY; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: kfree_skb(skb); diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 0f7be6524555..20f481015db4 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -80,8 +80,8 @@ static void selnl_notify(int msgtype, void *data) nlh = NLMSG_PUT(skb, 0, 0, msgtype, len); selnl_add_payload(nlh, len, msgtype, data); nlh->nlmsg_len = skb->tail - tmp; - NETLINK_CB(skb).dst_groups = 
SELNL_GRP_AVC; - netlink_broadcast(selnl, skb, 0, SELNL_GRP_AVC, GFP_USER); + NETLINK_CB(skb).dst_group = SELNLGRP_AVC; + netlink_broadcast(selnl, skb, 0, SELNLGRP_AVC, GFP_USER); out: return; -- cgit v1.2.3 From 9a4595bc7e67962f13232ee55a64e063062c3a99 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:32:15 -0700 Subject: [NETLINK]: Add set/getsockopt options to support more than 32 groups NETLINK_ADD_MEMBERSHIP/NETLINK_DROP_MEMBERSHIP are used to join/leave groups, NETLINK_PKTINFO is used to enable nl_pktinfo control messages for received packets to get the extended destination group number. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 9 +++++ include/linux/socket.h | 1 + net/netlink/af_netlink.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 103 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c724c9d4984a..36a40449f9f1 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -90,6 +90,15 @@ struct nlmsgerr struct nlmsghdr msg; }; +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 + +struct nl_pktinfo +{ + __u32 group; +}; + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { diff --git a/include/linux/socket.h b/include/linux/socket.h index ddf22559f484..acc55aac8a43 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -272,6 +272,7 @@ struct ucred { #define SOL_NETBEUI 267 #define SOL_LLC 268 #define SOL_DCCP 269 +#define SOL_NETLINK 270 /* IPX options */ #define IPX_TYPE 1 diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 58d4ca42ac32..47e791738014 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -81,6 +81,7 @@ struct netlink_sock { }; #define NETLINK_KERNEL_SOCKET 0x1 +#define NETLINK_RECV_PKTINFO 0x2 static inline struct netlink_sock *nlk_sk(struct sock 
*sk) { @@ -946,6 +947,94 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +static int netlink_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int val = 0, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (optlen >= sizeof(int) && + get_user(val, (int __user *)optval)) + return -EFAULT; + + switch (optname) { + case NETLINK_PKTINFO: + if (val) + nlk->flags |= NETLINK_RECV_PKTINFO; + else + nlk->flags &= ~NETLINK_RECV_PKTINFO; + err = 0; + break; + case NETLINK_ADD_MEMBERSHIP: + case NETLINK_DROP_MEMBERSHIP: { + unsigned int subscriptions; + int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0; + + if (!netlink_capable(sock, NL_NONROOT_RECV)) + return -EPERM; + if (!val || val - 1 >= nlk->ngroups) + return -EINVAL; + netlink_table_grab(); + old = test_bit(val - 1, nlk->groups); + subscriptions = nlk->subscriptions - old + new; + if (new) + __set_bit(val - 1, nlk->groups); + else + __clear_bit(val - 1, nlk->groups); + netlink_update_subscriptions(sk, subscriptions); + netlink_table_ungrab(); + err = 0; + break; + } + default: + err = -ENOPROTOOPT; + } + return err; +} + +static int netlink_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct netlink_sock *nlk = nlk_sk(sk); + int len, val, err; + + if (level != SOL_NETLINK) + return -ENOPROTOOPT; + + if (get_user(len, optlen)) + return -EFAULT; + if (len < 0) + return -EINVAL; + + switch (optname) { + case NETLINK_PKTINFO: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_RECV_PKTINFO ? 
1 : 0; + put_user(len, optlen); + put_user(val, optval); + err = 0; + break; + default: + err = -ENOPROTOOPT; + } + return err; +} + +static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) +{ + struct nl_pktinfo info; + + info.group = NETLINK_CB(skb).dst_group; + put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info); +} + static inline void netlink_rcv_wake(struct sock *sk) { struct netlink_sock *nlk = nlk_sk(sk); @@ -1091,6 +1180,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, netlink_dump(sk); scm_recv(sock, msg, siocb->scm, flags); + if (nlk->flags & NETLINK_RECV_PKTINFO) + netlink_cmsg_recv_pktinfo(msg, skb); out: netlink_rcv_wake(sk); @@ -1465,8 +1556,8 @@ static struct proto_ops netlink_ops = { .ioctl = sock_no_ioctl, .listen = sock_no_listen, .shutdown = sock_no_shutdown, - .setsockopt = sock_no_setsockopt, - .getsockopt = sock_no_getsockopt, + .setsockopt = netlink_setsockopt, + .getsockopt = netlink_getsockopt, .sendmsg = netlink_sendmsg, .recvmsg = netlink_recvmsg, .mmap = sock_no_mmap, -- cgit v1.2.3 From 066286071d3542243baa68166acb779187c848b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:33:26 -0700 Subject: [NETLINK]: Add "groups" argument to netlink_kernel_create Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- drivers/w1/w1_int.c | 2 +- include/linux/netlink.h | 2 +- kernel/audit.c | 2 +- lib/kobject_uevent.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 3 ++- net/core/rtnetlink.c | 3 ++- net/decnet/netfilter/dn_rtmsg.c | 4 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/netfilter/ip_queue.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 3 ++- net/ipv6/netfilter/ip6_queue.c | 3 ++- net/netfilter/nfnetlink.c | 4 ++-- net/netlink/af_netlink.c | 6 ++++-- net/xfrm/xfrm_user.c | 4 ++-- security/selinux/netlink.c | 3 ++- 16 files changed, 27 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index f3f339d057f9..498ad505fa5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -88,7 +88,7 @@ static struct w1_master * w1_alloc_dev(u32 id, int slave_count, int slave_ttl, dev->groups = 1; dev->seq = 1; - dev->nls = netlink_kernel_create(NETLINK_W1, NULL, THIS_MODULE); + dev->nls = netlink_kernel_create(NETLINK_W1, 1, NULL, THIS_MODULE); if (!dev->nls) { printk(KERN_ERR "Failed to create new netlink socket(%u) for w1 master %s.\n", NETLINK_NFLOG, dev->dev.bus_id); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 36a40449f9f1..7d1d9683b246 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -125,7 +125,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, diff --git a/kernel/audit.c b/kernel/audit.c index 
ed4019563d56..7f0699790d46 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -514,7 +514,7 @@ static int __init audit_init(void) { printk(KERN_INFO "audit: initializing netlink socket (%s)\n", audit_default ? "enabled" : "disabled"); - audit_sock = netlink_kernel_create(NETLINK_AUDIT, audit_receive, + audit_sock = netlink_kernel_create(NETLINK_AUDIT, 0, audit_receive, THIS_MODULE); if (!audit_sock) audit_panic("cannot initialize netlink socket"); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 1ebd735d6439..04ca4429ddfa 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(kobject_uevent_atomic); static int __init kobject_uevent_init(void) { - uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, NULL, + uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL, THIS_MODULE); if (!uevent_sock) { diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 6845b5dd6d77..aae26ae2e61f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -258,7 +258,8 @@ static int __init init(void) spin_lock_init(&ulog_buffers[i].lock); } - ebtulognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + ebtulognl = netlink_kernel_create(NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, + NULL, THIS_MODULE); if (!ebtulognl) ret = -ENOMEM; else if ((ret = ebt_register_watcher(&ulog))) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5f3f95b5585d..9bed7569ce3f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -708,7 +708,8 @@ void __init rtnetlink_init(void) if (!rta_buf) panic("rtnetlink_init: cannot allocate rta_buf\n"); - rtnl = netlink_kernel_create(NETLINK_ROUTE, rtnetlink_rcv, THIS_MODULE); + rtnl = netlink_kernel_create(NETLINK_ROUTE, RTNLGRP_MAX, rtnetlink_rcv, + THIS_MODULE); if (rtnl == NULL) panic("rtnetlink_init: cannot initialize rtnetlink\n"); netlink_set_nonroot(NETLINK_ROUTE, NL_NONROOT_RECV); diff --git 
a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index 353fed6888f9..afb33a25ea55 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -138,8 +138,8 @@ static int __init init(void) { int rv = 0; - dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, dnrmg_receive_user_sk, - THIS_MODULE); + dnrmg = netlink_kernel_create(NETLINK_DNRTMSG, DNRNG_NLGRP_MAX, + dnrmg_receive_user_sk, THIS_MODULE); if (dnrmg == NULL) { printk(KERN_ERR "dn_rtmsg: Cannot create netlink socket"); return -ENOMEM; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d4e7b578a25d..4e1379f71269 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -566,7 +566,7 @@ static void nl_fib_input(struct sock *sk, int len) static void nl_fib_lookup_init(void) { - netlink_kernel_create(NETLINK_FIB_LOOKUP, nl_fib_input, THIS_MODULE); + netlink_kernel_create(NETLINK_FIB_LOOKUP, 0, nl_fib_input, THIS_MODULE); } static void fib_disable_ip(struct net_device *dev, int force) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 1880ad8575d8..71f3c7350c6e 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -845,7 +845,7 @@ static int __init inet_diag_init(void) goto out; memset(inet_diag_table, 0, inet_diag_table_size); - idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, inet_diag_rcv, + idiagnl = netlink_kernel_create(NETLINK_INET_DIAG, 0, inet_diag_rcv, THIS_MODULE); if (idiagnl == NULL) goto out_free_table; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 7f2bcc7198fa..d54f14d926f6 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -671,7 +671,7 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk, + ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip_queue: failed to create netlink 
socket\n"); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 89816b83455e..e2c14f3cb2fc 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -388,7 +388,8 @@ static int __init init(void) ulog_buffers[i].timer.data = i; } - nflognl = netlink_kernel_create(NETLINK_NFLOG, NULL, THIS_MODULE); + nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, + THIS_MODULE); if (!nflognl) return -ENOMEM; diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 446764545b10..aa11cf366efa 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -667,7 +667,8 @@ init_or_cleanup(int init) goto cleanup; netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk, THIS_MODULE); + ipqnl = netlink_kernel_create(NETLINK_IP6_FW, 0, ipq_rcv_sk, + THIS_MODULE); if (ipqnl == NULL) { printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); goto cleanup_netlink_notifier; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 36a4c5fbb7d7..e089f17bb803 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -355,8 +355,8 @@ int __init nfnetlink_init(void) { printk("Netfilter messages via NETLINK v%s.\n", nfversion); - nfnl = netlink_kernel_create(NETLINK_NETFILTER, nfnetlink_rcv, - THIS_MODULE); + nfnl = netlink_kernel_create(NETLINK_NETFILTER, NFNLGRP_MAX, + nfnetlink_rcv, THIS_MODULE); if (!nfnl) { printk(KERN_ERR "cannot initialize nfnetlink!\n"); return -1; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 47e791738014..e259f46e26f7 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1204,7 +1204,9 @@ static void netlink_data_ready(struct sock *sk, int len) */ struct sock * -netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module) +netlink_kernel_create(int unit, unsigned int groups, + void 
(*input)(struct sock *sk, int len), + struct module *module) { struct socket *sock; struct sock *sk; @@ -1234,7 +1236,7 @@ netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct nlk->flags |= NETLINK_KERNEL_SOCKET; netlink_table_grab(); - nl_table[unit].groups = 32; + nl_table[unit].groups = groups < 32 ? 32 : groups; nl_table[unit].module = module; nl_table[unit].registered = 1; netlink_table_ungrab(); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0579d209af27..c35336a0f71b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1520,8 +1520,8 @@ static int __init xfrm_user_init(void) { printk(KERN_INFO "Initializing IPsec netlink socket\n"); - xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv, - THIS_MODULE); + xfrm_nl = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, + xfrm_netlink_rcv, THIS_MODULE); if (xfrm_nl == NULL) return -ENOMEM; diff --git a/security/selinux/netlink.c b/security/selinux/netlink.c index 20f481015db4..e203883406dd 100644 --- a/security/selinux/netlink.c +++ b/security/selinux/netlink.c @@ -104,7 +104,8 @@ void selnl_notify_policyload(u32 seqno) static int __init selnl_init(void) { - selnl = netlink_kernel_create(NETLINK_SELINUX, NULL, THIS_MODULE); + selnl = netlink_kernel_create(NETLINK_SELINUX, SELNLGRP_MAX, NULL, + THIS_MODULE); if (selnl == NULL) panic("SELinux: Cannot create netlink socket."); netlink_set_nonroot(NETLINK_SELINUX, NL_NONROOT_RECV); -- cgit v1.2.3 From 20380731bc2897f2952ae055420972ded4cd786e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 02:18:02 -0300 Subject: [NET]: Fix sparse warnings Of this type, mostly: CHECK net/ipv6/netfilter.c net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static? net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static? Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 2 ++ include/linux/if_frad.h | 6 ++++-- include/linux/if_tr.h | 4 ++++ include/linux/igmp.h | 3 +++ include/linux/net.h | 7 +++++++ include/linux/netdevice.h | 10 ++++++++++ include/linux/netfilter_ipv6.h | 4 ++-- include/linux/security.h | 6 ++++-- include/linux/skbuff.h | 2 ++ include/linux/socket.h | 7 +++++++ include/net/addrconf.h | 6 ++++++ include/net/af_unix.h | 15 ++++++++++++++ include/net/icmp.h | 7 +++++++ include/net/ip.h | 23 ++++++++++++++++++++++ include/net/ip_fib.h | 5 +++++ include/net/ipv6.h | 35 +++++++++++++++++++++++++++++++-- include/net/p8022.h | 2 ++ include/net/raw.h | 7 ++++++- include/net/route.h | 2 ++ include/net/sock.h | 12 ++++++++++++ include/net/tcp.h | 12 ++++++++++++ include/net/udp.h | 5 +++++ init/main.c | 2 +- kernel/sysctl.c | 4 +--- net/802/p8023.c | 1 + net/802/sysctl_net_802.c | 3 ++- net/core/dev.c | 6 ------ net/core/sysctl_net_core.c | 9 ++------- net/core/utils.c | 2 ++ net/core/wireless.c | 4 ---- net/ethernet/eth.c | 2 -- net/ethernet/sysctl_net_ether.c | 1 + net/ipv4/af_inet.c | 14 -------------- net/ipv4/datagram.c | 1 + net/ipv4/inetpeer.c | 1 + net/ipv4/ip_sockglue.c | 2 -- net/ipv4/proc.c | 3 --- net/ipv4/syncookies.c | 2 -- net/ipv4/sysctl_net_ipv4.c | 43 +++++++---------------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/addrconf.c | 4 ++-- net/ipv6/af_inet6.c | 24 ----------------------- net/ipv6/ipv6_sockglue.c | 8 -------- net/ipv6/route.c | 6 ++---- net/ipv6/sit.c | 2 +- net/ipv6/sysctl_net_ipv6.c | 3 --- net/ipv6/tcp_ipv6.c | 4 ---- net/ipv6/udp.c | 2 -- net/ipx/af_ipx.c | 2 -- net/socket.c | 11 +++++------ net/sysctl_net.c | 8 +++----- net/unix/af_unix.c | 8 -------- net/unix/sysctl_net_unix.c | 2 +- 54 files changed, 208 insertions(+), 162 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index b5b58e9c054c..fc2d4c8225aa 100644 --- a/include/linux/if_ether.h +++ 
b/include/linux/if_ether.h @@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb->mac.raw; } + +extern struct ctl_table ether_table[]; #endif #endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 3c94b1736570..511999c7eeda 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -191,10 +191,12 @@ struct frad_local int buffer; /* current buffer for S508 firmware */ }; -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - #endif /* __KERNEL__ */ #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ +#ifdef __KERNEL__ +extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); +#endif + #endif diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 3fba9e2f5427..5502f597cf0e 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -43,12 +43,16 @@ struct trh_hdr { }; #ifdef __KERNEL__ +#include #include static inline struct trh_hdr *tr_hdr(const struct sk_buff *skb) { return (struct trh_hdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL +extern struct ctl_table tr_table[]; +#endif #endif /* This is an Token-Ring LLC structure */ diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 0c31ef0b5bad..28f4f3b36950 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -129,6 +129,9 @@ struct igmpv3_query { #include #include +extern int sysctl_igmp_max_memberships; +extern int sysctl_igmp_max_msf; + struct ip_sf_socklist { unsigned int sl_max; diff --git a/include/linux/net.h b/include/linux/net.h index 5f8b632ff653..4e981585a89a 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -286,5 +286,12 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) +#ifdef CONFIG_SYSCTL +#include +extern ctl_table net_table[]; +extern int net_msg_cost; +extern int net_msg_burst; +#endif + #endif /* __KERNEL__ 
*/ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8e52edfd526..1fcaa88b8625 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -244,6 +244,7 @@ struct netdev_boot_setup { }; #define NETDEV_BOOT_SETUP_MAX 8 +extern int __init netdev_boot_setup(char *str); /* * The DEVICE structure. @@ -673,6 +674,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern void dev_init(void); extern int netdev_nit; +extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); @@ -908,6 +910,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); +#ifdef CONFIG_PROC_FS +extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); +extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); +extern void dev_seq_stop(struct seq_file *seq, void *v); +#endif + +extern void linkwatch_run_queue(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 5d204ee7a312..edcc2c6eb5c7 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,7 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +extern int ipv6_netfilter_init(void); +extern void ipv6_netfilter_fini(void); #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1c..7aab6ab7c57f 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int 
security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32635c401d4d..db10335e4192 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern void skb_release_data(struct sk_buff *skb); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/linux/socket.h b/include/linux/socket.h index acc55aac8a43..1739c2d5b95b 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ +extern int sysctl_somaxconn; +extern void sock_init(void); +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + typedef unsigned short sa_family_t; /* diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed93672176..750e2508dd90 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -45,6 +45,7 @@ struct prefix_info { #ifdef __KERNEL__ +#include #include #include #include @@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) addr->s6_addr32[3] == htonl(0x00000002)); } +#ifdef CONFIG_PROC_FS +extern int if6_proc_init(void); +extern void if6_proc_exit(void); +#endif + #endif #endif diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 
b60b3846b9d1..b5d785ab4a0e 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,5 +1,11 @@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include +#include +#include +#include + extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -74,5 +80,14 @@ struct unix_sock { wait_queue_head_t peer_wait; }; #define unix_sk(__sk) ((struct unix_sock *)__sk) + +#ifdef CONFIG_SYSCTL +extern int sysctl_unix_max_dgram_qlen; +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); +#else +static inline void unix_sysctl_register(void) {} +static inline void unix_sysctl_unregister(void) {} +#endif #endif #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb45..6cdebeee5f96 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; +extern int sysctl_icmp_ratelimit; +extern int sysctl_icmp_ratemask; + #endif /* _ICMP_H */ diff --git a/include/net/ip.h b/include/net/ip.h index c16fb6ac3446..7623e414a5fb 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,24 @@ extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +/* From ip_fragment.c */ +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; + +/* From inetpeer.c */ +extern int inet_peer_threshold; +extern int inet_peer_minttl; +extern int inet_peer_maxttl; +extern int inet_peer_gc_mintime; +extern int inet_peer_gc_maxtime; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +extern void ipfrag_init(void); + #ifdef 
CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */ @@ -348,5 +366,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context); +#ifdef CONFIG_PROC_FS +extern int ip_misc_proc_init(void); +#endif + +extern struct ctl_table ipv4_table[]; #endif /* _IP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac0..14de4ebd1211 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) #endif } +#ifdef CONFIG_PROC_FS +extern int fib_proc_init(void); +extern void fib_proc_exit(void); +#endif + #endif /* _NET_FIB_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c5a02ddc594a..3203eaff4bd4 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -104,6 +104,7 @@ struct frag_hdr { #ifdef __KERNEL__ +#include #include /* sysctls */ @@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -#endif /* __KERNEL__ */ -#endif /* _NET_IPV6_H */ +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +extern int ip6_mc_source(int add, int omode, struct sock *sk, + struct group_source_req *pgsr); +extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, + struct group_filter __user *optval, + int __user *optlen); + +#ifdef CONFIG_PROC_FS +extern int ac6_proc_init(void); +extern void ac6_proc_exit(void); +extern int raw6_proc_init(void); +extern void raw6_proc_exit(void); +extern int tcp6_proc_init(void); +extern void tcp6_proc_exit(void); +extern int udp6_proc_init(void); +extern void udp6_proc_exit(void); +extern int ipv6_misc_proc_init(void); +extern void 
ipv6_misc_proc_exit(void); + +extern struct rt6_statistics rt6_stats; +#endif +#ifdef CONFIG_SYSCTL +extern ctl_table ipv6_route_table[]; +extern ctl_table ipv6_icmp_table[]; +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif +#endif /* __KERNEL__ */ +#endif /* _NET_IPV6_H */ diff --git a/include/net/p8022.h b/include/net/p8022.h index 223f8fa9ffca..42e9fac51b31 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -8,4 +8,6 @@ extern struct datalink_proto * struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_8023_client(struct datalink_proto *dl); #endif diff --git a/include/net/raw.h b/include/net/raw.h index 1c4bc3e6809f..f47917469b12 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,10 +17,10 @@ #ifndef _RAW_H #define _RAW_H +#include extern struct proto raw_prot; - extern void raw_err(struct sock *, struct sk_buff *, u32 info); extern int raw_rcv(struct sock *, struct sk_buff *); @@ -39,4 +39,9 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +#ifdef CONFIG_PROC_FS +extern int raw_proc_init(void); +extern void raw_proc_exit(void); +#endif + #endif /* _RAW_H */ diff --git a/include/net/route.h b/include/net/route.h index 63c94558236d..dbe79ca67d31 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -195,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +extern ctl_table ipv4_route_table[]; + #endif /* _ROUTE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index d59428877078..14183883e8e6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1370,4 +1370,16 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign } #endif +extern void sk_init(void); + +#ifdef CONFIG_SYSCTL +extern struct 
ctl_table core_table[]; +extern int sysctl_optmem_max; +#endif + +#ifdef CONFIG_PROC_FS +extern __u32 sysctl_wmem_default; +extern __u32 sysctl_rmem_default; +#endif + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d958260af23c..d6bcf1317a6a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1183,4 +1183,16 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern struct request_sock_ops tcp_request_sock_ops; + +extern int tcp_v4_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int tcp4_proc_init(void); +extern void tcp4_proc_exit(void); +#endif + +extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_init(void); + #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761dbc..107b9d791a1f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -94,6 +94,11 @@ struct udp_iter_state { struct seq_operations seq_ops; }; +#ifdef CONFIG_PROC_FS extern int udp_proc_register(struct udp_seq_afinfo *afinfo); extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); + +extern int udp4_proc_init(void); +extern void udp4_proc_exit(void); +#endif #endif /* _UDP_H */ diff --git a/init/main.c b/init/main.c index c9c311cf1771..ff410063e4e1 100644 --- a/init/main.c +++ b/init/main.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include @@ -80,7 +81,6 @@ static int init(void *); extern void init_IRQ(void); -extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); extern void sbus_init(void); diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3e0bbee549ea..8e56e2495542 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -136,9 +137,6 @@ static struct ctl_table_header root_table_header = static ctl_table kern_table[]; static ctl_table 
vm_table[]; -#ifdef CONFIG_NET -extern ctl_table net_table[]; -#endif static ctl_table proc_table[]; static ctl_table fs_table[]; static ctl_table debug_table[]; diff --git a/net/802/p8023.c b/net/802/p8023.c index a0b61b40225f..6368d3dce444 100644 --- a/net/802/p8023.c +++ b/net/802/p8023.c @@ -20,6 +20,7 @@ #include #include +#include /* * Place an 802.3 header on a packet. The driver will do the mac diff --git a/net/802/sysctl_net_802.c b/net/802/sysctl_net_802.c index 36079630c49f..700129556c13 100644 --- a/net/802/sysctl_net_802.c +++ b/net/802/sysctl_net_802.c @@ -10,9 +10,10 @@ * 2 of the License, or (at your option) any later version. */ +#include #include +#include #include -#include #ifdef CONFIG_TR extern int sysctl_tr_rif_timeout; diff --git a/net/core/dev.c b/net/core/dev.c index a3ed53cc4af8..c01511e3d0c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -267,10 +267,6 @@ void dev_add_pack(struct packet_type *pt) spin_unlock_bh(&ptype_lock); } -extern void linkwatch_run_queue(void); - - - /** * __dev_remove_pack - remove packet handler * @pt: packet type declaration @@ -1133,8 +1129,6 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -extern void skb_release_data(struct sk_buff *); - /* Keep head the same: replace data */ int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8f817ad9f546..2f278c8e4743 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -9,23 +9,18 @@ #include #include #include +#include +#include #ifdef CONFIG_SYSCTL extern int netdev_max_backlog; -extern int netdev_budget; extern int weight_p; -extern int net_msg_cost; -extern int net_msg_burst; extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; -extern __u32 sysctl_wmem_default; -extern __u32 sysctl_rmem_default; extern int sysctl_core_destroy_delay; -extern int 
sysctl_optmem_max; -extern int sysctl_somaxconn; #ifdef CONFIG_NET_DIVERT extern char sysctl_divert_version[]; diff --git a/net/core/utils.c b/net/core/utils.c index 88eb8b68e26b..7b5970fc9e40 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include diff --git a/net/core/wireless.c b/net/core/wireless.c index 19fa6a5389b3..5caae2399f3a 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -571,10 +571,6 @@ static int wireless_seq_show(struct seq_file *seq, void *v) return 0; } -extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); -extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); -extern void dev_seq_stop(struct seq_file *seq, void *v); - static struct seq_operations wireless_seq_ops = { .start = dev_seq_start, .next = dev_seq_next, diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f444a2f2675f..87a052a9a84f 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -62,8 +62,6 @@ #include #include -extern int __init netdev_boot_setup(char *str); - __setup("ether=", netdev_boot_setup); /* diff --git a/net/ethernet/sysctl_net_ether.c b/net/ethernet/sysctl_net_ether.c index b81a6d532342..66b39fc342d2 100644 --- a/net/ethernet/sysctl_net_ether.c +++ b/net/ethernet/sysctl_net_ether.c @@ -7,6 +7,7 @@ #include #include +#include ctl_table ether_table[] = { {0} diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 20f52b5f5dea..5810f9d14914 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -859,10 +859,6 @@ static struct net_proto_family inet_family_ops = { .owner = THIS_MODULE, }; - -extern void tcp_init(void); -extern void tcp_v4_init(struct net_proto_family *); - /* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. 
*/ @@ -1132,7 +1128,6 @@ static int __init init_ipv4_mibs(void) } static int ipv4_proc_init(void); -extern void ipfrag_init(void); /* * IP protocol layer initialiser @@ -1253,19 +1248,10 @@ module_init(inet_init); /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -extern int fib_proc_init(void); -extern void fib_proc_exit(void); #ifdef CONFIG_IP_FIB_TRIE extern int fib_stat_proc_init(void); extern void fib_stat_proc_exit(void); #endif -extern int ip_misc_proc_init(void); -extern int raw_proc_init(void); -extern void raw_proc_exit(void); -extern int tcp4_proc_init(void); -extern void tcp4_proc_exit(void); -extern int udp4_proc_init(void); -extern void udp4_proc_exit(void); static int __init ipv4_proc_init(void) { diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 3fd49f4282ac..c1b42b5257f8 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 3c513ceaca76..4410b9dc03e9 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include /* diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ddb1aedbdc6d..aca088b3707a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -614,7 +614,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case IP_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct ip_msfilter *msf; @@ -769,7 +768,6 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_igmp_max_msf; struct sockaddr_in *psin; struct ip_msfilter *msf = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3eadbb271871..f7943ba1f43c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,6 @@ static int 
fold_prot_inuse(struct proto *proto) */ static int sockstat_seq_show(struct seq_file *seq, void *v) { - /* From net/socket.c */ - extern void socket_seq_show(struct seq_file *seq); - socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 8692cb9d4bdb..a34e60ea48a1 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -169,8 +169,6 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) return mssind < NUM_MSS ? msstab[mssind] + 1 : 0; } -extern struct request_sock_ops tcp_request_sock_ops; - static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index ce47a345ecc5..652685623519 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -19,36 +21,6 @@ /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; -/* From icmp.c */ -extern int sysctl_icmp_echo_ignore_all; -extern int sysctl_icmp_echo_ignore_broadcasts; -extern int sysctl_icmp_ignore_bogus_error_responses; -extern int sysctl_icmp_errors_use_inbound_ifaddr; - -/* From ip_fragment.c */ -extern int sysctl_ipfrag_low_thresh; -extern int sysctl_ipfrag_high_thresh; -extern int sysctl_ipfrag_time; -extern int sysctl_ipfrag_secret_interval; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - -/* From icmp.c */ -extern int sysctl_icmp_ratelimit; -extern int sysctl_icmp_ratemask; - -/* From igmp.c */ -extern int sysctl_igmp_max_memberships; -extern int sysctl_igmp_max_msf; - -/* From inetpeer.c */ -extern int inet_peer_threshold; -extern int inet_peer_minttl; -extern int inet_peer_maxttl; -extern int inet_peer_gc_mintime; -extern int inet_peer_gc_maxtime; - #ifdef 
CONFIG_SYSCTL static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; @@ -57,8 +29,6 @@ static int ip_local_port_range_max[] = { 65535, 65535 }; struct ipv4_config ipv4_config; -extern ctl_table ipv4_route_table[]; - #ifdef CONFIG_SYSCTL static @@ -136,10 +106,11 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, - void **context) +static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen, + void **context) { char val[TCP_CA_NAME_MAX]; ctl_table tbl = { diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index ebb8654e3dee..1afb080bdf0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4229,7 +4229,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, */ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && !tp->srtt) - tcp_ack_saw_tstamp(sk, 0, 0); + tcp_ack_saw_tstamp(sk, NULL, 0); if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 97bbf595230d..13dfb391cdf1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include #include @@ -76,7 +77,6 @@ #include #include -extern int sysctl_ip_dynaddr; int sysctl_tcp_tw_reuse; int sysctl_tcp_low_latency; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 493abf94bcfc..937ad32db77c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1126,7 +1126,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &maddr); } -void addrconf_join_anycast(struct inet6_ifaddr *ifp) +static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct 
in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); @@ -1135,7 +1135,7 @@ void addrconf_join_anycast(struct inet6_ifaddr *ifp) ipv6_dev_ac_inc(ifp->idev->dev, &addr); } -void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7df2ccb380d9..4f8795af2edb 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -67,23 +67,6 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -/* IPv6 procfs goodies... */ - -#ifdef CONFIG_PROC_FS -extern int raw6_proc_init(void); -extern void raw6_proc_exit(void); -extern int tcp6_proc_init(void); -extern void tcp6_proc_exit(void); -extern int udp6_proc_init(void); -extern void udp6_proc_exit(void); -extern int ipv6_misc_proc_init(void); -extern void ipv6_misc_proc_exit(void); -extern int ac6_proc_init(void); -extern void ac6_proc_exit(void); -extern int if6_proc_init(void); -extern void if6_proc_exit(void); -#endif - int sysctl_ipv6_bindv6only; /* The inetsw table contains everything that inet_create needs to @@ -505,11 +488,6 @@ static struct net_proto_family inet6_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void ipv6_sysctl_register(void); -extern void ipv6_sysctl_unregister(void); -#endif - /* Same as inet6_dgram_ops, sans udp_poll. 
*/ static struct proto_ops inet6_sockraw_ops = { .family = PF_INET6, @@ -676,8 +654,6 @@ static void cleanup_ipv6_mibs(void) snmp6_mib_free((void **)udp_stats_in6); } -extern int ipv6_misc_proc_init(void); - static int __init inet6_init(void) { struct sk_buff *dummy_skb; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76fe23925d77..7516b8829a9d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -109,13 +109,6 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)) return 0; } -extern int ip6_mc_source(int add, int omode, struct sock *sk, - struct group_source_req *pgsr); -extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); -extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, - struct group_filter __user *optval, int __user *optlen); - - int ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -446,7 +439,6 @@ done: } case MCAST_MSFILTER: { - extern int sysctl_optmem_max; extern int sysctl_mld_max_msf; struct group_filter *gsf; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 6ea494ab4e02..5d5bbb49ec78 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1372,7 +1372,7 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) * Drop the packet on the floor */ -int ip6_pkt_discard(struct sk_buff *skb) +static int ip6_pkt_discard(struct sk_buff *skb) { IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev); @@ -1380,7 +1380,7 @@ int ip6_pkt_discard(struct sk_buff *skb) return 0; } -int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sk_buff *skb) { skb->dev = skb->dst->dev; return ip6_pkt_discard(skb); @@ -1960,8 +1960,6 @@ static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) return arg.len; } -extern struct rt6_statistics rt6_stats; - static int rt6_stats_seq_show(struct seq_file *seq, void *v) { 
seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e553e5b80d6e..c3123c9e1a8e 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -770,7 +770,7 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -int __init ipip6_fb_tunnel_init(struct net_device *dev) +static int __init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = dev->priv; struct iphdr *iph = &tunnel->parms.iph; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3a18e0e6ffed..8eff9fa1e983 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -14,9 +14,6 @@ #include #include -extern ctl_table ipv6_route_table[]; -extern ctl_table ipv6_icmp_table[]; - #ifdef CONFIG_SYSCTL static ctl_table ipv6_table[] = { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fb291b81cf63..794734f1d230 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1910,8 +1910,6 @@ static int tcp_v6_init_sock(struct sock *sk) static int tcp_v6_destroy_sock(struct sock *sk) { - extern int tcp_v4_destroy_sock(struct sock *sk); - tcp_v4_destroy_sock(sk); return inet6_destroy_sock(sk); } @@ -2123,8 +2121,6 @@ static struct inet6_protocol tcpv6_protocol = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -extern struct proto_ops inet6_stream_ops; - static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index c348307e5773..67d9a04b6902 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1054,8 +1054,6 @@ struct proto udpv6_prot = { .obj_size = sizeof(struct udp6_sock), }; -extern struct proto_ops inet6_dgram_ops; - static struct inet_protosw udpv6_protosw = { .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 180e383f707c..34b3bb868409 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1940,9 +1940,7 @@ static struct notifier_block ipx_dev_notifier 
= { }; extern struct datalink_proto *make_EII_client(void); -extern struct datalink_proto *make_8023_client(void); extern void destroy_EII_client(struct datalink_proto *); -extern void destroy_8023_client(struct datalink_proto *); static unsigned char ipx_8022_type = 0xE0; static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; diff --git a/net/socket.c b/net/socket.c index 5f76ab8a1594..ce69b7862f59 100644 --- a/net/socket.c +++ b/net/socket.c @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include #include @@ -724,8 +726,8 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return __sock_sendmsg(iocb, sock, &x->async_msg, size); } -ssize_t sock_sendpage(struct file *file, struct page *page, - int offset, size_t size, loff_t *ppos, int more) +static ssize_t sock_sendpage(struct file *file, struct page *page, + int offset, size_t size, loff_t *ppos, int more) { struct socket *sock; int flags; @@ -948,7 +950,7 @@ static int sock_mmap(struct file * file, struct vm_area_struct * vma) return sock->ops->mmap(file, sock, vma); } -int sock_close(struct inode *inode, struct file *filp) +static int sock_close(struct inode *inode, struct file *filp) { /* * It was possible the inode is NULL we were @@ -2027,9 +2029,6 @@ int sock_unregister(int family) return 0; } - -extern void sk_init(void); - void __init sock_init(void) { /* diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 3f6e31069c54..c5241fcbb966 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -17,17 +17,15 @@ #include #ifdef CONFIG_INET -extern struct ctl_table ipv4_table[]; +#include #endif -extern struct ctl_table core_table[]; - #ifdef CONFIG_NET -extern struct ctl_table ether_table[]; +#include #endif #ifdef CONFIG_TR -extern struct ctl_table tr_table[]; +#include #endif struct ctl_table net_table[] = { diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index bc4c44552c1f..41feca3bef86 100644 --- a/net/unix/af_unix.c +++ 
b/net/unix/af_unix.c @@ -2026,14 +2026,6 @@ static struct net_proto_family unix_family_ops = { .owner = THIS_MODULE, }; -#ifdef CONFIG_SYSCTL -extern void unix_sysctl_register(void); -extern void unix_sysctl_unregister(void); -#else -static inline void unix_sysctl_register(void) {} -static inline void unix_sysctl_unregister(void) {} -#endif - static int __init af_unix_init(void) { int rc = -1; diff --git a/net/unix/sysctl_net_unix.c b/net/unix/sysctl_net_unix.c index c974dac4580a..690ffa5d5bfb 100644 --- a/net/unix/sysctl_net_unix.c +++ b/net/unix/sysctl_net_unix.c @@ -12,7 +12,7 @@ #include #include -extern int sysctl_unix_max_dgram_qlen; +#include static ctl_table unix_table[] = { { -- cgit v1.2.3 From 6ed8a48582c08432e84e5610564c1d25fe00dd7f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 19:02:15 -0300 Subject: [NETLINK]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/netlink.h | 2 +- net/netlink/af_netlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7d1d9683b246..167518668936 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -129,7 +129,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, int allocation); + __u32 group, unsigned int __nocast allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 
e259f46e26f7..62435ffc6184 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -861,7 +861,7 @@ out: } int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, - u32 group, int allocation) + u32 group, unsigned int __nocast allocation) { struct netlink_broadcast_data info; struct hlist_node *node; -- cgit v1.2.3 From d179cd12928443f3ec29cfbc3567439644bd0afc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Aug 2005 14:57:30 -0700 Subject: [NET]: Implement SKB fast cloning. Protocols that make extensive use of SKB cloning, for example TCP, eat at least 2 allocations per packet sent as a result. To cut the kmalloc() count in half, we implement a pre-allocation scheme wherein we allocate 2 sk_buff objects in advance, then use a simple reference count to free up the memory at the correct time. Based upon an initial patch by Thomas Graf and suggestions from Herbert Xu. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 26 ++++++++++++++-- include/net/sock.h | 2 +- net/core/skbuff.c | 82 ++++++++++++++++++++++++++++++++++++++++++++------ net/ipv4/tcp_output.c | 4 +-- 4 files changed, 98 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index db10335e4192..42edce6abe23 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -162,6 +162,13 @@ struct skb_timeval { u32 off_usec; }; + +enum { + SKB_FCLONE_UNAVAILABLE, + SKB_FCLONE_ORIG, + SKB_FCLONE_CLONE, +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -255,7 +262,8 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; - __u8 pkt_type; + __u8 pkt_type:3, + fclone:2; __be16 protocol; void (*destructor)(struct sk_buff *skb); @@ -295,8 +303,20 @@ struct sk_buff { #include extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority); +extern struct sk_buff *__alloc_skb(unsigned int size, + 
unsigned int __nocast priority, int fclone); +static inline struct sk_buff *alloc_skb(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 0); +} + +static inline struct sk_buff *alloc_skb_fclone(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 1); +} + extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, unsigned int __nocast priority); diff --git a/include/net/sock.h b/include/net/sock.h index 14183883e8e6..d57aece9492c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int hdr_len; hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); - skb = alloc_skb(size + hdr_len, gfp); + skb = alloc_skb_fclone(size + hdr_len, gfp); if (skb) { skb->truesize += mem; if (sk->sk_forward_alloc >= (int)skb->truesize || diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 39a161dbc16d..b853a9b29eb6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -69,6 +69,7 @@ #include static kmem_cache_t *skbuff_head_cache; +static kmem_cache_t *skbuff_fclone_cache; struct timeval __read_mostly skb_tv_base; @@ -120,7 +121,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) */ /** - * alloc_skb - allocate a network buffer + * __alloc_skb - allocate a network buffer * @size: size to allocate * @gfp_mask: allocation mask * @@ -131,14 +132,20 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) * Buffers may only be allocated from interrupts using a @gfp_mask of * %GFP_ATOMIC. 
*/ -struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) +struct sk_buff *__alloc_skb(unsigned int size, unsigned int __nocast gfp_mask, + int fclone) { struct sk_buff *skb; u8 *data; /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); + if (fclone) + skb = kmem_cache_alloc(skbuff_fclone_cache, + gfp_mask & ~__GFP_DMA); + else + skb = kmem_cache_alloc(skbuff_head_cache, + gfp_mask & ~__GFP_DMA); + if (!skb) goto out; @@ -155,7 +162,15 @@ struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask) skb->data = data; skb->tail = data; skb->end = data + size; + if (fclone) { + struct sk_buff *child = skb + 1; + atomic_t *fclone_ref = (atomic_t *) (child + 1); + skb->fclone = SKB_FCLONE_ORIG; + atomic_set(fclone_ref, 1); + + child->fclone = SKB_FCLONE_UNAVAILABLE; + } atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; skb_shinfo(skb)->tso_size = 0; @@ -268,8 +283,34 @@ void skb_release_data(struct sk_buff *skb) */ void kfree_skbmem(struct sk_buff *skb) { + struct sk_buff *other; + atomic_t *fclone_ref; + skb_release_data(skb); - kmem_cache_free(skbuff_head_cache, skb); + switch (skb->fclone) { + case SKB_FCLONE_UNAVAILABLE: + kmem_cache_free(skbuff_head_cache, skb); + break; + + case SKB_FCLONE_ORIG: + fclone_ref = (atomic_t *) (skb + 2); + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, skb); + break; + + case SKB_FCLONE_CLONE: + fclone_ref = (atomic_t *) (skb + 1); + other = skb - 1; + + /* The clone portion is available for + * fast-cloning again. 
+ */ + skb->fclone = SKB_FCLONE_UNAVAILABLE; + + if (atomic_dec_and_test(fclone_ref)) + kmem_cache_free(skbuff_fclone_cache, other); + break; + }; } /** @@ -324,10 +365,20 @@ void __kfree_skb(struct sk_buff *skb) struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask) { - struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - - if (!n) - return NULL; + struct sk_buff *n; + + n = skb + 1; + if (skb->fclone == SKB_FCLONE_ORIG && + n->fclone == SKB_FCLONE_UNAVAILABLE) { + atomic_t *fclone_ref = (atomic_t *) (n + 1); + n->fclone = SKB_FCLONE_CLONE; + atomic_inc(fclone_ref); + } else { + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; + n->fclone = SKB_FCLONE_UNAVAILABLE; + } #define C(x) n->x = skb->x @@ -409,6 +460,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac.raw = old->mac.raw + offset; memcpy(new->cb, old->cb, sizeof(old->cb)); new->local_df = old->local_df; + new->fclone = SKB_FCLONE_UNAVAILABLE; new->pkt_type = old->pkt_type; new->tstamp = old->tstamp; new->destructor = NULL; @@ -1647,13 +1699,23 @@ void __init skb_init(void) NULL, NULL); if (!skbuff_head_cache) panic("cannot create skbuff cache"); + + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", + (2*sizeof(struct sk_buff)) + + sizeof(atomic_t), + 0, + SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!skbuff_fclone_cache) + panic("cannot create skbuff cache"); + do_gettimeofday(&skb_tv_base); } EXPORT_SYMBOL(___pskb_trim); EXPORT_SYMBOL(__kfree_skb); EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(alloc_skb); +EXPORT_SYMBOL(__alloc_skb); EXPORT_SYMBOL(pskb_copy); EXPORT_SYMBOL(pskb_expand_head); EXPORT_SYMBOL(skb_checksum); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8d92ab562aed..75b68116682a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1582,7 +1582,7 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is 
available. */ for (;;) { - skb = alloc_skb(MAX_TCP_HEADER, GFP_KERNEL); + skb = alloc_skb_fclone(MAX_TCP_HEADER, GFP_KERNEL); if (skb) break; yield(); @@ -1804,7 +1804,7 @@ int tcp_connect(struct sock *sk) tcp_connect_init(sk); - buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation); + buff = alloc_skb_fclone(MAX_TCP_HEADER + 15, sk->sk_allocation); if (unlikely(buff == NULL)) return -ENOBUFS; -- cgit v1.2.3 From 1bc0986957b63a2fbbc46ab95d3d1d72830bda83 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 20 Aug 2005 00:23:43 -0300 Subject: [DCCP]: Fix the timestamp options This changes timestamp, timestamp echo, and elapsed time to use units of 10 usecs as per DCCP spec. This has been tested to verify that times are correct. Also fixed up length and used hton/ntoh more. Still to add in later patches: - actually use elapsed time to adjust RTT (commented out as was prior to this patch) - send options at times more closely following the spec (content is now correct) Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 2 +- net/dccp/ccids/ccid3.c | 12 ++----- net/dccp/dccp.h | 19 +++++++++-- net/dccp/options.c | 87 ++++++++++++++++++++++++++++++++++---------------- 4 files changed, 79 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dccdd5108b5..9e3a1370b906 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -415,7 +415,7 @@ struct dccp_sock { __u64 dccps_gsr; __u64 dccps_gar; unsigned long dccps_service; - unsigned long dccps_timestamp_time; + struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_avg_packet_size; unsigned long dccps_ndp_count; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 2dd3e94ba8f4..694149061b8b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -2,12 +2,12 @@ * net/dccp/ccids/ccid3.c * * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. + * Copyright (c) 2005 Ian McDonald * * An implementation of the DCCP protocol * * This code has been developed by the University of Waikato WAND * research group. 
For further information please see http://www.wand.net.nz/ - * or e-mail Ian McDonald - iam4@cs.waikato.ac.nz * * This code also uses code from Lulea University, rereleased as GPL by its * authors: @@ -174,14 +174,6 @@ static inline void timeval_fix(struct timeval *tv) } } -/* returns the difference in usecs between timeval passed in and current time */ -static inline u32 now_delta(struct timeval tv) { - struct timeval now; - - do_gettimeofday(&now); - return ((now.tv_sec-tv.tv_sec)*1000000+now.tv_usec-tv.tv_usec); -} - #define CALCX_ARRSIZE 500 #define CALCX_SPLIT 50000 @@ -1110,7 +1102,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_options_received *opt_recv; struct dccp_tx_hist_entry *packet; unsigned long next_tmout; - u16 t_elapsed; + u32 t_elapsed; u32 pinv; u32 x_recv; u32 r_sample; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 4efdce47000b..aab72b8d0703 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -4,7 +4,8 @@ * net/dccp/dccp.h * * An implementation of the DCCP protocol - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as @@ -404,6 +405,7 @@ extern struct socket *dccp_ctl_socket; * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. 
* * @dccpap_buf_len - circular buffer length + * @dccpap_time - the time in usecs * @dccpap_buf - circular buffer of acknowledgeable packets */ struct dccp_ackpkts { @@ -416,7 +418,7 @@ struct dccp_ackpkts { unsigned int dccpap_buf_vector_len; unsigned int dccpap_ack_vector_len; unsigned int dccpap_buf_len; - unsigned long dccpap_time; + struct timeval dccpap_time; u8 dccpap_buf_nonce; u8 dccpap_ack_nonce; u8 dccpap_buf[0]; @@ -430,6 +432,19 @@ extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +/* + * Returns the difference in usecs between timeval + * passed in and current time + */ +static inline u32 now_delta(struct timeval tv) +{ + struct timeval now; + + do_gettimeofday(&now); + return (now.tv_sec - tv.tv_sec) * USEC_PER_SEC + + (now.tv_usec - tv.tv_usec); +} + #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/options.c b/net/dccp/options.c index 85a86bd61f44..7ecffdf85756 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -2,8 +2,9 @@ * net/dccp/options.c * * An implementation of the DCCP protocol - * Aristeu Sergio Rozanski Filho - * Arnaldo Carvalho de Melo + * Copyright (c) 2005 Aristeu Sergio Rozanski Filho + * Copyright (c) 2005 Arnaldo Carvalho de Melo + * Copyright (c) 2005 Ian McDonald * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -138,7 +139,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - dp->dccps_timestamp_time = jiffies; + do_gettimeofday(&dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -146,36 +147,45 @@ int dccp_parse_options(struct sock *sk, struct 
sk_buff *skb) DCCP_SKB_CB(skb)->dccpd_ack_seq); break; case DCCPO_TIMESTAMP_ECHO: - if (len < 4 || len > 8) + if (len != 4 && len != 6 && len != 8) goto out_invalid_option; opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value); - dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, " - "diff=%u\n", + dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, ", debug_prefix, opt_recv->dccpor_timestamp_echo, len + 2, (unsigned long long) - DCCP_SKB_CB(skb)->dccpd_ack_seq, - (tcp_time_stamp - - opt_recv->dccpor_timestamp_echo)); - - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value + 4, - len - 4); - dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", + DCCP_SKB_CB(skb)->dccpd_ack_seq); + + if (len > 4) { + if (len == 6) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)(value + 4)); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)(value + 4)); + + dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); + } break; case DCCPO_ELAPSED_TIME: - if (len > 4) + if (len != 2 && len != 4) goto out_invalid_option; if (pkt_type == DCCP_PKT_DATA) continue; - opt_recv->dccpor_elapsed_time = - dccp_decode_value_var(value, len); + + if (len == 2) + opt_recv->dccpor_elapsed_time = + ntohs(*(u16 *)value); + else + opt_recv->dccpor_elapsed_time = + ntohl(*(u32 *)value); + dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix, opt_recv->dccpor_elapsed_time); break; @@ -309,8 +319,7 @@ void dccp_insert_option_elapsed_time(struct sock *sk, const int len = 2 + elapsed_time_len; unsigned char *to; - /* If elapsed_time == 0... 
*/ - if (elapsed_time_len == 2) + if (elapsed_time_len == 0) return; if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { @@ -325,7 +334,13 @@ void dccp_insert_option_elapsed_time(struct sock *sk, *to++ = DCCPO_ELAPSED_TIME; *to++ = len; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n", debug_prefix, elapsed_time, @@ -344,7 +359,7 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10; + const u32 elapsed_time = now_delta(ap->dccpap_time) / 10; unsigned char *to, *from; if (elapsed_time != 0) @@ -414,7 +429,15 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { - const u32 now = htonl(tcp_time_stamp); + struct timeval tv; + u32 now; + + do_gettimeofday(&tv); + now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + /* yes this will overflow but that is the point as we want a + * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ + + now = htonl(now); dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &now, sizeof(now)); } @@ -427,8 +450,7 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, "CLIENT TX opt: " : "server TX opt: "; #endif u32 tstamp_echo; - const u32 elapsed_time = jiffies_to_usecs(jiffies - - dp->dccps_timestamp_time) / 10; + const u32 elapsed_time = now_delta(dp->dccps_timestamp_time) / 10; const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); const int len = 6 + elapsed_time_len; unsigned char *to; @@ -448,7 +470,14 @@ static void 
dccp_insert_option_timestamp_echo(struct sock *sk, tstamp_echo = htonl(dp->dccps_timestamp_echo); memcpy(to, &tstamp_echo, 4); to += 4; - dccp_encode_value_var(elapsed_time, to, elapsed_time_len); + + if (elapsed_time_len == 2) { + const u16 var16 = htons((u16)elapsed_time); + memcpy(to, &var16, 2); + } else if (elapsed_time_len == 4) { + const u32 var32 = htonl(elapsed_time); + memcpy(to, &var32, 4); + } dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n", debug_prefix, dp->dccps_timestamp_echo, @@ -456,7 +485,8 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); dp->dccps_timestamp_echo = 0; - dp->dccps_timestamp_time = 0; + dp->dccps_timestamp_time.tv_sec = 0; + dp->dccps_timestamp_time.tv_usec = 0; } void dccp_insert_options(struct sock *sk, struct sk_buff *skb) @@ -514,7 +544,8 @@ struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; ap->dccpap_ack_ptr = 0; - ap->dccpap_time = 0; + ap->dccpap_time.tv_sec = 0; + ap->dccpap_time.tv_usec = 0; ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; } @@ -665,7 +696,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - ap->dccpap_time = jiffies; + do_gettimeofday(&ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); -- cgit v1.2.3 From a6f9a70578b981321b63786ac8015f17cca4fcbd Mon Sep 17 00:00:00 2001 From: Jon Wetzel Date: Sat, 20 Aug 2005 17:15:54 -0700 Subject: [NET]: Add support for getting the permanent hardware address. This patch adds a new field to net device to hold the permanent hardware address, and adds a new generic ethtool_op function to get that address. Signed-off-by: Jon Wetzel Signed-off-by: John W. Linville Signed-off-by: David S. 
Miller --- include/linux/ethtool.h | 13 ++++++++++++- include/linux/netdevice.h | 1 + net/core/ethtool.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7021c391b2b..ed1440ea4c91 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -250,6 +250,12 @@ struct ethtool_stats { u64 data[0]; }; +struct ethtool_perm_addr { + u32 cmd; /* ETHTOOL_GPERMADDR */ + u32 size; + u8 data[0]; +}; + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); int ethtool_op_set_tso(struct net_device *dev, u32 data); +int ethtool_op_get_perm_addr(struct net_device *dev, + struct ethtool_perm_addr *addr, u8 *data); /** * &ethtool_ops - Alter and report network device settings @@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data); * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device * get_stats: Return statistics about the device - * + * get_perm_addr: Gets the permanent hardware address + * * Description: * * get_settings: @@ -352,6 +361,7 @@ struct ethtool_ops { int (*phys_id)(struct net_device *, u32); int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); }; @@ -389,6 +399,7 @@ struct ethtool_ops { #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ #define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#define ETHTOOL_GPERMADDR
0x00000020 /* Get permanent hardware address */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1fcaa88b8625..7c717907896d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -337,6 +337,7 @@ struct net_device /* Interface address info. */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ + unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ diff --git a/net/core/ethtool.c b/net/core/ethtool.c index a3eeb88e1c81..289c1b5a8e4a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -81,6 +81,18 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data) return 0; } +int ethtool_op_get_perm_addr(struct net_device *dev, struct ethtool_perm_addr *addr, u8 *data) +{ + unsigned char len = dev->addr_len; + if ( addr->size < len ) + return -ETOOSMALL; + + addr->size = len; + memcpy(data, dev->perm_addr, len); + return 0; +} + + /* Handlers for each ethtool command */ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) @@ -683,6 +695,39 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_perm_addr(struct net_device *dev, void *useraddr) +{ + struct ethtool_perm_addr epaddr; + u8 *data; + int ret; + + if (!dev->ethtool_ops->get_perm_addr) + return -EOPNOTSUPP; + + if (copy_from_user(&epaddr,useraddr,sizeof(epaddr))) + return -EFAULT; + + data = kmalloc(epaddr.size, GFP_USER); + if (!data) + return -ENOMEM; + + ret = dev->ethtool_ops->get_perm_addr(dev,&epaddr,data); + if (ret) + return ret; + + ret = -EFAULT; + if (copy_to_user(useraddr, &epaddr, sizeof(epaddr))) + goto out; + useraddr += sizeof(epaddr); + if (copy_to_user(useraddr, data, epaddr.size)) + goto out; + ret = 0; + + 
out: + kfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev.c */ int dev_ethtool(struct ifreq *ifr) @@ -806,6 +851,9 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_GSTATS: rc = ethtool_get_stats(dev, useraddr); break; + case ETHTOOL_GPERMADDR: + rc = ethtool_get_perm_addr(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } @@ -826,6 +874,7 @@ int dev_ethtool(struct ifreq *ifr) EXPORT_SYMBOL(dev_ethtool); EXPORT_SYMBOL(ethtool_op_get_link); +EXPORT_SYMBOL_GPL(ethtool_op_get_perm_addr); EXPORT_SYMBOL(ethtool_op_get_sg); EXPORT_SYMBOL(ethtool_op_get_tso); EXPORT_SYMBOL(ethtool_op_get_tx_csum); -- cgit v1.2.3 From 7567662ba896ee0c33d6215f32e2011488a6d1bf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2005 23:30:34 -0700 Subject: [NETFILTER]: Add string match Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_string.h | 18 ++++++ net/ipv4/netfilter/Kconfig | 12 ++++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_string.c | 91 +++++++++++++++++++++++++++++++ 4 files changed, 122 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_string.h create mode 100644 net/ipv4/netfilter/ipt_string.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h new file mode 100644 index 000000000000..a265f6e44eab --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_string.h @@ -0,0 +1,18 @@ +#ifndef _IPT_STRING_H +#define _IPT_STRING_H + +#define IPT_STRING_MAX_PATTERN_SIZE 128 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16 + +struct ipt_string_info +{ + u_int16_t from_offset; + u_int16_t to_offset; + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE]; + char pattern[IPT_STRING_MAX_PATTERN_SIZE]; + u_int8_t patlen; + u_int8_t invert; + struct ts_config __attribute__((aligned(8))) *config; +}; + +#endif /*_IPT_STRING_H*/ diff --git a/net/ipv4/netfilter/Kconfig 
b/net/ipv4/netfilter/Kconfig index 3f7e6e49cbdd..f2bea6ecb226 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -410,6 +410,18 @@ config IP_NF_MATCH_HASHLIMIT destination IP' or `500pps from any given source IP' with a single IPtables rule. +config IP_NF_MATCH_STRING + tristate 'string match support' + depends on IP_NF_IPTABLES + select TEXTSEARCH + select TEXTSEARCH_KMP + select TEXTSEARCH_FSM + help + This option adds a `string' match, which allows you to look for + pattern matchings in packets. + + To compile it as a module, choose M here. If unsure, say N. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 7c8ae858aa43..89cae69ee208 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -65,6 +65,7 @@ obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o # targets obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o diff --git a/net/ipv4/netfilter/ipt_string.c b/net/ipv4/netfilter/ipt_string.c new file mode 100644 index 000000000000..b5def204d798 --- /dev/null +++ b/net/ipv4/netfilter/ipt_string.c @@ -0,0 +1,91 @@ +/* String matching match for iptables + * + * (C) 2005 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_DESCRIPTION("IP tables string match module"); +MODULE_LICENSE("GPL"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + struct ts_state state; + struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo; + + memset(&state, 0, sizeof(struct ts_state)); + + return (skb_find_text((struct sk_buff *)skb, conf->from_offset, + conf->to_offset, conf->config, &state) + != UINT_MAX) && !conf->invert; +} + +#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m) + +static int checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_string_info *conf = matchinfo; + struct ts_config *ts_conf; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info))) + return 0; + + /* Damn, can't handle this case properly with iptables... 
*/ + if (conf->from_offset > conf->to_offset) + return 0; + + ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen, + GFP_KERNEL, TS_AUTOLOAD); + if (IS_ERR(ts_conf)) + return 0; + + conf->config = ts_conf; + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchsize) +{ + textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); +} + +static struct ipt_match string_match = { + .name = "string", + .match = match, + .checkentry = checkentry, + .destroy = destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&string_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&string_match); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From 764d8a9f240729534a1d8a0ffd39e722cf5cc5af Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:06 -0700 Subject: [NETFILTER]: Add IPv6 REJECT target Originally written by Yasuyuki Kozakai , taken from netfilter patch-o-matic and fixed up to work with current kernels. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6t_REJECT.h | 18 ++ net/ipv6/netfilter/Kconfig | 10 + net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 284 +++++++++++++++++++++++++++++ 4 files changed, 313 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_REJECT.h create mode 100644 net/ipv6/netfilter/ip6t_REJECT.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h new file mode 100644 index 000000000000..6be6504162bb --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -0,0 +1,18 @@ +#ifndef _IP6T_REJECT_H +#define _IP6T_REJECT_H + +enum ip6t_reject_with { + IP6T_ICMP6_NO_ROUTE, + IP6T_ICMP6_ADM_PROHIBITED, + IP6T_ICMP6_NOT_NEIGHBOUR, + IP6T_ICMP6_ADDR_UNREACH, + IP6T_ICMP6_PORT_UNREACH, + IP6T_ICMP6_ECHOREPLY, + IP6T_TCP_RESET +}; + +struct ip6t_reject_info { + u_int32_t with; /* reject type */ +}; + +#endif /*_IP6T_REJECT_H*/ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index cd1551983c63..8a10c2d0d154 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -199,6 +199,16 @@ config IP6_NF_TARGET_LOG To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP6_NF_FILTER + help + The REJECT target allows a filtering rule to specify that an ICMPv6 + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. 
+ # if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then # dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER # if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 847651dbcd2a..70f6ba610102 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -24,4 +24,5 @@ obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += ip6t_NFQUEUE.o diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c new file mode 100644 index 000000000000..14316c3ebde4 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -0,0 +1,284 @@ +/* + * IP6 tables REJECT target module + * Linux INET6 implementation + * + * Copyright (C)2003 USAGI/WIDE Project + * + * Authors: + * Yasuyuki Kozakai + * + * Based on net/ipv4/netfilter/ipt_REJECT.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Yasuyuki KOZAKAI "); +MODULE_DESCRIPTION("IP6 tables REJECT target module"); +MODULE_LICENSE("GPL"); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +/* Send RST reply */ +static void send_reset(struct sk_buff *oldskb) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + struct ipv6hdr *oip6h = oldskb->nh.ipv6h, *ip6h; + struct dst_entry *dst = NULL; + u8 proto; + struct flowi fl; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + DEBUGP("ip6t_REJECT: addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + DEBUGP("ip6t_REJECT: Can't get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if ((proto != IPPROTO_TCP) || (otcplen < sizeof(struct tcphdr))) { + DEBUGP("ip6t_REJECT: proto(%d) != IPPROTO_TCP, or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + DEBUGP("ip6t_REJECT: RST is set\n"); + return; + } + + /* Check checksum. 
*/ + if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, + skb_checksum(oldskb, tcphoff, otcplen, 0))) { + DEBUGP("ip6t_REJECT: TCP checksum is invalid\n"); + return; + } + + memset(&fl, 0, sizeof(fl)); + fl.proto = IPPROTO_TCP; + ipv6_addr_copy(&fl.fl6_src, &oip6h->daddr); + ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); + fl.fl_ip_sport = otcph.dest; + fl.fl_ip_dport = otcph.source; + dst = ip6_route_output(NULL, &fl); + if (dst == NULL) + return; + if (dst->error || + xfrm_lookup(&dst, &fl, NULL, 0)) { + dst_release(dst); + return; + } + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + if (net_ratelimit()) + printk("ip6t_REJECT: Can't alloc skb\n"); + dst_release(dst); + return; + } + + nskb->dst = dst; + + skb_reserve(nskb, hh_len + dst->header_len); + + ip6h = nskb->nh.ipv6h = (struct ipv6hdr *) + skb_put(nskb, sizeof(struct ipv6hdr)); + ip6h->version = 6; + ip6h->hop_limit = dst_metric(dst, RTAX_HOPLIMIT); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->payload_len = htons(sizeof(struct tcphdr)); + ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr); + ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr); + + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&nskb->nh.ipv6h->saddr, + &nskb->nh.ipv6h->daddr, + sizeof(struct tcphdr), 
IPPROTO_TCP, + csum_partial((char *)tcph, + sizeof(struct tcphdr), 0)); + + NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, nskb, NULL, nskb->dst->dev, + dst_output); +} + +static inline void +send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) +{ + if (hooknum == NF_IP6_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = &loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); +} + +static unsigned int reject6_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_reject_info *reject = targinfo; + + DEBUGP(KERN_DEBUG "%s: medium point\n", __FUNCTION__); + /* WARNING: This code causes reentry within ip6tables. + This means that the ip6tables jump stack is now crap. We + must return an absolute verdict. --RR */ + switch (reject->with) { + case IP6T_ICMP6_NO_ROUTE: + send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); + break; + case IP6T_ICMP6_ADM_PROHIBITED: + send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); + break; + case IP6T_ICMP6_NOT_NEIGHBOUR: + send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); + break; + case IP6T_ICMP6_ADDR_UNREACH: + send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); + break; + case IP6T_ICMP6_PORT_UNREACH: + send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); + break; + case IP6T_ICMP6_ECHOREPLY: + /* Do nothing */ + break; + case IP6T_TCP_RESET: + send_reset(*pskb); + break; + default: + if (net_ratelimit()) + printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with); + break; + } + + return NF_DROP; +} + +static int check(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const struct ip6t_reject_info *rejinfo = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_reject_info))) { + DEBUGP("ip6t_REJECT: targinfosize %u != 0\n", targinfosize); + return 0; + } + + /* Only 
allow these for packet filtering. */ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("ip6t_REJECT: bad table `%s'.\n", tablename); + return 0; + } + + if ((hook_mask & ~((1 << NF_IP6_LOCAL_IN) + | (1 << NF_IP6_FORWARD) + | (1 << NF_IP6_LOCAL_OUT))) != 0) { + DEBUGP("ip6t_REJECT: bad hook mask %X\n", hook_mask); + return 0; + } + + if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { + printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); + return 0; + } else if (rejinfo->with == IP6T_TCP_RESET) { + /* Must specify that it's a TCP packet */ + if (e->ipv6.proto != IPPROTO_TCP + || (e->ipv6.invflags & IP6T_INV_PROTO)) { + DEBUGP("ip6t_REJECT: TCP_RESET illegal for non-tcp\n"); + return 0; + } + } + + return 1; +} + +static struct ip6t_target ip6t_reject_reg = { + .name = "REJECT", + .target = reject6_target, + .checkentry = check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_reject_reg); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From 05465343bf74e00c8c2c5a310740157de3149f27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:43 -0700 Subject: [NETFILTER]: Add goto target Originally written by Henrik Nordstrom , taken from netfilter patch-o-matic and added ip6_tables support. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 ++- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 12ce47808e7d..d19d65cf4530 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -109,7 +109,8 @@ struct ipt_counters /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ -#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ +#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ +#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f1ce3b009853..58c72a52dc65 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -111,7 +111,8 @@ struct ip6t_counters #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ #define IP6T_F_TOS 0x02 /* Match the TOS. */ -#define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ +#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ +#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ /* Values for "inv" field in struct ip6t_ip6. */ #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. 
*/ diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ff8d85d2070d..eef99a1b5de6 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -340,8 +340,8 @@ ipt_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ip.flags & IPT_F_GOTO)) { /* Save old back ptr in next entry */ struct ipt_entry *next = (void *)e + e->next_offset; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 41a67cf6e33a..1cb8adb2787f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -433,8 +433,8 @@ ip6t_do_table(struct sk_buff **pskb, back->comefrom); continue; } - if (table_base + v - != (void *)e + e->next_offset) { + if (table_base + v != (void *)e + e->next_offset + && !(e->ipv6.flags & IP6T_F_GOTO)) { /* Save old back ptr in next entry */ struct ip6t_entry *next = (void *)e + e->next_offset; -- cgit v1.2.3 From e5b4376074e02b783e56a8f7c42d544e18112c4e Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 25 Aug 2005 13:01:03 -0700 Subject: [IPV4]: Prepare FIB core for RCU. * RCU versions of hlist_***_rcu * fib_alias partial rcu port just whats needed now. Signed-off-by: Robert Olsson Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/list.h | 21 +++++++++++++++++++++ net/ipv4/fib_lookup.h | 1 + net/ipv4/fib_semantics.c | 3 ++- 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 0f2435f92db3..9b9b0eec1e8a 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,27 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index b729d97cfa93..ef6609ea0eb7 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -7,6 +7,7 @@ struct fib_alias { struct list_head fa_list; + struct rcu_head rcu; struct fib_info *fa_info; u8 fa_tos; u8 fa_type; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7e4651b3caa8..d41219e8037c 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -854,6 +854,7 @@ failure: return NULL; } +/* Note! fib_semantic_match intentionally uses RCU list functions. 
*/ int fib_semantic_match(struct list_head *head, const struct flowi *flp, struct fib_result *res, __u32 zone, __u32 mask, int prefixlen) @@ -861,7 +862,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, struct fib_alias *fa; int nh_sel = 0; - list_for_each_entry(fa, head, fa_list) { + list_for_each_entry_rcu(fa, head, fa_list) { int err; if (fa->fa_tos && -- cgit v1.2.3 From 57bf1451ac79640c5a0a4f31284c43539fac2903 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Aug 2005 16:06:19 -0700 Subject: [NET]: net/802: more endian annotations The rest of endian warnings now belongs to tr.c exclusively. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/hippidevice.h | 3 +-- include/linux/if_fc.h | 2 +- include/linux/if_fddi.h | 2 +- include/linux/if_hippi.h | 6 +++--- net/802/fc.c | 2 +- net/802/fddi.c | 4 ++-- net/802/hippi.c | 2 +- 7 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9bc3b688d2ee..bab303dafd6e 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -31,8 +31,7 @@ struct hippi_cb { __u32 ifield; }; -extern unsigned short hippi_type_trans(struct sk_buff *skb, - struct net_device *dev); +extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h index 33330b458b95..376a34ea4723 100644 --- a/include/linux/if_fc.h +++ b/include/linux/if_fc.h @@ -44,7 +44,7 @@ struct fcllc { __u8 ssap; /* source SAP */ __u8 llc; /* LLC control field */ __u8 protid[3]; /* protocol id */ - __u16 ethertype; /* ether type field */ + __be16 ethertype; /* ether type field */ }; #endif /* _LINUX_IF_FC_H */ diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index a912818e6361..1288a161bc0b 100644 --- a/include/linux/if_fddi.h +++ 
b/include/linux/if_fddi.h @@ -85,7 +85,7 @@ struct fddi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); /* Define FDDI LLC frame header */ diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index c8ca72c46f76..94d31ca7d71a 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -102,9 +102,9 @@ struct hippi_fp_hdr #error "Please fix " #endif #else - __u32 fixed; + __be32 fixed; #endif - __u32 d2_size; + __be32 d2_size; } __attribute__ ((packed)); struct hippi_le_hdr @@ -144,7 +144,7 @@ struct hippi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); struct hippi_hdr diff --git a/net/802/fc.c b/net/802/fc.c index 640d34e026c2..282c4ab1abe6 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -87,7 +87,7 @@ static int fc_rebuild_header(struct sk_buff *skb) struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); if(fcllc->ethertype != htons(ETH_P_IP)) { - printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(fcllc->ethertype)); + printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); return 0; } #ifdef CONFIG_INET diff --git a/net/802/fddi.c b/net/802/fddi.c index 5ce24c4bb840..ac242a4bc346 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -108,8 +108,8 @@ static int fddi_rebuild_header(struct sk_buff *skb) else #endif { - printk("%s: Don't know how to resolve type %02X addresses.\n", - skb->dev->name, htons(fddi->hdr.llc_snap.ethertype)); + printk("%s: Don't know how to resolve type %04X 
addresses.\n", + skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); return(0); } } diff --git a/net/802/hippi.c b/net/802/hippi.c index cb45ae1310cb..6d7fed3dd99a 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -124,7 +124,7 @@ static int hippi_rebuild_header(struct sk_buff *skb) * Determine the packet's protocol ID. */ -unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev) +__be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev) { struct hippi_hdr *hip; -- cgit v1.2.3 From cf4ef01440ca5c6d96f2ea2b793a37a0a863a045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Aug 2005 16:08:37 -0700 Subject: [LIST]: Add docbook header comments for hlist_add_{before,after}_rcu() Signed-off-by: David S. Miller --- include/linux/list.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 9b9b0eec1e8a..e6ec59682274 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,21 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +/** + * hlist_add_before_rcu - adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. 
+ */ static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { @@ -644,6 +659,21 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, *(n->pprev) = n; } +/** + * hlist_add_after_rcu - adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * @prev: the existing element to add the new element after. + * @n: the new element to add to the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ static inline void hlist_add_after_rcu(struct hlist_node *prev, struct hlist_node *n) { -- cgit v1.2.3 From 5f2c3b910744f68e1a507f027398f404b3feb5fb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:03 -0700 Subject: [NETFILTER]: Add new iptables TTL target This new iptables target allows manipulation of the TTL of an IPv4 packet. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_TTL.h | 21 ++++++ net/ipv4/netfilter/Kconfig | 14 ++++ net/ipv4/netfilter/Makefile | 1 + net/ipv4/netfilter/ipt_TTL.c | 119 +++++++++++++++++++++++++++++++++ 4 files changed, 155 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_TTL.h create mode 100644 net/ipv4/netfilter/ipt_TTL.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h new file mode 100644 index 000000000000..ee6611edc112 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_TTL.h @@ -0,0 +1,21 @@ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ + +#ifndef _IPT_TTL_H +#define _IPT_TTL_H + +enum { + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC +}; + +#define IPT_TTL_MAXMODE IPT_TTL_DEC + +struct ipt_TTL_info { + u_int8_t mode; + u_int8_t ttl; +}; + + +#endif diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c4213f3de505..e046f5521814 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -664,6 +664,20 @@ config IP_NF_TARGET_CLASSIFY To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_TTL + tristate 'TTL target support' + depends on IP_NF_MANGLE + help + This option adds a `TTL' target, which enables the user to modify + the TTL value of the IP header. + + While it is safe to decrement/lower the TTL, this target also enables + functionality to increment and set the TTL value of the IP header to + arbitrary values. This is EXTREMELY DANGEROUS since you can easily + create immortal packets that loop forever on the network. + + To compile it as a module, choose M here. If unsure, say N. 
+ config IP_NF_TARGET_CONNMARK tristate 'CONNMARK target support' depends on IP_NF_CONNTRACK_MARK && IP_NF_MANGLE diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 89cae69ee208..a7bd38f50522 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c new file mode 100644 index 000000000000..b9ae6a9382f3 --- /dev/null +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -0,0 +1,119 @@ +/* TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + */ + +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("IP tables TTL modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int +ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, + const struct net_device *out, unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = targinfo; + u_int16_t diffs[2]; + int new_ttl; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + iph = (*pskb)->nh.iph; + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->ttl = new_ttl; + diffs[1] = htons(((unsigned)iph->ttl) << 8); + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + } + + return IPT_CONTINUE; +} + +static int ipt_ttl_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_TTL_info *info = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { + printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_TTL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "ipt_TTL: can only be called from " + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) + return 0; + + return 1; +} + +static struct ipt_target ipt_TTL = { 
+ .name = "TTL", + .target = ipt_ttl_target, + .checkentry = ipt_ttl_checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_TTL); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_TTL); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From 0ac4f893f20ed524198da5ebf591fc0b9e2ced2f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:30 -0700 Subject: [NETFILTER6]: Add new ip6tables HOPLIMIT target This target allows users to modify the hoplimit header field of the IPv6 header. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6/ip6t_HL.h | 22 ++++++ net/ipv6/netfilter/Kconfig | 16 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_HL.c | 118 +++++++++++++++++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_HL.h create mode 100644 net/ipv6/netfilter/ip6t_HL.c (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h new file mode 100644 index 000000000000..afb7813d45ab --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -0,0 +1,22 @@ +/* Hop Limit modification module for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module */ + +#ifndef _IP6T_HL_H +#define _IP6T_HL_H + +enum { + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC +}; + +#define IP6T_HL_MAXMODE IP6T_HL_DEC + +struct ip6t_HL_info { + u_int8_t mode; + u_int8_t hop_limit; +}; + + +#endif diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 8a10c2d0d154..216fbe1ac65c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -239,6 +239,22 @@ config IP6_NF_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. 
+config IP6_NF_TARGET_HL + tristate 'HL (hoplimit) target support' + depends on IP6_NF_MANGLE + help + This option adds a `HL' target, which enables the user to decrement + the hoplimit value of the IPv6 header or set it to a given (lower) + value. + + While it is safe to decrement the hoplimit value, this option also + enables functionality to increment and set the hoplimit value of the + IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since + you can easily create immortal packets that loop forever on the + network. + + To compile it as a module, choose M here. If unsure, say N. + #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES config IP6_NF_RAW tristate 'raw table support (required for TRACE)' diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 70f6ba610102..bd9a16a5cbba 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_IP6_NF_MATCH_PHYSDEV) += ip6t_physdev.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c new file mode 100644 index 000000000000..8f5549b72720 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -0,0 +1,118 @@ +/* + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int ip6t_hl_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + 
unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = targinfo; + u_int16_t diffs[2]; + int new_hl; + + if (!skb_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + ip6h = (*pskb)->nh.ipv6h; + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + if (new_hl != ip6h->hop_limit) { + diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + ip6h->hop_limit = new_hl; + diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); + } + + return IP6T_CONTINUE; +} + +static int ip6t_hl_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip6t_HL_info *info = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) { + printk(KERN_WARNING "ip6t_HL: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ip6t_HL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "ip6t_HL: can only be called from " + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_HL = { + .name = "HL", + .target = ip6t_hl_target, + .checkentry = ip6t_hl_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ip6t_register_target(&ip6t_HL); +} + +static void __exit fini(void) +{ + 
ip6t_unregister_target(&ip6t_HL); +} + +module_init(init); +module_exit(fini); -- cgit v1.2.3 From a84ffe430342db6ee585a5038f3242a6b4112d69 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 04:51:32 -0300 Subject: [DCCP]: Introduce DCCP_SOCKOPT_PACKET_SIZE So that applications can set dccp_sock->dccps_pkt_size, that in turn is used in the CCID3 half connection init routines to set ccid3hc[tr]x_s and use it in its rate calculations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 7 +++++-- net/dccp/ccids/ccid3.c | 12 ++++++------ net/dccp/proto.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 59 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 9e3a1370b906..007c290f74d4 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -186,6 +186,9 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket options */ +#define DCCP_SOCKOPT_PACKET_SIZE 1 + #ifdef __KERNEL__ #include @@ -396,7 +399,7 @@ enum dccp_role { * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) * @dccps_pmtu_cookie - Last pmtu seen by socket - * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackpkts - receiver half connection acked packets @@ -417,7 +420,7 @@ struct dccp_sock { unsigned long dccps_service; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; - __u32 dccps_avg_packet_size; + __u32 dccps_packet_size; unsigned long dccps_ndp_count; __u16 dccps_ext_header_len; __u32 dccps_pmtu_cookie; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 
4ff6ede0f07d..e22b0eefdbf9 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -672,9 +672,9 @@ static int ccid3_hc_tx_init(struct sock *sk) memset(hctx, 0, sizeof(*hctx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hctx->ccid3hctx_s = dp->dccps_packet_size; else hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE; @@ -1058,9 +1058,9 @@ static int ccid3_hc_rx_init(struct sock *sk) memset(hcrx, 0, sizeof(*hcrx)); - if (dp->dccps_avg_packet_size >= TFRC_MIN_PACKET_SIZE && - dp->dccps_avg_packet_size <= TFRC_MAX_PACKET_SIZE) - hcrx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size; + if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE && + dp->dccps_packet_size <= TFRC_MAX_PACKET_SIZE) + hcrx->ccid3hcrx_s = dp->dccps_packet_size; else hcrx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index f4da6561e40c..18a0e69c9dc7 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -205,23 +205,67 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; + int err; + int val; if (level != SOL_DCCP) return ip_setsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (optlen < sizeof(int)) + return -EINVAL; + + if (get_user(val, (int __user *)optval)) + return -EFAULT; + + lock_sock(sk); + + dp = dccp_sk(sk); + err = 0; + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + dp->dccps_packet_size = val; + break; + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; } int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - dccp_pr_debug("entry\n"); + struct dccp_sock *dp; 
+ int val, len; if (level != SOL_DCCP) return ip_getsockopt(sk, level, optname, optval, optlen); - return -EOPNOTSUPP; + if (get_user(len, optlen)) + return -EFAULT; + + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) + return -EINVAL; + + dp = dccp_sk(sk); + + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + val = dp->dccps_packet_size; + break; + default: + return -ENOPROTOOPT; + } + + if (put_user(len, optlen) || copy_to_user(optval, &val, len)) + return -EFAULT; + + return 0; } int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, -- cgit v1.2.3 From fb120da678c517f72d4b39932062c2191827b331 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:42:59 +1000 Subject: [PATCH] Make MODULE_DEVICE_TABLE work for vio devices Make MODULE_DEVICE_TABLE work for vio devices. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/vio.c | 2 +- drivers/block/viodasd.c | 2 +- drivers/cdrom/viocd.c | 2 +- drivers/char/hvc_vio.c | 2 +- drivers/char/hvcs.c | 2 +- drivers/char/viotape.c | 2 +- drivers/net/ibmveth.c | 2 +- drivers/net/iseries_veth.c | 2 +- drivers/scsi/ibmvscsi/ibmvscsi.c | 2 +- include/asm-ppc64/vio.h | 6 +----- include/linux/mod_devicetable.h | 7 ++++++- scripts/mod/file2alias.c | 19 +++++++++++++++++++ 12 files changed, 35 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 93c437a0911b..c90e1dd875ce 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -111,7 +111,7 @@ EXPORT_SYMBOL(vio_unregister_driver); static const struct vio_device_id *vio_match_device( const struct vio_device_id *ids, const struct vio_dev *dev) { - while (ids->type) { + while (ids->type[0] != '\0') { if (vio_bus_ops.match(ids, dev)) return ids; ids++; diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 46e56a25d2c8..e46ecd23b3ac 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c 
@@ -776,7 +776,7 @@ static int viodasd_remove(struct vio_dev *vdev) */ static struct vio_device_id viodasd_device_table[] __devinitdata = { { "viodasd", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viodasd_device_table); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index 38dd9ffbe8bc..0829db58462f 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -734,7 +734,7 @@ static int viocd_remove(struct vio_dev *vdev) */ static struct vio_device_id viocd_device_table[] __devinitdata = { { "viocd", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viocd_device_table); diff --git a/drivers/char/hvc_vio.c b/drivers/char/hvc_vio.c index 60bb9152b832..78d681dc35a8 100644 --- a/drivers/char/hvc_vio.c +++ b/drivers/char/hvc_vio.c @@ -39,7 +39,7 @@ char hvc_driver_name[] = "hvc_console"; static struct vio_device_id hvc_driver_table[] __devinitdata = { {"serial", "hvterm1"}, - { NULL, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, hvc_driver_table); diff --git a/drivers/char/hvcs.c b/drivers/char/hvcs.c index 3236d2404905..f47f009f9259 100644 --- a/drivers/char/hvcs.c +++ b/drivers/char/hvcs.c @@ -527,7 +527,7 @@ static int khvcsd(void *unused) static struct vio_device_id hvcs_driver_table[] __devinitdata= { {"serial-server", "hvterm2"}, - { NULL, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, hvcs_driver_table); diff --git a/drivers/char/viotape.c b/drivers/char/viotape.c index 4764b4f9555d..0aff45fac2e6 100644 --- a/drivers/char/viotape.c +++ b/drivers/char/viotape.c @@ -991,7 +991,7 @@ static int viotape_remove(struct vio_dev *vdev) */ static struct vio_device_id viotape_device_table[] __devinitdata = { { "viotape", "" }, - { 0, } + { "", "" } }; MODULE_DEVICE_TABLE(vio, viotape_device_table); diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c index c39b0609742a..32d5fabd4b10 100644 --- a/drivers/net/ibmveth.c +++ b/drivers/net/ibmveth.c @@ -1144,7 +1144,7 @@ static void ibmveth_proc_unregister_driver(void) static struct vio_device_id 
ibmveth_device_table[] __devinitdata= { { "network", "IBM,l-lan"}, - { 0,} + { "", "" } }; MODULE_DEVICE_TABLE(vio, ibmveth_device_table); diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c index 55af32e9bf08..183ba97785b0 100644 --- a/drivers/net/iseries_veth.c +++ b/drivers/net/iseries_veth.c @@ -1370,7 +1370,7 @@ static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) */ static struct vio_device_id veth_device_table[] __devinitdata = { { "vlan", "" }, - { NULL, NULL } + { "", "" } }; MODULE_DEVICE_TABLE(vio, veth_device_table); diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index fe09d145542a..2cb3c8340ca8 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -1442,7 +1442,7 @@ static int ibmvscsi_remove(struct vio_dev *vdev) */ static struct vio_device_id ibmvscsi_device_table[] __devinitdata = { {"vscsi", "IBM,v-scsi"}, - {0,} + { "", "" } }; MODULE_DEVICE_TABLE(vio, ibmvscsi_device_table); diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 85420bb37d58..03f1b95f433b 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -52,11 +53,6 @@ struct vio_dev { struct device dev; }; -struct vio_device_id { - char *type; - char *compat; -}; - struct vio_driver { struct list_head node; char *name; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 97bbccdbcca3..47da39ba3f03 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -1,6 +1,6 @@ /* * Device tables which are exported to userspace via - * scripts/table2alias.c. You must keep that file in sync with this + * scripts/mod/file2alias.c. You must keep that file in sync with this * header. 
*/ @@ -190,6 +190,11 @@ struct of_device_id #endif }; +/* VIO */ +struct vio_device_id { + char type[32]; + char compat[32]; +}; /* PCMCIA */ diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 5180405c1a84..d8ee38aede26 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -341,6 +341,22 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali return 1; } +static int do_vio_entry(const char *filename, struct vio_device_id *vio, + char *alias) +{ + char *tmp; + + sprintf(alias, "vio:T%sS%s", vio->type[0] ? vio->type : "*", + vio->compat[0] ? vio->compat : "*"); + + /* Replace all whitespace with underscores */ + for (tmp = alias; tmp && *tmp; tmp++) + if (isspace (*tmp)) + *tmp = '_'; + + return 1; +} + /* Ignore any prefix, eg. v850 prepends _ */ static inline int sym_is(const char *symbol, const char *name) { @@ -422,6 +438,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info, else if (sym_is(symname, "__mod_of_device_table")) do_table(symval, sym->st_size, sizeof(struct of_device_id), do_of_entry, mod); + else if (sym_is(symname, "__mod_vio_device_table")) + do_table(symval, sym->st_size, sizeof(struct vio_device_id), + do_vio_entry, mod); } -- cgit v1.2.3 From d568121ce3151c36cc4718dd4e977f217c6144c2 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Tue, 30 Aug 2005 08:58:37 +0200 Subject: [PATCH] Assign device pointer to OSS devices Add register_sound_special_device() function to allow assignment of device pointer to a specific OSS device for HAL. 
Signed-off-by: Takashi Iwai --- include/linux/sound.h | 2 ++ sound/core/sound_oss.c | 7 +++++-- sound/sound_core.c | 27 ++++++++++++++++++--------- 3 files changed, 25 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sound.h b/include/linux/sound.h index 428f59794f48..72b9af4c3fd4 100644 --- a/include/linux/sound.h +++ b/include/linux/sound.h @@ -29,7 +29,9 @@ * Sound core interface functions */ +struct device; extern int register_sound_special(struct file_operations *fops, int unit); +extern int register_sound_special_device(struct file_operations *fops, int unit, struct device *dev); extern int register_sound_mixer(struct file_operations *fops, int dev); extern int register_sound_midi(struct file_operations *fops, int dev); extern int register_sound_dsp(struct file_operations *fops, int dev); diff --git a/sound/core/sound_oss.c b/sound/core/sound_oss.c index de39d212bc15..e401c6703297 100644 --- a/sound/core/sound_oss.c +++ b/sound/core/sound_oss.c @@ -98,6 +98,7 @@ int snd_register_oss_device(int type, snd_card_t * card, int dev, snd_minor_t * int cidx = SNDRV_MINOR_OSS_CARD(minor); int track2 = -1; int register1 = -1, register2 = -1; + struct device *carddev = NULL; if (minor < 0) return minor; @@ -121,11 +122,13 @@ int snd_register_oss_device(int type, snd_card_t * card, int dev, snd_minor_t * track2 = SNDRV_MINOR_OSS(cidx, SNDRV_MINOR_OSS_DMMIDI1); break; } - register1 = register_sound_special(reg->f_ops, minor); + if (card) + carddev = card->dev; + register1 = register_sound_special_device(reg->f_ops, minor, carddev); if (register1 != minor) goto __end; if (track2 >= 0) { - register2 = register_sound_special(reg->f_ops, track2); + register2 = register_sound_special_device(reg->f_ops, track2, carddev); if (register2 != track2) goto __end; } diff --git a/sound/sound_core.c b/sound/sound_core.c index 21a69e096225..954f994592ab 100644 --- a/sound/sound_core.c +++ b/sound/sound_core.c @@ -153,7 +153,7 @@ static 
DEFINE_SPINLOCK(sound_loader_lock); * list. Acquires locks as needed */ -static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode) +static int sound_insert_unit(struct sound_unit **list, struct file_operations *fops, int index, int low, int top, const char *name, umode_t mode, struct device *dev) { struct sound_unit *s = kmalloc(sizeof(*s), GFP_KERNEL); int r; @@ -175,7 +175,7 @@ static int sound_insert_unit(struct sound_unit **list, struct file_operations *f devfs_mk_cdev(MKDEV(SOUND_MAJOR, s->unit_minor), S_IFCHR | mode, s->name); class_device_create(sound_class, MKDEV(SOUND_MAJOR, s->unit_minor), - NULL, s->name+6); + dev, s->name+6); return r; fail: @@ -227,16 +227,18 @@ static void sound_remove_unit(struct sound_unit **list, int unit) static struct sound_unit *chains[SOUND_STEP]; /** - * register_sound_special - register a special sound node + * register_sound_special_device - register a special sound node * @fops: File operations for the driver * @unit: Unit number to allocate + * @dev: device pointer * * Allocate a special sound device by minor number from the sound * subsystem. The allocated number is returned on succes. On failure * a negative error code is returned. 
*/ -int register_sound_special(struct file_operations *fops, int unit) +int register_sound_special_device(struct file_operations *fops, int unit, + struct device *dev) { const int chain = unit % SOUND_STEP; int max_unit = 128 + chain; @@ -294,9 +296,16 @@ int register_sound_special(struct file_operations *fops, int unit) break; } return sound_insert_unit(&chains[chain], fops, -1, unit, max_unit, - name, S_IRUSR | S_IWUSR); + name, S_IRUSR | S_IWUSR, dev); } +EXPORT_SYMBOL(register_sound_special_device); + +int register_sound_special(struct file_operations *fops, int unit) +{ + return register_sound_special_device(fops, unit, NULL); +} + EXPORT_SYMBOL(register_sound_special); /** @@ -312,7 +321,7 @@ EXPORT_SYMBOL(register_sound_special); int register_sound_mixer(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[0], fops, dev, 0, 128, - "mixer", S_IRUSR | S_IWUSR); + "mixer", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_mixer); @@ -330,7 +339,7 @@ EXPORT_SYMBOL(register_sound_mixer); int register_sound_midi(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[2], fops, dev, 2, 130, - "midi", S_IRUSR | S_IWUSR); + "midi", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_midi); @@ -356,7 +365,7 @@ EXPORT_SYMBOL(register_sound_midi); int register_sound_dsp(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[3], fops, dev, 3, 131, - "dsp", S_IWUSR | S_IRUSR); + "dsp", S_IWUSR | S_IRUSR, NULL); } EXPORT_SYMBOL(register_sound_dsp); @@ -375,7 +384,7 @@ EXPORT_SYMBOL(register_sound_dsp); int register_sound_synth(struct file_operations *fops, int dev) { return sound_insert_unit(&chains[9], fops, dev, 9, 137, - "synth", S_IRUSR | S_IWUSR); + "synth", S_IRUSR | S_IWUSR, NULL); } EXPORT_SYMBOL(register_sound_synth); -- cgit v1.2.3 From 1623c81eece58740279b8de802fa5895221f2044 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 30 Aug 2005 03:37:42 -0400 Subject: [libata] allow ATAPI to be 
enabled with new atapi_enabled module option ATAPI is getting close to being ready. To increase exposure, we enable the code in the upstream kernel, but default it to off (present behavior). Users must pass atapi_enabled=1 as a module option (if module) or on the kernel command line (if built in) to turn on discovery of their ATAPI devices. --- drivers/scsi/libata-core.c | 4 ++++ drivers/scsi/libata-scsi.c | 8 ++++---- drivers/scsi/libata.h | 1 + include/linux/libata.h | 1 - 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index dee4b12b0342..d824938d05c9 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -75,6 +75,10 @@ static void __ata_qc_complete(struct ata_queued_cmd *qc); static unsigned int ata_unique_id = 1; static struct workqueue_struct *ata_wq; +int atapi_enabled = 0; +module_param(atapi_enabled, int, 0444); +MODULE_PARM_DESC(atapi_enabled, "Enable discovery of ATAPI devices (0=off, 1=on)"); + MODULE_AUTHOR("Jeff Garzik"); MODULE_DESCRIPTION("Library module for ATA devices"); MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index 346eb36b1e31..55823765425c 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1470,10 +1470,10 @@ ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev) if (unlikely(!ata_dev_present(dev))) return NULL; -#ifndef ATA_ENABLE_ATAPI - if (unlikely(dev->class == ATA_DEV_ATAPI)) - return NULL; -#endif + if (atapi_enabled) { + if (unlikely(dev->class == ATA_DEV_ATAPI)) + return NULL; + } return dev; } diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 809c634afbcd..d608b3a0f6fe 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -38,6 +38,7 @@ struct ata_scsi_args { }; /* libata-core.c */ +extern int atapi_enabled; extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, struct ata_device *dev); 
extern void ata_qc_free(struct ata_queued_cmd *qc); diff --git a/include/linux/libata.h b/include/linux/libata.h index fc05a9899288..1eaba4077e15 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -40,7 +40,6 @@ #undef ATA_VERBOSE_DEBUG /* yet more debugging output */ #undef ATA_IRQ_TRAP /* define to ack screaming irqs */ #undef ATA_NDEBUG /* define to disable quick runtime checks */ -#undef ATA_ENABLE_ATAPI /* define to enable ATAPI support */ #undef ATA_ENABLE_PATA /* define to enable PATA support in some * low-level drivers */ #undef ATAPI_ENABLE_DMADIR /* enables ATAPI DMADIR bridge support */ -- cgit v1.2.3 From 374b1873571bf80dc0c1fcceaaad067980f3b9de Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Tue, 30 Aug 2005 05:42:52 -0400 Subject: [libata] update several drivers to use pci_iomap()/pci_iounmap() --- drivers/scsi/ahci.c | 7 +++---- drivers/scsi/ata_piix.c | 9 ++++----- drivers/scsi/libata-core.c | 11 ++++++++++- drivers/scsi/sata_nv.c | 9 +++++---- drivers/scsi/sata_promise.c | 8 ++++---- drivers/scsi/sata_qstor.c | 8 ++++---- drivers/scsi/sata_sil.c | 7 ++++--- drivers/scsi/sata_svw.c | 5 ++--- drivers/scsi/sata_sx4.c | 15 +++++++-------- drivers/scsi/sata_vsc.c | 5 ++--- include/linux/libata.h | 1 + 11 files changed, 46 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 4cfb257a0f67..31065261de8e 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -995,8 +995,7 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent->dev = pci_dev_to_dev(pdev); INIT_LIST_HEAD(&probe_ent->node); - mmio_base = ioremap(pci_resource_start(pdev, AHCI_PCI_BAR), - pci_resource_len(pdev, AHCI_PCI_BAR)); + mmio_base = pci_iomap(pdev, AHCI_PCI_BAR, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; @@ -1040,7 +1039,7 @@ static int ahci_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) err_out_hpriv: kfree(hpriv); 
err_out_iounmap: - iounmap(mmio_base); + pci_iounmap(pdev, mmio_base); err_out_free_ent: kfree(probe_ent); err_out_msi: @@ -1081,7 +1080,7 @@ static void ahci_remove_one (struct pci_dev *pdev) } kfree(hpriv); - iounmap(host_set->mmio_base); + pci_iounmap(pdev, host_set->mmio_base); kfree(host_set); if (have_msi) diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 90c53b88a1ee..deec0cef88d9 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -584,7 +584,6 @@ static void pci_enable_intx(struct pci_dev *pdev) static int piix_disable_ahci(struct pci_dev *pdev) { void __iomem *mmio; - unsigned long addr; u32 tmp; int rc = 0; @@ -592,11 +591,11 @@ static int piix_disable_ahci(struct pci_dev *pdev) * works because this device is usually set up by BIOS. */ - addr = pci_resource_start(pdev, AHCI_PCI_BAR); - if (!addr || !pci_resource_len(pdev, AHCI_PCI_BAR)) + if (!pci_resource_start(pdev, AHCI_PCI_BAR) || + !pci_resource_len(pdev, AHCI_PCI_BAR)) return 0; - mmio = ioremap(addr, 64); + mmio = pci_iomap(pdev, AHCI_PCI_BAR, 64); if (!mmio) return -ENOMEM; @@ -610,7 +609,7 @@ static int piix_disable_ahci(struct pci_dev *pdev) rc = -EIO; } - iounmap(mmio); + pci_iounmap(pdev, mmio); return rc; } diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index dee4b12b0342..1fe20f76fb50 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4200,6 +4200,15 @@ ata_probe_ent_alloc(struct device *dev, struct ata_port_info *port) +#ifdef CONFIG_PCI + +void ata_pci_host_stop (struct ata_host_set *host_set) +{ + struct pci_dev *pdev = to_pci_dev(host_set->dev); + + pci_iounmap(pdev, host_set->mmio_base); +} + /** * ata_pci_init_native_mode - Initialize native-mode driver * @pdev: pci device to be initialized @@ -4212,7 +4221,6 @@ ata_probe_ent_alloc(struct device *dev, struct ata_port_info *port) * ata_probe_ent structure should then be freed with kfree(). 
*/ -#ifdef CONFIG_PCI struct ata_probe_ent * ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port) { @@ -4595,6 +4603,7 @@ EXPORT_SYMBOL_GPL(ata_scsi_simulate); #ifdef CONFIG_PCI EXPORT_SYMBOL_GPL(pci_test_config_bits); +EXPORT_SYMBOL_GPL(ata_pci_host_stop); EXPORT_SYMBOL_GPL(ata_pci_init_native_mode); EXPORT_SYMBOL_GPL(ata_pci_init_one); EXPORT_SYMBOL_GPL(ata_pci_remove_one); diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 03d9bc6e69df..a1d62dee3be6 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -351,6 +351,7 @@ static void nv_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) static void nv_host_stop (struct ata_host_set *host_set) { struct nv_host *host = host_set->private_data; + struct pci_dev *pdev = to_pci_dev(host_set->dev); // Disable hotplug event interrupts. if (host->host_desc->disable_hotplug) @@ -358,7 +359,8 @@ static void nv_host_stop (struct ata_host_set *host_set) kfree(host); - ata_host_stop(host_set); + if (host_set->mmio_base) + pci_iounmap(pdev, host_set->mmio_base); } static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) @@ -420,8 +422,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO) { unsigned long base; - probe_ent->mmio_base = ioremap(pci_resource_start(pdev, 5), - pci_resource_len(pdev, 5)); + probe_ent->mmio_base = pci_iomap(pdev, 5, 0); if (probe_ent->mmio_base == NULL) { rc = -EIO; goto err_out_free_host; @@ -457,7 +458,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) err_out_iounmap: if (host->host_flags & NV_HOST_FLAGS_SCR_MMIO) - iounmap(probe_ent->mmio_base); + pci_iounmap(pdev, probe_ent->mmio_base); err_out_free_host: kfree(host); err_out_free_ent: diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index ed54f2810609..538ad727bd2e 100644 --- a/drivers/scsi/sata_promise.c +++ 
b/drivers/scsi/sata_promise.c @@ -92,6 +92,7 @@ static void pdc_exec_command_mmio(struct ata_port *ap, struct ata_taskfile *tf); static void pdc_irq_clear(struct ata_port *ap); static int pdc_qc_issue_prot(struct ata_queued_cmd *qc); + static Scsi_Host_Template pdc_ata_sht = { .module = THIS_MODULE, .name = DRV_NAME, @@ -132,7 +133,7 @@ static struct ata_port_operations pdc_sata_ops = { .scr_write = pdc_sata_scr_write, .port_start = pdc_port_start, .port_stop = pdc_port_stop, - .host_stop = ata_host_stop, + .host_stop = ata_pci_host_stop, }; static struct ata_port_operations pdc_pata_ops = { @@ -153,7 +154,7 @@ static struct ata_port_operations pdc_pata_ops = { .port_start = pdc_port_start, .port_stop = pdc_port_stop, - .host_stop = ata_host_stop, + .host_stop = ata_pci_host_stop, }; static struct ata_port_info pdc_port_info[] = { @@ -663,8 +664,7 @@ static int pdc_ata_init_one (struct pci_dev *pdev, const struct pci_device_id *e probe_ent->dev = pci_dev_to_dev(pdev); INIT_LIST_HEAD(&probe_ent->node); - mmio_base = ioremap(pci_resource_start(pdev, 3), - pci_resource_len(pdev, 3)); + mmio_base = pci_iomap(pdev, 3, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c index 9c99ab433bd3..029c2482e127 100644 --- a/drivers/scsi/sata_qstor.c +++ b/drivers/scsi/sata_qstor.c @@ -538,11 +538,12 @@ static void qs_port_stop(struct ata_port *ap) static void qs_host_stop(struct ata_host_set *host_set) { void __iomem *mmio_base = host_set->mmio_base; + struct pci_dev *pdev = to_pci_dev(host_set->dev); writeb(0, mmio_base + QS_HCT_CTRL); /* disable host interrupts */ writeb(QS_CNFG3_GSRST, mmio_base + QS_HCF_CNFG3); /* global reset */ - ata_host_stop(host_set); + pci_iounmap(pdev, mmio_base); } static void qs_host_init(unsigned int chip_id, struct ata_probe_ent *pe) @@ -646,8 +647,7 @@ static int qs_ata_init_one(struct pci_dev *pdev, goto err_out_regions; } - mmio_base = 
ioremap(pci_resource_start(pdev, 4), - pci_resource_len(pdev, 4)); + mmio_base = pci_iomap(pdev, 4, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_regions; @@ -697,7 +697,7 @@ static int qs_ata_init_one(struct pci_dev *pdev, return 0; err_out_iounmap: - iounmap(mmio_base); + pci_iounmap(pdev, mmio_base); err_out_regions: pci_release_regions(pdev); err_out: diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index b1a696fcec81..ba98a175ee3a 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -86,6 +86,7 @@ static u32 sil_scr_read (struct ata_port *ap, unsigned int sc_reg); static void sil_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val); static void sil_post_set_mode (struct ata_port *ap); + static struct pci_device_id sil_pci_tbl[] = { { 0x1095, 0x3112, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, { 0x1095, 0x0240, PCI_ANY_ID, PCI_ANY_ID, 0, 0, sil_3112_m15w }, @@ -172,7 +173,7 @@ static struct ata_port_operations sil_ops = { .scr_write = sil_scr_write, .port_start = ata_port_start, .port_stop = ata_port_stop, - .host_stop = ata_host_stop, + .host_stop = ata_pci_host_stop, }; static struct ata_port_info sil_port_info[] = { @@ -231,6 +232,7 @@ MODULE_LICENSE("GPL"); MODULE_DEVICE_TABLE(pci, sil_pci_tbl); MODULE_VERSION(DRV_VERSION); + static unsigned char sil_get_device_cache_line(struct pci_dev *pdev) { u8 cache_line = 0; @@ -426,8 +428,7 @@ static int sil_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) probe_ent->irq_flags = SA_SHIRQ; probe_ent->host_flags = sil_port_info[ent->driver_data].host_flags; - mmio_base = ioremap(pci_resource_start(pdev, 5), - pci_resource_len(pdev, 5)); + mmio_base = pci_iomap(pdev, 5, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index d48de9547fb3..d89d968bedac 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -318,7 +318,7 @@ static struct ata_port_operations 
k2_sata_ops = { .scr_write = k2_sata_scr_write, .port_start = ata_port_start, .port_stop = ata_port_stop, - .host_stop = ata_host_stop, + .host_stop = ata_pci_host_stop, }; static void k2_sata_setup_port(struct ata_ioports *port, unsigned long base) @@ -392,8 +392,7 @@ static int k2_sata_init_one (struct pci_dev *pdev, const struct pci_device_id *e probe_ent->dev = pci_dev_to_dev(pdev); INIT_LIST_HEAD(&probe_ent->node); - mmio_base = ioremap(pci_resource_start(pdev, 5), - pci_resource_len(pdev, 5)); + mmio_base = pci_iomap(pdev, 5, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 38b3dd2d7504..540a85191172 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -245,13 +245,14 @@ static struct pci_driver pdc_sata_pci_driver = { static void pdc20621_host_stop(struct ata_host_set *host_set) { + struct pci_dev *pdev = to_pci_dev(host_set->dev); struct pdc_host_priv *hpriv = host_set->private_data; void *dimm_mmio = hpriv->dimm_mmio; - iounmap(dimm_mmio); + pci_iounmap(pdev, dimm_mmio); kfree(hpriv); - ata_host_stop(host_set); + pci_iounmap(pdev, host_set->mmio_base); } static int pdc_port_start(struct ata_port *ap) @@ -1418,8 +1419,7 @@ static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id * probe_ent->dev = pci_dev_to_dev(pdev); INIT_LIST_HEAD(&probe_ent->node); - mmio_base = ioremap(pci_resource_start(pdev, 3), - pci_resource_len(pdev, 3)); + mmio_base = pci_iomap(pdev, 3, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; @@ -1433,8 +1433,7 @@ static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id * } memset(hpriv, 0, sizeof(*hpriv)); - dimm_mmio = ioremap(pci_resource_start(pdev, 4), - pci_resource_len(pdev, 4)); + dimm_mmio = pci_iomap(pdev, 4, 0); if (!dimm_mmio) { kfree(hpriv); rc = -ENOMEM; @@ -1481,9 +1480,9 @@ static int pdc_sata_init_one (struct pci_dev *pdev, const struct pci_device_id * 
err_out_iounmap_dimm: /* only get to this label if 20621 */ kfree(hpriv); - iounmap(dimm_mmio); + pci_iounmap(pdev, dimm_mmio); err_out_iounmap: - iounmap(mmio_base); + pci_iounmap(pdev, mmio_base); err_out_free_ent: kfree(probe_ent); err_out_regions: diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index 3985f344da4d..cf94e0158a8d 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -252,7 +252,7 @@ static struct ata_port_operations vsc_sata_ops = { .scr_write = vsc_sata_scr_write, .port_start = ata_port_start, .port_stop = ata_port_stop, - .host_stop = ata_host_stop, + .host_stop = ata_pci_host_stop, }; static void __devinit vsc_sata_setup_port(struct ata_ioports *port, unsigned long base) @@ -326,8 +326,7 @@ static int __devinit vsc_sata_init_one (struct pci_dev *pdev, const struct pci_d probe_ent->dev = pci_dev_to_dev(pdev); INIT_LIST_HEAD(&probe_ent->node); - mmio_base = ioremap(pci_resource_start(pdev, 0), - pci_resource_len(pdev, 0)); + mmio_base = pci_iomap(pdev, 0, 0); if (mmio_base == NULL) { rc = -ENOMEM; goto err_out_free_ent; diff --git a/include/linux/libata.h b/include/linux/libata.h index fc05a9899288..bd0f79dfb9cd 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -450,6 +450,7 @@ struct pci_bits { unsigned long val; }; +extern void ata_pci_host_stop (struct ata_host_set *host_set); extern struct ata_probe_ent * ata_pci_init_native_mode(struct pci_dev *pdev, struct ata_port_info **port); extern int pci_test_config_bits(struct pci_dev *pdev, struct pci_bits *bits); -- cgit v1.2.3 From 53c165e0a6c8a4ff7df316557528fa7a52d20711 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Mon, 22 Aug 2005 10:06:19 -0500 Subject: [SCSI] correct attribute_container list usage One of the changes in the attribute_container code in the scsi-misc tree was to add a lock to protect the list of devices per container. 
This, unfortunately, leads to potential scheduling while atomic problems if there's a sleep in the function called by a trigger. The correct solution is to use the kernel klist infrastructure instead which allows lockless traversal of a list. Signed-off-by: James Bottomley --- drivers/base/attribute_container.c | 51 +++++++++++++++++++++---------------- include/linux/attribute_container.h | 4 +-- 2 files changed, 31 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/attribute_container.c b/drivers/base/attribute_container.c index ebcae5c34133..6c0f49340eb2 100644 --- a/drivers/base/attribute_container.c +++ b/drivers/base/attribute_container.c @@ -22,7 +22,7 @@ /* This is a private structure used to tie the classdev and the * container .. it should never be visible outside this file */ struct internal_container { - struct list_head node; + struct klist_node node; struct attribute_container *cont; struct class_device classdev; }; @@ -57,8 +57,7 @@ int attribute_container_register(struct attribute_container *cont) { INIT_LIST_HEAD(&cont->node); - INIT_LIST_HEAD(&cont->containers); - spin_lock_init(&cont->containers_lock); + klist_init(&cont->containers); down(&attribute_container_mutex); list_add_tail(&cont->node, &attribute_container_list); @@ -78,13 +77,13 @@ attribute_container_unregister(struct attribute_container *cont) { int retval = -EBUSY; down(&attribute_container_mutex); - spin_lock(&cont->containers_lock); - if (!list_empty(&cont->containers)) + spin_lock(&cont->containers.k_lock); + if (!list_empty(&cont->containers.k_list)) goto out; retval = 0; list_del(&cont->node); out: - spin_unlock(&cont->containers_lock); + spin_unlock(&cont->containers.k_lock); up(&attribute_container_mutex); return retval; @@ -143,7 +142,6 @@ attribute_container_add_device(struct device *dev, continue; } memset(ic, 0, sizeof(struct internal_container)); - INIT_LIST_HEAD(&ic->node); ic->cont = cont; class_device_initialize(&ic->classdev); 
ic->classdev.dev = get_device(dev); @@ -154,13 +152,22 @@ attribute_container_add_device(struct device *dev, fn(cont, dev, &ic->classdev); else attribute_container_add_class_device(&ic->classdev); - spin_lock(&cont->containers_lock); - list_add_tail(&ic->node, &cont->containers); - spin_unlock(&cont->containers_lock); + klist_add_tail(&ic->node, &cont->containers); } up(&attribute_container_mutex); } +/* FIXME: can't break out of this unless klist_iter_exit is also + * called before doing the break + */ +#define klist_for_each_entry(pos, head, member, iter) \ + for (klist_iter_init(head, iter); (pos = ({ \ + struct klist_node *n = klist_next(iter); \ + n ? ({ klist_iter_exit(iter) ; NULL; }) : \ + container_of(n, typeof(*pos), member);\ + }) ) != NULL; ) + + /** * attribute_container_remove_device - make device eligible for removal. * @@ -187,18 +194,19 @@ attribute_container_remove_device(struct device *dev, down(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { - struct internal_container *ic, *tmp; + struct internal_container *ic; + struct klist_iter iter; if (attribute_container_no_classdevs(cont)) continue; if (!cont->match(cont, dev)) continue; - spin_lock(&cont->containers_lock); - list_for_each_entry_safe(ic, tmp, &cont->containers, node) { + + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev != ic->classdev.dev) continue; - list_del(&ic->node); + klist_remove(&ic->node); if (fn) fn(cont, dev, &ic->classdev); else { @@ -206,7 +214,6 @@ attribute_container_remove_device(struct device *dev, class_device_unregister(&ic->classdev); } } - spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -232,7 +239,8 @@ attribute_container_device_trigger(struct device *dev, down(&attribute_container_mutex); list_for_each_entry(cont, &attribute_container_list, node) { - struct internal_container *ic, *tmp; + struct internal_container *ic; + struct klist_iter iter; if (!cont->match(cont, dev)) 
continue; @@ -242,12 +250,10 @@ attribute_container_device_trigger(struct device *dev, continue; } - spin_lock(&cont->containers_lock); - list_for_each_entry_safe(ic, tmp, &cont->containers, node) { + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (dev == ic->classdev.dev) fn(cont, dev, &ic->classdev); } - spin_unlock(&cont->containers_lock); } up(&attribute_container_mutex); } @@ -397,15 +403,16 @@ attribute_container_find_class_device(struct attribute_container *cont, { struct class_device *cdev = NULL; struct internal_container *ic; + struct klist_iter iter; - spin_lock(&cont->containers_lock); - list_for_each_entry(ic, &cont->containers, node) { + klist_for_each_entry(ic, &cont->containers, node, &iter) { if (ic->classdev.dev == dev) { cdev = &ic->classdev; + /* FIXME: must exit iterator then break */ + klist_iter_exit(&iter); break; } } - spin_unlock(&cont->containers_lock); return cdev; } diff --git a/include/linux/attribute_container.h b/include/linux/attribute_container.h index ee83fe64a102..93bfb0beb62a 100644 --- a/include/linux/attribute_container.h +++ b/include/linux/attribute_container.h @@ -11,12 +11,12 @@ #include #include +#include #include struct attribute_container { struct list_head node; - struct list_head containers; - spinlock_t containers_lock; + struct klist containers; struct class *class; struct class_device_attribute **attrs; int (*match)(struct attribute_container *, struct device *); -- cgit v1.2.3 From 61a7afa2c476a3be261cf88a95b0dea0c3bd29d4 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 16 Aug 2005 18:27:34 -0500 Subject: [SCSI] embryonic RAID class The idea behind a RAID class is to provide a uniform interface to all RAID subsystems (both hardware and software) in the kernel. To do that, I've made this class a transport class that's entirely subsystem independent (although the matching routines have to match per subsystem, as you'll see looking at the code). 
I put it in the scsi subdirectory purely because I needed somewhere to play with it, but it's not a scsi specific module. I used a fusion raid card as the test bed for this; with that kind of card, this is the type of class output you get: jejb@titanic> ls -l /sys/class/raid_devices/20\:0\:0\:0/ total 0 lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-0 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:0/20:1:0:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 component-1 -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:1:1/20:1:1:0/ lrwxrwxrwx 1 root root 0 Aug 16 17:21 device -> ../../../devices/pci0000:80/0000:80:04.0/host20/target20:0:0/20:0:0:0/ -r--r--r-- 1 root root 16384 Aug 16 17:21 level -r--r--r-- 1 root root 16384 Aug 16 17:21 resync -r--r--r-- 1 root root 16384 Aug 16 17:21 state So it's really simple: for a SCSI device representing a hardware raid, it shows the raid level, the array state, the resync % complete (if the state is resyncing) and the underlying components of the RAID (these are exposed in fusion on the virtual channel 1). As you can see, this type of information can be exported by almost anything, including software raid. The more difficult trick, of course, is going to be getting it to perform configuration type actions with writable attributes. 
Signed-off-by: James Bottomley --- drivers/scsi/Kconfig | 6 ++ drivers/scsi/Makefile | 2 + drivers/scsi/raid_class.c | 250 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/raid_class.h | 59 +++++++++++ 4 files changed, 317 insertions(+) create mode 100644 drivers/scsi/raid_class.c create mode 100644 include/linux/raid_class.h (limited to 'include/linux') diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 96df148ed969..68adc3cc8ad2 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -1,5 +1,11 @@ menu "SCSI device support" +config RAID_ATTRS + tristate "RAID Transport Class" + default n + ---help--- + Provides RAID + config SCSI tristate "SCSI device support" ---help--- diff --git a/drivers/scsi/Makefile b/drivers/scsi/Makefile index 3746fb9fa2f5..85f9e6bb34b9 100644 --- a/drivers/scsi/Makefile +++ b/drivers/scsi/Makefile @@ -22,6 +22,8 @@ subdir-$(CONFIG_PCMCIA) += pcmcia obj-$(CONFIG_SCSI) += scsi_mod.o +obj-$(CONFIG_RAID_ATTRS) += raid_class.o + # --- NOTE ORDERING HERE --- # For kernel non-modular link, transport attributes need to # be initialised before drivers diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c new file mode 100644 index 000000000000..f1ea5027865f --- /dev/null +++ b/drivers/scsi/raid_class.c @@ -0,0 +1,250 @@ +/* + * RAID Attributes + */ +#include +#include +#include +#include +#include +#include + +#define RAID_NUM_ATTRS 3 + +struct raid_internal { + struct raid_template r; + struct raid_function_template *f; + /* The actual attributes */ + struct class_device_attribute private_attrs[RAID_NUM_ATTRS]; + /* The array of null terminated pointers to attributes + * needed by scsi_sysfs.c */ + struct class_device_attribute *attrs[RAID_NUM_ATTRS + 1]; +}; + +struct raid_component { + struct list_head node; + struct device *dev; + int num; +}; + +#define to_raid_internal(tmpl) container_of(tmpl, struct raid_internal, r) + +#define tc_to_raid_internal(tcont) ({ \ + struct raid_template *r = \ + 
container_of(tcont, struct raid_template, raid_attrs); \ + to_raid_internal(r); \ +}) + +#define ac_to_raid_internal(acont) ({ \ + struct transport_container *tc = \ + container_of(acont, struct transport_container, ac); \ + tc_to_raid_internal(tc); \ +}) + +#define class_device_to_raid_internal(cdev) ({ \ + struct attribute_container *ac = \ + attribute_container_classdev_to_container(cdev); \ + ac_to_raid_internal(ac); \ +}) + + +static int raid_match(struct attribute_container *cont, struct device *dev) +{ + /* We have to look for every subsystem that could house + * emulated RAID devices, so start with SCSI */ + struct raid_internal *i = ac_to_raid_internal(cont); + + if (scsi_is_sdev_device(dev)) { + struct scsi_device *sdev = to_scsi_device(dev); + + if (i->f->cookie != sdev->host->hostt) + return 0; + + return i->f->is_raid(dev); + } + /* FIXME: look at other subsystems too */ + return 0; +} + +static int raid_setup(struct transport_container *tc, struct device *dev, + struct class_device *cdev) +{ + struct raid_data *rd; + + BUG_ON(class_get_devdata(cdev)); + + rd = kmalloc(sizeof(*rd), GFP_KERNEL); + if (!rd) + return -ENOMEM; + + memset(rd, 0, sizeof(*rd)); + INIT_LIST_HEAD(&rd->component_list); + class_set_devdata(cdev, rd); + + return 0; +} + +static int raid_remove(struct transport_container *tc, struct device *dev, + struct class_device *cdev) +{ + struct raid_data *rd = class_get_devdata(cdev); + struct raid_component *rc, *next; + class_set_devdata(cdev, NULL); + list_for_each_entry_safe(rc, next, &rd->component_list, node) { + char buf[40]; + snprintf(buf, sizeof(buf), "component-%d", rc->num); + list_del(&rc->node); + sysfs_remove_link(&cdev->kobj, buf); + kfree(rc); + } + kfree(rd); + return 0; +} + +static DECLARE_TRANSPORT_CLASS(raid_class, + "raid_devices", + raid_setup, + raid_remove, + NULL); + +static struct { + enum raid_state value; + char *name; +} raid_states[] = { + { RAID_ACTIVE, "active" }, + { RAID_DEGRADED, 
"degraded" }, + { RAID_RESYNCING, "resyncing" }, + { RAID_OFFLINE, "offline" }, +}; + +static const char *raid_state_name(enum raid_state state) +{ + int i; + char *name = NULL; + + for (i = 0; i < sizeof(raid_states)/sizeof(raid_states[0]); i++) { + if (raid_states[i].value == state) { + name = raid_states[i].name; + break; + } + } + return name; +} + + +#define raid_attr_show_internal(attr, fmt, var, code) \ +static ssize_t raid_show_##attr(struct class_device *cdev, char *buf) \ +{ \ + struct raid_data *rd = class_get_devdata(cdev); \ + code \ + return snprintf(buf, 20, #fmt "\n", var); \ +} + +#define raid_attr_ro_states(attr, states, code) \ +raid_attr_show_internal(attr, %s, name, \ + const char *name; \ + code \ + name = raid_##states##_name(rd->attr); \ +) \ +static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL) + + +#define raid_attr_ro_internal(attr, code) \ +raid_attr_show_internal(attr, %d, rd->attr, code) \ +static CLASS_DEVICE_ATTR(attr, S_IRUGO, raid_show_##attr, NULL) + +#define ATTR_CODE(attr) \ + struct raid_internal *i = class_device_to_raid_internal(cdev); \ + if (i->f->get_##attr) \ + i->f->get_##attr(cdev->dev); + +#define raid_attr_ro(attr) raid_attr_ro_internal(attr, ) +#define raid_attr_ro_fn(attr) raid_attr_ro_internal(attr, ATTR_CODE(attr)) +#define raid_attr_ro_state(attr) raid_attr_ro_states(attr, attr, ATTR_CODE(attr)) + +raid_attr_ro(level); +raid_attr_ro_fn(resync); +raid_attr_ro_state(state); + +void raid_component_add(struct raid_template *r,struct device *raid_dev, + struct device *component_dev) +{ + struct class_device *cdev = + attribute_container_find_class_device(&r->raid_attrs.ac, + raid_dev); + struct raid_component *rc; + struct raid_data *rd = class_get_devdata(cdev); + char buf[40]; + + rc = kmalloc(sizeof(*rc), GFP_KERNEL); + if (!rc) + return; + + INIT_LIST_HEAD(&rc->node); + rc->dev = component_dev; + rc->num = rd->component_count++; + + snprintf(buf, sizeof(buf), "component-%d", rc->num); + 
list_add_tail(&rc->node, &rd->component_list); + sysfs_create_link(&cdev->kobj, &component_dev->kobj, buf); +} +EXPORT_SYMBOL(raid_component_add); + +struct raid_template * +raid_class_attach(struct raid_function_template *ft) +{ + struct raid_internal *i = kmalloc(sizeof(struct raid_internal), + GFP_KERNEL); + int count = 0; + + if (unlikely(!i)) + return NULL; + + memset(i, 0, sizeof(*i)); + + i->f = ft; + + i->r.raid_attrs.ac.class = &raid_class.class; + i->r.raid_attrs.ac.match = raid_match; + i->r.raid_attrs.ac.attrs = &i->attrs[0]; + + attribute_container_register(&i->r.raid_attrs.ac); + + i->attrs[count++] = &class_device_attr_level; + i->attrs[count++] = &class_device_attr_resync; + i->attrs[count++] = &class_device_attr_state; + + i->attrs[count] = NULL; + BUG_ON(count > RAID_NUM_ATTRS); + + return &i->r; +} +EXPORT_SYMBOL(raid_class_attach); + +void +raid_class_release(struct raid_template *r) +{ + struct raid_internal *i = to_raid_internal(r); + + attribute_container_unregister(&i->r.raid_attrs.ac); + + kfree(i); +} +EXPORT_SYMBOL(raid_class_release); + +static __init int raid_init(void) +{ + return transport_class_register(&raid_class); +} + +static __exit void raid_exit(void) +{ + transport_class_unregister(&raid_class); +} + +MODULE_AUTHOR("James Bottomley"); +MODULE_DESCRIPTION("RAID device class"); +MODULE_LICENSE("GPL"); + +module_init(raid_init); +module_exit(raid_exit); + diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h new file mode 100644 index 000000000000..a71123c28272 --- /dev/null +++ b/include/linux/raid_class.h @@ -0,0 +1,59 @@ +/* + */ +#include + +struct raid_template { + struct transport_container raid_attrs; +}; + +struct raid_function_template { + void *cookie; + int (*is_raid)(struct device *); + void (*get_resync)(struct device *); + void (*get_state)(struct device *); +}; + +enum raid_state { + RAID_ACTIVE = 1, + RAID_DEGRADED, + RAID_RESYNCING, + RAID_OFFLINE, +}; + +struct raid_data { + struct list_head 
component_list; + int component_count; + int level; + enum raid_state state; + int resync; +}; + +#define DEFINE_RAID_ATTRIBUTE(type, attr) \ +static inline void \ +raid_set_##attr(struct raid_template *r, struct device *dev, type value) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + rd->attr = value; \ +} \ +static inline type \ +raid_get_##attr(struct raid_template *r, struct device *dev) { \ + struct class_device *cdev = \ + attribute_container_find_class_device(&r->raid_attrs.ac, dev);\ + struct raid_data *rd; \ + BUG_ON(!cdev); \ + rd = class_get_devdata(cdev); \ + return rd->attr; \ +} + +DEFINE_RAID_ATTRIBUTE(int, level) +DEFINE_RAID_ATTRIBUTE(int, resync) +DEFINE_RAID_ATTRIBUTE(enum raid_state, state) + +struct raid_template *raid_class_attach(struct raid_function_template *); +void raid_class_release(struct raid_template *); + +void raid_component_add(struct raid_template *, struct device *, + struct device *); -- cgit v1.2.3 From 0f302dc35412dc67035efc188b9d5c40711b4222 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Wed, 31 Aug 2005 21:48:47 +0100 Subject: [ARM] 2866/1: add i.MX set_mctrl / get_mctrl functions Patch from Sascha Hauer This patch adds support for setting and getting RTS / CTS via set_mtctrl / get_mctrl functions. 
Signed-off-by: Sascha Hauer Signed-off-by: Russell King --- drivers/char/Kconfig | 4 +- drivers/misc/Kconfig | 2 +- drivers/net/Kconfig | 2 +- drivers/serial/8250.c | 79 -------------------------- drivers/serial/Kconfig | 2 +- drivers/serial/imx.c | 20 ++++++- drivers/serial/serial_core.c | 132 ------------------------------------------- include/linux/serial.h | 4 -- include/linux/serial_core.h | 2 - 9 files changed, 24 insertions(+), 223 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 7333b41d4224..42734d3bf3b9 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -896,8 +896,8 @@ source "drivers/char/pcmcia/Kconfig" config MWAVE tristate "ACP Modem (Mwave) support" - depends on X86 - select SERIAL_8250 + depends on X86 && BROKEN + select SERIAL_8250 # PLEASE DO NOT DO THIS - move this driver to drivers/serial ---help--- The ACP modem (Mwave) for Linux is a WinModem. It is composed of a kernel driver and a user level application. Together these components diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 7fc692a8f5b0..dea6589d1533 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -6,7 +6,7 @@ menu "Misc devices" config IBM_ASM tristate "Device driver for IBM RSA service processor" - depends on X86 && PCI && EXPERIMENTAL + depends on X86 && PCI && EXPERIMENTAL && BROKEN ---help--- This option enables device driver support for in-band access to the IBM RSA (Condor) service processor in eServer xSeries systems. 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index e0239a10d325..7d8bcb38797a 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -447,7 +447,7 @@ config NET_SB1250_MAC config SGI_IOC3_ETH bool "SGI IOC3 Ethernet" - depends on NET_ETHERNET && PCI && SGI_IP27 + depends on NET_ETHERNET && PCI && SGI_IP27 && BROKEN select CRC32 select MII help diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index cedb5f2f35cc..30a0a3d10145 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -2590,82 +2590,3 @@ module_param_array(probe_rsa, ulong, &probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); #endif MODULE_ALIAS_CHARDEV_MAJOR(TTY_MAJOR); - -/** - * register_serial - configure a 16x50 serial port at runtime - * @req: request structure - * - * Configure the serial port specified by the request. If the - * port exists and is in use an error is returned. If the port - * is not currently in the table it is added. - * - * The port is then probed and if necessary the IRQ is autodetected - * If this fails an error is returned. - * - * On success the port is ready to use and the line number is returned. - * - * Note: this function is deprecated - use serial8250_register_port - * instead. - */ -int register_serial(struct serial_struct *req) -{ - struct uart_port port; - - port.iobase = req->port; - port.membase = req->iomem_base; - port.irq = req->irq; - port.uartclk = req->baud_base * 16; - port.fifosize = req->xmit_fifo_size; - port.regshift = req->iomem_reg_shift; - port.iotype = req->io_type; - port.flags = req->flags | UPF_BOOT_AUTOCONF; - port.mapbase = req->iomap_base; - port.dev = NULL; - - if (share_irqs) - port.flags |= UPF_SHARE_IRQ; - - if (HIGH_BITS_OFFSET) - port.iobase |= (long) req->port_high << HIGH_BITS_OFFSET; - - /* - * If a clock rate wasn't specified by the low level driver, then - * default to the standard clock rate. 
This should be 115200 (*16) - * and should not depend on the architecture's BASE_BAUD definition. - * However, since this API will be deprecated, it's probably a - * better idea to convert the drivers to use the new API - * (serial8250_register_port and serial8250_unregister_port). - */ - if (port.uartclk == 0) { - printk(KERN_WARNING - "Serial: registering port at [%08x,%08lx,%p] irq %d with zero baud_base\n", - port.iobase, port.mapbase, port.membase, port.irq); - printk(KERN_WARNING "Serial: see %s:%d for more information\n", - __FILE__, __LINE__); - dump_stack(); - - /* - * Fix it up for now, but this is only a temporary measure. - */ - port.uartclk = BASE_BAUD * 16; - } - - return serial8250_register_port(&port); -} -EXPORT_SYMBOL(register_serial); - -/** - * unregister_serial - remove a 16x50 serial port at runtime - * @line: serial line number - * - * Remove one serial port. This may not be called from interrupt - * context. We hand the port back to our local PM control. - * - * Note: this function is deprecated - use serial8250_unregister_port - * instead. - */ -void unregister_serial(int line) -{ - serial8250_unregister_port(line); -} -EXPORT_SYMBOL(unregister_serial); diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig index d5797618a3b9..74b80f7c062d 100644 --- a/drivers/serial/Kconfig +++ b/drivers/serial/Kconfig @@ -830,7 +830,7 @@ config SERIAL_M32R_PLDSIO config SERIAL_TXX9 bool "TMPTX39XX/49XX SIO support" - depends HAS_TXX9_SERIAL + depends HAS_TXX9_SERIAL && BROKEN select SERIAL_CORE default y diff --git a/drivers/serial/imx.c b/drivers/serial/imx.c index 8861bcf84adf..4c985e6b3784 100644 --- a/drivers/serial/imx.c +++ b/drivers/serial/imx.c @@ -291,13 +291,31 @@ static unsigned int imx_tx_empty(struct uart_port *port) return USR2((u32)sport->port.membase) & USR2_TXDC ? TIOCSER_TEMT : 0; } +/* + * We have a modem side uart, so the meanings of RTS and CTS are inverted. 
+ */ static unsigned int imx_get_mctrl(struct uart_port *port) { - return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; + struct imx_port *sport = (struct imx_port *)port; + unsigned int tmp = TIOCM_DSR | TIOCM_CAR; + + if (USR1((u32)sport->port.membase) & USR1_RTSS) + tmp |= TIOCM_CTS; + + if (UCR2((u32)sport->port.membase) & UCR2_CTS) + tmp |= TIOCM_RTS; + + return tmp; } static void imx_set_mctrl(struct uart_port *port, unsigned int mctrl) { + struct imx_port *sport = (struct imx_port *)port; + + if (mctrl & TIOCM_RTS) + UCR2((u32)sport->port.membase) |= UCR2_CTS; + else + UCR2((u32)sport->port.membase) &= ~UCR2_CTS; } /* diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c index ac3a0bf924db..dea156a62d0a 100644 --- a/drivers/serial/serial_core.c +++ b/drivers/serial/serial_core.c @@ -2289,143 +2289,11 @@ int uart_match_port(struct uart_port *port1, struct uart_port *port2) } EXPORT_SYMBOL(uart_match_port); -/* - * Try to find an unused uart_state slot for a port. - */ -static struct uart_state * -uart_find_match_or_unused(struct uart_driver *drv, struct uart_port *port) -{ - int i; - - /* - * First, find a port entry which matches. Note: if we do - * find a matching entry, and it has a non-zero use count, - * then we can't register the port. - */ - for (i = 0; i < drv->nr; i++) - if (uart_match_port(drv->state[i].port, port)) - return &drv->state[i]; - - /* - * We didn't find a matching entry, so look for the first - * free entry. We look for one which hasn't been previously - * used (indicated by zero iobase). - */ - for (i = 0; i < drv->nr; i++) - if (drv->state[i].port->type == PORT_UNKNOWN && - drv->state[i].port->iobase == 0 && - drv->state[i].count == 0) - return &drv->state[i]; - - /* - * That also failed. Last resort is to find any currently - * entry which doesn't have a real port associated with it. 
- */ - for (i = 0; i < drv->nr; i++) - if (drv->state[i].port->type == PORT_UNKNOWN && - drv->state[i].count == 0) - return &drv->state[i]; - - return NULL; -} - -/** - * uart_register_port: register uart settings with a port - * @drv: pointer to the uart low level driver structure for this port - * @port: uart port structure describing the port - * - * Register UART settings with the specified low level driver. Detect - * the type of the port if UPF_BOOT_AUTOCONF is set, and detect the - * IRQ if UPF_AUTO_IRQ is set. - * - * We try to pick the same port for the same IO base address, so that - * when a modem is plugged in, unplugged and plugged back in, it gets - * allocated the same port. - * - * Returns negative error, or positive line number. - */ -int uart_register_port(struct uart_driver *drv, struct uart_port *port) -{ - struct uart_state *state; - int ret; - - down(&port_sem); - - state = uart_find_match_or_unused(drv, port); - - if (state) { - /* - * Ok, we've found a line that we can use. - * - * If we find a port that matches this one, and it appears - * to be in-use (even if it doesn't have a type) we shouldn't - * alter it underneath itself - the port may be open and - * trying to do useful work. - */ - if (uart_users(state) != 0) { - ret = -EBUSY; - goto out; - } - - /* - * If the port is already initialised, don't touch it. 
- */ - if (state->port->type == PORT_UNKNOWN) { - state->port->iobase = port->iobase; - state->port->membase = port->membase; - state->port->irq = port->irq; - state->port->uartclk = port->uartclk; - state->port->fifosize = port->fifosize; - state->port->regshift = port->regshift; - state->port->iotype = port->iotype; - state->port->flags = port->flags; - state->port->line = state - drv->state; - state->port->mapbase = port->mapbase; - - uart_configure_port(drv, state, state->port); - } - - ret = state->port->line; - } else - ret = -ENOSPC; - out: - up(&port_sem); - return ret; -} - -/** - * uart_unregister_port - de-allocate a port - * @drv: pointer to the uart low level driver structure for this port - * @line: line index previously returned from uart_register_port() - * - * Hang up the specified line associated with the low level driver, - * and mark the port as unused. - */ -void uart_unregister_port(struct uart_driver *drv, int line) -{ - struct uart_state *state; - - if (line < 0 || line >= drv->nr) { - printk(KERN_ERR "Attempt to unregister "); - printk("%s%d", drv->dev_name, line); - printk("\n"); - return; - } - - state = drv->state + line; - - down(&port_sem); - uart_unconfigure_port(drv, state); - up(&port_sem); -} - EXPORT_SYMBOL(uart_write_wakeup); EXPORT_SYMBOL(uart_register_driver); EXPORT_SYMBOL(uart_unregister_driver); EXPORT_SYMBOL(uart_suspend_port); EXPORT_SYMBOL(uart_resume_port); -EXPORT_SYMBOL(uart_register_port); -EXPORT_SYMBOL(uart_unregister_port); EXPORT_SYMBOL(uart_add_one_port); EXPORT_SYMBOL(uart_remove_one_port); diff --git a/include/linux/serial.h b/include/linux/serial.h index 9f2d85284d0b..12cd9cf65e8f 100644 --- a/include/linux/serial.h +++ b/include/linux/serial.h @@ -176,10 +176,6 @@ struct serial_icounter_struct { #ifdef __KERNEL__ #include -/* Export to allow PCMCIA to use this - Dave Hinds */ -extern int __deprecated register_serial(struct serial_struct *req); -extern void __deprecated unregister_serial(int line); - /* Allow 
architectures to override entries in serial8250_ports[] at run time: */ struct uart_port; /* forward declaration */ extern int early_serial_setup(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 1c53302db041..cf0f64ea2bc0 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -360,8 +360,6 @@ struct tty_driver *uart_console_device(struct console *co, int *index); */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); -void __deprecated uart_unregister_port(struct uart_driver *reg, int line); -int __deprecated uart_register_port(struct uart_driver *reg, struct uart_port *port); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); int uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_match_port(struct uart_port *port1, struct uart_port *port2); -- cgit v1.2.3 From bc49a661e6e82bfa8219c3d0a2e4dea51c847d23 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 1 Sep 2005 15:56:26 +0100 Subject: [SERIAL] Move serial8250_*_port prototypes to linux/serial_8250.h Signed-off-by: Russell King --- drivers/serial/8250.h | 6 +----- include/linux/serial_8250.h | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/serial/8250.h b/drivers/serial/8250.h index 9225c82faeb8..b1b459efda52 100644 --- a/drivers/serial/8250.h +++ b/drivers/serial/8250.h @@ -16,11 +16,7 @@ */ #include - -int serial8250_register_port(struct uart_port *); -void serial8250_unregister_port(int line); -void serial8250_suspend_port(int line); -void serial8250_resume_port(int line); +#include struct old_serial_port { unsigned int uart; diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 3e3c1fa35b06..d8a023d804d4 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -14,6 +14,9 @@ #include #include +/* + * This is the 
platform device platform_data structure + */ struct plat_serial8250_port { unsigned long iobase; /* io base address */ void __iomem *membase; /* ioremap cookie or NULL */ @@ -26,4 +29,17 @@ struct plat_serial8250_port { unsigned int flags; /* UPF_* flags */ }; +/* + * This should be used by drivers which want to register + * their own 8250 ports without registering their own + * platform device. Using these will make your driver + * dependent on the 8250 driver. + */ +struct uart_port; + +int serial8250_register_port(struct uart_port *); +void serial8250_unregister_port(int line); +void serial8250_suspend_port(int line); +void serial8250_resume_port(int line); + #endif -- cgit v1.2.3 From ff4cc3ac93e1d0369928fd60ec1fe82417afc576 Mon Sep 17 00:00:00 2001 From: Mike Kershaw Date: Thu, 1 Sep 2005 17:40:05 -0700 Subject: [TUNTAP]: Allow setting the linktype of the tap device from userspace Currently tun/tap only supports the EN10MB ARP type. For use with wireless and other networking types it should be possible to set the ARP type via an ioctl. Patch v2: Included check that the tap interface is down before changing the link type out from underneath it Signed-off-by: Mike Kershaw Signed-off-by: David S. Miller --- drivers/net/tun.c | 15 +++++++++++++++ include/linux/if_tun.h | 1 + 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index effab0b9adca..50b8c6754b1e 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -18,6 +18,9 @@ /* * Changes: * + * Mike Kershaw 2005/08/14 + * Add TUNSETLINK ioctl to set the link encapsulation + * * Mark Smith * Use random_ether_addr() for tap MAC address. 
* @@ -612,6 +615,18 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file, DBG(KERN_INFO "%s: owner set to %d\n", tun->dev->name, tun->owner); break; + case TUNSETLINK: + /* Only allow setting the type when the interface is down */ + if (tun->dev->flags & IFF_UP) { + DBG(KERN_INFO "%s: Linktype set failed because interface is up\n", + tun->dev->name); + return -EBUSY; + } else { + tun->dev->type = (int) arg; + DBG(KERN_INFO "%s: linktype set to %d\n", tun->dev->name, tun->dev->type); + } + break; + #ifdef TUN_DEBUG case TUNSETDEBUG: tun->debug = arg; diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 096a85a58ae5..88aef7b86ef4 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -77,6 +77,7 @@ struct tun_struct { #define TUNSETIFF _IOW('T', 202, int) #define TUNSETPERSIST _IOW('T', 203, int) #define TUNSETOWNER _IOW('T', 204, int) +#define TUNSETLINK _IOW('T', 205, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 -- cgit v1.2.3 From 64baf3cfea974d2b9e671ccfdbc03e030ea5ebc6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 1 Sep 2005 17:43:05 -0700 Subject: [CRYPTO]: Added CRYPTO_TFM_REQ_MAY_SLEEP flag The crypto layer currently uses in_atomic() to determine whether it is allowed to sleep. This is incorrect since spin locks don't always cause in_atomic() to return true. Instead of that, this patch returns to an earlier idea of a per-tfm flag which determines whether sleeping is allowed. Unlike the earlier version, the default is to not allow sleeping. This ensures that no existing code can break. As usual, this flag may either be set through crypto_alloc_tfm(), or just before a specific crypto operation. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- crypto/api.c | 3 ++- crypto/cipher.c | 4 ---- crypto/internal.h | 3 ++- include/linux/crypto.h | 1 + 4 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/crypto/api.c b/crypto/api.c index b4728811ce3b..959c4e5f264f 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -66,7 +66,8 @@ static inline struct crypto_alg *crypto_alg_mod_lookup(const char *name) static int crypto_init_flags(struct crypto_tfm *tfm, u32 flags) { - tfm->crt_flags = 0; + tfm->crt_flags = flags & CRYPTO_TFM_REQ_MASK; + flags &= ~CRYPTO_TFM_REQ_MASK; switch (crypto_tfm_alg_type(tfm)) { case CRYPTO_ALG_TYPE_CIPHER: diff --git a/crypto/cipher.c b/crypto/cipher.c index 8da644364cb4..3df47f93c9db 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -377,11 +377,7 @@ static int nocrypt_iv(struct crypto_tfm *tfm, int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags) { u32 mode = flags & CRYPTO_TFM_MODE_MASK; - tfm->crt_cipher.cit_mode = mode ? mode : CRYPTO_TFM_MODE_ECB; - if (flags & CRYPTO_TFM_REQ_WEAK_KEY) - tfm->crt_flags = CRYPTO_TFM_REQ_WEAK_KEY; - return 0; } diff --git a/crypto/internal.h b/crypto/internal.h index 37515beafc8c..37aa652ce5ce 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -17,6 +17,7 @@ #include #include #include +#include #include extern enum km_type crypto_km_types[]; @@ -38,7 +39,7 @@ static inline void crypto_kunmap(void *vaddr, int out) static inline void crypto_yield(struct crypto_tfm *tfm) { - if (!in_atomic()) + if (tfm->crt_flags & CRYPTO_TFM_REQ_MAY_SLEEP) cond_resched(); } diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 5e2bcc636a02..3c89df6e7768 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -45,6 +45,7 @@ #define CRYPTO_TFM_MODE_CTR 0x00000008 #define CRYPTO_TFM_REQ_WEAK_KEY 0x00000100 +#define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 #define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 #define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000 #define CRYPTO_TFM_RES_BAD_KEY_SCHED 
0x00400000 -- cgit v1.2.3 From d51fe1be3f738e7d73f63bb5aa0df88bafb41a21 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Fri, 2 Sep 2005 08:59:25 +0200 Subject: [PATCH] remove driverfs references from include/linux/cpu.h and net/sunrpc/rpc_pipe.c This patch is against 2.6.10, but still applies cleanly. It's just s/driverfs/sysfs/ in these two files. Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index e8904c0da686..86980c68234a 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -8,7 +8,7 @@ * Basic handling of the devices is done in drivers/base/cpu.c * and system devices are handled in drivers/base/sys.c. * - * CPUs are exported via driverfs in the class/cpu/devices/ + * CPUs are exported via sysfs in the class/cpu/devices/ * directory. * * Per-cpu interfaces can be implemented using a struct device_interface. diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index fe1a73ce6cff..ded6c63f11ec 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -3,7 +3,7 @@ * * Userland/kernel interface for rpcauth_gss. * Code shamelessly plagiarized from fs/nfsd/nfsctl.c - * and fs/driverfs/inode.c + * and fs/sysfs/inode.c * * Copyright (c) 2002, Trond Myklebust * -- cgit v1.2.3 From 865e9f13c94891daed4f6a5f69c5d6ec04d4932f Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Sat, 3 Sep 2005 16:45:02 +0100 Subject: [MMC] ios for mmc chip select Adds a new ios for setting the chip select pin on MMC cards. Needed on SD controllers which use this pin for other things and therefore cannot have it pulled high at all times. 
Signed-off-by: Pierre Ossman Signed-off-by: Russell King --- drivers/mmc/mmc.c | 12 ++++++++++++ include/linux/mmc/host.h | 6 ++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 3c5904834fe8..0a8165974ba7 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -457,6 +457,11 @@ static void mmc_idle_cards(struct mmc_host *host) { struct mmc_command cmd; + host->ios.chip_select = MMC_CS_HIGH; + host->ops->set_ios(host, &host->ios); + + mmc_delay(1); + cmd.opcode = MMC_GO_IDLE_STATE; cmd.arg = 0; cmd.flags = MMC_RSP_NONE; @@ -464,6 +469,11 @@ static void mmc_idle_cards(struct mmc_host *host) mmc_wait_for_cmd(host, &cmd, 0); mmc_delay(1); + + host->ios.chip_select = MMC_CS_DONTCARE; + host->ops->set_ios(host, &host->ios); + + mmc_delay(1); } /* @@ -475,6 +485,7 @@ static void mmc_power_up(struct mmc_host *host) host->ios.vdd = bit; host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; + host->ios.chip_select = MMC_CS_DONTCARE; host->ios.power_mode = MMC_POWER_UP; host->ops->set_ios(host, &host->ios); @@ -492,6 +503,7 @@ static void mmc_power_off(struct mmc_host *host) host->ios.clock = 0; host->ios.vdd = 0; host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; + host->ios.chip_select = MMC_CS_DONTCARE; host->ios.power_mode = MMC_POWER_OFF; host->ops->set_ios(host, &host->ios); } diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9a0893f3249e..30f68c0c8c6e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -46,6 +46,12 @@ struct mmc_ios { #define MMC_BUSMODE_OPENDRAIN 1 #define MMC_BUSMODE_PUSHPULL 2 + unsigned char chip_select; /* SPI chip select */ + +#define MMC_CS_DONTCARE 0 +#define MMC_CS_HIGH 1 +#define MMC_CS_LOW 2 + unsigned char power_mode; /* power supply mode */ #define MMC_POWER_OFF 0 -- cgit v1.2.3 From 8a409b0118c2d78f84f740f60fe03abda1fe3333 Mon Sep 17 00:00:00 2001 From: Vojtech Pavlik Date: Mon, 5 Sep 2005 00:08:08 -0500 Subject: Input: HID - add more 
consumer usages Extend mapping of the consumer usage page in hid-input.c to handle more cases appearing on new USB keyboards. Signed-off-by: Vojtech Pavlik Signed-off-by: Dmitry Torokhov --- drivers/usb/input/hid-debug.h | 17 +++++++++++------ drivers/usb/input/hid-input.c | 19 ++++++++++++++++--- drivers/usb/input/hid.h | 1 + include/linux/input.h | 8 ++++++++ 4 files changed, 36 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/input/hid-debug.h b/drivers/usb/input/hid-debug.h index 789df807b113..ceebab99eff2 100644 --- a/drivers/usb/input/hid-debug.h +++ b/drivers/usb/input/hid-debug.h @@ -109,6 +109,7 @@ static const struct hid_usage_entry hid_usage_table[] = { {0, 0x03, "ScrollLock"}, {0, 0x04, "Compose"}, {0, 0x05, "Kana"}, + {0, 0x4b, "GenericIndicator"}, { 9, 0, "Button" }, { 10, 0, "Ordinal" }, { 12, 0, "Consumer" }, @@ -591,7 +592,8 @@ static char *keys[KEY_MAX + 1] = { [KEY_EXIT] = "Exit", [KEY_MOVE] = "Move", [KEY_EDIT] = "Edit", [KEY_SCROLLUP] = "ScrollUp", [KEY_SCROLLDOWN] = "ScrollDown", [KEY_KPLEFTPAREN] = "KPLeftParenthesis", - [KEY_KPRIGHTPAREN] = "KPRightParenthesis", [KEY_F13] = "F13", + [KEY_KPRIGHTPAREN] = "KPRightParenthesis", [KEY_NEW] = "New", + [KEY_REDO] = "Redo", [KEY_F13] = "F13", [KEY_F14] = "F14", [KEY_F15] = "F15", [KEY_F16] = "F16", [KEY_F17] = "F17", [KEY_F18] = "F18", [KEY_F19] = "F19", @@ -601,15 +603,15 @@ static char *keys[KEY_MAX + 1] = { [KEY_PAUSECD] = "PauseCD", [KEY_PROG3] = "Prog3", [KEY_PROG4] = "Prog4", [KEY_SUSPEND] = "Suspend", [KEY_CLOSE] = "Close", [KEY_PLAY] = "Play", - [KEY_FASTFORWARD] = "Fast Forward", [KEY_BASSBOOST] = "Bass Boost", + [KEY_FASTFORWARD] = "FastForward", [KEY_BASSBOOST] = "BassBoost", [KEY_PRINT] = "Print", [KEY_HP] = "HP", [KEY_CAMERA] = "Camera", [KEY_SOUND] = "Sound", [KEY_QUESTION] = "Question", [KEY_EMAIL] = "Email", [KEY_CHAT] = "Chat", [KEY_SEARCH] = "Search", [KEY_CONNECT] = "Connect", [KEY_FINANCE] = "Finance", [KEY_SPORT] = "Sport", [KEY_SHOP] = "Shop", 
- [KEY_ALTERASE] = "Alternate Erase", [KEY_CANCEL] = "Cancel", - [KEY_BRIGHTNESSDOWN] = "Brightness down", [KEY_BRIGHTNESSUP] = "Brightness up", + [KEY_ALTERASE] = "AlternateErase", [KEY_CANCEL] = "Cancel", + [KEY_BRIGHTNESSDOWN] = "BrightnessDown", [KEY_BRIGHTNESSUP] = "BrightnessUp", [KEY_MEDIA] = "Media", [KEY_UNKNOWN] = "Unknown", [BTN_0] = "Btn0", [BTN_1] = "Btn1", [BTN_2] = "Btn2", [BTN_3] = "Btn3", @@ -639,8 +641,8 @@ static char *keys[KEY_MAX + 1] = { [BTN_TOOL_AIRBRUSH] = "ToolAirbrush", [BTN_TOOL_FINGER] = "ToolFinger", [BTN_TOOL_MOUSE] = "ToolMouse", [BTN_TOOL_LENS] = "ToolLens", [BTN_TOUCH] = "Touch", [BTN_STYLUS] = "Stylus", - [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "Tool Doubletap", - [BTN_TOOL_TRIPLETAP] = "Tool Tripletap", [BTN_GEAR_DOWN] = "WheelBtn", + [BTN_STYLUS2] = "Stylus2", [BTN_TOOL_DOUBLETAP] = "ToolDoubleTap", + [BTN_TOOL_TRIPLETAP] = "ToolTripleTap", [BTN_GEAR_DOWN] = "WheelBtn", [BTN_GEAR_UP] = "Gear up", [KEY_OK] = "Ok", [KEY_SELECT] = "Select", [KEY_GOTO] = "Goto", [KEY_CLEAR] = "Clear", [KEY_POWER2] = "Power2", @@ -676,6 +678,9 @@ static char *keys[KEY_MAX + 1] = { [KEY_TWEN] = "TWEN", [KEY_DEL_EOL] = "DeleteEOL", [KEY_DEL_EOS] = "DeleteEOS", [KEY_INS_LINE] = "InsertLine", [KEY_DEL_LINE] = "DeleteLine", + [KEY_SEND] = "Send", [KEY_REPLY] = "Reply", + [KEY_FORWARDMAIL] = "ForwardMail", [KEY_SAVE] = "Save", + [KEY_DOCUMENTS] = "Documents", }; static char *relatives[REL_MAX + 1] = { diff --git a/drivers/usb/input/hid-input.c b/drivers/usb/input/hid-input.c index fa4f79d88aa7..b28cf8593b48 100644 --- a/drivers/usb/input/hid-input.c +++ b/drivers/usb/input/hid-input.c @@ -78,8 +78,8 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel { struct input_dev *input = &hidinput->input; struct hid_device *device = hidinput->input.private; - int max, code; - unsigned long *bit; + int max = 0, code; + unsigned long *bit = NULL; field->hidinput = hidinput; @@ -248,7 +248,10 @@ static void 
hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x034: map_key_clear(KEY_SLEEP); break; case 0x036: map_key_clear(BTN_MISC); break; case 0x08a: map_key_clear(KEY_WWW); break; + case 0x08d: map_key_clear(KEY_PROGRAM); break; case 0x095: map_key_clear(KEY_HELP); break; + case 0x09c: map_key_clear(KEY_CHANNELUP); break; + case 0x09d: map_key_clear(KEY_CHANNELDOWN); break; case 0x0b0: map_key_clear(KEY_PLAY); break; case 0x0b1: map_key_clear(KEY_PAUSE); break; case 0x0b2: map_key_clear(KEY_RECORD); break; @@ -268,6 +271,11 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x18a: map_key_clear(KEY_MAIL); break; case 0x192: map_key_clear(KEY_CALC); break; case 0x194: map_key_clear(KEY_FILE); break; + case 0x1a7: map_key_clear(KEY_DOCUMENTS); break; + case 0x201: map_key_clear(KEY_NEW); break; + case 0x207: map_key_clear(KEY_SAVE); break; + case 0x208: map_key_clear(KEY_PRINT); break; + case 0x209: map_key_clear(KEY_PROPS); break; case 0x21a: map_key_clear(KEY_UNDO); break; case 0x21b: map_key_clear(KEY_COPY); break; case 0x21c: map_key_clear(KEY_CUT); break; @@ -280,7 +288,11 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case 0x227: map_key_clear(KEY_REFRESH); break; case 0x22a: map_key_clear(KEY_BOOKMARKS); break; case 0x238: map_rel(REL_HWHEEL); break; - default: goto unknown; + case 0x279: map_key_clear(KEY_REDO); break; + case 0x289: map_key_clear(KEY_REPLY); break; + case 0x28b: map_key_clear(KEY_FORWARDMAIL); break; + case 0x28c: map_key_clear(KEY_SEND); break; + default: goto ignore; } break; @@ -306,6 +318,7 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel case HID_UP_MSVENDOR: case HID_UP_LOGIVENDOR: + case HID_UP_LOGIVENDOR2: goto ignore; diff --git a/drivers/usb/input/hid.h b/drivers/usb/input/hid.h index cea5cf34b5f9..ca3e170ce0b3 100644 --- a/drivers/usb/input/hid.h +++ b/drivers/usb/input/hid.h @@ -184,6 +184,7 @@ struct 
hid_item { #define HID_UP_HPVENDOR 0xff7f0000 #define HID_UP_MSVENDOR 0xff000000 #define HID_UP_LOGIVENDOR 0x00ff0000 +#define HID_UP_LOGIVENDOR2 0xffbc0000 #define HID_USAGE 0x0000ffff diff --git a/include/linux/input.h b/include/linux/input.h index bdc53c6cc962..227a497c0678 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -287,6 +287,8 @@ struct input_absinfo { #define KEY_SCROLLDOWN 178 #define KEY_KPLEFTPAREN 179 #define KEY_KPRIGHTPAREN 180 +#define KEY_NEW 181 +#define KEY_REDO 182 #define KEY_F13 183 #define KEY_F14 184 @@ -333,6 +335,12 @@ struct input_absinfo { #define KEY_KBDILLUMDOWN 229 #define KEY_KBDILLUMUP 230 +#define KEY_SEND 231 +#define KEY_REPLY 232 +#define KEY_FORWARDMAIL 233 +#define KEY_SAVE 234 +#define KEY_DOCUMENTS 235 + #define KEY_UNKNOWN 240 #define BTN_MISC 0x100 -- cgit v1.2.3 From 802f192e4a600f7ef84ca25c8b818c8830acef5a Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sat, 3 Sep 2005 15:54:26 -0700 Subject: [PATCH] SPARSEMEM EXTREME A new option for SPARSEMEM is ARCH_SPARSEMEM_EXTREME. Architecture platforms with a very sparse physical address space would likely want to select this option. For those architecture platforms that don't select the option, the code generated is equivalent to SPARSEMEM currently in -mm. I'll be posting a patch on ia64 ml which uses this new SPARSEMEM feature. ARCH_SPARSEMEM_EXTREME makes mem_section a one dimensional array of pointers to mem_sections. This two level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM -mm implementation is a one dimensional array of mem_sections which is the default SPARSEMEM configuration. The patch attempts to isolate the implementation details of the physical layout of the sparsemem section array. ARCH_SPARSEMEM_EXTREME depends on 64BIT and is by default boolean false. 
I've boot tested under aim load ia64 configured for ARCH_SPARSEMEM_EXTREME. I've also boot tested a 4 way Opteron machine with !ARCH_SPARSEMEM_EXTREME and tested with aim. Signed-off-by: Andy Whitcroft Signed-off-by: Bob Picco Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/mm/init.c | 27 +++++++++------------------ arch/ppc64/mm/numa.c | 43 ++++++++++++++++++++++++++++++++++++++++--- include/asm-ppc64/lmb.h | 22 ++++++++++++++++++++++ include/linux/mmzone.h | 30 ++++++++++++++++++++++++++++-- mm/Kconfig | 9 +++++++++ mm/sparse.c | 38 ++++++++++++++++++++++++++++++++------ 6 files changed, 140 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index c02dc9809ca5..b3b1e9c1770a 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -552,27 +552,18 @@ void __init do_init_bootmem(void) /* Add all physical memory to the bootmem map, mark each area * present. */ - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; - unsigned long start_pfn, end_pfn; - - base = lmb.memory.region[i].base; - size = lmb.memory.region[i].size; - - start_pfn = base >> PAGE_SHIFT; - end_pfn = start_pfn + (size >> PAGE_SHIFT); - memory_present(0, start_pfn, end_pfn); - - free_bootmem(base, size); - } + for (i=0; i < lmb.memory.cnt; i++) + free_bootmem(lmb_start_pfn(&lmb.memory, i), + lmb_size_bytes(&lmb.memory, i)); /* reserve the sections we're already using */ - for (i=0; i < lmb.reserved.cnt; i++) { - unsigned long base = lmb.reserved.region[i].base; - unsigned long size = lmb.reserved.region[i].size; + for (i=0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb_start_pfn(&lmb.reserved, i), + lmb_size_bytes(&lmb.reserved, i)); - reserve_bootmem(base, size); - } + for (i=0; i < lmb.memory.cnt; i++) + memory_present(0, lmb_start_pfn(&lmb.memory, i), + lmb_end_pfn(&lmb.memory, i)); } /* diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index 
c3116f0d788c..cb864b8f2750 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -440,8 +440,6 @@ new_range: for (i = start ; i < (start+size); i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = numa_domain; - memory_present(numa_domain, start >> PAGE_SHIFT, - (start + size) >> PAGE_SHIFT); if (--ranges) goto new_range; @@ -483,7 +481,6 @@ static void __init setup_nonnuma(void) for (i = 0 ; i < top_of_ram; i += MEMORY_INCREMENT) numa_memory_lookup_table[i >> MEMORY_INCREMENT_SHIFT] = 0; - memory_present(0, 0, init_node_data[0].node_end_pfn); } static void __init dump_numa_topology(void) @@ -695,6 +692,46 @@ new_range: size); } } + /* + * This loop may look famaliar, but we have to do it again + * after marking our reserved memory to mark memory present + * for sparsemem. + */ + addr_cells = get_mem_addr_cells(); + size_cells = get_mem_size_cells(); + memory = NULL; + while ((memory = of_find_node_by_type(memory, "memory")) != NULL) { + unsigned long mem_start, mem_size; + int numa_domain, ranges; + unsigned int *memcell_buf; + unsigned int len; + + memcell_buf = (unsigned int *)get_property(memory, "reg", &len); + if (!memcell_buf || len <= 0) + continue; + + ranges = memory->n_addrs; /* ranges in cell */ +new_range2: + mem_start = read_n_cells(addr_cells, &memcell_buf); + mem_size = read_n_cells(size_cells, &memcell_buf); + if (numa_enabled) { + numa_domain = of_node_numa_domain(memory); + if (numa_domain >= MAX_NUMNODES) + numa_domain = 0; + } else + numa_domain = 0; + + if (numa_domain != nid) + continue; + + mem_size = numa_enforce_memory_limit(mem_start, mem_size); + memory_present(numa_domain, mem_start >> PAGE_SHIFT, + (mem_start + mem_size) >> PAGE_SHIFT); + + if (--ranges) /* process all ranges in cell */ + goto new_range2; + } + } } diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index cb368bf0f264..de91e034bd98 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h @@ -56,4 +56,26 @@ extern 
void lmb_dump_all(void); extern unsigned long io_hole_start; +static inline unsigned long +lmb_size_bytes(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].size; +} +static inline unsigned long +lmb_size_pages(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_size_bytes(type, region_nr) >> PAGE_SHIFT; +} +static inline unsigned long +lmb_start_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return type->region[region_nr].base >> PAGE_SHIFT; +} +static inline unsigned long +lmb_end_pfn(struct lmb_region *type, unsigned long region_nr) +{ + return lmb_start_pfn(type, region_nr) + + lmb_size_pages(type, region_nr); +} + #endif /* _PPC64_LMB_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6c90461ed99f..b97054bbc394 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -487,6 +487,28 @@ struct mem_section { unsigned long section_mem_map; }; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +/* + * Should we ever require GCC 4 or later then the flat array scheme + * can be eliminated and a uniform solution for EXTREME and !EXTREME can + * be arrived at. + */ +#define SECTION_ROOT_SHIFT (PAGE_SHIFT-3) +#define SECTION_ROOT_MASK ((1UL<<SECTION_ROOT_SHIFT) - 1) +#define SECTION_TO_ROOT(_sec) ((_sec) >> SECTION_ROOT_SHIFT) +#define NR_SECTION_ROOTS (NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT) + +extern struct mem_section *mem_section[NR_SECTION_ROOTS]; + +static inline struct mem_section *__nr_to_section(unsigned long nr) +{ + if (!mem_section[SECTION_TO_ROOT(nr)]) + return NULL; + return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; +} + +#else + extern struct mem_section mem_section[NR_MEM_SECTIONS]; static inline struct mem_section *__nr_to_section(unsigned long nr) @@ -494,6 +516,10 @@ static inline struct mem_section *__nr_to_section(unsigned long nr) return &mem_section[nr]; } +#define sparse_index_init(_sec, _nid) do {} while (0) + +#endif + /* * We use the lower bits of the mem_map pointer to store * a little bit of information. 
There should be at least @@ -513,12 +539,12 @@ static inline struct page *__section_mem_map_addr(struct mem_section *section) static inline int valid_section(struct mem_section *section) { - return (section->section_mem_map & SECTION_MARKED_PRESENT); + return (section && (section->section_mem_map & SECTION_MARKED_PRESENT)); } static inline int section_has_mem_map(struct mem_section *section) { - return (section->section_mem_map & SECTION_HAS_MEM_MAP); + return (section && (section->section_mem_map & SECTION_HAS_MEM_MAP)); } static inline int valid_section_nr(unsigned long nr) diff --git a/mm/Kconfig b/mm/Kconfig index cd379936cac6..fc644c5c065d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -89,3 +89,12 @@ config NEED_MULTIPLE_NODES config HAVE_MEMORY_PRESENT def_bool y depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM + +# +# Architectecture platforms which require a two level mem_section in SPARSEMEM +# must select this option. This is usually for architecture platforms with +# an extremely sparse physical address space. +# +config ARCH_SPARSEMEM_EXTREME + def_bool n + depends on SPARSEMEM && 64BIT diff --git a/mm/sparse.c b/mm/sparse.c index b54e304df4a7..b2b456bf0a5d 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -13,7 +13,26 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -struct mem_section mem_section[NR_MEM_SECTIONS]; +#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +struct mem_section *mem_section[NR_SECTION_ROOTS] + ____cacheline_maxaligned_in_smp; + +static void sparse_index_init(unsigned long section, int nid) +{ + unsigned long root = SECTION_TO_ROOT(section); + + if (mem_section[root]) + return; + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + if (mem_section[root]) + memset(mem_section[root], 0, PAGE_SIZE); + else + panic("memory_present: NO MEMORY\n"); +} +#else +struct mem_section mem_section[NR_MEM_SECTIONS] + ____cacheline_maxaligned_in_smp; +#endif EXPORT_SYMBOL(mem_section); /* Record a memory area against a node. 
*/ @@ -24,8 +43,13 @@ void memory_present(int nid, unsigned long start, unsigned long end) start &= PAGE_SECTION_MASK; for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { unsigned long section = pfn_to_section_nr(pfn); - if (!mem_section[section].section_mem_map) - mem_section[section].section_mem_map = SECTION_MARKED_PRESENT; + struct mem_section *ms; + + sparse_index_init(section, nid); + + ms = __nr_to_section(section); + if (!ms->section_mem_map) + ms->section_mem_map = SECTION_MARKED_PRESENT; } } @@ -85,6 +109,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) { struct page *map; int nid = early_pfn_to_nid(section_nr_to_pfn(pnum)); + struct mem_section *ms = __nr_to_section(pnum); map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); if (map) @@ -96,7 +121,7 @@ static struct page *sparse_early_mem_map_alloc(unsigned long pnum) return map; printk(KERN_WARNING "%s: allocation failed\n", __FUNCTION__); - mem_section[pnum].section_mem_map = 0; + ms->section_mem_map = 0; return NULL; } @@ -114,8 +139,9 @@ void sparse_init(void) continue; map = sparse_early_mem_map_alloc(pnum); - if (map) - sparse_init_one_section(&mem_section[pnum], pnum, map); + if (!map) + continue; + sparse_init_one_section(__nr_to_section(pnum), pnum, map); } } -- cgit v1.2.3 From 3e347261a80b57df792ab9464b5f0ed59add53a8 Mon Sep 17 00:00:00 2001 From: Bob Picco Date: Sat, 3 Sep 2005 15:54:28 -0700 Subject: [PATCH] sparsemem extreme implementation With cleanups from Dave Hansen SPARSEMEM_EXTREME makes mem_section a one dimensional array of pointers to mem_sections. This two level layout scheme is able to achieve smaller memory requirements for SPARSEMEM with the tradeoff of an additional shift and load when fetching the memory section. The current SPARSEMEM implementation is a one dimensional array of mem_sections which is the default SPARSEMEM configuration. 
The patch attempts to isolate the implementation details of the physical layout of the sparsemem section array. SPARSEMEM_EXTREME requires bootmem to be functioning at the time of memory_present() calls. This is not always feasible, so architectures which do not need it may allocate everything statically by using SPARSEMEM_STATIC. Signed-off-by: Andy Whitcroft Signed-off-by: Bob Picco Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 1 + include/linux/mmzone.h | 40 +++++++++++++++------------------------- mm/Kconfig | 19 ++++++++++++++++--- mm/sparse.c | 26 +++++++++++++++++--------- 4 files changed, 49 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 619d843ba231..dcb0ad098c60 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -754,6 +754,7 @@ config NUMA depends on SMP && HIGHMEM64G && (X86_NUMAQ || X86_GENERICARCH || (X86_SUMMIT && ACPI)) default n if X86_PC default y if (X86_NUMAQ || X86_SUMMIT) + select SPARSEMEM_STATIC # Need comments to help the hapless user trying to turn on NUMA support comment "NUMA (NUMA-Q) requires SMP, 64GB highmem support" diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b97054bbc394..79cf578e21b9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -487,39 +487,29 @@ struct mem_section { unsigned long section_mem_map; }; -#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME -/* - * Should we ever require GCC 4 or later then the flat array scheme - * can be eliminated and a uniform solution for EXTREME and !EXTREME can - * be arrived at. 
- */ -#define SECTION_ROOT_SHIFT (PAGE_SHIFT-3) -#define SECTION_ROOT_MASK ((1UL<<SECTION_ROOT_SHIFT) - 1) -#define SECTION_TO_ROOT(_sec) ((_sec) >> SECTION_ROOT_SHIFT) -#define NR_SECTION_ROOTS (NR_MEM_SECTIONS >> SECTION_ROOT_SHIFT) +#ifdef CONFIG_SPARSEMEM_EXTREME +#define SECTIONS_PER_ROOT (PAGE_SIZE / sizeof (struct mem_section)) +#else +#define SECTIONS_PER_ROOT 1 +#endif -extern struct mem_section *mem_section[NR_SECTION_ROOTS]; - -static inline struct mem_section *__nr_to_section(unsigned long nr) -{ - if (!mem_section[SECTION_TO_ROOT(nr)]) - return NULL; - return &mem_section[SECTION_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; -} +#define SECTION_NR_TO_ROOT(sec) ((sec) / SECTIONS_PER_ROOT) +#define NR_SECTION_ROOTS (NR_MEM_SECTIONS / SECTIONS_PER_ROOT) +#define SECTION_ROOT_MASK (SECTIONS_PER_ROOT - 1) +#ifdef CONFIG_SPARSEMEM_EXTREME +extern struct mem_section *mem_section[NR_SECTION_ROOTS]; #else - -extern struct mem_section mem_section[NR_MEM_SECTIONS]; +extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]; +#endif static inline struct mem_section *__nr_to_section(unsigned long nr) { - return &mem_section[nr]; + if (!mem_section[SECTION_NR_TO_ROOT(nr)]) + return NULL; + return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK]; } -#define sparse_index_init(_sec, _nid) do {} while (0) - -#endif - /* * We use the lower bits of the mem_map pointer to store * a little bit of information. There should be at least diff --git a/mm/Kconfig b/mm/Kconfig index fc644c5c065d..4e9937ac3529 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -90,11 +90,24 @@ config HAVE_MEMORY_PRESENT def_bool y depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM +# +# SPARSEMEM_EXTREME (which is the default) does some bootmem +# allocations when memory_present() is called. If this can not +# be done on your architecture, select this option. However, +# statically allocating the mem_section[] array can potentially +# consume vast quantities of .bss, so be careful. 
+# +# This option will also potentially produce smaller runtime code +# with gcc 3.4 and later. +# +config SPARSEMEM_STATIC + def_bool n + # # Architectecture platforms which require a two level mem_section in SPARSEMEM # must select this option. This is usually for architecture platforms with # an extremely sparse physical address space. # -config ARCH_SPARSEMEM_EXTREME - def_bool n - depends on SPARSEMEM && 64BIT +config SPARSEMEM_EXTREME + def_bool y + depends on SPARSEMEM && !SPARSEMEM_STATIC diff --git a/mm/sparse.c b/mm/sparse.c index b2b456bf0a5d..fa01292157a9 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -13,28 +13,36 @@ * * 1) mem_section - memory sections, mem_map's for valid memory */ -#ifdef CONFIG_ARCH_SPARSEMEM_EXTREME +#ifdef CONFIG_SPARSEMEM_EXTREME struct mem_section *mem_section[NR_SECTION_ROOTS] ____cacheline_maxaligned_in_smp; +#else +struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] + ____cacheline_maxaligned_in_smp; +#endif +EXPORT_SYMBOL(mem_section); + +static void sparse_alloc_root(unsigned long root, int nid) +{ +#ifdef CONFIG_SPARSEMEM_EXTREME + mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); +#endif +} static void sparse_index_init(unsigned long section, int nid) { - unsigned long root = SECTION_TO_ROOT(section); + unsigned long root = SECTION_NR_TO_ROOT(section); if (mem_section[root]) return; - mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); + + sparse_alloc_root(root, nid); + if (mem_section[root]) memset(mem_section[root], 0, PAGE_SIZE); else panic("memory_present: NO MEMORY\n"); } -#else -struct mem_section mem_section[NR_MEM_SECTIONS] - ____cacheline_maxaligned_in_smp; -#endif -EXPORT_SYMBOL(mem_section); - /* Record a memory area against a node. 
*/ void memory_present(int nid, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 28ae55c98e4d16eac9a05a8a259d7763ef3aeb18 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 3 Sep 2005 15:54:29 -0700 Subject: [PATCH] sparsemem extreme: hotplug preparation This splits up sparse_index_alloc() into two pieces. This is needed because we'll allocate the memory for the second level in a different place from where we actually consume it to keep the allocation from happening underneath a lock Signed-off-by: Dave Hansen Signed-off-by: Bob Picco Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 + mm/sparse.c | 53 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 79cf578e21b9..5ed471b58f4f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -588,6 +588,7 @@ static inline int pfn_valid(unsigned long pfn) void sparse_init(void); #else #define sparse_init() do {} while (0) +#define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_NODES_SPAN_OTHER_NODES diff --git a/mm/sparse.c b/mm/sparse.c index fa01292157a9..347249a4917a 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -6,6 +6,7 @@ #include #include #include +#include #include /* @@ -22,27 +23,55 @@ struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT] #endif EXPORT_SYMBOL(mem_section); -static void sparse_alloc_root(unsigned long root, int nid) -{ #ifdef CONFIG_SPARSEMEM_EXTREME - mem_section[root] = alloc_bootmem_node(NODE_DATA(nid), PAGE_SIZE); -#endif +static struct mem_section *sparse_index_alloc(int nid) +{ + struct mem_section *section = NULL; + unsigned long array_size = SECTIONS_PER_ROOT * + sizeof(struct mem_section); + + section = alloc_bootmem_node(NODE_DATA(nid), array_size); + + if (section) + memset(section, 0, array_size); + + 
return section; } -static void sparse_index_init(unsigned long section, int nid) +static int sparse_index_init(unsigned long section_nr, int nid) { - unsigned long root = SECTION_NR_TO_ROOT(section); + static spinlock_t index_init_lock = SPIN_LOCK_UNLOCKED; + unsigned long root = SECTION_NR_TO_ROOT(section_nr); + struct mem_section *section; + int ret = 0; if (mem_section[root]) - return; + return -EEXIST; - sparse_alloc_root(root, nid); + section = sparse_index_alloc(nid); + /* + * This lock keeps two different sections from + * reallocating for the same index + */ + spin_lock(&index_init_lock); - if (mem_section[root]) - memset(mem_section[root], 0, PAGE_SIZE); - else - panic("memory_present: NO MEMORY\n"); + if (mem_section[root]) { + ret = -EEXIST; + goto out; + } + + mem_section[root] = section; +out: + spin_unlock(&index_init_lock); + return ret; } +#else /* !SPARSEMEM_EXTREME */ +static inline int sparse_index_init(unsigned long section_nr, int nid) +{ + return 0; +} +#endif + /* Record a memory area against a node. */ void memory_present(int nid, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 11d31886dbcb61039ed3789e583d21c6e70960fd Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 3 Sep 2005 15:54:34 -0700 Subject: [PATCH] swap: swap extent list is ordered There are several comments that swap's extent_list.prev points to the lowest extent: that's not so, it's extent_list.next which points to it, as you'd expect. And a couple of loops in add_swap_extent which go all the way through the list, when they should just add to the other end. Fix those up, and let map_swap_page search the list forwards: profiles shows it to be twice as quick that way - because prefetch works better on how the structs are typically kmalloc'ed? or because usually more is written to than read from swap, and swap is allocated ascendingly? 
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 -- mm/swapfile.c | 27 +++++++++------------------ 2 files changed, 9 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index bfe3e763ccf2..38f288475e67 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -116,8 +116,6 @@ enum { /* * The in-memory structure used to track swap areas. - * extent_list.prev points at the lowest-index extent. That list is - * sorted. */ struct swap_info_struct { unsigned int flags; diff --git a/mm/swapfile.c b/mm/swapfile.c index 4b39e9501d44..73521d39e985 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -832,9 +832,9 @@ sector_t map_swap_page(struct swap_info_struct *sis, pgoff_t offset) offset < (se->start_page + se->nr_pages)) { return se->start_block + (offset - se->start_page); } - lh = se->list.prev; + lh = se->list.next; if (lh == &sis->extent_list) - lh = lh->prev; + lh = lh->next; se = list_entry(lh, struct swap_extent, list); sis->curr_swap_extent = se; BUG_ON(se == start_se); /* It *must* be present */ @@ -859,10 +859,9 @@ static void destroy_swap_extents(struct swap_info_struct *sis) /* * Add a block range (and the corresponding page range) into this swapdev's - * extent list. The extent list is kept sorted in block order. + * extent list. The extent list is kept sorted in page order. * - * This function rather assumes that it is called in ascending sector_t order. - * It doesn't look for extent coalescing opportunities. + * This function rather assumes that it is called in ascending page order. 
*/ static int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, @@ -872,16 +871,15 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, struct swap_extent *new_se; struct list_head *lh; - lh = sis->extent_list.next; /* The highest-addressed block */ - while (lh != &sis->extent_list) { + lh = sis->extent_list.prev; /* The highest page extent */ + if (lh != &sis->extent_list) { se = list_entry(lh, struct swap_extent, list); - if (se->start_block + se->nr_pages == start_block && - se->start_page + se->nr_pages == start_page) { + BUG_ON(se->start_page + se->nr_pages != start_page); + if (se->start_block + se->nr_pages == start_block) { /* Merge it */ se->nr_pages += nr_pages; return 0; } - lh = lh->next; } /* @@ -894,14 +892,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, new_se->nr_pages = nr_pages; new_se->start_block = start_block; - lh = sis->extent_list.prev; /* The lowest block */ - while (lh != &sis->extent_list) { - se = list_entry(lh, struct swap_extent, list); - if (se->start_block > start_block) - break; - lh = lh->prev; - } - list_add_tail(&new_se->list, lh); + list_add_tail(&new_se->list, &sis->extent_list); sis->nr_extents++; return 0; } -- cgit v1.2.3 From 53092a7402f227151a681b0c92ec8598c5618b1a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 3 Sep 2005 15:54:34 -0700 Subject: [PATCH] swap: show span of swap extents The "Adding %dk swap" message shows the number of swap extents, as a guide to how fragmented the swapfile may be. But a useful further guide is what total extent they span across (sometimes scarily large). And there's no need to keep nr_extents in swap_info: it's unused after the initial message, so save a little space by keeping it on stack. 
Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 1 - mm/swapfile.c | 44 ++++++++++++++++++++++++++++++-------------- 2 files changed, 30 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 38f288475e67..f2b16ac0b539 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -123,7 +123,6 @@ struct swap_info_struct { struct file *swap_file; struct block_device *bdev; struct list_head extent_list; - int nr_extents; struct swap_extent *curr_swap_extent; unsigned old_block_size; unsigned short * swap_map; diff --git a/mm/swapfile.c b/mm/swapfile.c index 73521d39e985..d4da84ee3925 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -854,7 +854,6 @@ static void destroy_swap_extents(struct swap_info_struct *sis) list_del(&se->list); kfree(se); } - sis->nr_extents = 0; } /* @@ -893,8 +892,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, new_se->start_block = start_block; list_add_tail(&new_se->list, &sis->extent_list); - sis->nr_extents++; - return 0; + return 1; } /* @@ -928,7 +926,7 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, * This is extremely effective. The average number of iterations in * map_swap_page() has been measured at about 0.3 per page. - akpm. 
*/ -static int setup_swap_extents(struct swap_info_struct *sis) +static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) { struct inode *inode; unsigned blocks_per_page; @@ -936,11 +934,15 @@ static int setup_swap_extents(struct swap_info_struct *sis) unsigned blkbits; sector_t probe_block; sector_t last_block; + sector_t lowest_block = -1; + sector_t highest_block = 0; + int nr_extents = 0; int ret; inode = sis->swap_file->f_mapping->host; if (S_ISBLK(inode->i_mode)) { ret = add_swap_extent(sis, 0, sis->max, 0); + *span = sis->pages; goto done; } @@ -985,19 +987,28 @@ static int setup_swap_extents(struct swap_info_struct *sis) } } + first_block >>= (PAGE_SHIFT - blkbits); + if (page_no) { /* exclude the header page */ + if (first_block < lowest_block) + lowest_block = first_block; + if (first_block > highest_block) + highest_block = first_block; + } + /* * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks */ - ret = add_swap_extent(sis, page_no, 1, - first_block >> (PAGE_SHIFT - blkbits)); - if (ret) + ret = add_swap_extent(sis, page_no, 1, first_block); + if (ret < 0) goto out; + nr_extents += ret; page_no++; probe_block += blocks_per_page; reprobe: continue; } - ret = 0; + ret = nr_extents; + *span = 1 + highest_block - lowest_block; if (page_no == 0) page_no = 1; /* force Empty message */ sis->max = page_no; @@ -1265,6 +1276,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) union swap_header *swap_header = NULL; int swap_header_version; int nr_good_pages = 0; + int nr_extents; + sector_t span; unsigned long maxpages = 1; int swapfilesize; unsigned short *swap_map; @@ -1300,7 +1313,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) nr_swapfiles = type+1; INIT_LIST_HEAD(&p->extent_list); p->flags = SWP_USED; - p->nr_extents = 0; p->swap_file = NULL; p->old_block_size = 0; p->swap_map = NULL; @@ -1477,9 +1489,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, 
int swap_flags) p->swap_map[0] = SWAP_MAP_BAD; p->max = maxpages; p->pages = nr_good_pages; - error = setup_swap_extents(p); - if (error) + nr_extents = setup_swap_extents(p, &span); + if (nr_extents < 0) { + error = nr_extents; goto bad_swap; + } nr_good_pages = p->pages; } if (!nr_good_pages) { @@ -1494,9 +1508,11 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) p->flags = SWP_ACTIVE; nr_swap_pages += nr_good_pages; total_swap_pages += nr_good_pages; - printk(KERN_INFO "Adding %dk swap on %s. Priority:%d extents:%d\n", - nr_good_pages<<(PAGE_SHIFT-10), name, - p->prio, p->nr_extents); + + printk(KERN_INFO "Adding %dk swap on %s. " + "Priority:%d extents:%d across:%lluk\n", + nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, + nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10)); /* insert swap space into swap_list: */ prev = -1; -- cgit v1.2.3 From 6eb396dc4a9781c5e7951143ab56ce5710687ab3 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 3 Sep 2005 15:54:35 -0700 Subject: [PATCH] swap: swap unsigned int consistency The swap header's unsigned int last_page determines the range of swap pages, but swap_info has been using int or unsigned long in some cases: use unsigned int throughout (except, in several places a local unsigned long is useful to avoid overflows when adding). 
Signed-off-by: Hugh Dickins Signed-off-by: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 +++--- mm/swapfile.c | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index f2b16ac0b539..93f0eca7f916 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -130,10 +130,10 @@ struct swap_info_struct { unsigned int highest_bit; unsigned int cluster_next; unsigned int cluster_nr; + unsigned int pages; + unsigned int max; + unsigned int inuse_pages; int prio; /* swap priority */ - int pages; - unsigned long max; - unsigned long inuse_pages; int next; /* next entry on swap list */ }; diff --git a/mm/swapfile.c b/mm/swapfile.c index d4da84ee3925..6cc6dfb4d27b 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -84,7 +84,7 @@ void swap_unplug_io_fn(struct backing_dev_info *unused_bdi, struct page *page) up_read(&swap_unplug_sem); } -static inline int scan_swap_map(struct swap_info_struct *si) +static inline unsigned long scan_swap_map(struct swap_info_struct *si) { unsigned long offset; /* @@ -531,10 +531,11 @@ static int unuse_mm(struct mm_struct *mm, * Scan swap_map from current position to next entry still in use. * Recycle to start on reaching the end, returning 0 when empty. 
*/ -static int find_next_to_unuse(struct swap_info_struct *si, int prev) +static unsigned int find_next_to_unuse(struct swap_info_struct *si, + unsigned int prev) { - int max = si->max; - int i = prev; + unsigned int max = si->max; + unsigned int i = prev; int count; /* @@ -577,7 +578,7 @@ static int try_to_unuse(unsigned int type) unsigned short swcount; struct page *page; swp_entry_t entry; - int i = 0; + unsigned int i = 0; int retval = 0; int reset_overflow = 0; int shmem; @@ -1216,7 +1217,7 @@ static int swap_show(struct seq_file *swap, void *v) file = ptr->swap_file; len = seq_path(swap, file->f_vfsmnt, file->f_dentry, " \t\n\\"); - seq_printf(swap, "%*s%s\t%d\t%ld\t%d\n", + seq_printf(swap, "%*s%s\t%u\t%u\t%d\n", len < 40 ? 40 - len : 1, " ", S_ISBLK(file->f_dentry->d_inode->i_mode) ? "partition" : "file\t", @@ -1275,8 +1276,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) static int least_priority; union swap_header *swap_header = NULL; int swap_header_version; - int nr_good_pages = 0; - int nr_extents; + unsigned int nr_good_pages = 0; + int nr_extents = 0; sector_t span; unsigned long maxpages = 1; int swapfilesize; @@ -1509,7 +1510,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) nr_swap_pages += nr_good_pages; total_swap_pages += nr_good_pages; - printk(KERN_INFO "Adding %dk swap on %s. " + printk(KERN_INFO "Adding %uk swap on %s. " "Priority:%d extents:%d across:%lluk\n", nr_good_pages<<(PAGE_SHIFT-10), name, p->prio, nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10)); -- cgit v1.2.3 From 52b7efdbe5f5696fc80338560a3fc51e0b0a993c Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 3 Sep 2005 15:54:39 -0700 Subject: [PATCH] swap: scan_swap_map drop swap_device_lock get_swap_page has often shown up on latency traces, doing lengthy scans while holding two spinlocks. swap_list_lock is already dropped, now scan_swap_map drop swap_device_lock before scanning the swap_map. 
While scanning for an empty cluster, don't worry that racing tasks may allocate what was free and free what was allocated; but when allocating an entry, check it's still free after retaking the lock. Avoid dropping the lock in the expected common path. No barriers beyond the locks, just let the cookie crumble; highest_bit limit is volatile, but benign. Guard against swapoff: must check SWP_WRITEOK before allocating, must raise SWP_SCANNING reference count while in scan_swap_map, swapoff wait for that to fall - just use schedule_timeout, we don't want to burden scan_swap_map itself, and it's very unlikely that anyone can really still be in scan_swap_map once swapoff gets this far. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 ++ mm/swapfile.c | 42 +++++++++++++++++++++++++++++++++++------- 2 files changed, 37 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 93f0eca7f916..db3b5de7c92f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -107,6 +107,8 @@ enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ SWP_ACTIVE = (SWP_USED | SWP_WRITEOK), + /* add others here before... */ + SWP_SCANNING = (1 << 8), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32 diff --git a/mm/swapfile.c b/mm/swapfile.c index c70248aab536..fdee145afc6f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -98,10 +98,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) * But we do now try to find an empty cluster. 
-Andrea */ + si->flags += SWP_SCANNING; if (unlikely(!si->cluster_nr)) { si->cluster_nr = SWAPFILE_CLUSTER - 1; if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) goto lowest; + swap_device_unlock(si); offset = si->lowest_bit; last_in_cluster = offset + SWAPFILE_CLUSTER - 1; @@ -111,10 +113,12 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { + swap_device_lock(si); si->cluster_next = offset-SWAPFILE_CLUSTER-1; goto cluster; } } + swap_device_lock(si); goto lowest; } @@ -123,10 +127,12 @@ cluster: offset = si->cluster_next; if (offset > si->highest_bit) lowest: offset = si->lowest_bit; +checks: if (!(si->flags & SWP_WRITEOK)) + goto no_page; if (!si->highest_bit) goto no_page; if (!si->swap_map[offset]) { -got_page: if (offset == si->lowest_bit) + if (offset == si->lowest_bit) si->lowest_bit++; if (offset == si->highest_bit) si->highest_bit--; @@ -137,16 +143,22 @@ got_page: if (offset == si->lowest_bit) } si->swap_map[offset] = 1; si->cluster_next = offset + 1; + si->flags -= SWP_SCANNING; return offset; } + swap_device_unlock(si); while (++offset <= si->highest_bit) { - if (!si->swap_map[offset]) - goto got_page; + if (!si->swap_map[offset]) { + swap_device_lock(si); + goto checks; + } } + swap_device_lock(si); goto lowest; no_page: + si->flags -= SWP_SCANNING; return 0; } @@ -1111,10 +1123,6 @@ asmlinkage long sys_swapoff(const char __user * specialfile) err = try_to_unuse(type); current->flags &= ~PF_SWAPOFF; - /* wait for any unplug function to finish */ - down_write(&swap_unplug_sem); - up_write(&swap_unplug_sem); - if (err) { /* re-insert swap space back into swap_list */ swap_list_lock(); @@ -1128,10 +1136,28 @@ asmlinkage long sys_swapoff(const char __user * specialfile) swap_info[prev].next = p - swap_info; nr_swap_pages += p->pages; total_swap_pages += p->pages; + swap_device_lock(p); p->flags |= SWP_WRITEOK; + 
swap_device_unlock(p); swap_list_unlock(); goto out_dput; } + + /* wait for any unplug function to finish */ + down_write(&swap_unplug_sem); + up_write(&swap_unplug_sem); + + /* wait for anyone still in scan_swap_map */ + swap_device_lock(p); + p->highest_bit = 0; /* cuts scans short */ + while (p->flags >= SWP_SCANNING) { + swap_device_unlock(p); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(1); + swap_device_lock(p); + } + swap_device_unlock(p); + destroy_swap_extents(p); down(&swapon_sem); swap_list_lock(); @@ -1431,6 +1457,8 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) } p->lowest_bit = 1; + p->cluster_next = 1; + /* * Find out how many pages are allowed for a single swap * device. There are two limiting factors: 1) the number of -- cgit v1.2.3 From 5d337b9194b1ce3b6fd5f3cb2799455ed2f9a3d1 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 3 Sep 2005 15:54:41 -0700 Subject: [PATCH] swap: swap_lock replace list+device The idea of a swap_device_lock per device, and a swap_list_lock over them all, is appealing; but in practice almost every holder of swap_device_lock must already hold swap_list_lock, which defeats the purpose of the split. The only exceptions have been swap_duplicate, valid_swaphandles and an untrodden path in try_to_unuse (plus a few places added in this series). valid_swaphandles doesn't show up high in profiles, but swap_duplicate does demand attention. However, with the hold time in get_swap_pages so much reduced, I've not yet found a load and set of swap device priorities to show even swap_duplicate benefitting from the split. Certainly the split is mere overhead in the common case of a single swap device. So, replace swap_list_lock and swap_device_lock by spinlock_t swap_lock (generally we seem to prefer an _ in the name, and not hide in a macro). 
If someone can show a regression in swap_duplicate, then probably we should add a hashlock for the swap_map entries alone (shorts being anatomic), so as to help the case of the single swap device too. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/vm/locking | 15 +++--- include/linux/swap.h | 11 +---- mm/filemap.c | 7 ++- mm/rmap.c | 3 +- mm/swapfile.c | 125 ++++++++++++++++++++--------------------------- 5 files changed, 66 insertions(+), 95 deletions(-) (limited to 'include/linux') diff --git a/Documentation/vm/locking b/Documentation/vm/locking index c3ef09ae3bb1..f366fa956179 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -83,19 +83,18 @@ single address space optimization, so that the zap_page_range (from vmtruncate) does not lose sending ipi's to cloned threads that might be spawned underneath it and go to user mode to drag in pte's into tlbs. -swap_list_lock/swap_device_lock -------------------------------- +swap_lock +-------------- The swap devices are chained in priority order from the "swap_list" header. The "swap_list" is used for the round-robin swaphandle allocation strategy. The #free swaphandles is maintained in "nr_swap_pages". These two together -are protected by the swap_list_lock. +are protected by the swap_lock. -The swap_device_lock, which is per swap device, protects the reference -counts on the corresponding swaphandles, maintained in the "swap_map" -array, and the "highest_bit" and "lowest_bit" fields. +The swap_lock also protects all the device reference counts on the +corresponding swaphandles, maintained in the "swap_map" array, and the +"highest_bit" and "lowest_bit" fields. -Both of these are spinlocks, and are never acquired from intr level. The -locking hierarchy is swap_list_lock -> swap_device_lock. +The swap_lock is a spinlock, and is never acquired from intr level. 
To prevent races between swap space deletion or async readahead swapins deciding whether a swap handle is being used, ie worthy of being read in diff --git a/include/linux/swap.h b/include/linux/swap.h index db3b5de7c92f..3c9ff0048153 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -121,7 +121,7 @@ enum { */ struct swap_info_struct { unsigned int flags; - spinlock_t sdev_lock; + int prio; /* swap priority */ struct file *swap_file; struct block_device *bdev; struct list_head extent_list; @@ -135,7 +135,6 @@ struct swap_info_struct { unsigned int pages; unsigned int max; unsigned int inuse_pages; - int prio; /* swap priority */ int next; /* next entry on swap list */ }; @@ -221,13 +220,7 @@ extern int can_share_swap_page(struct page *); extern int remove_exclusive_swap_page(struct page *); struct backing_dev_info; -extern struct swap_list_t swap_list; -extern spinlock_t swaplock; - -#define swap_list_lock() spin_lock(&swaplock) -#define swap_list_unlock() spin_unlock(&swaplock) -#define swap_device_lock(p) spin_lock(&p->sdev_lock) -#define swap_device_unlock(p) spin_unlock(&p->sdev_lock) +extern spinlock_t swap_lock; /* linux/mm/thrash.c */ extern struct mm_struct * swap_token_mm; diff --git a/mm/filemap.c b/mm/filemap.c index c11418dd94e8..edc54436fa94 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -54,9 +54,8 @@ * * ->i_mmap_lock (vmtruncate) * ->private_lock (__free_pte->__set_page_dirty_buffers) - * ->swap_list_lock - * ->swap_device_lock (exclusive_swap_page, others) - * ->mapping->tree_lock + * ->swap_lock (exclusive_swap_page, others) + * ->mapping->tree_lock * * ->i_sem * ->i_mmap_lock (truncate->unmap_mapping_range) @@ -86,7 +85,7 @@ * ->page_table_lock (anon_vma_prepare and various) * * ->page_table_lock - * ->swap_device_lock (try_to_unmap_one) + * ->swap_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one) * ->zone.lru_lock (follow_page->mark_page_accessed) diff --git a/mm/rmap.c b/mm/rmap.c index 
08ac5c7fa91f..facb8cdca665 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -34,9 +34,8 @@ * anon_vma->lock * mm->page_table_lock * zone->lru_lock (in mark_page_accessed) - * swap_list_lock (in swap_free etc's swap_info_get) + * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) - * swap_device_lock (in swap_duplicate, swap_info_get) * mapping->private_lock (in __set_page_dirty_buffers) * inode_lock (in set_page_dirty's __mark_inode_dirty) * sb_lock (within inode_lock in fs/fs-writeback.c) diff --git a/mm/swapfile.c b/mm/swapfile.c index e675ae55f87d..4b6e8bf986bc 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -31,7 +31,7 @@ #include #include -DEFINE_SPINLOCK(swaplock); +DEFINE_SPINLOCK(swap_lock); unsigned int nr_swapfiles; long total_swap_pages; static int swap_overflow; @@ -51,7 +51,7 @@ static DECLARE_MUTEX(swapon_sem); /* * We need this because the bdev->unplug_fn can sleep and we cannot - * hold swap_list_lock while calling the unplug_fn. And swap_list_lock + * hold swap_lock while calling the unplug_fn. And swap_lock * cannot be turned into a semaphore. 
*/ static DECLARE_RWSEM(swap_unplug_sem); @@ -105,7 +105,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) si->cluster_nr = SWAPFILE_CLUSTER - 1; if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) goto lowest; - swap_device_unlock(si); + spin_unlock(&swap_lock); offset = si->lowest_bit; last_in_cluster = offset + SWAPFILE_CLUSTER - 1; @@ -115,7 +115,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) if (si->swap_map[offset]) last_in_cluster = offset + SWAPFILE_CLUSTER; else if (offset == last_in_cluster) { - swap_device_lock(si); + spin_lock(&swap_lock); si->cluster_next = offset-SWAPFILE_CLUSTER-1; goto cluster; } @@ -124,7 +124,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si) latency_ration = LATENCY_LIMIT; } } - swap_device_lock(si); + spin_lock(&swap_lock); goto lowest; } @@ -153,10 +153,10 @@ checks: if (!(si->flags & SWP_WRITEOK)) return offset; } - swap_device_unlock(si); + spin_unlock(&swap_lock); while (++offset <= si->highest_bit) { if (!si->swap_map[offset]) { - swap_device_lock(si); + spin_lock(&swap_lock); goto checks; } if (unlikely(--latency_ration < 0)) { @@ -164,7 +164,7 @@ checks: if (!(si->flags & SWP_WRITEOK)) latency_ration = LATENCY_LIMIT; } } - swap_device_lock(si); + spin_lock(&swap_lock); goto lowest; no_page: @@ -179,7 +179,7 @@ swp_entry_t get_swap_page(void) int type, next; int wrapped = 0; - swap_list_lock(); + spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; nr_swap_pages--; @@ -199,19 +199,17 @@ swp_entry_t get_swap_page(void) continue; swap_list.next = next; - swap_device_lock(si); - swap_list_unlock(); offset = scan_swap_map(si); - swap_device_unlock(si); - if (offset) + if (offset) { + spin_unlock(&swap_lock); return swp_entry(type, offset); - swap_list_lock(); + } next = swap_list.next; } nr_swap_pages++; noswap: - swap_list_unlock(); + spin_unlock(&swap_lock); return (swp_entry_t) {0}; } @@ -233,8 +231,7 @@ static struct swap_info_struct * 
swap_info_get(swp_entry_t entry) goto bad_offset; if (!p->swap_map[offset]) goto bad_free; - swap_list_lock(); - swap_device_lock(p); + spin_lock(&swap_lock); return p; bad_free: @@ -252,12 +249,6 @@ out: return NULL; } -static void swap_info_put(struct swap_info_struct * p) -{ - swap_device_unlock(p); - swap_list_unlock(); -} - static int swap_entry_free(struct swap_info_struct *p, unsigned long offset) { int count = p->swap_map[offset]; @@ -290,7 +281,7 @@ void swap_free(swp_entry_t entry) p = swap_info_get(entry); if (p) { swap_entry_free(p, swp_offset(entry)); - swap_info_put(p); + spin_unlock(&swap_lock); } } @@ -308,7 +299,7 @@ static inline int page_swapcount(struct page *page) if (p) { /* Subtract the 1 for the swap cache itself */ count = p->swap_map[swp_offset(entry)] - 1; - swap_info_put(p); + spin_unlock(&swap_lock); } return count; } @@ -365,7 +356,7 @@ int remove_exclusive_swap_page(struct page *page) } write_unlock_irq(&swapper_space.tree_lock); } - swap_info_put(p); + spin_unlock(&swap_lock); if (retval) { swap_free(entry); @@ -388,7 +379,7 @@ void free_swap_and_cache(swp_entry_t entry) if (p) { if (swap_entry_free(p, swp_offset(entry)) == 1) page = find_trylock_page(&swapper_space, entry.val); - swap_info_put(p); + spin_unlock(&swap_lock); } if (page) { int one_user; @@ -558,10 +549,10 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si, int count; /* - * No need for swap_device_lock(si) here: we're just looking + * No need for swap_lock here: we're just looking * for whether an entry is in use, not modifying it; false * hits are okay, and sys_swapoff() has already prevented new - * allocations from this area (while holding swap_list_lock()). + * allocations from this area (while holding swap_lock). */ for (;;) { if (++i >= max) { @@ -751,9 +742,9 @@ static int try_to_unuse(unsigned int type) * report them; but do report if we reset SWAP_MAP_MAX. 
*/ if (*swap_map == SWAP_MAP_MAX) { - swap_device_lock(si); + spin_lock(&swap_lock); *swap_map = 1; - swap_device_unlock(si); + spin_unlock(&swap_lock); reset_overflow = 1; } @@ -817,9 +808,9 @@ static int try_to_unuse(unsigned int type) } /* - * After a successful try_to_unuse, if no swap is now in use, we know we - * can empty the mmlist. swap_list_lock must be held on entry and exit. - * Note that mmlist_lock nests inside swap_list_lock, and an mm must be + * After a successful try_to_unuse, if no swap is now in use, we know + * we can empty the mmlist. swap_lock must be held on entry and exit. + * Note that mmlist_lock nests inside swap_lock, and an mm must be * added to the mmlist just after page_duplicate - before would be racy. */ static void drain_mmlist(void) @@ -1092,7 +1083,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) mapping = victim->f_mapping; prev = -1; - swap_list_lock(); + spin_lock(&swap_lock); for (type = swap_list.head; type >= 0; type = swap_info[type].next) { p = swap_info + type; if ((p->flags & SWP_ACTIVE) == SWP_ACTIVE) { @@ -1103,14 +1094,14 @@ asmlinkage long sys_swapoff(const char __user * specialfile) } if (type < 0) { err = -EINVAL; - swap_list_unlock(); + spin_unlock(&swap_lock); goto out_dput; } if (!security_vm_enough_memory(p->pages)) vm_unacct_memory(p->pages); else { err = -ENOMEM; - swap_list_unlock(); + spin_unlock(&swap_lock); goto out_dput; } if (prev < 0) { @@ -1124,10 +1115,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile) } nr_swap_pages -= p->pages; total_swap_pages -= p->pages; - swap_device_lock(p); p->flags &= ~SWP_WRITEOK; - swap_device_unlock(p); - swap_list_unlock(); + spin_unlock(&swap_lock); current->flags |= PF_SWAPOFF; err = try_to_unuse(type); @@ -1135,7 +1124,7 @@ asmlinkage long sys_swapoff(const char __user * specialfile) if (err) { /* re-insert swap space back into swap_list */ - swap_list_lock(); + spin_lock(&swap_lock); for (prev = -1, i = swap_list.head; i >= 0; prev 
= i, i = swap_info[i].next) if (p->prio >= swap_info[i].prio) break; @@ -1146,10 +1135,8 @@ asmlinkage long sys_swapoff(const char __user * specialfile) swap_info[prev].next = p - swap_info; nr_swap_pages += p->pages; total_swap_pages += p->pages; - swap_device_lock(p); p->flags |= SWP_WRITEOK; - swap_device_unlock(p); - swap_list_unlock(); + spin_unlock(&swap_lock); goto out_dput; } @@ -1157,30 +1144,27 @@ asmlinkage long sys_swapoff(const char __user * specialfile) down_write(&swap_unplug_sem); up_write(&swap_unplug_sem); + destroy_swap_extents(p); + down(&swapon_sem); + spin_lock(&swap_lock); + drain_mmlist(); + /* wait for anyone still in scan_swap_map */ - swap_device_lock(p); p->highest_bit = 0; /* cuts scans short */ while (p->flags >= SWP_SCANNING) { - swap_device_unlock(p); + spin_unlock(&swap_lock); set_current_state(TASK_UNINTERRUPTIBLE); schedule_timeout(1); - swap_device_lock(p); + spin_lock(&swap_lock); } - swap_device_unlock(p); - destroy_swap_extents(p); - down(&swapon_sem); - swap_list_lock(); - drain_mmlist(); - swap_device_lock(p); swap_file = p->swap_file; p->swap_file = NULL; p->max = 0; swap_map = p->swap_map; p->swap_map = NULL; p->flags = 0; - swap_device_unlock(p); - swap_list_unlock(); + spin_unlock(&swap_lock); up(&swapon_sem); vfree(swap_map); inode = mapping->host; @@ -1324,7 +1308,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - swap_list_lock(); + spin_lock(&swap_lock); p = swap_info; for (type = 0 ; type < nr_swapfiles ; type++,p++) if (!(p->flags & SWP_USED)) @@ -1343,7 +1327,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) * swp_entry_t or the architecture definition of a swap pte. 
*/ if (type > swp_type(pte_to_swp_entry(swp_entry_to_pte(swp_entry(~0UL,0))))) { - swap_list_unlock(); + spin_unlock(&swap_lock); goto out; } if (type >= nr_swapfiles) @@ -1357,7 +1341,6 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) p->highest_bit = 0; p->cluster_nr = 0; p->inuse_pages = 0; - spin_lock_init(&p->sdev_lock); p->next = -1; if (swap_flags & SWAP_FLAG_PREFER) { p->prio = @@ -1365,7 +1348,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) } else { p->prio = --least_priority; } - swap_list_unlock(); + spin_unlock(&swap_lock); name = getname(specialfile); error = PTR_ERR(name); if (IS_ERR(name)) { @@ -1542,8 +1525,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) } down(&swapon_sem); - swap_list_lock(); - swap_device_lock(p); + spin_lock(&swap_lock); p->flags = SWP_ACTIVE; nr_swap_pages += nr_good_pages; total_swap_pages += nr_good_pages; @@ -1567,8 +1549,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) } else { swap_info[prev].next = p - swap_info; } - swap_device_unlock(p); - swap_list_unlock(); + spin_unlock(&swap_lock); up(&swapon_sem); error = 0; goto out; @@ -1579,14 +1560,14 @@ bad_swap: } destroy_swap_extents(p); bad_swap_2: - swap_list_lock(); + spin_lock(&swap_lock); swap_map = p->swap_map; p->swap_file = NULL; p->swap_map = NULL; p->flags = 0; if (!(swap_flags & SWAP_FLAG_PREFER)) ++least_priority; - swap_list_unlock(); + spin_unlock(&swap_lock); vfree(swap_map); if (swap_file) filp_close(swap_file, NULL); @@ -1610,7 +1591,7 @@ void si_swapinfo(struct sysinfo *val) unsigned int i; unsigned long nr_to_be_unused = 0; - swap_list_lock(); + spin_lock(&swap_lock); for (i = 0; i < nr_swapfiles; i++) { if (!(swap_info[i].flags & SWP_USED) || (swap_info[i].flags & SWP_WRITEOK)) @@ -1619,7 +1600,7 @@ void si_swapinfo(struct sysinfo *val) } val->freeswap = nr_swap_pages + nr_to_be_unused; val->totalswap = total_swap_pages + 
nr_to_be_unused; - swap_list_unlock(); + spin_unlock(&swap_lock); } /* @@ -1640,7 +1621,7 @@ int swap_duplicate(swp_entry_t entry) p = type + swap_info; offset = swp_offset(entry); - swap_device_lock(p); + spin_lock(&swap_lock); if (offset < p->max && p->swap_map[offset]) { if (p->swap_map[offset] < SWAP_MAP_MAX - 1) { p->swap_map[offset]++; @@ -1652,7 +1633,7 @@ int swap_duplicate(swp_entry_t entry) result = 1; } } - swap_device_unlock(p); + spin_unlock(&swap_lock); out: return result; @@ -1668,7 +1649,7 @@ get_swap_info_struct(unsigned type) } /* - * swap_device_lock prevents swap_map being freed. Don't grab an extra + * swap_lock prevents swap_map being freed. Don't grab an extra * reference on the swaphandle, it doesn't matter if it becomes unused. */ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) @@ -1684,7 +1665,7 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) toff++, i--; *offset = toff; - swap_device_lock(swapdev); + spin_lock(&swap_lock); do { /* Don't read-ahead past the end of the swap area */ if (toff >= swapdev->max) @@ -1697,6 +1678,6 @@ int valid_swaphandles(swp_entry_t entry, unsigned long *offset) toff++; ret++; } while (--i); - swap_device_unlock(swapdev); + spin_unlock(&swap_lock); return ret; } -- cgit v1.2.3 From 6e21c8f145f5052c1c2fb4a4b41bee01c848159b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sat, 3 Sep 2005 15:54:45 -0700 Subject: [PATCH] /proc/<pid>/numa_maps to show on which nodes pages reside This patch was recently discussed on linux-mm: http://marc.theaimsgroup.com/?t=112085728500002&r=1&w=2 I inherited a large code base from Ray for page migration. There was a small patch in there that I find to be very useful since it allows the display of the locality of the pages in use by a process. I reworked that patch and came up with a /proc/<pid>/numa_maps that gives more information about the vma's of a process. numa_maps is indexed by the start address found in /proc/<pid>/maps. F.e.
with this patch you can see the page use of the "getty" process: margin:/proc/12008 # cat maps 00000000-00004000 r--p 00000000 00:00 0 2000000000000000-200000000002c000 r-xp 00000000 08:04 516 /lib/ld-2.3.3.so 2000000000038000-2000000000040000 rw-p 00028000 08:04 516 /lib/ld-2.3.3.so 2000000000040000-2000000000044000 rw-p 2000000000040000 00:00 0 2000000000058000-2000000000260000 r-xp 00000000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000260000-2000000000268000 ---p 00208000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000268000-2000000000274000 rw-p 00200000 08:04 54707842 /lib/tls/libc.so.6.1 2000000000274000-2000000000280000 rw-p 2000000000274000 00:00 0 2000000000280000-20000000002b4000 r--p 00000000 08:04 9126923 /usr/lib/locale/en_US.utf8/LC_CTYPE 2000000000300000-2000000000308000 r--s 00000000 08:04 60071467 /usr/lib/gconv/gconv-modules.cache 2000000000318000-2000000000328000 rw-p 2000000000318000 00:00 0 4000000000000000-4000000000008000 r-xp 00000000 08:04 29576399 /sbin/mingetty 6000000000004000-6000000000008000 rw-p 00004000 08:04 29576399 /sbin/mingetty 6000000000008000-600000000002c000 rw-p 6000000000008000 00:00 0 [heap] 60000fff7fffc000-60000fff80000000 rw-p 60000fff7fffc000 00:00 0 60000ffffff44000-60000ffffff98000 rw-p 60000ffffff44000 00:00 0 [stack] a000000000000000-a000000000020000 ---p 00000000 00:00 0 [vdso] cat numa_maps 2000000000000000 default MaxRef=43 Pages=11 Mapped=11 N0=4 N1=3 N2=2 N3=2 2000000000038000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 2000000000040000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 2000000000058000 default MaxRef=43 Pages=61 Mapped=61 N0=14 N1=15 N2=16 N3=16 2000000000268000 default MaxRef=1 Pages=2 Mapped=2 Anon=2 N0=2 2000000000274000 default MaxRef=1 Pages=3 Mapped=3 Anon=3 N0=3 2000000000280000 default MaxRef=8 Pages=3 Mapped=3 N0=3 2000000000300000 default MaxRef=8 Pages=2 Mapped=2 N0=2 2000000000318000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N2=1 4000000000000000 default MaxRef=6 Pages=2 Mapped=2 N1=2 
6000000000004000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 6000000000008000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000fff7fffc000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 60000ffffff44000 default MaxRef=1 Pages=1 Mapped=1 Anon=1 N0=1 getty uses ld.so. The first vma is the code segment which is used by 43 other processes and the pages are evenly distributed over the 4 nodes. The second vma is the process specific data portion for ld.so. This is only one page. The display format is: Links to information in /proc//map This can be "default" "interleave={}", "prefer=" or "bind={}" MaxRef= Pages= Mapped= Anon= Nx= The content of the proc-file is self-evident. If this would be tied into the sparsemem system then the contents of this file would not be too useful. Signed-off-by: Christoph Lameter Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 35 ++++++++++++ fs/proc/task_mmu.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/mempolicy.h | 3 ++ mm/mempolicy.c | 12 ++--- 4 files changed, 176 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 491f2d9f89ac..b796bf90a0b1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -65,6 +65,7 @@ enum pid_directory_inos { PROC_TGID_STAT, PROC_TGID_STATM, PROC_TGID_MAPS, + PROC_TGID_NUMA_MAPS, PROC_TGID_MOUNTS, PROC_TGID_WCHAN, #ifdef CONFIG_SCHEDSTATS @@ -102,6 +103,7 @@ enum pid_directory_inos { PROC_TID_STAT, PROC_TID_STATM, PROC_TID_MAPS, + PROC_TID_NUMA_MAPS, PROC_TID_MOUNTS, PROC_TID_WCHAN, #ifdef CONFIG_SCHEDSTATS @@ -144,6 +146,9 @@ static struct pid_entry tgid_base_stuff[] = { E(PROC_TGID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TGID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TGID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TGID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TGID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TGID_SECCOMP, 
"seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -180,6 +185,9 @@ static struct pid_entry tid_base_stuff[] = { E(PROC_TID_STAT, "stat", S_IFREG|S_IRUGO), E(PROC_TID_STATM, "statm", S_IFREG|S_IRUGO), E(PROC_TID_MAPS, "maps", S_IFREG|S_IRUGO), +#ifdef CONFIG_NUMA + E(PROC_TID_NUMA_MAPS, "numa_maps", S_IFREG|S_IRUGO), +#endif E(PROC_TID_MEM, "mem", S_IFREG|S_IRUSR|S_IWUSR), #ifdef CONFIG_SECCOMP E(PROC_TID_SECCOMP, "seccomp", S_IFREG|S_IRUSR|S_IWUSR), @@ -515,6 +523,27 @@ static struct file_operations proc_maps_operations = { .release = seq_release, }; +#ifdef CONFIG_NUMA +extern struct seq_operations proc_pid_numa_maps_op; +static int numa_maps_open(struct inode *inode, struct file *file) +{ + struct task_struct *task = proc_task(inode); + int ret = seq_open(file, &proc_pid_numa_maps_op); + if (!ret) { + struct seq_file *m = file->private_data; + m->private = task; + } + return ret; +} + +static struct file_operations proc_numa_maps_operations = { + .open = numa_maps_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; +#endif + extern struct seq_operations mounts_op; static int mounts_open(struct inode *inode, struct file *file) { @@ -1524,6 +1553,12 @@ static struct dentry *proc_pident_lookup(struct inode *dir, case PROC_TGID_MAPS: inode->i_fop = &proc_maps_operations; break; +#ifdef CONFIG_NUMA + case PROC_TID_NUMA_MAPS: + case PROC_TGID_NUMA_MAPS: + inode->i_fop = &proc_numa_maps_operations; + break; +#endif case PROC_TID_MEM: case PROC_TGID_MEM: inode->i_op = &proc_mem_inode_operations; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 28b4a0253a92..64e84cadfa3c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include "internal.h" @@ -233,3 +235,133 @@ struct seq_operations proc_pid_maps_op = { .stop = m_stop, .show = show_map }; + +#ifdef CONFIG_NUMA + +struct numa_maps { + unsigned long pages; + unsigned long anon; + unsigned long mapped; + 
unsigned long mapcount_max; + unsigned long node[MAX_NUMNODES]; +}; + +/* + * Calculate numa node maps for a vma + */ +static struct numa_maps *get_numa_maps(const struct vm_area_struct *vma) +{ + struct page *page; + unsigned long vaddr; + struct mm_struct *mm = vma->vm_mm; + int i; + struct numa_maps *md = kmalloc(sizeof(struct numa_maps), GFP_KERNEL); + + if (!md) + return NULL; + md->pages = 0; + md->anon = 0; + md->mapped = 0; + md->mapcount_max = 0; + for_each_node(i) + md->node[i] =0; + + spin_lock(&mm->page_table_lock); + for (vaddr = vma->vm_start; vaddr < vma->vm_end; vaddr += PAGE_SIZE) { + page = follow_page(mm, vaddr, 0); + if (page) { + int count = page_mapcount(page); + + if (count) + md->mapped++; + if (count > md->mapcount_max) + md->mapcount_max = count; + md->pages++; + if (PageAnon(page)) + md->anon++; + md->node[page_to_nid(page)]++; + } + } + spin_unlock(&mm->page_table_lock); + return md; +} + +static int show_numa_map(struct seq_file *m, void *v) +{ + struct task_struct *task = m->private; + struct vm_area_struct *vma = v; + struct mempolicy *pol; + struct numa_maps *md; + struct zone **z; + int n; + int first; + + if (!vma->vm_mm) + return 0; + + md = get_numa_maps(vma); + if (!md) + return 0; + + seq_printf(m, "%08lx", vma->vm_start); + pol = get_vma_policy(task, vma, vma->vm_start); + /* Print policy */ + switch (pol->policy) { + case MPOL_PREFERRED: + seq_printf(m, " prefer=%d", pol->v.preferred_node); + break; + case MPOL_BIND: + seq_printf(m, " bind={"); + first = 1; + for (z = pol->v.zonelist->zones; *z; z++) { + + if (!first) + seq_putc(m, ','); + else + first = 0; + seq_printf(m, "%d/%s", (*z)->zone_pgdat->node_id, + (*z)->name); + } + seq_putc(m, '}'); + break; + case MPOL_INTERLEAVE: + seq_printf(m, " interleave={"); + first = 1; + for_each_node(n) { + if (test_bit(n, pol->v.nodes)) { + if (!first) + seq_putc(m,','); + else + first = 0; + seq_printf(m, "%d",n); + } + } + seq_putc(m, '}'); + break; + default: + seq_printf(m," 
default"); + break; + } + seq_printf(m, " MaxRef=%lu Pages=%lu Mapped=%lu", + md->mapcount_max, md->pages, md->mapped); + if (md->anon) + seq_printf(m," Anon=%lu",md->anon); + + for_each_online_node(n) { + if (md->node[n]) + seq_printf(m, " N%d=%lu", n, md->node[n]); + } + seq_putc(m, '\n'); + kfree(md); + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + return 0; +} + +struct seq_operations proc_pid_numa_maps_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_numa_map +}; +#endif diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8480aef10e62..94a46f38c532 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,6 +150,9 @@ void mpol_free_shared_policy(struct shared_policy *p); struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx); +struct mempolicy *get_vma_policy(struct task_struct *task, + struct vm_area_struct *vma, unsigned long addr); + extern void numa_default_policy(void); extern void numa_policy_init(void); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b4eababc8198..13492d66b7c8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -664,10 +664,10 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, #endif /* Return effective policy for a VMA */ -static struct mempolicy * -get_vma_policy(struct vm_area_struct *vma, unsigned long addr) +struct mempolicy * +get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = task->mempolicy; if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) @@ -786,7 +786,7 @@ static struct page *alloc_page_interleave(unsigned int __nocast gfp, unsigned or struct page * alloc_page_vma(unsigned int __nocast gfp, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct 
mempolicy *pol = get_vma_policy(current, vma, addr); cpuset_update_current_mems_allowed(); @@ -908,7 +908,7 @@ void __mpol_free(struct mempolicy *p) /* Find first node suitable for an allocation */ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr); switch (pol->policy) { case MPOL_DEFAULT: @@ -928,7 +928,7 @@ int mpol_first_node(struct vm_area_struct *vma, unsigned long addr) /* Find secondary valid nodes for an allocation */ int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = get_vma_policy(vma, addr); + struct mempolicy *pol = get_vma_policy(current, vma, addr); switch (pol->policy) { case MPOL_PREFERRED: -- cgit v1.2.3 From 242e54686257493f0b10ac557e730419d9af7d24 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 3 Sep 2005 15:54:50 -0700 Subject: [PATCH] mm: remove atomic This bitop does not need to be atomic because it is performed when there will be no references to the page (ie. the page is being freed). 
Signed-off-by: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 1 + mm/page_alloc.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f5a6695d4d21..99f7cc495065 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -194,6 +194,7 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta); #define SetPageDirty(page) set_bit(PG_dirty, &(page)->flags) #define TestSetPageDirty(page) test_and_set_bit(PG_dirty, &(page)->flags) #define ClearPageDirty(page) clear_bit(PG_dirty, &(page)->flags) +#define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags) #define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags) #define SetPageLRU(page) set_bit(PG_lru, &(page)->flags) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 8d088371196a..620aa11b24eb 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -329,7 +329,7 @@ static inline void free_pages_check(const char *function, struct page *page) 1 << PG_writeback ))) bad_page(function, page); if (PageDirty(page)) - ClearPageDirty(page); + __ClearPageDirty(page); } /* -- cgit v1.2.3 From bce5f6ba340b09d8b29902add204bb95a6d3d88b Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Sat, 3 Sep 2005 15:54:50 -0700 Subject: [PATCH] VM: add capabilites check to set_zone_reclaim Add a capability check to sys_set_zone_reclaim(). This syscall is not something that should be available to a user. 
Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/capability.h | 1 + mm/vmscan.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/capability.h b/include/linux/capability.h index 8d139f4acf23..6b4618902d3d 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -233,6 +233,7 @@ typedef __u32 kernel_cap_t; /* Allow enabling/disabling tagged queuing on SCSI controllers and sending arbitrary SCSI commands */ /* Allow setting encryption key on loopback filesystem */ +/* Allow setting zone reclaim policy */ #define CAP_SYS_ADMIN 21 diff --git a/mm/vmscan.c b/mm/vmscan.c index cfffe5098d53..ab631a3c62c3 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1375,6 +1375,9 @@ asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone, struct zone *z; int i; + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + if (node >= MAX_NUMNODES || !node_online(node)) return -EINVAL; -- cgit v1.2.3 From e83a9596712eb784e7e6604f43a2c140eb912743 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sat, 3 Sep 2005 15:54:53 -0700 Subject: [PATCH] comment typo fix smp_entry_t -> swap_entry_t Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index d4c7db35e708..87b9d14c710d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -4,7 +4,7 @@ * the low-order bits. * * We arrange the `type' and `offset' fields so that `type' is at the five - * high-order bits of the smp_entry_t and `offset' is right-aligned in the + * high-order bits of the swp_entry_t and `offset' is right-aligned in the * remaining bits. * * swp_entry_t's are *never* stored anywhere in their arch-dependent format. 
-- cgit v1.2.3 From fd195c49fb17a21e232f50bddb2267150053cf34 Mon Sep 17 00:00:00 2001 From: Deepak Saxena Date: Sat, 3 Sep 2005 15:54:58 -0700 Subject: [PATCH] arm: allow for arch-specific IOREMAP_MAX_ORDER Version 6 of the ARM architecture introduces the concept of 16MB pages (supersections) and 36-bit (40-bit actually, but nobody uses this) physical addresses. 36-bit addressed memory and I/O and ARMv6 can only be mapped using supersections and the requirement on these is that both virtual and physical addresses be 16MB aligned. In trying to add support for ioremap() of 36-bit I/O, we run into the issue that get_vm_area() allows for a maximum of 512K alignment via the IOREMAP_MAX_ORDER constant. To work around this, we can: - Allocate a larger VM area than needed (size + (1ul << IOREMAP_MAX_ORDER)) and then align the pointer ourselves, but this ends up with 512K of wasted VM per ioremap(). - Provide a new __get_vm_area_aligned() API and make __get_vm_area() sit on top of this. I did this and it works but I don't like the idea adding another VM API just for this one case. - My preferred solution which is to allow the architecture to override the IOREMAP_MAX_ORDER constant with it's own version. Signed-off-by: Deepak Saxena Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 8 ++++++++ mm/vmalloc.c | 2 -- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6409d9cf5965..b244f69ef682 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,6 +10,14 @@ #define VM_MAP 0x00000004 /* vmap()ed pages */ /* bits [20..32] reserved for arch specific ioremap internals */ +/* + * Maximum alignment for ioremap() regions. + * Can be overriden by arch-specific value. 
+ */ +#ifndef IOREMAP_MAX_ORDER +#define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ +#endif + struct vm_struct { void *addr; unsigned long size; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 8ff16a1eee6a..67b358e57ef6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -158,8 +158,6 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) return err; } -#define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ - struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 0e5c9f39f64d8a55c5db37a5ea43e37d3422fd92 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sat, 3 Sep 2005 15:55:02 -0700 Subject: [PATCH] remove hugetlb_clean_stale_pgtable() and fix huge_pte_alloc() I don't think we need to call hugetlb_clean_stale_pgtable() anymore in 2.6.13 because of the rework with free_pgtables(). It now collect all the pte page at the time of munmap. It used to only collect page table pages when entire one pgd can be freed and left with staled pte pages. Not anymore with 2.6.13. This function will never be called and We should turn it into a BUG_ON. I also spotted two problems here, not Adam's fault :-) (1) in huge_pte_alloc(), it looks like a bug to me that pud is not checked before calling pmd_alloc() (2) in hugetlb_clean_stale_pgtable(), it also missed a call to pmd_free_tlb. I think a tlb flush is required to flush the mapping for the page table itself when we clear out the pmd pointing to a pte page. However, since hugetlb_clean_stale_pgtable() is never called, so it won't trigger the bug. 
Signed-off-by: Ken Chen Cc: Adam Litke Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/hugetlbpage.c | 23 +++-------------------- include/asm-i386/page.h | 1 - include/asm-x86_64/page.h | 1 - include/linux/hugetlb.h | 6 ------ 4 files changed, 3 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 24c8a536b588..d524127c9afc 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c @@ -22,20 +22,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { pgd_t *pgd; pud_t *pud; - pmd_t *pmd; pte_t *pte = NULL; pgd = pgd_offset(mm, addr); pud = pud_alloc(mm, pgd, addr); - pmd = pmd_alloc(mm, pud, addr); + if (pud) + pte = (pte_t *) pmd_alloc(mm, pud, addr); + BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); - if (!pmd) - goto out; - - pte = (pte_t *) pmd; - if (!pte_none(*pte) && !pte_huge(*pte)) - hugetlb_clean_stale_pgtable(pte); -out: return pte; } @@ -130,17 +124,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, } #endif -void hugetlb_clean_stale_pgtable(pte_t *pte) -{ - pmd_t *pmd = (pmd_t *) pte; - struct page *page; - - page = pmd_page(*pmd); - pmd_clear(pmd); - dec_page_state(nr_page_table_pages); - page_cache_release(page); -} - /* x86_64 also uses this file */ #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 10045fd82103..73296d9924fb 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -68,7 +68,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA -#define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE #endif #define pgd_val(x) ((x).pgd) diff --git a/include/asm-x86_64/page.h b/include/asm-x86_64/page.h index fcf890aa8c81..135ffaa0393b 100644 --- a/include/asm-x86_64/page.h +++ b/include/asm-x86_64/page.h 
@@ -28,7 +28,6 @@ #define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) #define HPAGE_MASK (~(HPAGE_SIZE - 1)) #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -#define ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f529d1442815..e670b0d13fe0 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -70,12 +70,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, void hugetlb_prefault_arch_hook(struct mm_struct *mm); #endif -#ifndef ARCH_HAS_HUGETLB_CLEAN_STALE_PGTABLE -#define hugetlb_clean_stale_pgtable(pte) BUG() -#else -void hugetlb_clean_stale_pgtable(pte_t *pte); -#endif - #else /* !CONFIG_HUGETLB_PAGE */ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma) -- cgit v1.2.3 From c07e02db76940c75fc92f2f2c9adcdbb09ed70d0 Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Sat, 3 Sep 2005 15:55:11 -0700 Subject: [PATCH] VM: add page_state info to per-node meminfo Add page_state info to the per-node meminfo file in sysfs. This is mostly just for informational purposes. The lack of this information was brought up recently during a discussion regarding pagecache clearing, and I put this patch together to test out one of the suggestions. It seems like interesting info to have, so I'm submitting the patch. 
Signed-off-by: Martin Hicks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/node.c | 24 ++++++++++++++++++++++-- include/linux/page-flags.h | 1 + mm/page_alloc.c | 25 ++++++++++++++++++++----- 3 files changed, 43 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/node.c b/drivers/base/node.c index 904b27caf697..16c513aa4d48 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -39,13 +39,25 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) int n; int nid = dev->id; struct sysinfo i; + struct page_state ps; unsigned long inactive; unsigned long active; unsigned long free; si_meminfo_node(&i, nid); + get_page_state_node(&ps, nid); __get_zone_counts(&active, &inactive, &free, NODE_DATA(nid)); + /* Check for negative values in these approximate counters */ + if ((long)ps.nr_dirty < 0) + ps.nr_dirty = 0; + if ((long)ps.nr_writeback < 0) + ps.nr_writeback = 0; + if ((long)ps.nr_mapped < 0) + ps.nr_mapped = 0; + if ((long)ps.nr_slab < 0) + ps.nr_slab = 0; + n = sprintf(buf, "\n" "Node %d MemTotal: %8lu kB\n" "Node %d MemFree: %8lu kB\n" @@ -55,7 +67,11 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) "Node %d HighTotal: %8lu kB\n" "Node %d HighFree: %8lu kB\n" "Node %d LowTotal: %8lu kB\n" - "Node %d LowFree: %8lu kB\n", + "Node %d LowFree: %8lu kB\n" + "Node %d Dirty: %8lu kB\n" + "Node %d Writeback: %8lu kB\n" + "Node %d Mapped: %8lu kB\n" + "Node %d Slab: %8lu kB\n", nid, K(i.totalram), nid, K(i.freeram), nid, K(i.totalram - i.freeram), @@ -64,7 +80,11 @@ static ssize_t node_read_meminfo(struct sys_device * dev, char * buf) nid, K(i.totalhigh), nid, K(i.freehigh), nid, K(i.totalram - i.totalhigh), - nid, K(i.freeram - i.freehigh)); + nid, K(i.freeram - i.freehigh), + nid, K(ps.nr_dirty), + nid, K(ps.nr_writeback), + nid, K(ps.nr_mapped), + nid, K(ps.nr_slab)); n += hugetlb_report_node_meminfo(nid, buf + n); return n; } diff --git 
a/include/linux/page-flags.h b/include/linux/page-flags.h index 99f7cc495065..f34767c5fc79 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -134,6 +134,7 @@ struct page_state { }; extern void get_page_state(struct page_state *ret); +extern void get_page_state_node(struct page_state *ret, int node); extern void get_full_page_state(struct page_state *ret); extern unsigned long __read_page_state(unsigned long offset); extern void __mod_page_state(unsigned long offset, unsigned long delta); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index d157dae8c9f3..b06a9636d971 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1130,19 +1130,20 @@ EXPORT_SYMBOL(nr_pagecache); DEFINE_PER_CPU(long, nr_pagecache_local) = 0; #endif -void __get_page_state(struct page_state *ret, int nr) +void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) { int cpu = 0; memset(ret, 0, sizeof(*ret)); + cpus_and(*cpumask, *cpumask, cpu_online_map); - cpu = first_cpu(cpu_online_map); + cpu = first_cpu(*cpumask); while (cpu < NR_CPUS) { unsigned long *in, *out, off; in = (unsigned long *)&per_cpu(page_states, cpu); - cpu = next_cpu(cpu, cpu_online_map); + cpu = next_cpu(cpu, *cpumask); if (cpu < NR_CPUS) prefetch(&per_cpu(page_states, cpu)); @@ -1153,19 +1154,33 @@ void __get_page_state(struct page_state *ret, int nr) } } +void get_page_state_node(struct page_state *ret, int node) +{ + int nr; + cpumask_t mask = node_to_cpumask(node); + + nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); + nr /= sizeof(unsigned long); + + __get_page_state(ret, nr+1, &mask); +} + void get_page_state(struct page_state *ret) { int nr; + cpumask_t mask = CPU_MASK_ALL; nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); nr /= sizeof(unsigned long); - __get_page_state(ret, nr + 1); + __get_page_state(ret, nr + 1, &mask); } void get_full_page_state(struct page_state *ret) { - __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long)); + cpumask_t mask = 
CPU_MASK_ALL; + + __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); } unsigned long __read_page_state(unsigned long offset) -- cgit v1.2.3 From d01c08c9ae91c1526d4564b400b3e0e04b49d1ba Mon Sep 17 00:00:00 2001 From: "Mark A. Greer" Date: Sat, 3 Sep 2005 15:55:56 -0700 Subject: [PATCH] ppc32: mv64x60 updates & enhancements Updates and enhancement to the ppc32 mv64x60 code: - move code to get mem size from mem ctlr to bootwrapper - address some errata in the mv64360 pic code - some minor cleanups - export one of the bridge's regs via sysfs so user daemon can watch for extraction events Signed-off-by: Mark A. Greer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/Kconfig.debug | 3 +- arch/ppc/boot/simple/misc-mv64x60.c | 27 ++++ arch/ppc/syslib/mv64360_pic.c | 31 +++-- arch/ppc/syslib/mv64x60.c | 246 ++++++++++++++++++++++-------------- include/asm-ppc/mv64x60.h | 7 + include/asm-ppc/mv64x60_defs.h | 9 +- include/linux/mv643xx.h | 2 +- 7 files changed, 212 insertions(+), 113 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/Kconfig.debug b/arch/ppc/Kconfig.debug index e16c7710d4be..61653cb60c4e 100644 --- a/arch/ppc/Kconfig.debug +++ b/arch/ppc/Kconfig.debug @@ -62,7 +62,8 @@ config BOOTX_TEXT config SERIAL_TEXT_DEBUG bool "Support for early boot texts over serial port" - depends on 4xx || GT64260 || LOPEC || PPLUS || PRPMC800 || PPC_GEN550 || PPC_MPC52xx + depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \ + PPC_GEN550 || PPC_MPC52xx config PPC_OCP bool diff --git a/arch/ppc/boot/simple/misc-mv64x60.c b/arch/ppc/boot/simple/misc-mv64x60.c index 7e88fc6d207d..258d4599fadc 100644 --- a/arch/ppc/boot/simple/misc-mv64x60.c +++ b/arch/ppc/boot/simple/misc-mv64x60.c @@ -19,6 +19,33 @@ extern struct bi_record *decompress_kernel(unsigned long load_addr, int num_words, unsigned long cksum); + +u32 size_reg[MV64x60_CPU2MEM_WINDOWS] = { + MV64x60_CPU2MEM_0_SIZE, MV64x60_CPU2MEM_1_SIZE, + MV64x60_CPU2MEM_2_SIZE, 
MV64x60_CPU2MEM_3_SIZE +}; + +/* Read mem ctlr to get the amount of mem in system */ +unsigned long +mv64360_get_mem_size(void) +{ + u32 enables, i, v; + u32 mem = 0; + + enables = in_le32((void __iomem *)CONFIG_MV64X60_NEW_BASE + + MV64360_CPU_BAR_ENABLE) & 0xf; + + for (i=0; i 1)) - mask |= 0x1; /* enable DPErr on 64460 */ - /* Clear old errors and register PCI 0 error intr handler */ mv64x60_write(&bh, MV64x60_PCI0_ERR_CAUSE, 0); if ((rc = request_irq(MV64360_IRQ_PCI0 + mv64360_irq_base, @@ -407,7 +402,11 @@ mv64360_register_hdlrs(void) rc); mv64x60_write(&bh, MV64x60_PCI0_ERR_MASK, 0); - mv64x60_write(&bh, MV64x60_PCI0_ERR_MASK, mask); + mv64x60_write(&bh, MV64x60_PCI0_ERR_MASK, MV64360_PCI0_ERR_MASK_VAL); + + /* Erratum FEr PCI-#16 says to clear bit 0 of PCI SERRn Mask reg. */ + mv64x60_write(&bh, MV64x60_PCI0_ERR_SERR_MASK, + mv64x60_read(&bh, MV64x60_PCI0_ERR_SERR_MASK) & ~0x1UL); /* Clear old errors and register PCI 1 error intr handler */ mv64x60_write(&bh, MV64x60_PCI1_ERR_CAUSE, 0); @@ -418,7 +417,11 @@ mv64360_register_hdlrs(void) rc); mv64x60_write(&bh, MV64x60_PCI1_ERR_MASK, 0); - mv64x60_write(&bh, MV64x60_PCI1_ERR_MASK, mask); + mv64x60_write(&bh, MV64x60_PCI1_ERR_MASK, MV64360_PCI0_ERR_MASK_VAL); + + /* Erratum FEr PCI-#16 says to clear bit 0 of PCI Intr Mask reg. 
*/ + mv64x60_write(&bh, MV64x60_PCI1_ERR_SERR_MASK, + mv64x60_read(&bh, MV64x60_PCI1_ERR_SERR_MASK) & ~0x1UL); return 0; } diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c index cc77177fa1c6..6262b11f366f 100644 --- a/arch/ppc/syslib/mv64x60.c +++ b/arch/ppc/syslib/mv64x60.c @@ -30,13 +30,16 @@ #include -u8 mv64x60_pci_exclude_bridge = 1; +u8 mv64x60_pci_exclude_bridge = 1; spinlock_t mv64x60_lock = SPIN_LOCK_UNLOCKED; -static phys_addr_t mv64x60_bridge_pbase = 0; -static void *mv64x60_bridge_vbase = 0; +static phys_addr_t mv64x60_bridge_pbase; +static void *mv64x60_bridge_vbase; static u32 mv64x60_bridge_type = MV64x60_TYPE_INVALID; -static u32 mv64x60_bridge_rev = 0; +static u32 mv64x60_bridge_rev; +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) +static struct pci_controller sysfs_hose_a; +#endif static u32 gt64260_translate_size(u32 base, u32 size, u32 num_bits); static u32 gt64260_untranslate_size(u32 base, u32 size, u32 num_bits); @@ -432,6 +435,20 @@ static struct platform_device i2c_device = { }; #endif +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) +static struct mv64xxx_pdata mv64xxx_pdata = { + .hs_reg_valid = 0, +}; + +static struct platform_device mv64xxx_device = { /* general mv64x60 stuff */ + .name = MV64XXX_DEV_NAME, + .id = 0, + .dev = { + .platform_data = &mv64xxx_pdata, + }, +}; +#endif + static struct platform_device *mv64x60_pd_devs[] __initdata = { #ifdef CONFIG_SERIAL_MPSC &mpsc_shared_device, @@ -453,6 +470,9 @@ static struct platform_device *mv64x60_pd_devs[] __initdata = { #ifdef CONFIG_I2C_MV64XXX &i2c_device, #endif +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) + &mv64xxx_device, +#endif }; /* @@ -574,6 +594,11 @@ mv64x60_early_init(struct mv64x60_handle *bh, struct mv64x60_setup_info *si) bh->hose_a = &hose_a; bh->hose_b = &hose_b; +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) + /* Save a copy of hose_a for sysfs functions -- hack */ + memcpy(&sysfs_hose_a, &hose_a, sizeof(hose_a)); 
+#endif + mv64x60_set_bus(bh, 0, 0); mv64x60_set_bus(bh, 1, 0); @@ -590,8 +615,6 @@ mv64x60_early_init(struct mv64x60_handle *bh, struct mv64x60_setup_info *si) mv64x60_set_bits(bh, MV64x60_PCI0_TO_RETRY, 0xffff); mv64x60_set_bits(bh, MV64x60_PCI1_TO_RETRY, 0xffff); - - return; } /* @@ -628,19 +651,15 @@ mv64x60_get_32bit_window(struct mv64x60_handle *bh, u32 window, val = mv64x60_read(bh, size_reg); val = get_from_field(val, size_bits); *size = bh->ci->untranslate_size(*base, val, size_bits); - } - else + } else *size = 0; - } - else { + } else { *base = 0; *size = 0; } pr_debug("get 32bit window: %d, base: 0x%x, size: 0x%x\n", window, *base, *size); - - return; } /* @@ -677,8 +696,6 @@ mv64x60_set_32bit_window(struct mv64x60_handle *bh, u32 window, (void)mv64x60_read(bh, base_reg); /* Flush FIFO */ } - - return; } /* @@ -712,11 +729,9 @@ mv64x60_get_64bit_window(struct mv64x60_handle *bh, u32 window, val = get_from_field(val, size_bits); *size = bh->ci->untranslate_size(*base_lo, val, size_bits); - } - else + } else *size = 0; - } - else { + } else { *base_hi = 0; *base_lo = 0; *size = 0; @@ -724,8 +739,6 @@ mv64x60_get_64bit_window(struct mv64x60_handle *bh, u32 window, pr_debug("get 64bit window: %d, base hi: 0x%x, base lo: 0x%x, " "size: 0x%x\n", window, *base_hi, *base_lo, *size); - - return; } /* @@ -766,8 +779,6 @@ mv64x60_set_64bit_window(struct mv64x60_handle *bh, u32 window, (void)mv64x60_read(bh, base_lo_reg); /* Flush FIFO */ } - - return; } /* @@ -1008,8 +1019,6 @@ mv64x60_get_mem_windows(struct mv64x60_handle *bh, mem_windows[i][0] = 0; mem_windows[i][1] = 0; } - - return; } /* @@ -1077,8 +1086,6 @@ mv64x60_config_cpu2mem_windows(struct mv64x60_handle *bh, } } - - return; } /* @@ -1112,8 +1119,7 @@ mv64x60_config_cpu2pci_windows(struct mv64x60_handle *bh, mv64x60_set_32bit_window(bh, remap_tab[bus][0], pi->pci_io.pci_base_lo, 0, 0); bh->ci->enable_window_32bit(bh, win_tab[bus][0]); - } - else /* Actually, the window should already be disabled */ + } 
else /* Actually, the window should already be disabled */ bh->ci->disable_window_32bit(bh, win_tab[bus][0]); for (i=0; i<3; i++) @@ -1125,11 +1131,8 @@ mv64x60_config_cpu2pci_windows(struct mv64x60_handle *bh, pi->pci_mem[i].pci_base_hi, pi->pci_mem[i].pci_base_lo, 0, 0); bh->ci->enable_window_32bit(bh, win_tab[bus][i+1]); - } - else /* Actually, the window should already be disabled */ + } else /* Actually, the window should already be disabled */ bh->ci->disable_window_32bit(bh, win_tab[bus][i+1]); - - return; } /* @@ -1206,8 +1209,6 @@ mv64x60_config_pci2mem_windows(struct mv64x60_handle *bh, MV64x60_PCI0_BAR_ENABLE : MV64x60_PCI1_BAR_ENABLE), (1 << i)); } - - return; } /* @@ -1229,7 +1230,6 @@ mv64x60_alloc_hose(struct mv64x60_handle *bh, u32 cfg_addr, u32 cfg_data, *hose = pcibios_alloc_controller(); setup_indirect_pci_nomap(*hose, bh->v_base + cfg_addr, bh->v_base + cfg_data); - return; } /* @@ -1272,7 +1272,6 @@ mv64x60_config_resources(struct pci_controller *hose, pi->pci_mem[0].size - 1; hose->pci_mem_offset = pi->pci_mem[0].cpu_base - pi->pci_mem[0].pci_base_lo; - return; } /* @@ -1309,7 +1308,6 @@ mv64x60_config_pci_params(struct pci_controller *hose, early_write_config_word(hose, 0, devfn, PCI_CACHE_LINE_SIZE, u16_val); mv64x60_pci_exclude_bridge = save_exclude; - return; } /* @@ -1336,8 +1334,7 @@ mv64x60_set_bus(struct mv64x60_handle *bh, u32 bus, u32 child_bus) p2p_cfg = MV64x60_PCI0_P2P_CONFIG; pci_cfg_offset = 0x64; hose = bh->hose_a; - } - else { + } else { pci_mode = bh->pci_mode_b; p2p_cfg = MV64x60_PCI1_P2P_CONFIG; pci_cfg_offset = 0xe4; @@ -1352,8 +1349,7 @@ mv64x60_set_bus(struct mv64x60_handle *bh, u32 bus, u32 child_bus) val |= (child_bus << 16) | 0xff; mv64x60_write(bh, p2p_cfg, val); (void)mv64x60_read(bh, p2p_cfg); /* Flush FIFO */ - } - else { /* PCI-X */ + } else { /* PCI-X */ /* * Need to use the current bus/dev number (that's in the * P2P CONFIG reg) to access the bridge's pci config space. 
@@ -1365,8 +1361,6 @@ mv64x60_set_bus(struct mv64x60_handle *bh, u32 bus, u32 child_bus) pci_cfg_offset, child_bus << 8); mv64x60_pci_exclude_bridge = save_exclude; } - - return; } /* @@ -1423,8 +1417,6 @@ mv64x60_pd_fixup(struct mv64x60_handle *bh, struct platform_device *pd_devs[], j++; } } - - return; } /* @@ -1498,8 +1490,6 @@ gt64260_set_pci2mem_window(struct pci_controller *hose, u32 bus, u32 window, early_write_config_dword(hose, 0, PCI_DEVFN(0, 0), gt64260_reg_addrs[bus][window], mv64x60_mask(base, 20) | 0x8); mv64x60_pci_exclude_bridge = save_exclude; - - return; } /* @@ -1523,8 +1513,6 @@ gt64260_set_pci2regs_window(struct mv64x60_handle *bh, early_write_config_dword(hose, 0, PCI_DEVFN(0,0), gt64260_offset[bus], (base << 16)); mv64x60_pci_exclude_bridge = save_exclude; - - return; } /* @@ -1561,7 +1549,6 @@ static void __init gt64260_enable_window_32bit(struct mv64x60_handle *bh, u32 window) { pr_debug("enable 32bit window: %d\n", window); - return; } /* @@ -1584,8 +1571,6 @@ gt64260_disable_window_32bit(struct mv64x60_handle *bh, u32 window) mv64x60_write(bh, gt64260_32bit_windows[window].base_reg,0xfff); mv64x60_write(bh, gt64260_32bit_windows[window].size_reg, 0); } - - return; } /* @@ -1599,7 +1584,6 @@ static void __init gt64260_enable_window_64bit(struct mv64x60_handle *bh, u32 window) { pr_debug("enable 64bit window: %d\n", window); - return; /* Enabled when window configured (i.e., when top >= base) */ } /* @@ -1624,8 +1608,6 @@ gt64260_disable_window_64bit(struct mv64x60_handle *bh, u32 window) mv64x60_write(bh, gt64260_64bit_windows[window].base_hi_reg, 0); mv64x60_write(bh, gt64260_64bit_windows[window].size_reg, 0); } - - return; } /* @@ -1712,8 +1694,6 @@ gt64260_disable_all_windows(struct mv64x60_handle *bh, mv64x60_write(bh, GT64260_IC_CPU_INT_1_MASK, 0); mv64x60_write(bh, GT64260_IC_CPU_INT_2_MASK, 0); mv64x60_write(bh, GT64260_IC_CPU_INT_3_MASK, 0); - - return; } /* @@ -1781,14 +1761,11 @@ gt64260a_chip_specific_init(struct mv64x60_handle 
*bh, mv64x60_mpsc1_pdata.cache_mgmt = 1; if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 0)) - != NULL) { - + != NULL) { r->start = MV64x60_IRQ_SDMA_0; r->end = MV64x60_IRQ_SDMA_0; } #endif - - return; } /* @@ -1861,14 +1838,11 @@ gt64260b_chip_specific_init(struct mv64x60_handle *bh, mv64x60_mpsc1_pdata.cache_mgmt = 1; if ((r = platform_get_resource(&mpsc1_device, IORESOURCE_IRQ, 0)) - != NULL) { - + != NULL) { r->start = MV64x60_IRQ_SDMA_0; r->end = MV64x60_IRQ_SDMA_0; } #endif - - return; } /* @@ -1945,8 +1919,6 @@ mv64360_set_pci2mem_window(struct pci_controller *hose, u32 bus, u32 window, mv64360_reg_addrs[bus][window].base_lo_bar, mv64x60_mask(base,20) | 0xc); mv64x60_pci_exclude_bridge = save_exclude; - - return; } /* @@ -1972,8 +1944,6 @@ mv64360_set_pci2regs_window(struct mv64x60_handle *bh, early_write_config_dword(hose, 0, PCI_DEVFN(0,0), mv64360_offset[bus][1], 0); mv64x60_pci_exclude_bridge = save_exclude; - - return; } /* @@ -2082,8 +2052,6 @@ mv64360_enable_window_32bit(struct mv64x60_handle *bh, u32 window) "32bit table corrupted"); } } - - return; } /* @@ -2139,8 +2107,6 @@ mv64360_disable_window_32bit(struct mv64x60_handle *bh, u32 window) "32bit table corrupted"); } } - - return; } /* @@ -2158,8 +2124,7 @@ mv64360_enable_window_64bit(struct mv64x60_handle *bh, u32 window) (mv64360_64bit_windows[window].size_reg != 0)) { if ((mv64360_64bit_windows[window].extra & MV64x60_EXTRA_MASK) - == MV64x60_EXTRA_PCIACC_ENAB) - + == MV64x60_EXTRA_PCIACC_ENAB) mv64x60_set_bits(bh, mv64360_64bit_windows[window].base_lo_reg, (1 << (mv64360_64bit_windows[window].extra & @@ -2168,8 +2133,6 @@ mv64360_enable_window_64bit(struct mv64x60_handle *bh, u32 window) printk(KERN_ERR "mv64360_enable: %s\n", "64bit table corrupted"); } - - return; } /* @@ -2186,11 +2149,9 @@ mv64360_disable_window_64bit(struct mv64x60_handle *bh, u32 window) mv64360_64bit_windows[window].size_reg); if ((mv64360_64bit_windows[window].base_lo_reg != 0) && - 
(mv64360_64bit_windows[window].size_reg != 0)) { - + (mv64360_64bit_windows[window].size_reg != 0)) { if ((mv64360_64bit_windows[window].extra & MV64x60_EXTRA_MASK) - == MV64x60_EXTRA_PCIACC_ENAB) - + == MV64x60_EXTRA_PCIACC_ENAB) mv64x60_clr_bits(bh, mv64360_64bit_windows[window].base_lo_reg, (1 << (mv64360_64bit_windows[window].extra & @@ -2199,8 +2160,6 @@ mv64360_disable_window_64bit(struct mv64x60_handle *bh, u32 window) printk(KERN_ERR "mv64360_disable: %s\n", "64bit table corrupted"); } - - return; } /* @@ -2241,8 +2200,6 @@ mv64360_disable_all_windows(struct mv64x60_handle *bh, /* Disable all PCI-> windows */ mv64x60_set_bits(bh, MV64x60_PCI0_BAR_ENABLE, 0x0000f9ff); mv64x60_set_bits(bh, MV64x60_PCI1_BAR_ENABLE, 0x0000f9ff); - - return; } /* @@ -2335,8 +2292,6 @@ mv64360_config_io2mem_windows(struct mv64x60_handle *bh, mv64x60_set_bits(bh, MV64360_IDMA2MEM_ACC_PROT_3, (0x3 << (i << 1))); } - - return; } /* @@ -2350,42 +2305,145 @@ static void __init mv64360_set_mpsc2regs_window(struct mv64x60_handle *bh, u32 base) { pr_debug("set mpsc->internal regs, base: 0x%x\n", base); - mv64x60_write(bh, MV64360_MPSC2REGS_BASE, base & 0xffff0000); - return; } /* * mv64360_chip_specific_init() * - * No errata work arounds for the MV64360 implemented at this point. + * Implement errata work arounds for the MV64360. */ static void __init mv64360_chip_specific_init(struct mv64x60_handle *bh, struct mv64x60_setup_info *si) { +#if !defined(CONFIG_NOT_COHERENT_CACHE) + mv64x60_set_bits(bh, MV64360_D_UNIT_CONTROL_HIGH, (1<<24)); +#endif #ifdef CONFIG_SERIAL_MPSC mv64x60_mpsc0_pdata.brg_can_tune = 1; mv64x60_mpsc0_pdata.cache_mgmt = 1; mv64x60_mpsc1_pdata.brg_can_tune = 1; mv64x60_mpsc1_pdata.cache_mgmt = 1; #endif - - return; } /* * mv64460_chip_specific_init() * - * No errata work arounds for the MV64460 implemented at this point. + * Implement errata work arounds for the MV64460. 
*/ static void __init mv64460_chip_specific_init(struct mv64x60_handle *bh, struct mv64x60_setup_info *si) { +#if !defined(CONFIG_NOT_COHERENT_CACHE) + mv64x60_set_bits(bh, MV64360_D_UNIT_CONTROL_HIGH, (1<<24) | (1<<25)); + mv64x60_set_bits(bh, MV64460_D_UNIT_MMASK, (1<<1) | (1<<4)); +#endif #ifdef CONFIG_SERIAL_MPSC mv64x60_mpsc0_pdata.brg_can_tune = 1; + mv64x60_mpsc0_pdata.cache_mgmt = 1; mv64x60_mpsc1_pdata.brg_can_tune = 1; + mv64x60_mpsc1_pdata.cache_mgmt = 1; #endif - return; } + + +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) +/* Export the hotswap register via sysfs for enum event monitoring */ +#define VAL_LEN_MAX 11 /* 32-bit hex or dec stringified number + '\n' */ + +DECLARE_MUTEX(mv64xxx_hs_lock); + +static ssize_t +mv64xxx_hs_reg_read(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + u32 v; + u8 save_exclude; + + if (off > 0) + return 0; + if (count < VAL_LEN_MAX) + return -EINVAL; + + if (down_interruptible(&mv64xxx_hs_lock)) + return -ERESTARTSYS; + save_exclude = mv64x60_pci_exclude_bridge; + mv64x60_pci_exclude_bridge = 0; + early_read_config_dword(&sysfs_hose_a, 0, PCI_DEVFN(0, 0), + MV64360_PCICFG_CPCI_HOTSWAP, &v); + mv64x60_pci_exclude_bridge = save_exclude; + up(&mv64xxx_hs_lock); + + return sprintf(buf, "0x%08x\n", v); +} + +static ssize_t +mv64xxx_hs_reg_write(struct kobject *kobj, char *buf, loff_t off, size_t count) +{ + u32 v; + u8 save_exclude; + + if (off > 0) + return 0; + if (count <= 0) + return -EINVAL; + + if (sscanf(buf, "%i", &v) == 1) { + if (down_interruptible(&mv64xxx_hs_lock)) + return -ERESTARTSYS; + save_exclude = mv64x60_pci_exclude_bridge; + mv64x60_pci_exclude_bridge = 0; + early_write_config_dword(&sysfs_hose_a, 0, PCI_DEVFN(0, 0), + MV64360_PCICFG_CPCI_HOTSWAP, v); + mv64x60_pci_exclude_bridge = save_exclude; + up(&mv64xxx_hs_lock); + } + else + count = -EINVAL; + + return count; +} + +static struct bin_attribute mv64xxx_hs_reg_attr = { /* Hotswap register */ + .attr = { + .name = "hs_reg", + 
.mode = S_IRUGO | S_IWUSR, + .owner = THIS_MODULE, + }, + .size = VAL_LEN_MAX, + .read = mv64xxx_hs_reg_read, + .write = mv64xxx_hs_reg_write, +}; + +/* Provide sysfs file indicating if this platform supports the hs_reg */ +static ssize_t +mv64xxx_hs_reg_valid_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct platform_device *pdev; + struct mv64xxx_pdata *pdp; + u32 v; + + pdev = container_of(dev, struct platform_device, dev); + pdp = (struct mv64xxx_pdata *)pdev->dev.platform_data; + + if (down_interruptible(&mv64xxx_hs_lock)) + return -ERESTARTSYS; + v = pdp->hs_reg_valid; + up(&mv64xxx_hs_lock); + + return sprintf(buf, "%i\n", v); +} +static DEVICE_ATTR(hs_reg_valid, S_IRUGO, mv64xxx_hs_reg_valid_show, NULL); + +static int __init +mv64xxx_sysfs_init(void) +{ + sysfs_create_bin_file(&mv64xxx_device.dev.kobj, &mv64xxx_hs_reg_attr); + sysfs_create_file(&mv64xxx_device.dev.kobj,&dev_attr_hs_reg_valid.attr); + return 0; +} +subsys_initcall(mv64xxx_sysfs_init); +#endif diff --git a/include/asm-ppc/mv64x60.h b/include/asm-ppc/mv64x60.h index cc25b921ad4f..835930d6faa1 100644 --- a/include/asm-ppc/mv64x60.h +++ b/include/asm-ppc/mv64x60.h @@ -278,6 +278,13 @@ mv64x60_modify(struct mv64x60_handle *bh, u32 offs, u32 data, u32 mask) #define mv64x60_set_bits(bh, offs, bits) mv64x60_modify(bh, offs, ~0, bits) #define mv64x60_clr_bits(bh, offs, bits) mv64x60_modify(bh, offs, 0, bits) +#if defined(CONFIG_SYSFS) && !defined(CONFIG_GT64260) +#define MV64XXX_DEV_NAME "mv64xxx" + +struct mv64xxx_pdata { + u32 hs_reg_valid; +}; +#endif /* Externally visible function prototypes */ int mv64x60_init(struct mv64x60_handle *bh, struct mv64x60_setup_info *si); diff --git a/include/asm-ppc/mv64x60_defs.h b/include/asm-ppc/mv64x60_defs.h index 2f428746c02b..f8f7f16b9b53 100644 --- a/include/asm-ppc/mv64x60_defs.h +++ b/include/asm-ppc/mv64x60_defs.h @@ -333,7 +333,7 @@ /* ***************************************************************************** * - * SRAM 
Cotnroller Registers + * SRAM Controller Registers * ***************************************************************************** */ @@ -352,7 +352,7 @@ /* ***************************************************************************** * - * SDRAM/MEM Cotnroller Registers + * SDRAM/MEM Controller Registers * ***************************************************************************** */ @@ -375,6 +375,7 @@ /* SDRAM Control Registers */ #define MV64360_D_UNIT_CONTROL_LOW 0x1404 #define MV64360_D_UNIT_CONTROL_HIGH 0x1424 +#define MV64460_D_UNIT_MMASK 0x14b0 /* SDRAM Error Report Registers (64360) */ #define MV64360_SDRAM_ERR_DATA_LO 0x1444 @@ -388,7 +389,7 @@ /* ***************************************************************************** * - * Device/BOOT Cotnroller Registers + * Device/BOOT Controller Registers * ***************************************************************************** */ @@ -680,6 +681,8 @@ #define MV64x60_PCI1_SLAVE_P2P_IO_REMAP 0x0dec #define MV64x60_PCI1_SLAVE_CPU_REMAP 0x0df0 +#define MV64360_PCICFG_CPCI_HOTSWAP 0x68 + /* ***************************************************************************** * diff --git a/include/linux/mv643xx.h b/include/linux/mv643xx.h index 5773ea42f6e4..0b08cd692201 100644 --- a/include/linux/mv643xx.h +++ b/include/linux/mv643xx.h @@ -980,7 +980,7 @@ /* I2C Registers */ /****************************************/ -#define MV64XXX_I2C_CTLR_NAME "mv64xxx i2c" +#define MV64XXX_I2C_CTLR_NAME "mv64xxx_i2c" #define MV64XXX_I2C_OFFSET 0xc000 #define MV64XXX_I2C_REG_BLOCK_SIZE 0x0020 -- cgit v1.2.3 From dc4ec916f6ea0c2818e9b81c4e9b33231f5f70e4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Sat, 3 Sep 2005 15:56:12 -0700 Subject: [PATCH] MIPS Technologies PCI ID bits - MIPS Denmark does no longer exist; the PCI vendor ID is now owned by MIPS Technologies. - Add ID for SOC-it, MIPS's system controller. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pci_ids.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d513c1634006..95c941f8c747 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2147,6 +2147,9 @@ #define PCI_DEVICE_ID_ENE_1420 0x1420 #define PCI_VENDOR_ID_CHELSIO 0x1425 +#define PCI_VENDOR_ID_MIPS 0x153f +#define PCI_DEVICE_ID_SOC_IT 0x0001 + #define PCI_VENDOR_ID_SYBA 0x1592 #define PCI_DEVICE_ID_SYBA_2P_EPP 0x0782 #define PCI_DEVICE_ID_SYBA_1P_ECP 0x0783 -- cgit v1.2.3 From 7ae65fd334232468a9d6b523a4fc141cd6ec5ea4 Mon Sep 17 00:00:00 2001 From: Matt Tolentino Date: Sat, 3 Sep 2005 15:56:27 -0700 Subject: [PATCH] x86: fix EFI memory map parsing The memory descriptors that comprise the EFI memory map are not fixed in stone such that the size could change in the future. This uses the memory descriptor size obtained from EFI to iterate over the memory map entries during boot. This enables the removal of an x86 specific pad (and ifdef) in the EFI header. I also couldn't stomach the broken up nature of the function to put EFI runtime calls into virtual mode any longer so I fixed that up a bit as well. For reference, this patch only impacts x86. 
Signed-off-by: Matt Tolentino Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/efi.c | 101 +++++++++++++++++++++++------------------------ arch/i386/kernel/setup.c | 14 ++++--- arch/i386/mm/init.c | 5 ++- include/asm-i386/setup.h | 2 +- include/linux/efi.h | 14 ++----- 5 files changed, 67 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 385883ea8c19..850648ae8305 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -233,22 +233,23 @@ void __init efi_map_memmap(void) { memmap.map = NULL; - memmap.map = (efi_memory_desc_t *) - bt_ioremap((unsigned long) memmap.phys_map, - (memmap.nr_map * sizeof(efi_memory_desc_t))); - + memmap.map = bt_ioremap((unsigned long) memmap.phys_map, + (memmap.nr_map * memmap.desc_size)); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not remap the EFI memmap!\n"); + + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); } #if EFI_DEBUG static void __init print_efi_memmap(void) { efi_memory_desc_t *md; + void *p; int i; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) { + md = p; printk(KERN_INFO "mem%02u: type=%u, attr=0x%llx, " "range=[0x%016llx-0x%016llx) (%lluMB)\n", i, md->type, md->attribute, md->phys_addr, @@ -271,10 +272,10 @@ void efi_memmap_walk(efi_freemem_callback_t callback, void *arg) } prev, curr; efi_memory_desc_t *md; unsigned long start, end; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->num_pages == 0) || (!is_available_memory(md))) continue; @@ -325,6 +326,7 @@ void __init efi_init(void) memmap.phys_map = EFI_MEMMAP; memmap.nr_map = EFI_MEMMAP_SIZE/EFI_MEMDESC_SIZE; memmap.desc_version = EFI_MEMDESC_VERSION; + memmap.desc_size = EFI_MEMDESC_SIZE; efi.systab = 
(efi_system_table_t *) boot_ioremap((unsigned long) efi_phys.systab, @@ -428,22 +430,30 @@ void __init efi_init(void) printk(KERN_ERR PFX "Could not map the runtime service table!\n"); /* Map the EFI memory map for use until paging_init() */ - - memmap.map = (efi_memory_desc_t *) - boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); - + memmap.map = boot_ioremap((unsigned long) EFI_MEMMAP, EFI_MEMMAP_SIZE); if (memmap.map == NULL) printk(KERN_ERR PFX "Could not map the EFI memory map!\n"); - if (EFI_MEMDESC_SIZE != sizeof(efi_memory_desc_t)) { - printk(KERN_WARNING PFX "Warning! Kernel-defined memdesc doesn't " - "match the one from EFI!\n"); - } + memmap.map_end = memmap.map + (memmap.nr_map * memmap.desc_size); + #if EFI_DEBUG print_efi_memmap(); #endif } +static inline void __init check_range_for_systab(efi_memory_desc_t *md) +{ + if (((unsigned long)md->phys_addr <= (unsigned long)efi_phys.systab) && + ((unsigned long)efi_phys.systab < md->phys_addr + + ((unsigned long)md->num_pages << EFI_PAGE_SHIFT))) { + unsigned long addr; + + addr = md->virt_addr - md->phys_addr + + (unsigned long)efi_phys.systab; + efi.systab = (efi_system_table_t *)addr; + } +} + /* * This function will switch the EFI runtime services to virtual mode. 
* Essentially, look through the EFI memmap and map every region that @@ -457,43 +467,32 @@ void __init efi_enter_virtual_mode(void) { efi_memory_desc_t *md; efi_status_t status; - int i; + void *p; efi.systab = NULL; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; - if (md->attribute & EFI_MEMORY_RUNTIME) { - md->virt_addr = - (unsigned long)ioremap(md->phys_addr, - md->num_pages << EFI_PAGE_SHIFT); - if (!(unsigned long)md->virt_addr) { - printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", - (unsigned long)md->phys_addr); - } + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; - if (((unsigned long)md->phys_addr <= - (unsigned long)efi_phys.systab) && - ((unsigned long)efi_phys.systab < - md->phys_addr + - ((unsigned long)md->num_pages << - EFI_PAGE_SHIFT))) { - unsigned long addr; - - addr = md->virt_addr - md->phys_addr + - (unsigned long)efi_phys.systab; - efi.systab = (efi_system_table_t *)addr; - } + md->virt_addr = (unsigned long)ioremap(md->phys_addr, + md->num_pages << EFI_PAGE_SHIFT); + if (!(unsigned long)md->virt_addr) { + printk(KERN_ERR PFX "ioremap of 0x%lX failed\n", + (unsigned long)md->phys_addr); } + /* update the virtual address of the EFI system table */ + check_range_for_systab(md); } if (!efi.systab) BUG(); status = phys_efi_set_virtual_address_map( - sizeof(efi_memory_desc_t) * memmap.nr_map, - sizeof(efi_memory_desc_t), + memmap.desc_size * memmap.nr_map, + memmap.desc_size, memmap.desc_version, memmap.phys_map); @@ -533,10 +532,10 @@ efi_initialize_iomem_resources(struct resource *code_resource, { struct resource *res; efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) @@ -613,10 +612,10 @@ efi_initialize_iomem_resources(struct resource 
*code_resource, u32 efi_mem_type(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->type; @@ -627,10 +626,10 @@ u32 efi_mem_type(unsigned long phys_addr) u64 efi_mem_attributes(unsigned long phys_addr) { efi_memory_desc_t *md; - int i; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + md = p; if ((md->phys_addr <= phys_addr) && (phys_addr < (md->phys_addr + (md-> num_pages << EFI_PAGE_SHIFT)) )) return md->attribute; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index af4de58cab54..9adbf710ec8d 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -370,12 +370,16 @@ static void __init limit_regions(unsigned long long size) int i; if (efi_enabled) { - for (i = 0; i < memmap.nr_map; i++) { - current_addr = memmap.map[i].phys_addr + - (memmap.map[i].num_pages << 12); - if (memmap.map[i].type == EFI_CONVENTIONAL_MEMORY) { + efi_memory_desc_t *md; + void *p; + + for (p = memmap.map, i = 0; p < memmap.map_end; + p += memmap.desc_size, i++) { + md = p; + current_addr = md->phys_addr + (md->num_pages << 12); + if (md->type == EFI_CONVENTIONAL_MEMORY) { if (current_addr >= size) { - memmap.map[i].num_pages -= + md->num_pages -= (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT); memmap.nr_map = i + 1; return; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 12216b52e28b..d8b23ab76533 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -198,9 +198,10 @@ int page_is_ram(unsigned long pagenr) if (efi_enabled) { efi_memory_desc_t *md; + void *p; - for (i = 0; i < memmap.nr_map; i++) { - md = &memmap.map[i]; + for (p = memmap.map; p < memmap.map_end; p += 
memmap.desc_size) { + md = p; if (!is_available_memory(md)) continue; addr = (md->phys_addr+PAGE_SIZE-1) >> PAGE_SHIFT; diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 7a32184d54bf..826a8ca50ac8 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h @@ -44,7 +44,7 @@ extern unsigned char boot_params[PARAM_SIZE]; #define EFI_SYSTAB ((efi_system_table_t *) *((unsigned long *)(PARAM+0x1c4))) #define EFI_MEMDESC_SIZE (*((unsigned long *) (PARAM+0x1c8))) #define EFI_MEMDESC_VERSION (*((unsigned long *) (PARAM+0x1cc))) -#define EFI_MEMMAP ((efi_memory_desc_t *) *((unsigned long *)(PARAM+0x1d0))) +#define EFI_MEMMAP ((void *) *((unsigned long *)(PARAM+0x1d0))) #define EFI_MEMMAP_SIZE (*((unsigned long *) (PARAM+0x1d4))) #define MOUNT_ROOT_RDONLY (*(unsigned short *) (PARAM+0x1F2)) #define RAMDISK_FLAGS (*(unsigned short *) (PARAM+0x1F8)) diff --git a/include/linux/efi.h b/include/linux/efi.h index 73781ec165b4..c7c5dd316182 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -91,11 +91,6 @@ typedef struct { #define EFI_PAGE_SHIFT 12 -/* - * For current x86 implementations of EFI, there is - * additional padding in the mem descriptors. This is not - * the case in ia64. Need to have this fixed in the f/w. 
- */ typedef struct { u32 type; u32 pad; @@ -103,9 +98,6 @@ typedef struct { u64 virt_addr; u64 num_pages; u64 attribute; -#if defined (__i386__) - u64 pad1; -#endif } efi_memory_desc_t; typedef int (*efi_freemem_callback_t) (unsigned long start, unsigned long end, void *arg); @@ -240,10 +232,12 @@ typedef struct { } efi_system_table_t; struct efi_memory_map { - efi_memory_desc_t *phys_map; - efi_memory_desc_t *map; + void *phys_map; + void *map; + void *map_end; int nr_map; unsigned long desc_version; + unsigned long desc_size; }; /* -- cgit v1.2.3 From ca078bae813dd46c0f9b102fdfb4a3384641ff48 Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 3 Sep 2005 15:56:57 -0700 Subject: [PATCH] swsusp: switch pm_message_t to struct This adds type-checking to pm_message_t, so that people can't confuse it with int or u32. It also allows us to fix "disk yoyo" during suspend (disk spinning down/up/down). [We've tried that before; since that cpufreq problems were fixed and I've tried make allyes config and fixed resulting damage.] 
Signed-off-by: Pavel Machek Signed-off-by: Alexander Nyberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc/syslib/of_device.c | 2 +- drivers/base/power/resume.c | 8 ++++---- drivers/base/power/runtime.c | 8 ++++---- drivers/base/power/suspend.c | 12 ++++++------ drivers/base/power/sysfs.c | 8 ++++---- drivers/ide/ide.c | 4 ++-- drivers/ide/pci/sc1200.c | 12 ++++++------ drivers/ide/ppc/pmac.c | 8 ++++---- drivers/macintosh/mediabay.c | 4 ++-- drivers/macintosh/via-pmu.c | 2 +- drivers/media/dvb/cinergyT2/cinergyT2.c | 2 +- drivers/net/irda/vlsi_ir.c | 10 +++++----- drivers/net/wireless/airo.c | 10 +++++----- drivers/pci/pci.c | 14 +++++++++----- drivers/scsi/mesh.c | 6 +++--- drivers/serial/pmac_zilog.c | 6 +++--- drivers/usb/core/hub.c | 18 +++++++++--------- drivers/usb/core/usb.c | 2 +- drivers/usb/host/ehci-dbg.c | 2 +- drivers/usb/host/ohci-dbg.c | 2 +- drivers/usb/host/sl811-hcd.c | 6 +++--- drivers/usb/misc/usbtest.c | 2 +- drivers/video/aty/aty128fb.c | 14 ++++++-------- drivers/video/aty/atyfb_base.c | 11 +++++------ drivers/video/aty/radeon_pm.c | 12 ++++++------ drivers/video/chipsfb.c | 4 ++-- drivers/video/i810/i810_main.c | 6 +++--- include/linux/pm.h | 14 ++++++++++---- 28 files changed, 108 insertions(+), 101 deletions(-) (limited to 'include/linux') diff --git a/arch/ppc/syslib/of_device.c b/arch/ppc/syslib/of_device.c index 1eb4f726ca9f..da8a0f2128dc 100644 --- a/arch/ppc/syslib/of_device.c +++ b/arch/ppc/syslib/of_device.c @@ -105,7 +105,7 @@ static int of_device_remove(struct device *dev) return 0; } -static int of_device_suspend(struct device *dev, u32 state) +static int of_device_suspend(struct device *dev, pm_message_t state) { struct of_device * of_dev = to_of_device(dev); struct of_platform_driver * drv = to_of_platform_driver(dev->driver); diff --git a/drivers/base/power/resume.c b/drivers/base/power/resume.c index bdd96b03b885..0a7aa07b9a2a 100644 --- a/drivers/base/power/resume.c +++ b/drivers/base/power/resume.c 
@@ -26,11 +26,11 @@ int resume_device(struct device * dev) down(&dev->sem); if (dev->power.pm_parent - && dev->power.pm_parent->power.power_state) { + && dev->power.pm_parent->power.power_state.event) { dev_err(dev, "PM: resume from %d, parent %s still %d\n", - dev->power.power_state, + dev->power.power_state.event, dev->power.pm_parent->bus_id, - dev->power.pm_parent->power.power_state); + dev->power.pm_parent->power.power_state.event); } if (dev->bus && dev->bus->resume) { dev_dbg(dev,"resuming\n"); @@ -54,7 +54,7 @@ void dpm_resume(void) list_add_tail(entry, &dpm_active); up(&dpm_list_sem); - if (!dev->power.prev_state) + if (!dev->power.prev_state.event) resume_device(dev); down(&dpm_list_sem); put_device(dev); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 325962d80191..e8f0519f5dfa 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -13,10 +13,10 @@ static void runtime_resume(struct device * dev) { dev_dbg(dev, "resuming\n"); - if (!dev->power.power_state) + if (!dev->power.power_state.event) return; if (!resume_device(dev)) - dev->power.power_state = 0; + dev->power.power_state = PMSG_ON; } @@ -49,10 +49,10 @@ int dpm_runtime_suspend(struct device * dev, pm_message_t state) int error = 0; down(&dpm_sem); - if (dev->power.power_state == state) + if (dev->power.power_state.event == state.event) goto Done; - if (dev->power.power_state) + if (dev->power.power_state.event) runtime_resume(dev); if (!(error = suspend_device(dev, state))) diff --git a/drivers/base/power/suspend.c b/drivers/base/power/suspend.c index 2ccee3763acf..50501764d050 100644 --- a/drivers/base/power/suspend.c +++ b/drivers/base/power/suspend.c @@ -40,22 +40,22 @@ int suspend_device(struct device * dev, pm_message_t state) int error = 0; down(&dev->sem); - if (dev->power.power_state) { + if (dev->power.power_state.event) { dev_dbg(dev, "PM: suspend %d-->%d\n", - dev->power.power_state, state); + dev->power.power_state.event, 
state.event); } if (dev->power.pm_parent - && dev->power.pm_parent->power.power_state) { + && dev->power.pm_parent->power.power_state.event) { dev_err(dev, "PM: suspend %d->%d, parent %s already %d\n", - dev->power.power_state, state, + dev->power.power_state.event, state.event, dev->power.pm_parent->bus_id, - dev->power.pm_parent->power.power_state); + dev->power.pm_parent->power.power_state.event); } dev->power.prev_state = dev->power.power_state; - if (dev->bus && dev->bus->suspend && !dev->power.power_state) { + if (dev->bus && dev->bus->suspend && !dev->power.power_state.event) { dev_dbg(dev, "suspending\n"); error = dev->bus->suspend(dev, state); } diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c index f82b3df9545f..8d04fb435c17 100644 --- a/drivers/base/power/sysfs.c +++ b/drivers/base/power/sysfs.c @@ -26,19 +26,19 @@ static ssize_t state_show(struct device * dev, struct device_attribute *attr, char * buf) { - return sprintf(buf, "%u\n", dev->power.power_state); + return sprintf(buf, "%u\n", dev->power.power_state.event); } static ssize_t state_store(struct device * dev, struct device_attribute *attr, const char * buf, size_t n) { - u32 state; + pm_message_t state; char * rest; int error = 0; - state = simple_strtoul(buf, &rest, 10); + state.event = simple_strtoul(buf, &rest, 10); if (*rest) return -EINVAL; - if (state) + if (state.event) error = dpm_runtime_suspend(dev, state); else dpm_runtime_resume(dev); diff --git a/drivers/ide/ide.c b/drivers/ide/ide.c index dae1bd5b8c3e..73ca8f73917d 100644 --- a/drivers/ide/ide.c +++ b/drivers/ide/ide.c @@ -1229,7 +1229,7 @@ static int generic_ide_suspend(struct device *dev, pm_message_t state) rq.special = &args; rq.pm = &rqpm; rqpm.pm_step = ide_pm_state_start_suspend; - rqpm.pm_state = state; + rqpm.pm_state = state.event; return ide_do_drive_cmd(drive, &rq, ide_wait); } @@ -1248,7 +1248,7 @@ static int generic_ide_resume(struct device *dev) rq.special = &args; rq.pm = &rqpm; rqpm.pm_step = 
ide_pm_state_start_resume; - rqpm.pm_state = 0; + rqpm.pm_state = PM_EVENT_ON; return ide_do_drive_cmd(drive, &rq, ide_head_wait); } diff --git a/drivers/ide/pci/sc1200.c b/drivers/ide/pci/sc1200.c index 10592cec6c43..24e21b2838c1 100644 --- a/drivers/ide/pci/sc1200.c +++ b/drivers/ide/pci/sc1200.c @@ -350,9 +350,9 @@ static int sc1200_suspend (struct pci_dev *dev, pm_message_t state) { ide_hwif_t *hwif = NULL; - printk("SC1200: suspend(%u)\n", state); + printk("SC1200: suspend(%u)\n", state.event); - if (state == 0) { + if (state.event == PM_EVENT_ON) { // we only save state when going from full power to less // @@ -386,8 +386,8 @@ static int sc1200_suspend (struct pci_dev *dev, pm_message_t state) /* You don't need to iterate over disks -- sysfs should have done that for you already */ pci_disable_device(dev); - pci_set_power_state(dev,state); - dev->current_state = state; + pci_set_power_state(dev, pci_choose_state(dev, state)); + dev->current_state = state.event; return 0; } @@ -396,8 +396,8 @@ static int sc1200_resume (struct pci_dev *dev) ide_hwif_t *hwif = NULL; printk("SC1200: resume\n"); - pci_set_power_state(dev,0); // bring chip back from sleep state - dev->current_state = 0; + pci_set_power_state(dev, PCI_D0); // bring chip back from sleep state + dev->current_state = PM_EVENT_ON; pci_enable_device(dev); // // loop over all interfaces that are part of this pci device: diff --git a/drivers/ide/ppc/pmac.c b/drivers/ide/ppc/pmac.c index d2760b8ca159..87d1f8a1f41e 100644 --- a/drivers/ide/ppc/pmac.c +++ b/drivers/ide/ppc/pmac.c @@ -1509,7 +1509,7 @@ pmac_ide_macio_suspend(struct macio_dev *mdev, pm_message_t state) ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev); int rc = 0; - if (state != mdev->ofdev.dev.power.power_state && state >= 2) { + if (state.event != mdev->ofdev.dev.power.power_state.event && state.event >= PM_EVENT_SUSPEND) { rc = pmac_ide_do_suspend(hwif); if (rc == 0) mdev->ofdev.dev.power.power_state = state; @@ -1524,7 
+1524,7 @@ pmac_ide_macio_resume(struct macio_dev *mdev) ide_hwif_t *hwif = (ide_hwif_t *)dev_get_drvdata(&mdev->ofdev.dev); int rc = 0; - if (mdev->ofdev.dev.power.power_state != 0) { + if (mdev->ofdev.dev.power.power_state.event != PM_EVENT_ON) { rc = pmac_ide_do_resume(hwif); if (rc == 0) mdev->ofdev.dev.power.power_state = PMSG_ON; @@ -1613,7 +1613,7 @@ pmac_ide_pci_suspend(struct pci_dev *pdev, pm_message_t state) ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev); int rc = 0; - if (state != pdev->dev.power.power_state && state >= 2) { + if (state.event != pdev->dev.power.power_state.event && state.event >= 2) { rc = pmac_ide_do_suspend(hwif); if (rc == 0) pdev->dev.power.power_state = state; @@ -1628,7 +1628,7 @@ pmac_ide_pci_resume(struct pci_dev *pdev) ide_hwif_t *hwif = (ide_hwif_t *)pci_get_drvdata(pdev); int rc = 0; - if (pdev->dev.power.power_state != 0) { + if (pdev->dev.power.power_state.event != PM_EVENT_ON) { rc = pmac_ide_do_resume(hwif); if (rc == 0) pdev->dev.power.power_state = PMSG_ON; diff --git a/drivers/macintosh/mediabay.c b/drivers/macintosh/mediabay.c index 7e1afca75e41..c0712a1ea5af 100644 --- a/drivers/macintosh/mediabay.c +++ b/drivers/macintosh/mediabay.c @@ -708,7 +708,7 @@ static int __pmac media_bay_suspend(struct macio_dev *mdev, pm_message_t state) { struct media_bay_info *bay = macio_get_drvdata(mdev); - if (state != mdev->ofdev.dev.power.power_state && state == PM_SUSPEND_MEM) { + if (state.event != mdev->ofdev.dev.power.power_state.event && state.event == PM_EVENT_SUSPEND) { down(&bay->lock); bay->sleeping = 1; set_mb_power(bay, 0); @@ -723,7 +723,7 @@ static int __pmac media_bay_resume(struct macio_dev *mdev) { struct media_bay_info *bay = macio_get_drvdata(mdev); - if (mdev->ofdev.dev.power.power_state != 0) { + if (mdev->ofdev.dev.power.power_state.event != PM_EVENT_ON) { mdev->ofdev.dev.power.power_state = PMSG_ON; /* We re-enable the bay using it's previous content diff --git a/drivers/macintosh/via-pmu.c 
b/drivers/macintosh/via-pmu.c index 4a0a0ad2d03c..645a2e5c70ab 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -3065,7 +3065,7 @@ static int pmu_sys_suspended = 0; static int pmu_sys_suspend(struct sys_device *sysdev, pm_message_t state) { - if (state != PM_SUSPEND_DISK || pmu_sys_suspended) + if (state.event != PM_EVENT_SUSPEND || pmu_sys_suspended) return 0; /* Suspend PMU event interrupts */ diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c index 7d8b3cad350b..9ea5747b1211 100644 --- a/drivers/media/dvb/cinergyT2/cinergyT2.c +++ b/drivers/media/dvb/cinergyT2/cinergyT2.c @@ -888,7 +888,7 @@ static int cinergyt2_suspend (struct usb_interface *intf, pm_message_t state) if (down_interruptible(&cinergyt2->sem)) return -ERESTARTSYS; - if (state > 0) { /* state 0 seems to mean DEVICE_PM_ON */ + if (state.event > PM_EVENT_ON) { struct cinergyt2 *cinergyt2 = usb_get_intfdata (intf); #ifdef ENABLE_RC cancel_delayed_work(&cinergyt2->rc_query_work); diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c index 4be95398bac4..6d9de626c967 100644 --- a/drivers/net/irda/vlsi_ir.c +++ b/drivers/net/irda/vlsi_ir.c @@ -1757,12 +1757,12 @@ static int vlsi_irda_suspend(struct pci_dev *pdev, pm_message_t state) idev = ndev->priv; down(&idev->sem); if (pdev->current_state != 0) { /* already suspended */ - if (state > pdev->current_state) { /* simply go deeper */ - pci_set_power_state(pdev,state); - pdev->current_state = state; + if (state.event > pdev->current_state) { /* simply go deeper */ + pci_set_power_state(pdev, pci_choose_state(pdev, state)); + pdev->current_state = state.event; } else - IRDA_ERROR("%s - %s: invalid suspend request %u -> %u\n", __FUNCTION__, PCIDEV_NAME(pdev), pdev->current_state, state); + IRDA_ERROR("%s - %s: invalid suspend request %u -> %u\n", __FUNCTION__, PCIDEV_NAME(pdev), pdev->current_state, state.event); up(&idev->sem); return 0; } @@ -1777,7 +1777,7 @@ static int 
vlsi_irda_suspend(struct pci_dev *pdev, pm_message_t state) } pci_set_power_state(pdev, pci_choose_state(pdev, state)); - pdev->current_state = state; + pdev->current_state = state.event; idev->resume_ok = 1; up(&idev->sem); return 0; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 6db1fb6461de..abac1e40154b 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2239,7 +2239,7 @@ static void airo_read_stats(struct airo_info *ai) { u32 *vals = stats_rid.vals; clear_bit(JOB_STATS, &ai->flags); - if (ai->power) { + if (ai->power.event) { up(&ai->sem); return; } @@ -2969,7 +2969,7 @@ static int airo_thread(void *data) { break; } - if (ai->power || test_bit(FLAG_FLASHING, &ai->flags)) { + if (ai->power.event || test_bit(FLAG_FLASHING, &ai->flags)) { up(&ai->sem); continue; } @@ -5521,7 +5521,7 @@ static int airo_pci_resume(struct pci_dev *pdev) pci_restore_state(pdev); pci_enable_wake(pdev, pci_choose_state(pdev, ai->power), 0); - if (ai->power > 1) { + if (ai->power.event > 1) { reset_card(dev, 0); mpi_init_descriptors(ai); setup_card(ai, dev->dev_addr, 0); @@ -7123,7 +7123,7 @@ static int airo_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) int rc = 0; struct airo_info *ai = (struct airo_info *)dev->priv; - if (ai->power) + if (ai->power.event) return 0; switch (cmd) { @@ -7202,7 +7202,7 @@ static void airo_read_wireless_stats(struct airo_info *local) /* Get stats out of the card */ clear_bit(JOB_WSTATS, &local->flags); - if (local->power) { + if (local->power.event) { up(&local->sem); return; } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 1b34fc56067e..c62d2f043397 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -333,13 +333,17 @@ pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state) if (platform_pci_choose_state) { ret = platform_pci_choose_state(dev, state); if (ret >= 0) - state = ret; + state.event = ret; } - switch (state) { - case 0: return PCI_D0; - case 3: 
return PCI_D3hot; + + switch (state.event) { + case PM_EVENT_ON: + return PCI_D0; + case PM_EVENT_FREEZE: + case PM_EVENT_SUSPEND: + return PCI_D3hot; default: - printk("They asked me for state %d\n", state); + printk("They asked me for state %d\n", state.event); BUG(); } return PCI_D0; diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index ff1933298da6..a4857db4f9b8 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1766,7 +1766,7 @@ static int mesh_suspend(struct macio_dev *mdev, pm_message_t state) struct mesh_state *ms = (struct mesh_state *)macio_get_drvdata(mdev); unsigned long flags; - if (state == mdev->ofdev.dev.power.power_state || state < 2) + if (state.event == mdev->ofdev.dev.power.power_state.event || state.event < 2) return 0; scsi_block_requests(ms->host); @@ -1791,7 +1791,7 @@ static int mesh_resume(struct macio_dev *mdev) struct mesh_state *ms = (struct mesh_state *)macio_get_drvdata(mdev); unsigned long flags; - if (mdev->ofdev.dev.power.power_state == 0) + if (mdev->ofdev.dev.power.power_state.event == PM_EVENT_ON) return 0; set_mesh_power(ms, 1); @@ -1802,7 +1802,7 @@ static int mesh_resume(struct macio_dev *mdev) enable_irq(ms->meshintr); scsi_unblock_requests(ms->host); - mdev->ofdev.dev.power.power_state = 0; + mdev->ofdev.dev.power.power_state.event = PM_EVENT_ON; return 0; } diff --git a/drivers/serial/pmac_zilog.c b/drivers/serial/pmac_zilog.c index 5bfde99e245e..5ddd8ab1f108 100644 --- a/drivers/serial/pmac_zilog.c +++ b/drivers/serial/pmac_zilog.c @@ -1600,7 +1600,7 @@ static int pmz_suspend(struct macio_dev *mdev, pm_message_t pm_state) return 0; } - if (pm_state == mdev->ofdev.dev.power.power_state || pm_state < 2) + if (pm_state.event == mdev->ofdev.dev.power.power_state.event) return 0; pmz_debug("suspend, switching to state %d\n", pm_state); @@ -1660,7 +1660,7 @@ static int pmz_resume(struct macio_dev *mdev) if (uap == NULL) return 0; - if (mdev->ofdev.dev.power.power_state == 0) + if 
(mdev->ofdev.dev.power.power_state.event == PM_EVENT_ON) return 0; pmz_debug("resume, switching to state 0\n"); @@ -1713,7 +1713,7 @@ static int pmz_resume(struct macio_dev *mdev) pmz_debug("resume, switching complete\n"); - mdev->ofdev.dev.power.power_state = 0; + mdev->ofdev.dev.power.power_state.event = PM_EVENT_ON; return 0; } diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c3e46d24a37e..c9412daff682 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1570,7 +1570,7 @@ static int __usb_suspend_device (struct usb_device *udev, int port1, struct usb_driver *driver; intf = udev->actconfig->interface[i]; - if (state <= intf->dev.power.power_state) + if (state.event <= intf->dev.power.power_state.event) continue; if (!intf->dev.driver) continue; @@ -1578,11 +1578,11 @@ static int __usb_suspend_device (struct usb_device *udev, int port1, if (driver->suspend) { status = driver->suspend(intf, state); - if (intf->dev.power.power_state != state + if (intf->dev.power.power_state.event != state.event || status) dev_err(&intf->dev, "suspend %d fail, code %d\n", - state, status); + state.event, status); } /* only drivers with suspend() can ever resume(); @@ -1595,7 +1595,7 @@ static int __usb_suspend_device (struct usb_device *udev, int port1, * since we know every driver's probe/disconnect works * even for drivers that can't suspend. */ - if (!driver->suspend || state > PM_SUSPEND_MEM) { + if (!driver->suspend || state.event > PM_EVENT_FREEZE) { #if 1 dev_warn(&intf->dev, "resume is unsafe!\n"); #else @@ -1616,7 +1616,7 @@ static int __usb_suspend_device (struct usb_device *udev, int port1, * policies (when HNP doesn't apply) once we have mechanisms to * turn power back on! (Likely not before 2.7...) 
*/ - if (state > PM_SUSPEND_MEM) { + if (state.event > PM_EVENT_FREEZE) { dev_warn(&udev->dev, "no poweroff yet, suspending instead\n"); } @@ -1733,7 +1733,7 @@ static int finish_port_resume(struct usb_device *udev) struct usb_driver *driver; intf = udev->actconfig->interface[i]; - if (intf->dev.power.power_state == PMSG_ON) + if (intf->dev.power.power_state.event == PM_EVENT_ON) continue; if (!intf->dev.driver) { /* FIXME maybe force to alt 0 */ @@ -1747,11 +1747,11 @@ static int finish_port_resume(struct usb_device *udev) /* can we do better than just logging errors? */ status = driver->resume(intf); - if (intf->dev.power.power_state != PMSG_ON + if (intf->dev.power.power_state.event != PM_EVENT_ON || status) dev_dbg(&intf->dev, "resume fail, state %d code %d\n", - intf->dev.power.power_state, status); + intf->dev.power.power_state.event, status); } status = 0; @@ -1934,7 +1934,7 @@ static int hub_resume(struct usb_interface *intf) unsigned port1; int status; - if (intf->dev.power.power_state == PM_SUSPEND_ON) + if (intf->dev.power.power_state.event == PM_EVENT_ON) return 0; for (port1 = 1; port1 <= hdev->maxchild; port1++) { diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 99c85d2f92da..2cddd8a00437 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1400,7 +1400,7 @@ static int usb_generic_suspend(struct device *dev, pm_message_t message) driver = to_usb_driver(dev->driver); /* there's only one USB suspend state */ - if (intf->dev.power.power_state) + if (intf->dev.power.power_state.event) return 0; if (driver->suspend) diff --git a/drivers/usb/host/ehci-dbg.c b/drivers/usb/host/ehci-dbg.c index b01efb6b36f6..65ac9fef3a7c 100644 --- a/drivers/usb/host/ehci-dbg.c +++ b/drivers/usb/host/ehci-dbg.c @@ -641,7 +641,7 @@ show_registers (struct class_device *class_dev, char *buf) spin_lock_irqsave (&ehci->lock, flags); - if (bus->controller->power.power_state) { + if (bus->controller->power.power_state.event) { size = scnprintf 
(next, size, "bus %s, device %s (driver " DRIVER_VERSION ")\n" "%s\n" diff --git a/drivers/usb/host/ohci-dbg.c b/drivers/usb/host/ohci-dbg.c index c58408c95c3d..447f488f5d93 100644 --- a/drivers/usb/host/ohci-dbg.c +++ b/drivers/usb/host/ohci-dbg.c @@ -631,7 +631,7 @@ show_registers (struct class_device *class_dev, char *buf) hcd->product_desc, hcd_name); - if (bus->controller->power.power_state) { + if (bus->controller->power.power_state.event) { size -= scnprintf (next, size, "SUSPENDED (no register access)\n"); goto done; diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index 7a890a65f55d..80eaf659c198 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -1781,9 +1781,9 @@ sl811h_suspend(struct device *dev, pm_message_t state, u32 phase) if (phase != SUSPEND_POWER_DOWN) return retval; - if (state <= PM_SUSPEND_MEM) + if (state.event == PM_EVENT_FREEZE) retval = sl811h_hub_suspend(hcd); - else + else if (state.event == PM_EVENT_SUSPEND) port_power(sl811, 0); if (retval == 0) dev->power.power_state = state; @@ -1802,7 +1802,7 @@ sl811h_resume(struct device *dev, u32 phase) /* with no "check to see if VBUS is still powered" board hook, * let's assume it'd only be powered to enable remote wakeup. 
*/ - if (dev->power.power_state > PM_SUSPEND_MEM + if (dev->power.power_state.event == PM_EVENT_SUSPEND || !hcd->can_wakeup) { sl811->port1 = 0; port_power(sl811, 1); diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index cda7249a90b2..fd7fb98e4b20 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -1533,7 +1533,7 @@ usbtest_ioctl (struct usb_interface *intf, unsigned int code, void *buf) if (down_interruptible (&dev->sem)) return -ERESTARTSYS; - if (intf->dev.power.power_state != PMSG_ON) { + if (intf->dev.power.power_state.event != PM_EVENT_ON) { up (&dev->sem); return -EHOSTUNREACH; } diff --git a/drivers/video/aty/aty128fb.c b/drivers/video/aty/aty128fb.c index 7bc1d44d8814..b0eba3ac6420 100644 --- a/drivers/video/aty/aty128fb.c +++ b/drivers/video/aty/aty128fb.c @@ -2323,17 +2323,16 @@ static int aty128_pci_suspend(struct pci_dev *pdev, pm_message_t state) * can properly take care of D3 ? Also, with swsusp, we * know we'll be rebooted, ... */ -#ifdef CONFIG_PPC_PMAC +#ifndef CONFIG_PPC_PMAC /* HACK ALERT ! Once I find a proper way to say to each driver * individually what will happen with it's PCI slot, I'll change * that. 
On laptops, the AGP slot is just unclocked, so D2 is * expected, while on desktops, the card is powered off */ - if (state >= 3) - state = 2; + return 0; #endif /* CONFIG_PPC_PMAC */ - if (state != 2 || state == pdev->dev.power.power_state) + if (state.event == pdev->dev.power.power_state.event) return 0; printk(KERN_DEBUG "aty128fb: suspending...\n"); @@ -2367,7 +2366,7 @@ static int aty128_pci_suspend(struct pci_dev *pdev, pm_message_t state) * used dummy fb ops, 2.5 need proper support for this at the * fbdev level */ - if (state == 2) + if (state.event != PM_EVENT_ON) aty128_set_suspend(par, 1); release_console_sem(); @@ -2382,12 +2381,11 @@ static int aty128_do_resume(struct pci_dev *pdev) struct fb_info *info = pci_get_drvdata(pdev); struct aty128fb_par *par = info->par; - if (pdev->dev.power.power_state == 0) + if (pdev->dev.power.power_state.event == PM_EVENT_ON) return 0; /* Wakeup chip */ - if (pdev->dev.power.power_state == 2) - aty128_set_suspend(par, 0); + aty128_set_suspend(par, 0); par->asleep = 0; /* Restore display & engine */ diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c index 8c42538dc8c1..3e10bd837d9e 100644 --- a/drivers/video/aty/atyfb_base.c +++ b/drivers/video/aty/atyfb_base.c @@ -2022,17 +2022,16 @@ static int atyfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) struct fb_info *info = pci_get_drvdata(pdev); struct atyfb_par *par = (struct atyfb_par *) info->par; -#ifdef CONFIG_PPC_PMAC +#ifndef CONFIG_PPC_PMAC /* HACK ALERT ! Once I find a proper way to say to each driver * individually what will happen with it's PCI slot, I'll change * that. 
On laptops, the AGP slot is just unclocked, so D2 is * expected, while on desktops, the card is powered off */ - if (state >= 3) - state = 2; + return 0; #endif /* CONFIG_PPC_PMAC */ - if (state != 2 || state == pdev->dev.power.power_state) + if (state.event == pdev->dev.power.power_state.event) return 0; acquire_console_sem(); @@ -2071,12 +2070,12 @@ static int atyfb_pci_resume(struct pci_dev *pdev) struct fb_info *info = pci_get_drvdata(pdev); struct atyfb_par *par = (struct atyfb_par *) info->par; - if (pdev->dev.power.power_state == 0) + if (pdev->dev.power.power_state.event == PM_EVENT_ON) return 0; acquire_console_sem(); - if (pdev->dev.power.power_state == 2) + if (pdev->dev.power.power_state.event == 2) aty_power_mgmt(0, par); par->asleep = 0; diff --git a/drivers/video/aty/radeon_pm.c b/drivers/video/aty/radeon_pm.c index 98352af39325..59a1b6f85067 100644 --- a/drivers/video/aty/radeon_pm.c +++ b/drivers/video/aty/radeon_pm.c @@ -2526,18 +2526,18 @@ int radeonfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) struct radeonfb_info *rinfo = info->par; int i; - if (state == pdev->dev.power.power_state) + if (state.event == pdev->dev.power.power_state.event) return 0; printk(KERN_DEBUG "radeonfb (%s): suspending to state: %d...\n", - pci_name(pdev), state); + pci_name(pdev), state.event); /* For suspend-to-disk, we cheat here. We don't suspend anything and * let fbcon continue drawing until we are all set. 
That shouldn't * really cause any problem at this point, provided that the wakeup * code knows that any state in memory may not match the HW */ - if (state != PM_SUSPEND_MEM) + if (state.event == PM_EVENT_FREEZE) goto done; acquire_console_sem(); @@ -2616,7 +2616,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev) struct radeonfb_info *rinfo = info->par; int rc = 0; - if (pdev->dev.power.power_state == 0) + if (pdev->dev.power.power_state.event == PM_EVENT_ON) return 0; if (rinfo->no_schedule) { @@ -2626,7 +2626,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev) acquire_console_sem(); printk(KERN_DEBUG "radeonfb (%s): resuming from state: %d...\n", - pci_name(pdev), pdev->dev.power.power_state); + pci_name(pdev), pdev->dev.power.power_state.event); if (pci_enable_device(pdev)) { @@ -2637,7 +2637,7 @@ int radeonfb_pci_resume(struct pci_dev *pdev) } pci_set_master(pdev); - if (pdev->dev.power.power_state == PM_SUSPEND_MEM) { + if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) { /* Wakeup chip. 
Check from config space if we were powered off * (todo: additionally, check CLK_PIN_CNTL too) */ diff --git a/drivers/video/chipsfb.c b/drivers/video/chipsfb.c index e75a965ec760..4131243cfdf8 100644 --- a/drivers/video/chipsfb.c +++ b/drivers/video/chipsfb.c @@ -462,9 +462,9 @@ static int chipsfb_pci_suspend(struct pci_dev *pdev, pm_message_t state) { struct fb_info *p = pci_get_drvdata(pdev); - if (state == pdev->dev.power.power_state) + if (state.event == pdev->dev.power.power_state.event) return 0; - if (state != PM_SUSPEND_MEM) + if (state.event != PM_SUSPEND_MEM) goto done; acquire_console_sem(); diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c index 7513fb9b19cf..6db183462b92 100644 --- a/drivers/video/i810/i810_main.c +++ b/drivers/video/i810/i810_main.c @@ -1506,12 +1506,12 @@ static int i810fb_suspend(struct pci_dev *dev, pm_message_t state) struct i810fb_par *par = (struct i810fb_par *) info->par; int blank = 0, prev_state = par->cur_state; - if (state == prev_state) + if (state.event == prev_state) return 0; - par->cur_state = state; + par->cur_state = state.event; - switch (state) { + switch (state.event) { case 1: blank = VESA_VSYNC_SUSPEND; break; diff --git a/include/linux/pm.h b/include/linux/pm.h index 7aeb208ed713..5cfb07648eca 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -186,7 +186,9 @@ extern int pm_suspend(suspend_state_t state); struct device; -typedef u32 __bitwise pm_message_t; +typedef struct pm_message { + int event; +} pm_message_t; /* * There are 4 important states driver can be in: @@ -207,9 +209,13 @@ typedef u32 __bitwise pm_message_t; * or something similar soon. 
*/ -#define PMSG_FREEZE ((__force pm_message_t) 3) -#define PMSG_SUSPEND ((__force pm_message_t) 3) -#define PMSG_ON ((__force pm_message_t) 0) +#define PM_EVENT_ON 0 +#define PM_EVENT_FREEZE 1 +#define PM_EVENT_SUSPEND 2 + +#define PMSG_FREEZE ((struct pm_message){ .event = PM_EVENT_FREEZE, }) +#define PMSG_SUSPEND ((struct pm_message){ .event = PM_EVENT_SUSPEND, }) +#define PMSG_ON ((struct pm_message){ .event = PM_EVENT_ON, }) struct dev_pm_info { pm_message_t power_state; -- cgit v1.2.3 From ed75e8d58010fdc06e2c3a81bfbebae92314c7e3 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Sat, 3 Sep 2005 15:57:18 -0700 Subject: [PATCH] UML Support - Ptrace: adds the host SYSEMU support, for UML and general usage Jeff Dike , Paolo 'Blaisorblade' Giarrusso , Bodo Stroesser Adds a new ptrace(2) mode, called PTRACE_SYSEMU, resembling PTRACE_SYSCALL except that the kernel does not execute the requested syscall; this is useful to improve performance for virtual environments, like UML, which want to run the syscall on their own. In fact, using PTRACE_SYSCALL means stopping child execution twice, on entry and on exit, and each time you also have two context switches; with SYSEMU you avoid the 2nd stop and so save two context switches per syscall. Also, some architectures don't have support in the host for changing the syscall number via ptrace(), which is currently needed to skip syscall execution (UML turns any syscall into getpid() to avoid it being executed on the host). Fixing that is hard, while SYSEMU is easier to implement. * This version of the patch includes some suggestions of Jeff Dike to avoid adding any instructions to the syscall fast path, plus some other little changes, by myself, to make it work even when the syscall is executed with SYSENTER (but I'm unsure about them). It has been widely tested for quite a lot of time. * Various fixes were included to handle the various switches between various states, i.e.
when for instance a syscall entry is traced with one of PT_SYSCALL / _SYSEMU / _SINGLESTEP and another one is used on exit. Basically, this is done by remembering which one of them was used even after the call to ptrace_notify(). * We're combining TIF_SYSCALL_EMU with TIF_SYSCALL_TRACE or TIF_SINGLESTEP to make do_syscall_trace() notice that the current syscall was started with SYSEMU on entry, so that no notification ought to be done in the exit path; this is a bit of a hack, so this problem is solved in another way in the next patches. * Also, the effects of the patch: "Ptrace - i386: fix Syscall Audit interaction with singlestep" are cancelled; they are restored in the last patch of this series. Detailed descriptions of the patches doing this kind of processing follow (but I've already summed everything up). * Fix behaviour when changing interception kind #1. In do_syscall_trace(), we check the status of the TIF_SYSCALL_EMU flag only after doing the debugger notification; but the debugger might have changed the status of this flag because it continued execution with PTRACE_SYSCALL, so this is wrong. This patch fixes it by saving the flag status before calling ptrace_notify(). * Fix behaviour when changing interception kind #2: avoid intercepting syscall on return when using SYSCALL again. A guest process switching from using PTRACE_SYSEMU to PTRACE_SYSCALL crashes. The problem is in arch/i386/kernel/entry.S. The current SYSEMU patch inhibits the syscall handler from being called, but does not prevent do_syscall_trace() from being called after this for syscall completion interception. The appended patch fixes this. It reuses the flag TIF_SYSCALL_EMU to remember "we come from PTRACE_SYSEMU and now are in PTRACE_SYSCALL", since the flag is unused in the depicted situation. * Fix behaviour when changing interception kind #3: avoid intercepting syscall on return when using SINGLESTEP.
When testing 2.6.9 with the skas3.v6 patch and my latest patch, I had problems with singlestepping on UML in SKAS with SYSEMU. It looped receiving SIGTRAPs without moving forward. EIP of the traced process was the same for all SIGTRAPs. What's missing is to handle switching from PTRACE_SYSCALL_EMU to PTRACE_SINGLESTEP in a way very similar to what is done for the change from PTRACE_SYSCALL_EMU to PTRACE_SYSCALL_TRACE. I.e., after calling ptrace(PTRACE_SYSEMU), on the return path, the debugger is notified and then wakes up the process; the syscall is executed (or skipped, when do_syscall_trace() returns non-zero, i.e. when using PTRACE_SYSEMU), and do_syscall_trace() is called again. Since we are on the return path of a SYSEMU'd syscall, if the wake up is performed through ptrace(PTRACE_SYSCALL), we must still avoid notifying the parent of the syscall exit. Now, this behaviour is extended even to resuming with PTRACE_SINGLESTEP. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 9 ++++--- arch/i386/kernel/ptrace.c | 57 +++++++++++++++++++++++++++++------------- include/asm-i386/thread_info.h | 5 +++- include/linux/ptrace.h | 1 + kernel/fork.c | 3 +++ 5 files changed, 53 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index a991d4e5edd2..b389e5f3bdee 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -203,7 +203,7 @@ sysenter_past_esp: GET_THREAD_INFO(%ebp) /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -226,9 +226,9 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) - # system
call tracing in operation + # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ - testw $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),TI_flags(%ebp) + testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) jnz syscall_trace_entry cmpl $(nr_syscalls), %eax jae syscall_badsys @@ -338,6 +338,9 @@ syscall_trace_entry: movl %esp, %eax xorl %edx,%edx call do_syscall_trace + cmpl $0, %eax + jne syscall_exit # ret != 0 -> running under PTRACE_SYSEMU, + # so must skip actual syscall movl ORIG_EAX(%esp), %eax cmpl $(nr_syscalls), %eax jnae syscall_call diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5ee9e1d60653..5b569dc1c227 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -509,15 +509,27 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) } break; + case PTRACE_SYSEMU: /* continue and stop at next syscall, which will not be executed */ case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */ case PTRACE_CONT: /* restart after signal. */ ret = -EIO; if (!valid_signal(data)) break; + /* If we came here with PTRACE_SYSEMU and now continue with + * PTRACE_SYSCALL, entry.S used to intercept the syscall return. + * But it shouldn't! + * So we don't clear TIF_SYSCALL_EMU, which is always unused in + * this special case, to remember, we came from SYSEMU. That + * flag will be cleared by do_syscall_trace(). 
+ */ + if (request == PTRACE_SYSEMU) { + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + } else if (request == PTRACE_CONT) { + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + } if (request == PTRACE_SYSCALL) { set_tsk_thread_flag(child, TIF_SYSCALL_TRACE); - } - else { + } else { clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } child->exit_code = data; @@ -546,6 +558,8 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) ret = -EIO; if (!valid_signal(data)) break; + /*See do_syscall_trace to know why we don't clear + * TIF_SYSCALL_EMU.*/ clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_singlestep(child); child->exit_code = data; @@ -678,37 +692,43 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) * - triggered by current->work.syscall_trace */ __attribute__((regparm(3))) -void do_syscall_trace(struct pt_regs *regs, int entryexit) +int do_syscall_trace(struct pt_regs *regs, int entryexit) { + int is_sysemu, is_systrace, is_singlestep, ret = 0; /* do the secure computing check first */ secure_computing(regs->orig_eax); - if (unlikely(current->audit_context)) { - if (entryexit) - audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); - - /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only - * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is - * not used, entry.S will call us only on syscall exit, not - * entry ; so when TIF_SYSCALL_AUDIT is used we must avoid - * calling send_sigtrap() on syscall entry. 
- */ - else if (is_singlestep) - goto out; - } + if (unlikely(current->audit_context) && entryexit) + audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); if (!(current->ptrace & PT_PTRACED)) goto out; + is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); + is_systrace = test_thread_flag(TIF_SYSCALL_TRACE); + is_singlestep = test_thread_flag(TIF_SINGLESTEP); + + /* We can detect the case of coming from PTRACE_SYSEMU and now running + * with PTRACE_SYSCALL or PTRACE_SINGLESTEP, by TIF_SYSCALL_EMU being + * set additionally. + * If so let's reset the flag and return without action (no singlestep + * nor syscall tracing, since no actual step has been executed). + */ + if (is_sysemu && (is_systrace || is_singlestep)) { + clear_thread_flag(TIF_SYSCALL_EMU); + goto out; + } + /* Fake a debug trap */ if (test_thread_flag(TIF_SINGLESTEP)) send_sigtrap(current, regs, 0); - if (!test_thread_flag(TIF_SYSCALL_TRACE)) + if (!is_systrace && !is_sysemu) goto out; /* the 0x80 provides a way for the tracing parent to distinguish between a syscall stop and SIGTRAP delivery */ + /* Note that the debugger could change the result of test_thread_flag!*/ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 
0x80 : 0)); /* @@ -720,9 +740,10 @@ void do_syscall_trace(struct pt_regs *regs, int entryexit) send_sig(current->exit_code, current, 1); current->exit_code = 0; } + ret = is_sysemu; out: if (unlikely(current->audit_context) && !entryexit) audit_syscall_entry(current, AUDIT_ARCH_I386, regs->orig_eax, regs->ebx, regs->ecx, regs->edx, regs->esi); - + return ret; } diff --git a/include/asm-i386/thread_info.h b/include/asm-i386/thread_info.h index 95add81237ea..e2cb9fa6f563 100644 --- a/include/asm-i386/thread_info.h +++ b/include/asm-i386/thread_info.h @@ -139,6 +139,7 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ #define TIF_IRET 5 /* return with iret */ +#define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */ @@ -150,13 +151,15 @@ register unsigned long current_stack_pointer asm("esp") __attribute_used__; #define _TIF_NEED_RESCHED (1< Date: Sat, 3 Sep 2005 15:57:20 -0700 Subject: [PATCH] Uml support: add PTRACE_SYSEMU_SINGLESTEP option to i386 This patch implements the new ptrace option PTRACE_SYSEMU_SINGLESTEP, which can be used by UML to singlestep a process: it will receive SINGLESTEP interceptions for normal instructions and syscalls, but syscall execution will be skipped just like with PTRACE_SYSEMU. 
Signed-off-by: Bodo Stroesser Signed-off-by: Paolo 'Blaisorblade' Giarrusso Cc: Jeff Dike Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/ptrace.c | 21 +++++++++++++++++---- include/linux/ptrace.h | 1 + 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 18642f05dde1..3196ba50fcd5 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -547,11 +547,17 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data) wake_up_process(child); break; + case PTRACE_SYSEMU_SINGLESTEP: /* Same as SYSEMU, but singlestep if not syscall */ case PTRACE_SINGLESTEP: /* set the trap flag. */ ret = -EIO; if (!valid_signal(data)) break; - clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + + if (request == PTRACE_SYSEMU_SINGLESTEP) + set_tsk_thread_flag(child, TIF_SYSCALL_EMU); + else + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); + clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); set_singlestep(child); child->exit_code = data; @@ -686,7 +692,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) __attribute__((regparm(3))) int do_syscall_trace(struct pt_regs *regs, int entryexit) { - int is_sysemu, is_singlestep, ret = 0; + int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU), ret = 0; + /* With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP */ + int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); + /* do the secure computing check first */ secure_computing(regs->orig_eax); @@ -696,8 +705,11 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) if (!(current->ptrace & PT_PTRACED)) goto out; - is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); - is_singlestep = test_thread_flag(TIF_SINGLESTEP); + /* If a process stops on the 1st tracepoint with SYSCALL_TRACE + * and then is resumed with SYSEMU_SINGLESTEP, it will come in + * here. 
We have to check this and return */ + if (is_sysemu && entryexit) + return 0; /* Fake a debug trap */ if (is_singlestep) @@ -728,6 +740,7 @@ int do_syscall_trace(struct pt_regs *regs, int entryexit) if (ret == 0) return 0; + regs->orig_eax = -1; /* force skip of syscall restarting */ if (unlikely(current->audit_context)) audit_syscall_exit(current, AUDITSC_RESULT(regs->eax), regs->eax); return 1; diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 7528afb6b2ad..2afdafb62123 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -21,6 +21,7 @@ #define PTRACE_SYSCALL 24 #define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 /* 0x4200-0x4300 are reserved for architecture-independent additions. */ #define PTRACE_SETOPTIONS 0x4200 -- cgit v1.2.3 From a61fc683ae1b7871d8d81ac5025af1a923731547 Mon Sep 17 00:00:00 2001 From: "bgardner@wabtec.com" Date: Wed, 27 Jul 2005 12:43:03 -0500 Subject: [PATCH] I2C: add kobj_to_i2c_client Move the inline function kobj_to_i2c_client() from max6875.c to i2c.h. 
Signed-off-by: Ben Gardner Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/chips/max6875.c | 5 ----- include/linux/i2c.h | 5 +++++ 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index 1a7993e63541..35a8e9215294 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -131,11 +131,6 @@ exit_up: up(&data->update_lock); } -static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) -{ - return to_i2c_client(container_of(kobj, struct device, kobj)); -} - static ssize_t max6875_read(struct kobject *kobj, char *buf, loff_t off, size_t count) { diff --git a/include/linux/i2c.h b/include/linux/i2c.h index be837b13f297..017445943816 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -160,6 +160,11 @@ struct i2c_client { }; #define to_i2c_client(d) container_of(d, struct i2c_client, dev) +static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) +{ + return to_i2c_client(container_of(kobj, struct device, kobj)); +} + static inline void *i2c_get_clientdata (struct i2c_client *dev) { return dev_get_drvdata (&dev->dev); -- cgit v1.2.3 From 1236441f38b6a98caf4c7983e7efdecc2d1527b5 Mon Sep 17 00:00:00 2001 From: "Mark M. Hoffman" Date: Fri, 15 Jul 2005 21:38:08 -0400 Subject: [PATCH] I2C hwmon: hwmon sysfs class This patch adds the sysfs class "hwmon" for use by hardware monitoring (sensors) chip drivers. It also fixes up the related Kconfig/Makefile bits. Signed-off-by: Mark M. 
Hoffman Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/Kconfig | 7 +++- drivers/hwmon/Makefile | 2 ++ drivers/hwmon/hwmon.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/hwmon.h | 24 +++++++++++++ 4 files changed, 130 insertions(+), 1 deletion(-) create mode 100644 drivers/hwmon/hwmon.c create mode 100644 include/linux/hwmon.h (limited to 'include/linux') diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index f9aa3faa6b88..8c3911313700 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -12,7 +12,12 @@ config HWMON of a system. Most modern motherboards include such a device. It can include temperature sensors, voltage sensors, fan speed sensors and various additional features such as the ability to - control the speed of the fans. + control the speed of the fans. If you want this support you + should say Y here and also to the specific driver(s) for your + sensors chip(s) below. + + This support can also be built as a module. If so, the module + will be called hwmon. config SENSORS_ADM1021 tristate "Analog Devices ADM1021 and compatibles" diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 2781403a0236..bd1bd59eb149 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -2,6 +2,8 @@ # Makefile for sensor chip drivers. # +obj-$(CONFIG_HWMON) += hwmon.o + # asb100, then w83781d go first, as they can override other drivers' addresses. obj-$(CONFIG_SENSORS_ASB100) += asb100.o obj-$(CONFIG_SENSORS_W83627HF) += w83627hf.o diff --git a/drivers/hwmon/hwmon.c b/drivers/hwmon/hwmon.c new file mode 100644 index 000000000000..9b41c9bd805f --- /dev/null +++ b/drivers/hwmon/hwmon.c @@ -0,0 +1,98 @@ +/* + hwmon.c - part of lm_sensors, Linux kernel modules for hardware monitoring + + This file defines the sysfs class "hwmon", for use by sensors drivers. + + Copyright (C) 2005 Mark M. 
Hoffman + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. +*/ + +#include +#include +#include +#include +#include +#include + +#define HWMON_ID_PREFIX "hwmon" +#define HWMON_ID_FORMAT HWMON_ID_PREFIX "%d" + +static struct class *hwmon_class; + +static DEFINE_IDR(hwmon_idr); + +/** + * hwmon_device_register - register w/ hwmon sysfs class + * @dev: the device to register + * + * hwmon_device_unregister() must be called when the class device is no + * longer needed. + * + * Returns the pointer to the new struct class device. + */ +struct class_device *hwmon_device_register(struct device *dev) +{ + struct class_device *cdev; + int id; + + if (idr_pre_get(&hwmon_idr, GFP_KERNEL) == 0) + return ERR_PTR(-ENOMEM); + + if (idr_get_new(&hwmon_idr, NULL, &id) < 0) + return ERR_PTR(-ENOMEM); + + id = id & MAX_ID_MASK; + cdev = class_device_create(hwmon_class, MKDEV(0,0), dev, + HWMON_ID_FORMAT, id); + + if (IS_ERR(cdev)) + idr_remove(&hwmon_idr, id); + + return cdev; +} + +/** + * hwmon_device_unregister - removes the previously registered class device + * + * @cdev: the class device to destroy + */ +void hwmon_device_unregister(struct class_device *cdev) +{ + int id; + + if (sscanf(cdev->class_id, HWMON_ID_FORMAT, &id) == 1) { + class_device_unregister(cdev); + idr_remove(&hwmon_idr, id); + } else + dev_dbg(cdev->dev, + "hwmon_device_unregister() failed: bad class ID!\n"); +} + +static int __init hwmon_init(void) +{ + hwmon_class = class_create(THIS_MODULE, "hwmon"); + if (IS_ERR(hwmon_class)) { + printk(KERN_ERR "hwmon.c: couldn't create sysfs class\n"); + return PTR_ERR(hwmon_class); + } + return 0; +} + +static void __exit hwmon_exit(void) +{ + class_destroy(hwmon_class); +} + +module_init(hwmon_init); +module_exit(hwmon_exit); + +EXPORT_SYMBOL_GPL(hwmon_device_register); +EXPORT_SYMBOL_GPL(hwmon_device_unregister); + 
+MODULE_AUTHOR("Mark M. Hoffman "); +MODULE_DESCRIPTION("hardware monitoring sysfs/class support"); +MODULE_LICENSE("GPL"); + diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h new file mode 100644 index 000000000000..bf90e6001e3b --- /dev/null +++ b/include/linux/hwmon.h @@ -0,0 +1,24 @@ +/* + hwmon.h - part of lm_sensors, Linux kernel modules for hardware monitoring + + This file declares helper functions for the sysfs class "hwmon", + for use by sensors drivers. + + Copyright (C) 2005 Mark M. Hoffman + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. +*/ + +#ifndef _HWMON_H_ +#define _HWMON_H_ + +#include + +struct class_device *hwmon_device_register(struct device *dev); + +void hwmon_device_unregister(struct class_device *cdev); + +#endif + -- cgit v1.2.3 From efde723fdac02111872bff606ef362074fc1efa8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 20 Jul 2005 23:03:50 +0200 Subject: [PATCH] I2C: Separate non-i2c hwmon drivers from i2c-core (1/9) Temporarily export a few structures and functions from i2c-core, because we will soon need them in i2c-isa. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.c | 14 ++++++++++---- include/linux/i2c.h | 7 +++++++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 4a9ead277596..7a7837ae3114 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -61,7 +61,7 @@ static int i2c_bus_resume(struct device * dev) return rc; } -static struct bus_type i2c_bus_type = { +struct bus_type i2c_bus_type = { .name = "i2c", .match = i2c_device_match, .suspend = i2c_bus_suspend, @@ -78,13 +78,13 @@ static int i2c_device_remove(struct device *dev) return 0; } -static void i2c_adapter_dev_release(struct device *dev) +void i2c_adapter_dev_release(struct device *dev) { struct i2c_adapter *adap = dev_to_i2c_adapter(dev); complete(&adap->dev_released); } -static struct device_driver i2c_adapter_driver = { +struct device_driver i2c_adapter_driver = { .name = "i2c_adapter", .bus = &i2c_bus_type, .probe = i2c_device_probe, @@ -97,7 +97,7 @@ static void i2c_adapter_class_dev_release(struct class_device *dev) complete(&adap->class_dev_released); } -static struct class i2c_adapter_class = { +struct class i2c_adapter_class = { .name = "i2c-adapter", .release = &i2c_adapter_class_dev_release, }; @@ -1171,6 +1171,12 @@ s32 i2c_smbus_xfer(struct i2c_adapter * adapter, u16 addr, unsigned short flags, } +/* Next four are needed by i2c-isa */ +EXPORT_SYMBOL_GPL(i2c_adapter_dev_release); +EXPORT_SYMBOL_GPL(i2c_adapter_driver); +EXPORT_SYMBOL_GPL(i2c_adapter_class); +EXPORT_SYMBOL_GPL(i2c_bus_type); + EXPORT_SYMBOL(i2c_add_adapter); EXPORT_SYMBOL(i2c_del_adapter); EXPORT_SYMBOL(i2c_add_driver); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 017445943816..3be06105034c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -34,6 +34,13 @@ #include /* for struct device */ #include +/* --- For i2c-isa ---------------------------------------------------- 
*/ + +extern void i2c_adapter_dev_release(struct device *dev); +extern struct device_driver i2c_adapter_driver; +extern struct class i2c_adapter_class; +extern struct bus_type i2c_bus_type; + /* --- General options ------------------------------------------------ */ struct i2c_msg; -- cgit v1.2.3 From 400c455eaa0d0819d18cd42a74070e0e238a73dc Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 19 Jul 2005 23:48:43 +0200 Subject: [PATCH] I2C: Separate non-i2c hwmon drivers from i2c-core (2/9) Convert i2c-isa from a dumb i2c_adapter into a pseudo i2c-core for ISA hardware monitoring drivers. The isa i2c_adapter is no longer registered with i2c-core; drivers have to explicitly connect to it using the new i2c_isa_{add,del}_driver interface. At this point, all ISA chip drivers are useless, because they still register with i2c-core in the hope i2c-isa is registered there as well, but it isn't anymore. The fake bus will be named i2c-9191 in sysfs. This is the number it already had internally in various places, so it's not exactly new, except that now the number is seen in userspace as well. This shouldn't be a problem until someone really has 9192 I2C busses in a given system ;) The fake bus will no longer show up in "i2cdetect -l", as it won't be seen by i2c-dev anymore (not being registered with i2c-core), which is a good thing, as i2cdetect/i2cdump/i2cset cannot operate on this fake bus anyway.
Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-isa.c | 158 ++++++++++++++++++++++++++++++++++++++++--- include/linux/i2c-isa.h | 29 ++++++++ 2 files changed, 177 insertions(+), 10 deletions(-) create mode 100644 include/linux/i2c-isa.h (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index 00e7f7157b75..a60f4801757e 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -1,6 +1,8 @@ /* - i2c-isa.c - Part of lm_sensors, Linux kernel modules for hardware - monitoring + i2c-isa.c - an i2c-core-like thing for ISA hardware monitoring chips + Copyright (C) 2005 Jean Delvare + + Based on the i2c-isa pseudo-adapter from the lm_sensors project Copyright (c) 1998, 1999 Frodo Looijaard This program is free software; you can redistribute it and/or modify @@ -18,17 +20,24 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* This implements an i2c algorithm/adapter for ISA bus. Not that this is - on first sight very useful; almost no functionality is preserved. - Except that it makes writing drivers for chips which can be on both - the SMBus and the ISA bus very much easier. See lm78.c for an example - of this. */ +/* This implements an i2c-core-like thing for ISA hardware monitoring + chips. Such chips are linked to the i2c subsystem for historical + reasons (because the early ISA hardware monitoring chips such as the + LM78 had both an I2C and an ISA interface). They used to be + registered with the main i2c-core, but as a first step in the + direction of a clean separation between I2C and ISA chip drivers, + we now have this separate core for ISA ones. It is significantly + more simple than the real one, of course, because we don't have to + handle multiple busses: there is only one (fake) ISA adapter. + It is worth noting that we still rely on i2c-core for some things + at the moment - but hopefully this won't last. 
*/ #include #include #include #include #include +#include static u32 isa_func(struct i2c_adapter *adapter); @@ -53,17 +62,146 @@ static u32 isa_func(struct i2c_adapter *adapter) return 0; } + +/* Copied from i2c-core */ +static ssize_t show_adapter_name(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_adapter *adap = dev_to_i2c_adapter(dev); + return sprintf(buf, "%s\n", adap->name); +} +static DEVICE_ATTR(name, S_IRUGO, show_adapter_name, NULL); + +static int i2c_isa_device_probe(struct device *dev) +{ + return -ENODEV; +} + +static int i2c_isa_device_remove(struct device *dev) +{ + return 0; +} + + +/* We implement an interface which resembles i2c_{add,del}_driver, + but for i2c-isa drivers. We don't have to remember and handle lists + of drivers and adapters so this is much more simple, of course. */ + +int i2c_isa_add_driver(struct i2c_driver *driver) +{ + int res; + + /* Add the driver to the list of i2c drivers in the driver core */ + driver->driver.name = driver->name; + driver->driver.bus = &i2c_bus_type; + driver->driver.probe = i2c_isa_device_probe; + driver->driver.remove = i2c_isa_device_remove; + res = driver_register(&driver->driver); + if (res) + return res; + dev_dbg(&isa_adapter.dev, "Driver %s registered\n", driver->name); + + /* Now look for clients */ + driver->attach_adapter(&isa_adapter); + + return 0; +} + +int i2c_isa_del_driver(struct i2c_driver *driver) +{ + struct list_head *item, *_n; + struct i2c_client *client; + int res; + + /* Detach all clients belonging to this one driver */ + list_for_each_safe(item, _n, &isa_adapter.clients) { + client = list_entry(item, struct i2c_client, list); + if (client->driver != driver) + continue; + dev_dbg(&isa_adapter.dev, "Detaching client %s at 0x%x\n", + client->name, client->addr); + if ((res = driver->detach_client(client))) { + dev_err(&isa_adapter.dev, "Failed, driver " + "%s not unregistered!\n", + driver->name); + return res; + } + } + + /* Get the driver off the 
core list */ + driver_unregister(&driver->driver); + dev_dbg(&isa_adapter.dev, "Driver %s unregistered\n", driver->name); + + return 0; +} + + static int __init i2c_isa_init(void) { - return i2c_add_adapter(&isa_adapter); + init_MUTEX(&isa_adapter.clist_lock); + INIT_LIST_HEAD(&isa_adapter.clients); + + isa_adapter.nr = ANY_I2C_ISA_BUS; + isa_adapter.dev.parent = &platform_bus; + sprintf(isa_adapter.dev.bus_id, "i2c-%d", isa_adapter.nr); + isa_adapter.dev.driver = &i2c_adapter_driver; + isa_adapter.dev.release = &i2c_adapter_dev_release; + device_register(&isa_adapter.dev); + device_create_file(&isa_adapter.dev, &dev_attr_name); + + /* Add this adapter to the i2c_adapter class */ + memset(&isa_adapter.class_dev, 0x00, sizeof(struct class_device)); + isa_adapter.class_dev.dev = &isa_adapter.dev; + isa_adapter.class_dev.class = &i2c_adapter_class; + strlcpy(isa_adapter.class_dev.class_id, isa_adapter.dev.bus_id, + BUS_ID_SIZE); + class_device_register(&isa_adapter.class_dev); + + dev_dbg(&isa_adapter.dev, "%s registered\n", isa_adapter.name); + + return 0; } static void __exit i2c_isa_exit(void) { - i2c_del_adapter(&isa_adapter); +#ifdef DEBUG + struct list_head *item, *_n; + struct i2c_client *client = NULL; +#endif + + /* There should be no more active client */ +#ifdef DEBUG + dev_dbg(&isa_adapter.dev, "Looking for clients\n"); + list_for_each_safe(item, _n, &isa_adapter.clients) { + client = list_entry(item, struct i2c_client, list); + dev_err(&isa_adapter.dev, "Driver %s still has an active " + "ISA client at 0x%x\n", client->driver->name, + client->addr); + } + if (client != NULL) + return; +#endif + + /* Clean up the sysfs representation */ + dev_dbg(&isa_adapter.dev, "Unregistering from sysfs\n"); + init_completion(&isa_adapter.dev_released); + init_completion(&isa_adapter.class_dev_released); + class_device_unregister(&isa_adapter.class_dev); + device_remove_file(&isa_adapter.dev, &dev_attr_name); + device_unregister(&isa_adapter.dev); + + /* Wait for sysfs 
to drop all references */ + dev_dbg(&isa_adapter.dev, "Waiting for sysfs completion\n"); + wait_for_completion(&isa_adapter.dev_released); + wait_for_completion(&isa_adapter.class_dev_released); + + dev_dbg(&isa_adapter.dev, "%s unregistered\n", isa_adapter.name); } -MODULE_AUTHOR("Frodo Looijaard "); +EXPORT_SYMBOL(i2c_isa_add_driver); +EXPORT_SYMBOL(i2c_isa_del_driver); + +MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("ISA bus access through i2c"); MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h new file mode 100644 index 000000000000..b5727d7702e1 --- /dev/null +++ b/include/linux/i2c-isa.h @@ -0,0 +1,29 @@ +/* + * i2c-isa.h - definitions for the i2c-isa pseudo-i2c-adapter interface + * + * Copyright (C) 2005 Jean Delvare + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _LINUX_I2C_ISA_H +#define _LINUX_I2C_ISA_H + +#include + +extern int i2c_isa_add_driver(struct i2c_driver *driver); +extern int i2c_isa_del_driver(struct i2c_driver *driver); + +#endif /* _LINUX_I2C_ISA_H */ -- cgit v1.2.3 From 5071860aba7fc69279ab822638ed2c2e4549f9fd Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 20 Jul 2005 00:02:32 +0200 Subject: [PATCH] I2C: Separate non-i2c hwmon drivers from i2c-core (7/9) Kill normal_isa in header files, documentation and all chip drivers, as it is no longer used. normal_i2c could be renamed to normal, but I decided not to do so at the moment, so as to limit the number of changes. This might be done later as part of the i2c_probe/i2c_detect merge. Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/porting-clients | 7 ++++--- Documentation/i2c/writing-clients | 35 +++++++++++++++-------------------- drivers/hwmon/adm1021.c | 1 - drivers/hwmon/adm1025.c | 1 - drivers/hwmon/adm1026.c | 1 - drivers/hwmon/adm1031.c | 1 - drivers/hwmon/adm9240.c | 2 -- drivers/hwmon/asb100.c | 3 --- drivers/hwmon/atxp1.c | 1 - drivers/hwmon/ds1621.c | 1 - drivers/hwmon/fscher.c | 1 - drivers/hwmon/fscpos.c | 1 - drivers/hwmon/gl518sm.c | 1 - drivers/hwmon/gl520sm.c | 1 - drivers/hwmon/it87.c | 1 - drivers/hwmon/lm63.c | 1 - drivers/hwmon/lm75.c | 1 - drivers/hwmon/lm77.c | 1 - drivers/hwmon/lm78.c | 1 - drivers/hwmon/lm80.c | 1 - drivers/hwmon/lm83.c | 1 - drivers/hwmon/lm85.c | 1 - drivers/hwmon/lm87.c | 1 - drivers/hwmon/lm90.c | 1 - drivers/hwmon/lm92.c | 1 - drivers/hwmon/max1619.c | 1 - drivers/hwmon/w83781d.c | 1 - drivers/hwmon/w83l785ts.c | 1 - drivers/i2c/chips/ds1337.c | 1 - drivers/i2c/chips/eeprom.c | 1 - drivers/i2c/chips/max6875.c | 1 - drivers/i2c/chips/pca9539.c | 1 - drivers/i2c/chips/pcf8574.c | 1 - drivers/i2c/chips/pcf8591.c | 1 - include/linux/i2c-sensor.h | 36 +++++++++++++++--------------------- include/linux/i2c.h | 8 ++------ 36 files changed, 36 insertions(+), 85 deletions(-) (limited to
'include/linux') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index a7adbdd9ea8a..105c6186b912 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -29,8 +29,8 @@ Technical changes: Please respect this inclusion order. Some extra headers may be required for a given driver (e.g. "lm75.h"). -* [Addresses] SENSORS_I2C_END becomes I2C_CLIENT_END, SENSORS_ISA_END - becomes I2C_CLIENT_ISA_END. +* [Addresses] SENSORS_I2C_END becomes I2C_CLIENT_END, ISA addresses + are no more handled by the i2c core. * [Client data] Get rid of sysctl_id. Try using standard names for register values (for example, temp_os becomes temp_max). You're @@ -72,7 +72,8 @@ Technical changes: name string, which will be filled with a lowercase, short string (typically the driver name, e.g. "lm75"). In i2c-only drivers, drop the i2c_is_isa_adapter check, it's - useless. + useless. Same for isa-only drivers, as the test would always be + true. Only hybrid drivers (which are quite rare) still need it. The errorN labels are reduced to the number needed. If that number is 2 (i2c-only drivers), it is advised that the labels are named exit and exit_free. For i2c+isa drivers, labels should be named diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 91664be91ffc..e6b546dd4f7b 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -195,31 +195,28 @@ Probing classes (sensors) ------------------------- If you write a `sensors' driver, you use a slightly different interface. -As well as I2C addresses, we have to cope with ISA addresses. Also, we -use a enum of chip types. Don't forget to include `sensors.h'. +Also, we use a enum of chip types. Don't forget to include `sensors.h'. The following lists are used internally. They are all lists of integers. - normal_i2c: filled in by the module writer. Terminated by SENSORS_I2C_END. 
+ normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_END. A list of I2C addresses which should normally be examined. - normal_isa: filled in by the module writer. Terminated by SENSORS_ISA_END. - A list of ISA addresses which should normally be examined. - probe: insmod parameter. Initialize this list with SENSORS_I2C_END values. - A list of pairs. The first value is a bus number (SENSORS_ISA_BUS for - the ISA bus, -1 for any I2C bus), the second is the address. These - addresses are also probed, as if they were in the 'normal' list. - ignore: insmod parameter. Initialize this list with SENSORS_I2C_END values. - A list of pairs. The first value is a bus number (SENSORS_ISA_BUS for - the ISA bus, -1 for any I2C bus), the second is the I2C address. These - addresses are never probed. This parameter overrules 'normal' and - 'probe', but not the 'force' lists. + probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. + A list of pairs. The first value is a bus number (ANY_I2C_BUS for any + I2C bus), the second is the address. These addresses are also probed, + as if they were in the 'normal' list. + ignore: insmod parameter. Initialize this list with I2C_CLIENT_END values. + A list of pairs. The first value is a bus number (ANY_I2C_BUS for any + I2C bus), the second is the I2C address. These addresses are never + probed. This parameter overrules 'normal' and 'probe', but not the + 'force' lists. Also used is a list of pointers to sensors_force_data structures: force_data: insmod parameters. A list, ending with an element of which the force field is NULL. Each element contains the type of chip and a list of pairs. - The first value is a bus number (SENSORS_ISA_BUS for the ISA bus, - -1 for any I2C bus), the second is the address. + The first value is a bus number (ANY_I2C_BUS for any I2C bus), the + second is the address. These are automatically translated to insmod variables of the form force_foo. 
@@ -227,13 +224,11 @@ So we have a generic insmod variabled `force', and chip-specific variables `force_CHIPNAME'. Fortunately, as a module writer, you just have to define the `normal_i2c' -and `normal_isa' parameters, and define what chip names are used. -The complete declaration could look like this: +parameter, and define what chip names are used. The complete declaration +could look like this: /* Scan i2c addresses 0x37, and 0x48 to 0x4f */ static unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; - /* Scan ISA address 0x290 */ - static unsigned int normal_isa[] = {0x0290,SENSORS_ISA_END}; /* Define chips foo and bar, as well as all module parameters and things */ SENSORS_INSMOD_2(foo,bar); diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index a483d96e4cef..093d09cc4c14 100644 --- a/drivers/hwmon/adm1021.c +++ b/drivers/hwmon/adm1021.c @@ -34,7 +34,6 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_8(adm1021, adm1023, max1617, max1617a, thmc10, lm84, gl523sm, mc1066); diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index b68b292c00d4..bdba01e2f3ed 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -62,7 +62,6 @@ */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index eb55133a13ee..8e8b0ef8fff3 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -36,7 +36,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(adm1026); diff --git a/drivers/hwmon/adm1031.c 
b/drivers/hwmon/adm1031.c index ac3b1542556e..a60187e24830 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -61,7 +61,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_2(adm1030, adm1031); diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 7ef61206ba10..d52100027665 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -54,8 +54,6 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; - /* Insmod parameters */ SENSORS_INSMOD_3(adm9240, ds1780, lm81); diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index 3ab7a2ddafba..d1856acf87a0 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -56,9 +56,6 @@ /* I2C addresses to scan */ static unsigned short normal_i2c[] = { 0x2d, I2C_CLIENT_END }; -/* ISA addresses to scan (none) */ -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; - /* Insmod parameters */ SENSORS_INSMOD_1(asb100); I2C_CLIENT_MODULE_PARM(force_subclients, "List of subclient addresses: " diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 5f79f07a4ab8..ced54189a579 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -42,7 +42,6 @@ MODULE_AUTHOR("Sebastian Witt "); #define ATXP1_GPIO1MASK 0x0f static unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; SENSORS_INSMOD_1(atxp1); diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c index 9ed21ac46e97..4a316a7f7980 100644 --- a/drivers/hwmon/ds1621.c +++ b/drivers/hwmon/ds1621.c @@ -34,7 +34,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* 
Insmod parameters */ SENSORS_INSMOD_1(ds1621); diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c index b794580a0726..c7caa95c643b 100644 --- a/drivers/hwmon/fscher.c +++ b/drivers/hwmon/fscher.c @@ -40,7 +40,6 @@ */ static unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c index 4cb33b231124..fd75e444e758 100644 --- a/drivers/hwmon/fscpos.c +++ b/drivers/hwmon/fscpos.c @@ -43,7 +43,6 @@ * Addresses to scan */ static unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c index 49972929a69b..34d85b1e5539 100644 --- a/drivers/hwmon/gl518sm.c +++ b/drivers/hwmon/gl518sm.c @@ -47,7 +47,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_2(gl518sm_r00, gl518sm_r80); diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index ce482e17e03c..b129d153deab 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -38,7 +38,6 @@ MODULE_PARM_DESC(extra_sensor_type, "Type of extra sensor (0=autodetect, 1=tempe /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(gl520sm); diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 722ef0cd5c00..0a8d795f15c2 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -48,7 +48,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; static unsigned short isa_address = 0x290; /* Insmod 
parameters */ diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index cba0a40ad667..e19b11fd4816 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -53,7 +53,6 @@ */ static unsigned short normal_i2c[] = { 0x4c, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index 129c8f213331..54dda7d11ace 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -32,7 +32,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(lm75); diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c index 15f30fdc75c6..d47aab3b3c05 100644 --- a/drivers/hwmon/lm77.c +++ b/drivers/hwmon/lm77.c @@ -36,7 +36,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(lm77); diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index c3712f8d9964..784935f77016 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -34,7 +34,6 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; static unsigned short isa_address = 0x290; /* Insmod parameters */ diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index dbf8df386250..fa2cb17018cc 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -33,7 +33,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(lm80); diff --git 
a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index f3f3901c7294..0223b4d2ce1d 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -47,7 +47,6 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 4203f904bbe2..8954d64614ff 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -35,7 +35,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_6(lm85b, lm85c, adm1027, adt7463, emc6d100, emc6d102); diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index 7e14858c257b..1dc3bf52b40d 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -68,7 +68,6 @@ */ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index c1e8d0e965f7..4b914ec205fa 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -91,7 +91,6 @@ */ static unsigned short normal_i2c[] = { 0x4c, 0x4d, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c index 0fb601c07519..9c43120d6bd7 100644 --- a/drivers/hwmon/lm92.c +++ b/drivers/hwmon/lm92.c @@ -52,7 +52,6 @@ resulting in 4 possible addresses. 
*/ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(lm92); diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c index 56c34c2d3619..5f0376575c62 100644 --- a/drivers/hwmon/max1619.c +++ b/drivers/hwmon/max1619.c @@ -39,7 +39,6 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, 0x29, 0x2a, 0x2b, 0x4c, 0x4d, 0x4e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index a4ab819ac360..70718559de75 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -50,7 +50,6 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; static unsigned short isa_address = 0x290; /* Insmod parameters */ diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c index 1f763499dac4..9cd1939cd040 100644 --- a/drivers/hwmon/w83l785ts.c +++ b/drivers/hwmon/w83l785ts.c @@ -49,7 +49,6 @@ */ static unsigned short normal_i2c[] = { 0x2e, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* * Insmod parameters diff --git a/drivers/i2c/chips/ds1337.c b/drivers/i2c/chips/ds1337.c index 82cf959989fd..6ac0a6e00769 100644 --- a/drivers/i2c/chips/ds1337.c +++ b/drivers/i2c/chips/ds1337.c @@ -39,7 +39,6 @@ * Functions declaration */ static unsigned short normal_i2c[] = { 0x68, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; SENSORS_INSMOD_1(ds1337); diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index a2da31b0dd7b..88f83bac3845 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -38,7 +38,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x50, 
0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(eeprom); diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index f0e306237739..d1d48586b90e 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -36,7 +36,6 @@ /* Do not scan - the MAX6875 access method will write to some EEPROM chips */ static unsigned short normal_i2c[] = {I2C_CLIENT_END}; -static unsigned int normal_isa[] = {I2C_CLIENT_ISA_END}; /* Insmod parameters */ SENSORS_INSMOD_1(max6875); diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c index 9f3ad45daae2..c5b052363d9a 100644 --- a/drivers/i2c/chips/pca9539.c +++ b/drivers/i2c/chips/pca9539.c @@ -17,7 +17,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = {0x74, 0x75, 0x76, 0x77, I2C_CLIENT_END}; -static unsigned int normal_isa[] = {I2C_CLIENT_ISA_END}; /* Insmod parameters */ SENSORS_INSMOD_1(pca9539); diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c index cfcf64654080..7a1fa7914630 100644 --- a/drivers/i2c/chips/pcf8574.c +++ b/drivers/i2c/chips/pcf8574.c @@ -45,7 +45,6 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_2(pcf8574, pcf8574a); diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c index db812ade8564..225b512dd4ad 100644 --- a/drivers/i2c/chips/pcf8591.c +++ b/drivers/i2c/chips/pcf8591.c @@ -29,7 +29,6 @@ /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; -static unsigned int normal_isa[] = { I2C_CLIENT_ISA_END }; /* Insmod parameters */ SENSORS_INSMOD_1(pcf8591); diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h 
index 21b625204956..ae73b9e789cb 100644 --- a/include/linux/i2c-sensor.h +++ b/include/linux/i2c-sensor.h @@ -27,11 +27,10 @@ that place. If a specific chip is given, the module blindly assumes this chip type is present; if a general force (kind == 0) is given, the module will still try to figure out what type of chip is present. This is useful - if for some reasons the detect for SMBus or ISA address space filled - fails. - probe: insmod parameter. Initialize this list with I2C_CLIENT_ISA_END values. - A list of pairs. The first value is a bus number (ANY_I2C_ISA_BUS for - the ISA bus, -1 for any I2C bus), the second is the address. + if for some reasons the detect for SMBus address space filled fails. + probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. + A list of pairs. The first value is a bus number (ANY_I2C_BUS for any + I2C bus), the second is the address. kind: The kind of chip. 0 equals any chip. */ struct i2c_force_data { @@ -40,25 +39,22 @@ struct i2c_force_data { }; /* A structure containing the detect information. - normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_ISA_END. + normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_END. A list of I2C addresses which should normally be examined. - normal_isa: filled in by the module writer. Terminated by SENSORS_ISA_END. - A list of ISA addresses which should normally be examined. - probe: insmod parameter. Initialize this list with I2C_CLIENT_ISA_END values. - A list of pairs. The first value is a bus number (ANY_I2C_ISA_BUS for - the ISA bus, -1 for any I2C bus), the second is the address. These - addresses are also probed, as if they were in the 'normal' list. - ignore: insmod parameter. Initialize this list with I2C_CLIENT_ISA_END values. - A list of pairs. The first value is a bus number (ANY_I2C_ISA_BUS for - the ISA bus, -1 for any I2C bus), the second is the I2C address. These - addresses are never probed. 
This parameter overrules 'normal' and - 'probe', but not the 'force' lists. + probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. + A list of pairs. The first value is a bus number (ANY_I2C_BUS for any + I2C bus), the second is the address. These addresses are also probed, + as if they were in the 'normal' list. + ignore: insmod parameter. Initialize this list with I2C_CLIENT_END values. + A list of pairs. The first value is a bus number (ANY_I2C_BUS for any + I2C bus), the second is the I2C address. These addresses are never + probed. This parameter overrules 'normal' and 'probe', but not the + 'force' lists. force_data: insmod parameters. A list, ending with an element of which the force field is NULL. */ struct i2c_address_data { unsigned short *normal_i2c; - unsigned int *normal_isa; unsigned short *probe; unsigned short *ignore; struct i2c_force_data *forces; @@ -78,7 +74,6 @@ struct i2c_address_data { "List of adapter,address pairs not to scan"); \ static struct i2c_address_data addr_data = { \ .normal_i2c = normal_i2c, \ - .normal_isa = normal_isa, \ .probe = probe, \ .ignore = ignore, \ .forces = forces, \ @@ -242,8 +237,7 @@ struct i2c_address_data { /* Detect function. It iterates over all possible addresses itself. For SMBus addresses, it will only call found_proc if some client is connected - to the SMBus (unless a 'force' matched); for ISA detections, this is not - done. */ + to the SMBus (unless a 'force' matched). 
*/ extern int i2c_detect(struct i2c_adapter *adapter, struct i2c_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3be06105034c..39ff363eadec 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -150,12 +150,9 @@ struct i2c_driver { */ struct i2c_client { unsigned int flags; /* div., see below */ - unsigned int addr; /* chip address - NOTE: 7bit */ + unsigned short addr; /* chip address - NOTE: 7bit */ /* addresses are stored in the */ - /* _LOWER_ 7 bits of this char */ - /* addr: unsigned int to make lm_sensors i2c-isa adapter work - more cleanly. It does not take any more memory space, due to - alignment considerations */ + /* _LOWER_ 7 bits */ struct i2c_adapter *adapter; /* the adapter we sit on */ struct i2c_driver *driver; /* and our access routines */ int usage_count; /* How many accesses currently */ @@ -309,7 +306,6 @@ struct i2c_client_address_data { /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -#define I2C_CLIENT_ISA_END 0xfffefffeU /* The numbers to use to set I2C bus address */ #define ANY_I2C_BUS 0xffff -- cgit v1.2.3 From 570aefc361d3315ec6749f573009286106b0b2d8 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Wed, 20 Jul 2005 00:09:03 +0200 Subject: [PATCH] I2C: Separate non-i2c hwmon drivers from i2c-core (9/9) Move the definitions of i2c_is_isa_client and i2c_is_isa_adapter from i2c.h to i2c-isa.h. Only hybrid drivers still need them. 
Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c-isa.h | 7 +++++++ include/linux/i2c.h | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h index b5727d7702e1..db793b68356c 100644 --- a/include/linux/i2c-isa.h +++ b/include/linux/i2c-isa.h @@ -26,4 +26,11 @@ extern int i2c_isa_add_driver(struct i2c_driver *driver); extern int i2c_isa_del_driver(struct i2c_driver *driver); +/* Detect whether we are on the isa bus. This is only useful to hybrid + (i2c+isa) drivers. */ +#define i2c_is_isa_client(clientptr) \ + ((clientptr)->adapter->algo->id == I2C_ALGO_ISA) +#define i2c_is_isa_adapter(adapptr) \ + ((adapptr)->algo->id == I2C_ALGO_ISA) + #endif /* _LINUX_I2C_ISA_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 39ff363eadec..da4faa016b17 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -580,11 +580,4 @@ union i2c_smbus_data { .force = force, \ } -/* Detect whether we are on the isa bus. If this returns true, all i2c - access will fail! */ -#define i2c_is_isa_client(clientptr) \ - ((clientptr)->adapter->algo->id == I2C_ALGO_ISA) -#define i2c_is_isa_adapter(adapptr) \ - ((adapptr)->algo->id == I2C_ALGO_ISA) - #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From 5563e27d3a42667734e81c1cb8ad72bff76321f6 Mon Sep 17 00:00:00 2001 From: "R.Marek@sh.cvut.cz" Date: Wed, 27 Jul 2005 11:43:47 +0000 Subject: [PATCH] I2C: W83792D driver 1/3 I would like to announce support for the W83792D chip. This driver was developed by Winbond Electronics Corp. I added sysfs attributes callbacks infrastructure plus various code fixes and coding style cleanups. I would like to thank Winbond for supporting free software. This patch is against 2.6.13rc3 plus hwmon-class and hwmon-split. Separate patches for documentation and hwmon class registration will follow. 
Signed-off-by: Rudolf Marek Signed-off-by: Chunhao Huang Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/Kconfig | 10 + drivers/hwmon/Makefile | 1 + drivers/hwmon/w83792d.c | 1633 +++++++++++++++++++++++++++++++++++++++++++ include/linux/hwmon-sysfs.h | 15 + 4 files changed, 1659 insertions(+) create mode 100644 drivers/hwmon/w83792d.c (limited to 'include/linux') diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 29583e1f7a20..6483ff696b5a 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -377,6 +377,16 @@ config SENSORS_W83781D This driver can also be built as a module. If so, the module will be called w83781d. +config SENSORS_W83792D + tristate "Winbond W83792D" + depends on HWMON && I2C && EXPERIMENTAL + select I2C_SENSOR + help + If you say yes here you get support for the Winbond W83792D chip. + + This driver can also be built as a module. If so, the module + will be called w83792d. + config SENSORS_W83L785TS tristate "Winbond W83L785TS-S" depends on HWMON && I2C && EXPERIMENTAL diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index bd1bd59eb149..187b89d47f83 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_HWMON) += hwmon.o # asb100, then w83781d go first, as they can override other drivers' addresses. obj-$(CONFIG_SENSORS_ASB100) += asb100.o obj-$(CONFIG_SENSORS_W83627HF) += w83627hf.o +obj-$(CONFIG_SENSORS_W83792D) += w83792d.o obj-$(CONFIG_SENSORS_W83781D) += w83781d.o obj-$(CONFIG_SENSORS_ADM1021) += adm1021.o diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c new file mode 100644 index 000000000000..fa43b6fd1212 --- /dev/null +++ b/drivers/hwmon/w83792d.c @@ -0,0 +1,1633 @@ +/* + w83792d.c - Part of lm_sensors, Linux kernel modules for hardware + monitoring + Copyright (C) 2004, 2005 Winbond Electronics Corp. 
+ Chunhao Huang , + Rudolf Marek + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + + Note: + 1. This driver is only for 2.6 kernel, 2.4 kernel need a different driver. + 2. This driver is only for Winbond W83792D C version device, there + are also some motherboards with B version W83792D device. The + calculation method to in6-in7(measured value, limits) is a little + different between C and B version. C or B version can be identified + by CR[0x49h]. 
+*/ + +/* + Supports following chips: + + Chip #vin #fanin #pwm #temp wchipid vendid i2c ISA + w83792d 9 7 7 3 0x7a 0x5ca3 yes no +*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* Addresses to scan */ +static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; + +/* Insmod parameters */ +SENSORS_INSMOD_1(w83792d); +I2C_CLIENT_MODULE_PARM(force_subclients, "List of subclient addresses: " + "{bus, clientaddr, subclientaddr1, subclientaddr2}"); + +static int init; +module_param(init, bool, 0); +MODULE_PARM_DESC(init, "Set to one to force chip initialization"); + +/* The W83792D registers */ +static const u8 W83792D_REG_IN[9] = { + 0x20, /* Vcore A in DataSheet */ + 0x21, /* Vcore B in DataSheet */ + 0x22, /* VIN0 in DataSheet */ + 0x23, /* VIN1 in DataSheet */ + 0x24, /* VIN2 in DataSheet */ + 0x25, /* VIN3 in DataSheet */ + 0x26, /* 5VCC in DataSheet */ + 0xB0, /* 5VSB in DataSheet */ + 0xB1 /* VBAT in DataSheet */ +}; +#define W83792D_REG_LOW_BITS1 0x3E /* Low Bits I in DataSheet */ +#define W83792D_REG_LOW_BITS2 0x3F /* Low Bits II in DataSheet */ +static const u8 W83792D_REG_IN_MAX[9] = { + 0x2B, /* Vcore A High Limit in DataSheet */ + 0x2D, /* Vcore B High Limit in DataSheet */ + 0x2F, /* VIN0 High Limit in DataSheet */ + 0x31, /* VIN1 High Limit in DataSheet */ + 0x33, /* VIN2 High Limit in DataSheet */ + 0x35, /* VIN3 High Limit in DataSheet */ + 0x37, /* 5VCC High Limit in DataSheet */ + 0xB4, /* 5VSB High Limit in DataSheet */ + 0xB6 /* VBAT High Limit in DataSheet */ +}; +static const u8 W83792D_REG_IN_MIN[9] = { + 0x2C, /* Vcore A Low Limit in DataSheet */ + 0x2E, /* Vcore B Low Limit in DataSheet */ + 0x30, /* VIN0 Low Limit in DataSheet */ + 0x32, /* VIN1 Low Limit in DataSheet */ + 0x34, /* VIN2 Low Limit in DataSheet */ + 0x36, /* VIN3 Low Limit in DataSheet */ + 0x38, /* 5VCC Low Limit in DataSheet */ + 0xB5, /* 5VSB Low Limit in DataSheet */ + 0xB7 /* VBAT Low Limit in DataSheet */ +}; 
+static const u8 W83792D_REG_FAN[7] = { + 0x28, /* FAN 1 Count in DataSheet */ + 0x29, /* FAN 2 Count in DataSheet */ + 0x2A, /* FAN 3 Count in DataSheet */ + 0xB8, /* FAN 4 Count in DataSheet */ + 0xB9, /* FAN 5 Count in DataSheet */ + 0xBA, /* FAN 6 Count in DataSheet */ + 0xBE /* FAN 7 Count in DataSheet */ +}; +static const u8 W83792D_REG_FAN_MIN[7] = { + 0x3B, /* FAN 1 Count Low Limit in DataSheet */ + 0x3C, /* FAN 2 Count Low Limit in DataSheet */ + 0x3D, /* FAN 3 Count Low Limit in DataSheet */ + 0xBB, /* FAN 4 Count Low Limit in DataSheet */ + 0xBC, /* FAN 5 Count Low Limit in DataSheet */ + 0xBD, /* FAN 6 Count Low Limit in DataSheet */ + 0xBF /* FAN 7 Count Low Limit in DataSheet */ +}; +#define W83792D_REG_FAN_CFG 0x84 /* FAN Configuration in DataSheet */ +static const u8 W83792D_REG_FAN_DIV[4] = { + 0x47, /* contains FAN2 and FAN1 Divisor */ + 0x5B, /* contains FAN4 and FAN3 Divisor */ + 0x5C, /* contains FAN6 and FAN5 Divisor */ + 0x9E /* contains FAN7 Divisor. */ +}; +static const u8 W83792D_REG_PWM[7] = { + 0x81, /* FAN 1 Duty Cycle, be used to control */ + 0x83, /* FAN 2 Duty Cycle, be used to control */ + 0x94, /* FAN 3 Duty Cycle, be used to control */ + 0xA3, /* FAN 4 Duty Cycle, be used to control */ + 0xA4, /* FAN 5 Duty Cycle, be used to control */ + 0xA5, /* FAN 6 Duty Cycle, be used to control */ + 0xA6 /* FAN 7 Duty Cycle, be used to control */ +}; +#define W83792D_REG_BANK 0x4E +#define W83792D_REG_TEMP2_CONFIG 0xC2 +#define W83792D_REG_TEMP3_CONFIG 0xCA + +static const u8 W83792D_REG_TEMP1[3] = { + 0x27, /* TEMP 1 in DataSheet */ + 0x39, /* TEMP 1 Over in DataSheet */ + 0x3A, /* TEMP 1 Hyst in DataSheet */ +}; + +static const u8 W83792D_REG_TEMP_ADD[2][6] = { + { 0xC0, /* TEMP 2 in DataSheet */ + 0xC1, /* TEMP 2(0.5 deg) in DataSheet */ + 0xC5, /* TEMP 2 Over High part in DataSheet */ + 0xC6, /* TEMP 2 Over Low part in DataSheet */ + 0xC3, /* TEMP 2 Thyst High part in DataSheet */ + 0xC4 }, /* TEMP 2 Thyst Low part in DataSheet */ + { 
0xC8, /* TEMP 3 in DataSheet */ + 0xC9, /* TEMP 3(0.5 deg) in DataSheet */ + 0xCD, /* TEMP 3 Over High part in DataSheet */ + 0xCE, /* TEMP 3 Over Low part in DataSheet */ + 0xCB, /* TEMP 3 Thyst High part in DataSheet */ + 0xCC } /* TEMP 3 Thyst Low part in DataSheet */ +}; + +static const u8 W83792D_REG_THERMAL[3] = { + 0x85, /* SmartFanI: Fan1 target value */ + 0x86, /* SmartFanI: Fan2 target value */ + 0x96 /* SmartFanI: Fan3 target value */ +}; + +static const u8 W83792D_REG_TOLERANCE[3] = { + 0x87, /* (bit3-0)SmartFan Fan1 tolerance */ + 0x87, /* (bit7-4)SmartFan Fan2 tolerance */ + 0x97 /* (bit3-0)SmartFan Fan3 tolerance */ +}; + +static const u8 W83792D_REG_POINTS[3][4] = { + { 0x85, /* SmartFanII: Fan1 temp point 1 */ + 0xE3, /* SmartFanII: Fan1 temp point 2 */ + 0xE4, /* SmartFanII: Fan1 temp point 3 */ + 0xE5 }, /* SmartFanII: Fan1 temp point 4 */ + { 0x86, /* SmartFanII: Fan2 temp point 1 */ + 0xE6, /* SmartFanII: Fan2 temp point 2 */ + 0xE7, /* SmartFanII: Fan2 temp point 3 */ + 0xE8 }, /* SmartFanII: Fan2 temp point 4 */ + { 0x96, /* SmartFanII: Fan3 temp point 1 */ + 0xE9, /* SmartFanII: Fan3 temp point 2 */ + 0xEA, /* SmartFanII: Fan3 temp point 3 */ + 0xEB } /* SmartFanII: Fan3 temp point 4 */ +}; + +static const u8 W83792D_REG_LEVELS[3][4] = { + { 0x88, /* (bit3-0) SmartFanII: Fan1 Non-Stop */ + 0x88, /* (bit7-4) SmartFanII: Fan1 Level 1 */ + 0xE0, /* (bit7-4) SmartFanII: Fan1 Level 2 */ + 0xE0 }, /* (bit3-0) SmartFanII: Fan1 Level 3 */ + { 0x89, /* (bit3-0) SmartFanII: Fan2 Non-Stop */ + 0x89, /* (bit7-4) SmartFanII: Fan2 Level 1 */ + 0xE1, /* (bit7-4) SmartFanII: Fan2 Level 2 */ + 0xE1 }, /* (bit3-0) SmartFanII: Fan2 Level 3 */ + { 0x98, /* (bit3-0) SmartFanII: Fan3 Non-Stop */ + 0x98, /* (bit7-4) SmartFanII: Fan3 Level 1 */ + 0xE2, /* (bit7-4) SmartFanII: Fan3 Level 2 */ + 0xE2 } /* (bit3-0) SmartFanII: Fan3 Level 3 */ +}; + +#define W83792D_REG_CONFIG 0x40 +#define W83792D_REG_VID_FANDIV 0x47 +#define W83792D_REG_CHIPID 0x49 +#define 
W83792D_REG_WCHIPID 0x58 +#define W83792D_REG_CHIPMAN 0x4F +#define W83792D_REG_PIN 0x4B +#define W83792D_REG_I2C_SUBADDR 0x4A + +#define W83792D_REG_ALARM1 0xA9 /* realtime status register1 */ +#define W83792D_REG_ALARM2 0xAA /* realtime status register2 */ +#define W83792D_REG_ALARM3 0xAB /* realtime status register3 */ +#define W83792D_REG_CHASSIS 0x42 /* Bit 5: Case Open status bit */ +#define W83792D_REG_CHASSIS_CLR 0x44 /* Bit 7: Case Open CLR_CHS/Reset bit */ + +/* control in0/in1 's limit modifiability */ +#define W83792D_REG_VID_IN_B 0x17 + +#define W83792D_REG_VBAT 0x5D +#define W83792D_REG_I2C_ADDR 0x48 + +/* Conversions. Rounding and limit checking is only done on the TO_REG + variants. Note that you should be a bit careful with which arguments + these macros are called: arguments may be evaluated more than once. + Fixing this is just not worth it. */ +#define IN_FROM_REG(nr,val) (((nr)<=1)?(val*2): \ + ((((nr)==6)||((nr)==7))?(val*6):(val*4))) +#define IN_TO_REG(nr,val) (((nr)<=1)?(val/2): \ + ((((nr)==6)||((nr)==7))?(val/6):(val/4))) + +static inline u8 +FAN_TO_REG(long rpm, int div) +{ + if (rpm == 0) + return 255; + rpm = SENSORS_LIMIT(rpm, 1, 1000000); + return SENSORS_LIMIT((1350000 + rpm * div / 2) / (rpm * div), 1, 254); +} + +#define FAN_FROM_REG(val,div) ((val) == 0 ? -1 : \ + ((val) == 255 ? 0 : \ + 1350000 / ((val) * (div)))) + +/* for temp1 */ +#define TEMP1_TO_REG(val) (SENSORS_LIMIT(((val) < 0 ? (val)+0x100*1000 \ + : (val)) / 1000, 0, 0xff)) +#define TEMP1_FROM_REG(val) (((val) & 0x80 ? (val)-0x100 : (val)) * 1000) +/* for temp2 and temp3, because they need additional resolution */ +#define TEMP_ADD_FROM_REG(val1, val2) \ + ((((val1) & 0x80 ? (val1)-0x100 \ + : (val1)) * 1000) + ((val2 & 0x80) ? 500 : 0)) +#define TEMP_ADD_TO_REG_HIGH(val) \ + (SENSORS_LIMIT(((val) < 0 ? (val)+0x100*1000 \ + : (val)) / 1000, 0, 0xff)) +#define TEMP_ADD_TO_REG_LOW(val) ((val%1000) ? 
0x80 : 0x00) + +#define PWM_FROM_REG(val) (val) +#define PWM_TO_REG(val) (SENSORS_LIMIT((val),0,255)) +#define DIV_FROM_REG(val) (1 << (val)) + +static inline u8 +DIV_TO_REG(long val) +{ + int i; + val = SENSORS_LIMIT(val, 1, 128) >> 1; + for (i = 0; i < 6; i++) { + if (val == 0) + break; + val >>= 1; + } + return ((u8) i); +} + +struct w83792d_data { + struct i2c_client client; + struct semaphore lock; + enum chips type; + + struct semaphore update_lock; + char valid; /* !=0 if following fields are valid */ + unsigned long last_updated; /* In jiffies */ + + /* array of 2 pointers to subclients */ + struct i2c_client *lm75[2]; + + u8 in[9]; /* Register value */ + u8 in_max[9]; /* Register value */ + u8 in_min[9]; /* Register value */ + u8 low_bits[2]; /* Additional resolution to voltage in0-6 */ + u8 fan[7]; /* Register value */ + u8 fan_min[7]; /* Register value */ + u8 temp1[3]; /* current, over, thyst */ + u8 temp_add[2][6]; /* Register value */ + u8 fan_div[7]; /* Register encoding, shifted right */ + u8 pwm[7]; /* We only consider the first 3 set of pwm, + although 792 chip has 7 set of pwm. 
*/ + u8 pwmenable[3]; + u8 pwm_mode[7]; /* indicates PWM or DC mode: 1->PWM; 0->DC */ + u32 alarms; /* realtime status register encoding,combined */ + u8 chassis; /* Chassis status */ + u8 chassis_clear; /* CLR_CHS, clear chassis intrusion detection */ + u8 thermal_cruise[3]; /* Smart FanI: Fan1,2,3 target value */ + u8 tolerance[3]; /* Fan1,2,3 tolerance(Smart Fan I/II) */ + u8 sf2_points[3][4]; /* Smart FanII: Fan1,2,3 temperature points */ + u8 sf2_levels[3][4]; /* Smart FanII: Fan1,2,3 duty cycle levels */ +}; + +static int w83792d_attach_adapter(struct i2c_adapter *adapter); +static int w83792d_detect(struct i2c_adapter *adapter, int address, int kind); +static int w83792d_detach_client(struct i2c_client *client); + +static int w83792d_read_value(struct i2c_client *client, u8 register); +static int w83792d_write_value(struct i2c_client *client, u8 register, + u8 value); +static struct w83792d_data *w83792d_update_device(struct device *dev); + +#ifdef DEBUG +static void w83792d_print_debug(struct w83792d_data *data, struct device *dev); +#endif + +static void w83792d_init_client(struct i2c_client *client); + +static struct i2c_driver w83792d_driver = { + .owner = THIS_MODULE, + .name = "w83792d", + .flags = I2C_DF_NOTIFY, + .attach_adapter = w83792d_attach_adapter, + .detach_client = w83792d_detach_client, +}; + +static long in_count_from_reg(int nr, struct w83792d_data *data) +{ + u16 vol_count = data->in[nr]; + u16 low_bits = 0; + vol_count = (vol_count << 2); + switch (nr) + { + case 0: /* vin0 */ + low_bits = (data->low_bits[0]) & 0x03; + break; + case 1: /* vin1 */ + low_bits = ((data->low_bits[0]) & 0x0c) >> 2; + break; + case 2: /* vin2 */ + low_bits = ((data->low_bits[0]) & 0x30) >> 4; + break; + case 3: /* vin3 */ + low_bits = ((data->low_bits[0]) & 0xc0) >> 6; + break; + case 4: /* vin4 */ + low_bits = (data->low_bits[1]) & 0x03; + break; + case 5: /* vin5 */ + low_bits = ((data->low_bits[1]) & 0x0c) >> 2; + break; + case 6: /* vin6 */ + low_bits = 
((data->low_bits[1]) & 0x30) >> 4; + default: + break; + } + vol_count = vol_count | low_bits; + return vol_count; +} + +/* following are the sysfs callback functions */ +static ssize_t show_in(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf,"%ld\n", IN_FROM_REG(nr,(in_count_from_reg(nr, data)))); +} + +#define show_in_reg(reg) \ +static ssize_t show_##reg(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); \ + int nr = sensor_attr->index; \ + struct w83792d_data *data = w83792d_update_device(dev); \ + return sprintf(buf,"%ld\n", (long)(IN_FROM_REG(nr, (data->reg[nr])*4))); \ +} + +show_in_reg(in_min); +show_in_reg(in_max); + +#define store_in_reg(REG, reg) \ +static ssize_t store_in_##reg (struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); \ + int nr = sensor_attr->index; \ + struct i2c_client *client = to_i2c_client(dev); \ + struct w83792d_data *data = i2c_get_clientdata(client); \ + u32 val; \ + \ + val = simple_strtoul(buf, NULL, 10); \ + data->in_##reg[nr] = SENSORS_LIMIT(IN_TO_REG(nr, val)/4, 0, 255); \ + w83792d_write_value(client, W83792D_REG_IN_##REG[nr], data->in_##reg[nr]); \ + \ + return count; \ +} +store_in_reg(MIN, min); +store_in_reg(MAX, max); + +#define sysfs_in_reg(offset) \ +static SENSOR_DEVICE_ATTR(in##offset##_input, S_IRUGO, show_in, \ + NULL, offset); \ +static SENSOR_DEVICE_ATTR(in##offset##_min, S_IRUGO | S_IWUSR, \ + show_in_min, store_in_min, offset); \ +static SENSOR_DEVICE_ATTR(in##offset##_max, S_IRUGO | S_IWUSR, \ + show_in_max, store_in_max, offset); + +sysfs_in_reg(0); +sysfs_in_reg(1); +sysfs_in_reg(2); 
+sysfs_in_reg(3); +sysfs_in_reg(4); +sysfs_in_reg(5); +sysfs_in_reg(6); +sysfs_in_reg(7); +sysfs_in_reg(8); + +#define device_create_file_in(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_in##offset##_input.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_in##offset##_max.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_in##offset##_min.dev_attr); \ +} while (0) + +#define show_fan_reg(reg) \ +static ssize_t show_##reg (struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); \ + int nr = sensor_attr->index - 1; \ + struct w83792d_data *data = w83792d_update_device(dev); \ + return sprintf(buf,"%d\n", \ + FAN_FROM_REG(data->reg[nr], DIV_FROM_REG(data->fan_div[nr]))); \ +} + +show_fan_reg(fan); +show_fan_reg(fan_min); + +static ssize_t +store_fan_min(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + + val = simple_strtoul(buf, NULL, 10); + data->fan_min[nr] = FAN_TO_REG(val, DIV_FROM_REG(data->fan_div[nr])); + w83792d_write_value(client, W83792D_REG_FAN_MIN[nr], + data->fan_min[nr]); + + return count; +} + +static ssize_t +show_fan_div(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%u\n", DIV_FROM_REG(data->fan_div[nr - 1])); +} + +/* Note: we save and restore the fan minimum here, because its value is + determined in part by the fan divisor. 
This follows the principle of + least surprise; the user doesn't expect the fan minimum to change just + because the divisor changed. */ +static ssize_t +store_fan_div(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + unsigned long min; + /*u8 reg;*/ + u8 fan_div_reg = 0; + u8 tmp_fan_div; + + /* Save fan_min */ + min = FAN_FROM_REG(data->fan_min[nr], + DIV_FROM_REG(data->fan_div[nr])); + + data->fan_div[nr] = DIV_TO_REG(simple_strtoul(buf, NULL, 10)); + + fan_div_reg = w83792d_read_value(client, W83792D_REG_FAN_DIV[nr >> 1]); + fan_div_reg &= (nr & 0x01) ? 0x8f : 0xf8; + tmp_fan_div = (nr & 0x01) ? (((data->fan_div[nr]) << 4) & 0x70) + : ((data->fan_div[nr]) & 0x07); + w83792d_write_value(client, W83792D_REG_FAN_DIV[nr >> 1], + fan_div_reg | tmp_fan_div); + + /* Restore fan_min */ + data->fan_min[nr] = FAN_TO_REG(min, DIV_FROM_REG(data->fan_div[nr])); + w83792d_write_value(client, W83792D_REG_FAN_MIN[nr], data->fan_min[nr]); + + return count; +} + +#define sysfs_fan(offset) \ +static SENSOR_DEVICE_ATTR(fan##offset##_input, S_IRUGO, show_fan, NULL, \ + offset); \ +static SENSOR_DEVICE_ATTR(fan##offset##_div, S_IRUGO | S_IWUSR, \ + show_fan_div, store_fan_div, offset); \ +static SENSOR_DEVICE_ATTR(fan##offset##_min, S_IRUGO | S_IWUSR, \ + show_fan_min, store_fan_min, offset); + +sysfs_fan(1); +sysfs_fan(2); +sysfs_fan(3); +sysfs_fan(4); +sysfs_fan(5); +sysfs_fan(6); +sysfs_fan(7); + +#define device_create_file_fan(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_fan##offset##_input.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_fan##offset##_div.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_fan##offset##_min.dev_attr); \ +} while (0) + + +/* read/write 
the temperature1, includes measured value and limits */ + +static ssize_t show_temp1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", TEMP1_FROM_REG(data->temp1[nr])); +} + +static ssize_t store_temp1(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + s32 val; + + val = simple_strtol(buf, NULL, 10); + + data->temp1[nr] = TEMP1_TO_REG(val); + w83792d_write_value(client, W83792D_REG_TEMP1[nr], + data->temp1[nr]); + + return count; +} + + +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp1, NULL, 0); +static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO | S_IWUSR, show_temp1, + store_temp1, 1); +static SENSOR_DEVICE_ATTR(temp1_max_hyst, S_IRUGO | S_IWUSR, show_temp1, + store_temp1, 2); + +#define device_create_file_temp1(client) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_temp1_input.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_temp1_max.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_temp1_max_hyst.dev_attr); \ +} while (0) + + +/* read/write the temperature2-3, includes measured value and limits */ + +static ssize_t show_temp23(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr; + int index = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf,"%ld\n", + (long)TEMP_ADD_FROM_REG(data->temp_add[nr][index], + data->temp_add[nr][index+1])); +} + +static ssize_t store_temp23(struct device *dev, struct 
device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr; + int index = sensor_attr->index; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + s32 val; + + val = simple_strtol(buf, NULL, 10); + + data->temp_add[nr][index] = TEMP_ADD_TO_REG_HIGH(val); + data->temp_add[nr][index+1] = TEMP_ADD_TO_REG_LOW(val); + w83792d_write_value(client, W83792D_REG_TEMP_ADD[nr][index], + data->temp_add[nr][index]); + w83792d_write_value(client, W83792D_REG_TEMP_ADD[nr][index+1], + data->temp_add[nr][index+1]); + + return count; +} + +#define sysfs_temp23(name,idx) \ +static SENSOR_DEVICE_ATTR_2(name##_input, S_IRUGO, show_temp23, NULL, \ + idx, 0); \ +static SENSOR_DEVICE_ATTR_2(name##_max, S_IRUGO | S_IWUSR, \ + show_temp23, store_temp23, idx, 2); \ +static SENSOR_DEVICE_ATTR_2(name##_max_hyst, S_IRUGO | S_IWUSR, \ + show_temp23, store_temp23, idx, 4); + +sysfs_temp23(temp2,0) +sysfs_temp23(temp3,1) + +#define device_create_file_temp_add(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_temp##offset##_input.dev_attr); \ +device_create_file(&client->dev, &sensor_dev_attr_temp##offset##_max.dev_attr); \ +device_create_file(&client->dev, \ +&sensor_dev_attr_temp##offset##_max_hyst.dev_attr); \ +} while (0) + + +/* get realtime status of all sensor items: voltage, temp, fan */ +static ssize_t +show_alarms_reg(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", data->alarms); +} + +static +DEVICE_ATTR(alarms, S_IRUGO, show_alarms_reg, NULL); +#define device_create_file_alarms(client) \ +device_create_file(&client->dev, &dev_attr_alarms); + + + +static ssize_t +show_pwm(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr =
to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%ld\n", (long) PWM_FROM_REG(data->pwm[nr-1])); +} + +static ssize_t +show_pwmenable(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct w83792d_data *data = w83792d_update_device(dev); + long pwm_enable_tmp = 1; + + switch (data->pwmenable[nr]) { + case 0: + pwm_enable_tmp = 1; /* manual mode */ + break; + case 1: + pwm_enable_tmp = 3; /*thermal cruise/Smart Fan I */ + break; + case 2: + pwm_enable_tmp = 2; /* Smart Fan II */ + break; + } + + return sprintf(buf, "%ld\n", pwm_enable_tmp); +} + +static ssize_t +store_pwm(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + + val = simple_strtoul(buf, NULL, 10); + data->pwm[nr] = PWM_TO_REG(val); + w83792d_write_value(client, W83792D_REG_PWM[nr], data->pwm[nr]); + + return count; +} + +static ssize_t +store_pwmenable(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 fan_cfg_tmp, cfg1_tmp, cfg2_tmp, cfg3_tmp, cfg4_tmp; + + val = simple_strtoul(buf, NULL, 10); + switch (val) { + case 1: + data->pwmenable[nr] = 0; /* manual mode */ + break; + case 2: + data->pwmenable[nr] = 2; /* Smart Fan II */ + break; + case 3: + data->pwmenable[nr] = 1; /* thermal cruise/Smart Fan I */ + break; + default: + return -EINVAL; + } + cfg1_tmp = 
data->pwmenable[0]; + cfg2_tmp = (data->pwmenable[1]) << 2; + cfg3_tmp = (data->pwmenable[2]) << 4; + cfg4_tmp = w83792d_read_value(client,W83792D_REG_FAN_CFG) & 0xc0; + fan_cfg_tmp = ((cfg4_tmp | cfg3_tmp) | cfg2_tmp) | cfg1_tmp; + w83792d_write_value(client, W83792D_REG_FAN_CFG, fan_cfg_tmp); + + return count; +} + +#define sysfs_pwm(offset) \ +static SENSOR_DEVICE_ATTR(pwm##offset, S_IRUGO | S_IWUSR, \ + show_pwm, store_pwm, offset); \ +static SENSOR_DEVICE_ATTR(pwm##offset##_enable, S_IRUGO | S_IWUSR, \ + show_pwmenable, store_pwmenable, offset); \ + +sysfs_pwm(1); +sysfs_pwm(2); +sysfs_pwm(3); + + +#define device_create_file_pwm(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_pwm##offset.dev_attr); \ +} while (0) + +#define device_create_file_pwmenable(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_pwm##offset##_enable.dev_attr); \ +} while (0) + + +static ssize_t +show_pwm_mode(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", data->pwm_mode[nr-1]); +} + +static ssize_t +store_pwm_mode(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 pwm_mode_mask = 0; + + val = simple_strtoul(buf, NULL, 10); + data->pwm_mode[nr] = SENSORS_LIMIT(val, 0, 1); + pwm_mode_mask = w83792d_read_value(client, + W83792D_REG_PWM[nr]) & 0x7f; + w83792d_write_value(client, W83792D_REG_PWM[nr], + ((data->pwm_mode[nr]) << 7) | pwm_mode_mask); + + return count; +} + +#define sysfs_pwm_mode(offset) \ +static SENSOR_DEVICE_ATTR(pwm##offset##_mode, S_IRUGO | 
S_IWUSR, \ + show_pwm_mode, store_pwm_mode, offset); + +sysfs_pwm_mode(1); +sysfs_pwm_mode(2); +sysfs_pwm_mode(3); + +#define device_create_file_pwm_mode(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_pwm##offset##_mode.dev_attr); \ +} while (0) + + +static ssize_t +show_regs_chassis(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", data->chassis); +} + +static DEVICE_ATTR(chassis, S_IRUGO, show_regs_chassis, NULL); + +#define device_create_file_chassis(client) \ +do { \ +device_create_file(&client->dev, &dev_attr_chassis); \ +} while (0) + + +static ssize_t +show_chassis_clear(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", data->chassis_clear); +} + +static ssize_t +store_chassis_clear(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 temp1 = 0, temp2 = 0; + + val = simple_strtoul(buf, NULL, 10); + + data->chassis_clear = SENSORS_LIMIT(val, 0 ,1); + temp1 = ((data->chassis_clear) << 7) & 0x80; + temp2 = w83792d_read_value(client, + W83792D_REG_CHASSIS_CLR) & 0x7f; + w83792d_write_value(client, W83792D_REG_CHASSIS_CLR, temp1 | temp2); + + return count; +} + +static DEVICE_ATTR(chassis_clear, S_IRUGO | S_IWUSR, + show_chassis_clear, store_chassis_clear); + +#define device_create_file_chassis_clear(client) \ +do { \ +device_create_file(&client->dev, &dev_attr_chassis_clear); \ +} while (0) + + + +/* For Smart Fan I / Thermal Cruise */ +static ssize_t +show_thermal_cruise(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = 
w83792d_update_device(dev); + return sprintf(buf, "%ld\n", (long)data->thermal_cruise[nr-1]); +} + +static ssize_t +store_thermal_cruise(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 target_tmp=0, target_mask=0; + + val = simple_strtoul(buf, NULL, 10); + target_tmp = val; + target_tmp = target_tmp & 0x7f; + target_mask = w83792d_read_value(client, W83792D_REG_THERMAL[nr]) & 0x80; + data->thermal_cruise[nr] = SENSORS_LIMIT(target_tmp, 0, 255); + w83792d_write_value(client, W83792D_REG_THERMAL[nr], + (data->thermal_cruise[nr]) | target_mask); + + return count; +} + +#define sysfs_thermal_cruise(offset) \ +static SENSOR_DEVICE_ATTR(thermal_cruise##offset, S_IRUGO | S_IWUSR, \ + show_thermal_cruise, store_thermal_cruise, offset); + +sysfs_thermal_cruise(1); +sysfs_thermal_cruise(2); +sysfs_thermal_cruise(3); + +#define device_create_file_thermal_cruise(client, offset) \ +do { \ +device_create_file(&client->dev, \ +&sensor_dev_attr_thermal_cruise##offset.dev_attr); \ +} while (0) + + +/* For Smart Fan I/Thermal Cruise and Smart Fan II */ +static ssize_t +show_tolerance(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%ld\n", (long)data->tolerance[nr-1]); +} + +static ssize_t +store_tolerance(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute *sensor_attr = to_sensor_dev_attr(attr); + int nr = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; 
+ u8 tol_tmp, tol_mask; + + val = simple_strtoul(buf, NULL, 10); + tol_mask = w83792d_read_value(client, + W83792D_REG_TOLERANCE[nr]) & ((nr == 1) ? 0x0f : 0xf0); + tol_tmp = SENSORS_LIMIT(val, 0, 15); + tol_tmp &= 0x0f; + data->tolerance[nr] = tol_tmp; + if (nr == 1) { + tol_tmp <<= 4; + } + w83792d_write_value(client, W83792D_REG_TOLERANCE[nr], + tol_mask | tol_tmp); + + return count; +} + +#define sysfs_tolerance(offset) \ +static SENSOR_DEVICE_ATTR(tolerance##offset, S_IRUGO | S_IWUSR, \ + show_tolerance, store_tolerance, offset); + +sysfs_tolerance(1); +sysfs_tolerance(2); +sysfs_tolerance(3); + +#define device_create_file_tolerance(client, offset) \ +do { \ +device_create_file(&client->dev, &sensor_dev_attr_tolerance##offset.dev_attr); \ +} while (0) + + +/* For Smart Fan II */ +static ssize_t +show_sf2_point(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr; + int index = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%ld\n", (long)data->sf2_points[index-1][nr-1]); +} + +static ssize_t +store_sf2_point(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr - 1; + int index = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 mask_tmp = 0; + + val = simple_strtoul(buf, NULL, 10); + data->sf2_points[index][nr] = SENSORS_LIMIT(val, 0, 127); + mask_tmp = w83792d_read_value(client, + W83792D_REG_POINTS[index][nr]) & 0x80; + w83792d_write_value(client, W83792D_REG_POINTS[index][nr], + mask_tmp|data->sf2_points[index][nr]); + + return count; +} + +#define sysfs_sf2_point(offset, index) \ +static SENSOR_DEVICE_ATTR_2(sf2_point##offset##_fan##index, S_IRUGO | 
S_IWUSR, \ + show_sf2_point, store_sf2_point, offset, index); + +sysfs_sf2_point(1, 1); /* Fan1 */ +sysfs_sf2_point(2, 1); /* Fan1 */ +sysfs_sf2_point(3, 1); /* Fan1 */ +sysfs_sf2_point(4, 1); /* Fan1 */ +sysfs_sf2_point(1, 2); /* Fan2 */ +sysfs_sf2_point(2, 2); /* Fan2 */ +sysfs_sf2_point(3, 2); /* Fan2 */ +sysfs_sf2_point(4, 2); /* Fan2 */ +sysfs_sf2_point(1, 3); /* Fan3 */ +sysfs_sf2_point(2, 3); /* Fan3 */ +sysfs_sf2_point(3, 3); /* Fan3 */ +sysfs_sf2_point(4, 3); /* Fan3 */ + +#define device_create_file_sf2_point(client, offset, index) \ +do { \ +device_create_file(&client->dev, \ +&sensor_dev_attr_sf2_point##offset##_fan##index.dev_attr); \ +} while (0) + + +static ssize_t +show_sf2_level(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr; + int index = sensor_attr->index; + struct w83792d_data *data = w83792d_update_device(dev); + return sprintf(buf, "%d\n", + (((data->sf2_levels[index-1][nr]) * 100) / 15)); +} + +static ssize_t +store_sf2_level(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct sensor_device_attribute_2 *sensor_attr = to_sensor_dev_attr_2(attr); + int nr = sensor_attr->nr; + int index = sensor_attr->index - 1; + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + u32 val; + u8 mask_tmp=0, level_tmp=0; + + val = simple_strtoul(buf, NULL, 10); + data->sf2_levels[index][nr] = SENSORS_LIMIT((val * 15) / 100, 0, 15); + mask_tmp = w83792d_read_value(client, W83792D_REG_LEVELS[index][nr]) + & ((nr==3) ? 
0xf0 : 0x0f); + if (nr==3) { + level_tmp = data->sf2_levels[index][nr]; + } else { + level_tmp = data->sf2_levels[index][nr] << 4; + } + w83792d_write_value(client, W83792D_REG_LEVELS[index][nr], level_tmp | mask_tmp); + + return count; +} + +#define sysfs_sf2_level(offset, index) \ +static SENSOR_DEVICE_ATTR_2(sf2_level##offset##_fan##index, S_IRUGO | S_IWUSR, \ + show_sf2_level, store_sf2_level, offset, index); + +sysfs_sf2_level(1, 1); /* Fan1 */ +sysfs_sf2_level(2, 1); /* Fan1 */ +sysfs_sf2_level(3, 1); /* Fan1 */ +sysfs_sf2_level(1, 2); /* Fan2 */ +sysfs_sf2_level(2, 2); /* Fan2 */ +sysfs_sf2_level(3, 2); /* Fan2 */ +sysfs_sf2_level(1, 3); /* Fan3 */ +sysfs_sf2_level(2, 3); /* Fan3 */ +sysfs_sf2_level(3, 3); /* Fan3 */ + +#define device_create_file_sf2_level(client, offset, index) \ +do { \ +device_create_file(&client->dev, \ +&sensor_dev_attr_sf2_level##offset##_fan##index.dev_attr); \ +} while (0) + + +/* This function is called when: + * w83792d_driver is inserted (when this module is loaded), for each + available adapter + * when a new adapter is inserted (and w83792d_driver is still present) */ +static int +w83792d_attach_adapter(struct i2c_adapter *adapter) +{ + if (!(adapter->class & I2C_CLASS_HWMON)) + return 0; + return i2c_detect(adapter, &addr_data, w83792d_detect); +} + + +static int +w83792d_create_subclient(struct i2c_adapter *adapter, + struct i2c_client *new_client, int addr, + struct i2c_client **sub_cli) +{ + int err; + struct i2c_client *sub_client; + + (*sub_cli) = sub_client = kmalloc(sizeof(struct i2c_client), GFP_KERNEL); + if (!(sub_client)) { + return -ENOMEM; + } + memset(sub_client, 0x00, sizeof(struct i2c_client)); + sub_client->addr = 0x48 + addr; + i2c_set_clientdata(sub_client, NULL); + sub_client->adapter = adapter; + sub_client->driver = &w83792d_driver; + sub_client->flags = 0; + strlcpy(sub_client->name, "w83792d subclient", I2C_NAME_SIZE); + if ((err = i2c_attach_client(sub_client))) { + dev_err(&new_client->dev, "subclient 
registration " + "at address 0x%x failed\n", sub_client->addr); + kfree(sub_client); + return err; + } + return 0; +} + + +static int +w83792d_detect_subclients(struct i2c_adapter *adapter, int address, int kind, + struct i2c_client *new_client) +{ + int i, id, err; + u8 val; + struct w83792d_data *data = i2c_get_clientdata(new_client); + + id = i2c_adapter_id(adapter); + if (force_subclients[0] == id && force_subclients[1] == address) { + for (i = 2; i <= 3; i++) { + if (force_subclients[i] < 0x48 || + force_subclients[i] > 0x4f) { + dev_err(&new_client->dev, "invalid subclient " + "address %d; must be 0x48-0x4f\n", + force_subclients[i]); + err = -ENODEV; + goto ERROR_SC_0; + } + } + w83792d_write_value(new_client, W83792D_REG_I2C_SUBADDR, + (force_subclients[2] & 0x07) | + ((force_subclients[3] & 0x07) << 4)); + } + + val = w83792d_read_value(new_client, W83792D_REG_I2C_SUBADDR); + if (!(val & 0x08)) { + err = w83792d_create_subclient(adapter, new_client, val & 0x7, + &data->lm75[0]); + if (err < 0) + goto ERROR_SC_0; + } + if (!(val & 0x80)) { + if ((data->lm75[0] != NULL) && + ((val & 0x7) == ((val >> 4) & 0x7))) { + dev_err(&new_client->dev, "duplicate addresses 0x%x, " + "use force_subclient\n", data->lm75[0]->addr); + err = -ENODEV; + goto ERROR_SC_1; + } + err = w83792d_create_subclient(adapter, new_client, + (val >> 4) & 0x7, &data->lm75[1]); + if (err < 0) + goto ERROR_SC_1; + } + + return 0; + +/* Undo inits in case of errors */ + +ERROR_SC_1: + if (data->lm75[0] != NULL) { + i2c_detach_client(data->lm75[0]); + kfree(data->lm75[0]); + } +ERROR_SC_0: + return err; +} + + +static int +w83792d_detect(struct i2c_adapter *adapter, int address, int kind) +{ + int i = 0, val1 = 0, val2; + struct i2c_client *new_client; + struct w83792d_data *data; + int err = 0; + const char *client_name = ""; + + if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_BYTE_DATA)) { + goto ERROR0; + } + + /* OK. For now, we presume we have a valid client. 
We now create the + client structure, even though we cannot fill it completely yet. + But it allows us to access w83792d_{read,write}_value. */ + + if (!(data = kmalloc(sizeof(struct w83792d_data), GFP_KERNEL))) { + err = -ENOMEM; + goto ERROR0; + } + memset(data, 0, sizeof(struct w83792d_data)); + + new_client = &data->client; + i2c_set_clientdata(new_client, data); + new_client->addr = address; + init_MUTEX(&data->lock); + new_client->adapter = adapter; + new_client->driver = &w83792d_driver; + new_client->flags = 0; + + /* Now, we do the remaining detection. */ + + /* The w83792d may be stuck in some other bank than bank 0. This may + make reading other information impossible. Specify a force=... or + force_*=... parameter, and the Winbond will be reset to the right + bank. */ + if (kind < 0) { + if (w83792d_read_value(new_client, W83792D_REG_CONFIG) & 0x80) { + dev_warn(&new_client->dev, "Detection failed at step " + "3\n"); + goto ERROR1; + } + val1 = w83792d_read_value(new_client, W83792D_REG_BANK); + val2 = w83792d_read_value(new_client, W83792D_REG_CHIPMAN); + /* Check for Winbond ID if in bank 0 */ + if (!(val1 & 0x07)) { /* is Bank0 */ + if (((!(val1 & 0x80)) && (val2 != 0xa3)) || + ((val1 & 0x80) && (val2 != 0x5c))) { + goto ERROR1; + } + } + /* If Winbond chip, address of chip and W83792D_REG_I2C_ADDR + should match */ + if (w83792d_read_value(new_client, + W83792D_REG_I2C_ADDR) != address) { + dev_warn(&new_client->dev, "Detection failed " + "at step 5\n"); + goto ERROR1; + } + } + + /* We have either had a force parameter, or we have already detected the + Winbond. Put it now into bank 0 and Vendor ID High Byte */ + w83792d_write_value(new_client, + W83792D_REG_BANK, + (w83792d_read_value(new_client, + W83792D_REG_BANK) & 0x78) | 0x80); + + /* Determine the chip type. 
*/ + if (kind <= 0) { + /* get vendor ID */ + val2 = w83792d_read_value(new_client, W83792D_REG_CHIPMAN); + if (val2 != 0x5c) { /* the vendor is NOT Winbond */ + goto ERROR1; + } + val1 = w83792d_read_value(new_client, W83792D_REG_WCHIPID); + if (val1 == 0x7a && address >= 0x2c) { + kind = w83792d; + } else { + if (kind == 0) + dev_warn(&new_client->dev, + "w83792d: Ignoring 'force' parameter for" + " unknown chip at adapter %d, address" + " 0x%02x\n", i2c_adapter_id(adapter), + address); + goto ERROR1; + } + } + + if (kind == w83792d) { + client_name = "w83792d"; + } else { + dev_err(&new_client->dev, "w83792d: Internal error: unknown" + " kind (%d)?!?", kind); + goto ERROR1; + } + + /* Fill in the remaining client fields and put into the global list */ + strlcpy(new_client->name, client_name, I2C_NAME_SIZE); + data->type = kind; + + data->valid = 0; + init_MUTEX(&data->update_lock); + + /* Tell the I2C layer a new client has arrived */ + if ((err = i2c_attach_client(new_client))) + goto ERROR1; + + if ((err = w83792d_detect_subclients(adapter, address, + kind, new_client))) + goto ERROR2; + + /* Initialize the chip */ + w83792d_init_client(new_client); + + /* A few vars need to be filled upon startup */ + for (i = 1; i <= 7; i++) { + data->fan_min[i - 1] = w83792d_read_value(new_client, + W83792D_REG_FAN_MIN[i]); + } + + /* Register sysfs hooks */ + device_create_file_in(new_client, 0); + device_create_file_in(new_client, 1); + device_create_file_in(new_client, 2); + device_create_file_in(new_client, 3); + device_create_file_in(new_client, 4); + device_create_file_in(new_client, 5); + device_create_file_in(new_client, 6); + device_create_file_in(new_client, 7); + device_create_file_in(new_client, 8); + + device_create_file_fan(new_client, 1); + device_create_file_fan(new_client, 2); + device_create_file_fan(new_client, 3); + device_create_file_fan(new_client, 4); + device_create_file_fan(new_client, 5); + device_create_file_fan(new_client, 6); + 
device_create_file_fan(new_client, 7); + + device_create_file_temp1(new_client); /* Temp1 */ + device_create_file_temp_add(new_client, 2); /* Temp2 */ + device_create_file_temp_add(new_client, 3); /* Temp3 */ + + device_create_file_alarms(new_client); + + device_create_file_pwm(new_client, 1); + device_create_file_pwm(new_client, 2); + device_create_file_pwm(new_client, 3); + + device_create_file_pwmenable(new_client, 1); + device_create_file_pwmenable(new_client, 2); + device_create_file_pwmenable(new_client, 3); + + device_create_file_pwm_mode(new_client, 1); + device_create_file_pwm_mode(new_client, 2); + device_create_file_pwm_mode(new_client, 3); + + device_create_file_chassis(new_client); + device_create_file_chassis_clear(new_client); + + device_create_file_thermal_cruise(new_client, 1); + device_create_file_thermal_cruise(new_client, 2); + device_create_file_thermal_cruise(new_client, 3); + + device_create_file_tolerance(new_client, 1); + device_create_file_tolerance(new_client, 2); + device_create_file_tolerance(new_client, 3); + + device_create_file_sf2_point(new_client, 1, 1); /* Fan1 */ + device_create_file_sf2_point(new_client, 2, 1); /* Fan1 */ + device_create_file_sf2_point(new_client, 3, 1); /* Fan1 */ + device_create_file_sf2_point(new_client, 4, 1); /* Fan1 */ + device_create_file_sf2_point(new_client, 1, 2); /* Fan2 */ + device_create_file_sf2_point(new_client, 2, 2); /* Fan2 */ + device_create_file_sf2_point(new_client, 3, 2); /* Fan2 */ + device_create_file_sf2_point(new_client, 4, 2); /* Fan2 */ + device_create_file_sf2_point(new_client, 1, 3); /* Fan3 */ + device_create_file_sf2_point(new_client, 2, 3); /* Fan3 */ + device_create_file_sf2_point(new_client, 3, 3); /* Fan3 */ + device_create_file_sf2_point(new_client, 4, 3); /* Fan3 */ + + device_create_file_sf2_level(new_client, 1, 1); /* Fan1 */ + device_create_file_sf2_level(new_client, 2, 1); /* Fan1 */ + device_create_file_sf2_level(new_client, 3, 1); /* Fan1 */ + 
device_create_file_sf2_level(new_client, 1, 2); /* Fan2 */ + device_create_file_sf2_level(new_client, 2, 2); /* Fan2 */ + device_create_file_sf2_level(new_client, 3, 2); /* Fan2 */ + device_create_file_sf2_level(new_client, 1, 3); /* Fan3 */ + device_create_file_sf2_level(new_client, 2, 3); /* Fan3 */ + device_create_file_sf2_level(new_client, 3, 3); /* Fan3 */ + + return 0; + +ERROR2: + i2c_detach_client(new_client); +ERROR1: + kfree(data); +ERROR0: + return err; +} + +static int +w83792d_detach_client(struct i2c_client *client) +{ + int err; + + if ((err = i2c_detach_client(client))) { + dev_err(&client->dev, + "Client deregistration failed, client not detached.\n"); + return err; + } + + if (i2c_get_clientdata(client)==NULL) { + /* subclients */ + kfree(client); + } else { + /* main client */ + kfree(i2c_get_clientdata(client)); + } + + return 0; +} + +/* The SMBus locks itself, usually, but nothing may access the Winbond between + bank switches. ISA access must always be locked explicitly! + We ignore the W83792D BUSY flag at this moment - it could lead to deadlocks, + would slow down the W83792D access and should not be necessary. + There are some ugly typecasts here, but the good news is - they should + nowhere else be necessary! */ +static int +w83792d_read_value(struct i2c_client *client, u8 reg) +{ + int res=0; + res = i2c_smbus_read_byte_data(client, reg); + + return res; +} + +static int +w83792d_write_value(struct i2c_client *client, u8 reg, u8 value) +{ + i2c_smbus_write_byte_data(client, reg, value); + return 0; +} + +/* Called when we have found a new W83792D. It should set limits, etc. 
*/ +static void +w83792d_init_client(struct i2c_client *client) +{ + u8 temp2_cfg, temp3_cfg, vid_in_b; + + if (init) { + w83792d_write_value(client, W83792D_REG_CONFIG, 0x80); + } + /* Clear the bit6 of W83792D_REG_VID_IN_B(set it into 0): + W83792D_REG_VID_IN_B bit6 = 0: the high/low limit of + vin0/vin1 can be modified by user; + W83792D_REG_VID_IN_B bit6 = 1: the high/low limit of + vin0/vin1 auto-updated, can NOT be modified by user. */ + vid_in_b = w83792d_read_value(client, W83792D_REG_VID_IN_B); + w83792d_write_value(client, W83792D_REG_VID_IN_B, + vid_in_b & 0xbf); + + temp2_cfg = w83792d_read_value(client, W83792D_REG_TEMP2_CONFIG); + temp3_cfg = w83792d_read_value(client, W83792D_REG_TEMP3_CONFIG); + w83792d_write_value(client, W83792D_REG_TEMP2_CONFIG, + temp2_cfg & 0xe6); + w83792d_write_value(client, W83792D_REG_TEMP3_CONFIG, + temp3_cfg & 0xe6); + + /* Start monitoring */ + w83792d_write_value(client, W83792D_REG_CONFIG, + (w83792d_read_value(client, + W83792D_REG_CONFIG) & 0xf7) + | 0x01); +} + +static struct w83792d_data *w83792d_update_device(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct w83792d_data *data = i2c_get_clientdata(client); + int i, j; + u8 reg_array_tmp[4], pwm_array_tmp[7], reg_tmp; + + down(&data->update_lock); + + if (time_after + (jiffies - data->last_updated, (unsigned long) (HZ * 3)) + || time_before(jiffies, data->last_updated) || !data->valid) { + dev_dbg(dev, "Starting device update\n"); + + /* Update the voltages measured value and limits */ + for (i = 0; i < 9; i++) { + data->in[i] = w83792d_read_value(client, + W83792D_REG_IN[i]); + data->in_max[i] = w83792d_read_value(client, + W83792D_REG_IN_MAX[i]); + data->in_min[i] = w83792d_read_value(client, + W83792D_REG_IN_MIN[i]); + } + data->low_bits[0] = w83792d_read_value(client, + W83792D_REG_LOW_BITS1); + data->low_bits[1] = w83792d_read_value(client, + W83792D_REG_LOW_BITS2); + for (i = 0; i < 7; i++) { + /* Update the Fan measured value 
and limits */ + data->fan[i] = w83792d_read_value(client, + W83792D_REG_FAN[i]); + data->fan_min[i] = w83792d_read_value(client, + W83792D_REG_FAN_MIN[i]); + /* Update the PWM/DC Value and PWM/DC flag */ + pwm_array_tmp[i] = w83792d_read_value(client, + W83792D_REG_PWM[i]); + data->pwm[i] = pwm_array_tmp[i] & 0x0f; + data->pwm_mode[i] = (pwm_array_tmp[i] >> 7) & 0x01; + } + + reg_tmp = w83792d_read_value(client, W83792D_REG_FAN_CFG); + data->pwmenable[0] = reg_tmp & 0x03; + data->pwmenable[1] = (reg_tmp>>2) & 0x03; + data->pwmenable[2] = (reg_tmp>>4) & 0x03; + + for (i = 0; i < 3; i++) { + data->temp1[i] = w83792d_read_value(client, + W83792D_REG_TEMP1[i]); + } + for (i = 0; i < 2; i++) { + for (j = 0; j < 6; j++) { + data->temp_add[i][j] = w83792d_read_value( + client,W83792D_REG_TEMP_ADD[i][j]); + } + } + + /* Update the Fan Divisor */ + for (i = 0; i < 4; i++) { + reg_array_tmp[i] = w83792d_read_value(client, + W83792D_REG_FAN_DIV[i]); + } + data->fan_div[0] = reg_array_tmp[0] & 0x07; + data->fan_div[1] = (reg_array_tmp[0] >> 4) & 0x07; + data->fan_div[2] = reg_array_tmp[1] & 0x07; + data->fan_div[3] = (reg_array_tmp[1] >> 4) & 0x07; + data->fan_div[4] = reg_array_tmp[2] & 0x07; + data->fan_div[5] = (reg_array_tmp[2] >> 4) & 0x07; + data->fan_div[6] = reg_array_tmp[3] & 0x07; + + /* Update the realtime status */ + data->alarms = w83792d_read_value(client, W83792D_REG_ALARM1) + + (w83792d_read_value(client, W83792D_REG_ALARM2) << 8) + + (w83792d_read_value(client, W83792D_REG_ALARM3) << 16); + + /* Update CaseOpen status and its CLR_CHS.
*/ + data->chassis = (w83792d_read_value(client, + W83792D_REG_CHASSIS) >> 5) & 0x01; + data->chassis_clear = (w83792d_read_value(client, + W83792D_REG_CHASSIS_CLR) >> 7) & 0x01; + + /* Update Thermal Cruise/Smart Fan I target value */ + for (i = 0; i < 3; i++) { + data->thermal_cruise[i] = + w83792d_read_value(client, + W83792D_REG_THERMAL[i]) & 0x7f; + } + + /* Update Smart Fan I/II tolerance */ + reg_tmp = w83792d_read_value(client, W83792D_REG_TOLERANCE[0]); + data->tolerance[0] = reg_tmp & 0x0f; + data->tolerance[1] = (reg_tmp >> 4) & 0x0f; + data->tolerance[2] = w83792d_read_value(client, + W83792D_REG_TOLERANCE[2]) & 0x0f; + + /* Update Smart Fan II temperature points */ + for (i = 0; i < 3; i++) { + for (j = 0; j < 4; j++) { + data->sf2_points[i][j] = w83792d_read_value( + client,W83792D_REG_POINTS[i][j]) & 0x7f; + } + } + + /* Update Smart Fan II duty cycle levels */ + for (i = 0; i < 3; i++) { + reg_tmp = w83792d_read_value(client, + W83792D_REG_LEVELS[i][0]); + data->sf2_levels[i][0] = reg_tmp & 0x0f; + data->sf2_levels[i][1] = (reg_tmp >> 4) & 0x0f; + reg_tmp = w83792d_read_value(client, + W83792D_REG_LEVELS[i][2]); + data->sf2_levels[i][2] = (reg_tmp >> 4) & 0x0f; + data->sf2_levels[i][3] = reg_tmp & 0x0f; + } + + data->last_updated = jiffies; + data->valid = 1; + } + + up(&data->update_lock); + +#ifdef DEBUG + w83792d_print_debug(data, dev); +#endif + + return data; +} + +#ifdef DEBUG +static void w83792d_print_debug(struct w83792d_data *data, struct device *dev) +{ + int i=0, j=0; + dev_dbg(dev, "==========The following is the debug message...========\n"); + dev_dbg(dev, "9 set of Voltages: =====>\n"); + for (i=0; i<9; i++) { + dev_dbg(dev, "vin[%d] is: 0x%x\n", i, data->in[i]); + dev_dbg(dev, "vin[%d] max is: 0x%x\n", i, data->in_max[i]); + dev_dbg(dev, "vin[%d] min is: 0x%x\n", i, data->in_min[i]); + } + dev_dbg(dev, "Low Bit1 is: 0x%x\n", data->low_bits[0]); + dev_dbg(dev, "Low Bit2 is: 0x%x\n", data->low_bits[1]); + dev_dbg(dev, "7 set of Fan 
Counts and Duty Cycles: =====>\n"); + for (i=0; i<7; i++) { + dev_dbg(dev, "fan[%d] is: 0x%x\n", i, data->fan[i]); + dev_dbg(dev, "fan[%d] min is: 0x%x\n", i, data->fan_min[i]); + dev_dbg(dev, "pwm[%d] is: 0x%x\n", i, data->pwm[i]); + dev_dbg(dev, "pwm_mode[%d] is: 0x%x\n", i, data->pwm_mode[i]); + } + dev_dbg(dev, "3 set of Temperatures: =====>\n"); + for (i=0; i<3; i++) { + dev_dbg(dev, "temp1[%d] is: 0x%x\n", i, data->temp1[i]); + } + + for (i=0; i<2; i++) { + for (j=0; j<6; j++) { + dev_dbg(dev, "temp_add[%d][%d] is: 0x%x\n", i, j, + data->temp_add[i][j]); + } + } + + for (i=0; i<7; i++) { + dev_dbg(dev, "fan_div[%d] is: 0x%x\n", i, data->fan_div[i]); + } + dev_dbg(dev, "==========End of the debug message...==================\n"); + dev_dbg(dev, "\n"); +} +#endif + +static int __init +sensors_w83792d_init(void) +{ + return i2c_add_driver(&w83792d_driver); +} + +static void __exit +sensors_w83792d_exit(void) +{ + i2c_del_driver(&w83792d_driver); +} + +MODULE_AUTHOR("Chunhao Huang @ Winbond "); +MODULE_DESCRIPTION("W83792AD/D driver for linux-2.6"); +MODULE_LICENSE("GPL"); + +module_init(sensors_w83792d_init); +module_exit(sensors_w83792d_exit); + diff --git a/include/linux/hwmon-sysfs.h b/include/linux/hwmon-sysfs.h index 1b5018a965f5..7eb4004b3601 100644 --- a/include/linux/hwmon-sysfs.h +++ b/include/linux/hwmon-sysfs.h @@ -33,4 +33,19 @@ struct sensor_device_attribute sensor_dev_attr_##_name = { \ .index = _index, \ } +struct sensor_device_attribute_2 { + struct device_attribute dev_attr; + u8 index; + u8 nr; +}; +#define to_sensor_dev_attr_2(_dev_attr) \ + container_of(_dev_attr, struct sensor_device_attribute_2, dev_attr) + +#define SENSOR_DEVICE_ATTR_2(_name,_mode,_show,_store,_nr,_index) \ +struct sensor_device_attribute_2 sensor_dev_attr_##_name = { \ + .dev_attr = __ATTR(_name,_mode,_show,_store), \ + .index = _index, \ + .nr = _nr, \ +} + #endif /* _LINUX_HWMON_SYSFS_H */ -- cgit v1.2.3 From cdcb19219714c796ddef1202e952566c5f86354d Mon Sep 17 00:00:00 
2001 From: Jean Delvare Date: Thu, 28 Jul 2005 23:09:40 +0200 Subject: [PATCH] I2C: inline i2c_adapter_id We could inline i2c_adapter_id, as it is really, really short. Doing so saves a few bytes both in i2c-core and in the drivers using this function. before after diff drivers/hwmon/adm1026.ko 41344 41305 -39 drivers/hwmon/asb100.ko 27325 27246 -79 drivers/hwmon/gl518sm.ko 20824 20785 -39 drivers/hwmon/it87.ko 26419 26380 -39 drivers/hwmon/lm78.ko 21424 21385 -39 drivers/hwmon/lm85.ko 41034 40939 -95 drivers/hwmon/w83781d.ko 39561 39514 -47 drivers/hwmon/w83792d.ko 32979 32932 -47 drivers/i2c/i2c-core.ko 24708 24531 -177 Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-core.c | 9 --------- include/linux/i2c.h | 10 ++++++---- 2 files changed, 6 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index b0bceb2fb8b1..20f92e355f56 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -759,14 +759,6 @@ int i2c_probe(struct i2c_adapter *adapter, return 0; } -/* - * return id number for a specific adapter - */ -int i2c_adapter_id(struct i2c_adapter *adap) -{ - return adap->nr; -} - struct i2c_adapter* i2c_get_adapter(int id) { struct i2c_adapter *adapter; @@ -1196,7 +1188,6 @@ EXPORT_SYMBOL(i2c_master_send); EXPORT_SYMBOL(i2c_master_recv); EXPORT_SYMBOL(i2c_control); EXPORT_SYMBOL(i2c_transfer); -EXPORT_SYMBOL(i2c_adapter_id); EXPORT_SYMBOL(i2c_get_adapter); EXPORT_SYMBOL(i2c_put_adapter); EXPORT_SYMBOL(i2c_probe); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index da4faa016b17..ad1c0fb164bc 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -364,10 +364,6 @@ extern int i2c_probe(struct i2c_adapter *adapter, */ extern int i2c_control(struct i2c_client *,unsigned int, unsigned long); -/* This call returns a unique low identifier for each registered adapter, - * or -1 if the adapter was not registered. 
- */ -extern int i2c_adapter_id(struct i2c_adapter *adap); extern struct i2c_adapter* i2c_get_adapter(int id); extern void i2c_put_adapter(struct i2c_adapter *adap); @@ -384,6 +380,12 @@ static inline int i2c_check_functionality(struct i2c_adapter *adap, u32 func) return (func & i2c_get_functionality(adap)) == func; } +/* Return id number for a specific adapter */ +static inline int i2c_adapter_id(struct i2c_adapter *adap) +{ + return adap->nr; +} + /* * I2C Message - used for pure i2c transaction, also from /dev interface */ -- cgit v1.2.3 From 53ae11b08353268c4012ef107bf205a0724d71aa Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 28 Jul 2005 23:14:59 +0200 Subject: [PATCH] hwmon: move SENSORS_LIMIT to hwmon.h Move SENSORS_LIMIT from i2c-sensor.h to hwmon.h, as it is in no way related to i2c. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/lm75.h | 2 +- include/linux/hwmon.h | 11 +++++++++++ include/linux/i2c-sensor.h | 12 ------------ 3 files changed, 12 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/lm75.h b/drivers/hwmon/lm75.h index 63e3f2fb4c21..af7dc650ee15 100644 --- a/drivers/hwmon/lm75.h +++ b/drivers/hwmon/lm75.h @@ -25,7 +25,7 @@ which contains this code, we don't worry about the wasted space. 
*/ -#include <linux/i2c-sensor.h> +#include <linux/hwmon.h> /* straight from the datasheet */ #define LM75_TEMP_MIN (-55000) diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index bf90e6001e3b..0efd994c37f1 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -20,5 +20,16 @@ struct class_device *hwmon_device_register(struct device *dev); void hwmon_device_unregister(struct class_device *cdev); +/* Scale user input to sensible values */ +static inline int SENSORS_LIMIT(long value, long low, long high) +{ + if (value < low) + return low; + else if (value > high) + return high; + else + return value; +} + #endif diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h index ae73b9e789cb..e832d3286a46 100644 --- a/include/linux/i2c-sensor.h +++ b/include/linux/i2c-sensor.h @@ -242,16 +242,4 @@ extern int i2c_detect(struct i2c_adapter *adapter, struct i2c_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)); - -/* This macro is used to scale user-input to sensible values in almost all - chip drivers. */ -static inline int SENSORS_LIMIT(long value, long low, long high) -{ - if (value < low) - return low; - else if (value > high) - return high; - else - return value; -} #endif /* def _LINUX_I2C_SENSOR_H */ -- cgit v1.2.3 From 9fc6adfa9adf2be84119a3c2592287f33bd1dff2 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:20:43 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (01/11) Add support for kind-forced addresses to i2c_probe, like i2c_detect has for (essentially) hardware monitoring drivers. Note that this change will slightly increase the size of the drivers using I2C_CLIENT_INSMOD, with no immediate benefit. This is a requirement if we want to merge i2c_probe and i2c_detect though, and seems a reasonable price to pay in comparison with the previous cleanups which saved much more than that (such as the i2c-isa cleanup or the i2c address ranges removal.)
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/chips/ds1374.c | 1 - drivers/i2c/chips/m41t00.c | 1 - drivers/i2c/chips/rtc8564.c | 1 - drivers/i2c/i2c-core.c | 38 ++++++++++++++++++++++++++++---------- drivers/media/video/adv7170.c | 1 - drivers/media/video/adv7175.c | 1 - drivers/media/video/bt819.c | 1 - drivers/media/video/bt856.c | 1 - drivers/media/video/saa7110.c | 1 - drivers/media/video/saa7111.c | 1 - drivers/media/video/saa7114.c | 1 - drivers/media/video/saa7185.c | 1 - drivers/media/video/tuner-3036.c | 1 - drivers/media/video/vpx3220.c | 1 - include/linux/i2c.h | 9 ++++++--- 15 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/chips/ds1374.c b/drivers/i2c/chips/ds1374.c index a445736d8838..e2d1daf79880 100644 --- a/drivers/i2c/chips/ds1374.c +++ b/drivers/i2c/chips/ds1374.c @@ -53,7 +53,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_addr, .probe = ignore, .ignore = ignore, - .force = ignore, }; static ulong ds1374_read_rtc(void) diff --git a/drivers/i2c/chips/m41t00.c b/drivers/i2c/chips/m41t00.c index 778d7e12859d..e516dadc453f 100644 --- a/drivers/i2c/chips/m41t00.c +++ b/drivers/i2c/chips/m41t00.c @@ -42,7 +42,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_addr, .probe = ignore, .ignore = ignore, - .force = ignore, }; ulong diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c index 588fc2261a91..0b5385c892b1 100644 --- a/drivers/i2c/chips/rtc8564.c +++ b/drivers/i2c/chips/rtc8564.c @@ -67,7 +67,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_addr, .probe = ignore, .ignore = ignore, - .force = ignore, }; static int rtc8564_read_mem(struct i2c_client *client, struct mem *mem); diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 19d8a994b3b7..372b5996d045 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -662,6 +662,28 @@ int 
i2c_control(struct i2c_client *client, * Will not work for 10-bit addresses! * ---------------------------------------------------- */ +/* Return: kind (>= 0) if force found, -1 if not found */ +static inline int i2c_probe_forces(struct i2c_adapter *adapter, int addr, + unsigned short **forces) +{ + unsigned short kind; + int j, adap_id = i2c_adapter_id(adapter); + + for (kind = 0; forces[kind]; kind++) { + for (j = 0; forces[kind][j] != I2C_CLIENT_END; j += 2) { + if ((forces[kind][j] == adap_id || + forces[kind][j] == ANY_I2C_BUS) + && forces[kind][j + 1] == addr) { + dev_dbg(&adapter->dev, "found force parameter, " + "addr 0x%02x, kind %u\n", addr, kind); + return kind; + } + } + } + + return -1; +} + int i2c_probe(struct i2c_adapter *adapter, struct i2c_client_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)) @@ -683,19 +705,15 @@ int i2c_probe(struct i2c_adapter *adapter, at all */ found = 0; - for (i = 0; !found && (address_data->force[i] != I2C_CLIENT_END); i += 2) { - if (((adap_id == address_data->force[i]) || - (address_data->force[i] == ANY_I2C_BUS)) && - (addr == address_data->force[i+1])) { - dev_dbg(&adapter->dev, "found force parameter for adapter %d, addr %04x\n", - adap_id, addr); - if ((err = found_proc(adapter,addr,0))) + if (address_data->forces) { + int kind = i2c_probe_forces(adapter, addr, + address_data->forces); + if (kind >= 0) { /* force found */ + if ((err = found_proc(adapter, addr, kind))) return err; - found = 1; + continue; } } - if (found) - continue; /* If this address is in one of the ignores, we can forget about it right now */ diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c index 48989eda2400..52e32f05d625 100644 --- a/drivers/media/video/adv7170.c +++ b/drivers/media/video/adv7170.c @@ -391,7 +391,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver 
i2c_driver_adv7170; diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c index f898b6586374..b5ed9544bdea 100644 --- a/drivers/media/video/adv7175.c +++ b/drivers/media/video/adv7175.c @@ -441,7 +441,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_adv7175; diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c index 8733588f6db3..c6cfa7c48b04 100644 --- a/drivers/media/video/bt819.c +++ b/drivers/media/video/bt819.c @@ -507,7 +507,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_bt819; diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c index a5d529ccf3ad..c13d28658868 100644 --- a/drivers/media/video/bt856.c +++ b/drivers/media/video/bt856.c @@ -295,7 +295,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_bt856; diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c index 22d055d8a695..e116bdbed310 100644 --- a/drivers/media/video/saa7110.c +++ b/drivers/media/video/saa7110.c @@ -470,7 +470,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_saa7110; diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c index fcd897382fcf..f18df53d98ff 100644 --- a/drivers/media/video/saa7111.c +++ b/drivers/media/video/saa7111.c @@ -489,7 +489,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_saa7111; diff --git a/drivers/media/video/saa7114.c 
b/drivers/media/video/saa7114.c index 2ba997f5ef1d..e0c70f54f073 100644 --- a/drivers/media/video/saa7114.c +++ b/drivers/media/video/saa7114.c @@ -827,7 +827,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_saa7114; diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c index 108e7a4a0273..e93412f4407c 100644 --- a/drivers/media/video/saa7185.c +++ b/drivers/media/video/saa7185.c @@ -387,7 +387,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver i2c_driver_saa7185; diff --git a/drivers/media/video/tuner-3036.c b/drivers/media/video/tuner-3036.c index 7d825e510ffd..103def1abe3c 100644 --- a/drivers/media/video/tuner-3036.c +++ b/drivers/media/video/tuner-3036.c @@ -41,7 +41,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; /* ---------------------------------------------------------------------- */ diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c index 5dbd9f6bf353..4437bdebe24f 100644 --- a/drivers/media/video/vpx3220.c +++ b/drivers/media/video/vpx3220.c @@ -576,7 +576,6 @@ static struct i2c_client_address_data addr_data = { .normal_i2c = normal_i2c, .probe = &ignore, .ignore = &ignore, - .force = &ignore, }; static struct i2c_driver vpx3220_i2c_driver; diff --git a/include/linux/i2c.h b/include/linux/i2c.h index ad1c0fb164bc..9419bc5584ad 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -48,7 +48,6 @@ struct i2c_algorithm; struct i2c_adapter; struct i2c_client; struct i2c_driver; -struct i2c_client_address_data; union i2c_smbus_data; /* @@ -301,7 +300,7 @@ struct i2c_client_address_data { unsigned short *normal_i2c; unsigned short *probe; unsigned short *ignore; - 
unsigned short *force; + unsigned short **forces; }; /* Internal numbers to terminate lists */ @@ -575,11 +574,15 @@ union i2c_smbus_data { I2C_CLIENT_MODULE_PARM(force, \ "List of adapter,address pairs to boldly assume " \ "to be present"); \ + static unsigned short *addr_forces[] = { \ + force, \ + NULL \ + }; \ static struct i2c_client_address_data addr_data = { \ .normal_i2c = normal_i2c, \ .probe = probe, \ .ignore = ignore, \ - .force = force, \ + .forces = addr_forces, \ } #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From ef8dec5d8b3e96e359f377f35cd8caff42fe6d58 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:33:23 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (02/11) The way i2c-sensor handles forced addresses could be optimized. It defines a structure (i2c_force_data) to associate a module parameter with a given kind value, but in fact this kind value is always the index of the structure in each array it is used in. So this additional value can be omitted, and still be deduced in the code handling these arrays. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-sensor-detect.c | 13 ++-- include/linux/i2c-sensor.h | 133 ++++++++++++++++++---------------------- 2 files changed, 67 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-sensor-detect.c b/drivers/i2c/i2c-sensor-detect.c index 5aede3ca9cb0..c952926c1482 100644 --- a/drivers/i2c/i2c-sensor-detect.c +++ b/drivers/i2c/i2c-sensor-detect.c @@ -32,7 +32,6 @@ int i2c_detect(struct i2c_adapter *adapter, int (*found_proc) (struct i2c_adapter *, int, int)) { int addr, i, found, j, err; - struct i2c_force_data *this_force; int adapter_id = i2c_adapter_id(adapter); unsigned short *normal_i2c; unsigned short *probe; @@ -58,13 +57,13 @@ int i2c_detect(struct i2c_adapter *adapter, /* If it is in one of the force entries, we don't do any detection at all */ found = 0; - for (i = 0; !found && (this_force = address_data->forces + i, this_force->force); i++) { - for (j = 0; !found && (this_force->force[j] != I2C_CLIENT_END); j += 2) { - if ( ((adapter_id == this_force->force[j]) || - (this_force->force[j] == ANY_I2C_BUS)) && - (addr == this_force->force[j + 1]) ) { + for (i = 0; address_data->forces[i]; i++) { + for (j = 0; !found && (address_data->forces[i][j] != I2C_CLIENT_END); j += 2) { + if ( ((adapter_id == address_data->forces[i][j]) || + (address_data->forces[i][j] == ANY_I2C_BUS)) && + (addr == address_data->forces[i][j + 1]) ) { dev_dbg(&adapter->dev, "found force parameter for adapter %d, addr %04x\n", adapter_id, addr); - if ((err = found_proc(adapter, addr, this_force->kind))) + if ((err = found_proc(adapter, addr, i))) return err; found = 1; } diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h index e832d3286a46..4facaa0b9054 100644 --- a/include/linux/i2c-sensor.h +++ b/include/linux/i2c-sensor.h @@ -22,22 +22,6 @@ #ifndef _LINUX_I2C_SENSOR_H #define _LINUX_I2C_SENSOR_H -/* A structure containing detect information. 
- Force variables overrule all other variables; they force a detection on - that place. If a specific chip is given, the module blindly assumes this - chip type is present; if a general force (kind == 0) is given, the module - will still try to figure out what type of chip is present. This is useful - if for some reasons the detect for SMBus address space filled fails. - probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. - A list of pairs. The first value is a bus number (ANY_I2C_BUS for any - I2C bus), the second is the address. - kind: The kind of chip. 0 equals any chip. -*/ -struct i2c_force_data { - unsigned short *force; - unsigned short kind; -}; - /* A structure containing the detect information. normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_END. A list of I2C addresses which should normally be examined. @@ -50,14 +34,18 @@ struct i2c_force_data { I2C bus), the second is the I2C address. These addresses are never probed. This parameter overrules 'normal' and probe', but not the 'force' lists. - force_data: insmod parameters. A list, ending with an element of which - the force field is NULL. + forces: insmod parameters. A list, ending with a NULL element. + Force variables overrule all other variables; they force a detection on + that place. If a specific chip is given, the module blindly assumes this + chip type is present; if a general force (kind == 0) is given, the module + will still try to figure out what type of chip is present. This is useful + if for some reasons the detect for SMBus address space filled fails. 
*/ struct i2c_address_data { unsigned short *normal_i2c; unsigned short *probe; unsigned short *ignore; - struct i2c_force_data *forces; + unsigned short **forces; }; #define SENSORS_MODULE_PARM_FORCE(name) \ @@ -88,7 +76,8 @@ struct i2c_address_data { I2C_CLIENT_MODULE_PARM(force, \ "List of adapter,address pairs to boldly assume " \ "to be present"); \ - static struct i2c_force_data forces[] = {{force,any_chip},{NULL}}; \ + static unsigned short *forces[] = { force, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_1(chip1) \ @@ -97,9 +86,9 @@ struct i2c_address_data { "List of adapter,address pairs to boldly assume " \ "to be present"); \ SENSORS_MODULE_PARM_FORCE(chip1); \ - static struct i2c_force_data forces[] = {{force,any_chip},\ - {force_ ## chip1,chip1}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_2(chip1,chip2) \ @@ -109,10 +98,10 @@ struct i2c_address_data { "to be present"); \ SENSORS_MODULE_PARM_FORCE(chip1); \ SENSORS_MODULE_PARM_FORCE(chip2); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_3(chip1,chip2,chip3) \ @@ -123,11 +112,11 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip1); \ SENSORS_MODULE_PARM_FORCE(chip2); \ SENSORS_MODULE_PARM_FORCE(chip3); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## chip3,chip3}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_4(chip1,chip2,chip3,chip4) \ @@ -139,12 +128,12 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip2); \ SENSORS_MODULE_PARM_FORCE(chip3); \ 
SENSORS_MODULE_PARM_FORCE(chip4); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## chip3,chip3}, \ - {force_ ## chip4,chip4}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + force_##chip4, \ + NULL}; \ SENSORS_INSMOD #define SENSORS_INSMOD_5(chip1,chip2,chip3,chip4,chip5) \ @@ -157,13 +146,13 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip3); \ SENSORS_MODULE_PARM_FORCE(chip4); \ SENSORS_MODULE_PARM_FORCE(chip5); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## chip3,chip3}, \ - {force_ ## chip4,chip4}, \ - {force_ ## chip5,chip5}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + force_##chip4, \ + force_##chip5, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_6(chip1,chip2,chip3,chip4,chip5,chip6) \ @@ -177,14 +166,14 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip4); \ SENSORS_MODULE_PARM_FORCE(chip5); \ SENSORS_MODULE_PARM_FORCE(chip6); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## chip3,chip3}, \ - {force_ ## chip4,chip4}, \ - {force_ ## chip5,chip5}, \ - {force_ ## chip6,chip6}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + force_##chip4, \ + force_##chip5, \ + force_##chip6, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_7(chip1,chip2,chip3,chip4,chip5,chip6,chip7) \ @@ -199,15 +188,15 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip5); \ SENSORS_MODULE_PARM_FORCE(chip6); \ SENSORS_MODULE_PARM_FORCE(chip7); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## 
chip3,chip3}, \ - {force_ ## chip4,chip4}, \ - {force_ ## chip5,chip5}, \ - {force_ ## chip6,chip6}, \ - {force_ ## chip7,chip7}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + force_##chip4, \ + force_##chip5, \ + force_##chip6, \ + force_##chip7, \ + NULL }; \ SENSORS_INSMOD #define SENSORS_INSMOD_8(chip1,chip2,chip3,chip4,chip5,chip6,chip7,chip8) \ @@ -223,16 +212,16 @@ struct i2c_address_data { SENSORS_MODULE_PARM_FORCE(chip6); \ SENSORS_MODULE_PARM_FORCE(chip7); \ SENSORS_MODULE_PARM_FORCE(chip8); \ - static struct i2c_force_data forces[] = {{force,any_chip}, \ - {force_ ## chip1,chip1}, \ - {force_ ## chip2,chip2}, \ - {force_ ## chip3,chip3}, \ - {force_ ## chip4,chip4}, \ - {force_ ## chip5,chip5}, \ - {force_ ## chip6,chip6}, \ - {force_ ## chip7,chip7}, \ - {force_ ## chip8,chip8}, \ - {NULL}}; \ + static unsigned short *forces[] = { force, \ + force_##chip1, \ + force_##chip2, \ + force_##chip3, \ + force_##chip4, \ + force_##chip5, \ + force_##chip6, \ + force_##chip7, \ + force_##chip8, \ + NULL }; \ SENSORS_INSMOD /* Detect function. It iterates over all possible addresses itself. For -- cgit v1.2.3 From b78ec31582c60578ee1d3bb470791d6dde96ccf7 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:36:24 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (03/11) We now have two identical structures, i2c_address_data in i2c-sensor.h and i2c_client_address_data in i2c.h. We can kill one of them, I choose to keep the one in i2c.h as it makes more sense (this structure is not specific to sensors.) 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/i2c-sensor-detect.c | 2 +- include/linux/i2c-sensor.h | 30 +++--------------------------- 2 files changed, 4 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-sensor-detect.c b/drivers/i2c/i2c-sensor-detect.c index c952926c1482..70fcf7281988 100644 --- a/drivers/i2c/i2c-sensor-detect.c +++ b/drivers/i2c/i2c-sensor-detect.c @@ -28,7 +28,7 @@ static unsigned short empty[] = {I2C_CLIENT_END}; /* Won't work for 10-bit addresses! */ int i2c_detect(struct i2c_adapter *adapter, - struct i2c_address_data *address_data, + struct i2c_client_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)) { int addr, i, found, j, err; diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h index 4facaa0b9054..fc9284af9fae 100644 --- a/include/linux/i2c-sensor.h +++ b/include/linux/i2c-sensor.h @@ -22,31 +22,7 @@ #ifndef _LINUX_I2C_SENSOR_H #define _LINUX_I2C_SENSOR_H -/* A structure containing the detect information. - normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_END. - A list of I2C addresses which should normally be examined. - probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. - A list of pairs. The first value is a bus number (ANY_I2C_BUS for any - I2C bus), the second is the address. These addresses are also probed, - as if they were in the 'normal' list. - ignore: insmod parameter. Initialize this list with I2C_CLIENT_END values. - A list of pairs. The first value is a bus number (ANY_I2C_BUS for any - I2C bus), the second is the I2C address. These addresses are never - probed. This parameter overrules 'normal' and probe', but not the - 'force' lists. - forces: insmod parameters. A list, ending with a NULL element. - Force variables overrule all other variables; they force a detection on - that place. 
If a specific chip is given, the module blindly assumes this - chip type is present; if a general force (kind == 0) is given, the module - will still try to figure out what type of chip is present. This is useful - if for some reasons the detect for SMBus address space filled fails. -*/ -struct i2c_address_data { - unsigned short *normal_i2c; - unsigned short *probe; - unsigned short *ignore; - unsigned short **forces; -}; +#include <linux/i2c.h> #define SENSORS_MODULE_PARM_FORCE(name) \ I2C_CLIENT_MODULE_PARM(force_ ## name, \ @@ -60,7 +36,7 @@ struct i2c_address_data { "List of adapter,address pairs to scan additionally"); \ I2C_CLIENT_MODULE_PARM(ignore, \ "List of adapter,address pairs not to scan"); \ - static struct i2c_address_data addr_data = { \ + static struct i2c_client_address_data addr_data = { \ .normal_i2c = normal_i2c, \ .probe = probe, \ .ignore = ignore, \ @@ -228,7 +204,7 @@ struct i2c_address_data { SMBus addresses, it will only call found_proc if some client is connected to the SMBus (unless a 'force' matched). */ extern int i2c_detect(struct i2c_adapter *adapter, - struct i2c_address_data *address_data, + struct i2c_client_address_data *address_data, int (*found_proc) (struct i2c_adapter *, int, int)); #endif /* def _LINUX_I2C_SENSOR_H */ -- cgit v1.2.3 From 96478ef3f3f71fa929cc905cc794993e312d9a5d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:45:27 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (05/11) The i2c_detect function has no more user, delete it.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/Makefile | 2 +- drivers/i2c/i2c-sensor-detect.c | 125 ---------------------------------------- drivers/i2c/i2c-sensor-vid.c | 5 ++ include/linux/i2c-sensor.h | 7 --- 4 files changed, 6 insertions(+), 133 deletions(-) delete mode 100644 drivers/i2c/i2c-sensor-detect.c (limited to 'include/linux') diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index cd170395a8c7..71d68ad0e5ce 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_I2C_CHARDEV) += i2c-dev.o obj-$(CONFIG_I2C_SENSOR) += i2c-sensor.o obj-y += busses/ chips/ algos/ -i2c-sensor-objs := i2c-sensor-detect.o i2c-sensor-vid.o +i2c-sensor-objs := i2c-sensor-vid.o ifeq ($(CONFIG_I2C_DEBUG_CORE),y) diff --git a/drivers/i2c/i2c-sensor-detect.c b/drivers/i2c/i2c-sensor-detect.c deleted file mode 100644 index 70fcf7281988..000000000000 --- a/drivers/i2c/i2c-sensor-detect.c +++ /dev/null @@ -1,125 +0,0 @@ -/* - i2c-sensor-detect.c - Part of lm_sensors, Linux kernel modules for hardware - monitoring - Copyright (c) 1998 - 2001 Frodo Looijaard and - Mark D. Studebaker - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#include -#include -#include -#include - -static unsigned short empty[] = {I2C_CLIENT_END}; - -/* Won't work for 10-bit addresses! 
*/ -int i2c_detect(struct i2c_adapter *adapter, - struct i2c_client_address_data *address_data, - int (*found_proc) (struct i2c_adapter *, int, int)) -{ - int addr, i, found, j, err; - int adapter_id = i2c_adapter_id(adapter); - unsigned short *normal_i2c; - unsigned short *probe; - unsigned short *ignore; - - /* Forget it if we can't probe using SMBUS_QUICK */ - if (!i2c_check_functionality(adapter, I2C_FUNC_SMBUS_QUICK)) - return -1; - - /* Use default "empty" list if the adapter doesn't specify any */ - normal_i2c = probe = ignore = empty; - if (address_data->normal_i2c) - normal_i2c = address_data->normal_i2c; - if (address_data->probe) - probe = address_data->probe; - if (address_data->ignore) - ignore = address_data->ignore; - - for (addr = 0x00; addr <= 0x7f; addr++) { - if (i2c_check_addr(adapter, addr)) - continue; - - /* If it is in one of the force entries, we don't do any - detection at all */ - found = 0; - for (i = 0; address_data->forces[i]; i++) { - for (j = 0; !found && (address_data->forces[i][j] != I2C_CLIENT_END); j += 2) { - if ( ((adapter_id == address_data->forces[i][j]) || - (address_data->forces[i][j] == ANY_I2C_BUS)) && - (addr == address_data->forces[i][j + 1]) ) { - dev_dbg(&adapter->dev, "found force parameter for adapter %d, addr %04x\n", adapter_id, addr); - if ((err = found_proc(adapter, addr, i))) - return err; - found = 1; - } - } - } - if (found) - continue; - - /* If this address is in one of the ignores, we can forget about it - right now */ - for (i = 0; !found && (ignore[i] != I2C_CLIENT_END); i += 2) { - if ( ((adapter_id == ignore[i]) || - (ignore[i] == ANY_I2C_BUS)) && - (addr == ignore[i + 1])) { - dev_dbg(&adapter->dev, "found ignore parameter for adapter %d, addr %04x\n", adapter_id, addr); - found = 1; - } - } - if (found) - continue; - - /* Now, we will do a detection, but only if it is in the normal or - probe entries */ - for (i = 0; !found && (normal_i2c[i] != I2C_CLIENT_END); i += 1) { - if (addr == normal_i2c[i]) 
{ - found = 1; - dev_dbg(&adapter->dev, "found normal i2c entry for adapter %d, addr %02x\n", adapter_id, addr); - } - } - - for (i = 0; - !found && (probe[i] != I2C_CLIENT_END); - i += 2) { - if (((adapter_id == probe[i]) || - (probe[i] == ANY_I2C_BUS)) - && (addr == probe[i + 1])) { - dev_dbg(&adapter->dev, "found probe parameter for adapter %d, addr %04x\n", adapter_id, addr); - found = 1; - } - } - if (!found) - continue; - - /* OK, so we really should examine this address. First check - whether there is some client here at all! */ - if (i2c_smbus_xfer(adapter, addr, 0, 0, 0, I2C_SMBUS_QUICK, NULL) >= 0) - if ((err = found_proc(adapter, addr, -1))) - return err; - } - return 0; -} - -EXPORT_SYMBOL(i2c_detect); - -MODULE_AUTHOR("Frodo Looijaard , " - "Rudolf Marek "); - -MODULE_DESCRIPTION("i2c-sensor driver"); -MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/i2c-sensor-vid.c b/drivers/i2c/i2c-sensor-vid.c index 922e22f054bb..b8ef289fc80e 100644 --- a/drivers/i2c/i2c-sensor-vid.c +++ b/drivers/i2c/i2c-sensor-vid.c @@ -96,3 +96,8 @@ int i2c_which_vrm(void) #endif EXPORT_SYMBOL(i2c_which_vrm); + +MODULE_AUTHOR("Rudolf Marek "); + +MODULE_DESCRIPTION("i2c-sensor driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h index fc9284af9fae..1563d445dfd1 100644 --- a/include/linux/i2c-sensor.h +++ b/include/linux/i2c-sensor.h @@ -200,11 +200,4 @@ NULL }; \ SENSORS_INSMOD -/* Detect function. It iterates over all possible addresses itself. For - SMBus addresses, it will only call found_proc if some client is connected - to the SMBus (unless a 'force' matched). 
*/ -extern int i2c_detect(struct i2c_adapter *adapter, - struct i2c_client_address_data *address_data, - int (*found_proc) (struct i2c_adapter *, int, int)); - #endif /* def _LINUX_I2C_SENSOR_H */ -- cgit v1.2.3 From f4b50261207c987913f076d867c2e154d71fd012 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:49:03 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (06/11) The only thing left in i2c-sensor.h are module parameter definition macros. It's only an extension of what i2c.h offers, and this extension is not sensors-specific. As a matter of fact, a few non-sensors drivers use them. So we better merge them in i2c.h, and get rid of i2c-sensor.h altogether. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/porting-clients | 8 +- Documentation/i2c/writing-clients | 68 +++---------- drivers/hwmon/adm1021.c | 3 +- drivers/hwmon/adm1025.c | 3 +- drivers/hwmon/adm1026.c | 3 +- drivers/hwmon/adm1031.c | 3 +- drivers/hwmon/adm9240.c | 3 +- drivers/hwmon/asb100.c | 3 +- drivers/hwmon/atxp1.c | 3 +- drivers/hwmon/ds1621.c | 3 +- drivers/hwmon/fscher.c | 3 +- drivers/hwmon/fscpos.c | 3 +- drivers/hwmon/gl518sm.c | 3 +- drivers/hwmon/gl520sm.c | 3 +- drivers/hwmon/it87.c | 3 +- drivers/hwmon/lm63.c | 3 +- drivers/hwmon/lm75.c | 3 +- drivers/hwmon/lm77.c | 3 +- drivers/hwmon/lm78.c | 3 +- drivers/hwmon/lm80.c | 3 +- drivers/hwmon/lm83.c | 3 +- drivers/hwmon/lm85.c | 3 +- drivers/hwmon/lm87.c | 3 +- drivers/hwmon/lm90.c | 3 +- drivers/hwmon/lm92.c | 3 +- drivers/hwmon/max1619.c | 3 +- drivers/hwmon/sis5595.c | 1 - drivers/hwmon/smsc47m1.c | 1 - drivers/hwmon/via686a.c | 1 - drivers/hwmon/w83627hf.c | 1 - drivers/hwmon/w83781d.c | 3 +- drivers/hwmon/w83792d.c | 3 +- drivers/hwmon/w83l785ts.c | 3 +- drivers/i2c/chips/ds1337.c | 3 +- drivers/i2c/chips/eeprom.c | 3 +- drivers/i2c/chips/max6875.c | 3 +- drivers/i2c/chips/pca9539.c | 3 +- drivers/i2c/chips/pcf8574.c | 3 +- drivers/i2c/chips/pcf8591.c | 3 +- 
include/linux/i2c-sensor.h | 203 -------------------------------------- include/linux/i2c.h | 148 ++++++++++++++++++++++++--- 41 files changed, 188 insertions(+), 342 deletions(-) delete mode 100644 include/linux/i2c-sensor.h (limited to 'include/linux') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index f9099211bd0b..8b819379adcb 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -1,4 +1,4 @@ -Revision 4, 2004-03-30 +Revision 5, 2005-07-29 Jean Delvare Greg KH @@ -17,13 +17,12 @@ yours for best results. Technical changes: -* [Includes] Get rid of "version.h". Replace with - . Includes typically look like that: +* [Includes] Get rid of "version.h" and . + Includes typically look like that: #include #include #include #include - #include #include /* if you need VRM support */ #include /* if you have I/O operations */ Please respect this inclusion order. Some extra headers may be @@ -31,6 +30,7 @@ Technical changes: * [Addresses] SENSORS_I2C_END becomes I2C_CLIENT_END, ISA addresses are no more handled by the i2c core. + SENSORS_INSMOD_ becomes I2C_CLIENT_INSMOD_. * [Client data] Get rid of sysctl_id. Try using standard names for register values (for example, temp_os becomes temp_max). You're diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients index 43d7928056ae..97e138cbb2a9 100644 --- a/Documentation/i2c/writing-clients +++ b/Documentation/i2c/writing-clients @@ -155,8 +155,8 @@ NOTE: If you want to write a `sensors' driver, the interface is slightly -Probing classes (i2c) ---------------------- +Probing classes +--------------- All parameters are given as lists of unsigned 16-bit integers. Lists are terminated by I2C_CLIENT_END. @@ -171,12 +171,18 @@ The following lists are used internally: ignore: insmod parameter. A list of pairs. The first value is a bus number (-1 for any I2C bus), the second is the I2C address. These addresses are never probed. 
- This parameter overrules 'normal' and 'probe', but not the 'force' lists. + This parameter overrules the 'normal_i2c' list only. force: insmod parameter. A list of pairs. The first value is a bus number (-1 for any I2C bus), the second is the I2C address. A device is blindly assumed to be on the given address, no probing is done. +Additionally, kind-specific force lists may optionally be defined if +the driver supports several chip kinds. They are grouped in a +NULL-terminated list of pointers named forces, whose first element is the +generic force list mentioned above. Each additional list corresponds to an +insmod parameter of the form force_<kind>. + Fortunately, as a module writer, you just have to define the `normal_i2c' parameter. The complete declaration could look like this: @@ -186,61 +192,17 @@ parameter. The complete declaration could look like this: /* Magic definition of all other variables and things */ I2C_CLIENT_INSMOD; + /* Or, if your driver supports, say, 2 kinds of devices: */ + I2C_CLIENT_INSMOD_2(foo, bar); + +If you use the multi-kind form, an enum will be defined for you: + enum chips { any_chip, foo, bar, ... } +You can then (and certainly should) use it in the driver code. Note that you *have* to call the defined variable `normal_i2c', without any prefix! -Probing classes (sensors) -------------------------- - -If you write a `sensors' driver, you use a slightly different interface. -Also, we use a enum of chip types. Don't forget to include `sensors.h'. - -The following lists are used internally. They are all lists of integers. - - normal_i2c: filled in by the module writer. Terminated by I2C_CLIENT_END. - A list of I2C addresses which should normally be examined. - probe: insmod parameter. Initialize this list with I2C_CLIENT_END values. - A list of pairs. The first value is a bus number (ANY_I2C_BUS for any - I2C bus), the second is the address. These addresses are also probed, - as if they were in the 'normal' list. 
- ignore: insmod parameter. Initialize this list with I2C_CLIENT_END values. - A list of pairs. The first value is a bus number (ANY_I2C_BUS for any - I2C bus), the second is the I2C address. These addresses are never - probed. This parameter overrules 'normal' and 'probe', but not the - 'force' lists. - -Also used is a list of pointers to sensors_force_data structures: - force_data: insmod parameters. A list, ending with an element of which - the force field is NULL. - Each element contains the type of chip and a list of pairs. - The first value is a bus number (ANY_I2C_BUS for any I2C bus), the - second is the address. - These are automatically translated to insmod variables of the form - force_foo. - -So we have a generic insmod variabled `force', and chip-specific variables -`force_CHIPNAME'. - -Fortunately, as a module writer, you just have to define the `normal_i2c' -parameter, and define what chip names are used. The complete declaration -could look like this: - /* Scan i2c addresses 0x37, and 0x48 to 0x4f */ - static unsigned short normal_i2c[] = { 0x37, 0x48, 0x49, 0x4a, 0x4b, 0x4c, - 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; - - /* Define chips foo and bar, as well as all module parameters and things */ - SENSORS_INSMOD_2(foo,bar); - -If you have one chip, you use macro SENSORS_INSMOD_1(chip), if you have 2 -you use macro SENSORS_INSMOD_2(chip1,chip2), etc. If you do not want to -bother with chip types, you can use SENSORS_INSMOD_0. - -A enum is automatically defined as follows: - enum chips { any_chip, chip1, chip2, ... 
} - - Attaching to an adapter ----------------------- diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c index 21f6dfeb04ac..e928cdb041cb 100644 --- a/drivers/hwmon/adm1021.c +++ b/drivers/hwmon/adm1021.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include @@ -36,7 +35,7 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_8(adm1021, adm1023, max1617, max1617a, thmc10, lm84, gl523sm, mc1066); +I2C_CLIENT_INSMOD_8(adm1021, adm1023, max1617, max1617a, thmc10, lm84, gl523sm, mc1066); /* adm1021 constants specified below */ diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index 5b21284df2f8..229fd0de6f9e 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -67,7 +66,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_2(adm1025, ne1619); +I2C_CLIENT_INSMOD_2(adm1025, ne1619); /* * The ADM1025 registers diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index f3a78f792919..f32f819efcfc 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -38,7 +37,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(adm1026); +I2C_CLIENT_INSMOD_1(adm1026); static int gpio_input[17] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c index 9221653590a8..58338ed7c8a1 100644 --- a/drivers/hwmon/adm1031.c +++ b/drivers/hwmon/adm1031.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -63,7 +62,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_2(adm1030, adm1031); 
+I2C_CLIENT_INSMOD_2(adm1030, adm1031); typedef u8 auto_chan_table_t[8][2]; diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 6b20b28aa3b9..0a742cb88f4c 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -55,7 +54,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_3(adm9240, ds1780, lm81); +I2C_CLIENT_INSMOD_3(adm9240, ds1780, lm81); /* ADM9240 registers */ #define ADM9240_REG_MAN_ID 0x3e diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index a6c6c9d3fddd..66b0dbd1af0e 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,7 @@ static unsigned short normal_i2c[] = { 0x2d, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(asb100); +I2C_CLIENT_INSMOD_1(asb100); I2C_CLIENT_MODULE_PARM(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 329ddafd524b..5cf77e67a2ed 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -43,7 +42,7 @@ MODULE_AUTHOR("Sebastian Witt "); static unsigned short normal_i2c[] = { 0x37, 0x4e, I2C_CLIENT_END }; -SENSORS_INSMOD_1(atxp1); +I2C_CLIENT_INSMOD_1(atxp1); static int atxp1_attach_adapter(struct i2c_adapter * adapter); static int atxp1_detach_client(struct i2c_client * client); diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c index a3b3a5887621..b0199e063d0e 100644 --- a/drivers/hwmon/ds1621.c +++ b/drivers/hwmon/ds1621.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include "lm75.h" @@ -36,7 +35,7 @@ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 
0x4e, 0x4f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(ds1621); +I2C_CLIENT_INSMOD_1(ds1621); static int polarity = -1; module_param(polarity, int, 0); MODULE_PARM_DESC(polarity, "Output's polarity: 0 = active high, 1 = active low"); diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c index 5629e68a5ca3..eef6061d786b 100644 --- a/drivers/hwmon/fscher.c +++ b/drivers/hwmon/fscher.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -45,7 +44,7 @@ static unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_1(fscher); +I2C_CLIENT_INSMOD_1(fscher); /* * The FSCHER registers diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c index edc84f2f6454..5fc77a5fed07 100644 --- a/drivers/hwmon/fscpos.c +++ b/drivers/hwmon/fscpos.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -47,7 +46,7 @@ static unsigned short normal_i2c[] = { 0x73, I2C_CLIENT_END }; /* * Insmod parameters */ -SENSORS_INSMOD_1(fscpos); +I2C_CLIENT_INSMOD_1(fscpos); /* * The FSCPOS registers diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c index 15376a6e0494..256b9323c84b 100644 --- a/drivers/hwmon/gl518sm.c +++ b/drivers/hwmon/gl518sm.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include @@ -49,7 +48,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_2(gl518sm_r00, gl518sm_r80); +I2C_CLIENT_INSMOD_2(gl518sm_r00, gl518sm_r80); /* Many GL518 constants specified below */ diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index 18539c9559c6..de6608a159c3 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +39,7 @@ MODULE_PARM_DESC(extra_sensor_type, "Type of extra sensor (0=autodetect, 1=tempe static unsigned short normal_i2c[] = { 0x2c, 0x2d, I2C_CLIENT_END }; /* 
Insmod parameters */ -SENSORS_INSMOD_1(gl520sm); +I2C_CLIENT_INSMOD_1(gl520sm); /* Many GL520 constants specified below One of the inputs can be configured as either temp or voltage. diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index d1e04c40e64c..84877665b66e 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include @@ -51,7 +50,7 @@ static unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, static unsigned short isa_address = 0x290; /* Insmod parameters */ -SENSORS_INSMOD_2(it87, it8712); +I2C_CLIENT_INSMOD_2(it87, it8712); #define REG 0x2e /* The register to read/write */ #define DEV 0x07 /* Register: Logical device select */ diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c index dd2702131aed..be5c7095ecbb 100644 --- a/drivers/hwmon/lm63.c +++ b/drivers/hwmon/lm63.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +57,7 @@ static unsigned short normal_i2c[] = { 0x4c, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_1(lm63); +I2C_CLIENT_INSMOD_1(lm63); /* * The LM63 registers diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c index bd39452db895..9a3ebdf583f4 100644 --- a/drivers/hwmon/lm75.c +++ b/drivers/hwmon/lm75.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include "lm75.h" @@ -34,7 +33,7 @@ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(lm75); +I2C_CLIENT_INSMOD_1(lm75); /* Many LM75 constants specified below */ diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c index 52218570f874..866eab96a6f6 100644 --- a/drivers/hwmon/lm77.c +++ b/drivers/hwmon/lm77.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -38,7 +37,7 @@ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END }; /* Insmod parameters */ 
-SENSORS_INSMOD_1(lm77); +I2C_CLIENT_INSMOD_1(lm77); /* The LM77 registers */ #define LM77_REG_TEMP 0x00 diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c index 008fd9310061..51c0b37c4990 100644 --- a/drivers/hwmon/lm78.c +++ b/drivers/hwmon/lm78.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -37,7 +36,7 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, static unsigned short isa_address = 0x290; /* Insmod parameters */ -SENSORS_INSMOD_2(lm78, lm79); +I2C_CLIENT_INSMOD_2(lm78, lm79); /* Many LM78 constants specified below */ diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c index 500c38f3feae..83af8b3a0cac 100644 --- a/drivers/hwmon/lm80.c +++ b/drivers/hwmon/lm80.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -35,7 +34,7 @@ static unsigned short normal_i2c[] = { 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(lm80); +I2C_CLIENT_INSMOD_1(lm80); /* Many LM80 constants specified below */ diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c index 5b78d19693e8..d74b2c20c719 100644 --- a/drivers/hwmon/lm83.c +++ b/drivers/hwmon/lm83.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -52,7 +51,7 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, * Insmod parameters */ -SENSORS_INSMOD_1(lm83); +I2C_CLIENT_INSMOD_1(lm83); /* * The LM83 registers diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index 8976565113f4..aeb478815f72 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include @@ -37,7 +36,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_6(lm85b, lm85c, adm1027, adt7463, emc6d100, emc6d102); +I2C_CLIENT_INSMOD_6(lm85b, lm85c, adm1027, adt7463, emc6d100, emc6d102); /* The LM85 registers */ diff 
--git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index af161203ce1d..d0d2464c1b73 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include @@ -73,7 +72,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_1(lm87); +I2C_CLIENT_INSMOD_1(lm87); /* * The LM87 registers diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c index 68155c72a4e4..14de05fcd431 100644 --- a/drivers/hwmon/lm90.c +++ b/drivers/hwmon/lm90.c @@ -75,7 +75,6 @@ #include #include #include -#include #include #include #include @@ -96,7 +95,7 @@ static unsigned short normal_i2c[] = { 0x4c, 0x4d, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_6(lm90, adm1032, lm99, lm86, max6657, adt7461); +I2C_CLIENT_INSMOD_6(lm90, adm1032, lm99, lm86, max6657, adt7461); /* * The LM90 registers diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c index 7ddc9116d091..647b7c7cd575 100644 --- a/drivers/hwmon/lm92.c +++ b/drivers/hwmon/lm92.c @@ -44,7 +44,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,7 @@ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(lm92); +I2C_CLIENT_INSMOD_1(lm92); /* The LM92 registers */ #define LM92_REG_CONFIG 0x01 /* 8-bit, RW */ diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c index 056506bae5f2..16bf71f3a04d 100644 --- a/drivers/hwmon/max1619.c +++ b/drivers/hwmon/max1619.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include @@ -44,7 +43,7 @@ static unsigned short normal_i2c[] = { 0x18, 0x19, 0x1a, * Insmod parameters */ -SENSORS_INSMOD_1(max1619); +I2C_CLIENT_INSMOD_1(max1619); /* * The MAX1619 registers diff --git a/drivers/hwmon/sis5595.c b/drivers/hwmon/sis5595.c index 55716cb579aa..8610bce08244 100644 --- a/drivers/hwmon/sis5595.c +++ b/drivers/hwmon/sis5595.c @@ -56,7 +56,6 @@ #include 
#include #include -#include #include #include #include diff --git a/drivers/hwmon/smsc47m1.c b/drivers/hwmon/smsc47m1.c index dab22bd75b68..7e699a8ede26 100644 --- a/drivers/hwmon/smsc47m1.c +++ b/drivers/hwmon/smsc47m1.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/hwmon/via686a.c b/drivers/hwmon/via686a.c index d9251fb0b625..eb84997627c8 100644 --- a/drivers/hwmon/via686a.c +++ b/drivers/hwmon/via686a.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index 0466cc4b760e..2d2fcfb706d7 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index f269faeffa47..47607983acfd 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -53,7 +52,7 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, static unsigned short isa_address = 0x290; /* Insmod parameters */ -SENSORS_INSMOD_5(w83781d, w83782d, w83783s, w83627hf, as99127f); +I2C_CLIENT_INSMOD_5(w83781d, w83782d, w83783s, w83627hf, as99127f); I2C_CLIENT_MODULE_PARM(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, subclientaddr2}"); diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index 49e3ccd84bd0..d6d8c0f04e32 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -50,7 +49,7 @@ static unsigned short normal_i2c[] = { 0x2c, 0x2d, 0x2e, 0x2f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(w83792d); +I2C_CLIENT_INSMOD_1(w83792d); I2C_CLIENT_MODULE_PARM(force_subclients, "List of subclient addresses: " "{bus, clientaddr, subclientaddr1, 
subclientaddr2}"); diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c index 129d4012e8fd..133e34ab1d0a 100644 --- a/drivers/hwmon/w83l785ts.c +++ b/drivers/hwmon/w83l785ts.c @@ -36,7 +36,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,7 @@ static unsigned short normal_i2c[] = { 0x2e, I2C_CLIENT_END }; * Insmod parameters */ -SENSORS_INSMOD_1(w83l785ts); +I2C_CLIENT_INSMOD_1(w83l785ts); /* * The W83L785TS-S registers diff --git a/drivers/i2c/chips/ds1337.c b/drivers/i2c/chips/ds1337.c index c612f19fc7e6..9d3175c03395 100644 --- a/drivers/i2c/chips/ds1337.c +++ b/drivers/i2c/chips/ds1337.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include /* get the user-level API */ #include @@ -40,7 +39,7 @@ */ static unsigned short normal_i2c[] = { 0x68, I2C_CLIENT_END }; -SENSORS_INSMOD_1(ds1337); +I2C_CLIENT_INSMOD_1(ds1337); static int ds1337_attach_adapter(struct i2c_adapter *adapter); static int ds1337_detect(struct i2c_adapter *adapter, int address, int kind); diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c index befac01ecda6..a27420a54c84 100644 --- a/drivers/i2c/chips/eeprom.c +++ b/drivers/i2c/chips/eeprom.c @@ -33,14 +33,13 @@ #include #include #include -#include /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(eeprom); +I2C_CLIENT_INSMOD_1(eeprom); /* Size of EEPROM in bytes */ diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c index 42663f921ecc..31cee2d34a17 100644 --- a/drivers/i2c/chips/max6875.c +++ b/drivers/i2c/chips/max6875.c @@ -31,14 +31,13 @@ #include #include #include -#include #include /* Do not scan - the MAX6875 access method will write to some EEPROM chips */ static unsigned short normal_i2c[] = {I2C_CLIENT_END}; /* Insmod parameters */ -SENSORS_INSMOD_1(max6875); +I2C_CLIENT_INSMOD_1(max6875); /* The MAX6875 can only read/write 16 
bytes at a time */ #define SLICE_SIZE 16 diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c index c8ea2a1e1a45..225577fdda4d 100644 --- a/drivers/i2c/chips/pca9539.c +++ b/drivers/i2c/chips/pca9539.c @@ -13,13 +13,12 @@ #include #include #include -#include /* Addresses to scan */ static unsigned short normal_i2c[] = {0x74, 0x75, 0x76, 0x77, I2C_CLIENT_END}; /* Insmod parameters */ -SENSORS_INSMOD_1(pca9539); +I2C_CLIENT_INSMOD_1(pca9539); enum pca9539_cmd { diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c index 01ec9ce19768..6525743ff9fd 100644 --- a/drivers/i2c/chips/pcf8574.c +++ b/drivers/i2c/chips/pcf8574.c @@ -39,7 +39,6 @@ #include #include #include -#include /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, @@ -47,7 +46,7 @@ static unsigned short normal_i2c[] = { 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_2(pcf8574, pcf8574a); +I2C_CLIENT_INSMOD_2(pcf8574, pcf8574a); /* Initial values */ #define PCF8574_INIT 255 /* All outputs on (input mode) */ diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c index dd03f2c725c8..80f1df9a4500 100644 --- a/drivers/i2c/chips/pcf8591.c +++ b/drivers/i2c/chips/pcf8591.c @@ -24,14 +24,13 @@ #include #include #include -#include /* Addresses to scan */ static unsigned short normal_i2c[] = { 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, I2C_CLIENT_END }; /* Insmod parameters */ -SENSORS_INSMOD_1(pcf8591); +I2C_CLIENT_INSMOD_1(pcf8591); static int input_mode; module_param(input_mode, int, 0); diff --git a/include/linux/i2c-sensor.h b/include/linux/i2c-sensor.h deleted file mode 100644 index 1563d445dfd1..000000000000 --- a/include/linux/i2c-sensor.h +++ /dev/null @@ -1,203 +0,0 @@ -/* - i2c-sensor.h - Part of the i2c package - was originally sensors.h - Part of lm_sensors, Linux kernel modules - for hardware monitoring - Copyright (c) 1998, 1999 Frodo 
Looijaard - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -#ifndef _LINUX_I2C_SENSOR_H -#define _LINUX_I2C_SENSOR_H - -#include - -#define SENSORS_MODULE_PARM_FORCE(name) \ - I2C_CLIENT_MODULE_PARM(force_ ## name, \ - "List of adapter,address pairs which are unquestionably" \ - " assumed to contain a `" # name "' chip") - - -/* This defines several insmod variables, and the addr_data structure */ -#define SENSORS_INSMOD \ - I2C_CLIENT_MODULE_PARM(probe, \ - "List of adapter,address pairs to scan additionally"); \ - I2C_CLIENT_MODULE_PARM(ignore, \ - "List of adapter,address pairs not to scan"); \ - static struct i2c_client_address_data addr_data = { \ - .normal_i2c = normal_i2c, \ - .probe = probe, \ - .ignore = ignore, \ - .forces = forces, \ - } - -/* The following functions create an enum with the chip names as elements. - The first element of the enum is any_chip. These are the only macros - a module will want to use. 
*/ - -#define SENSORS_INSMOD_0 \ - enum chips { any_chip }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - static unsigned short *forces[] = { force, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_1(chip1) \ - enum chips { any_chip, chip1 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_2(chip1,chip2) \ - enum chips { any_chip, chip1, chip2 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_3(chip1,chip2,chip3) \ - enum chips { any_chip, chip1, chip2, chip3 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_4(chip1,chip2,chip3,chip4) \ - enum chips { any_chip, chip1, chip2, chip3, chip4 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - SENSORS_MODULE_PARM_FORCE(chip4); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - force_##chip4, \ - NULL}; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_5(chip1,chip2,chip3,chip4,chip5) \ - enum 
chips { any_chip, chip1, chip2, chip3, chip4, chip5 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - SENSORS_MODULE_PARM_FORCE(chip4); \ - SENSORS_MODULE_PARM_FORCE(chip5); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - force_##chip4, \ - force_##chip5, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_6(chip1,chip2,chip3,chip4,chip5,chip6) \ - enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - SENSORS_MODULE_PARM_FORCE(chip4); \ - SENSORS_MODULE_PARM_FORCE(chip5); \ - SENSORS_MODULE_PARM_FORCE(chip6); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - force_##chip4, \ - force_##chip5, \ - force_##chip6, \ - NULL }; \ - SENSORS_INSMOD - -#define SENSORS_INSMOD_7(chip1,chip2,chip3,chip4,chip5,chip6,chip7) \ - enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, chip7 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - SENSORS_MODULE_PARM_FORCE(chip4); \ - SENSORS_MODULE_PARM_FORCE(chip5); \ - SENSORS_MODULE_PARM_FORCE(chip6); \ - SENSORS_MODULE_PARM_FORCE(chip7); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - force_##chip4, \ - force_##chip5, \ - force_##chip6, \ - force_##chip7, \ - NULL }; \ - SENSORS_INSMOD - -#define 
SENSORS_INSMOD_8(chip1,chip2,chip3,chip4,chip5,chip6,chip7,chip8) \ - enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8 }; \ - I2C_CLIENT_MODULE_PARM(force, \ - "List of adapter,address pairs to boldly assume " \ - "to be present"); \ - SENSORS_MODULE_PARM_FORCE(chip1); \ - SENSORS_MODULE_PARM_FORCE(chip2); \ - SENSORS_MODULE_PARM_FORCE(chip3); \ - SENSORS_MODULE_PARM_FORCE(chip4); \ - SENSORS_MODULE_PARM_FORCE(chip5); \ - SENSORS_MODULE_PARM_FORCE(chip6); \ - SENSORS_MODULE_PARM_FORCE(chip7); \ - SENSORS_MODULE_PARM_FORCE(chip8); \ - static unsigned short *forces[] = { force, \ - force_##chip1, \ - force_##chip2, \ - force_##chip3, \ - force_##chip4, \ - force_##chip5, \ - force_##chip6, \ - force_##chip7, \ - force_##chip8, \ - NULL }; \ - SENSORS_INSMOD - -#endif /* def _LINUX_I2C_SENSOR_H */ diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 9419bc5584ad..3ad3969b6f0d 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -565,24 +565,148 @@ union i2c_smbus_data { module_param_array(var, short, &var##_num, 0); \ MODULE_PARM_DESC(var,desc) -/* This is the one you want to use in your own modules */ +#define I2C_CLIENT_MODULE_PARM_FORCE(name) \ +I2C_CLIENT_MODULE_PARM(force_##name, \ + "List of adapter,address pairs which are " \ + "unquestionably assumed to contain a `" \ + # name "' chip") + + +#define I2C_CLIENT_INSMOD_COMMON \ +I2C_CLIENT_MODULE_PARM(probe, "List of adapter,address pairs to scan " \ + "additionally"); \ +I2C_CLIENT_MODULE_PARM(ignore, "List of adapter,address pairs not to " \ + "scan"); \ +static struct i2c_client_address_data addr_data = { \ + .normal_i2c = normal_i2c, \ + .probe = probe, \ + .ignore = ignore, \ + .forces = forces, \ +} + +/* These are the ones you want to use in your own drivers. Pick the one + which matches the number of devices the driver differentiates between. 
*/ #define I2C_CLIENT_INSMOD \ - I2C_CLIENT_MODULE_PARM(probe, \ - "List of adapter,address pairs to scan additionally"); \ - I2C_CLIENT_MODULE_PARM(ignore, \ - "List of adapter,address pairs not to scan"); \ I2C_CLIENT_MODULE_PARM(force, \ "List of adapter,address pairs to boldly assume " \ "to be present"); \ - static unsigned short *addr_forces[] = { \ + static unsigned short *forces[] = { \ force, \ NULL \ }; \ - static struct i2c_client_address_data addr_data = { \ - .normal_i2c = normal_i2c, \ - .probe = probe, \ - .ignore = ignore, \ - .forces = addr_forces, \ - } +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_1(chip1) \ +enum chips { any_chip, chip1 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +static unsigned short *forces[] = { force, force_##chip1, NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_2(chip1, chip2) \ +enum chips { any_chip, chip1, chip2 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_3(chip1, chip2, chip3) \ +enum chips { any_chip, chip1, chip2, chip3 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_4(chip1, chip2, chip3, chip4) \ +enum chips { any_chip, chip1, chip2, chip3, chip4 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ 
+I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + force_##chip4, NULL}; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_5(chip1, chip2, chip3, chip4, chip5) \ +enum chips { any_chip, chip1, chip2, chip3, chip4, chip5 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + force_##chip4, force_##chip5, \ + NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_6(chip1, chip2, chip3, chip4, chip5, chip6) \ +enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + force_##chip4, force_##chip5, \ + force_##chip6, NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_7(chip1, chip2, chip3, chip4, chip5, chip6, chip7) \ +enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, \ + chip7 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ 
+I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip7); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + force_##chip4, force_##chip5, \ + force_##chip6, force_##chip7, \ + NULL }; \ +I2C_CLIENT_INSMOD_COMMON + +#define I2C_CLIENT_INSMOD_8(chip1, chip2, chip3, chip4, chip5, chip6, chip7, chip8) \ +enum chips { any_chip, chip1, chip2, chip3, chip4, chip5, chip6, \ + chip7, chip8 }; \ +I2C_CLIENT_MODULE_PARM(force, "List of adapter,address pairs to " \ + "boldly assume to be present"); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip1); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip2); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip3); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip4); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip5); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip6); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip7); \ +I2C_CLIENT_MODULE_PARM_FORCE(chip8); \ +static unsigned short *forces[] = { force, force_##chip1, \ + force_##chip2, force_##chip3, \ + force_##chip4, force_##chip5, \ + force_##chip6, force_##chip7, \ + force_##chip8, NULL }; \ +I2C_CLIENT_INSMOD_COMMON #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From 303760b44a7a142cb9f4c9df4609fb63bbda98db Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:52:01 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (07/11) The only part left in i2c-sensor is the VRM/VRD/VID handling code. This is in no way related to i2c, so it doesn't belong there. Move the code to hwmon, where it belongs. Note that not all hardware monitoring drivers do VRM/VRD/VID operations, so fewer drivers depend on hwmon-vid than there were depending on i2c-sensor.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- Documentation/i2c/porting-clients | 4 +- drivers/hwmon/Kconfig | 50 ++++++----------- drivers/hwmon/Makefile | 1 + drivers/hwmon/adm1025.c | 4 +- drivers/hwmon/adm1026.c | 6 +- drivers/hwmon/adm9240.c | 4 +- drivers/hwmon/asb100.c | 4 +- drivers/hwmon/atxp1.c | 4 +- drivers/hwmon/gl520sm.c | 4 +- drivers/hwmon/hwmon-vid.c | 103 +++++++++++++++++++++++++++++++++++ drivers/hwmon/it87.c | 6 +- drivers/hwmon/lm85.c | 4 +- drivers/hwmon/lm87.c | 4 +- drivers/hwmon/pc87360.c | 2 +- drivers/hwmon/w83627hf.c | 4 +- drivers/hwmon/w83781d.c | 4 +- drivers/hwmon/w83792d.c | 1 - drivers/i2c/Makefile | 4 -- drivers/i2c/chips/Kconfig | 10 ---- drivers/i2c/i2c-sensor-vid.c | 103 ----------------------------------- include/linux/hwmon-vid.h | 112 ++++++++++++++++++++++++++++++++++++++ include/linux/i2c-vid.h | 111 ------------------------------------- 22 files changed, 260 insertions(+), 289 deletions(-) create mode 100644 drivers/hwmon/hwmon-vid.c delete mode 100644 drivers/i2c/i2c-sensor-vid.c create mode 100644 include/linux/hwmon-vid.h delete mode 100644 include/linux/i2c-vid.h (limited to 'include/linux') diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients index 8b819379adcb..5eb8d37cc679 100644 --- a/Documentation/i2c/porting-clients +++ b/Documentation/i2c/porting-clients @@ -23,7 +23,9 @@ Technical changes: #include #include #include - #include /* if you need VRM support */ + #include /* for hardware monitoring drivers */ + #include + #include /* if you need VRM support */ #include /* if you have I/O operations */ Please respect this inclusion order. Some extra headers may be required for a given driver (e.g. "lm75.h"). diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 6483ff696b5a..35fe7d1d4028 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -19,10 +19,13 @@ config HWMON This support can also be built as a module. 
If so, the module will be called hwmon. +config HWMON_VID + tristate + default n + config SENSORS_ADM1021 tristate "Analog Devices ADM1021 and compatibles" depends on HWMON && I2C - select I2C_SENSOR help If you say yes here you get support for Analog Devices ADM1021 and ADM1023 sensor chips and clones: Maxim MAX1617 and MAX1617A, @@ -35,7 +38,7 @@ config SENSORS_ADM1021 config SENSORS_ADM1025 tristate "Analog Devices ADM1025 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for Analog Devices ADM1025 and Philips NE1619 sensor chips. @@ -46,7 +49,7 @@ config SENSORS_ADM1025 config SENSORS_ADM1026 tristate "Analog Devices ADM1026 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for Analog Devices ADM1026 sensor chip. @@ -57,7 +60,6 @@ config SENSORS_ADM1026 config SENSORS_ADM1031 tristate "Analog Devices ADM1031 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Analog Devices ADM1031 and ADM1030 sensor chips. @@ -68,7 +70,7 @@ config SENSORS_ADM1031 config SENSORS_ADM9240 tristate "Analog Devices ADM9240 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for Analog Devices ADM9240, Dallas DS1780, National Semiconductor LM81 sensor chips. @@ -79,7 +81,7 @@ config SENSORS_ADM9240 config SENSORS_ASB100 tristate "Asus ASB100 Bach" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for the ASB100 Bach sensor chip found on some Asus mainboards. 
@@ -90,7 +92,7 @@ config SENSORS_ASB100 config SENSORS_ATXP1 tristate "Attansic ATXP1 VID controller" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for the Attansic ATXP1 VID controller. @@ -104,7 +106,6 @@ config SENSORS_ATXP1 config SENSORS_DS1621 tristate "Dallas Semiconductor DS1621 and DS1625" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Dallas Semiconductor DS1621 and DS1625 sensor chips. @@ -115,7 +116,6 @@ config SENSORS_DS1621 config SENSORS_FSCHER tristate "FSC Hermes" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Fujitsu Siemens Computers Hermes sensor chips. @@ -126,7 +126,6 @@ config SENSORS_FSCHER config SENSORS_FSCPOS tristate "FSC Poseidon" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Fujitsu Siemens Computers Poseidon sensor chips. @@ -137,7 +136,6 @@ config SENSORS_FSCPOS config SENSORS_GL518SM tristate "Genesys Logic GL518SM" depends on HWMON && I2C - select I2C_SENSOR help If you say yes here you get support for Genesys Logic GL518SM sensor chips. @@ -148,7 +146,7 @@ config SENSORS_GL518SM config SENSORS_GL520SM tristate "Genesys Logic GL520SM" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for Genesys Logic GL520SM sensor chips. @@ -159,8 +157,8 @@ config SENSORS_GL520SM config SENSORS_IT87 tristate "ITE IT87xx and compatibles" depends on HWMON && I2C - select I2C_SENSOR select I2C_ISA + select HWMON_VID help If you say yes here you get support for ITE IT87xx sensor chips and clones: SiS960. 
@@ -171,7 +169,6 @@ config SENSORS_IT87 config SENSORS_LM63 tristate "National Semiconductor LM63" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for the National Semiconductor LM63 remote diode digital temperature sensor with integrated fan @@ -184,7 +181,6 @@ config SENSORS_LM63 config SENSORS_LM75 tristate "National Semiconductor LM75 and compatibles" depends on HWMON && I2C - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM75 sensor chips and clones: Dallas Semiconductor DS75 and DS1775 (in @@ -200,7 +196,6 @@ config SENSORS_LM75 config SENSORS_LM77 tristate "National Semiconductor LM77" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM77 sensor chips. @@ -211,7 +206,6 @@ config SENSORS_LM77 config SENSORS_LM78 tristate "National Semiconductor LM78 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA help If you say yes here you get support for National Semiconductor LM78, @@ -223,7 +217,6 @@ config SENSORS_LM78 config SENSORS_LM80 tristate "National Semiconductor LM80" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM80 sensor chips. @@ -234,7 +227,6 @@ config SENSORS_LM80 config SENSORS_LM83 tristate "National Semiconductor LM83" depends on HWMON && I2C - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM83 sensor chips. @@ -245,7 +237,7 @@ config SENSORS_LM83 config SENSORS_LM85 tristate "National Semiconductor LM85 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for National Semiconductor LM85 sensor chips and clones: ADT7463, EMC6D100, EMC6D102 and ADM1027. 
@@ -256,7 +248,7 @@ config SENSORS_LM85 config SENSORS_LM87 tristate "National Semiconductor LM87" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR + select HWMON_VID help If you say yes here you get support for National Semiconductor LM87 sensor chips. @@ -267,7 +259,6 @@ config SENSORS_LM87 config SENSORS_LM90 tristate "National Semiconductor LM90 and compatibles" depends on HWMON && I2C - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM90, LM86, LM89 and LM99, Analog Devices ADM1032 and Maxim MAX6657 and @@ -282,7 +273,6 @@ config SENSORS_LM90 config SENSORS_LM92 tristate "National Semiconductor LM92 and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for National Semiconductor LM92 and Maxim MAX6635 sensor chips. @@ -293,7 +283,6 @@ config SENSORS_LM92 config SENSORS_MAX1619 tristate "Maxim MAX1619 sensor chip" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for MAX1619 sensor chip. @@ -303,8 +292,8 @@ config SENSORS_MAX1619 config SENSORS_PC87360 tristate "National Semiconductor PC87360 family" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA + select HWMON_VID help If you say yes here you get access to the hardware monitoring functions of the National Semiconductor PC8736x Super-I/O chips. @@ -318,7 +307,6 @@ config SENSORS_PC87360 config SENSORS_SIS5595 tristate "Silicon Integrated Systems Corp. 
SiS5595" depends on HWMON && I2C && PCI && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA help If you say yes here you get support for the integrated sensors in @@ -330,7 +318,6 @@ config SENSORS_SIS5595 config SENSORS_SMSC47M1 tristate "SMSC LPC47M10x and compatibles" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA help If you say yes here you get support for the integrated fan @@ -343,7 +330,6 @@ config SENSORS_SMSC47M1 config SENSORS_SMSC47B397 tristate "SMSC LPC47B397-NC" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA help If you say yes here you get support for the SMSC LPC47B397-NC @@ -355,7 +341,6 @@ config SENSORS_SMSC47B397 config SENSORS_VIA686A tristate "VIA686A" depends on HWMON && I2C && PCI - select I2C_SENSOR select I2C_ISA help If you say yes here you get support for the integrated sensors in @@ -367,8 +352,8 @@ config SENSORS_VIA686A config SENSORS_W83781D tristate "Winbond W83781D, W83782D, W83783S, W83627HF, Asus AS99127F" depends on HWMON && I2C - select I2C_SENSOR select I2C_ISA + select HWMON_VID help If you say yes here you get support for the Winbond W8378x series of sensor chips: the W83781D, W83782D, W83783S and W83627HF, @@ -380,7 +365,6 @@ config SENSORS_W83781D config SENSORS_W83792D tristate "Winbond W83792D" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for the Winbond W83792D chip. 
@@ -390,7 +374,6 @@ config SENSORS_W83792D config SENSORS_W83L785TS tristate "Winbond W83L785TS-S" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for the Winbond W83L785TS-S sensor chip, which is used on the Asus A7N8X, among other @@ -402,8 +385,8 @@ config SENSORS_W83L785TS config SENSORS_W83627HF tristate "Winbond W83627HF, W83627THF, W83637HF, W83697HF" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA + select HWMON_VID help If you say yes here you get support for the Winbond W836X7 series of sensor chips: the W83627HF, W83627THF, W83637HF, and the W83697HF @@ -414,7 +397,6 @@ config SENSORS_W83627HF config SENSORS_W83627EHF tristate "Winbond W83627EHF" depends on HWMON && I2C && EXPERIMENTAL - select I2C_SENSOR select I2C_ISA help If you say yes here you get preliminary support for the hardware diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 187b89d47f83..381f1bf04cc5 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -3,6 +3,7 @@ # obj-$(CONFIG_HWMON) += hwmon.o +obj-$(CONFIG_HWMON_VID) += hwmon-vid.o # asb100, then w83781d go first, as they can override other drivers' addresses. 
obj-$(CONFIG_SENSORS_ASB100) += asb100.o diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c index 229fd0de6f9e..526b7ff179eb 100644 --- a/drivers/hwmon/adm1025.c +++ b/drivers/hwmon/adm1025.c @@ -50,8 +50,8 @@ #include #include #include -#include #include +#include #include /* @@ -473,7 +473,7 @@ static void adm1025_init_client(struct i2c_client *client) struct adm1025_data *data = i2c_get_clientdata(client); int i; - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); /* * Set high limits diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c index f32f819efcfc..625158110fd4 100644 --- a/drivers/hwmon/adm1026.c +++ b/drivers/hwmon/adm1026.c @@ -28,9 +28,9 @@ #include #include #include -#include -#include #include +#include +#include #include /* Addresses to scan */ @@ -1552,7 +1552,7 @@ int adm1026_detect(struct i2c_adapter *adapter, int address, goto exitfree; /* Set the VRM version */ - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); /* Initialize the ADM1026 chip */ adm1026_init_client(new_client); diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c index 0a742cb88f4c..bc7faef162f7 100644 --- a/drivers/hwmon/adm9240.c +++ b/drivers/hwmon/adm9240.c @@ -45,8 +45,8 @@ #include #include #include -#include #include +#include #include /* Addresses to scan */ @@ -657,7 +657,7 @@ static void adm9240_init_client(struct i2c_client *client) u8 conf = adm9240_read_value(client, ADM9240_REG_CONFIG); u8 mode = adm9240_read_value(client, ADM9240_REG_TEMP_CONF) & 3; - data->vrm = i2c_which_vrm(); /* need this to report vid as mV */ + data->vrm = vid_which_vrm(); /* need this to report vid as mV */ dev_info(&client->dev, "Using VRM: %d.%d\n", data->vrm / 10, data->vrm % 10); diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c index 66b0dbd1af0e..8e34855a6274 100644 --- a/drivers/hwmon/asb100.c +++ b/drivers/hwmon/asb100.c @@ -39,8 +39,8 @@ #include #include #include -#include #include +#include #include #include 
#include @@ -973,7 +973,7 @@ static void asb100_init_client(struct i2c_client *client) vid = asb100_read_value(client, ASB100_REG_VID_FANDIV) & 0x0f; vid |= (asb100_read_value(client, ASB100_REG_CHIPID) & 0x01) << 4; - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); vid = vid_from_reg(vid, data->vrm); /* Start monitoring */ diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c index 5cf77e67a2ed..deb4d34c9539 100644 --- a/drivers/hwmon/atxp1.c +++ b/drivers/hwmon/atxp1.c @@ -23,8 +23,8 @@ #include #include #include -#include #include +#include #include MODULE_LICENSE("GPL"); @@ -296,7 +296,7 @@ static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind) } /* Get VRM */ - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); if ((data->vrm != 90) && (data->vrm != 91)) { dev_err(&new_client->dev, "Not supporting VRM %d.%d\n", diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c index de6608a159c3..12fd757066fc 100644 --- a/drivers/hwmon/gl520sm.c +++ b/drivers/hwmon/gl520sm.c @@ -26,8 +26,8 @@ #include #include #include -#include #include +#include #include /* Type of the extra sensor */ @@ -617,7 +617,7 @@ static void gl520_init_client(struct i2c_client *client) conf = oldconf = gl520_read_value(client, GL520_REG_CONF); data->alarm_mask = 0xff; - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); if (extra_sensor_type == 1) conf &= ~0x10; diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c new file mode 100644 index 000000000000..ce475c93f836 --- /dev/null +++ b/drivers/hwmon/hwmon-vid.c @@ -0,0 +1,103 @@ +/* + hwmon-vid.c - VID/VRM/VRD voltage conversions + + Copyright (c) 2004 Rudolf Marek + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include +#include +#include +#include + +struct vrm_model { + u8 vendor; + u8 eff_family; + u8 eff_model; + int vrm_type; +}; + +#define ANY 0xFF + +#ifdef CONFIG_X86 + +static struct vrm_model vrm_models[] = { + {X86_VENDOR_AMD, 0x6, ANY, 90}, /* Athlon Duron etc */ + {X86_VENDOR_AMD, 0xF, ANY, 24}, /* Athlon 64, Opteron */ + {X86_VENDOR_INTEL, 0x6, 0x9, 85}, /* 0.13um too */ + {X86_VENDOR_INTEL, 0x6, 0xB, 85}, /* 0xB Tualatin */ + {X86_VENDOR_INTEL, 0x6, ANY, 82}, /* any P6 */ + {X86_VENDOR_INTEL, 0x7, ANY, 0}, /* Itanium */ + {X86_VENDOR_INTEL, 0xF, 0x3, 100}, /* P4 Prescott */ + {X86_VENDOR_INTEL, 0xF, ANY, 90}, /* P4 before Prescott */ + {X86_VENDOR_INTEL, 0x10,ANY, 0}, /* Itanium 2 */ + {X86_VENDOR_UNKNOWN, ANY, ANY, 0} /* stop here */ + }; + +static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor) +{ + int i = 0; + + while (vrm_models[i].vendor!=X86_VENDOR_UNKNOWN) { + if (vrm_models[i].vendor==vendor) + if ((vrm_models[i].eff_family==eff_family)&& \ + ((vrm_models[i].eff_model==eff_model)|| \ + (vrm_models[i].eff_model==ANY))) + return vrm_models[i].vrm_type; + i++; + } + + return 0; +} + +int vid_which_vrm(void) +{ + struct cpuinfo_x86 *c = cpu_data; + u32 eax; + u8 eff_family, eff_model; + int vrm_ret; + + if (c->x86 < 6) return 0; /* any CPU with familly lower than 6 + dont have VID and/or CPUID */ + eax = cpuid_eax(1); + eff_family = ((eax & 0x00000F00)>>8); + eff_model = ((eax & 0x000000F0)>>4); + if (eff_family == 0xF) { /* use extended model & family */ + eff_family += ((eax & 0x00F00000)>>20); + eff_model += 
((eax & 0x000F0000)>>16)<<4; + } + vrm_ret = find_vrm(eff_family,eff_model,c->x86_vendor); + if (vrm_ret == 0) + printk(KERN_INFO "hwmon-vid: Unknown VRM version of your" + " x86 CPU\n"); + return vrm_ret; +} + +/* and now for something completely different for Non-x86 world*/ +#else +int vid_which_vrm(void) +{ + printk(KERN_INFO "hwmon-vid: Unknown VRM version of your CPU\n"); + return 0; +} +#endif + +EXPORT_SYMBOL(vid_which_vrm); + +MODULE_AUTHOR("Rudolf Marek "); + +MODULE_DESCRIPTION("hwmon-vid driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 84877665b66e..53cc2b6d6385 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -37,9 +37,9 @@ #include #include #include -#include -#include #include +#include +#include #include #include @@ -919,7 +919,7 @@ int it87_detect(struct i2c_adapter *adapter, int address, int kind) } if (data->type == it8712) { - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); device_create_file_vrm(new_client); device_create_file_vid(new_client); } diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c index aeb478815f72..ab214df9624b 100644 --- a/drivers/hwmon/lm85.c +++ b/drivers/hwmon/lm85.c @@ -28,8 +28,8 @@ #include #include #include -#include #include +#include #include /* Addresses to scan */ @@ -1147,7 +1147,7 @@ int lm85_detect(struct i2c_adapter *adapter, int address, goto ERROR1; /* Set the VRM version */ - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); /* Initialize the LM85 chip */ lm85_init_client(new_client); diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c index d0d2464c1b73..dca996de4c33 100644 --- a/drivers/hwmon/lm87.c +++ b/drivers/hwmon/lm87.c @@ -57,8 +57,8 @@ #include #include #include -#include #include +#include #include /* @@ -694,7 +694,7 @@ static void lm87_init_client(struct i2c_client *client) u8 config; data->channel = lm87_read_value(client, LM87_REG_CHANNEL_MODE); - data->vrm = i2c_which_vrm(); + data->vrm = 
vid_which_vrm(); config = lm87_read_value(client, LM87_REG_CONFIG); if (!(config & 0x01)) { diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c index c66ae4f6e802..08fcb5aea764 100644 --- a/drivers/hwmon/pc87360.c +++ b/drivers/hwmon/pc87360.c @@ -39,8 +39,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c index 2d2fcfb706d7..02bd5c0239a2 100644 --- a/drivers/hwmon/w83627hf.c +++ b/drivers/hwmon/w83627hf.c @@ -43,8 +43,8 @@ #include #include #include -#include #include +#include #include #include #include "lm75.h" @@ -1316,7 +1316,7 @@ static void w83627hf_init_client(struct i2c_client *client) data->vrm = (data->vrm_ovt & 0x01) ? 90 : 82; } else { /* Convert VID to voltage based on default VRM */ - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); } tmp = w83627hf_read_value(client, W83781D_REG_SCFG1); diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c index 47607983acfd..4c43337ca780 100644 --- a/drivers/hwmon/w83781d.c +++ b/drivers/hwmon/w83781d.c @@ -39,8 +39,8 @@ #include #include #include -#include #include +#include #include #include #include "lm75.h" @@ -1478,7 +1478,7 @@ w83781d_init_client(struct i2c_client *client) w83781d_write_value(client, W83781D_REG_BEEP_INTS2, 0); } - data->vrm = i2c_which_vrm(); + data->vrm = vid_which_vrm(); if ((type != w83781d) && (type != as99127f)) { tmp = w83781d_read_value(client, W83781D_REG_SCFG1); diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c index d6d8c0f04e32..ba0c28015f6a 100644 --- a/drivers/hwmon/w83792d.c +++ b/drivers/hwmon/w83792d.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/i2c/Makefile b/drivers/i2c/Makefile index 71d68ad0e5ce..71c5a854ac5d 100644 --- a/drivers/i2c/Makefile +++ b/drivers/i2c/Makefile @@ -4,12 +4,8 @@ obj-$(CONFIG_I2C) += i2c-core.o obj-$(CONFIG_I2C_CHARDEV) += i2c-dev.o 
-obj-$(CONFIG_I2C_SENSOR) += i2c-sensor.o obj-y += busses/ chips/ algos/ -i2c-sensor-objs := i2c-sensor-vid.o - - ifeq ($(CONFIG_I2C_DEBUG_CORE),y) EXTRA_CFLAGS += -DDEBUG endif diff --git a/drivers/i2c/chips/Kconfig b/drivers/i2c/chips/Kconfig index 43f70dbfc03f..6bd44a44cd28 100644 --- a/drivers/i2c/chips/Kconfig +++ b/drivers/i2c/chips/Kconfig @@ -2,17 +2,12 @@ # Miscellaneous I2C chip drivers configuration # -config I2C_SENSOR - tristate - default n - menu "Miscellaneous I2C Chip support" depends on I2C config SENSORS_DS1337 tristate "Dallas Semiconductor DS1337 and DS1339 Real Time Clock" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Dallas Semiconductor DS1337 and DS1339 real-time clock chips. @@ -23,7 +18,6 @@ config SENSORS_DS1337 config SENSORS_DS1374 tristate "Maxim/Dallas Semiconductor DS1374 Real Time Clock" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Dallas Semiconductor DS1374 real-time clock chips. @@ -34,7 +28,6 @@ config SENSORS_DS1374 config SENSORS_EEPROM tristate "EEPROM reader" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get read-only access to the EEPROM data available on modern memory DIMMs and Sony Vaio laptops. Such @@ -46,7 +39,6 @@ config SENSORS_EEPROM config SENSORS_PCF8574 tristate "Philips PCF8574 and PCF8574A" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Philips PCF8574 and PCF8574A chips. @@ -67,7 +59,6 @@ config SENSORS_PCA9539 config SENSORS_PCF8591 tristate "Philips PCF8591" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for Philips PCF8591 chips. @@ -77,7 +68,6 @@ config SENSORS_PCF8591 config SENSORS_RTC8564 tristate "Epson 8564 RTC chip" depends on I2C && EXPERIMENTAL - select I2C_SENSOR help If you say yes here you get support for the Epson 8564 RTC chip. 
diff --git a/drivers/i2c/i2c-sensor-vid.c b/drivers/i2c/i2c-sensor-vid.c deleted file mode 100644 index b8ef289fc80e..000000000000 --- a/drivers/i2c/i2c-sensor-vid.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - i2c-sensor-vid.c - Part of lm_sensors, Linux kernel modules for hardware - monitoring - - Copyright (c) 2004 Rudolf Marek - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
-*/ - -#include -#include -#include - -struct vrm_model { - u8 vendor; - u8 eff_family; - u8 eff_model; - int vrm_type; -}; - -#define ANY 0xFF - -#ifdef CONFIG_X86 - -static struct vrm_model vrm_models[] = { - {X86_VENDOR_AMD, 0x6, ANY, 90}, /* Athlon Duron etc */ - {X86_VENDOR_AMD, 0xF, ANY, 24}, /* Athlon 64, Opteron */ - {X86_VENDOR_INTEL, 0x6, 0x9, 85}, /* 0.13um too */ - {X86_VENDOR_INTEL, 0x6, 0xB, 85}, /* 0xB Tualatin */ - {X86_VENDOR_INTEL, 0x6, ANY, 82}, /* any P6 */ - {X86_VENDOR_INTEL, 0x7, ANY, 0}, /* Itanium */ - {X86_VENDOR_INTEL, 0xF, 0x3, 100}, /* P4 Prescott */ - {X86_VENDOR_INTEL, 0xF, ANY, 90}, /* P4 before Prescott */ - {X86_VENDOR_INTEL, 0x10,ANY, 0}, /* Itanium 2 */ - {X86_VENDOR_UNKNOWN, ANY, ANY, 0} /* stop here */ - }; - -static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor) -{ - int i = 0; - - while (vrm_models[i].vendor!=X86_VENDOR_UNKNOWN) { - if (vrm_models[i].vendor==vendor) - if ((vrm_models[i].eff_family==eff_family)&& \ - ((vrm_models[i].eff_model==eff_model)|| \ - (vrm_models[i].eff_model==ANY))) - return vrm_models[i].vrm_type; - i++; - } - - return 0; -} - -int i2c_which_vrm(void) -{ - struct cpuinfo_x86 *c = cpu_data; - u32 eax; - u8 eff_family, eff_model; - int vrm_ret; - - if (c->x86 < 6) return 0; /* any CPU with familly lower than 6 - dont have VID and/or CPUID */ - eax = cpuid_eax(1); - eff_family = ((eax & 0x00000F00)>>8); - eff_model = ((eax & 0x000000F0)>>4); - if (eff_family == 0xF) { /* use extended model & family */ - eff_family += ((eax & 0x00F00000)>>20); - eff_model += ((eax & 0x000F0000)>>16)<<4; - } - vrm_ret = find_vrm(eff_family,eff_model,c->x86_vendor); - if (vrm_ret == 0) - printk(KERN_INFO "i2c-sensor.o: Unknown VRM version of your" - " x86 CPU\n"); - return vrm_ret; -} - -/* and now for something completely different for Non-x86 world*/ -#else -int i2c_which_vrm(void) -{ - printk(KERN_INFO "i2c-sensor.o: Unknown VRM version of your CPU\n"); - return 0; -} -#endif - -EXPORT_SYMBOL(i2c_which_vrm); - 
-MODULE_AUTHOR("Rudolf Marek "); - -MODULE_DESCRIPTION("i2c-sensor driver"); -MODULE_LICENSE("GPL"); diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h new file mode 100644 index 000000000000..c45cd872c55e --- /dev/null +++ b/include/linux/hwmon-vid.h @@ -0,0 +1,112 @@ +/* + hwmon-vid.h - VID/VRM/VRD voltage conversions + + Originally part of lm_sensors + Copyright (c) 2002 Mark D. Studebaker + With assistance from Trent Piepho + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + This file contains common code for decoding VID pins. + This file is #included in various chip drivers in this directory. + As the user is unlikely to load more than one driver which + includes this code we don't worry about the wasted space. + Reference: VRM x.y DC-DC Converter Design Guidelines, + available at http://developer.intel.com +*/ + +/* + AMD Opteron processors don't follow the Intel VRM spec. + I'm going to "make up" 2.4 as the VRM spec for the Opterons. + No good reason just a mnemonic for the 24x Opteron processor + series + + Opteron VID encoding is: + + 00000 = 1.550 V + 00001 = 1.525 V + . . . . + 11110 = 0.800 V + 11111 = 0.000 V (off) + */ + +/* + Legal val values 0x00 - 0x1f; except for VRD 10.0, 0x00 - 0x3f. + vrm is the Intel VRM document version. 
+ Note: vrm version is scaled by 10 and the return value is scaled by 1000 + to avoid floating point in the kernel. +*/ + +int vid_which_vrm(void); + +#define DEFAULT_VRM 82 + +static inline int vid_from_reg(int val, int vrm) +{ + int vid; + + switch(vrm) { + + case 0: + return 0; + + case 100: /* VRD 10.0 */ + if((val & 0x1f) == 0x1f) + return 0; + if((val & 0x1f) <= 0x09 || val == 0x0a) + vid = 10875 - (val & 0x1f) * 250; + else + vid = 18625 - (val & 0x1f) * 250; + if(val & 0x20) + vid -= 125; + vid /= 10; /* only return 3 dec. places for now */ + return vid; + + case 24: /* Opteron processor */ + return(val == 0x1f ? 0 : 1550 - val * 25); + + case 91: /* VRM 9.1 */ + case 90: /* VRM 9.0 */ + return(val == 0x1f ? 0 : + 1850 - val * 25); + + case 85: /* VRM 8.5 */ + return((val & 0x10 ? 25 : 0) + + ((val & 0x0f) > 0x04 ? 2050 : 1250) - + ((val & 0x0f) * 50)); + + case 84: /* VRM 8.4 */ + val &= 0x0f; + /* fall through */ + default: /* VRM 8.2 */ + return(val == 0x1f ? 0 : + val & 0x10 ? 5100 - (val) * 100 : + 2050 - (val) * 50); + } +} + +static inline int vid_to_reg(int val, int vrm) +{ + switch (vrm) { + case 91: /* VRM 9.1 */ + case 90: /* VRM 9.0 */ + return ((val >= 1100) && (val <= 1850) ? + ((18499 - val * 10) / 25 + 5) / 10 : -1); + default: + return -1; + } +} diff --git a/include/linux/i2c-vid.h b/include/linux/i2c-vid.h deleted file mode 100644 index 41d0635e0ba9..000000000000 --- a/include/linux/i2c-vid.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - i2c-vid.h - Part of lm_sensors, Linux kernel modules for hardware - monitoring - Copyright (c) 2002 Mark D. Studebaker - With assistance from Trent Piepho - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -*/ - -/* - This file contains common code for decoding VID pins. - This file is #included in various chip drivers in this directory. - As the user is unlikely to load more than one driver which - includes this code we don't worry about the wasted space. - Reference: VRM x.y DC-DC Converter Design Guidelines, - available at http://developer.intel.com -*/ - -/* - AMD Opteron processors don't follow the Intel VRM spec. - I'm going to "make up" 2.4 as the VRM spec for the Opterons. - No good reason just a mnemonic for the 24x Opteron processor - series - - Opteron VID encoding is: - - 00000 = 1.550 V - 00001 = 1.525 V - . . . . - 11110 = 0.800 V - 11111 = 0.000 V (off) - */ - -/* - Legal val values 0x00 - 0x1f; except for VRD 10.0, 0x00 - 0x3f. - vrm is the Intel VRM document version. - Note: vrm version is scaled by 10 and the return value is scaled by 1000 - to avoid floating point in the kernel. -*/ - -int i2c_which_vrm(void); - -#define DEFAULT_VRM 82 - -static inline int vid_from_reg(int val, int vrm) -{ - int vid; - - switch(vrm) { - - case 0: - return 0; - - case 100: /* VRD 10.0 */ - if((val & 0x1f) == 0x1f) - return 0; - if((val & 0x1f) <= 0x09 || val == 0x0a) - vid = 10875 - (val & 0x1f) * 250; - else - vid = 18625 - (val & 0x1f) * 250; - if(val & 0x20) - vid -= 125; - vid /= 10; /* only return 3 dec. places for now */ - return vid; - - case 24: /* Opteron processor */ - return(val == 0x1f ? 0 : 1550 - val * 25); - - case 91: /* VRM 9.1 */ - case 90: /* VRM 9.0 */ - return(val == 0x1f ? 
0 : - 1850 - val * 25); - - case 85: /* VRM 8.5 */ - return((val & 0x10 ? 25 : 0) + - ((val & 0x0f) > 0x04 ? 2050 : 1250) - - ((val & 0x0f) * 50)); - - case 84: /* VRM 8.4 */ - val &= 0x0f; - /* fall through */ - default: /* VRM 8.2 */ - return(val == 0x1f ? 0 : - val & 0x10 ? 5100 - (val) * 100 : - 2050 - (val) * 50); - } -} - -static inline int vid_to_reg(int val, int vrm) -{ - switch (vrm) { - case 91: /* VRM 9.1 */ - case 90: /* VRM 9.0 */ - return ((val >= 1100) && (val <= 1850) ? - ((18499 - val * 10) / 25 + 5) / 10 : -1); - default: - return -1; - } -} -- cgit v1.2.3 From ee70d3a33368038d41985474d9e70ac07f19651c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sun, 31 Jul 2005 21:57:33 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (09/11) Delete DEFAULT_VRM from hwmon-vid.h, it has no more users. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/hwmon-vid.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h index c45cd872c55e..6d9d4f9bad58 100644 --- a/include/linux/hwmon-vid.h +++ b/include/linux/hwmon-vid.h @@ -53,8 +53,6 @@ int vid_which_vrm(void); -#define DEFAULT_VRM 82 - static inline int vid_from_reg(int val, int vrm) { int vid; -- cgit v1.2.3 From d0f282706df877f8fd8869419e308d24eedb523b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 1 Aug 2005 22:50:08 +0200 Subject: [PATCH] hwmon: hwmon vs i2c, second round (10/11) I see very little reason why vid_from_reg is inlined. It is not exactly short, its parameters are seldom known in advance, and it is never called in speed critical areas. Uninlining it should cause little performance loss if any, and saves a significant space as well as compilation time. As suggested by Alexey Dobriyan, I am leaving vid_to_reg inline for now, as it is short and has a single user so far.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/hwmon/hwmon-vid.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/hwmon-vid.h | 83 +++++------------------------------------------ 2 files changed, 91 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c index 7c8ed0353302..a54d08699973 100644 --- a/drivers/hwmon/hwmon-vid.c +++ b/drivers/hwmon/hwmon-vid.c @@ -3,6 +3,10 @@ Copyright (c) 2004 Rudolf Marek + Partly imported from i2c-vid.h of the lm_sensors project + Copyright (c) 2002 Mark D. Studebaker + With assistance from Trent Piepho + This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or @@ -23,6 +27,83 @@ #include #include +/* + Common code for decoding VID pins. + + References: + + For VRM 8.4 to 9.1, "VRM x.y DC-DC Converter Design Guidelines", + available at http://developer.intel.com/. + + For VRD 10.0 and up, "VRD x.y Design Guide", + available at http://developer.intel.com/. + + AMD Opteron processors don't follow the Intel specifications. + I'm going to "make up" 2.4 as the spec number for the Opterons. + No good reason just a mnemonic for the 24x Opteron processor + series. + + Opteron VID encoding is: + 00000 = 1.550 V + 00001 = 1.525 V + . . . . + 11110 = 0.800 V + 11111 = 0.000 V (off) +*/ + +/* vrm is the VRM/VRD document version multiplied by 10. + val is the 4-, 5- or 6-bit VID code. + Returned value is in mV to avoid floating point in the kernel. 
*/ +int vid_from_reg(int val, int vrm) +{ + int vid; + + switch(vrm) { + + case 0: + return 0; + + case 100: /* VRD 10.0 */ + if((val & 0x1f) == 0x1f) + return 0; + if((val & 0x1f) <= 0x09 || val == 0x0a) + vid = 10875 - (val & 0x1f) * 250; + else + vid = 18625 - (val & 0x1f) * 250; + if(val & 0x20) + vid -= 125; + vid /= 10; /* only return 3 dec. places for now */ + return vid; + + case 24: /* Opteron processor */ + return(val == 0x1f ? 0 : 1550 - val * 25); + + case 91: /* VRM 9.1 */ + case 90: /* VRM 9.0 */ + return(val == 0x1f ? 0 : + 1850 - val * 25); + + case 85: /* VRM 8.5 */ + return((val & 0x10 ? 25 : 0) + + ((val & 0x0f) > 0x04 ? 2050 : 1250) - + ((val & 0x0f) * 50)); + + case 84: /* VRM 8.4 */ + val &= 0x0f; + /* fall through */ + default: /* VRM 8.2 */ + return(val == 0x1f ? 0 : + val & 0x10 ? 5100 - (val) * 100 : + 2050 - (val) * 50); + } +} + + +/* + After this point is the code to automatically determine which + VRM/VRD specification should be used depending on the CPU. +*/ + struct vrm_model { u8 vendor; u8 eff_family; @@ -96,6 +177,7 @@ int vid_which_vrm(void) } #endif +EXPORT_SYMBOL(vid_from_reg); EXPORT_SYMBOL(vid_which_vrm); MODULE_AUTHOR("Rudolf Marek "); diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h index 6d9d4f9bad58..cd4b7a042b86 100644 --- a/include/linux/hwmon-vid.h +++ b/include/linux/hwmon-vid.h @@ -20,83 +20,16 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/* - This file contains common code for decoding VID pins. - This file is #included in various chip drivers in this directory. - As the user is unlikely to load more than one driver which - includes this code we don't worry about the wasted space. - Reference: VRM x.y DC-DC Converter Design Guidelines, - available at http://developer.intel.com -*/ - -/* - AMD Opteron processors don't follow the Intel VRM spec. - I'm going to "make up" 2.4 as the VRM spec for the Opterons. 
- No good reason just a mnemonic for the 24x Opteron processor - series - - Opteron VID encoding is: - - 00000 = 1.550 V - 00001 = 1.525 V - . . . . - 11110 = 0.800 V - 11111 = 0.000 V (off) - */ - -/* - Legal val values 0x00 - 0x1f; except for VRD 10.0, 0x00 - 0x3f. - vrm is the Intel VRM document version. - Note: vrm version is scaled by 10 and the return value is scaled by 1000 - to avoid floating point in the kernel. -*/ +#ifndef _LINUX_HWMON_VID_H +#define _LINUX_HWMON_VID_H +int vid_from_reg(int val, int vrm); int vid_which_vrm(void); -static inline int vid_from_reg(int val, int vrm) -{ - int vid; - - switch(vrm) { - - case 0: - return 0; - - case 100: /* VRD 10.0 */ - if((val & 0x1f) == 0x1f) - return 0; - if((val & 0x1f) <= 0x09 || val == 0x0a) - vid = 10875 - (val & 0x1f) * 250; - else - vid = 18625 - (val & 0x1f) * 250; - if(val & 0x20) - vid -= 125; - vid /= 10; /* only return 3 dec. places for now */ - return vid; - - case 24: /* Opteron processor */ - return(val == 0x1f ? 0 : 1550 - val * 25); - - case 91: /* VRM 9.1 */ - case 90: /* VRM 9.0 */ - return(val == 0x1f ? 0 : - 1850 - val * 25); - - case 85: /* VRM 8.5 */ - return((val & 0x10 ? 25 : 0) + - ((val & 0x0f) > 0x04 ? 2050 : 1250) - - ((val & 0x0f) * 50)); - - case 84: /* VRM 8.4 */ - val &= 0x0f; - /* fall through */ - default: /* VRM 8.2 */ - return(val == 0x1f ? 0 : - val & 0x10 ? 5100 - (val) * 100 : - 2050 - (val) * 50); - } -} - +/* vrm is the VRM/VRD document version multiplied by 10. + val is in mV to avoid floating point in the kernel. + Returned value is the 4-, 5- or 6-bit VID code. + Note that only VRM 9.x is supported for now. 
*/ static inline int vid_to_reg(int val, int vrm) { switch (vrm) { @@ -108,3 +41,5 @@ static inline int vid_to_reg(int val, int vrm) return -1; } } + +#endif /* _LINUX_HWMON_VID_H */ -- cgit v1.2.3 From 975185880d55676b1352047e82a0cb84173c6c28 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:33:24 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.name (1/7) The name member of the i2c_algorithm is never used, although all drivers conscientiously fill it. We can drop it completely, this structure doesn't need to have a name. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/algos/i2c-algo-bit.c | 1 - drivers/i2c/algos/i2c-algo-ite.c | 1 - drivers/i2c/algos/i2c-algo-pca.c | 1 - drivers/i2c/algos/i2c-algo-pcf.c | 1 - drivers/i2c/algos/i2c-algo-sgi.c | 1 - drivers/i2c/algos/i2c-algo-sibyte.c | 1 - drivers/i2c/busses/i2c-ali1535.c | 1 - drivers/i2c/busses/i2c-ali1563.c | 1 - drivers/i2c/busses/i2c-ali15x3.c | 1 - drivers/i2c/busses/i2c-amd756.c | 1 - drivers/i2c/busses/i2c-amd8111.c | 1 - drivers/i2c/busses/i2c-au1550.c | 1 - drivers/i2c/busses/i2c-i801.c | 1 - drivers/i2c/busses/i2c-ibm_iic.c | 1 - drivers/i2c/busses/i2c-iop3xx.c | 1 - drivers/i2c/busses/i2c-isa.c | 1 - drivers/i2c/busses/i2c-keywest.c | 1 - drivers/i2c/busses/i2c-mpc.c | 1 - drivers/i2c/busses/i2c-mv64xxx.c | 1 - drivers/i2c/busses/i2c-nforce2.c | 1 - drivers/i2c/busses/i2c-piix4.c | 1 - drivers/i2c/busses/i2c-s3c2410.c | 1 - drivers/i2c/busses/i2c-sis5595.c | 1 - drivers/i2c/busses/i2c-sis630.c | 1 - drivers/i2c/busses/i2c-sis96x.c | 1 - drivers/i2c/busses/i2c-stub.c | 1 - drivers/i2c/busses/i2c-viapro.c | 1 - drivers/i2c/busses/scx200_acb.c | 1 - drivers/media/common/saa7146_i2c.c | 1 - drivers/media/dvb/b2c2/flexcop-i2c.c | 1 - drivers/media/dvb/dvb-usb/cxusb.c | 1 - drivers/media/dvb/dvb-usb/dibusb-common.c | 1 - drivers/media/dvb/dvb-usb/digitv.c | 1 - drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c | 1 - drivers/media/video/bttv-i2c.c | 1 - 
drivers/media/video/saa7134/saa7134-i2c.c | 1 - drivers/usb/media/w9968cf.c | 1 - include/linux/i2c.h | 1 - 38 files changed, 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index fb5b732238ed..6e1e1601accb 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -519,7 +519,6 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm i2c_bit_algo = { - .name = "Bit-shift algorithm", .id = I2C_ALGO_BIT, .master_xfer = bit_xfer, .functionality = bit_func, diff --git a/drivers/i2c/algos/i2c-algo-ite.c b/drivers/i2c/algos/i2c-algo-ite.c index e6cae39f47aa..a9c2b1115e07 100644 --- a/drivers/i2c/algos/i2c-algo-ite.c +++ b/drivers/i2c/algos/i2c-algo-ite.c @@ -713,7 +713,6 @@ static u32 iic_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm iic_algo = { - .name = "ITE IIC algorithm", .id = I2C_ALGO_IIC, .master_xfer = iic_xfer, .algo_control = algo_control, /* ioctl */ diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index ff2db0da4417..16157391cdf5 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -356,7 +356,6 @@ static int pca_init(struct i2c_algo_pca_data *adap) } static struct i2c_algorithm pca_algo = { - .name = "PCA9564 algorithm", .id = I2C_ALGO_PCA, .master_xfer = pca_xfer, .functionality = pca_func, diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 8d087dac32af..eb3e9e39d75b 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -459,7 +459,6 @@ static u32 pcf_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm pcf_algo = { - .name = "PCF8584 algorithm", .id = 
I2C_ALGO_PCF, .master_xfer = pcf_xfer, .functionality = pcf_func, diff --git a/drivers/i2c/algos/i2c-algo-sgi.c b/drivers/i2c/algos/i2c-algo-sgi.c index 422721b241e5..55284c9e41dc 100644 --- a/drivers/i2c/algos/i2c-algo-sgi.c +++ b/drivers/i2c/algos/i2c-algo-sgi.c @@ -158,7 +158,6 @@ static u32 sgi_func(struct i2c_adapter *adap) } static struct i2c_algorithm sgi_algo = { - .name = "SGI algorithm", .id = I2C_ALGO_SGI, .master_xfer = sgi_xfer, .functionality = sgi_func, diff --git a/drivers/i2c/algos/i2c-algo-sibyte.c b/drivers/i2c/algos/i2c-algo-sibyte.c index f2785499237b..4ca3e69461bc 100644 --- a/drivers/i2c/algos/i2c-algo-sibyte.c +++ b/drivers/i2c/algos/i2c-algo-sibyte.c @@ -135,7 +135,6 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm i2c_sibyte_algo = { - .name = "SiByte algorithm", .id = I2C_ALGO_SIBYTE, .smbus_xfer = smbus_xfer, .algo_control = algo_control, /* ioctl */ diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index f634a0780cf0..d7e05a1a5125 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -472,7 +472,6 @@ static u32 ali1535_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-i2c SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = ali1535_access, .functionality = ali1535_func, diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index fdd881aee618..48f85e53d7d6 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -366,7 +366,6 @@ static void ali1563_shutdown(struct pci_dev *dev) } static struct i2c_algorithm ali1563_algorithm = { - .name = "Non-i2c SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = ali1563_access, .functionality = ali1563_func, diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 0f781a1a3323..523f0129e4cd 100644 --- 
a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -462,7 +462,6 @@ static u32 ali15x3_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = ali15x3_access, .functionality = ali15x3_func, diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c index 6347ebc6fb53..7cf33325ac14 100644 --- a/drivers/i2c/busses/i2c-amd756.c +++ b/drivers/i2c/busses/i2c-amd756.c @@ -295,7 +295,6 @@ static u32 amd756_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = amd756_access, .functionality = amd756_func, diff --git a/drivers/i2c/busses/i2c-amd8111.c b/drivers/i2c/busses/i2c-amd8111.c index d6644481d2a0..3b98d3b40042 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -323,7 +323,6 @@ static u32 amd8111_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus 2.0 adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = amd8111_access, .functionality = amd8111_func, diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c index a7ff112e49bf..41fff3531aee 100644 --- a/drivers/i2c/busses/i2c-au1550.c +++ b/drivers/i2c/busses/i2c-au1550.c @@ -283,7 +283,6 @@ au1550_func(struct i2c_adapter *adap) } static struct i2c_algorithm au1550_algo = { - .name = "Au1550 algorithm", .id = I2C_ALGO_AU1550, .master_xfer = au1550_xfer, .functionality = au1550_func, diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index 1ab41313ce51..b916317b76a6 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -535,7 +535,6 @@ static u32 i801_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = i801_access, 
.functionality = i801_func, diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 93ca36dc777e..1a3366287087 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -627,7 +627,6 @@ static u32 iic_func(struct i2c_adapter *adap) } static struct i2c_algorithm iic_algo = { - .name = "IBM IIC algorithm", .id = I2C_ALGO_OCP, .master_xfer = iic_xfer, .functionality = iic_func diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index 6b682e903f09..c763039638d9 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -399,7 +399,6 @@ iop3xx_i2c_func(struct i2c_adapter *adap) } static struct i2c_algorithm iop3xx_i2c_algo = { - .name = "IOP3xx I2C algorithm", .id = I2C_ALGO_IOP3XX, .master_xfer = iop3xx_i2c_master_xfer, .algo_control = iop3xx_i2c_algo_control, diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index a60f4801757e..bbfd4449b921 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -43,7 +43,6 @@ static u32 isa_func(struct i2c_adapter *adapter); /* This is the actual algorithm we define */ static struct i2c_algorithm isa_algorithm = { - .name = "ISA bus algorithm", .id = I2C_ALGO_ISA, .functionality = isa_func, }; diff --git a/drivers/i2c/busses/i2c-keywest.c b/drivers/i2c/busses/i2c-keywest.c index 94ae808314f7..2937f115abf1 100644 --- a/drivers/i2c/busses/i2c-keywest.c +++ b/drivers/i2c/busses/i2c-keywest.c @@ -498,7 +498,6 @@ keywest_func(struct i2c_adapter * adapter) /* For now, we only handle combined mode (smbus) */ static struct i2c_algorithm keywest_algorithm = { - .name = "Keywest i2c", .id = I2C_ALGO_SMBUS, .smbus_xfer = keywest_smbus_xfer, .master_xfer = keywest_xfer, diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 9ad3e9262e8a..ae988cc8cb48 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -272,7 +272,6 @@ static u32 
mpc_functionality(struct i2c_adapter *adap) } static struct i2c_algorithm mpc_algo = { - .name = "MPC algorithm", .id = I2C_ALGO_MPC107, .master_xfer = mpc_xfer, .functionality = mpc_functionality, diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 5b852782d2f5..f5927c68784c 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -433,7 +433,6 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } static struct i2c_algorithm mv64xxx_i2c_algo = { - .name = MV64XXX_I2C_CTLR_NAME " algorithm", .id = I2C_ALGO_MV64XXX, .master_xfer = mv64xxx_i2c_xfer, .functionality = mv64xxx_i2c_functionality, diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index 1df601bb7f4f..44b6dfdd3832 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ b/drivers/i2c/busses/i2c-nforce2.c @@ -110,7 +110,6 @@ static u32 nforce2_func(struct i2c_adapter *adapter); static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = nforce2_access, .functionality = nforce2_func, diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 6d34ee381ce1..976df581e853 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -399,7 +399,6 @@ static u32 piix4_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = piix4_access, .functionality = piix4_func, diff --git a/drivers/i2c/busses/i2c-s3c2410.c b/drivers/i2c/busses/i2c-s3c2410.c index a3b38257cc3d..73a092fb0e7e 100644 --- a/drivers/i2c/busses/i2c-s3c2410.c +++ b/drivers/i2c/busses/i2c-s3c2410.c @@ -568,7 +568,6 @@ static u32 s3c24xx_i2c_func(struct i2c_adapter *adap) /* i2c bus registration info */ static struct i2c_algorithm s3c24xx_i2c_algorithm = { - .name = "S3C2410-I2C-Algorithm", .master_xfer = s3c24xx_i2c_xfer, .functionality = 
s3c24xx_i2c_func, }; diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c index bbd5e4e52f09..0308ed87e2b9 100644 --- a/drivers/i2c/busses/i2c-sis5595.c +++ b/drivers/i2c/busses/i2c-sis5595.c @@ -357,7 +357,6 @@ static u32 sis5595_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = sis5595_access, .functionality = sis5595_func, diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index f58455e7689e..8708f9044023 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -448,7 +448,6 @@ exit: static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = sis630_access, .functionality = sis630_func, diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c index 6484792e23a1..6e7202ed3265 100644 --- a/drivers/i2c/busses/i2c-sis96x.c +++ b/drivers/i2c/busses/i2c-sis96x.c @@ -249,7 +249,6 @@ static u32 sis96x_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = sis96x_access, .functionality = sis96x_func, diff --git a/drivers/i2c/busses/i2c-stub.c b/drivers/i2c/busses/i2c-stub.c index 00d94e886955..d7203207754f 100644 --- a/drivers/i2c/busses/i2c-stub.c +++ b/drivers/i2c/busses/i2c-stub.c @@ -109,7 +109,6 @@ static u32 stub_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .functionality = stub_func, .smbus_xfer = stub_xfer, diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 6b5008005c6f..608ad525e7e1 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -286,7 +286,6 @@ static u32 vt596_func(struct i2c_adapter *adapter) } static struct i2c_algorithm 
smbus_algorithm = { - .name = "Non-I2C SMBus adapter", .id = I2C_ALGO_SMBUS, .smbus_xfer = vt596_access, .functionality = vt596_func, diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index a18bdd9aa7ba..b695233d5455 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -395,7 +395,6 @@ static u32 scx200_acb_func(struct i2c_adapter *adapter) /* For now, we only handle combined mode (smbus) */ static struct i2c_algorithm scx200_acb_algorithm = { - .name = "NatSemi SCx200 ACCESS.bus", .id = I2C_ALGO_SMBUS, .smbus_xfer = scx200_acb_smbus_xfer, .functionality = scx200_acb_func, diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index 781f23f0cbcc..dceda86cd99d 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -387,7 +387,6 @@ static int saa7146_i2c_xfer(struct i2c_adapter* adapter, struct i2c_msg *msg, in /* exported algorithm data */ static struct i2c_algorithm saa7146_algo = { - .name = "saa7146 i2c algorithm", .id = I2C_ALGO_SAA7146, .master_xfer = saa7146_i2c_xfer, .functionality = saa7146_i2c_func, diff --git a/drivers/media/dvb/b2c2/flexcop-i2c.c b/drivers/media/dvb/b2c2/flexcop-i2c.c index be4266d4ae91..75f2c94f3190 100644 --- a/drivers/media/dvb/b2c2/flexcop-i2c.c +++ b/drivers/media/dvb/b2c2/flexcop-i2c.c @@ -172,7 +172,6 @@ static u32 flexcop_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm flexcop_algo = { - .name = "FlexCop I2C algorithm", .id = I2C_ALGO_BIT, .master_xfer = flexcop_master_xfer, .functionality = flexcop_i2c_func, diff --git a/drivers/media/dvb/dvb-usb/cxusb.c b/drivers/media/dvb/dvb-usb/cxusb.c index c3e1b661aae6..36fe602f85b6 100644 --- a/drivers/media/dvb/dvb-usb/cxusb.c +++ b/drivers/media/dvb/dvb-usb/cxusb.c @@ -141,7 +141,6 @@ static u32 cxusb_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm cxusb_i2c_algo = { - .name = "Conexant USB I2C algorithm", .id = 
I2C_ALGO_BIT, .master_xfer = cxusb_i2c_xfer, .functionality = cxusb_i2c_func, diff --git a/drivers/media/dvb/dvb-usb/dibusb-common.c b/drivers/media/dvb/dvb-usb/dibusb-common.c index 9b9d6f8ee74e..c3a639520e8a 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-common.c +++ b/drivers/media/dvb/dvb-usb/dibusb-common.c @@ -156,7 +156,6 @@ static u32 dibusb_i2c_func(struct i2c_adapter *adapter) } struct i2c_algorithm dibusb_i2c_algo = { - .name = "DiBcom USB I2C algorithm", .id = I2C_ALGO_BIT, .master_xfer = dibusb_i2c_xfer, .functionality = dibusb_i2c_func, diff --git a/drivers/media/dvb/dvb-usb/digitv.c b/drivers/media/dvb/dvb-usb/digitv.c index 9a676afc1d6e..6e4e1e9158e3 100644 --- a/drivers/media/dvb/dvb-usb/digitv.c +++ b/drivers/media/dvb/dvb-usb/digitv.c @@ -77,7 +77,6 @@ static u32 digitv_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm digitv_i2c_algo = { - .name = "Nebula DigiTV USB I2C algorithm", .id = I2C_ALGO_BIT, .master_xfer = digitv_i2c_xfer, .functionality = digitv_i2c_func, diff --git a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c index aa43b5fcb8e7..c337ee546bac 100644 --- a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c +++ b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c @@ -1472,7 +1472,6 @@ static void frontend_init(struct ttusb* ttusb) static struct i2c_algorithm ttusb_dec_algo = { - .name = "ttusb dec i2c algorithm", .id = I2C_ALGO_BIT, .master_xfer = master_xfer, .functionality = functionality, diff --git a/drivers/media/video/bttv-i2c.c b/drivers/media/video/bttv-i2c.c index 234a85563769..dbc96fce7501 100644 --- a/drivers/media/video/bttv-i2c.c +++ b/drivers/media/video/bttv-i2c.c @@ -270,7 +270,6 @@ static int bttv_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int } static struct i2c_algorithm bttv_algo = { - .name = "bt878", .id = I2C_ALGO_BIT | I2C_HW_B_BT848 /* FIXME */, .master_xfer = bttv_i2c_xfer, .algo_control = algo_control, diff --git 
a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c index 1203b93a572c..1a53c7eb104a 100644 --- a/drivers/media/video/saa7134/saa7134-i2c.c +++ b/drivers/media/video/saa7134/saa7134-i2c.c @@ -370,7 +370,6 @@ static int attach_inform(struct i2c_client *client) } static struct i2c_algorithm saa7134_algo = { - .name = "saa7134", .id = I2C_ALGO_SAA7134, .master_xfer = saa7134_i2c_xfer, .algo_control = algo_control, diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index ca9f3a30634f..ad1d6777e226 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -1573,7 +1573,6 @@ static int w9968cf_i2c_init(struct w9968cf_device* cam) int err = 0; static struct i2c_algorithm algo = { - .name = "W996[87]CF algorithm", .id = I2C_ALGO_SMBUS, .smbus_xfer = w9968cf_i2c_smbus_xfer, .algo_control = w9968cf_i2c_control, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 3ad3969b6f0d..f04b1fc6a0fa 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -192,7 +192,6 @@ static inline char *i2c_clientname(struct i2c_client *c) * to name two of the most common. */ struct i2c_algorithm { - char name[32]; /* textual description */ unsigned int id; /* If an adapter algorithm can't do I2C-level access, set master_xfer -- cgit v1.2.3 From e51cc6b3a365e170d3ebe51c2308fdd42e027a46 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:36:49 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.id (2/7) Use the adapter id rather than the algorithm id to detect the i2c-isa pseudo-adapter. This saves one level of dereferencing, and the algorithm ids will soon be gone anyway. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-isa.c | 1 + include/linux/i2c-isa.h | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index bbfd4449b921..dc666d7cf45a 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -50,6 +50,7 @@ static struct i2c_algorithm isa_algorithm = { /* There can only be one... */ static struct i2c_adapter isa_adapter = { .owner = THIS_MODULE, + .id = I2C_ALGO_ISA | I2C_HW_ISA, .class = I2C_CLASS_HWMON, .algo = &isa_algorithm, .name = "ISA main adapter", diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h index db793b68356c..54c27e88d101 100644 --- a/include/linux/i2c-isa.h +++ b/include/linux/i2c-isa.h @@ -28,9 +28,9 @@ extern int i2c_isa_del_driver(struct i2c_driver *driver); /* Detect whether we are on the isa bus. This is only useful to hybrid (i2c+isa) drivers. */ -#define i2c_is_isa_client(clientptr) \ - ((clientptr)->adapter->algo->id == I2C_ALGO_ISA) #define i2c_is_isa_adapter(adapptr) \ - ((adapptr)->algo->id == I2C_ALGO_ISA) + ((adapptr)->id == (I2C_ALGO_ISA | I2C_HW_ISA)) +#define i2c_is_isa_client(clientptr) \ + i2c_is_isa_adapter((clientptr)->adapter) #endif /* _LINUX_I2C_ISA_H */ -- cgit v1.2.3 From 1d8b9e1bad35fa3ea829990b9056c2a257d8fe79 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:40:19 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.id (4/7) There are no more users of i2c_algorithm.id, so we can finally drop this structure member. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/algos/i2c-algo-bit.c | 1 - drivers/i2c/algos/i2c-algo-ite.c | 1 - drivers/i2c/algos/i2c-algo-pca.c | 1 - drivers/i2c/algos/i2c-algo-pcf.c | 1 - drivers/i2c/algos/i2c-algo-sgi.c | 1 - drivers/i2c/algos/i2c-algo-sibyte.c | 1 - drivers/i2c/busses/i2c-ali1535.c | 1 - drivers/i2c/busses/i2c-ali1563.c | 1 - drivers/i2c/busses/i2c-ali15x3.c | 1 - drivers/i2c/busses/i2c-amd756.c | 1 - drivers/i2c/busses/i2c-amd8111.c | 1 - drivers/i2c/busses/i2c-au1550.c | 1 - drivers/i2c/busses/i2c-i801.c | 1 - drivers/i2c/busses/i2c-ibm_iic.c | 1 - drivers/i2c/busses/i2c-iop3xx.c | 1 - drivers/i2c/busses/i2c-isa.c | 1 - drivers/i2c/busses/i2c-keywest.c | 1 - drivers/i2c/busses/i2c-mpc.c | 1 - drivers/i2c/busses/i2c-mv64xxx.c | 1 - drivers/i2c/busses/i2c-nforce2.c | 1 - drivers/i2c/busses/i2c-piix4.c | 1 - drivers/i2c/busses/i2c-sis5595.c | 1 - drivers/i2c/busses/i2c-sis630.c | 1 - drivers/i2c/busses/i2c-sis96x.c | 1 - drivers/i2c/busses/i2c-stub.c | 1 - drivers/i2c/busses/i2c-viapro.c | 1 - drivers/i2c/busses/scx200_acb.c | 1 - drivers/media/common/saa7146_i2c.c | 1 - drivers/media/dvb/b2c2/flexcop-i2c.c | 1 - drivers/media/dvb/dvb-usb/cxusb.c | 1 - drivers/media/dvb/dvb-usb/dibusb-common.c | 1 - drivers/media/dvb/dvb-usb/digitv.c | 1 - drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c | 1 - drivers/media/video/bttv-i2c.c | 1 - drivers/media/video/saa7134/saa7134-i2c.c | 1 - drivers/usb/media/w9968cf.c | 1 - include/linux/i2c.h | 2 -- 37 files changed, 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index b8767382bbac..6060b10ab0ce 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -519,7 +519,6 @@ static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm i2c_bit_algo = { - .id = I2C_ALGO_BIT, .master_xfer = 
bit_xfer, .functionality = bit_func, }; diff --git a/drivers/i2c/algos/i2c-algo-ite.c b/drivers/i2c/algos/i2c-algo-ite.c index 6cb02f685db3..b460e57216e1 100644 --- a/drivers/i2c/algos/i2c-algo-ite.c +++ b/drivers/i2c/algos/i2c-algo-ite.c @@ -713,7 +713,6 @@ static u32 iic_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm iic_algo = { - .id = I2C_ALGO_IIC, .master_xfer = iic_xfer, .algo_control = algo_control, /* ioctl */ .functionality = iic_func, diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index 79b3c2edcf08..be2c8abc6682 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -356,7 +356,6 @@ static int pca_init(struct i2c_algo_pca_data *adap) } static struct i2c_algorithm pca_algo = { - .id = I2C_ALGO_PCA, .master_xfer = pca_xfer, .functionality = pca_func, }; diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index fbc0b87fe070..95f023ad8c62 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -459,7 +459,6 @@ static u32 pcf_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm pcf_algo = { - .id = I2C_ALGO_PCF, .master_xfer = pcf_xfer, .functionality = pcf_func, }; diff --git a/drivers/i2c/algos/i2c-algo-sgi.c b/drivers/i2c/algos/i2c-algo-sgi.c index 8863a671d977..142505105d6f 100644 --- a/drivers/i2c/algos/i2c-algo-sgi.c +++ b/drivers/i2c/algos/i2c-algo-sgi.c @@ -158,7 +158,6 @@ static u32 sgi_func(struct i2c_adapter *adap) } static struct i2c_algorithm sgi_algo = { - .id = I2C_ALGO_SGI, .master_xfer = sgi_xfer, .functionality = sgi_func, }; diff --git a/drivers/i2c/algos/i2c-algo-sibyte.c b/drivers/i2c/algos/i2c-algo-sibyte.c index 6cda0a6332d9..c01108ae7b69 100644 --- a/drivers/i2c/algos/i2c-algo-sibyte.c +++ b/drivers/i2c/algos/i2c-algo-sibyte.c @@ -135,7 +135,6 @@ 
static u32 bit_func(struct i2c_adapter *adap) /* -----exported algorithm data: ------------------------------------- */ static struct i2c_algorithm i2c_sibyte_algo = { - .id = I2C_ALGO_SIBYTE, .smbus_xfer = smbus_xfer, .algo_control = algo_control, /* ioctl */ .functionality = bit_func, diff --git a/drivers/i2c/busses/i2c-ali1535.c b/drivers/i2c/busses/i2c-ali1535.c index d7e05a1a5125..f021acd2674e 100644 --- a/drivers/i2c/busses/i2c-ali1535.c +++ b/drivers/i2c/busses/i2c-ali1535.c @@ -472,7 +472,6 @@ static u32 ali1535_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = ali1535_access, .functionality = ali1535_func, }; diff --git a/drivers/i2c/busses/i2c-ali1563.c b/drivers/i2c/busses/i2c-ali1563.c index 48f85e53d7d6..86947504aea1 100644 --- a/drivers/i2c/busses/i2c-ali1563.c +++ b/drivers/i2c/busses/i2c-ali1563.c @@ -366,7 +366,6 @@ static void ali1563_shutdown(struct pci_dev *dev) } static struct i2c_algorithm ali1563_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = ali1563_access, .functionality = ali1563_func, }; diff --git a/drivers/i2c/busses/i2c-ali15x3.c b/drivers/i2c/busses/i2c-ali15x3.c index 523f0129e4cd..b3f50bff39a0 100644 --- a/drivers/i2c/busses/i2c-ali15x3.c +++ b/drivers/i2c/busses/i2c-ali15x3.c @@ -462,7 +462,6 @@ static u32 ali15x3_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = ali15x3_access, .functionality = ali15x3_func, }; diff --git a/drivers/i2c/busses/i2c-amd756.c b/drivers/i2c/busses/i2c-amd756.c index 7cf33325ac14..6ad0603384b8 100644 --- a/drivers/i2c/busses/i2c-amd756.c +++ b/drivers/i2c/busses/i2c-amd756.c @@ -295,7 +295,6 @@ static u32 amd756_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = amd756_access, .functionality = amd756_func, }; diff --git a/drivers/i2c/busses/i2c-amd8111.c 
b/drivers/i2c/busses/i2c-amd8111.c index 3b98d3b40042..45ea24ba14d5 100644 --- a/drivers/i2c/busses/i2c-amd8111.c +++ b/drivers/i2c/busses/i2c-amd8111.c @@ -323,7 +323,6 @@ static u32 amd8111_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = amd8111_access, .functionality = amd8111_func, }; diff --git a/drivers/i2c/busses/i2c-au1550.c b/drivers/i2c/busses/i2c-au1550.c index 41fff3531aee..d06edce03bf4 100644 --- a/drivers/i2c/busses/i2c-au1550.c +++ b/drivers/i2c/busses/i2c-au1550.c @@ -283,7 +283,6 @@ au1550_func(struct i2c_adapter *adap) } static struct i2c_algorithm au1550_algo = { - .id = I2C_ALGO_AU1550, .master_xfer = au1550_xfer, .functionality = au1550_func, }; diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index b916317b76a6..709beab76609 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -535,7 +535,6 @@ static u32 i801_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = i801_access, .functionality = i801_func, }; diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index ade9b14e6840..f42ab909eb78 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -627,7 +627,6 @@ static u32 iic_func(struct i2c_adapter *adap) } static struct i2c_algorithm iic_algo = { - .id = I2C_ALGO_OCP, .master_xfer = iic_xfer, .functionality = iic_func }; diff --git a/drivers/i2c/busses/i2c-iop3xx.c b/drivers/i2c/busses/i2c-iop3xx.c index c763039638d9..7bd9102db701 100644 --- a/drivers/i2c/busses/i2c-iop3xx.c +++ b/drivers/i2c/busses/i2c-iop3xx.c @@ -399,7 +399,6 @@ iop3xx_i2c_func(struct i2c_adapter *adap) } static struct i2c_algorithm iop3xx_i2c_algo = { - .id = I2C_ALGO_IOP3XX, .master_xfer = iop3xx_i2c_master_xfer, .algo_control = iop3xx_i2c_algo_control, .functionality = iop3xx_i2c_func, diff --git 
a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index dc666d7cf45a..baae5dbc198e 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -43,7 +43,6 @@ static u32 isa_func(struct i2c_adapter *adapter); /* This is the actual algorithm we define */ static struct i2c_algorithm isa_algorithm = { - .id = I2C_ALGO_ISA, .functionality = isa_func, }; diff --git a/drivers/i2c/busses/i2c-keywest.c b/drivers/i2c/busses/i2c-keywest.c index 2937f115abf1..5254d2db282c 100644 --- a/drivers/i2c/busses/i2c-keywest.c +++ b/drivers/i2c/busses/i2c-keywest.c @@ -498,7 +498,6 @@ keywest_func(struct i2c_adapter * adapter) /* For now, we only handle combined mode (smbus) */ static struct i2c_algorithm keywest_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = keywest_smbus_xfer, .master_xfer = keywest_xfer, .functionality = keywest_func, diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index ae988cc8cb48..70c5ffaee6a3 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -272,7 +272,6 @@ static u32 mpc_functionality(struct i2c_adapter *adap) } static struct i2c_algorithm mpc_algo = { - .id = I2C_ALGO_MPC107, .master_xfer = mpc_xfer, .functionality = mpc_functionality, }; diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index f5927c68784c..6e5dd4949237 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -433,7 +433,6 @@ mv64xxx_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) } static struct i2c_algorithm mv64xxx_i2c_algo = { - .id = I2C_ALGO_MV64XXX, .master_xfer = mv64xxx_i2c_xfer, .functionality = mv64xxx_i2c_functionality, }; diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c index 44b6dfdd3832..e0b7a913431e 100644 --- a/drivers/i2c/busses/i2c-nforce2.c +++ b/drivers/i2c/busses/i2c-nforce2.c @@ -110,7 +110,6 @@ static u32 nforce2_func(struct i2c_adapter *adapter); static struct 
i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = nforce2_access, .functionality = nforce2_func, }; diff --git a/drivers/i2c/busses/i2c-piix4.c b/drivers/i2c/busses/i2c-piix4.c index 976df581e853..6d48a4da7bed 100644 --- a/drivers/i2c/busses/i2c-piix4.c +++ b/drivers/i2c/busses/i2c-piix4.c @@ -399,7 +399,6 @@ static u32 piix4_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = piix4_access, .functionality = piix4_func, }; diff --git a/drivers/i2c/busses/i2c-sis5595.c b/drivers/i2c/busses/i2c-sis5595.c index 0308ed87e2b9..080318d6f54b 100644 --- a/drivers/i2c/busses/i2c-sis5595.c +++ b/drivers/i2c/busses/i2c-sis5595.c @@ -357,7 +357,6 @@ static u32 sis5595_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = sis5595_access, .functionality = sis5595_func, }; diff --git a/drivers/i2c/busses/i2c-sis630.c b/drivers/i2c/busses/i2c-sis630.c index 8708f9044023..86f0f448fa0b 100644 --- a/drivers/i2c/busses/i2c-sis630.c +++ b/drivers/i2c/busses/i2c-sis630.c @@ -448,7 +448,6 @@ exit: static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = sis630_access, .functionality = sis630_func, }; diff --git a/drivers/i2c/busses/i2c-sis96x.c b/drivers/i2c/busses/i2c-sis96x.c index 6e7202ed3265..ead2ff3cf60e 100644 --- a/drivers/i2c/busses/i2c-sis96x.c +++ b/drivers/i2c/busses/i2c-sis96x.c @@ -249,7 +249,6 @@ static u32 sis96x_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = sis96x_access, .functionality = sis96x_func, }; diff --git a/drivers/i2c/busses/i2c-stub.c b/drivers/i2c/busses/i2c-stub.c index d7203207754f..73f481e93a36 100644 --- a/drivers/i2c/busses/i2c-stub.c +++ b/drivers/i2c/busses/i2c-stub.c @@ -109,7 +109,6 @@ static u32 stub_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id 
= I2C_ALGO_SMBUS, .functionality = stub_func, .smbus_xfer = stub_xfer, }; diff --git a/drivers/i2c/busses/i2c-viapro.c b/drivers/i2c/busses/i2c-viapro.c index 608ad525e7e1..99d209e0485a 100644 --- a/drivers/i2c/busses/i2c-viapro.c +++ b/drivers/i2c/busses/i2c-viapro.c @@ -286,7 +286,6 @@ static u32 vt596_func(struct i2c_adapter *adapter) } static struct i2c_algorithm smbus_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = vt596_access, .functionality = vt596_func, }; diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index b695233d5455..46b9a7594c99 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -395,7 +395,6 @@ static u32 scx200_acb_func(struct i2c_adapter *adapter) /* For now, we only handle combined mode (smbus) */ static struct i2c_algorithm scx200_acb_algorithm = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = scx200_acb_smbus_xfer, .functionality = scx200_acb_func, }; diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index dceda86cd99d..e413ee7f267a 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -387,7 +387,6 @@ static int saa7146_i2c_xfer(struct i2c_adapter* adapter, struct i2c_msg *msg, in /* exported algorithm data */ static struct i2c_algorithm saa7146_algo = { - .id = I2C_ALGO_SAA7146, .master_xfer = saa7146_i2c_xfer, .functionality = saa7146_i2c_func, }; diff --git a/drivers/media/dvb/b2c2/flexcop-i2c.c b/drivers/media/dvb/b2c2/flexcop-i2c.c index 75f2c94f3190..848910ff3c9c 100644 --- a/drivers/media/dvb/b2c2/flexcop-i2c.c +++ b/drivers/media/dvb/b2c2/flexcop-i2c.c @@ -172,7 +172,6 @@ static u32 flexcop_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm flexcop_algo = { - .id = I2C_ALGO_BIT, .master_xfer = flexcop_master_xfer, .functionality = flexcop_i2c_func, }; diff --git a/drivers/media/dvb/dvb-usb/cxusb.c b/drivers/media/dvb/dvb-usb/cxusb.c index 36fe602f85b6..9e96a188f1e9 100644 --- 
a/drivers/media/dvb/dvb-usb/cxusb.c +++ b/drivers/media/dvb/dvb-usb/cxusb.c @@ -141,7 +141,6 @@ static u32 cxusb_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm cxusb_i2c_algo = { - .id = I2C_ALGO_BIT, .master_xfer = cxusb_i2c_xfer, .functionality = cxusb_i2c_func, }; diff --git a/drivers/media/dvb/dvb-usb/dibusb-common.c b/drivers/media/dvb/dvb-usb/dibusb-common.c index c3a639520e8a..00b946419b40 100644 --- a/drivers/media/dvb/dvb-usb/dibusb-common.c +++ b/drivers/media/dvb/dvb-usb/dibusb-common.c @@ -156,7 +156,6 @@ static u32 dibusb_i2c_func(struct i2c_adapter *adapter) } struct i2c_algorithm dibusb_i2c_algo = { - .id = I2C_ALGO_BIT, .master_xfer = dibusb_i2c_xfer, .functionality = dibusb_i2c_func, }; diff --git a/drivers/media/dvb/dvb-usb/digitv.c b/drivers/media/dvb/dvb-usb/digitv.c index 6e4e1e9158e3..f70e0be0920a 100644 --- a/drivers/media/dvb/dvb-usb/digitv.c +++ b/drivers/media/dvb/dvb-usb/digitv.c @@ -77,7 +77,6 @@ static u32 digitv_i2c_func(struct i2c_adapter *adapter) } static struct i2c_algorithm digitv_i2c_algo = { - .id = I2C_ALGO_BIT, .master_xfer = digitv_i2c_xfer, .functionality = digitv_i2c_func, }; diff --git a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c index c337ee546bac..11afec52f31f 100644 --- a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c +++ b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c @@ -1472,7 +1472,6 @@ static void frontend_init(struct ttusb* ttusb) static struct i2c_algorithm ttusb_dec_algo = { - .id = I2C_ALGO_BIT, .master_xfer = master_xfer, .functionality = functionality, }; diff --git a/drivers/media/video/bttv-i2c.c b/drivers/media/video/bttv-i2c.c index dbc96fce7501..82beb5a8a59c 100644 --- a/drivers/media/video/bttv-i2c.c +++ b/drivers/media/video/bttv-i2c.c @@ -270,7 +270,6 @@ static int bttv_i2c_xfer(struct i2c_adapter *i2c_adap, struct i2c_msg *msgs, int } static struct i2c_algorithm bttv_algo = { - .id = I2C_ALGO_BIT | I2C_HW_B_BT848 
/* FIXME */, .master_xfer = bttv_i2c_xfer, .algo_control = algo_control, .functionality = functionality, diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c index 1a53c7eb104a..0bb1073d94bd 100644 --- a/drivers/media/video/saa7134/saa7134-i2c.c +++ b/drivers/media/video/saa7134/saa7134-i2c.c @@ -370,7 +370,6 @@ static int attach_inform(struct i2c_client *client) } static struct i2c_algorithm saa7134_algo = { - .id = I2C_ALGO_SAA7134, .master_xfer = saa7134_i2c_xfer, .algo_control = algo_control, .functionality = functionality, diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index ad1d6777e226..908cfdf17039 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -1573,7 +1573,6 @@ static int w9968cf_i2c_init(struct w9968cf_device* cam) int err = 0; static struct i2c_algorithm algo = { - .id = I2C_ALGO_SMBUS, .smbus_xfer = w9968cf_i2c_smbus_xfer, .algo_control = w9968cf_i2c_control, .functionality = w9968cf_i2c_func, diff --git a/include/linux/i2c.h b/include/linux/i2c.h index f04b1fc6a0fa..af4983b488b6 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -192,8 +192,6 @@ static inline char *i2c_clientname(struct i2c_client *c) * to name two of the most common. */ struct i2c_algorithm { - unsigned int id; - /* If an adapter algorithm can't do I2C-level access, set master_xfer to NULL. If an adapter algorithm can do SMBus access, set smbus_xfer. If set to NULL, the SMBus protocol is simulated -- cgit v1.2.3 From c7a46533ff7ef9e1c51bae6e54208527c5275b24 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:41:56 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.id (5/7) Merge the algorithm id part (16 upper bits) of the i2c adapters ids into the definition of the adapters ids directly. After that, we don't need to OR both ids together for each i2c_adapter structure. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/algos/i2c-algo-bit.c | 2 - drivers/i2c/algos/i2c-algo-ite.c | 2 - drivers/i2c/algos/i2c-algo-pca.c | 2 - drivers/i2c/algos/i2c-algo-pcf.c | 2 - drivers/i2c/algos/i2c-algo-sgi.c | 1 - drivers/i2c/algos/i2c-algo-sibyte.c | 2 - drivers/i2c/busses/i2c-ibm_iic.c | 2 +- drivers/i2c/busses/i2c-isa.c | 2 +- drivers/i2c/busses/i2c-mpc.c | 2 +- drivers/i2c/busses/i2c-mv64xxx.c | 2 +- drivers/media/video/bt832.c | 2 +- drivers/media/video/bttv-i2c.c | 2 +- drivers/media/video/ir-kbd-i2c.c | 2 +- drivers/media/video/ovcamchip/ov6x20.c | 6 +- drivers/media/video/ovcamchip/ov6x30.c | 4 +- drivers/media/video/ovcamchip/ovcamchip_core.c | 8 +- drivers/media/video/tda7432.c | 2 +- drivers/media/video/tda9875.c | 2 +- drivers/media/video/tda9887.c | 4 +- drivers/media/video/tuner-3036.c | 2 +- drivers/media/video/tvaudio.c | 6 +- drivers/media/video/tveeprom.c | 2 +- drivers/media/video/tvmixer.c | 6 +- drivers/usb/media/w9968cf.c | 2 +- drivers/video/matrox/matroxfb_maven.c | 2 +- include/linux/i2c-id.h | 128 ++++++++++++------------- include/linux/i2c-isa.h | 2 +- 27 files changed, 95 insertions(+), 106 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/algos/i2c-algo-bit.c b/drivers/i2c/algos/i2c-algo-bit.c index 6060b10ab0ce..df05df1a0ef6 100644 --- a/drivers/i2c/algos/i2c-algo-bit.c +++ b/drivers/i2c/algos/i2c-algo-bit.c @@ -539,8 +539,6 @@ int i2c_bit_add_bus(struct i2c_adapter *adap) DEB2(dev_dbg(&adap->dev, "hw routines registered.\n")); /* register new adapter to i2c module... 
*/ - - adap->id |= I2C_ALGO_BIT; adap->algo = &i2c_bit_algo; adap->timeout = 100; /* default values, should */ diff --git a/drivers/i2c/algos/i2c-algo-ite.c b/drivers/i2c/algos/i2c-algo-ite.c index b460e57216e1..2db7bfc85225 100644 --- a/drivers/i2c/algos/i2c-algo-ite.c +++ b/drivers/i2c/algos/i2c-algo-ite.c @@ -736,8 +736,6 @@ int i2c_iic_add_bus(struct i2c_adapter *adap) adap->name)); /* register new adapter to i2c module... */ - - adap->id |= I2C_ALGO_IIC; adap->algo = &iic_algo; adap->timeout = 100; /* default values, should */ diff --git a/drivers/i2c/algos/i2c-algo-pca.c b/drivers/i2c/algos/i2c-algo-pca.c index be2c8abc6682..beb10edfe9c1 100644 --- a/drivers/i2c/algos/i2c-algo-pca.c +++ b/drivers/i2c/algos/i2c-algo-pca.c @@ -369,8 +369,6 @@ int i2c_pca_add_bus(struct i2c_adapter *adap) int rval; /* register new adapter to i2c module... */ - - adap->id |= I2C_ALGO_PCA; adap->algo = &pca_algo; adap->timeout = 100; /* default values, should */ diff --git a/drivers/i2c/algos/i2c-algo-pcf.c b/drivers/i2c/algos/i2c-algo-pcf.c index 95f023ad8c62..6e498df1f717 100644 --- a/drivers/i2c/algos/i2c-algo-pcf.c +++ b/drivers/i2c/algos/i2c-algo-pcf.c @@ -474,8 +474,6 @@ int i2c_pcf_add_bus(struct i2c_adapter *adap) DEB2(dev_dbg(&adap->dev, "hw routines registered.\n")); /* register new adapter to i2c module... 
*/ - - adap->id |= I2C_ALGO_PCF; adap->algo = &pcf_algo; adap->timeout = 100; /* default values, should */ diff --git a/drivers/i2c/algos/i2c-algo-sgi.c b/drivers/i2c/algos/i2c-algo-sgi.c index 142505105d6f..2f8df81317ff 100644 --- a/drivers/i2c/algos/i2c-algo-sgi.c +++ b/drivers/i2c/algos/i2c-algo-sgi.c @@ -167,7 +167,6 @@ static struct i2c_algorithm sgi_algo = { */ int i2c_sgi_add_bus(struct i2c_adapter *adap) { - adap->id |= I2C_ALGO_SGI; adap->algo = &sgi_algo; return i2c_add_adapter(adap); diff --git a/drivers/i2c/algos/i2c-algo-sibyte.c b/drivers/i2c/algos/i2c-algo-sibyte.c index c01108ae7b69..8ed5ad12552f 100644 --- a/drivers/i2c/algos/i2c-algo-sibyte.c +++ b/drivers/i2c/algos/i2c-algo-sibyte.c @@ -149,8 +149,6 @@ int i2c_sibyte_add_bus(struct i2c_adapter *i2c_adap, int speed) struct i2c_algo_sibyte_data *adap = i2c_adap->algo_data; /* register new adapter to i2c module... */ - - i2c_adap->id |= I2C_ALGO_SIBYTE; i2c_adap->algo = &i2c_sibyte_algo; /* Set the frequency to 100 kHz */ diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index f42ab909eb78..a3ed9590f028 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -725,7 +725,7 @@ static int __devinit iic_probe(struct ocp_device *ocp){ adap = &dev->adap; strcpy(adap->name, "IBM IIC"); i2c_set_adapdata(adap, dev); - adap->id = I2C_ALGO_OCP | I2C_HW_OCP; + adap->id = I2C_HW_OCP; adap->algo = &iic_algo; adap->client_register = NULL; adap->client_unregister = NULL; diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c index baae5dbc198e..bdc6806dafae 100644 --- a/drivers/i2c/busses/i2c-isa.c +++ b/drivers/i2c/busses/i2c-isa.c @@ -49,7 +49,7 @@ static struct i2c_algorithm isa_algorithm = { /* There can only be one... 
*/ static struct i2c_adapter isa_adapter = { .owner = THIS_MODULE, - .id = I2C_ALGO_ISA | I2C_HW_ISA, + .id = I2C_HW_ISA, .class = I2C_CLASS_HWMON, .algo = &isa_algorithm, .name = "ISA main adapter", diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 70c5ffaee6a3..f065583ddcf1 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -279,7 +279,7 @@ static struct i2c_algorithm mpc_algo = { static struct i2c_adapter mpc_ops = { .owner = THIS_MODULE, .name = "MPC adapter", - .id = I2C_ALGO_MPC107 | I2C_HW_MPC107, + .id = I2C_HW_MPC107, .algo = &mpc_algo, .class = I2C_CLASS_HWMON, .timeout = 1, diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c index 6e5dd4949237..eb6cc0869938 100644 --- a/drivers/i2c/busses/i2c-mv64xxx.c +++ b/drivers/i2c/busses/i2c-mv64xxx.c @@ -521,7 +521,7 @@ mv64xxx_i2c_probe(struct device *dev) drv_data->freq_m = pdata->freq_m; drv_data->freq_n = pdata->freq_n; drv_data->irq = platform_get_irq(pd, 0); - drv_data->adapter.id = I2C_ALGO_MV64XXX | I2C_HW_MV64XXX; + drv_data->adapter.id = I2C_HW_MV64XXX; drv_data->adapter.algo = &mv64xxx_i2c_algo; drv_data->adapter.owner = THIS_MODULE; drv_data->adapter.class = I2C_CLASS_HWMON; diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c index a070417e65e6..67ffed8a7fea 100644 --- a/drivers/media/video/bt832.c +++ b/drivers/media/video/bt832.c @@ -188,7 +188,7 @@ static int bt832_probe(struct i2c_adapter *adap) if (adap->class & I2C_CLASS_TV_ANALOG) return i2c_probe(adap, &addr_data, bt832_attach); #else - if (adap->id == (I2C_ALGO_BIT | I2C_HW_B_BT848)) + if (adap->id == I2C_HW_B_BT848) return i2c_probe(adap, &addr_data, bt832_attach); #endif return 0; diff --git a/drivers/media/video/bttv-i2c.c b/drivers/media/video/bttv-i2c.c index 82beb5a8a59c..1db95f75ac68 100644 --- a/drivers/media/video/bttv-i2c.c +++ b/drivers/media/video/bttv-i2c.c @@ -281,7 +281,7 @@ static struct i2c_adapter 
bttv_i2c_adap_hw_template = { .class = I2C_CLASS_TV_ANALOG, #endif I2C_DEVNAME("bt878"), - .id = I2C_ALGO_BIT | I2C_HW_B_BT848 /* FIXME */, + .id = I2C_HW_B_BT848 /* FIXME */, .algo = &bttv_algo, .client_register = attach_inform, }; diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index 9fc5055e001c..c2f32d522668 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -429,7 +429,7 @@ static int ir_probe(struct i2c_adapter *adap) struct i2c_client c; char buf; int i,rc; switch (adap->id) { - case I2C_ALGO_BIT | I2C_HW_B_BT848: + case I2C_HW_B_BT848: probe = probe_bttv; break; case I2C_ALGO_SAA7134: diff --git a/drivers/media/video/ovcamchip/ov6x20.c b/drivers/media/video/ovcamchip/ov6x20.c index 3433619ad93f..b3f4d266cede 100644 --- a/drivers/media/video/ovcamchip/ov6x20.c +++ b/drivers/media/video/ovcamchip/ov6x20.c @@ -164,10 +164,10 @@ static int ov6x20_init(struct i2c_client *c) DDEBUG(4, &c->dev, "entered"); switch (c->adapter->id) { - case I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV511: + case I2C_HW_SMBUS_OV511: rc = ov_write_regvals(c, regvals_init_6x20_511); break; - case I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV518: + case I2C_HW_SMBUS_OV518: rc = ov_write_regvals(c, regvals_init_6x20_518); break; default: @@ -338,7 +338,7 @@ static int ov6x20_mode_init(struct i2c_client *c, struct ovcamchip_window *win) /******** Palette-specific regs ********/ /* OV518 needs 8 bit multiplexed in color mode, and 16 bit in B&W */ - if (c->adapter->id == (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV518)) { + if (c->adapter->id == I2C_HW_SMBUS_OV518) { if (win->format == VIDEO_PALETTE_GREY) ov_write_mask(c, 0x13, 0x00, 0x20); else diff --git a/drivers/media/video/ovcamchip/ov6x30.c b/drivers/media/video/ovcamchip/ov6x30.c index 44a842379b45..6eab458ab792 100644 --- a/drivers/media/video/ovcamchip/ov6x30.c +++ b/drivers/media/video/ovcamchip/ov6x30.c @@ -301,7 +301,7 @@ static int ov6x30_mode_init(struct i2c_client *c, struct ovcamchip_window 
*win) /******** Palette-specific regs ********/ if (win->format == VIDEO_PALETTE_GREY) { - if (c->adapter->id == (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV518)) { + if (c->adapter->id == I2C_HW_SMBUS_OV518) { /* Do nothing - we're already in 8-bit mode */ } else { ov_write_mask(c, 0x13, 0x20, 0x20); @@ -313,7 +313,7 @@ static int ov6x30_mode_init(struct i2c_client *c, struct ovcamchip_window *win) * Therefore, the OV6630 needs to be in 8-bit multiplexed * output mode */ - if (c->adapter->id == (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV518)) { + if (c->adapter->id == I2C_HW_SMBUS_OV518) { /* Do nothing - we want to stay in 8-bit mode */ /* Warning: Messing with reg 0x13 breaks OV518 color */ } else { diff --git a/drivers/media/video/ovcamchip/ovcamchip_core.c b/drivers/media/video/ovcamchip/ovcamchip_core.c index 54dd5612d3b8..b98c64ab7c58 100644 --- a/drivers/media/video/ovcamchip/ovcamchip_core.c +++ b/drivers/media/video/ovcamchip/ovcamchip_core.c @@ -296,10 +296,10 @@ static int ovcamchip_attach(struct i2c_adapter *adap) * attach to adapters that are known to contain OV camera chips. 
*/ switch (adap->id) { - case (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV511): - case (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OV518): - case (I2C_ALGO_SMBUS | I2C_HW_SMBUS_OVFX2): - case (I2C_ALGO_SMBUS | I2C_HW_SMBUS_W9968CF): + case I2C_HW_SMBUS_OV511: + case I2C_HW_SMBUS_OV518: + case I2C_HW_SMBUS_OVFX2: + case I2C_HW_SMBUS_W9968CF: PDEBUG(1, "Adapter ID 0x%06x accepted", adap->id); break; default: diff --git a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c index 7cb1fb3e66f9..bc6b6c0cea6d 100644 --- a/drivers/media/video/tda7432.c +++ b/drivers/media/video/tda7432.c @@ -328,7 +328,7 @@ static int tda7432_probe(struct i2c_adapter *adap) if (adap->class & I2C_CLASS_TV_ANALOG) return i2c_probe(adap, &addr_data, tda7432_attach); #else - if (adap->id == (I2C_ALGO_BIT | I2C_HW_B_BT848)) + if (adap->id == I2C_HW_B_BT848) return i2c_probe(adap, &addr_data, tda7432_attach); #endif return 0; diff --git a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c index 566e1a5ca135..3eaf2e1211e5 100644 --- a/drivers/media/video/tda9875.c +++ b/drivers/media/video/tda9875.c @@ -262,7 +262,7 @@ static int tda9875_probe(struct i2c_adapter *adap) if (adap->class & I2C_CLASS_TV_ANALOG) return i2c_probe(adap, &addr_data, tda9875_attach); #else - if (adap->id == (I2C_ALGO_BIT | I2C_HW_B_BT848)) + if (adap->id == I2C_HW_B_BT848) return i2c_probe(adap, &addr_data, tda9875_attach); #endif return 0; diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index a28a395d6dfe..f1b1bb5acf70 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -618,8 +618,8 @@ static int tda9887_probe(struct i2c_adapter *adap) return i2c_probe(adap, &addr_data, tda9887_attach); #else switch (adap->id) { - case I2C_ALGO_BIT | I2C_HW_B_BT848: - case I2C_ALGO_BIT | I2C_HW_B_RIVA: + case I2C_HW_B_BT848: + case I2C_HW_B_RIVA: case I2C_ALGO_SAA7134: return i2c_probe(adap, &addr_data, tda9887_attach); break; diff --git a/drivers/media/video/tuner-3036.c 
b/drivers/media/video/tuner-3036.c index 103def1abe3c..79203595b9c1 100644 --- a/drivers/media/video/tuner-3036.c +++ b/drivers/media/video/tuner-3036.c @@ -165,7 +165,7 @@ static int tuner_probe(struct i2c_adapter *adap) { this_adap = 0; - if (adap->id == (I2C_ALGO_BIT | I2C_HW_B_LP)) + if (adap->id == I2C_HW_B_LP) return i2c_probe(adap, &addr_data, tuner_attach); return 0; } diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index f42a1efa8fcf..9420b5f52915 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1098,7 +1098,7 @@ static int tda8425_initialize(struct CHIPSTATE *chip) /* extern */ TDA8425_S1_CH1, /* intern */ TDA8425_S1_OFF, /* off */ TDA8425_S1_OFF, /* on */ TDA8425_S1_CH2}; - if (chip->c.adapter->id == (I2C_ALGO_BIT | I2C_HW_B_RIVA)) { + if (chip->c.adapter->id == I2C_HW_B_RIVA) { memcpy (desc->inputmap, inputmap, sizeof (inputmap)); } return 0; @@ -1555,8 +1555,8 @@ static int chip_probe(struct i2c_adapter *adap) return i2c_probe(adap, &addr_data, chip_attach); #else switch (adap->id) { - case I2C_ALGO_BIT | I2C_HW_B_BT848: - case I2C_ALGO_BIT | I2C_HW_B_RIVA: + case I2C_HW_B_BT848: + case I2C_HW_B_RIVA: case I2C_ALGO_SAA7134: return i2c_probe(adap, &addr_data, chip_attach); } diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c index 127ec38ebd60..3c3356a01cc6 100644 --- a/drivers/media/video/tveeprom.c +++ b/drivers/media/video/tveeprom.c @@ -534,7 +534,7 @@ static int tveeprom_attach_adapter (struct i2c_adapter *adapter) { dprintk(1,"%s: id 0x%x\n",__FUNCTION__,adapter->id); - if (adapter->id != (I2C_ALGO_BIT | I2C_HW_B_BT848)) + if (adapter->id != I2C_HW_B_BT848) return 0; return i2c_probe(adapter, &addr_data, tveeprom_detect_client); } diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c index 51b99cdbf29e..f0635b19de30 100644 --- a/drivers/media/video/tvmixer.c +++ b/drivers/media/video/tvmixer.c @@ -276,9 +276,9 @@ static int 
tvmixer_clients(struct i2c_client *client) #else /* TV card ??? */ switch (client->adapter->id) { - case I2C_ALGO_BIT | I2C_HW_SMBUS_VOODOO3: - case I2C_ALGO_BIT | I2C_HW_B_BT848: - case I2C_ALGO_BIT | I2C_HW_B_RIVA: + case I2C_HW_SMBUS_VOODOO3: + case I2C_HW_B_BT848: + case I2C_HW_B_RIVA: /* ok, have a look ... */ break; default: diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index 908cfdf17039..83e8dd627154 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -1579,7 +1579,7 @@ static int w9968cf_i2c_init(struct w9968cf_device* cam) }; static struct i2c_adapter adap = { - .id = I2C_ALGO_SMBUS | I2C_HW_SMBUS_W9968CF, + .id = I2C_HW_SMBUS_W9968CF, .class = I2C_CLASS_CAM_DIGITAL, .owner = THIS_MODULE, .client_register = w9968cf_i2c_attach_inform, diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c index 67f85344f0cc..ad60bbb16cdf 100644 --- a/drivers/video/matrox/matroxfb_maven.c +++ b/drivers/video/matrox/matroxfb_maven.c @@ -1271,7 +1271,7 @@ ERROR0:; } static int maven_attach_adapter(struct i2c_adapter* adapter) { - if (adapter->id == (I2C_ALGO_BIT | I2C_HW_B_G400)) + if (adapter->id == I2C_HW_B_G400) return i2c_probe(adapter, &addr_data, &maven_detect_client); return 0; } diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 33f08258f22b..5b72f664cc21 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -218,103 +218,103 @@ */ /* --- Bit algorithm adapters */ -#define I2C_HW_B_LP 0x00 /* Parallel port Philips style adapter */ -#define I2C_HW_B_LPC 0x01 /* Parallel port, over control reg. 
*/ -#define I2C_HW_B_SER 0x02 /* Serial line interface */ -#define I2C_HW_B_ELV 0x03 /* ELV Card */ -#define I2C_HW_B_VELLE 0x04 /* Vellemann K8000 */ -#define I2C_HW_B_BT848 0x05 /* BT848 video boards */ -#define I2C_HW_B_WNV 0x06 /* Winnov Videums */ -#define I2C_HW_B_VIA 0x07 /* Via vt82c586b */ -#define I2C_HW_B_HYDRA 0x08 /* Apple Hydra Mac I/O */ -#define I2C_HW_B_G400 0x09 /* Matrox G400 */ -#define I2C_HW_B_I810 0x0a /* Intel I810 */ -#define I2C_HW_B_VOO 0x0b /* 3dfx Voodoo 3 / Banshee */ -#define I2C_HW_B_PPORT 0x0c /* Primitive parallel port adapter */ -#define I2C_HW_B_SAVG 0x0d /* Savage 4 */ -#define I2C_HW_B_SCX200 0x0e /* Nat'l Semi SCx200 I2C */ -#define I2C_HW_B_RIVA 0x10 /* Riva based graphics cards */ -#define I2C_HW_B_IOC 0x11 /* IOC bit-wiggling */ -#define I2C_HW_B_TSUNA 0x12 /* DEC Tsunami chipset */ -#define I2C_HW_B_FRODO 0x13 /* 2d3D, Inc. SA-1110 Development Board */ -#define I2C_HW_B_OMAHA 0x14 /* Omaha I2C interface (ARM) */ -#define I2C_HW_B_GUIDE 0x15 /* Guide bit-basher */ -#define I2C_HW_B_IXP2000 0x16 /* GPIO on IXP2000 systems */ -#define I2C_HW_B_IXP4XX 0x17 /* GPIO on IXP4XX systems */ -#define I2C_HW_B_S3VIA 0x18 /* S3Via ProSavage adapter */ -#define I2C_HW_B_ZR36067 0x19 /* Zoran-36057/36067 based boards */ -#define I2C_HW_B_PCILYNX 0x1a /* TI PCILynx I2C adapter */ -#define I2C_HW_B_CX2388x 0x1b /* connexant 2388x based tv cards */ +#define I2C_HW_B_LP 0x010000 /* Parallel port Philips style */ +#define I2C_HW_B_LPC 0x010001 /* Parallel port control reg. 
*/ +#define I2C_HW_B_SER 0x010002 /* Serial line interface */ +#define I2C_HW_B_ELV 0x010003 /* ELV Card */ +#define I2C_HW_B_VELLE 0x010004 /* Vellemann K8000 */ +#define I2C_HW_B_BT848 0x010005 /* BT848 video boards */ +#define I2C_HW_B_WNV 0x010006 /* Winnov Videums */ +#define I2C_HW_B_VIA 0x010007 /* Via vt82c586b */ +#define I2C_HW_B_HYDRA 0x010008 /* Apple Hydra Mac I/O */ +#define I2C_HW_B_G400 0x010009 /* Matrox G400 */ +#define I2C_HW_B_I810 0x01000a /* Intel I810 */ +#define I2C_HW_B_VOO 0x01000b /* 3dfx Voodoo 3 / Banshee */ +#define I2C_HW_B_PPORT 0x01000c /* Primitive parallel port adapter */ +#define I2C_HW_B_SAVG 0x01000d /* Savage 4 */ +#define I2C_HW_B_SCX200 0x01000e /* Nat'l Semi SCx200 I2C */ +#define I2C_HW_B_RIVA 0x010010 /* Riva based graphics cards */ +#define I2C_HW_B_IOC 0x010011 /* IOC bit-wiggling */ +#define I2C_HW_B_TSUNA 0x010012 /* DEC Tsunami chipset */ +#define I2C_HW_B_FRODO 0x010013 /* 2d3D SA-1110 Development Board */ +#define I2C_HW_B_OMAHA 0x010014 /* Omaha I2C interface (ARM) */ +#define I2C_HW_B_GUIDE 0x010015 /* Guide bit-basher */ +#define I2C_HW_B_IXP2000 0x010016 /* GPIO on IXP2000 systems */ +#define I2C_HW_B_IXP4XX 0x010017 /* GPIO on IXP4XX systems */ +#define I2C_HW_B_S3VIA 0x010018 /* S3Via ProSavage adapter */ +#define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ +#define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ +#define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ /* --- PCF 8584 based algorithms */ -#define I2C_HW_P_LP 0x00 /* Parallel port interface */ -#define I2C_HW_P_ISA 0x01 /* generic ISA Bus inteface card */ -#define I2C_HW_P_ELEK 0x02 /* Elektor ISA Bus inteface card */ +#define I2C_HW_P_LP 0x020000 /* Parallel port interface */ +#define I2C_HW_P_ISA 0x020001 /* generic ISA Bus inteface card */ +#define I2C_HW_P_ELEK 0x020002 /* Elektor ISA Bus inteface card */ /* --- PCA 9564 based algorithms */ -#define I2C_HW_A_ISA 0x00 /* generic ISA Bus interface 
card */ +#define I2C_HW_A_ISA 0x1a0000 /* generic ISA Bus interface card */ /* --- ACPI Embedded controller algorithms */ -#define I2C_HW_ACPI_EC 0x00 +#define I2C_HW_ACPI_EC 0x1f0000 /* --- MPC824x PowerPC adapters */ -#define I2C_HW_MPC824X 0x00 /* Motorola 8240 / 8245 */ +#define I2C_HW_MPC824X 0x100001 /* Motorola 8240 / 8245 */ /* --- MPC8xx PowerPC adapters */ -#define I2C_HW_MPC8XX_EPON 0x00 /* Eponymous MPC8xx I2C adapter */ +#define I2C_HW_MPC8XX_EPON 0x110000 /* Eponymous MPC8xx I2C adapter */ /* --- ITE based algorithms */ -#define I2C_HW_I_IIC 0x00 /* controller on the ITE */ +#define I2C_HW_I_IIC 0x080000 /* controller on the ITE */ /* --- PowerPC on-chip adapters */ -#define I2C_HW_OCP 0x00 /* IBM on-chip I2C adapter */ +#define I2C_HW_OCP 0x120000 /* IBM on-chip I2C adapter */ /* --- Broadcom SiByte adapters */ -#define I2C_HW_SIBYTE 0x00 +#define I2C_HW_SIBYTE 0x150000 /* --- SGI adapters */ -#define I2C_HW_SGI_VINO 0x00 -#define I2C_HW_SGI_MACE 0x01 +#define I2C_HW_SGI_VINO 0x160000 +#define I2C_HW_SGI_MACE 0x160001 /* --- XSCALE on-chip adapters */ -#define I2C_HW_IOP3XX 0x00 +#define I2C_HW_IOP3XX 0x140000 /* --- Au1550 PSC adapters adapters */ -#define I2C_HW_AU1550_PSC 0x00 +#define I2C_HW_AU1550_PSC 0x1b0000 /* --- SMBus only adapters */ -#define I2C_HW_SMBUS_PIIX4 0x00 -#define I2C_HW_SMBUS_ALI15X3 0x01 -#define I2C_HW_SMBUS_VIA2 0x02 -#define I2C_HW_SMBUS_VOODOO3 0x03 -#define I2C_HW_SMBUS_I801 0x04 -#define I2C_HW_SMBUS_AMD756 0x05 -#define I2C_HW_SMBUS_SIS5595 0x06 -#define I2C_HW_SMBUS_ALI1535 0x07 -#define I2C_HW_SMBUS_SIS630 0x08 -#define I2C_HW_SMBUS_SIS96X 0x09 -#define I2C_HW_SMBUS_AMD8111 0x0a -#define I2C_HW_SMBUS_SCX200 0x0b -#define I2C_HW_SMBUS_NFORCE2 0x0c -#define I2C_HW_SMBUS_W9968CF 0x0d -#define I2C_HW_SMBUS_OV511 0x0e /* OV511(+) USB 1.1 webcam ICs */ -#define I2C_HW_SMBUS_OV518 0x0f /* OV518(+) USB 1.1 webcam ICs */ -#define I2C_HW_SMBUS_OV519 0x10 /* OV519 USB 1.1 webcam IC */ -#define I2C_HW_SMBUS_OVFX2 0x11 /* 
Cypress/OmniVision FX2 webcam */ +#define I2C_HW_SMBUS_PIIX4 0x040000 +#define I2C_HW_SMBUS_ALI15X3 0x040001 +#define I2C_HW_SMBUS_VIA2 0x040002 +#define I2C_HW_SMBUS_VOODOO3 0x040003 +#define I2C_HW_SMBUS_I801 0x040004 +#define I2C_HW_SMBUS_AMD756 0x040005 +#define I2C_HW_SMBUS_SIS5595 0x040006 +#define I2C_HW_SMBUS_ALI1535 0x040007 +#define I2C_HW_SMBUS_SIS630 0x040008 +#define I2C_HW_SMBUS_SIS96X 0x040009 +#define I2C_HW_SMBUS_AMD8111 0x04000a +#define I2C_HW_SMBUS_SCX200 0x04000b +#define I2C_HW_SMBUS_NFORCE2 0x04000c +#define I2C_HW_SMBUS_W9968CF 0x04000d +#define I2C_HW_SMBUS_OV511 0x04000e /* OV511(+) USB 1.1 webcam ICs */ +#define I2C_HW_SMBUS_OV518 0x04000f /* OV518(+) USB 1.1 webcam ICs */ +#define I2C_HW_SMBUS_OV519 0x040010 /* OV519 USB 1.1 webcam IC */ +#define I2C_HW_SMBUS_OVFX2 0x040011 /* Cypress/OmniVision FX2 webcam */ /* --- ISA pseudo-adapter */ -#define I2C_HW_ISA 0x00 +#define I2C_HW_ISA 0x050000 /* --- IPMI pseudo-adapter */ -#define I2C_HW_IPMI 0x00 +#define I2C_HW_IPMI 0x0b0000 /* --- IPMB adapter */ -#define I2C_HW_IPMB 0x00 +#define I2C_HW_IPMB 0x0c0000 /* --- MCP107 adapter */ -#define I2C_HW_MPC107 0x00 +#define I2C_HW_MPC107 0x0d0000 /* --- Marvell mv64xxx i2c adapter */ -#define I2C_HW_MV64XXX 0x00 +#define I2C_HW_MV64XXX 0x190000 #endif /* LINUX_I2C_ID_H */ diff --git a/include/linux/i2c-isa.h b/include/linux/i2c-isa.h index 54c27e88d101..67e3598c4cec 100644 --- a/include/linux/i2c-isa.h +++ b/include/linux/i2c-isa.h @@ -29,7 +29,7 @@ extern int i2c_isa_del_driver(struct i2c_driver *driver); /* Detect whether we are on the isa bus. This is only useful to hybrid (i2c+isa) drivers. 
*/ #define i2c_is_isa_adapter(adapptr) \ - ((adapptr)->id == (I2C_ALGO_ISA | I2C_HW_ISA)) + ((adapptr)->id == I2C_HW_ISA) #define i2c_is_isa_client(clientptr) \ i2c_is_isa_adapter((clientptr)->adapter) -- cgit v1.2.3 From 1684a984303abbfc39aa8b59b0fe825c717811a9 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:51:10 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.id (6/7) In theory, there should be no more users of I2C_ALGO_* at this point. However, it happens that several drivers were using I2C_ALGO_* for adapter ids, so we need to correct these before we can get rid of all the I2C_ALGO_* definitions. Note that this also fixes a bug in media/video/tvaudio.c: /* don't attach on saa7146 based cards, because dedicated drivers are used */ if ((adap->id & I2C_ALGO_SAA7146)) return 0; This test was plain broken, as it would succeed for many more adapters than just the saa7146: any adapter whose id shares at least one bit with the saa7146 id. We are really lucky that the few other adapters we want this driver to work with did not fulfill that condition.
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/i2c/busses/i2c-keywest.c | 1 - drivers/i2c/busses/scx200_acb.c | 2 +- drivers/media/common/saa7146_i2c.c | 2 +- drivers/media/dvb/b2c2/flexcop-i2c.c | 1 - drivers/media/dvb/dvb-usb/dvb-usb-i2c.c | 1 - drivers/media/dvb/pluto2/pluto2.c | 1 - drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c | 1 - drivers/media/video/ir-kbd-i2c.c | 2 +- drivers/media/video/saa7134/saa7134-i2c.c | 2 +- drivers/media/video/tda9840.c | 2 +- drivers/media/video/tda9887.c | 2 +- drivers/media/video/tea6415c.c | 2 +- drivers/media/video/tea6420.c | 2 +- drivers/media/video/tvaudio.c | 4 ++-- drivers/video/aty/radeon_i2c.c | 2 +- drivers/video/nvidia/nv_i2c.c | 3 +-- drivers/video/riva/rivafb-i2c.c | 3 +-- drivers/video/savage/savagefb-i2c.c | 3 +-- include/linux/i2c-id.h | 7 +++++++ include/media/id.h | 5 ----- 20 files changed, 21 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-keywest.c b/drivers/i2c/busses/i2c-keywest.c index 5254d2db282c..e60ed6f49a62 100644 --- a/drivers/i2c/busses/i2c-keywest.c +++ b/drivers/i2c/busses/i2c-keywest.c @@ -619,7 +619,6 @@ create_iface(struct device_node *np, struct device *dev) sprintf(chan->adapter.name, "%s %d", np->parent->name, i); chan->iface = iface; chan->chan_no = i; - chan->adapter.id = I2C_ALGO_SMBUS; chan->adapter.algo = &keywest_algorithm; chan->adapter.algo_data = NULL; chan->adapter.client_register = NULL; diff --git a/drivers/i2c/busses/scx200_acb.c b/drivers/i2c/busses/scx200_acb.c index 46b9a7594c99..a1d580e05361 100644 --- a/drivers/i2c/busses/scx200_acb.c +++ b/drivers/i2c/busses/scx200_acb.c @@ -454,7 +454,7 @@ static int __init scx200_acb_create(int base, int index) i2c_set_adapdata(adapter, iface); snprintf(adapter->name, I2C_NAME_SIZE, "SCx200 ACB%d", index); adapter->owner = THIS_MODULE; - adapter->id = I2C_ALGO_SMBUS; + adapter->id = I2C_HW_SMBUS_SCX200; adapter->algo = &scx200_acb_algorithm; adapter->class = 
I2C_CLASS_HWMON; diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c index e413ee7f267a..6284894505c6 100644 --- a/drivers/media/common/saa7146_i2c.c +++ b/drivers/media/common/saa7146_i2c.c @@ -410,7 +410,7 @@ int saa7146_i2c_adapter_prepare(struct saa7146_dev *dev, struct i2c_adapter *i2c #endif i2c_adapter->algo = &saa7146_algo; i2c_adapter->algo_data = NULL; - i2c_adapter->id = I2C_ALGO_SAA7146; + i2c_adapter->id = I2C_HW_SAA7146; i2c_adapter->timeout = SAA7146_I2C_TIMEOUT; i2c_adapter->retries = SAA7146_I2C_RETRIES; } diff --git a/drivers/media/dvb/b2c2/flexcop-i2c.c b/drivers/media/dvb/b2c2/flexcop-i2c.c index 848910ff3c9c..56495cb6cd02 100644 --- a/drivers/media/dvb/b2c2/flexcop-i2c.c +++ b/drivers/media/dvb/b2c2/flexcop-i2c.c @@ -190,7 +190,6 @@ int flexcop_i2c_init(struct flexcop_device *fc) fc->i2c_adap.class = I2C_CLASS_TV_DIGITAL; fc->i2c_adap.algo = &flexcop_algo; fc->i2c_adap.algo_data = NULL; - fc->i2c_adap.id = I2C_ALGO_BIT; if ((ret = i2c_add_adapter(&fc->i2c_adap)) < 0) return ret; diff --git a/drivers/media/dvb/dvb-usb/dvb-usb-i2c.c b/drivers/media/dvb/dvb-usb/dvb-usb-i2c.c index 9f0a8d90d146..da970947dfc7 100644 --- a/drivers/media/dvb/dvb-usb/dvb-usb-i2c.c +++ b/drivers/media/dvb/dvb-usb/dvb-usb-i2c.c @@ -27,7 +27,6 @@ int dvb_usb_i2c_init(struct dvb_usb_device *d) #endif d->i2c_adap.algo = d->props.i2c_algo; d->i2c_adap.algo_data = NULL; - d->i2c_adap.id = I2C_ALGO_BIT; i2c_set_adapdata(&d->i2c_adap, d); diff --git a/drivers/media/dvb/pluto2/pluto2.c b/drivers/media/dvb/pluto2/pluto2.c index 706e0bcb5ede..85b437bbddcd 100644 --- a/drivers/media/dvb/pluto2/pluto2.c +++ b/drivers/media/dvb/pluto2/pluto2.c @@ -633,7 +633,6 @@ static int __devinit pluto2_probe(struct pci_dev *pdev, i2c_set_adapdata(&pluto->i2c_adap, pluto); strcpy(pluto->i2c_adap.name, DRIVER_NAME); pluto->i2c_adap.owner = THIS_MODULE; - pluto->i2c_adap.id = I2C_ALGO_BIT; pluto->i2c_adap.class = I2C_CLASS_TV_DIGITAL; pluto->i2c_adap.dev.parent = 
&pdev->dev; pluto->i2c_adap.algo_data = &pluto->i2c_bit; diff --git a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c index 11afec52f31f..7daf7b1598a0 100644 --- a/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c +++ b/drivers/media/dvb/ttusb-budget/dvb-ttusb-budget.c @@ -1523,7 +1523,6 @@ static int ttusb_probe(struct usb_interface *intf, const struct usb_device_id *i #endif ttusb->i2c_adap.algo = &ttusb_dec_algo; ttusb->i2c_adap.algo_data = NULL; - ttusb->i2c_adap.id = I2C_ALGO_BIT; result = i2c_add_adapter(&ttusb->i2c_adap); if (result) { diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index c2f32d522668..feccf08bc0e3 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -432,7 +432,7 @@ static int ir_probe(struct i2c_adapter *adap) case I2C_HW_B_BT848: probe = probe_bttv; break; - case I2C_ALGO_SAA7134: + case I2C_HW_SAA7134: probe = probe_saa7134; break; } diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c index 0bb1073d94bd..238ac3fdeb4a 100644 --- a/drivers/media/video/saa7134/saa7134-i2c.c +++ b/drivers/media/video/saa7134/saa7134-i2c.c @@ -381,7 +381,7 @@ static struct i2c_adapter saa7134_adap_template = { .class = I2C_CLASS_TV_ANALOG, #endif I2C_DEVNAME("saa7134"), - .id = I2C_ALGO_SAA7134, + .id = I2C_HW_SAA7134, .algo = &saa7134_algo, .client_register = attach_inform, }; diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c index c29bdfc3244e..c0594e09d63e 100644 --- a/drivers/media/video/tda9840.c +++ b/drivers/media/video/tda9840.c @@ -205,7 +205,7 @@ static int detect(struct i2c_adapter *adapter, int address, int kind) static int attach(struct i2c_adapter *adapter) { /* let's see whether this is a know adapter we can attach to */ - if (adapter->id != I2C_ALGO_SAA7146) { + if (adapter->id != I2C_HW_SAA7146) { dprintk("refusing to probe on unknown adapter 
[name='%s',id=0x%x]\n", adapter->name, adapter->id); return -ENODEV; } diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index f1b1bb5acf70..abb96ce464c7 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -620,7 +620,7 @@ static int tda9887_probe(struct i2c_adapter *adap) switch (adap->id) { case I2C_HW_B_BT848: case I2C_HW_B_RIVA: - case I2C_ALGO_SAA7134: + case I2C_HW_SAA7134: return i2c_probe(adap, &addr_data, tda9887_attach); break; } diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c index b44db8a7b94d..8334d6ccd0a0 100644 --- a/drivers/media/video/tea6415c.c +++ b/drivers/media/video/tea6415c.c @@ -86,7 +86,7 @@ static int detect(struct i2c_adapter *adapter, int address, int kind) static int attach(struct i2c_adapter *adapter) { /* let's see whether this is a know adapter we can attach to */ - if (adapter->id != I2C_ALGO_SAA7146) { + if (adapter->id != I2C_HW_SAA7146) { dprintk("refusing to probe on unknown adapter [name='%s',id=0x%x]\n", adapter->name, adapter->id); return -ENODEV; } diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 48d4db7d507b..9d09d2d23c2e 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -135,7 +135,7 @@ static int tea6420_detect(struct i2c_adapter *adapter, int address, int kind) static int attach(struct i2c_adapter *adapter) { /* let's see whether this is a know adapter we can attach to */ - if (adapter->id != I2C_ALGO_SAA7146) { + if (adapter->id != I2C_HW_SAA7146) { dprintk("refusing to probe on unknown adapter [name='%s',id=0x%x]\n", adapter->name, adapter->id); return -ENODEV; } diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index 9420b5f52915..f6aab89593ea 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -1548,7 +1548,7 @@ static int chip_probe(struct i2c_adapter *adap) { /* don't attach on saa7146 based cards, because dedicated 
drivers are used */ - if ((adap->id & I2C_ALGO_SAA7146)) + if (adap->id == I2C_HW_SAA7146) return 0; #ifdef I2C_CLASS_TV_ANALOG if (adap->class & I2C_CLASS_TV_ANALOG) @@ -1557,7 +1557,7 @@ static int chip_probe(struct i2c_adapter *adap) switch (adap->id) { case I2C_HW_B_BT848: case I2C_HW_B_RIVA: - case I2C_ALGO_SAA7134: + case I2C_HW_SAA7134: return i2c_probe(adap, &addr_data, chip_attach); } #endif diff --git a/drivers/video/aty/radeon_i2c.c b/drivers/video/aty/radeon_i2c.c index 762244164c81..a9d0414e4655 100644 --- a/drivers/video/aty/radeon_i2c.c +++ b/drivers/video/aty/radeon_i2c.c @@ -75,7 +75,7 @@ static int radeon_setup_i2c_bus(struct radeon_i2c_chan *chan, const char *name) strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_ALGO_ATI; + chan->adapter.id = I2C_HW_B_RADEON; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->rinfo->pdev->dev; chan->algo.setsda = radeon_gpio_setsda; diff --git a/drivers/video/nvidia/nv_i2c.c b/drivers/video/nvidia/nv_i2c.c index 3757c1407c19..1a91bffdda26 100644 --- a/drivers/video/nvidia/nv_i2c.c +++ b/drivers/video/nvidia/nv_i2c.c @@ -90,14 +90,13 @@ static int nvidia_gpio_getsda(void *data) return val; } -#define I2C_ALGO_NVIDIA 0x0e0000 static int nvidia_setup_i2c_bus(struct nvidia_i2c_chan *chan, const char *name) { int rc; strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_ALGO_NVIDIA; + chan->adapter.id = I2C_HW_B_NVIDIA; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pci_dev->dev; chan->algo.setsda = nvidia_gpio_setsda; diff --git a/drivers/video/riva/rivafb-i2c.c b/drivers/video/riva/rivafb-i2c.c index da1334dfd51d..77151d8e0766 100644 --- a/drivers/video/riva/rivafb-i2c.c +++ b/drivers/video/riva/rivafb-i2c.c @@ -92,14 +92,13 @@ static int riva_gpio_getsda(void* data) return val; } -#define I2C_ALGO_RIVA 0x0e0000 static int riva_setup_i2c_bus(struct riva_i2c_chan *chan, const char 
*name) { int rc; strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_ALGO_RIVA; + chan->adapter.id = I2C_HW_B_RIVA; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pdev->dev; chan->algo.setsda = riva_gpio_setsda; diff --git a/drivers/video/savage/savagefb-i2c.c b/drivers/video/savage/savagefb-i2c.c index 024a0cecff15..847698b5cfe7 100644 --- a/drivers/video/savage/savagefb-i2c.c +++ b/drivers/video/savage/savagefb-i2c.c @@ -137,7 +137,6 @@ static int prosavage_gpio_getsda(void* data) return (0 != (GET_CR_DATA(chan->ioaddr) & PROSAVAGE_I2C_SDA_IN)); } -#define I2C_ALGO_SAVAGE 0x0f0000 static int savage_setup_i2c_bus(struct savagefb_i2c_chan *chan, const char *name) { @@ -147,7 +146,7 @@ static int savage_setup_i2c_bus(struct savagefb_i2c_chan *chan, if (add_bus && chan->par) { strcpy(chan->adapter.name, name); chan->adapter.owner = THIS_MODULE; - chan->adapter.id = I2C_ALGO_SAVAGE; + chan->adapter.id = I2C_HW_B_SAVAGE; chan->adapter.algo_data = &chan->algo; chan->adapter.dev.parent = &chan->par->pcidev->dev; chan->algo.udelay = 40; diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index 5b72f664cc21..d044e738d383 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -245,6 +245,9 @@ #define I2C_HW_B_ZR36067 0x010019 /* Zoran-36057/36067 based boards */ #define I2C_HW_B_PCILYNX 0x01001a /* TI PCILynx I2C adapter */ #define I2C_HW_B_CX2388x 0x01001b /* connexant 2388x based tv cards */ +#define I2C_HW_B_NVIDIA 0x01001c /* nvidia framebuffer driver */ +#define I2C_HW_B_SAVAGE 0x01001d /* savage framebuffer driver */ +#define I2C_HW_B_RADEON 0x01001e /* radeon framebuffer driver */ /* --- PCF 8584 based algorithms */ #define I2C_HW_P_LP 0x020000 /* Parallel port interface */ @@ -317,4 +320,8 @@ /* --- Marvell mv64xxx i2c adapter */ #define I2C_HW_MV64XXX 0x190000 +/* --- Miscellaneous adapters */ +#define I2C_HW_SAA7146 0x060000 /* SAA7146 video decoder bus */ +#define 
I2C_HW_SAA7134 0x090000 /* SAA7134 video decoder bus */ + #endif /* LINUX_I2C_ID_H */ diff --git a/include/media/id.h b/include/media/id.h index a39a6423914b..801ddef301aa 100644 --- a/include/media/id.h +++ b/include/media/id.h @@ -34,8 +34,3 @@ #ifndef I2C_DRIVERID_SAA6752HS # define I2C_DRIVERID_SAA6752HS I2C_DRIVERID_EXP0+8 #endif - -/* algorithms */ -#ifndef I2C_ALGO_SAA7134 -# define I2C_ALGO_SAA7134 0x090000 -#endif -- cgit v1.2.3 From c2459cf257106cea5adbc83f084b76d0030eb700 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 11 Aug 2005 23:52:35 +0200 Subject: [PATCH] I2C: Kill i2c_algorithm.id (7/7) The I2C_ALGO_* constants have no more users, delete them. Also update the comments in i2c-id.h so that they reflect the current state of the file. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c-id.h | 57 +------------------------------------------------- 1 file changed, 1 insertion(+), 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h index d044e738d383..44f30876a1c9 100644 --- a/include/linux/i2c-id.h +++ b/include/linux/i2c-id.h @@ -1,6 +1,6 @@ /* ------------------------------------------------------------------------- */ /* */ -/* i2c.h - definitions for the i2c-bus interface */ +/* i2c-id.h - identifier values for i2c drivers and adapters */ /* */ /* ------------------------------------------------------------------------- */ /* Copyright (C) 1995-1999 Simon G. Vogl @@ -23,16 +23,6 @@ #ifndef LINUX_I2C_ID_H #define LINUX_I2C_ID_H -/* - * This file is part of the i2c-bus package and contains the identifier - * values for drivers, adapters and other folk populating these serial - * worlds. - * - * These will change often (i.e. additions) , therefore this has been - * separated from the functional interface definitions of the i2c api. 
- * - */ - /* * ---- Driver types ----------------------------------------------------- * device id name + number function description, i2c address(es) @@ -170,51 +160,6 @@ /* * ---- Adapter types ---------------------------------------------------- - * - * First, we distinguish between several algorithms to access the hardware - * interface types, as a PCF 8584 needs other care than a bit adapter. - */ - -#define I2C_ALGO_NONE 0x000000 -#define I2C_ALGO_BIT 0x010000 /* bit style adapters */ -#define I2C_ALGO_PCF 0x020000 /* PCF 8584 style adapters */ -#define I2C_ALGO_ATI 0x030000 /* ATI video card */ -#define I2C_ALGO_SMBUS 0x040000 -#define I2C_ALGO_ISA 0x050000 /* lm_sensors ISA pseudo-adapter */ -#define I2C_ALGO_SAA7146 0x060000 /* SAA 7146 video decoder bus */ -#define I2C_ALGO_ACB 0x070000 /* ACCESS.bus algorithm */ -#define I2C_ALGO_IIC 0x080000 /* ITE IIC bus */ -#define I2C_ALGO_SAA7134 0x090000 -#define I2C_ALGO_MPC824X 0x0a0000 /* Motorola 8240 / 8245 */ -#define I2C_ALGO_IPMI 0x0b0000 /* IPMI dummy adapter */ -#define I2C_ALGO_IPMB 0x0c0000 /* IPMB adapter */ -#define I2C_ALGO_MPC107 0x0d0000 -#define I2C_ALGO_EC 0x100000 /* ACPI embedded controller */ - -#define I2C_ALGO_MPC8XX 0x110000 /* MPC8xx PowerPC I2C algorithm */ -#define I2C_ALGO_OCP 0x120000 /* IBM or otherwise On-chip I2C algorithm */ -#define I2C_ALGO_BITHS 0x130000 /* enhanced bit style adapters */ -#define I2C_ALGO_IOP3XX 0x140000 /* XSCALE IOP3XX On-chip I2C alg */ -#define I2C_ALGO_SIBYTE 0x150000 /* Broadcom SiByte SOCs */ -#define I2C_ALGO_SGI 0x160000 /* SGI algorithm */ - -#define I2C_ALGO_USB 0x170000 /* USB algorithm */ -#define I2C_ALGO_VIRT 0x180000 /* Virtual bus adapter */ - -#define I2C_ALGO_MV64XXX 0x190000 /* Marvell mv64xxx i2c ctlr */ -#define I2C_ALGO_PCA 0x1a0000 /* PCA 9564 style adapters */ -#define I2C_ALGO_AU1550 0x1b0000 /* Au1550 PSC algorithm */ - -#define I2C_ALGO_EXP 0x800000 /* experimental */ - -#define I2C_ALGO_MASK 0xff0000 /* Mask for algorithms */ 
-#define I2C_ALGO_SHIFT 0x10 /* right shift to get index values */ - -#define I2C_HW_ADAPS 0x10000 /* # adapter types */ -#define I2C_HW_MASK 0xffff - - -/* hw specific modules that are defined per algorithm layer */ /* --- Bit algorithm adapters */ -- cgit v1.2.3 From 020789e9cb688ac8b15a9950d25fe45492b23398 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 13 Aug 2005 13:04:32 +0200 Subject: [PATCH] I2C: Outdated i2c_adapter comment Delete an outdated comment about i2c_adapter.id being computed from algo->id. Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index af4983b488b6..233c153b12b1 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -219,8 +219,7 @@ struct i2c_algorithm { */ struct i2c_adapter { struct module *owner; - unsigned int id;/* == is algo->id | hwdep.struct->id, */ - /* for registered values see below */ + unsigned int id; unsigned int class; struct i2c_algorithm *algo;/* the algorithm to access the bus */ void *algo_data; -- cgit v1.2.3 From fae91e72b79ba9a21f0ce7551a1fd7e8984c85a6 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 15 Aug 2005 19:57:04 +0200 Subject: [PATCH] I2C: Drop I2C_DEVNAME and i2c_clientname I2C_DEVNAME and i2c_clientname were introduced in 2.5.68 [1] to help media/video driver authors who wanted their code to be compatible with both Linux 2.4 and 2.6. The cause of the incompatibility has gone since [2], so I think we can get rid of them, as they tend to make the code harder to read and longer to preprocess/compile for no more benefit. I'd hope nobody seriously attempts to keep media/video drivers compatible across Linux trees anymore, BTW.
[1] http://marc.theaimsgroup.com/?l=linux-kernel&m=104930186524598&w=2 [2] http://www.linuxhq.com/kernel/v2.6/0-test3/include/linux/i2c.h Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- drivers/media/video/bt832.c | 2 +- drivers/media/video/bttv-i2c.c | 8 ++--- drivers/media/video/cx88/cx88-i2c.c | 8 ++--- drivers/media/video/ir-kbd-i2c.c | 2 +- drivers/media/video/msp3400.c | 4 +-- drivers/media/video/ovcamchip/ovcamchip_core.c | 6 ++-- drivers/media/video/saa7134/saa6752hs.c | 2 +- drivers/media/video/saa7134/saa7134-i2c.c | 6 ++-- drivers/media/video/tda7432.c | 2 +- drivers/media/video/tda9840.c | 2 +- drivers/media/video/tda9875.c | 2 +- drivers/media/video/tda9887.c | 2 +- drivers/media/video/tea6415c.c | 2 +- drivers/media/video/tea6420.c | 2 +- drivers/media/video/tuner-core.c | 2 +- drivers/media/video/tvaudio.c | 41 ++++++++++++-------------- drivers/media/video/tvmixer.c | 8 ++--- drivers/media/video/zoran_card.c | 2 +- drivers/usb/media/w9968cf.c | 8 ++--- include/linux/i2c.h | 7 ----- 20 files changed, 53 insertions(+), 65 deletions(-) (limited to 'include/linux') diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c index 67ffed8a7fea..76c1b63ebdf2 100644 --- a/drivers/media/video/bt832.c +++ b/drivers/media/video/bt832.c @@ -241,7 +241,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - I2C_DEVNAME("bt832"), + .name = "bt832", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/bttv-i2c.c b/drivers/media/video/bttv-i2c.c index 1db95f75ac68..706dc48df962 100644 --- a/drivers/media/video/bttv-i2c.c +++ b/drivers/media/video/bttv-i2c.c @@ -109,7 +109,7 @@ static struct i2c_adapter bttv_i2c_adap_sw_template = { #ifdef I2C_CLASS_TV_ANALOG .class = I2C_CLASS_TV_ANALOG, #endif - I2C_DEVNAME("bt848"), + .name = "bt848", .id = I2C_HW_B_BT848, .client_register = attach_inform, }; @@ -280,7 +280,7 @@ static struct i2c_adapter bttv_i2c_adap_hw_template = { 
#ifdef I2C_CLASS_TV_ANALOG .class = I2C_CLASS_TV_ANALOG, #endif - I2C_DEVNAME("bt878"), + .name = "bt878", .id = I2C_HW_B_BT848 /* FIXME */, .algo = &bttv_algo, .client_register = attach_inform, @@ -296,7 +296,7 @@ static int attach_inform(struct i2c_client *client) if (bttv_debug) printk(KERN_DEBUG "bttv%d: %s i2c attach [addr=0x%x,client=%s]\n", btv->c.nr,client->driver->name,client->addr, - i2c_clientname(client)); + client->name); if (!client->driver->command) return 0; @@ -324,7 +324,7 @@ void bttv_call_i2c_clients(struct bttv *btv, unsigned int cmd, void *arg) } static struct i2c_client bttv_i2c_client_template = { - I2C_DEVNAME("bttv internal"), + .name = "bttv internal", }; diff --git a/drivers/media/video/cx88/cx88-i2c.c b/drivers/media/video/cx88/cx88-i2c.c index a628a55299c6..7f598039e025 100644 --- a/drivers/media/video/cx88/cx88-i2c.c +++ b/drivers/media/video/cx88/cx88-i2c.c @@ -95,7 +95,7 @@ static int attach_inform(struct i2c_client *client) struct cx88_core *core = i2c_get_adapdata(client->adapter); dprintk(1, "%s i2c attach [addr=0x%x,client=%s]\n", - client->driver->name,client->addr,i2c_clientname(client)); + client->driver->name, client->addr, client->name); if (!client->driver->command) return 0; @@ -128,7 +128,7 @@ static int detach_inform(struct i2c_client *client) { struct cx88_core *core = i2c_get_adapdata(client->adapter); - dprintk(1, "i2c detach [client=%s]\n", i2c_clientname(client)); + dprintk(1, "i2c detach [client=%s]\n", client->name); return 0; } @@ -152,7 +152,7 @@ static struct i2c_algo_bit_data cx8800_i2c_algo_template = { /* ----------------------------------------------------------------------- */ static struct i2c_adapter cx8800_i2c_adap_template = { - I2C_DEVNAME("cx2388x"), + .name = "cx2388x", .owner = THIS_MODULE, .id = I2C_HW_B_CX2388x, .client_register = attach_inform, @@ -160,7 +160,7 @@ static struct i2c_adapter cx8800_i2c_adap_template = { }; static struct i2c_client cx8800_i2c_client_template = { - 
I2C_DEVNAME("cx88xx internal"), + .name = "cx88xx internal", }; static char *i2c_devs[128] = { diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index feccf08bc0e3..1e273ff3f956 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -308,7 +308,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("unset"), + .name = "unset", .driver = &driver }; diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c index e956234abf24..ca02f6f14b00 100644 --- a/drivers/media/video/msp3400.c +++ b/drivers/media/video/msp3400.c @@ -1437,7 +1437,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("(unset)"), + .name = "(unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; @@ -1509,7 +1509,7 @@ static int msp_attach(struct i2c_adapter *adap, int addr, int kind) } /* hello world :-) */ - printk(KERN_INFO "msp34xx: init: chip=%s",i2c_clientname(c)); + printk(KERN_INFO "msp34xx: init: chip=%s", c->name); if (HAVE_NICAM(msp)) printk(" +nicam"); if (HAVE_SIMPLE(msp)) diff --git a/drivers/media/video/ovcamchip/ovcamchip_core.c b/drivers/media/video/ovcamchip/ovcamchip_core.c index b98c64ab7c58..2de34ebf0673 100644 --- a/drivers/media/video/ovcamchip/ovcamchip_core.c +++ b/drivers/media/video/ovcamchip/ovcamchip_core.c @@ -314,7 +314,7 @@ static int ovcamchip_attach(struct i2c_adapter *adap) } memcpy(c, &client_template, sizeof *c); c->adapter = adap; - strcpy(i2c_clientname(c), "OV????"); + strcpy(c->name, "OV????"); ov = kmalloc(sizeof *ov, GFP_KERNEL); if (!ov) { @@ -328,7 +328,7 @@ static int ovcamchip_attach(struct i2c_adapter *adap) if (rc < 0) goto error; - strcpy(i2c_clientname(c), chip_names[ov->subtype]); + strcpy(c->name, chip_names[ov->subtype]); PDEBUG(1, "Camera chip detection complete"); @@ -421,7 +421,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - 
I2C_DEVNAME("(unset)"), + .name = "(unset)", .driver = &driver, }; diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c index 79d05ea1b69b..382911c6ef22 100644 --- a/drivers/media/video/saa7134/saa6752hs.c +++ b/drivers/media/video/saa7134/saa6752hs.c @@ -598,7 +598,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("saa6752hs"), + .name = "saa6752hs", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c index 238ac3fdeb4a..eae6b529713f 100644 --- a/drivers/media/video/saa7134/saa7134-i2c.c +++ b/drivers/media/video/saa7134/saa7134-i2c.c @@ -334,7 +334,7 @@ static int attach_inform(struct i2c_client *client) struct tuner_setup tun_setup; d1printk( "%s i2c attach [addr=0x%x,client=%s]\n", - client->driver->name,client->addr,i2c_clientname(client)); + client->driver->name, client->addr, client->name); if (!client->driver->command) return 0; @@ -380,14 +380,14 @@ static struct i2c_adapter saa7134_adap_template = { #ifdef I2C_CLASS_TV_ANALOG .class = I2C_CLASS_TV_ANALOG, #endif - I2C_DEVNAME("saa7134"), + .name = "saa7134", .id = I2C_HW_SAA7134, .algo = &saa7134_algo, .client_register = attach_inform, }; static struct i2c_client saa7134_client_template = { - I2C_DEVNAME("saa7134 internal"), + .name = "saa7134 internal", }; /* ----------------------------------------------------------- */ diff --git a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c index bc6b6c0cea6d..255b6088ebf9 100644 --- a/drivers/media/video/tda7432.c +++ b/drivers/media/video/tda7432.c @@ -513,7 +513,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("tda7432"), + .name = "tda7432", .driver = &driver, }; diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c index c0594e09d63e..1794686612c6 100644 --- 
a/drivers/media/video/tda9840.c +++ b/drivers/media/video/tda9840.c @@ -231,7 +231,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - I2C_DEVNAME("tda9840"), + .name = "tda9840", .driver = &driver, }; diff --git a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c index 3eaf2e1211e5..7e3dcdb262b0 100644 --- a/drivers/media/video/tda9875.c +++ b/drivers/media/video/tda9875.c @@ -384,7 +384,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("tda9875"), + .name = "tda9875", .driver = &driver, }; diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index abb96ce464c7..d60fc562aecd 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -793,7 +793,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - I2C_DEVNAME("tda9887"), + .name = "tda9887", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c index 8334d6ccd0a0..ee3688348b66 100644 --- a/drivers/media/video/tea6415c.c +++ b/drivers/media/video/tea6415c.c @@ -200,7 +200,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - I2C_DEVNAME("tea6415c"), + .name = "tea6415c", .driver = &driver, }; diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c index 9d09d2d23c2e..17975c19da5e 100644 --- a/drivers/media/video/tea6420.c +++ b/drivers/media/video/tea6420.c @@ -177,7 +177,7 @@ static struct i2c_driver driver = { }; static struct i2c_client client_template = { - I2C_DEVNAME("tea6420"), + .name = "tea6420", .driver = &driver, }; diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index a155e99a263b..3b1893c2ae3b 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -709,7 +709,7 @@ static struct i2c_driver driver = { }, }; static struct 
i2c_client client_template = { - I2C_DEVNAME("(tuner unset)"), + .name = "(tuner unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c index f6aab89593ea..258724b2d6d2 100644 --- a/drivers/media/video/tvaudio.c +++ b/drivers/media/video/tvaudio.c @@ -162,24 +162,23 @@ static int chip_write(struct CHIPSTATE *chip, int subaddr, int val) unsigned char buffer[2]; if (-1 == subaddr) { - dprintk("%s: chip_write: 0x%x\n", - i2c_clientname(&chip->c), val); + dprintk("%s: chip_write: 0x%x\n", chip->c.name, val); chip->shadow.bytes[1] = val; buffer[0] = val; if (1 != i2c_master_send(&chip->c,buffer,1)) { printk(KERN_WARNING "%s: I/O error (write 0x%x)\n", - i2c_clientname(&chip->c), val); + chip->c.name, val); return -1; } } else { dprintk("%s: chip_write: reg%d=0x%x\n", - i2c_clientname(&chip->c), subaddr, val); + chip->c.name, subaddr, val); chip->shadow.bytes[subaddr+1] = val; buffer[0] = subaddr; buffer[1] = val; if (2 != i2c_master_send(&chip->c,buffer,2)) { printk(KERN_WARNING "%s: I/O error (write reg%d=0x%x)\n", - i2c_clientname(&chip->c), subaddr, val); + chip->c.name, subaddr, val); return -1; } } @@ -203,11 +202,10 @@ static int chip_read(struct CHIPSTATE *chip) unsigned char buffer; if (1 != i2c_master_recv(&chip->c,&buffer,1)) { - printk(KERN_WARNING "%s: I/O error (read)\n", - i2c_clientname(&chip->c)); + printk(KERN_WARNING "%s: I/O error (read)\n", chip->c.name); return -1; } - dprintk("%s: chip_read: 0x%x\n",i2c_clientname(&chip->c),buffer); + dprintk("%s: chip_read: 0x%x\n", chip->c.name, buffer); return buffer; } @@ -222,12 +220,11 @@ static int chip_read2(struct CHIPSTATE *chip, int subaddr) write[0] = subaddr; if (2 != i2c_transfer(chip->c.adapter,msgs,2)) { - printk(KERN_WARNING "%s: I/O error (read2)\n", - i2c_clientname(&chip->c)); + printk(KERN_WARNING "%s: I/O error (read2)\n", chip->c.name); return -1; } dprintk("%s: chip_read2: reg%d=0x%x\n", - 
i2c_clientname(&chip->c),subaddr,read[0]); + chip->c.name, subaddr, read[0]); return read[0]; } @@ -240,7 +237,7 @@ static int chip_cmd(struct CHIPSTATE *chip, char *name, audiocmd *cmd) /* update our shadow register set; print bytes if (debug > 0) */ dprintk("%s: chip_cmd(%s): reg=%d, data:", - i2c_clientname(&chip->c),name,cmd->bytes[0]); + chip->c.name, name, cmd->bytes[0]); for (i = 1; i < cmd->count; i++) { dprintk(" 0x%x",cmd->bytes[i]); chip->shadow.bytes[i+cmd->bytes[0]] = cmd->bytes[i]; @@ -249,7 +246,7 @@ static int chip_cmd(struct CHIPSTATE *chip, char *name, audiocmd *cmd) /* send data to the chip */ if (cmd->count != i2c_master_send(&chip->c,cmd->bytes,cmd->count)) { - printk(KERN_WARNING "%s: I/O error (%s)\n", i2c_clientname(&chip->c), name); + printk(KERN_WARNING "%s: I/O error (%s)\n", chip->c.name, name); return -1; } return 0; @@ -274,9 +271,9 @@ static int chip_thread(void *data) struct CHIPSTATE *chip = data; struct CHIPDESC *desc = chiplist + chip->type; - daemonize("%s",i2c_clientname(&chip->c)); + daemonize("%s", chip->c.name); allow_signal(SIGTERM); - dprintk("%s: thread started\n", i2c_clientname(&chip->c)); + dprintk("%s: thread started\n", chip->c.name); for (;;) { add_wait_queue(&chip->wq, &wait); @@ -288,7 +285,7 @@ static int chip_thread(void *data) try_to_freeze(); if (chip->done || signal_pending(current)) break; - dprintk("%s: thread wakeup\n", i2c_clientname(&chip->c)); + dprintk("%s: thread wakeup\n", chip->c.name); /* don't do anything for radio or if mode != auto */ if (chip->norm == VIDEO_MODE_RADIO || chip->mode != 0) @@ -301,7 +298,7 @@ static int chip_thread(void *data) mod_timer(&chip->wt, jiffies+2*HZ); } - dprintk("%s: thread exiting\n", i2c_clientname(&chip->c)); + dprintk("%s: thread exiting\n", chip->c.name); complete_and_exit(&chip->texit, 0); return 0; } @@ -314,7 +311,7 @@ static void generic_checkmode(struct CHIPSTATE *chip) if (mode == chip->prevmode) return; - dprintk("%s: thread checkmode\n", 
i2c_clientname(&chip->c)); + dprintk("%s: thread checkmode\n", chip->c.name); chip->prevmode = mode; if (mode & VIDEO_SOUND_STEREO) @@ -1501,7 +1498,7 @@ static int chip_attach(struct i2c_adapter *adap, int addr, int kind) (desc->flags & CHIP_HAS_INPUTSEL) ? " audiomux" : ""); /* fill required data structures */ - strcpy(i2c_clientname(&chip->c),desc->name); + strcpy(chip->c.name, desc->name); chip->type = desc-chiplist; chip->shadow.count = desc->registers+1; chip->prevmode = -1; @@ -1538,7 +1535,7 @@ static int chip_attach(struct i2c_adapter *adap, int addr, int kind) chip->tpid = kernel_thread(chip_thread,(void *)chip,0); if (chip->tpid < 0) printk(KERN_WARNING "%s: kernel_thread() failed\n", - i2c_clientname(&chip->c)); + chip->c.name); wake_up_interruptible(&chip->wq); } return 0; @@ -1591,7 +1588,7 @@ static int chip_command(struct i2c_client *client, struct CHIPSTATE *chip = i2c_get_clientdata(client); struct CHIPDESC *desc = chiplist + chip->type; - dprintk("%s: chip_command 0x%x\n",i2c_clientname(&chip->c),cmd); + dprintk("%s: chip_command 0x%x\n", chip->c.name, cmd); switch (cmd) { case AUDC_SET_INPUT: @@ -1702,7 +1699,7 @@ static struct i2c_driver driver = { static struct i2c_client client_template = { - I2C_DEVNAME("(unset)"), + .name = "(unset)", .flags = I2C_CLIENT_ALLOW_USE, .driver = &driver, }; diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c index f0635b19de30..a43301a154af 100644 --- a/drivers/media/video/tvmixer.c +++ b/drivers/media/video/tvmixer.c @@ -91,7 +91,7 @@ static int tvmixer_ioctl(struct inode *inode, struct file *file, unsigned int cm if (cmd == SOUND_MIXER_INFO) { mixer_info info; strlcpy(info.id, "tv card", sizeof(info.id)); - strlcpy(info.name, i2c_clientname(client), sizeof(info.name)); + strlcpy(info.name, client->name, sizeof(info.name)); info.modify_counter = 42 /* FIXME */; if (copy_to_user(argp, &info, sizeof(info))) return -EFAULT; @@ -100,7 +100,7 @@ static int tvmixer_ioctl(struct inode *inode, 
struct file *file, unsigned int cm if (cmd == SOUND_OLD_MIXER_INFO) { _old_mixer_info info; strlcpy(info.id, "tv card", sizeof(info.id)); - strlcpy(info.name, i2c_clientname(client), sizeof(info.name)); + strlcpy(info.name, client->name, sizeof(info.name)); if (copy_to_user(argp, &info, sizeof(info))) return -EFAULT; return 0; @@ -295,7 +295,7 @@ static int tvmixer_clients(struct i2c_client *client) devices[i].dev = NULL; devices[i].minor = -1; printk("tvmixer: %s unregistered (#1)\n", - i2c_clientname(client)); + client->name); return 0; } } @@ -354,7 +354,7 @@ static void __exit tvmixer_cleanup_module(void) if (devices[i].minor != -1) { unregister_sound_mixer(devices[i].minor); printk("tvmixer: %s unregistered (#2)\n", - i2c_clientname(devices[i].dev)); + devices[i].dev->name); } } } diff --git a/drivers/media/video/zoran_card.c b/drivers/media/video/zoran_card.c index 25743085b2d5..eed2acea1779 100644 --- a/drivers/media/video/zoran_card.c +++ b/drivers/media/video/zoran_card.c @@ -737,7 +737,7 @@ static struct i2c_algo_bit_data zoran_i2c_bit_data_template = { }; static struct i2c_adapter zoran_i2c_adapter_template = { - I2C_DEVNAME("zr36057"), + .name = "zr36057", .id = I2C_HW_B_ZR36067, .algo = NULL, .client_register = zoran_i2c_client_register, diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c index 83e8dd627154..f36c0b6c6e36 100644 --- a/drivers/usb/media/w9968cf.c +++ b/drivers/usb/media/w9968cf.c @@ -1523,7 +1523,6 @@ static u32 w9968cf_i2c_func(struct i2c_adapter* adap) static int w9968cf_i2c_attach_inform(struct i2c_client* client) { struct w9968cf_device* cam = i2c_get_adapdata(client->adapter); - const char* clientname = i2c_clientname(client); int id = client->driver->id, err = 0; if (id == I2C_DRIVERID_OVCAMCHIP) { @@ -1535,12 +1534,12 @@ static int w9968cf_i2c_attach_inform(struct i2c_client* client) } } else { DBG(4, "Rejected client [%s] with driver [%s]", - clientname, client->driver->name) + client->name, 
client->driver->name) return -EINVAL; } DBG(5, "I2C attach client [%s] with driver [%s]", - clientname, client->driver->name) + client->name, client->driver->name) return 0; } @@ -1549,12 +1548,11 @@ static int w9968cf_i2c_attach_inform(struct i2c_client* client) static int w9968cf_i2c_detach_inform(struct i2c_client* client) { struct w9968cf_device* cam = i2c_get_adapdata(client->adapter); - const char* clientname = i2c_clientname(client); if (cam->sensor_client == client) cam->sensor_client = NULL; - DBG(5, "I2C detach client [%s]", clientname) + DBG(5, "I2C detach client [%s]", client->name) return 0; } diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 233c153b12b1..1ead5195fde4 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -178,13 +178,6 @@ static inline void i2c_set_clientdata (struct i2c_client *dev, void *data) dev_set_drvdata (&dev->dev, data); } -#define I2C_DEVNAME(str) .name = str - -static inline char *i2c_clientname(struct i2c_client *c) -{ - return &c->name[0]; -} - /* * The following structs are for those who like to implement new bus drivers: * i2c_algorithm is the interface to a class of hardware solutions which can -- cgit v1.2.3 From 77ae84554cc0178e03862391599a0cedf96fa4c4 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Sat, 3 Sep 2005 10:52:11 +0200 Subject: [PATCH] I2C: Drop the I2C_ACK_TEST ioctl Drop the I2C_ACK_TEST ioctl, which was commented out. It never really existed (not after 1999 anyway), and there is no such thing as a ack test on I2C/SMBus anyway. 
Signed-off-by: Jean Delvare Signed-off-by: Greg Kroah-Hartman --- include/linux/i2c.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 1ead5195fde4..be35332b67e6 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -508,9 +508,6 @@ union i2c_smbus_data { #define I2C_FUNCS 0x0705 /* Get the adapter functionality */ #define I2C_RDWR 0x0707 /* Combined R/W transfer (one stop only)*/ #define I2C_PEC 0x0708 /* != 0 for SMBus PEC */ -#if 0 -#define I2C_ACK_TEST 0x0710 /* See if a slave is at a specific address */ -#endif #define I2C_SMBUS 0x0720 /* SMBus-level access */ -- cgit v1.2.3 From d856f1e337782326c638c70c0b4df2b909350dec Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Fri, 19 Aug 2005 09:14:01 -0400 Subject: [PATCH] klist: fix klist to have the same klist_add semantics as list_head At the moment, the list_head semantics are list_add(node, head), whereas the current klist semantics are klist_add(head, node). This is bound to cause confusion, and since klist is the newcomer, it should follow the list_head semantics.
I also added missing include guards to klist.h Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman --- drivers/base/bus.c | 4 ++-- drivers/base/core.c | 2 +- drivers/base/dd.c | 2 +- include/linux/klist.h | 8 ++++++-- lib/klist.c | 8 ++++---- 5 files changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 6966aff74efe..17e96698410e 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -360,7 +360,7 @@ int bus_add_device(struct device * dev) if (bus) { pr_debug("bus %s: add device %s\n", bus->name, dev->bus_id); device_attach(dev); - klist_add_tail(&bus->klist_devices, &dev->knode_bus); + klist_add_tail(&dev->knode_bus, &bus->klist_devices); error = device_add_attrs(bus, dev); if (!error) { sysfs_create_link(&bus->devices.kobj, &dev->kobj, dev->bus_id); @@ -448,7 +448,7 @@ int bus_add_driver(struct device_driver * drv) } driver_attach(drv); - klist_add_tail(&bus->klist_drivers, &drv->knode_bus); + klist_add_tail(&drv->knode_bus, &bus->klist_drivers); module_add_driver(drv->owner, drv); driver_add_attrs(bus, drv); diff --git a/drivers/base/core.c b/drivers/base/core.c index efe03a024a5b..c8a33df00761 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -249,7 +249,7 @@ int device_add(struct device *dev) if ((error = bus_add_device(dev))) goto BusError; if (parent) - klist_add_tail(&parent->klist_children, &dev->knode_parent); + klist_add_tail(&dev->knode_parent, &parent->klist_children); /* notify platform of device entry */ if (platform_notify) diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 16323f9cbff0..d5bbce38282f 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -42,7 +42,7 @@ void device_bind_driver(struct device * dev) { pr_debug("bound device '%s' to driver '%s'\n", dev->bus_id, dev->driver->name); - klist_add_tail(&dev->driver->klist_devices, &dev->knode_driver); + klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); 
sysfs_create_link(&dev->driver->kobj, &dev->kobj, kobject_name(&dev->kobj)); sysfs_create_link(&dev->kobj, &dev->driver->kobj, "driver"); diff --git a/include/linux/klist.h b/include/linux/klist.h index eebf5e5696ec..c4d1fae4dd89 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -9,6 +9,9 @@ * This file is rleased under the GPL v2. */ +#ifndef _LINUX_KLIST_H +#define _LINUX_KLIST_H + #include #include #include @@ -31,8 +34,8 @@ struct klist_node { struct completion n_removed; }; -extern void klist_add_tail(struct klist * k, struct klist_node * n); -extern void klist_add_head(struct klist * k, struct klist_node * n); +extern void klist_add_tail(struct klist_node * n, struct klist * k); +extern void klist_add_head(struct klist_node * n, struct klist * k); extern void klist_del(struct klist_node * n); extern void klist_remove(struct klist_node * n); @@ -53,3 +56,4 @@ extern void klist_iter_init_node(struct klist * k, struct klist_iter * i, extern void klist_iter_exit(struct klist_iter * i); extern struct klist_node * klist_next(struct klist_iter * i); +#endif diff --git a/lib/klist.c b/lib/klist.c index 738ab810160a..a70c836c5c4c 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -79,11 +79,11 @@ static void klist_node_init(struct klist * k, struct klist_node * n) /** * klist_add_head - Initialize a klist_node and add it to front. - * @k: klist it's going on. * @n: node we're adding. + * @k: klist it's going on. */ -void klist_add_head(struct klist * k, struct klist_node * n) +void klist_add_head(struct klist_node * n, struct klist * k) { klist_node_init(k, n); add_head(k, n); @@ -94,11 +94,11 @@ EXPORT_SYMBOL_GPL(klist_add_head); /** * klist_add_tail - Initialize a klist_node and add it to back. - * @k: klist it's going on. * @n: node we're adding. + * @k: klist it's going on. 
*/ -void klist_add_tail(struct klist * k, struct klist_node * n) +void klist_add_tail(struct klist_node * n, struct klist * k) { klist_node_init(k, n); add_tail(k, n); -- cgit v1.2.3 From 93c37f292110a37dd77e4cc0aaf1c341d79bf6aa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Sep 2005 13:57:08 -0700 Subject: [SERIAL]: Avoid 'statement with no effect' warnings. When SUPPORT_SYSRQ is false, gcc can emit warnings for the uart_handle_sysrq_char() that results. Using an empty inline returning zero kills the warning. Signed-off-by: David S. Miller --- include/linux/serial_core.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cf0f64ea2bc0..9b12fe731612 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -385,11 +385,11 @@ int uart_resume_port(struct uart_driver *reg, struct uart_port *port); /* * The following are helper functions for the low level drivers. */ -#ifdef SUPPORT_SYSRQ static inline int uart_handle_sysrq_char(struct uart_port *port, unsigned int ch, struct pt_regs *regs) { +#ifdef SUPPORT_SYSRQ if (port->sysrq) { if (ch && time_before(jiffies, port->sysrq)) { handle_sysrq(ch, regs, NULL); @@ -398,11 +398,9 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch, } port->sysrq = 0; } +#endif return 0; } -#else -#define uart_handle_sysrq_char(port,ch,regs) (0) -#endif /* * We do the SysRQ and SAK checking like this... -- cgit v1.2.3 From 2248bcfcd8fb622ec88b8587d0c1f139635ffd2e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 6 Sep 2005 15:06:42 -0700 Subject: [NETFILTER]: Add support for permanent expectations A permanent expectation exists until it times out and can expect multiple related connections. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 5 +++++ net/ipv4/netfilter/ip_conntrack_amanda.c | 1 + net/ipv4/netfilter/ip_conntrack_core.c | 12 ++++++++---- net/ipv4/netfilter/ip_conntrack_ftp.c | 1 + net/ipv4/netfilter/ip_conntrack_irc.c | 1 + net/ipv4/netfilter/ip_conntrack_netlink.c | 1 + net/ipv4/netfilter/ip_conntrack_tftp.c | 1 + 7 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 088742befe49..7e033e9271a8 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -263,6 +263,9 @@ struct ip_conntrack_expect /* Unique ID */ unsigned int id; + /* Flags */ + unsigned int flags; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -272,6 +275,8 @@ struct ip_conntrack_expect #endif }; +#define IP_CT_EXPECT_PERMANENT 0x1 + static inline struct ip_conntrack * tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash) { diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index be4c9eb3243f..dc20881004bc 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -108,6 +108,7 @@ static int help(struct sk_buff **pskb, } exp->expectfn = NULL; + exp->flags = 0; exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; exp->tuple.src.u.tcp.port = 0; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index a0648600190e..e23e8ca476c0 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -264,10 +264,14 @@ find_expectation(const struct ip_conntrack_tuple *tuple) master ct never got confirmed, we'd hold a reference to it and weird things would happen to future packets). 
*/ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) - && is_confirmed(i->master) - && del_timer(&i->timeout)) { - unlink_expect(i); - return i; + && is_confirmed(i->master)) { + if (i->flags & IP_CT_EXPECT_PERMANENT) { + atomic_inc(&i->use); + return i; + } else if (del_timer(&i->timeout)) { + unlink_expect(i); + return i; + } } } return NULL; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 3a2627db1729..1b79ec36085f 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -421,6 +421,7 @@ static int help(struct sk_buff **pskb, { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; + exp->flags = 0; /* Now, NAT might want to mangle the packet, and register the * (possibly changed) expectation itself. */ diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index 25438eec21a1..d7a8a98c05e1 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -221,6 +221,7 @@ static int help(struct sk_buff **pskb, { { 0, { 0 } }, { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; + exp->flags = 0; if (ip_nat_irc_hook) ret = ip_nat_irc_hook(pskb, ctinfo, addr_beg_p - ib_ptr, diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index a4e9278db4ed..3dc3a7bab3b4 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1413,6 +1413,7 @@ ctnetlink_create_expect(struct nfattr *cda[]) } exp->expectfn = NULL; + exp->flags = 0; exp->master = ct; memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index f8ff170f390a..d2b590533452 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ 
-75,6 +75,7 @@ static int tftp_help(struct sk_buff **pskb, exp->mask.dst.u.udp.port = 0xffff; exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; + exp->flags = 0; DEBUGP("expect: "); DUMP_TUPLE(&exp->tuple); -- cgit v1.2.3 From 03486a4f838c55481317fca5ac2e7d12550a4fb7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 6 Sep 2005 15:09:43 -0700 Subject: [NETFILTER]: Handle NAT module load race When the NAT module is loaded when connections are already confirmed it must not change their tuples anymore. This is especially important with CONFIG_NETFILTER_DEBUG, the netfilter listhelp functions will refuse to remove an entry from a list when it can not be found on the list, so when a changed tuple hashes to a new bucket the entry is kept in the list until and after the conntrack is freed. Allocate the exact conntrack tuple for NAT for already confirmed connections or drop them if that fails. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_nat_rule.h | 5 +++++ net/ipv4/netfilter/ip_nat_rule.c | 21 +++++++++++++++++++++ net/ipv4/netfilter/ip_nat_standalone.c | 8 ++++++-- 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_nat_rule.h b/include/linux/netfilter_ipv4/ip_nat_rule.h index fecd2a06dcd8..73b9552e6a89 100644 --- a/include/linux/netfilter_ipv4/ip_nat_rule.h +++ b/include/linux/netfilter_ipv4/ip_nat_rule.h @@ -19,5 +19,10 @@ extern unsigned int alloc_null_binding(struct ip_conntrack *conntrack, struct ip_nat_info *info, unsigned int hooknum); + +extern unsigned int +alloc_null_binding_confirmed(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum); #endif #endif /* _IP_NAT_RULE_H */ diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 60d70fa41a15..cb66b8bddeb3 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -255,6 +255,27 @@ 
alloc_null_binding(struct ip_conntrack *conntrack, return ip_nat_setup_info(conntrack, &range, hooknum); } +unsigned int +alloc_null_binding_confirmed(struct ip_conntrack *conntrack, + struct ip_nat_info *info, + unsigned int hooknum) +{ + u_int32_t ip + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); + u_int16_t all + = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC + ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all + : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); + struct ip_nat_range range + = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; + + DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", + conntrack, NIPQUAD(ip)); + return ip_nat_setup_info(conntrack, &range, hooknum); +} + int ip_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 89db052add81..0ff368b131f6 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -123,8 +123,12 @@ ip_nat_fn(unsigned int hooknum, if (!ip_nat_initialized(ct, maniptype)) { unsigned int ret; - /* LOCAL_IN hook doesn't have a chain! */ - if (hooknum == NF_IP_LOCAL_IN) + if (unlikely(is_confirmed(ct))) + /* NAT module was loaded late */ + ret = alloc_null_binding_confirmed(ct, info, + hooknum); + else if (hooknum == NF_IP_LOCAL_IN) + /* LOCAL_IN hook doesn't have a chain! */ ret = alloc_null_binding(ct, info, hooknum); else ret = ip_nat_rule_find(pskb, hooknum, -- cgit v1.2.3 From 49719eb355d32fa07793017b4b46b1c02e88b275 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 6 Sep 2005 15:10:46 -0700 Subject: [NETFILTER]: kill __ip_ct_expect_unlink_destroy The following patch kills __ip_ct_expect_unlink_destroy and exports unlink_expect as ip_ct_unlink_expect.
As it was discussed [1], the function __ip_ct_expect_unlink_destroy is a bit confusing so better do the following sequence: ip_ct_destroy_expect and ip_conntrack_expect_put. [1] https://lists.netfilter.org/pipermail/netfilter-devel/2005-August/020794.html Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack_core.h | 2 +- net/ipv4/netfilter/ip_conntrack_core.c | 20 +++++++------------- net/ipv4/netfilter/ip_conntrack_netlink.c | 12 ++++++++---- net/ipv4/netfilter/ip_conntrack_standalone.c | 2 +- 4 files changed, 17 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index dc4d2a0575de..907d4f5ca5dc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -52,7 +52,7 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) return ret; } -extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); +extern void ip_ct_unlink_expect(struct ip_conntrack_expect *exp); extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index babce304c619..19cba16e6e1e 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -197,7 +197,7 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, /* ip_conntrack_expect helper functions */ -static void unlink_expect(struct ip_conntrack_expect *exp) +void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) { ASSERT_WRITE_LOCK(&ip_conntrack_lock); IP_NF_ASSERT(!timer_pending(&exp->timeout)); @@ -207,18 +207,12 @@ static void unlink_expect(struct ip_conntrack_expect *exp) ip_conntrack_expect_put(exp); } -void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp) -{ - 
unlink_expect(exp); - ip_conntrack_expect_put(exp); -} - static void expectation_timed_out(unsigned long ul_expect) { struct ip_conntrack_expect *exp = (void *)ul_expect; write_lock_bh(&ip_conntrack_lock); - unlink_expect(exp); + ip_ct_unlink_expect(exp); write_unlock_bh(&ip_conntrack_lock); ip_conntrack_expect_put(exp); } @@ -269,7 +263,7 @@ find_expectation(const struct ip_conntrack_tuple *tuple) atomic_inc(&i->use); return i; } else if (del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); return i; } } @@ -288,7 +282,7 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { if (i->master == ct && del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); ip_conntrack_expect_put(i); } } @@ -929,7 +923,7 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) /* choose the the oldest expectation to evict */ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { if (expect_matches(i, exp) && del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); write_unlock_bh(&ip_conntrack_lock); ip_conntrack_expect_put(i); return; @@ -986,7 +980,7 @@ static void evict_oldest_expect(struct ip_conntrack *master) list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { if (i->master == master) { if (del_timer(&i->timeout)) { - unlink_expect(i); + ip_ct_unlink_expect(i); ip_conntrack_expect_put(i); } break; @@ -1103,7 +1097,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { if (exp->master->helper == me && del_timer(&exp->timeout)) { - unlink_expect(exp); + ip_ct_unlink_expect(exp); ip_conntrack_expect_put(exp); } } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 3dc3a7bab3b4..15aef3564742 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ 
b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1349,8 +1349,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { if (exp->master->helper == h - && del_timer(&exp->timeout)) - __ip_ct_expect_unlink_destroy(exp); + && del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); + ip_conntrack_expect_put(exp); + } } write_unlock(&ip_conntrack_lock); } else { @@ -1358,8 +1360,10 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, write_lock_bh(&ip_conntrack_lock); list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { - if (del_timer(&exp->timeout)) - __ip_ct_expect_unlink_destroy(exp); + if (del_timer(&exp->timeout)) { + ip_ct_unlink_expect(exp); + ip_conntrack_expect_put(exp); + } } write_unlock_bh(&ip_conntrack_lock); } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ee5895afd0c3..ae3e3e655db5 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -998,7 +998,7 @@ EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); -EXPORT_SYMBOL_GPL(__ip_ct_expect_unlink_destroy); +EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); -- cgit v1.2.3 From f2c383988d68c91a7d474b7cf26c0a2df49bbafe Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 6 Sep 2005 15:48:03 -0700 Subject: [NET]: skb_get/set_timestamp use const The new timestamp get/set routines should have const attribute on parameters (helps to indicate direction). Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 42edce6abe23..da7da9c0ed1b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1251,7 +1251,7 @@ extern void skb_add_mtu(int mtu); * This function converts the offset back to a struct timeval and stores * it in stamp. */ -static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp) { stamp->tv_sec = skb->tstamp.off_sec; stamp->tv_usec = skb->tstamp.off_usec; @@ -1270,7 +1270,7 @@ static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) * This function converts a struct timeval to an offset and stores * it in the skb. */ -static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp) { skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; -- cgit v1.2.3 From aaec0fab5f8809fe1509fdc204e769bb35ebe41a Mon Sep 17 00:00:00 2001 From: Jens Osterkamp Date: Mon, 5 Sep 2005 15:19:29 -0700 Subject: [PATCH] net: add driver for the NIC on Cell Blades This patch adds a driver for a new 1000 Mbit ethernet NIC. It is integrated on the south bridge that is used for our Cell Blades. The code gets the MAC address from the Open Firmware device tree, so it won't compile on platforms other than ppc64. This is the first public release, so I don't expect the first version to get merged, but I'd aim for integration within the 2.6.13 time frame. 
Cc: Utz Bacher Signed-off-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Jeff Garzik --- drivers/net/Kconfig | 7 + drivers/net/Makefile | 2 + drivers/net/spider_net.c | 2298 ++++++++++++++++++++++++++++++++++++++ drivers/net/spider_net.h | 469 ++++++++ drivers/net/spider_net_ethtool.c | 107 ++ include/linux/pci_ids.h | 1 + 6 files changed, 2884 insertions(+) create mode 100644 drivers/net/spider_net.c create mode 100644 drivers/net/spider_net.h create mode 100644 drivers/net/spider_net_ethtool.c (limited to 'include/linux') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index ae9e7a579b94..6bb9232514b4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -2058,6 +2058,13 @@ config BNX2 To compile this driver as a module, choose M here: the module will be called bnx2. This is recommended. +config SPIDER_NET + tristate "Spider Gigabit Ethernet driver" + depends on PCI && PPC_BPA + help + This driver supports the Gigabit Ethernet chips present on the + Cell Processor-Based Blades from IBM. + config GIANFAR tristate "Gianfar Ethernet" depends on 85xx || 83xx diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 5baafcd55610..8645c843cf4d 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -54,6 +54,8 @@ obj-$(CONFIG_STNIC) += stnic.o 8390.o obj-$(CONFIG_FEALNX) += fealnx.o obj-$(CONFIG_TIGON3) += tg3.o obj-$(CONFIG_BNX2) += bnx2.o +spidernet-y += spider_net.o spider_net_ethtool.o sungem_phy.o +obj-$(CONFIG_SPIDER_NET) += spidernet.o obj-$(CONFIG_TC35815) += tc35815.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SK98LIN) += sk98lin/ diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c new file mode 100644 index 000000000000..692a0437fef7 --- /dev/null +++ b/drivers/net/spider_net.c @@ -0,0 +1,2298 @@ +/* + * Network device driver for Cell Processor-Based Blade + * + * (C) Copyright IBM Corp. 
2005 + * + * Authors : Utz Bacher + * Jens Osterkamp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "spider_net.h" + +MODULE_AUTHOR("Utz Bacher and Jens Osterkamp " \ + ""); +MODULE_DESCRIPTION("Spider Southbridge Gigabit Ethernet driver"); +MODULE_LICENSE("GPL"); + +static int rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_DEFAULT; +static int tx_descriptors = SPIDER_NET_TX_DESCRIPTORS_DEFAULT; + +module_param(rx_descriptors, int, 0644); +module_param(tx_descriptors, int, 0644); + +MODULE_PARM_DESC(rx_descriptors, "number of descriptors used " \ + "in rx chains"); +MODULE_PARM_DESC(tx_descriptors, "number of descriptors used " \ + "in tx chain"); + +char spider_net_driver_name[] = "spidernet"; + +static struct pci_device_id spider_net_pci_tbl[] = { + { PCI_VENDOR_ID_TOSHIBA_2, PCI_DEVICE_ID_TOSHIBA_SPIDER_NET, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { 0, } +}; + +MODULE_DEVICE_TABLE(pci, spider_net_pci_tbl); + +/** + * spider_net_read_reg - reads an SMMIO register of a card + * @card: device structure + * @reg: register to read from + * + * returns the 
content of the specified SMMIO register. + */ +static u32 +spider_net_read_reg(struct spider_net_card *card, u32 reg) +{ + u32 value; + + value = readl(card->regs + reg); + value = le32_to_cpu(value); + + return value; +} + +/** + * spider_net_write_reg - writes to an SMMIO register of a card + * @card: device structure + * @reg: register to write to + * @value: value to write into the specified SMMIO register + */ +static void +spider_net_write_reg(struct spider_net_card *card, u32 reg, u32 value) +{ + value = cpu_to_le32(value); + writel(value, card->regs + reg); +} + +/** + * spider_net_rx_irq_off - switch off rx irq on this spider card + * @card: device structure + * + * switches off rx irq by masking them out in the GHIINTnMSK register + */ +static void +spider_net_rx_irq_off(struct spider_net_card *card) +{ + u32 regvalue; + unsigned long flags; + + spin_lock_irqsave(&card->intmask_lock, flags); + regvalue = spider_net_read_reg(card, SPIDER_NET_GHIINT0MSK); + regvalue &= ~SPIDER_NET_RXINT; + spider_net_write_reg(card, SPIDER_NET_GHIINT0MSK, regvalue); + spin_unlock_irqrestore(&card->intmask_lock, flags); +} + +/** spider_net_write_phy - write to phy register + * @netdev: adapter to be written to + * @mii_id: id of MII + * @reg: PHY register + * @val: value to be written to phy register + * + * spider_net_write_phy_register writes to an arbitrary PHY + * register via the spider GPCWOPCMD register. We assume the queue does + * not run full (not more than 15 commands outstanding). 
+ **/ +static void +spider_net_write_phy(struct net_device *netdev, int mii_id, + int reg, int val) +{ + struct spider_net_card *card = netdev_priv(netdev); + u32 writevalue; + + writevalue = ((u32)mii_id << 21) | + ((u32)reg << 16) | ((u32)val); + + spider_net_write_reg(card, SPIDER_NET_GPCWOPCMD, writevalue); +} + +/** spider_net_read_phy - read from phy register + * @netdev: network device to be read from + * @mii_id: id of MII + * @reg: PHY register + * + * Returns value read from PHY register + * + * spider_net_read_phy reads from an arbitrary PHY + * register via the spider GPCROPCMD register + **/ +static int +spider_net_read_phy(struct net_device *netdev, int mii_id, int reg) +{ + struct spider_net_card *card = netdev_priv(netdev); + u32 readvalue; + + readvalue = ((u32)mii_id << 21) | ((u32)reg << 16); + spider_net_write_reg(card, SPIDER_NET_GPCROPCMD, readvalue); + + /* we don't use semaphores to wait for an SPIDER_NET_GPROPCMPINT + * interrupt, as we poll for the completion of the read operation + * in spider_net_read_phy. 
Should take about 50 us */ + do { + readvalue = spider_net_read_reg(card, SPIDER_NET_GPCROPCMD); + } while (readvalue & SPIDER_NET_GPREXEC); + + readvalue &= SPIDER_NET_GPRDAT_MASK; + + return readvalue; +} + +/** + * spider_net_rx_irq_on - switch on rx irq on this spider card + * @card: device structure + * + * switches on rx irq by enabling them in the GHIINTnMSK register + */ +static void +spider_net_rx_irq_on(struct spider_net_card *card) +{ + u32 regvalue; + unsigned long flags; + + spin_lock_irqsave(&card->intmask_lock, flags); + regvalue = spider_net_read_reg(card, SPIDER_NET_GHIINT0MSK); + regvalue |= SPIDER_NET_RXINT; + spider_net_write_reg(card, SPIDER_NET_GHIINT0MSK, regvalue); + spin_unlock_irqrestore(&card->intmask_lock, flags); +} + +/** + * spider_net_tx_irq_off - switch off tx irq on this spider card + * @card: device structure + * + * switches off tx irq by masking them out in the GHIINTnMSK register + */ +static void +spider_net_tx_irq_off(struct spider_net_card *card) +{ + u32 regvalue; + unsigned long flags; + + spin_lock_irqsave(&card->intmask_lock, flags); + regvalue = spider_net_read_reg(card, SPIDER_NET_GHIINT0MSK); + regvalue &= ~SPIDER_NET_TXINT; + spider_net_write_reg(card, SPIDER_NET_GHIINT0MSK, regvalue); + spin_unlock_irqrestore(&card->intmask_lock, flags); +} + +/** + * spider_net_tx_irq_on - switch on tx irq on this spider card + * @card: device structure + * + * switches on tx irq by enabling them in the GHIINTnMSK register + */ +static void +spider_net_tx_irq_on(struct spider_net_card *card) +{ + u32 regvalue; + unsigned long flags; + + spin_lock_irqsave(&card->intmask_lock, flags); + regvalue = spider_net_read_reg(card, SPIDER_NET_GHIINT0MSK); + regvalue |= SPIDER_NET_TXINT; + spider_net_write_reg(card, SPIDER_NET_GHIINT0MSK, regvalue); + spin_unlock_irqrestore(&card->intmask_lock, flags); +} + +/** + * spider_net_set_promisc - sets the unicast address or the promiscuous mode + * @card: card structure + * + * 
spider_net_set_promisc sets the unicast destination address filter and + * thus either allows for non-promisc mode or promisc mode + */ +static void +spider_net_set_promisc(struct spider_net_card *card) +{ + u32 macu, macl; + struct net_device *netdev = card->netdev; + + if (netdev->flags & IFF_PROMISC) { + /* clear destination entry 0 */ + spider_net_write_reg(card, SPIDER_NET_GMRUAFILnR, 0); + spider_net_write_reg(card, SPIDER_NET_GMRUAFILnR + 0x04, 0); + spider_net_write_reg(card, SPIDER_NET_GMRUA0FIL15R, + SPIDER_NET_PROMISC_VALUE); + } else { + macu = netdev->dev_addr[0]; + macu <<= 8; + macu |= netdev->dev_addr[1]; + memcpy(&macl, &netdev->dev_addr[2], sizeof(macl)); + + macu |= SPIDER_NET_UA_DESCR_VALUE; + spider_net_write_reg(card, SPIDER_NET_GMRUAFILnR, macu); + spider_net_write_reg(card, SPIDER_NET_GMRUAFILnR + 0x04, macl); + spider_net_write_reg(card, SPIDER_NET_GMRUA0FIL15R, + SPIDER_NET_NONPROMISC_VALUE); + } +} + +/** + * spider_net_get_mac_address - read mac address from spider card + * @card: device structure + * + * reads MAC address from GMACUNIMACU and GMACUNIMACL registers + */ +static int +spider_net_get_mac_address(struct net_device *netdev) +{ + struct spider_net_card *card = netdev_priv(netdev); + u32 macl, macu; + + macl = spider_net_read_reg(card, SPIDER_NET_GMACUNIMACL); + macu = spider_net_read_reg(card, SPIDER_NET_GMACUNIMACU); + + netdev->dev_addr[0] = (macu >> 24) & 0xff; + netdev->dev_addr[1] = (macu >> 16) & 0xff; + netdev->dev_addr[2] = (macu >> 8) & 0xff; + netdev->dev_addr[3] = macu & 0xff; + netdev->dev_addr[4] = (macl >> 8) & 0xff; + netdev->dev_addr[5] = macl & 0xff; + + if (!is_valid_ether_addr(&netdev->dev_addr[0])) + return -EINVAL; + + return 0; +} + +/** + * spider_net_get_descr_status -- returns the status of a descriptor + * @descr: descriptor to look at + * + * returns the status as in the dmac_cmd_status field of the descriptor + */ +static enum spider_net_descr_status +spider_net_get_descr_status(struct 
spider_net_descr *descr) +{ + u32 cmd_status; + rmb(); + cmd_status = descr->dmac_cmd_status; + rmb(); + cmd_status >>= SPIDER_NET_DESCR_IND_PROC_SHIFT; + /* no need to mask out any bits, as cmd_status is 32 bits wide only + * (and unsigned) */ + return cmd_status; +} + +/** + * spider_net_set_descr_status -- sets the status of a descriptor + * @descr: descriptor to change + * @status: status to set in the descriptor + * + * changes the status to the specified value. Doesn't change other bits + * in the status + */ +static void +spider_net_set_descr_status(struct spider_net_descr *descr, + enum spider_net_descr_status status) +{ + u32 cmd_status; + /* read the status */ + mb(); + cmd_status = descr->dmac_cmd_status; + /* clean the upper 4 bits */ + cmd_status &= SPIDER_NET_DESCR_IND_PROC_MASKO; + /* add the status to it */ + cmd_status |= ((u32)status)<<SPIDER_NET_DESCR_IND_PROC_SHIFT; + descr->dmac_cmd_status = cmd_status; + wmb(); +} + +/** + * spider_net_free_chain - free descriptor chain + * @card: card structure + * @chain: address of chain + * + */ +static void +spider_net_free_chain(struct spider_net_card *card, + struct spider_net_descr_chain *chain) +{ + struct spider_net_descr *descr; + + for (descr = chain->tail; !descr->bus_addr; descr = descr->next) { + pci_unmap_single(card->pdev, descr->bus_addr, + SPIDER_NET_DESCR_SIZE, PCI_DMA_BIDIRECTIONAL); + descr->bus_addr = 0; + } +} + +/** + * spider_net_init_chain - links descriptor chain + * @card: card structure + * @chain: address of chain + * @start_descr: address of descriptor array + * @no: number of descriptors + * + * we manage a circular list that mirrors the hardware structure, + * except that the hardware uses bus addresses. 
+ * + * returns 0 on success, <0 on failure + */ +static int +spider_net_init_chain(struct spider_net_card *card, + struct spider_net_descr_chain *chain, + struct spider_net_descr *start_descr, int no) +{ + int i; + struct spider_net_descr *descr; + + spin_lock_init(&card->chain_lock); + + descr = start_descr; + memset(descr, 0, sizeof(*descr) * no); + + /* set up the hardware pointers in each descriptor */ + for (i=0; i<no; i++, descr++) { + spider_net_set_descr_status(descr, SPIDER_NET_DESCR_NOT_IN_USE); + + descr->bus_addr = + pci_map_single(card->pdev, descr, + SPIDER_NET_DESCR_SIZE, + PCI_DMA_BIDIRECTIONAL); + + if (descr->bus_addr == DMA_ERROR_CODE) + goto iommu_error; + + descr->next = descr + 1; + descr->prev = descr - 1; + + } + /* do actual circular list */ + (descr-1)->next = start_descr; + start_descr->prev = descr-1; + + descr = start_descr; + for (i=0; i < no; i++, descr++) { + descr->next_descr_addr = descr->next->bus_addr; + } + + chain->head = start_descr; + chain->tail = start_descr; + + return 0; + +iommu_error: + descr = start_descr; + for (i=0; i < no; i++, descr++) + if (descr->bus_addr) + pci_unmap_single(card->pdev, descr->bus_addr, + SPIDER_NET_DESCR_SIZE, PCI_DMA_BIDIRECTIONAL); + return -ENOMEM; +} + +/** + * spider_net_free_rx_chain_contents - frees descr contents in rx chain + * @card: card structure + * + * returns 0 on success, <0 on failure + */ +static void +spider_net_free_rx_chain_contents(struct spider_net_card *card) +{ + struct spider_net_descr *descr; + + descr = card->rx_chain.head; + while (descr->next != card->rx_chain.head) { + if (descr->skb) { + dev_kfree_skb(descr->skb); + pci_unmap_single(card->pdev, descr->buf_addr, + SPIDER_NET_MAX_MTU, + PCI_DMA_BIDIRECTIONAL); + } + descr = descr->next; + } +} + +/** + * spider_net_prepare_rx_descr - reinitializes a rx descriptor + * @card: card structure + * @descr: descriptor to re-init + * + * return 0 on success, <0 on failure + * + * allocates a new rx skb, iommu-maps it and attaches it to the descriptor. 
+ * Activate the descriptor state-wise + */ +static int +spider_net_prepare_rx_descr(struct spider_net_card *card, + struct spider_net_descr *descr) +{ + int error = 0; + int offset; + int bufsize; + + /* we need to round up the buffer size to a multiple of 128 */ + bufsize = (SPIDER_NET_MAX_MTU + SPIDER_NET_RXBUF_ALIGN - 1) & + (~(SPIDER_NET_RXBUF_ALIGN - 1)); + + /* and we need to have it 128 byte aligned, therefore we allocate a + * bit more */ + /* allocate an skb */ + descr->skb = dev_alloc_skb(bufsize + SPIDER_NET_RXBUF_ALIGN - 1); + if (!descr->skb) { + if (net_ratelimit()) + if (netif_msg_rx_err(card)) + pr_err("Not enough memory to allocate " + "rx buffer\n"); + return -ENOMEM; + } + descr->buf_size = bufsize; + descr->result_size = 0; + descr->valid_size = 0; + descr->data_status = 0; + descr->data_error = 0; + + offset = ((unsigned long)descr->skb->data) & + (SPIDER_NET_RXBUF_ALIGN - 1); + if (offset) + skb_reserve(descr->skb, SPIDER_NET_RXBUF_ALIGN - offset); + /* io-mmu-map the skb */ + descr->buf_addr = pci_map_single(card->pdev, descr->skb->data, + SPIDER_NET_MAX_MTU, + PCI_DMA_BIDIRECTIONAL); + if (descr->buf_addr == DMA_ERROR_CODE) { + dev_kfree_skb_any(descr->skb); + if (netif_msg_rx_err(card)) + pr_err("Could not iommu-map rx buffer\n"); + spider_net_set_descr_status(descr, SPIDER_NET_DESCR_NOT_IN_USE); + } else { + descr->dmac_cmd_status = SPIDER_NET_DMAC_RX_CARDOWNED; + } + + return error; +} + +/** + * spider_net_enable_rxchtails - sets RX dmac chain tail addresses + * @card: card structure + * + * spider_net_enable_rxchtails sets the RX DMAC chain tail addresses in the + * chip by writing to the appropriate register. DMA is enabled in + * spider_net_enable_rxdmac. 
+ */ +static void +spider_net_enable_rxchtails(struct spider_net_card *card) +{ + /* assume chain is aligned correctly */ + spider_net_write_reg(card, SPIDER_NET_GDADCHA , + card->rx_chain.tail->bus_addr); +} + +/** + * spider_net_enable_rxdmac - enables a receive DMA controller + * @card: card structure + * + * spider_net_enable_rxdmac enables the DMA controller by setting RX_DMA_EN + * in the GDADMACCNTR register + */ +static void +spider_net_enable_rxdmac(struct spider_net_card *card) +{ + spider_net_write_reg(card, SPIDER_NET_GDADMACCNTR, + SPIDER_NET_DMA_RX_VALUE); +} + +/** + * spider_net_refill_rx_chain - refills descriptors/skbs in the rx chains + * @card: card structure + * + * refills descriptors in all chains (last used chain first): allocates skbs + * and iommu-maps them. + */ +static void +spider_net_refill_rx_chain(struct spider_net_card *card) +{ + struct spider_net_descr_chain *chain; + int count = 0; + unsigned long flags; + + chain = &card->rx_chain; + + spin_lock_irqsave(&card->chain_lock, flags); + while (spider_net_get_descr_status(chain->head) == + SPIDER_NET_DESCR_NOT_IN_USE) { + if (spider_net_prepare_rx_descr(card, chain->head)) + break; + count++; + chain->head = chain->head->next; + } + spin_unlock_irqrestore(&card->chain_lock, flags); + + /* could be optimized, only do that, if we know the DMA processing + * has terminated */ + if (count) + spider_net_enable_rxdmac(card); +} + +/** + * spider_net_alloc_rx_skbs - allocates rx skbs in rx descriptor chains + * @card: card structure + * + * returns 0 on success, <0 on failure + */ +static int +spider_net_alloc_rx_skbs(struct spider_net_card *card) +{ + int result; + struct spider_net_descr_chain *chain; + + result = -ENOMEM; + + chain = &card->rx_chain; + /* put at least one buffer into the chain. if this fails, + * we've got a problem. 
if not, spider_net_refill_rx_chain + * will do the rest at the end of this function */ + if (spider_net_prepare_rx_descr(card, chain->head)) + goto error; + else + chain->head = chain->head->next; + + /* this will allocate the rest of the rx buffers; if not, it's + * business as usual later on */ + spider_net_refill_rx_chain(card); + return 0; + +error: + spider_net_free_rx_chain_contents(card); + return result; +} + +/** + * spider_net_release_tx_descr - processes a used tx descriptor + * @card: card structure + * @descr: descriptor to release + * + * releases a used tx descriptor (unmapping, freeing of skb) + */ +static void +spider_net_release_tx_descr(struct spider_net_card *card, + struct spider_net_descr *descr) +{ + struct sk_buff *skb; + + /* unmap the skb */ + skb = descr->skb; + pci_unmap_single(card->pdev, descr->buf_addr, skb->len, + PCI_DMA_BIDIRECTIONAL); + + dev_kfree_skb_any(skb); + + /* set status to not used */ + spider_net_set_descr_status(descr, SPIDER_NET_DESCR_NOT_IN_USE); +} + +/** + * spider_net_release_tx_chain - processes sent tx descriptors + * @card: adapter structure + * @brutal: if set, don't care about whether descriptor seems to be in use + * + * releases the tx descriptors that spider has finished with (if non-brutal) + * or simply release tx descriptors (if brutal) + */ +static void +spider_net_release_tx_chain(struct spider_net_card *card, int brutal) +{ + struct spider_net_descr_chain *tx_chain = &card->tx_chain; + enum spider_net_descr_status status; + + spider_net_tx_irq_off(card); + + /* no lock for chain needed, if this is only executed once at a time */ +again: + for (;;) { + status = spider_net_get_descr_status(tx_chain->tail); + switch (status) { + case SPIDER_NET_DESCR_CARDOWNED: + if (!brutal) goto out; + /* fallthrough, if we release the descriptors + * brutally (then we don't care about + * SPIDER_NET_DESCR_CARDOWNED) */ + case SPIDER_NET_DESCR_RESPONSE_ERROR: + case SPIDER_NET_DESCR_PROTECTION_ERROR: + case 
SPIDER_NET_DESCR_FORCE_END: + if (netif_msg_tx_err(card)) + pr_err("%s: forcing end of tx descriptor " + "with status x%02x\n", + card->netdev->name, status); + card->netdev_stats.tx_dropped++; + break; + + case SPIDER_NET_DESCR_COMPLETE: + card->netdev_stats.tx_packets++; + card->netdev_stats.tx_bytes += + tx_chain->tail->skb->len; + break; + + default: /* any other value (== SPIDER_NET_DESCR_NOT_IN_USE) */ + goto out; + } + spider_net_release_tx_descr(card, tx_chain->tail); + tx_chain->tail = tx_chain->tail->next; + } +out: + netif_wake_queue(card->netdev); + + if (!brutal) { + /* switch on tx irqs (while we are still in the interrupt + * handler, so we don't get an interrupt), check again + * for done descriptors. This results in fewer interrupts */ + spider_net_tx_irq_on(card); + status = spider_net_get_descr_status(tx_chain->tail); + switch (status) { + case SPIDER_NET_DESCR_RESPONSE_ERROR: + case SPIDER_NET_DESCR_PROTECTION_ERROR: + case SPIDER_NET_DESCR_FORCE_END: + case SPIDER_NET_DESCR_COMPLETE: + goto again; + default: + break; + } + } + +} + +/** + * spider_net_get_multicast_hash - generates hash for multicast filter table + * @addr: multicast address + * + * returns the hash value. + * + * spider_net_get_multicast_hash calculates a hash value for a given multicast + * address, that is used to set the multicast filter tables + */ +static u8 +spider_net_get_multicast_hash(struct net_device *netdev, __u8 *addr) +{ + /* FIXME: an addr of 01:00:5e:00:00:01 must result in 0xa9, + * ff:ff:ff:ff:ff:ff must result in 0xfd */ + u32 crc; + u8 hash; + + crc = crc32_be(~0, addr, netdev->addr_len); + + hash = (crc >> 27); + hash <<= 3; + hash |= crc & 7; + + return hash; +} + +/** + * spider_net_set_multi - sets multicast addresses and promisc flags + * @netdev: interface device structure + * + * spider_net_set_multi configures multicast addresses as needed for the + * netdev interface. 
It also sets up multicast, allmulti and promisc + * flags appropriately + */ +static void +spider_net_set_multi(struct net_device *netdev) +{ + struct dev_mc_list *mc; + u8 hash; + int i; + u32 reg; + struct spider_net_card *card = netdev_priv(netdev); + unsigned long bitmask[SPIDER_NET_MULTICAST_HASHES / BITS_PER_LONG] = + {0, }; + + spider_net_set_promisc(card); + + if (netdev->flags & IFF_ALLMULTI) { + for (i = 0; i < SPIDER_NET_MULTICAST_HASHES; i++) { + set_bit(i, bitmask); + } + goto write_hash; + } + + /* well, we know, what the broadcast hash value is: it's xfd + hash = spider_net_get_multicast_hash(netdev, netdev->broadcast); */ + set_bit(0xfd, bitmask); + + for (mc = netdev->mc_list; mc; mc = mc->next) { + hash = spider_net_get_multicast_hash(netdev, mc->dmi_addr); + set_bit(hash, bitmask); + } + +write_hash: + for (i = 0; i < SPIDER_NET_MULTICAST_HASHES / 4; i++) { + reg = 0; + if (test_bit(i * 4, bitmask)) + reg += 0x08; + reg <<= 8; + if (test_bit(i * 4 + 1, bitmask)) + reg += 0x08; + reg <<= 8; + if (test_bit(i * 4 + 2, bitmask)) + reg += 0x08; + reg <<= 8; + if (test_bit(i * 4 + 3, bitmask)) + reg += 0x08; + + spider_net_write_reg(card, SPIDER_NET_GMRMHFILnR + i * 4, reg); + } +} + +/** + * spider_net_disable_rxdmac - disables the receive DMA controller + * @card: card structure + * + * spider_net_disable_rxdmac terminates processing on the DMA controller by + * turing off DMA and issueing a force end + */ +static void +spider_net_disable_rxdmac(struct spider_net_card *card) +{ + spider_net_write_reg(card, SPIDER_NET_GDADMACCNTR, + SPIDER_NET_DMA_RX_FEND_VALUE); +} + +/** + * spider_net_stop - called upon ifconfig down + * @netdev: interface device structure + * + * always returns 0 + */ +int +spider_net_stop(struct net_device *netdev) +{ + struct spider_net_card *card = netdev_priv(netdev); + + netif_poll_disable(netdev); + netif_carrier_off(netdev); + netif_stop_queue(netdev); + + /* disable/mask all interrupts */ + spider_net_write_reg(card, 
SPIDER_NET_GHIINT0MSK, 0); + spider_net_write_reg(card, SPIDER_NET_GHIINT1MSK, 0); + spider_net_write_reg(card, SPIDER_NET_GHIINT2MSK, 0); + + spider_net_write_reg(card, SPIDER_NET_GDTDMACCNTR, + SPIDER_NET_DMA_TX_FEND_VALUE); + + /* turn off DMA, force end */ + spider_net_disable_rxdmac(card); + + /* release chains */ + spider_net_release_tx_chain(card, 1); + + /* switch off card */ + spider_net_write_reg(card, SPIDER_NET_CKRCTRL, + SPIDER_NET_CKRCTRL_STOP_VALUE); + + spider_net_free_chain(card, &card->tx_chain); + spider_net_free_chain(card, &card->rx_chain); + + return 0; +} + +/** + * spider_net_get_next_tx_descr - returns the next available tx descriptor + * @card: device structure to get descriptor from + * + * returns the address of the next descriptor, or NULL if not available. + */ +static struct spider_net_descr * +spider_net_get_next_tx_descr(struct spider_net_card *card) +{ + /* check, if head points to not-in-use descr */ + if ( spider_net_get_descr_status(card->tx_chain.head) == + SPIDER_NET_DESCR_NOT_IN_USE ) { + return card->tx_chain.head; + } else { + return NULL; + } +} + +/** + * spider_net_set_txdescr_cmdstat - sets the tx descriptor command field + * @descr: descriptor structure to fill out + * @skb: packet to consider + * + * fills out the command and status field of the descriptor structure, + * depending on hardware checksum settings. This function assumes a wmb() + * has executed before. + */ +static void +spider_net_set_txdescr_cmdstat(struct spider_net_descr *descr, + struct sk_buff *skb) +{ + if (skb->ip_summed != CHECKSUM_HW) { + descr->dmac_cmd_status = SPIDER_NET_DMAC_CMDSTAT_NOCS; + return; + } + + /* is packet ip? + * if yes: tcp? udp? 
*/ + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->nh.iph->protocol == IPPROTO_TCP) { + descr->dmac_cmd_status = SPIDER_NET_DMAC_CMDSTAT_TCPCS; + } else if (skb->nh.iph->protocol == IPPROTO_UDP) { + descr->dmac_cmd_status = SPIDER_NET_DMAC_CMDSTAT_UDPCS; + } else { /* the stack should checksum non-tcp and non-udp + packets on its own: NETIF_F_IP_CSUM */ + descr->dmac_cmd_status = SPIDER_NET_DMAC_CMDSTAT_NOCS; + } + } +} + +/** + * spider_net_prepare_tx_descr - fill tx descriptor with skb data + * @card: card structure + * @descr: descriptor structure to fill out + * @skb: packet to use + * + * returns 0 on success, -ENOMEM if the packet could not be iommu-mapped. + * + * iommu-maps skb->data and fills out the descriptor with the mapped + * address, the length and the skb pointer, then sets the command/status + * field. No packet data is copied in this function. + */ +static int +spider_net_prepare_tx_descr(struct spider_net_card *card, + struct spider_net_descr *descr, + struct sk_buff *skb) +{ + descr->buf_addr = pci_map_single(card->pdev, skb->data, + skb->len, PCI_DMA_BIDIRECTIONAL); + if (descr->buf_addr == DMA_ERROR_CODE) { + if (netif_msg_tx_err(card)) + pr_err("could not iommu-map packet (%p, %i). " + "Dropping packet\n", skb->data, skb->len); + return -ENOMEM; + } + + descr->buf_size = skb->len; + descr->skb = skb; + descr->data_status = 0; + + /* make sure the above values are in memory before we change the + * status */ + wmb(); + + spider_net_set_txdescr_cmdstat(descr,skb); + + return 0; +} + +/** + * spider_net_kick_tx_dma - enables TX DMA processing + * @card: card structure + * @descr: descriptor address to enable TX processing at + * + * spider_net_kick_tx_dma writes the bus address of @descr as start address + * of the tx descriptor chain and enables the transmission DMA engine + */ +static void +spider_net_kick_tx_dma(struct spider_net_card *card, + struct spider_net_descr *descr) +{ + /* this is the only descriptor in the output chain. 
+ * Enable TX DMA */ + + spider_net_write_reg(card, SPIDER_NET_GDTDCHA, + descr->bus_addr); + + spider_net_write_reg(card, SPIDER_NET_GDTDMACCNTR, + SPIDER_NET_DMA_TX_VALUE); +} + +/** + * spider_net_xmit - transmits a frame over the device + * @skb: packet to send out + * @netdev: interface device structure + * + * returns NETDEV_TX_OK if the frame was placed into a tx descriptor, + * NETDEV_TX_LOCKED if no descriptor was free or the frame could not be + * mapped + */ +static int +spider_net_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct spider_net_card *card = netdev_priv(netdev); + struct spider_net_descr *descr; + int result; + + descr = spider_net_get_next_tx_descr(card); + + if (!descr) { + /* stop the queue, then look once more; presumably this closes the + * window in which a descriptor is released between the first check + * and the stop -- TODO confirm */ netif_stop_queue(netdev); + + descr = spider_net_get_next_tx_descr(card); + if (!descr) + goto error; + else + netif_start_queue(netdev); + } + + result = spider_net_prepare_tx_descr(card, descr, skb); + if (result) + goto error; + + /* the descriptor is filled: advance the chain head past it */ card->tx_chain.head = card->tx_chain.head->next; + + /* make sure the status from spider_net_prepare_tx_descr is in + * memory before we check out the previous descriptor */ + wmb(); + + /* kick the DMA engine only if the previous descriptor is not owned by + * the card any more */ if (spider_net_get_descr_status(descr->prev) != + SPIDER_NET_DESCR_CARDOWNED) + spider_net_kick_tx_dma(card, descr); + + return NETDEV_TX_OK; + +error: + /* NOTE(review): tx_dropped is incremented here although the skb is + * neither freed nor consumed in this function and NETDEV_TX_LOCKED is + * returned -- confirm the intended accounting and return code. */ card->netdev_stats.tx_dropped++; + return NETDEV_TX_LOCKED; +} + +/** + * spider_net_do_ioctl - called for device ioctls + * @netdev: interface device structure + * @ifr: request parameter structure for ioctl + * @cmd: command code for ioctl + * + * returns 0 on success, <0 on failure. Currently, we have no special ioctls. 
+ * -EOPNOTSUPP is returned, if an unknown ioctl was requested + */ +static int +spider_net_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { + default: + return -EOPNOTSUPP; + } +} + +/** + * spider_net_pass_skb_up - takes an skb from a descriptor and passes it on + * @descr: descriptor to process + * @card: card structure + * + * returns 1 on success, 0 if no packet was passed to the stack + * + * iommu-unmaps the skb, fills out skb structure and passes the data to the + * stack. The descriptor state is not changed. + */ +static int +spider_net_pass_skb_up(struct spider_net_descr *descr, + struct spider_net_card *card) +{ + struct sk_buff *skb; + struct net_device *netdev; + u32 data_status, data_error; + + data_status = descr->data_status; + data_error = descr->data_error; + + netdev = card->netdev; + + /* check for errors in the data_error flag (only logged here) */ + if ((data_error & SPIDER_NET_DATA_ERROR_MASK) && + netif_msg_rx_err(card)) + pr_err("error in received descriptor found, " + "data_status=x%08x, data_error=x%08x\n", + data_status, data_error); + + /* prepare skb, unmap descriptor */ + skb = descr->skb; + pci_unmap_single(card->pdev, descr->buf_addr, SPIDER_NET_MAX_MTU, + PCI_DMA_BIDIRECTIONAL); + + /* the cases we'll throw away the packet immediately. + * NOTE(review): the skb taken from descr->skb is not freed on this + * path within this function -- confirm the caller or the rx refill + * releases it. */ + if (data_error & SPIDER_NET_DESTROY_RX_FLAGS) + return 0; + + skb->dev = netdev; + skb_put(skb, descr->valid_size); + + /* the card seems to add 2 bytes of junk in front + * of the ethernet frame */ +#define SPIDER_MISALIGN 2 + skb_pull(skb, SPIDER_MISALIGN); + skb->protocol = eth_type_trans(skb, netdev); + + /* checksum offload: the checksum is treated as verified only when + * rx_csum is enabled, the status has a CHK flag set and no CHK error + * is reported */ + if (card->options.rx_csum) { + if ( (data_status & SPIDER_NET_DATA_STATUS_CHK_MASK) && + (!(data_error & SPIDER_NET_DATA_ERROR_CHK_MASK)) ) + skb->ip_summed = CHECKSUM_UNNECESSARY; + else + skb->ip_summed = CHECKSUM_NONE; + } else { + skb->ip_summed = CHECKSUM_NONE; + } + + if (data_status & SPIDER_NET_VLAN_PACKET) { + /* further enhancements: 
HW-accel VLAN + * vlan_hwaccel_receive_skb + */ + } + + /* pass skb up to stack */ + netif_receive_skb(skb); + + /* update netdevice statistics */ + card->netdev_stats.rx_packets++; + card->netdev_stats.rx_bytes += skb->len; + + return 1; +} + +/** + * spider_net_decode_one_descr - processes the rx descriptor at the chain tail + * @card: card structure + * + * returns 1 if a packet has been sent to the stack, otherwise 0 + * + * looks at the descriptor at the tail of the rx chain. A completed + * descriptor is handed to spider_net_pass_skb_up; a descriptor in an error + * state is dropped. In both cases the descriptor is marked not-in-use and + * the rx chain is refilled. + */ +static int +spider_net_decode_one_descr(struct spider_net_card *card) +{ + enum spider_net_descr_status status; + struct spider_net_descr *descr; + struct spider_net_descr_chain *chain; + int result; + + chain = &card->rx_chain; + descr = chain->tail; + + status = spider_net_get_descr_status(descr); + + if (status == SPIDER_NET_DESCR_CARDOWNED) { + /* nothing in the descriptor yet */ + return 0; + } + + if (status == SPIDER_NET_DESCR_NOT_IN_USE) { + /* not initialized yet, I bet chain->tail == chain->head + * and the ring is empty */ + spider_net_refill_rx_chain(card); + return 0; + } + + /* descriptor definitively used -- advance the chain tail */ + chain->tail = descr->next; + + result = 0; + /* error states: count the descriptor as dropped */ if ( (status == SPIDER_NET_DESCR_RESPONSE_ERROR) || + (status == SPIDER_NET_DESCR_PROTECTION_ERROR) || + (status == SPIDER_NET_DESCR_FORCE_END) ) { + if (netif_msg_rx_err(card)) + pr_err("%s: dropping RX descriptor with state %d\n", + card->netdev->name, status); + card->netdev_stats.rx_dropped++; + goto refill; + } + + /* any other state than COMPLETE/FRAME_END: log it, pass nothing up */ if ( (status != SPIDER_NET_DESCR_COMPLETE) && + (status != SPIDER_NET_DESCR_FRAME_END) ) { + if (netif_msg_rx_err(card)) + pr_err("%s: RX descriptor with state %d\n", + card->netdev->name, status); + goto refill; + } + + /* ok, we've got a packet in descr */ + result = spider_net_pass_skb_up(descr, card); +refill: + spider_net_set_descr_status(descr, SPIDER_NET_DESCR_NOT_IN_USE); + /* the descriptor is marked not-in-use above; now refill the rx chain */ + spider_net_refill_rx_chain(card); + + 
return result; +} + +/** + * spider_net_poll - NAPI poll function called by the stack to return packets + * @netdev: interface device structure + * @budget: in/out: packet budget; reduced by the number of packets passed up + * + * returns 0 if no more packets available to the driver/stack. Returns 1, + * if the quota is exceeded, but the driver still has packets. + * + * spider_net_poll returns all packets from the rx descriptors to the stack + * (using netif_receive_skb). If all/enough packets are up, the driver + * reenables interrupts and returns 0. If not, 1 is returned. + */ +static int +spider_net_poll(struct net_device *netdev, int *budget) +{ + struct spider_net_card *card = netdev_priv(netdev); + int packets_to_do, packets_done = 0; + int no_more_packets = 0; + + /* never exceed either the caller's budget or the device quota */ packets_to_do = min(*budget, netdev->quota); + + while (packets_to_do) { + if (spider_net_decode_one_descr(card)) { + packets_done++; + packets_to_do--; + } else { + /* no more packets for the stack. + * NOTE(review): spider_net_decode_one_descr() also + * returns 0 after dropping a descriptor in an error + * state, which ends this loop early -- confirm. */ + no_more_packets = 1; + break; + } + } + + netdev->quota -= packets_done; + *budget -= packets_done; + + /* if all packets are in the stack, enable interrupts and return 0 */ + /* if not, return 1 */ + if (no_more_packets) { + netif_rx_complete(netdev); + spider_net_rx_irq_on(card); + return 0; + } + + return 1; +} + +/** + * spider_net_vlan_rx_reg - initializes VLAN structures in the driver and card + * @netdev: interface device structure + * @grp: vlan_group structure that is registered (NULL on destroying interface) + * + * currently a stub that does nothing + */ +static void +spider_net_vlan_rx_reg(struct net_device *netdev, struct vlan_group *grp) +{ + /* further enhancement... yet to do */ + return; +} + +/** + * spider_net_vlan_rx_add - adds VLAN id to the card filter + * @netdev: interface device structure + * @vid: VLAN id to add + * + * currently a stub that does nothing + */ +static void +spider_net_vlan_rx_add(struct net_device *netdev, uint16_t vid) +{ + /* further enhancement... 
yet to do */ + /* add vid to card's VLAN filter table */ + return; +} + +/** + * spider_net_vlan_rx_kill - removes VLAN id from the card filter + * @netdev: interface device structure + * @vid: VLAN id to remove + * + * currently a stub that does nothing + */ +static void +spider_net_vlan_rx_kill(struct net_device *netdev, uint16_t vid) +{ + /* further enhancement... yet to do */ + /* remove vid from card's VLAN filter table */ +} + +/** + * spider_net_get_stats - get interface statistics + * @netdev: interface device structure + * + * returns the interface statistics residing in the spider_net_card struct + */ +static struct net_device_stats * +spider_net_get_stats(struct net_device *netdev) +{ + struct spider_net_card *card = netdev_priv(netdev); + struct net_device_stats *stats = &card->netdev_stats; + return stats; +} + +/** + * spider_net_change_mtu - changes the MTU of an interface + * @netdev: interface device structure + * @new_mtu: new MTU value + * + * returns 0 on success, -EINVAL if new_mtu is smaller than + * SPIDER_NET_MIN_MTU or larger than SPIDER_NET_MAX_MTU + */ +static int +spider_net_change_mtu(struct net_device *netdev, int new_mtu) +{ + /* no need to re-alloc skbs or so -- the max mtu is about 2.3k + * and mtu is outbound only anyway */ + if ( (new_mtu < SPIDER_NET_MIN_MTU ) || + (new_mtu > SPIDER_NET_MAX_MTU) ) + return -EINVAL; + netdev->mtu = new_mtu; + return 0; +} + +/** + * spider_net_set_mac - sets the MAC of an interface + * @netdev: interface device structure + * @p: pointer to a struct sockaddr holding the new MAC address + * + * Returns 0 on success, <0 on failure. The address can only be changed + * while the interface is down; -EBUSY is returned otherwise. 
+ */ +static int +spider_net_set_mac(struct net_device *netdev, void *p) +{ + struct spider_net_card *card = netdev_priv(netdev); + u32 macl, macu; + struct sockaddr *addr = p; + + /* GMACTPE and GMACRPE must be off, so we only allow this, if + * the device is down */ + if (netdev->flags & IFF_UP) + return -EBUSY; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + /* bytes 0-3 go into the upper register, bytes 4-5 into the lower one. + * NOTE(review): sa_data is a char array; on a platform where char is + * signed, bytes >= 0x80 would sign-extend in these shifts -- confirm + * the target's char signedness. */ macu = (addr->sa_data[0]<<24) + (addr->sa_data[1]<<16) + + (addr->sa_data[2]<<8) + (addr->sa_data[3]); + macl = (addr->sa_data[4]<<8) + (addr->sa_data[5]); + spider_net_write_reg(card, SPIDER_NET_GMACUNIMACU, macu); + spider_net_write_reg(card, SPIDER_NET_GMACUNIMACL, macl); + + spider_net_set_promisc(card); + + /* look up whether we have been successful: netdev->dev_addr must + * match the requested address afterwards */ + if (spider_net_get_mac_address(netdev)) + return -EADDRNOTAVAIL; + if (memcmp(netdev->dev_addr,addr->sa_data,netdev->addr_len)) + return -EADDRNOTAVAIL; + + return 0; +} + +/** + * spider_net_enable_txdmac - enables a TX DMA controller + * @card: card structure + * + * spider_net_enable_txdmac enables the TX DMA controller by writing the + * bus address of the tx chain tail descriptor to GDTDCHA + */ +static void +spider_net_enable_txdmac(struct spider_net_card *card) +{ + /* assume chain is aligned correctly */ + spider_net_write_reg(card, SPIDER_NET_GDTDCHA, + card->tx_chain.tail->bus_addr); +} + +/** + * spider_net_handle_error_irq - handles errors raised by an interrupt + * @card: card structure + * @status_reg: interrupt status register 0 (GHIINT0STS) + * + * spider_net_handle_error_irq treats or ignores all error conditions + * found when an interrupt is presented + */ +static void +spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg) +{ + u32 error_reg1, error_reg2; + u32 i; + int show_error = 1; + + error_reg1 = spider_net_read_reg(card, SPIDER_NET_GHIINT1STS); + error_reg2 = spider_net_read_reg(card, SPIDER_NET_GHIINT2STS); + + /* check GHIINT0STS ************************************/ + if (status_reg) + for (i = 0; i < 
32; i++) + if (status_reg & (1<tx_chain.tail == card->tx_chain.head) + spider_net_kick_tx_dma(card); + show_error = 0; */ + break; + + /* case SPIDER_NET_G1TMCNTINT: not used. print a message */ + /* case SPIDER_NET_GFREECNTINT: not used. print a message */ + } + + /* check GHIINT1STS ************************************/ + if (error_reg1) + for (i = 0; i < 32; i++) + if (error_reg1 & (1<netdev); + spider_net_enable_rxchtails(card); + spider_net_enable_rxdmac(card); + break; + + /* case SPIDER_NET_GTMSHTINT: problem, print a message */ + case SPIDER_NET_GDTINVDINT: + /* allrighty. tx from previous descr ok */ + show_error = 0; + break; + /* case SPIDER_NET_GRFDFLLINT: print a message down there */ + /* case SPIDER_NET_GRFCFLLINT: print a message down there */ + /* case SPIDER_NET_GRFBFLLINT: print a message down there */ + /* case SPIDER_NET_GRFAFLLINT: print a message down there */ + + /* chain end */ + case SPIDER_NET_GDDDCEINT: /* fallthrough */ + case SPIDER_NET_GDCDCEINT: /* fallthrough */ + case SPIDER_NET_GDBDCEINT: /* fallthrough */ + case SPIDER_NET_GDADCEINT: + if (netif_msg_intr(card)) + pr_err("got descriptor chain end interrupt, " + "restarting DMAC %c.\n", + 'D'+i-SPIDER_NET_GDDDCEINT); + spider_net_refill_rx_chain(card); + show_error = 0; + break; + + /* invalid descriptor */ + case SPIDER_NET_GDDINVDINT: /* fallthrough */ + case SPIDER_NET_GDCINVDINT: /* fallthrough */ + case SPIDER_NET_GDBINVDINT: /* fallthrough */ + case SPIDER_NET_GDAINVDINT: + /* could happen when rx chain is full */ + spider_net_refill_rx_chain(card); + show_error = 0; + break; + + /* case SPIDER_NET_GDTRSERINT: problem, print a message */ + /* case SPIDER_NET_GDDRSERINT: problem, print a message */ + /* case SPIDER_NET_GDCRSERINT: problem, print a message */ + /* case SPIDER_NET_GDBRSERINT: problem, print a message */ + /* case SPIDER_NET_GDARSERINT: problem, print a message */ + /* case SPIDER_NET_GDSERINT: problem, print a message */ + /* case SPIDER_NET_GDTPTERINT: problem, 
print a message */ + /* case SPIDER_NET_GDDPTERINT: problem, print a message */ + /* case SPIDER_NET_GDCPTERINT: problem, print a message */ + /* case SPIDER_NET_GDBPTERINT: problem, print a message */ + /* case SPIDER_NET_GDAPTERINT: problem, print a message */ + default: + show_error = 1; + break; + } + + /* check GHIINT2STS ************************************/ + if (error_reg2) + for (i = 0; i < 32; i++) + if (error_reg2 & (1<irq); + spider_net_interrupt(netdev->irq, netdev, NULL); + enable_irq(netdev->irq); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +/** + * spider_net_init_card - initializes the card + * @card: card structure + * + * spider_net_init_card initializes the card so that other registers can + * be used + */ +static void +spider_net_init_card(struct spider_net_card *card) +{ + spider_net_write_reg(card, SPIDER_NET_CKRCTRL, + SPIDER_NET_CKRCTRL_STOP_VALUE); + + spider_net_write_reg(card, SPIDER_NET_CKRCTRL, + SPIDER_NET_CKRCTRL_RUN_VALUE); +} + +/** + * spider_net_enable_card - enables the card by setting all kinds of regs + * @card: card structure + * + * spider_net_enable_card sets a lot of SMMIO registers to enable the device + */ +static void +spider_net_enable_card(struct spider_net_card *card) +{ + int i; + /* the following array consists of (register),(value) pairs + * that are set in this function. 
A register of 0 ends the list */ + u32 regs[][2] = { + { SPIDER_NET_GRESUMINTNUM, 0 }, + { SPIDER_NET_GREINTNUM, 0 }, + + /* set interrupt frame number registers */ + /* clear the single DMA engine registers first */ + { SPIDER_NET_GFAFRMNUM, SPIDER_NET_GFXFRAMES_VALUE }, + { SPIDER_NET_GFBFRMNUM, SPIDER_NET_GFXFRAMES_VALUE }, + { SPIDER_NET_GFCFRMNUM, SPIDER_NET_GFXFRAMES_VALUE }, + { SPIDER_NET_GFDFRMNUM, SPIDER_NET_GFXFRAMES_VALUE }, + /* then set what we really need */ + { SPIDER_NET_GFFRMNUM, SPIDER_NET_FRAMENUM_VALUE }, + + /* timer counter registers and stuff */ + { SPIDER_NET_GFREECNNUM, 0 }, + { SPIDER_NET_GONETIMENUM, 0 }, + { SPIDER_NET_GTOUTFRMNUM, 0 }, + + /* RX mode setting */ + { SPIDER_NET_GRXMDSET, SPIDER_NET_RXMODE_VALUE }, + /* TX mode setting */ + { SPIDER_NET_GTXMDSET, SPIDER_NET_TXMODE_VALUE }, + /* IPSEC mode setting */ + { SPIDER_NET_GIPSECINIT, SPIDER_NET_IPSECINIT_VALUE }, + + { SPIDER_NET_GFTRESTRT, SPIDER_NET_RESTART_VALUE }, + + { SPIDER_NET_GMRWOLCTRL, 0 }, + { SPIDER_NET_GTESTMD, 0 }, + + { SPIDER_NET_GMACINTEN, 0 }, + + /* flow control stuff */ + { SPIDER_NET_GMACAPAUSE, SPIDER_NET_MACAPAUSE_VALUE }, + { SPIDER_NET_GMACTXPAUSE, SPIDER_NET_TXPAUSE_VALUE }, + + { SPIDER_NET_GMACBSTLMT, SPIDER_NET_BURSTLMT_VALUE }, + { 0, 0} + }; + + /* write the table in order, stopping at the terminating register 0 */ i = 0; + while (regs[i][0]) { + spider_net_write_reg(card, regs[i][0], regs[i][1]); + i++; + } + + /* clear unicast filter table entries 1 to 14; each entry is written + * as two 32-bit registers, 8 bytes apart from the next entry */ + for (i = 1; i <= 14; i++) { + spider_net_write_reg(card, + SPIDER_NET_GMRUAFILnR + i * 8, + 0x00080000); + spider_net_write_reg(card, + SPIDER_NET_GMRUAFILnR + i * 8 + 4, + 0x00000000); + } + + spider_net_write_reg(card, SPIDER_NET_GMRUA0FIL15R, 0x08080000); + + spider_net_write_reg(card, SPIDER_NET_ECMODE, SPIDER_NET_ECMODE_VALUE); + + /* set chain tail address for RX chains and + * enable DMA */ + spider_net_enable_rxchtails(card); + spider_net_enable_rxdmac(card); + + spider_net_write_reg(card, SPIDER_NET_GRXDMAEN, SPIDER_NET_WOL_VALUE); + + /* set chain 
tail adress for TX chain */ + spider_net_enable_txdmac(card); + + spider_net_write_reg(card, SPIDER_NET_GMACLENLMT, + SPIDER_NET_LENLMT_VALUE); + spider_net_write_reg(card, SPIDER_NET_GMACMODE, + SPIDER_NET_MACMODE_VALUE); + spider_net_write_reg(card, SPIDER_NET_GMACOPEMD, + SPIDER_NET_OPMODE_VALUE); + + /* set interrupt mask registers */ + spider_net_write_reg(card, SPIDER_NET_GHIINT0MSK, + SPIDER_NET_INT0_MASK_VALUE); + spider_net_write_reg(card, SPIDER_NET_GHIINT1MSK, + SPIDER_NET_INT1_MASK_VALUE); + spider_net_write_reg(card, SPIDER_NET_GHIINT2MSK, + SPIDER_NET_INT2_MASK_VALUE); +} + +/** + * spider_net_open - called upon ifconfig up + * @netdev: interface device structure + * + * returns 0 on success, -ENOMEM if a descriptor chain or the rx skbs + * cannot be set up, -EBUSY if the interrupt cannot be requested + * + * spider_net_open allocates all the descriptors and memory needed for + * operation, sets up multicast list and enables interrupts + */ +int +spider_net_open(struct net_device *netdev) +{ + struct spider_net_card *card = netdev_priv(netdev); + int result; + + /* the tx chain uses the first tx_descriptors entries of card->descr, + * the rx chain the rx_descriptors entries following them */ result = -ENOMEM; + if (spider_net_init_chain(card, &card->tx_chain, + card->descr, tx_descriptors)) + goto alloc_tx_failed; + if (spider_net_init_chain(card, &card->rx_chain, + card->descr + tx_descriptors, rx_descriptors)) + goto alloc_rx_failed; + + /* allocate rx skbs */ + if (spider_net_alloc_rx_skbs(card)) + goto alloc_skbs_failed; + + spider_net_set_multi(netdev); + + /* further enhancement: setup hw vlan, if needed */ + + result = -EBUSY; + if (request_irq(netdev->irq, spider_net_interrupt, + SA_SHIRQ, netdev->name, netdev)) + goto register_int_failed; + + spider_net_enable_card(card); + + return 0; + +/* error unwinding: undo the setup steps in reverse order */ register_int_failed: + spider_net_free_rx_chain_contents(card); +alloc_skbs_failed: + spider_net_free_chain(card, &card->rx_chain); +alloc_rx_failed: + spider_net_free_chain(card, &card->tx_chain); +alloc_tx_failed: + return result; +} + +/** + * spider_net_setup_phy - setup PHY + * @card: card structure + * + * returns 0 on success, <0 on failure + * + * spider_net_setup_phy is used 
as part of spider_net_probe. Sets
 * the PHY to 1000 Mbps. If no PHY can be probed, the probe error (or
 * -ENODEV) is returned before any PHY operation is dereferenced.
 **/
static int
spider_net_setup_phy(struct spider_net_card *card)
{
	struct mii_phy *phy = &card->phy;
	int err;

	spider_net_write_reg(card, SPIDER_NET_GDTDMASEL,
			     SPIDER_NET_DMASEL_VALUE);
	spider_net_write_reg(card, SPIDER_NET_GPCCTRL,
			     SPIDER_NET_PHY_CTRL_VALUE);

	/* hook the PHY layer up to this card's MDIO accessors */
	phy->mii_id = 1;
	phy->dev = card->netdev;
	phy->mdio_read = spider_net_read_phy;
	phy->mdio_write = spider_net_write_phy;

	/* phy->def is dereferenced below, so a failed probe must stop here
	 * instead of being ignored */
	err = mii_phy_probe(phy, phy->mii_id);
	if (err || !phy->def) {
		if (netif_msg_probe(card))
			pr_err("Couldn't find a usable PHY.\n");
		return err ? err : -ENODEV;
	}

	/* forcing the mode is optional for a PHY definition */
	if (phy->def->ops->setup_forced)
		phy->def->ops->setup_forced(phy, SPEED_1000, DUPLEX_FULL);

	/* the following two writes could be moved to sungem_phy.c */
	/* enable fiber mode */
	spider_net_write_phy(card->netdev, 1, MII_NCONFIG, 0x9020);
	/* LEDs active in both modes, autosense prio = fiber */
	spider_net_write_phy(card->netdev, 1, MII_NCONFIG, 0x945f);

	phy->def->ops->read_link(phy);
	pr_info("Found %s with %i Mbps, %s-duplex.\n", phy->def->name,
		phy->speed, phy->duplex==1 ? "Full" : "Half");

	return 0;
}

/**
 * spider_net_download_firmware - loads firmware into the adapter
 * @card: card structure
 * @firmware: firmware pointer
 *
 * spider_net_download_firmware loads the firmware opened by
 * spider_net_init_firmware into the adapter.
+ */ +static void +spider_net_download_firmware(struct spider_net_card *card, + const struct firmware *firmware) +{ + int sequencer, i; + u32 *fw_ptr = (u32 *)firmware->data; + + /* stop sequencers */ + spider_net_write_reg(card, SPIDER_NET_GSINIT, + SPIDER_NET_STOP_SEQ_VALUE); + + for (sequencer = 0; sequencer < 6; sequencer++) { + spider_net_write_reg(card, + SPIDER_NET_GSnPRGADR + sequencer * 8, 0); + for (i = 0; i < SPIDER_NET_FIRMWARE_LEN; i++) { + spider_net_write_reg(card, SPIDER_NET_GSnPRGDAT + + sequencer * 8, *fw_ptr); + fw_ptr++; + } + } + + spider_net_write_reg(card, SPIDER_NET_GSINIT, + SPIDER_NET_RUN_SEQ_VALUE); +} + +/** + * spider_net_init_firmware - reads in firmware parts + * @card: card structure + * + * Returns 0 on success, <0 on failure + * + * spider_net_init_firmware opens the sequencer firmware and does some basic + * checks. This function opens and releases the firmware structure. A call + * to download the firmware is performed before the release. + * + * Firmware format + * =============== + * spider_fw.bin is expected to be a file containing 6*1024*4 bytes, 4k being + * the program for each sequencer. Use the command + * tail -q -n +2 Seq_code1_0x088.txt Seq_code2_0x090.txt \ + * Seq_code3_0x098.txt Seq_code4_0x0A0.txt Seq_code5_0x0A8.txt \ + * Seq_code6_0x0B0.txt | xxd -r -p -c4 > spider_fw.bin + * + * to generate spider_fw.bin, if you have sequencer programs with something + * like the following contents for each sequencer: + * + * + * + * ... 
+ * <1024th 4-BYTES-WORD FOR SEQUENCER> + */ +static int +spider_net_init_firmware(struct spider_net_card *card) +{ + const struct firmware *firmware; + int err = -EIO; + + if (request_firmware(&firmware, + SPIDER_NET_FIRMWARE_NAME, &card->pdev->dev) < 0) { + if (netif_msg_probe(card)) + pr_err("Couldn't read in sequencer data file %s.\n", + SPIDER_NET_FIRMWARE_NAME); + firmware = NULL; + goto out; + } + + /* the image must hold exactly six programs of SPIDER_NET_FIRMWARE_LEN + * 32-bit words each */ if (firmware->size != 6 * SPIDER_NET_FIRMWARE_LEN * sizeof(u32)) { + if (netif_msg_probe(card)) + pr_err("Invalid size of sequencer data file %s.\n", + SPIDER_NET_FIRMWARE_NAME); + goto out; + } + + spider_net_download_firmware(card, firmware); + + err = 0; +out: + /* firmware was set to NULL above if the request itself failed */ release_firmware(firmware); + + return err; +} + +/** + * spider_net_workaround_rxramfull - work around firmware bug + * @card: card structure + * + * takes the card out of reset, writes zeroes to the program data register + * of all six sequencers, sets the sequencer operation register and puts + * the card back into reset + * + * no return value + **/ +static void +spider_net_workaround_rxramfull(struct spider_net_card *card) +{ + int i, sequencer = 0; + + /* cancel reset */ + spider_net_write_reg(card, SPIDER_NET_CKRCTRL, + SPIDER_NET_CKRCTRL_RUN_VALUE); + + /* empty sequencer data. + * NOTE(review): the first write of each iteration targets GSnPRGDAT, + * whereas spider_net_download_firmware() resets GSnPRGADR before it + * streams data -- confirm which register is intended here. */ + for (sequencer = 0; sequencer < 6; sequencer++) { + spider_net_write_reg(card, SPIDER_NET_GSnPRGDAT + + sequencer * 8, 0x0); + for (i = 0; i < SPIDER_NET_FIRMWARE_LEN; i++) { + spider_net_write_reg(card, SPIDER_NET_GSnPRGDAT + + sequencer * 8, 0x0); + } + } + + /* set sequencer operation */ + spider_net_write_reg(card, SPIDER_NET_GSINIT, 0x000000fe); + + /* reset */ + spider_net_write_reg(card, SPIDER_NET_CKRCTRL, + SPIDER_NET_CKRCTRL_STOP_VALUE); +} + +/** + * spider_net_tx_timeout_task - task scheduled by the watchdog timeout + * function (to be called not under interrupt status) + * @data: data, is interface device structure + * + * called as task when tx hangs, resets interface (if interface is up) + */ +static void +spider_net_tx_timeout_task(void *data) +{ + struct net_device *netdev = data; + struct spider_net_card *card = netdev_priv(netdev); + + if (!(netdev->flags & IFF_UP)) + goto out; 
+ + netif_device_detach(netdev); + spider_net_stop(netdev); + + spider_net_workaround_rxramfull(card); + spider_net_init_card(card); + + if (spider_net_setup_phy(card)) + goto out; + if (spider_net_init_firmware(card)) + goto out; + + /* NOTE(review): the result of spider_net_open() is not checked before + * the tx DMA is kicked and the device re-attached; the two "goto out" + * paths above also leave the device detached -- confirm. */ spider_net_open(netdev); + spider_net_kick_tx_dma(card, card->tx_chain.head); + netif_device_attach(netdev); + +out: + /* NOTE(review): spider_net_remove() sleeps on card->waitq until this + * counter reaches 0, but no wake_up() on that queue is visible here -- + * confirm the waiter is woken elsewhere. */ atomic_dec(&card->tx_timeout_task_counter); +} + +/** + * spider_net_tx_timeout - called when the tx timeout watchdog kicks in. + * @netdev: interface device structure + * + * called, if tx hangs. Schedules a task that resets the interface + */ +static void +spider_net_tx_timeout(struct net_device *netdev) +{ + struct spider_net_card *card; + + card = netdev_priv(netdev); + /* count the pending task before scheduling it; the count is taken + * back right away if the interface is down and nothing is scheduled */ atomic_inc(&card->tx_timeout_task_counter); + if (netdev->flags & IFF_UP) + schedule_work(&card->tx_timeout_task); + else + atomic_dec(&card->tx_timeout_task_counter); +} + +/** + * spider_net_setup_netdev_ops - initialization of net_device operations + * @netdev: net_device structure + * + * fills out function pointers in the net_device structure + */ +static void +spider_net_setup_netdev_ops(struct net_device *netdev) +{ + netdev->open = &spider_net_open; + netdev->stop = &spider_net_stop; + netdev->hard_start_xmit = &spider_net_xmit; + netdev->get_stats = &spider_net_get_stats; + netdev->set_multicast_list = &spider_net_set_multi; + netdev->set_mac_address = &spider_net_set_mac; + netdev->change_mtu = &spider_net_change_mtu; + netdev->do_ioctl = &spider_net_do_ioctl; + /* tx watchdog */ + netdev->tx_timeout = &spider_net_tx_timeout; + netdev->watchdog_timeo = SPIDER_NET_WATCHDOG_TIMEOUT; + /* NAPI */ + netdev->poll = &spider_net_poll; + netdev->weight = SPIDER_NET_NAPI_WEIGHT; + /* HW VLAN (the three handlers are still stubs) */ + netdev->vlan_rx_register = &spider_net_vlan_rx_reg; + netdev->vlan_rx_add_vid = &spider_net_vlan_rx_add; + netdev->vlan_rx_kill_vid = &spider_net_vlan_rx_kill; +#ifdef CONFIG_NET_POLL_CONTROLLER + /* poll controller */ + netdev->poll_controller = 
&spider_net_poll_controller; +#endif /* CONFIG_NET_POLL_CONTROLLER */ + /* ethtool ops */ + netdev->ethtool_ops = &spider_net_ethtool_ops; +} + +/** + * spider_net_setup_netdev - initialization of net_device + * @card: card structure + * + * Returns 0 on success or <0 on failure. Only a register_netdev failure is + * returned; a failure to set the MAC address is merely logged. + * + * spider_net_setup_netdev initializes the net_device structure, sets the + * MAC address from the "local-mac-address" property and registers the + * net_device + **/ +static int +spider_net_setup_netdev(struct spider_net_card *card) +{ + int result; + struct net_device *netdev = card->netdev; + struct device_node *dn; + struct sockaddr addr; + u8 *mac; + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &card->pdev->dev); + + /* spider_net_remove() looks the net_device up through drvdata */ pci_set_drvdata(card->pdev, netdev); + spin_lock_init(&card->intmask_lock); + netdev->irq = card->pdev->irq; + + card->options.rx_csum = SPIDER_NET_RX_CSUM_DEFAULT; + + spider_net_setup_netdev_ops(netdev); + + netdev->features = 0; + /* some time: NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX | + * NETIF_F_HW_VLAN_FILTER */ + + /* NOTE(review): netdev->irq was already assigned above; this second + * assignment looks redundant. */ netdev->irq = card->pdev->irq; + + dn = pci_device_to_OF_node(card->pdev); + /* NOTE(review): the get_property() result is passed to memcpy() + * unchecked; if the property is missing and NULL is returned, this + * dereferences NULL -- confirm and add a check. */ mac = (u8 *)get_property(dn, "local-mac-address", NULL); + memcpy(addr.sa_data, mac, ETH_ALEN); + + result = spider_net_set_mac(netdev, &addr); + if ((result) && (netif_msg_probe(card))) + pr_err("Failed to set MAC address: %i\n", result); + + result = register_netdev(netdev); + if (result) { + if (netif_msg_probe(card)) + pr_err("Couldn't register net_device: %i\n", + result); + return result; + } + + if (netif_msg_probe(card)) + pr_info("Initialized device %s.\n", netdev->name); + + return 0; +} + +/** + * spider_net_alloc_card - allocates net_device and card structure + * + * returns the card structure or NULL in case of errors + * + * the card and net_device structures are linked to each other; the + * private area is sized to hold the card structure followed by all rx and + * tx descriptors + */ +static struct spider_net_card * +spider_net_alloc_card(void) +{ + struct net_device *netdev; + struct spider_net_card *card; + size_t alloc_size; + + alloc_size = sizeof (*card) + + sizeof (struct spider_net_descr) * rx_descriptors + + sizeof (struct 
spider_net_descr) * tx_descriptors; + netdev = alloc_etherdev(alloc_size); + if (!netdev) + return NULL; + + /* the card structure lives in the net_device's private area */ card = netdev_priv(netdev); + card->netdev = netdev; + card->msg_enable = SPIDER_NET_DEFAULT_MSG; + INIT_WORK(&card->tx_timeout_task, spider_net_tx_timeout_task, netdev); + init_waitqueue_head(&card->waitq); + atomic_set(&card->tx_timeout_task_counter, 0); + + return card; +} + +/** + * spider_net_undo_pci_setup - releases PCI resources + * @card: card structure + * + * spider_net_undo_pci_setup unmaps the register window and releases the + * PCI regions + */ +static void +spider_net_undo_pci_setup(struct spider_net_card *card) +{ + iounmap(card->regs); + pci_release_regions(card->pdev); +} + +/** + * spider_net_setup_pci_dev - sets up the device in terms of PCI operations + * @pdev: PCI device + * + * Returns the card structure or NULL if any errors occur + * + * spider_net_setup_pci_dev initializes pdev and together with the + * functions called in spider_net_open configures the device so that + * data can be transferred over it + * The net_device structure is attached to the card structure, if the + * function returns without error. 
+ **/ +static struct spider_net_card * +spider_net_setup_pci_dev(struct pci_dev *pdev) +{ + struct spider_net_card *card; + unsigned long mmio_start, mmio_len; + + if (pci_enable_device(pdev)) { + pr_err("Couldn't enable PCI device\n"); + return NULL; + } + + if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) { + pr_err("Couldn't find proper PCI device base address.\n"); + goto out_disable_dev; + } + + if (pci_request_regions(pdev, spider_net_driver_name)) { + pr_err("Couldn't obtain PCI resources, aborting.\n"); + goto out_disable_dev; + } + + pci_set_master(pdev); + + card = spider_net_alloc_card(); + if (!card) { + pr_err("Couldn't allocate net_device structure, " + "aborting.\n"); + goto out_release_regions; + } + card->pdev = pdev; + + /* fetch base address and length of first resource */ + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + + card->netdev->mem_start = mmio_start; + card->netdev->mem_end = mmio_start + mmio_len; + card->regs = ioremap(mmio_start, mmio_len); + + if (!card->regs) { + pr_err("Couldn't obtain PCI resources, aborting.\n"); + goto out_release_regions; + } + + return card; + +out_release_regions: + pci_release_regions(pdev); +out_disable_dev: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return NULL; +} + +/** + * spider_net_probe - initialization of a device + * @pdev: PCI device + * @ent: entry in the device id list + * + * Returns 0 on success, <0 on failure + * + * spider_net_probe initializes pdev and registers a net_device + * structure for it. 
After that, the device can be ifconfig'ed up + **/ +static int __devinit +spider_net_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err = -EIO; + struct spider_net_card *card; + + card = spider_net_setup_pci_dev(pdev); + if (!card) + goto out; + + spider_net_workaround_rxramfull(card); + spider_net_init_card(card); + + err = spider_net_setup_phy(card); + if (err) + goto out_undo_pci; + + err = spider_net_init_firmware(card); + if (err) + goto out_undo_pci; + + err = spider_net_setup_netdev(card); + if (err) + goto out_undo_pci; + + return 0; + +out_undo_pci: + spider_net_undo_pci_setup(card); + free_netdev(card->netdev); +out: + return err; +} + +/** + * spider_net_remove - removal of a device + * @pdev: PCI device + * + * Returns 0 on success, <0 on failure + * + * spider_net_remove is called to remove the device and unregisters the + * net_device + **/ +static void __devexit +spider_net_remove(struct pci_dev *pdev) +{ + struct net_device *netdev; + struct spider_net_card *card; + + netdev = pci_get_drvdata(pdev); + card = netdev_priv(netdev); + + wait_event(card->waitq, + atomic_read(&card->tx_timeout_task_counter) == 0); + + unregister_netdev(netdev); + spider_net_undo_pci_setup(card); + free_netdev(netdev); + + free_irq(to_pci_dev(netdev->class_dev.dev)->irq, netdev); +} + +static struct pci_driver spider_net_driver = { + .owner = THIS_MODULE, + .name = spider_net_driver_name, + .id_table = spider_net_pci_tbl, + .probe = spider_net_probe, + .remove = __devexit_p(spider_net_remove) +}; + +/** + * spider_net_init - init function when the driver is loaded + * + * spider_net_init registers the device driver + */ +static int __init spider_net_init(void) +{ + if (rx_descriptors < SPIDER_NET_RX_DESCRIPTORS_MIN) { + rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_MIN; + pr_info("adjusting rx descriptors to %i.\n", rx_descriptors); + } + if (rx_descriptors > SPIDER_NET_RX_DESCRIPTORS_MAX) { + rx_descriptors = SPIDER_NET_RX_DESCRIPTORS_MAX; + 
pr_info("adjusting rx descriptors to %i.\n", rx_descriptors); + } + if (tx_descriptors < SPIDER_NET_TX_DESCRIPTORS_MIN) { + tx_descriptors = SPIDER_NET_TX_DESCRIPTORS_MIN; + pr_info("adjusting tx descriptors to %i.\n", tx_descriptors); + } + if (tx_descriptors > SPIDER_NET_TX_DESCRIPTORS_MAX) { + tx_descriptors = SPIDER_NET_TX_DESCRIPTORS_MAX; + pr_info("adjusting tx descriptors to %i.\n", tx_descriptors); + } + + return pci_register_driver(&spider_net_driver); +} + +/** + * spider_net_cleanup - exit function when driver is unloaded + * + * spider_net_cleanup unregisters the device driver + */ +static void __exit spider_net_cleanup(void) +{ + pci_unregister_driver(&spider_net_driver); +} + +module_init(spider_net_init); +module_exit(spider_net_cleanup); diff --git a/drivers/net/spider_net.h b/drivers/net/spider_net.h new file mode 100644 index 000000000000..22b2f2347351 --- /dev/null +++ b/drivers/net/spider_net.h @@ -0,0 +1,469 @@ +/* + * Network device driver for Cell Processor-Based Blade + * + * (C) Copyright IBM Corp. 2005 + * + * Authors : Utz Bacher + * Jens Osterkamp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#ifndef _SPIDER_NET_H +#define _SPIDER_NET_H + +#include "sungem_phy.h" + +extern int spider_net_stop(struct net_device *netdev); +extern int spider_net_open(struct net_device *netdev); + +extern struct ethtool_ops spider_net_ethtool_ops; + +extern char spider_net_driver_name[]; + +#define SPIDER_NET_MAX_MTU 2308 +#define SPIDER_NET_MIN_MTU 64 + +#define SPIDER_NET_RXBUF_ALIGN 128 + +#define SPIDER_NET_RX_DESCRIPTORS_DEFAULT 64 +#define SPIDER_NET_RX_DESCRIPTORS_MIN 16 +#define SPIDER_NET_RX_DESCRIPTORS_MAX 256 + +#define SPIDER_NET_TX_DESCRIPTORS_DEFAULT 64 +#define SPIDER_NET_TX_DESCRIPTORS_MIN 16 +#define SPIDER_NET_TX_DESCRIPTORS_MAX 256 + +#define SPIDER_NET_RX_CSUM_DEFAULT 1 + +/* parenthesized so the product stays a single operand wherever it is used */ +#define SPIDER_NET_WATCHDOG_TIMEOUT (5*HZ) +#define SPIDER_NET_NAPI_WEIGHT 64 + +#define SPIDER_NET_FIRMWARE_LEN 1024 +#define SPIDER_NET_FIRMWARE_NAME "spider_fw.bin" + +/** spider_net SMMIO registers */ +#define SPIDER_NET_GHIINT0STS 0x00000000 +#define SPIDER_NET_GHIINT1STS 0x00000004 +#define SPIDER_NET_GHIINT2STS 0x00000008 +#define SPIDER_NET_GHIINT0MSK 0x00000010 +#define SPIDER_NET_GHIINT1MSK 0x00000014 +#define SPIDER_NET_GHIINT2MSK 0x00000018 + +#define SPIDER_NET_GRESUMINTNUM 0x00000020 +#define SPIDER_NET_GREINTNUM 0x00000024 + +#define SPIDER_NET_GFFRMNUM 0x00000028 +#define SPIDER_NET_GFAFRMNUM 0x0000002c +#define SPIDER_NET_GFBFRMNUM 0x00000030 +#define SPIDER_NET_GFCFRMNUM 0x00000034 +#define SPIDER_NET_GFDFRMNUM 0x00000038 + +/* clear them (don't use it) */ +#define SPIDER_NET_GFREECNNUM 0x0000003c +#define SPIDER_NET_GONETIMENUM 0x00000040 + +#define SPIDER_NET_GTOUTFRMNUM 0x00000044 + +#define SPIDER_NET_GTXMDSET 0x00000050 +#define SPIDER_NET_GPCCTRL 0x00000054 +#define SPIDER_NET_GRXMDSET 0x00000058 +#define SPIDER_NET_GIPSECINIT 0x0000005c +#define SPIDER_NET_GFTRESTRT 0x00000060 +#define SPIDER_NET_GRXDMAEN 0x00000064 +#define SPIDER_NET_GMRWOLCTRL 0x00000068 +#define SPIDER_NET_GPCWOPCMD 0x0000006c +#define SPIDER_NET_GPCROPCMD 0x00000070 +#define
SPIDER_NET_GTTFRMCNT 0x00000078 +#define SPIDER_NET_GTESTMD 0x0000007c + +#define SPIDER_NET_GSINIT 0x00000080 +#define SPIDER_NET_GSnPRGADR 0x00000084 +#define SPIDER_NET_GSnPRGDAT 0x00000088 + +#define SPIDER_NET_GMACOPEMD 0x00000100 +#define SPIDER_NET_GMACLENLMT 0x00000108 +#define SPIDER_NET_GMACINTEN 0x00000118 +#define SPIDER_NET_GMACPHYCTRL 0x00000120 + +#define SPIDER_NET_GMACAPAUSE 0x00000154 +#define SPIDER_NET_GMACTXPAUSE 0x00000164 + +#define SPIDER_NET_GMACMODE 0x000001b0 +#define SPIDER_NET_GMACBSTLMT 0x000001b4 + +#define SPIDER_NET_GMACUNIMACU 0x000001c0 +#define SPIDER_NET_GMACUNIMACL 0x000001c8 + +#define SPIDER_NET_GMRMHFILnR 0x00000400 +#define SPIDER_NET_MULTICAST_HASHES 256 + +#define SPIDER_NET_GMRUAFILnR 0x00000500 +#define SPIDER_NET_GMRUA0FIL15R 0x00000578 + +/* RX DMA controller registers, all 0x00000a.. are for DMA controller A, + * 0x00000b.. for DMA controller B, etc. */ +#define SPIDER_NET_GDADCHA 0x00000a00 +#define SPIDER_NET_GDADMACCNTR 0x00000a04 +#define SPIDER_NET_GDACTDPA 0x00000a08 +#define SPIDER_NET_GDACTDCNT 0x00000a0c +#define SPIDER_NET_GDACDBADDR 0x00000a20 +#define SPIDER_NET_GDACDBSIZE 0x00000a24 +#define SPIDER_NET_GDACNEXTDA 0x00000a28 +#define SPIDER_NET_GDACCOMST 0x00000a2c +#define SPIDER_NET_GDAWBCOMST 0x00000a30 +#define SPIDER_NET_GDAWBRSIZE 0x00000a34 +#define SPIDER_NET_GDAWBVSIZE 0x00000a38 +#define SPIDER_NET_GDAWBTRST 0x00000a3c +#define SPIDER_NET_GDAWBTRERR 0x00000a40 + +/* TX DMA controller registers */ +#define SPIDER_NET_GDTDCHA 0x00000e00 +#define SPIDER_NET_GDTDMACCNTR 0x00000e04 +#define SPIDER_NET_GDTCDPA 0x00000e08 +#define SPIDER_NET_GDTDMASEL 0x00000e14 + +#define SPIDER_NET_ECMODE 0x00000f00 +/* clock and reset control register */ +#define SPIDER_NET_CKRCTRL 0x00000ff0 + +/** SCONFIG registers */ +#define SPIDER_NET_SCONFIG_IOACTE 0x00002810 + +/** hardcoded register values */ +#define SPIDER_NET_INT0_MASK_VALUE 0x3f7fe3ff +#define SPIDER_NET_INT1_MASK_VALUE 0xffffffff +/* no MAC aborts -> 
auto retransmission */ +#define SPIDER_NET_INT2_MASK_VALUE 0xfffffff1 + +/* clear counter when interrupt sources are cleared +#define SPIDER_NET_FRAMENUM_VALUE 0x0001f001 */ +/* we rely on flagged descriptor interrupts */ +#define SPIDER_NET_FRAMENUM_VALUE 0x00000000 +/* set this first, then the FRAMENUM_VALUE */ +#define SPIDER_NET_GFXFRAMES_VALUE 0x00000000 + +#define SPIDER_NET_STOP_SEQ_VALUE 0x00000000 +#define SPIDER_NET_RUN_SEQ_VALUE 0x0000007e + +#define SPIDER_NET_PHY_CTRL_VALUE 0x00040040 +/* #define SPIDER_NET_PHY_CTRL_VALUE 0x01070080*/ +#define SPIDER_NET_RXMODE_VALUE 0x00000011 +/* auto retransmission in case of MAC aborts */ +#define SPIDER_NET_TXMODE_VALUE 0x00010000 +#define SPIDER_NET_RESTART_VALUE 0x00000000 +#define SPIDER_NET_WOL_VALUE 0x00001111 +#if 0 +#define SPIDER_NET_WOL_VALUE 0x00000000 +#endif +#define SPIDER_NET_IPSECINIT_VALUE 0x00f000f8 + +/* pause frames: automatic, no upper retransmission count */ +/* outside loopback mode: ETOMOD signal dont matter, not connected */ +#define SPIDER_NET_OPMODE_VALUE 0x00000063 +/*#define SPIDER_NET_OPMODE_VALUE 0x001b0062*/ +#define SPIDER_NET_LENLMT_VALUE 0x00000908 + +#define SPIDER_NET_MACAPAUSE_VALUE 0x00000800 /* about 1 ms */ +#define SPIDER_NET_TXPAUSE_VALUE 0x00000000 + +#define SPIDER_NET_MACMODE_VALUE 0x00000001 +#define SPIDER_NET_BURSTLMT_VALUE 0x00000200 /* about 16 us */ + +/* 1(0) enable r/tx dma + * 0000000 fixed to 0 + * + * 000000 fixed to 0 + * 0(1) en/disable descr writeback on force end + * 0(1) force end + * + * 000000 fixed to 0 + * 00 burst alignment: 128 bytes + * + * 00000 fixed to 0 + * 0 descr writeback size 32 bytes + * 0(1) descr chain end interrupt enable + * 0(1) descr status writeback enable */ + +/* to set RX_DMA_EN */ +#define SPIDER_NET_DMA_RX_VALUE 0x80000000 +#define SPIDER_NET_DMA_RX_FEND_VALUE 0x00030003 +/* to set TX_DMA_EN */ +#define SPIDER_NET_DMA_TX_VALUE 0x80000000 +#define SPIDER_NET_DMA_TX_FEND_VALUE 0x00030003 + +/* SPIDER_NET_UA_DESCR_VALUE is OR'ed 
with the unicast address */ +#define SPIDER_NET_UA_DESCR_VALUE 0x00080000 +#define SPIDER_NET_PROMISC_VALUE 0x00080000 +#define SPIDER_NET_NONPROMISC_VALUE 0x00000000 + +#define SPIDER_NET_DMASEL_VALUE 0x00000001 + +#define SPIDER_NET_ECMODE_VALUE 0x00000000 + +#define SPIDER_NET_CKRCTRL_RUN_VALUE 0x1fff010f +#define SPIDER_NET_CKRCTRL_STOP_VALUE 0x0000010f + +#define SPIDER_NET_SBIMSTATE_VALUE 0x00000000 +#define SPIDER_NET_SBTMSTATE_VALUE 0x00000000 + +/* SPIDER_NET_GHIINT0STS bits, in reverse order so that they can be used + * with 1 << SPIDER_NET_... */ +enum spider_net_int0_status { + SPIDER_NET_GPHYINT = 0, + SPIDER_NET_GMAC2INT, + SPIDER_NET_GMAC1INT, + SPIDER_NET_GIPSINT, + SPIDER_NET_GFIFOINT, + SPIDER_NET_GDMACINT, + SPIDER_NET_GSYSINT, + SPIDER_NET_GPWOPCMPINT, + SPIDER_NET_GPROPCMPINT, + SPIDER_NET_GPWFFINT, + SPIDER_NET_GRMDADRINT, + SPIDER_NET_GRMARPINT, + SPIDER_NET_GRMMPINT, + SPIDER_NET_GDTDEN0INT, + SPIDER_NET_GDDDEN0INT, + SPIDER_NET_GDCDEN0INT, + SPIDER_NET_GDBDEN0INT, + SPIDER_NET_GDADEN0INT, + SPIDER_NET_GDTFDCINT, + SPIDER_NET_GDDFDCINT, + SPIDER_NET_GDCFDCINT, + SPIDER_NET_GDBFDCINT, + SPIDER_NET_GDAFDCINT, + SPIDER_NET_GTTEDINT, + SPIDER_NET_GDTDCEINT, + SPIDER_NET_GRFDNMINT, + SPIDER_NET_GRFCNMINT, + SPIDER_NET_GRFBNMINT, + SPIDER_NET_GRFANMINT, + SPIDER_NET_GRFNMINT, + SPIDER_NET_G1TMCNTINT, + SPIDER_NET_GFREECNTINT +}; +/* GHIINT1STS bits */ +enum spider_net_int1_status { + SPIDER_NET_GTMFLLINT = 0, + SPIDER_NET_GRMFLLINT, + SPIDER_NET_GTMSHTINT, + SPIDER_NET_GDTINVDINT, + SPIDER_NET_GRFDFLLINT, + SPIDER_NET_GDDDCEINT, + SPIDER_NET_GDDINVDINT, + SPIDER_NET_GRFCFLLINT, + SPIDER_NET_GDCDCEINT, + SPIDER_NET_GDCINVDINT, + SPIDER_NET_GRFBFLLINT, + SPIDER_NET_GDBDCEINT, + SPIDER_NET_GDBINVDINT, + SPIDER_NET_GRFAFLLINT, + SPIDER_NET_GDADCEINT, + SPIDER_NET_GDAINVDINT, + SPIDER_NET_GDTRSERINT, + SPIDER_NET_GDDRSERINT, + SPIDER_NET_GDCRSERINT, + SPIDER_NET_GDBRSERINT, + SPIDER_NET_GDARSERINT, + SPIDER_NET_GDSERINT, + SPIDER_NET_GDTPTERINT, + 
SPIDER_NET_GDDPTERINT, + SPIDER_NET_GDCPTERINT, + SPIDER_NET_GDBPTERINT, + SPIDER_NET_GDAPTERINT +}; +/* GHIINT2STS bits */ +enum spider_net_int2_status { + SPIDER_NET_GPROPERINT = 0, + SPIDER_NET_GMCTCRSNGINT, + SPIDER_NET_GMCTLCOLINT, + SPIDER_NET_GMCTTMOTINT, + SPIDER_NET_GMCRCAERINT, + SPIDER_NET_GMCRCALERINT, + SPIDER_NET_GMCRALNERINT, + SPIDER_NET_GMCROVRINT, + SPIDER_NET_GMCRRNTINT, + SPIDER_NET_GMCRRXERINT, + SPIDER_NET_GTITCSERINT, + SPIDER_NET_GTIFMTERINT, + SPIDER_NET_GTIPKTRVKINT, + SPIDER_NET_GTISPINGINT, + SPIDER_NET_GTISADNGINT, + SPIDER_NET_GTISPDNGINT, + SPIDER_NET_GRIFMTERINT, + SPIDER_NET_GRIPKTRVKINT, + SPIDER_NET_GRISPINGINT, + SPIDER_NET_GRISADNGINT, + SPIDER_NET_GRISPDNGINT +}; + +#define SPIDER_NET_TXINT ( (1 << SPIDER_NET_GTTEDINT) | \ + (1 << SPIDER_NET_GDTDCEINT) | \ + (1 << SPIDER_NET_GDTFDCINT) ) + +/* we rely on flagged descriptor interrupts*/ +#define SPIDER_NET_RXINT ( (1 << SPIDER_NET_GDAFDCINT) | \ + (1 << SPIDER_NET_GRMFLLINT) ) + +#define SPIDER_NET_GPREXEC 0x80000000 +#define SPIDER_NET_GPRDAT_MASK 0x0000ffff + +/* descriptor bits + * + * 1010 descriptor ready + * 0 descr in middle of chain + * 000 fixed to 0 + * + * 0 no interrupt on completion + * 000 fixed to 0 + * 1 no ipsec processing + * 1 last descriptor for this frame + * 00 no checksum + * 10 tcp checksum + * 11 udp checksum + * + * 00 fixed to 0 + * 0 fixed to 0 + * 0 no interrupt on response errors + * 0 no interrupt on invalid descr + * 0 no interrupt on dma process termination + * 0 no interrupt on descr chain end + * 0 no interrupt on descr complete + * + * 000 fixed to 0 + * 0 response error interrupt status + * 0 invalid descr status + * 0 dma termination status + * 0 descr chain end status + * 0 descr complete status */ +#define SPIDER_NET_DMAC_CMDSTAT_NOCS 0xa00c0000 +#define SPIDER_NET_DMAC_CMDSTAT_TCPCS 0xa00e0000 +#define SPIDER_NET_DMAC_CMDSTAT_UDPCS 0xa00f0000 +#define SPIDER_NET_DESCR_IND_PROC_SHIFT 28 +#define SPIDER_NET_DESCR_IND_PROC_MASKO 0x0fffffff + 
+/* descr ready, descr is in middle of chain, get interrupt on completion */ +#define SPIDER_NET_DMAC_RX_CARDOWNED 0xa0800000 + +/* multicast is no problem */ +#define SPIDER_NET_DATA_ERROR_MASK 0xffffbfff + +enum spider_net_descr_status { + SPIDER_NET_DESCR_COMPLETE = 0x00, /* used in rx and tx */ + SPIDER_NET_DESCR_RESPONSE_ERROR = 0x01, /* used in rx and tx */ + SPIDER_NET_DESCR_PROTECTION_ERROR = 0x02, /* used in rx and tx */ + SPIDER_NET_DESCR_FRAME_END = 0x04, /* used in rx */ + SPIDER_NET_DESCR_FORCE_END = 0x05, /* used in rx and tx */ + SPIDER_NET_DESCR_CARDOWNED = 0x0a, /* used in rx and tx */ + SPIDER_NET_DESCR_NOT_IN_USE /* any other value */ +}; + +struct spider_net_descr { + /* as defined by the hardware */ + dma_addr_t buf_addr; + u32 buf_size; + dma_addr_t next_descr_addr; + u32 dmac_cmd_status; + u32 result_size; + u32 valid_size; /* all zeroes for tx */ + u32 data_status; + u32 data_error; /* all zeroes for tx */ + + /* used in the driver */ + struct sk_buff *skb; + dma_addr_t bus_addr; + struct spider_net_descr *next; + struct spider_net_descr *prev; +} __attribute__((aligned(32))); + +struct spider_net_descr_chain { + /* we walk from tail to head */ + struct spider_net_descr *head; + struct spider_net_descr *tail; +}; + +/* descriptor data_status bits */ +#define SPIDER_NET_RXIPCHK 29 +#define SPIDER_NET_TCPUDPIPCHK 28 +#define SPIDER_NET_DATA_STATUS_CHK_MASK (1 << SPIDER_NET_RXIPCHK | \ + 1 << SPIDER_NET_TCPUDPIPCHK) + +#define SPIDER_NET_VLAN_PACKET 21 + +/* descriptor data_error bits */ +#define SPIDER_NET_RXIPCHKERR 27 +#define SPIDER_NET_RXTCPCHKERR 26 +#define SPIDER_NET_DATA_ERROR_CHK_MASK (1 << SPIDER_NET_RXIPCHKERR | \ + 1 << SPIDER_NET_RXTCPCHKERR) + +/* the cases we don't pass the packet to the stack */ +#define SPIDER_NET_DESTROY_RX_FLAGS 0x70138000 + +#define SPIDER_NET_DESCR_SIZE 32 + +/* this will be bigger some time */ +struct spider_net_options { + int rx_csum; /* for rx: if 0 ip_summed=NONE, + if 1 and hw has verified, 
ip_summed=UNNECESSARY */ +}; + +#define SPIDER_NET_DEFAULT_MSG ( NETIF_MSG_DRV | \ + NETIF_MSG_PROBE | \ + NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | \ + NETIF_MSG_IFDOWN | \ + NETIF_MSG_IFUP | \ + NETIF_MSG_RX_ERR | \ + NETIF_MSG_TX_ERR | \ + NETIF_MSG_TX_QUEUED | \ + NETIF_MSG_INTR | \ + NETIF_MSG_TX_DONE | \ + NETIF_MSG_RX_STATUS | \ + NETIF_MSG_PKTDATA | \ + NETIF_MSG_HW | \ + NETIF_MSG_WOL ) + +struct spider_net_card { + struct net_device *netdev; + struct pci_dev *pdev; + struct mii_phy phy; + + void __iomem *regs; + + struct spider_net_descr_chain tx_chain; + struct spider_net_descr_chain rx_chain; + spinlock_t chain_lock; + + struct net_device_stats netdev_stats; + + struct spider_net_options options; + + spinlock_t intmask_lock; + + struct work_struct tx_timeout_task; + atomic_t tx_timeout_task_counter; + wait_queue_head_t waitq; + + /* for ethtool */ + int msg_enable; + + struct spider_net_descr descr[0]; +}; + +#define pr_err(fmt,arg...) \ + printk(KERN_ERR fmt ,##arg) + +#endif diff --git a/drivers/net/spider_net_ethtool.c b/drivers/net/spider_net_ethtool.c new file mode 100644 index 000000000000..9447c2ccd70a --- /dev/null +++ b/drivers/net/spider_net_ethtool.c @@ -0,0 +1,107 @@ +/* + * Network device driver for Cell Processor-Based Blade + * + * (C) Copyright IBM Corp. 2005 + * + * Authors : Utz Bacher + * Jens Osterkamp + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/netdevice.h> +#include <linux/ethtool.h> +#include <linux/pci.h> + +#include "spider_net.h" + +/* fills drvinfo with driver name, version, a firmware placeholder and + * the PCI bus name of the card */ +static void +spider_net_ethtool_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct spider_net_card *card; + card = netdev_priv(netdev); + + /* clear and fill out info */ + memset(drvinfo, 0, sizeof(struct ethtool_drvinfo)); + strncpy(drvinfo->driver, spider_net_driver_name, 32); + strncpy(drvinfo->version, "0.1", 32); + strcpy(drvinfo->fw_version, "no information"); + strncpy(drvinfo->bus_info, pci_name(card->pdev), 32); +} + +static void +spider_net_ethtool_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wolinfo) +{ + /* no support for wol */ + wolinfo->supported = 0; + wolinfo->wolopts = 0; +} + +/* returns the card's current msg_enable value */ +static u32 +spider_net_ethtool_get_msglevel(struct net_device *netdev) +{ + struct spider_net_card *card; + card = netdev_priv(netdev); + return card->msg_enable; +} + +/* stores level as the card's msg_enable value */ +static void +spider_net_ethtool_set_msglevel(struct net_device *netdev, + u32 level) +{ + struct spider_net_card *card; + card = netdev_priv(netdev); + card->msg_enable = level; +} + +/* restarts a running interface by stopping and reopening it; always + * returns 0 */ +static int +spider_net_ethtool_nway_reset(struct net_device *netdev) +{ + if (netif_running(netdev)) { + spider_net_stop(netdev); + spider_net_open(netdev); + } + return 0; +} + +/* returns the rx_csum option of the card */ +static u32 +spider_net_ethtool_get_rx_csum(struct net_device *netdev) +{ + /* netdev_priv() like the other handlers in this file */ + struct spider_net_card *card = netdev_priv(netdev); + + return card->options.rx_csum; +} + +/* stores n as the rx_csum option of the card; always returns 0 */ +static int +spider_net_ethtool_set_rx_csum(struct net_device *netdev, u32 n) +{ + /* netdev_priv() like the other handlers in this file */ + struct spider_net_card *card = netdev_priv(netdev); + + card->options.rx_csum = n; + return 0; +} + +struct ethtool_ops spider_net_ethtool_ops = { + .get_drvinfo = spider_net_ethtool_get_drvinfo, + .get_wol = spider_net_ethtool_get_wol, + .get_msglevel = spider_net_ethtool_get_msglevel, + .set_msglevel =
spider_net_ethtool_set_msglevel, + .nway_reset = spider_net_ethtool_nway_reset, + .get_rx_csum = spider_net_ethtool_get_rx_csum, + .set_rx_csum = spider_net_ethtool_set_rx_csum, +}; + diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 95c941f8c747..ee0ab7a5f91b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1612,6 +1612,7 @@ #define PCI_DEVICE_ID_TOSHIBA_TC35815CF 0x0030 #define PCI_DEVICE_ID_TOSHIBA_TX4927 0x0180 #define PCI_DEVICE_ID_TOSHIBA_TC86C001_MISC 0x0108 +#define PCI_DEVICE_ID_TOSHIBA_SPIDER_NET 0x01b3 #define PCI_VENDOR_ID_RICOH 0x1180 #define PCI_DEVICE_ID_RICOH_RL5C465 0x0465 -- cgit v1.2.3 From 6582c164f2b3b6e58d1f13c1c031b19ee691eb14 Mon Sep 17 00:00:00 2001 From: Jean Tourrilhes Date: Fri, 2 Sep 2005 11:32:28 -0700 Subject: [PATCH] WE-19 for kernel 2.6.13 Hi Jeff, This is version 19 of the Wireless Extensions. It was supposed to be the fallback of the WPA API changes, but people seem quite happy about it (especially Jouni), so the patch is rather small. The patch has been fully tested with 2.6.13 and various wireless drivers, and is in its final version. Would you mind pushing that into Linus's kernel so that the driver and the apps can take advantage of it ? It includes : o iwstat improvement (explicit dBm). This is the result of long discussions with Dan Williams, the author of NetworkManager. Thanks to him for all the fruitful feedback. o remove pointer from event stream. I was not totally sure if this pointer was 32-64 bits clean, so I'd rather remove it and be at peace with it. o remove linux header from wireless.h. This has long been requested by people writing user space apps, now it's done, and it was not even painful. o final deprecation of spy_offset. You did not like it, it's now gone for good. o Start deprecating dev->get_wireless_stats -> debloat netdev o Add "check" version of event macros for ieee802.11 stack.
Jiri Benc doesn't like the current macros, we aim to please ;-) All those changes, except the last one, have been bit-rotting on my web pages for a while... Patches for most kernel drivers will follow. Patches for the Orinoco and the HostAP drivers have been sent to their respective maintainers. Have fun... Jean Signed-off-by: Jeff Garzik --- include/linux/wireless.h | 47 ++++++++++++------ include/net/iw_handler.h | 123 +++++++++++++++++++++++++++++++++++++++++------ net/core/wireless.c | 58 +++++++++++++--------- 3 files changed, 175 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wireless.h b/include/linux/wireless.h index ae485f9c916e..dab5afdaf71c 100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -1,7 +1,7 @@ /* * This file define a set of standard wireless extensions * - * Version : 18 12.3.05 + * Version : 19 18.3.05 * * Authors : Jean Tourrilhes - HPL - * Copyright (c) 1997-2005 Jean Tourrilhes, All Rights Reserved. @@ -69,11 +69,12 @@ /***************************** INCLUDES *****************************/ -/* To minimise problems in user space, I might remove those headers - * at some point. Jean II */ -#include <linux/types.h> /* for "caddr_t" et al */ -#include <linux/socket.h> /* for "struct sockaddr" et al */ -#include <linux/if.h> /* for IFNAMSIZ and co... */ +/* Do not put any header in this file, this creates a mess when + * exported to user space. Most users have included all the + * relevant headers anyway... Jean II */ +/*#include <linux/types.h> */ /* for "caddr_t" et al */ +/*#include <linux/socket.h> */ /* for "struct sockaddr" et al */ +/*#include <linux/if.h> */ /* for IFNAMSIZ and co... */ /***************************** VERSION *****************************/ /* @@ -82,7 +83,7 @@ * (there is some stuff that will be added in the future...) * I just plan to increment with each new version.
*/ -#define WIRELESS_EXT 18 +#define WIRELESS_EXT 19 /* * Changes : @@ -197,6 +198,15 @@ * related parameters (extensible up to 4096 parameter values) * - Add wireless events: IWEVGENIE, IWEVMICHAELMICFAILURE, * IWEVASSOCREQIE, IWEVASSOCRESPIE, IWEVPMKIDCAND + * + * V18 to V19 + * ---------- + * - Remove (struct iw_point *)->pointer from events and streams + * - Remove header includes to help user space + * - Increase IW_ENCODING_TOKEN_MAX from 32 to 64 + * - Add IW_QUAL_ALL_UPDATED and IW_QUAL_ALL_INVALID macros + * - Add explicit flag to tell stats are in dBm : IW_QUAL_DBM + * - Add IW_IOCTL_IDX() and IW_EVENT_IDX() macros */ /**************************** CONSTANTS ****************************/ @@ -322,6 +332,7 @@ /* The first and the last (range) */ #define SIOCIWFIRST 0x8B00 #define SIOCIWLAST SIOCIWLASTPRIV /* 0x8BFF */ +#define IW_IOCTL_IDX(cmd) ((cmd) - SIOCIWFIRST) /* Even : get (world access), odd : set (root access) */ #define IW_IS_SET(cmd) (!((cmd) & 0x1)) @@ -366,6 +377,7 @@ * (struct iw_pmkid_cand) */ #define IWEVFIRST 0x8C00 +#define IW_EVENT_IDX(cmd) ((cmd) - IWEVFIRST) /* ------------------------- PRIVATE INFO ------------------------- */ /* @@ -427,12 +439,15 @@ #define IW_MODE_MONITOR 6 /* Passive monitor (listen only) */ /* Statistics flags (bitmask in updated) */ -#define IW_QUAL_QUAL_UPDATED 0x1 /* Value was updated since last read */ -#define IW_QUAL_LEVEL_UPDATED 0x2 -#define IW_QUAL_NOISE_UPDATED 0x4 +#define IW_QUAL_QUAL_UPDATED 0x01 /* Value was updated since last read */ +#define IW_QUAL_LEVEL_UPDATED 0x02 +#define IW_QUAL_NOISE_UPDATED 0x04 +#define IW_QUAL_ALL_UPDATED 0x07 +#define IW_QUAL_DBM 0x08 /* Level + Noise are dBm */ #define IW_QUAL_QUAL_INVALID 0x10 /* Driver doesn't provide value */ #define IW_QUAL_LEVEL_INVALID 0x20 #define IW_QUAL_NOISE_INVALID 0x40 +#define IW_QUAL_ALL_INVALID 0x70 /* Frequency flags */ #define IW_FREQ_AUTO 0x00 /* Let the driver decides */ @@ -443,7 +458,7 @@ #define IW_MAX_ENCODING_SIZES 8 /* Maximum 
size of the encoding token in bytes */ -#define IW_ENCODING_TOKEN_MAX 32 /* 256 bits (for now) */ +#define IW_ENCODING_TOKEN_MAX 64 /* 512 bits (for now) */ /* Flags for encoding (along with the token) */ #define IW_ENCODE_INDEX 0x00FF /* Token index (if needed) */ @@ -1039,12 +1054,16 @@ struct iw_event #define IW_EV_CHAR_LEN (IW_EV_LCP_LEN + IFNAMSIZ) #define IW_EV_UINT_LEN (IW_EV_LCP_LEN + sizeof(__u32)) #define IW_EV_FREQ_LEN (IW_EV_LCP_LEN + sizeof(struct iw_freq)) -#define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point)) #define IW_EV_PARAM_LEN (IW_EV_LCP_LEN + sizeof(struct iw_param)) #define IW_EV_ADDR_LEN (IW_EV_LCP_LEN + sizeof(struct sockaddr)) #define IW_EV_QUAL_LEN (IW_EV_LCP_LEN + sizeof(struct iw_quality)) -/* Note : in the case of iw_point, the extra data will come at the - * end of the event */ +/* iw_point events are special. First, the payload (extra data) come at + * the end of the event, so they are bigger than IW_EV_POINT_LEN. Second, + * we omit the pointer, so start at an offset. */ +#define IW_EV_POINT_OFF (((char *) &(((struct iw_point *) NULL)->length)) - \ + (char *) NULL) +#define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point) - \ + IW_EV_POINT_OFF) #endif /* _LINUX_WIRELESS_H */ diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h index 44edd48f1234..d67c8393a343 100644 --- a/include/net/iw_handler.h +++ b/include/net/iw_handler.h @@ -1,10 +1,10 @@ /* * This file define the new driver API for Wireless Extensions * - * Version : 6 21.6.04 + * Version : 7 18.3.05 * * Authors : Jean Tourrilhes - HPL - - * Copyright (c) 2001-2004 Jean Tourrilhes, All Rights Reserved. + * Copyright (c) 2001-2005 Jean Tourrilhes, All Rights Reserved. */ #ifndef _IW_HANDLER_H @@ -207,7 +207,7 @@ * will be needed... * I just plan to increment with each new version. 
*/ -#define IW_HANDLER_VERSION 6 +#define IW_HANDLER_VERSION 7 /* * Changes : @@ -232,6 +232,13 @@ * - Remove spy #ifdef, they are always on -> cleaner code * - Add IW_DESCR_FLAG_NOMAX flag for very large requests * - Start migrating get_wireless_stats to struct iw_handler_def + * + * V6 to V7 + * -------- + * - Add struct ieee80211_device pointer in struct iw_public_data + * - Remove (struct iw_point *)->pointer from events and streams + * - Remove spy_offset from struct iw_handler_def + * - Add "check" version of event macros for ieee802.11 stack */ /**************************** CONSTANTS ****************************/ @@ -334,9 +341,6 @@ struct iw_handler_def * We will automatically export that to user space... */ const struct iw_priv_args * private_args; - /* This field will be *removed* in the next version of WE */ - long spy_offset; /* DO NOT USE */ - /* New location of get_wireless_stats, to de-bloat struct net_device. * The old pointer in struct net_device will be gradually phased * out, and drivers are encouraged to use this one... */ @@ -400,16 +404,21 @@ struct iw_spy_data /* --------------------- DEVICE WIRELESS DATA --------------------- */ /* * This is all the wireless data specific to a device instance that - * is managed by the core of Wireless Extensions. + * is managed by the core of Wireless Extensions or the 802.11 layer. * We only keep pointer to those structures, so that a driver is free * to share them between instances. * This structure should be initialised before registering the device. * Access to this data follow the same rules as any other struct net_device * data (i.e. valid as long as struct net_device exist, same locking rules). 
*/ +/* Forward declaration */ +struct ieee80211_device; +/* The struct */ struct iw_public_data { /* Driver enhanced spy support */ - struct iw_spy_data * spy_data; + struct iw_spy_data * spy_data; + /* Structure managed by the in-kernel IEEE 802.11 layer */ + struct ieee80211_device * ieee80211; }; /**************************** PROTOTYPES ****************************/ @@ -424,7 +433,7 @@ struct iw_public_data { extern int dev_get_wireless_info(char * buffer, char **start, off_t offset, int length); -/* Handle IOCTLs, called in net/code/dev.c */ +/* Handle IOCTLs, called in net/core/dev.c */ extern int wireless_process_ioctl(struct ifreq *ifr, unsigned int cmd); /* Second : functions that may be called by driver modules */ @@ -479,7 +488,7 @@ iwe_stream_add_event(char * stream, /* Stream of events */ int event_len) /* Real size of payload */ { /* Check if it's possible */ - if((stream + event_len) < ends) { + if(likely((stream + event_len) < ends)) { iwe->len = event_len; memcpy(stream, (char *) iwe, event_len); stream += event_len; @@ -495,14 +504,17 @@ iwe_stream_add_event(char * stream, /* Stream of events */ static inline char * iwe_stream_add_point(char * stream, /* Stream of events */ char * ends, /* End of stream */ - struct iw_event *iwe, /* Payload */ - char * extra) + struct iw_event *iwe, /* Payload length + flags */ + char * extra) /* More payload */ { int event_len = IW_EV_POINT_LEN + iwe->u.data.length; /* Check if it's possible */ - if((stream + event_len) < ends) { + if(likely((stream + event_len) < ends)) { iwe->len = event_len; - memcpy(stream, (char *) iwe, IW_EV_POINT_LEN); + memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, + IW_EV_POINT_LEN - IW_EV_LCP_LEN); memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); stream += event_len; } @@ -526,7 +538,7 @@ iwe_stream_add_value(char * event, /* Event in the stream */ event_len -= IW_EV_LCP_LEN; /* Check if it's 
possible */ - if((value + event_len) < ends) { + if(likely((value + event_len) < ends)) { /* Add new value */ memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len); value += event_len; @@ -537,4 +549,85 @@ iwe_stream_add_value(char * event, /* Event in the stream */ return value; } +/*------------------------------------------------------------------*/ +/* + * Wrapper to add an Wireless Event to a stream of events. + * Same as above, with explicit error check... + */ +static inline char * +iwe_stream_check_add_event(char * stream, /* Stream of events */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload */ + int event_len, /* Size of payload */ + int * perr) /* Error report */ +{ + /* Check if it's possible, set error if not */ + if(likely((stream + event_len) < ends)) { + iwe->len = event_len; + memcpy(stream, (char *) iwe, event_len); + stream += event_len; + } else + *perr = -E2BIG; + return stream; +} + +/*------------------------------------------------------------------*/ +/* + * Wrapper to add an short Wireless Event containing a pointer to a + * stream of events. + * Same as above, with explicit error check... 
+ */ +static inline char * +iwe_stream_check_add_point(char * stream, /* Stream of events */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload length + flags */ + char * extra, /* More payload */ + int * perr) /* Error report */ +{ + int event_len = IW_EV_POINT_LEN + iwe->u.data.length; + /* Check if it's possible */ + if(likely((stream + event_len) < ends)) { + iwe->len = event_len; + memcpy(stream, (char *) iwe, IW_EV_LCP_LEN); + memcpy(stream + IW_EV_LCP_LEN, + ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF, + IW_EV_POINT_LEN - IW_EV_LCP_LEN); + memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length); + stream += event_len; + } else + *perr = -E2BIG; + return stream; +} + +/*------------------------------------------------------------------*/ +/* + * Wrapper to add a value to a Wireless Event in a stream of events. + * Be careful, this one is tricky to use properly : + * At the first run, you need to have (value = event + IW_EV_LCP_LEN). + * Same as above, with explicit error check... 
+ */ +static inline char * +iwe_stream_check_add_value(char * event, /* Event in the stream */ + char * value, /* Value in event */ + char * ends, /* End of stream */ + struct iw_event *iwe, /* Payload */ + int event_len, /* Size of payload */ + int * perr) /* Error report */ +{ + /* Don't duplicate LCP */ + event_len -= IW_EV_LCP_LEN; + + /* Check if it's possible */ + if(likely((value + event_len) < ends)) { + /* Add new value */ + memcpy(value, (char *) iwe + IW_EV_LCP_LEN, event_len); + value += event_len; + /* Patch LCP */ + iwe->len = value - event; + memcpy(event, (char *) iwe, IW_EV_LCP_LEN); + } else + *perr = -E2BIG; + return value; +} + #endif /* _IW_HANDLER_H */ diff --git a/net/core/wireless.c b/net/core/wireless.c index 5caae2399f3a..d17f1583ea3e 100644 --- a/net/core/wireless.c +++ b/net/core/wireless.c @@ -58,6 +58,13 @@ * o Add wmb() in iw_handler_set_spy() for non-coherent archs/cpus * Based on patch from Pavel Roskin : * o Fix kernel data leak to user space in private handler handling + * + * v7 - 18.3.05 - Jean II + * o Remove (struct iw_point *)->pointer from events and streams + * o Remove spy_offset from struct iw_handler_def + * o Start deprecating dev->get_wireless_stats, output a warning + * o If IW_QUAL_DBM is set, show dBm values in /proc/net/wireless + * o Don't loose INVALID/DBM flags when clearing UPDATED flags (iwstats) */ /***************************** INCLUDES *****************************/ @@ -446,10 +453,14 @@ static inline struct iw_statistics *get_wireless_stats(struct net_device *dev) (dev->wireless_handlers->get_wireless_stats != NULL)) return dev->wireless_handlers->get_wireless_stats(dev); - /* Old location, will be phased out in next WE */ - return (dev->get_wireless_stats ? 
- dev->get_wireless_stats(dev) : - (struct iw_statistics *) NULL); + /* Old location, field to be removed in next WE */ + if(dev->get_wireless_stats) { + printk(KERN_DEBUG "%s (WE) : Driver using old /proc/net/wireless support, please fix driver !\n", + dev->name); + return dev->get_wireless_stats(dev); + } + /* Not found */ + return (struct iw_statistics *) NULL; } /* ---------------------------------------------------------------- */ @@ -541,16 +552,18 @@ static __inline__ void wireless_seq_printf_stats(struct seq_file *seq, dev->name, stats->status, stats->qual.qual, stats->qual.updated & IW_QUAL_QUAL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.level), + ((__s32) stats->qual.level) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_LEVEL_UPDATED ? '.' : ' ', - ((__u8) stats->qual.noise), + ((__s32) stats->qual.noise) - + ((stats->qual.updated & IW_QUAL_DBM) ? 0x100 : 0), stats->qual.updated & IW_QUAL_NOISE_UPDATED ? '.' : ' ', stats->discard.nwid, stats->discard.code, stats->discard.fragment, stats->discard.retries, stats->discard.misc, stats->miss.beacon); - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; } } @@ -593,6 +606,7 @@ static struct file_operations wireless_seq_fops = { int __init wireless_proc_init(void) { + /* Create /proc/net/wireless entry */ if (!proc_net_fops_create("wireless", S_IRUGO, &wireless_seq_fops)) return -ENOMEM; @@ -627,9 +641,9 @@ static inline int dev_iwstats(struct net_device *dev, struct ifreq *ifr) sizeof(struct iw_statistics))) return -EFAULT; - /* Check if we need to clear the update flag */ + /* Check if we need to clear the updated flag */ if(wrq->u.data.flags != 0) - stats->qual.updated = 0; + stats->qual.updated &= ~IW_QUAL_ALL_UPDATED; return 0; } else return -EOPNOTSUPP; @@ -1161,10 +1175,11 @@ void wireless_send_event(struct net_device * dev, struct iw_event *event; /* Mallocated whole event */ int event_len; /* Its size */ int hdr_len; /* Size of the event header 
*/ + int wrqu_off = 0; /* Offset in wrqu */ /* Don't "optimise" the following variable, it will crash */ unsigned cmd_index; /* *MUST* be unsigned */ - /* Get the description of the IOCTL */ + /* Get the description of the Event */ if(cmd <= SIOCIWLAST) { cmd_index = cmd - SIOCIWFIRST; if(cmd_index < standard_ioctl_num) @@ -1207,6 +1222,8 @@ void wireless_send_event(struct net_device * dev, /* Calculate extra_len - extra is NULL for restricted events */ if(extra != NULL) extra_len = wrqu->data.length * descr->token_size; + /* Always at an offset in wrqu */ + wrqu_off = IW_EV_POINT_OFF; #ifdef WE_EVENT_DEBUG printk(KERN_DEBUG "%s (WE) : Event 0x%04X, tokens %d, extra_len %d\n", dev->name, cmd, wrqu->data.length, extra_len); #endif /* WE_EVENT_DEBUG */ @@ -1217,7 +1234,7 @@ void wireless_send_event(struct net_device * dev, event_len = hdr_len + extra_len; #ifdef WE_EVENT_DEBUG - printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, event_len %d\n", dev->name, cmd, hdr_len, event_len); + printk(KERN_DEBUG "%s (WE) : Event 0x%04X, hdr_len %d, wrqu_off %d, event_len %d\n", dev->name, cmd, hdr_len, wrqu_off, event_len); #endif /* WE_EVENT_DEBUG */ /* Create temporary buffer to hold the event */ @@ -1228,7 +1245,7 @@ void wireless_send_event(struct net_device * dev, /* Fill event */ event->len = event_len; event->cmd = cmd; - memcpy(&event->u, wrqu, hdr_len - IW_EV_LCP_LEN); + memcpy(&event->u, ((char *) wrqu) + wrqu_off, hdr_len - IW_EV_LCP_LEN); if(extra != NULL) memcpy(((char *) event) + hdr_len, extra, extra_len); @@ -1249,7 +1266,7 @@ void wireless_send_event(struct net_device * dev, * Now, the driver can delegate this task to Wireless Extensions. * It needs to use those standard spy iw_handler in struct iw_handler_def, * push data to us via wireless_spy_update() and include struct iw_spy_data - * in its private part (and advertise it in iw_handler_def->spy_offset). + * in its private part (and export it in net_device->wireless_data->spy_data). 
* One of the main advantage of centralising spy support here is that * it becomes much easier to improve and extend it without having to touch * the drivers. One example is the addition of the Spy-Threshold events. @@ -1266,10 +1283,7 @@ static inline struct iw_spy_data * get_spydata(struct net_device *dev) /* This is the new way */ if(dev->wireless_data) return(dev->wireless_data->spy_data); - - /* This is the old way. Doesn't work for multi-headed drivers. - * It will be removed in the next version of WE. */ - return (dev->priv + dev->wireless_handlers->spy_offset); + return NULL; } /*------------------------------------------------------------------*/ @@ -1284,10 +1298,6 @@ int iw_handler_set_spy(struct net_device * dev, struct iw_spy_data * spydata = get_spydata(dev); struct sockaddr * address = (struct sockaddr *) extra; - if(!dev->wireless_data) - /* Help user know that driver needs updating */ - printk(KERN_DEBUG "%s (WE) : Driver using old/buggy spy support, please fix driver !\n", - dev->name); /* Make sure driver is not buggy or using the old API */ if(!spydata) return -EOPNOTSUPP; @@ -1318,7 +1328,7 @@ int iw_handler_set_spy(struct net_device * dev, sizeof(struct iw_quality) * IW_MAX_SPY); #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "iw_handler_set_spy() : offset %ld, spydata %p, num %d\n", dev->wireless_handlers->spy_offset, spydata, wrqu->data.length); + printk(KERN_DEBUG "iw_handler_set_spy() : wireless_data %p, spydata %p, num %d\n", dev->wireless_data, spydata, wrqu->data.length); for (i = 0; i < wrqu->data.length; i++) printk(KERN_DEBUG "%02X:%02X:%02X:%02X:%02X:%02X \n", @@ -1371,7 +1381,7 @@ int iw_handler_get_spy(struct net_device * dev, sizeof(struct iw_quality) * spydata->spy_number); /* Reset updated flags. 
*/ for(i = 0; i < spydata->spy_number; i++) - spydata->spy_stat[i].updated = 0; + spydata->spy_stat[i].updated &= ~IW_QUAL_ALL_UPDATED; return 0; } @@ -1486,7 +1496,7 @@ void wireless_spy_update(struct net_device * dev, return; #ifdef WE_SPY_DEBUG - printk(KERN_DEBUG "wireless_spy_update() : offset %ld, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_handlers->spy_offset, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); + printk(KERN_DEBUG "wireless_spy_update() : wireless_data %p, spydata %p, address %02X:%02X:%02X:%02X:%02X:%02X\n", dev->wireless_data, spydata, address[0], address[1], address[2], address[3], address[4], address[5]); #endif /* WE_SPY_DEBUG */ /* Update all records that match */ -- cgit v1.2.3 From bbeec90b98a3066f6f2b8d41c80561f5665e4631 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Wed, 7 Sep 2005 00:27:54 -0400 Subject: [wireless] build fixes after merging WE-19 --- drivers/net/wireless/airo.c | 2 +- drivers/net/wireless/ray_cs.c | 1 + include/linux/wireless.h | 9 +++------ net/ieee80211/ieee80211_wx.c | 5 +++-- 4 files changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 89c2ff9570d5..2be65d308fbe 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -6867,7 +6867,7 @@ static inline char *airo_translate_scan(struct net_device *dev, } else { iwe.u.qual.level = (bss->dBm + 321) / 2; iwe.u.qual.qual = 0; - iwe.u.qual.updated = IW_QUAL_QUAL_INVALID; + iwe.u.qual.updated = IW_QUAL_QUAL_INVALID | IW_QUAL_LEVEL_UPDATED | IW_QUAL_DBM; } diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 74d66eeddef2..e9c5ea0f5535 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -53,6 +53,7 @@ #include #include +#include #include #include diff --git a/include/linux/wireless.h b/include/linux/wireless.h index dab5afdaf71c..a555a0f7a7b4 
100644 --- a/include/linux/wireless.h +++ b/include/linux/wireless.h @@ -69,12 +69,9 @@ /***************************** INCLUDES *****************************/ -/* Do not put any header in this file, this creates a mess when - * exported to user space. Most users have included all the - * relevant headers anyway... Jean II */ -/*#include */ /* for "caddr_t" et al */ -/*#include */ /* for "struct sockaddr" et al */ -/*#include */ /* for IFNAMSIZ and co... */ +#include /* for "caddr_t" et al */ +#include /* for "struct sockaddr" et al */ +#include /* for IFNAMSIZ and co... */ /***************************** VERSION *****************************/ /* diff --git a/net/ieee80211/ieee80211_wx.c b/net/ieee80211/ieee80211_wx.c index 2cd571c525a9..510a1716a4f0 100644 --- a/net/ieee80211/ieee80211_wx.c +++ b/net/ieee80211/ieee80211_wx.c @@ -29,12 +29,13 @@ Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 ******************************************************************************/ -#include -#include + #include #include #include +#include + static const char *ieee80211_modes[] = { "?", "a", "b", "ab", "g", "ag", "bg", "abg" }; -- cgit v1.2.3 From 54d5d42404e7705cf3804593189e963350d470e5 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Tue, 6 Sep 2005 15:16:15 -0700 Subject: [PATCH] x86/x86_64: deferred handling of writes to /proc/irqxx/smp_affinity When handling writes to /proc/irq, current code is re-programming rte entries directly. This is not recommended and could potentially cause chipset's to lockup, or cause missing interrupts. CONFIG_IRQ_BALANCE does this correctly, where it re-programs only when the interrupt is pending. The same needs to be done for /proc/irq handling as well. Otherwise user space irq balancers are really not doing the right thing. - Changed pending_irq_balance_cpumask to pending_irq_migrate_cpumask for lack of a generic name. 
- added move_irq out of IRQ_BALANCE, and added this same to X86_64 - Added new proc handler for write, so we can do deferred write at irq handling time. - Display of /proc/irq/XX/smp_affinity used to display CPU_MASKALL, instead it now shows only active cpu masks, or exactly what was set. - Provided a common move_irq implementation, instead of duplicating when using generic irq framework. Tested on i386/x86_64 and ia64 with CONFIG_PCI_MSI turned on and off. Tested UP builds as well. MSI testing: tbd: I have cards, need to look for a x-over cable, although I did test an earlier version of this patch. Will test in a couple days. Signed-off-by: Ashok Raj Acked-by: Zwane Mwaikambo Grudgingly-acked-by: Andi Kleen Signed-off-by: Coywolf Qi Hunt Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 5 ++ arch/i386/kernel/io_apic.c | 55 ++++++++++--------- arch/ia64/Kconfig | 5 ++ arch/ia64/kernel/irq.c | 39 +------------- arch/x86_64/Kconfig | 5 ++ arch/x86_64/kernel/io_apic.c | 102 ++++++++++++++++++++++------------- drivers/pci/msi.c | 17 ++---- drivers/pci/msi.h | 5 -- include/asm-ia64/hw_irq.h | 7 --- include/asm-ia64/irq.h | 6 --- include/linux/irq.h | 123 +++++++++++++++++++++++++++++++++++++++++++ kernel/irq/manage.c | 4 ++ kernel/irq/proc.c | 14 ++++- 13 files changed, 253 insertions(+), 134 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 3b3b017e1c15..4b7de3e1e57b 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1318,6 +1318,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + config X86_SMP bool depends on SMP && !X86_VOYAGER diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 6578f40bd501..4a5940431579 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -33,6 +33,7 @@ #include #include #include + #include #include #include @@ 
-222,13 +223,21 @@ static void clear_IO_APIC (void) clear_IO_APIC_pin(apic, pin); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) { unsigned long flags; int pin; struct irq_pin_list *entry = irq_2_pin + irq; unsigned int apicid_value; + cpumask_t tmp; + cpus_and(tmp, cpumask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(cpumask, tmp, CPU_MASK_ALL); + apicid_value = cpu_mask_to_apicid(cpumask); /* Prepare to do the io_apic_write */ apicid_value = apicid_value << 24; @@ -242,6 +251,7 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) break; entry = irq_2_pin + entry->next; } + set_irq_info(irq, cpumask); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -259,7 +269,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask) # define Dprintk(x...) # endif -cpumask_t __cacheline_aligned pending_irq_balance_cpumask[NR_IRQS]; #define IRQBALANCE_CHECK_ARCH -999 static int irqbalance_disabled = IRQBALANCE_CHECK_ARCH; @@ -328,12 +337,7 @@ static inline void balance_irq(int cpu, int irq) cpus_and(allowed_mask, cpu_online_map, irq_affinity[irq]); new_cpu = move(cpu, allowed_mask, now, 1); if (cpu != new_cpu) { - irq_desc_t *desc = irq_desc + irq; - unsigned long flags; - - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[irq] = cpumask_of_cpu(new_cpu); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(irq, cpumask_of_cpu(new_cpu)); } } @@ -528,16 +532,12 @@ tryanotherirq: cpus_and(tmp, target_cpu_mask, allowed_mask); if (!cpus_empty(tmp)) { - irq_desc_t *desc = irq_desc + selected_irq; - unsigned long flags; Dprintk("irq = %d moved to cpu = %d\n", selected_irq, min_loaded); /* mark for change destination */ - spin_lock_irqsave(&desc->lock, flags); - pending_irq_balance_cpumask[selected_irq] = - cpumask_of_cpu(min_loaded); - spin_unlock_irqrestore(&desc->lock, flags); + set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded)); 
+ /* Since we made a change, come back sooner to * check for more variation. */ @@ -568,7 +568,8 @@ static int balanced_irq(void *unused) /* push everything to CPU 0 to give us a starting point. */ for (i = 0 ; i < NR_IRQS ; i++) { - pending_irq_balance_cpumask[i] = cpumask_of_cpu(0); + pending_irq_cpumask[i] = cpumask_of_cpu(0); + set_pending_irq(i, cpumask_of_cpu(0)); } for ( ; ; ) { @@ -647,20 +648,9 @@ int __init irqbalance_disable(char *str) __setup("noirqbalance", irqbalance_disable); -static inline void move_irq(int irq) -{ - /* note - we hold the desc->lock */ - if (unlikely(!cpus_empty(pending_irq_balance_cpumask[irq]))) { - set_ioapic_affinity_irq(irq, pending_irq_balance_cpumask[irq]); - cpus_clear(pending_irq_balance_cpumask[irq]); - } -} - late_initcall(balanced_irq_init); - -#else /* !CONFIG_IRQBALANCE */ -static inline void move_irq(int irq) { } #endif /* CONFIG_IRQBALANCE */ +#endif /* CONFIG_SMP */ #ifndef CONFIG_SMP void fastcall send_IPI_self(int vector) @@ -820,6 +810,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -838,6 +829,7 @@ void __init setup_ioapic_dest(void) } } +#endif /* * EISA Edge/Level control register, ELCR @@ -1249,6 +1241,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1944,6 +1937,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1958,6 +1952,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_irq(vector); 
end_level_ioapic_irq(irq); } @@ -1975,14 +1970,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } #endif +#endif /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -2000,7 +1998,9 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static struct hw_interrupt_type ioapic_level_type = { @@ -2011,7 +2011,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -2569,6 +2571,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 3deced637f07..17b5dbf8c311 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -434,6 +434,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + source "arch/ia64/hp/sim/Kconfig" source "arch/ia64/oprofile/Kconfig" diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c index 28f2aadc38d0..205d98028261 100644 --- a/arch/ia64/kernel/irq.c +++ b/arch/ia64/kernel/irq.c @@ -91,23 +91,8 @@ skip: } #ifdef CONFIG_SMP -/* - * This is updated when the user sets irq affinity via /proc - */ -static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; -static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)]; - static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; -/* - * Arch specific routine for deferred write to iosapic rte to reprogram - * intr destination. - */ -void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) -{ - pending_irq_cpumask[irq] = mask_val; -} - void set_irq_affinity_info (unsigned int irq, int hwid, int redir) { cpumask_t mask = CPU_MASK_NONE; @@ -116,32 +101,10 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir) if (irq < NR_IRQS) { irq_affinity[irq] = mask; + set_irq_info(irq, mask); irq_redir[irq] = (char) (redir & 0xff); } } - - -void move_irq(int irq) -{ - /* note - we hold desc->lock */ - cpumask_t tmp; - irq_desc_t *desc = irq_descp(irq); - int redir = test_bit(irq, pending_irq_redir); - - if (unlikely(!desc->handler->set_affinity)) - return; - - if (!cpus_empty(pending_irq_cpumask[irq])) { - cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); - if (unlikely(!cpus_empty(tmp))) { - desc->handler->set_affinity(irq | (redir ? 
IA64_IRQ_REDIRECTED : 0), - pending_irq_cpumask[irq]); - } - cpus_clear(pending_irq_cpumask[irq]); - } -} - - #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 75e52c57f19c..251ce7cf1a38 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -441,6 +441,11 @@ config ISA_DMA_API bool default y +config GENERIC_PENDING_IRQ + bool + depends on GENERIC_HARDIRQS && SMP + default y + menu "Power management options" source kernel/power/Kconfig diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index d206d7e49cf5..76bcc4e6979d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -78,6 +78,54 @@ int vector_irq[NR_VECTORS] = { [0 ... NR_VECTORS - 1] = -1}; #define vector_to_irq(vector) (vector) #endif +#define __DO_ACTION(R, ACTION, FINAL) \ + \ +{ \ + int pin; \ + struct irq_pin_list *entry = irq_2_pin + irq; \ + \ + for (;;) { \ + unsigned int reg; \ + pin = entry->pin; \ + if (pin == -1) \ + break; \ + reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ + reg ACTION; \ + io_apic_modify(entry->apic, reg); \ + if (!entry->next) \ + break; \ + entry = irq_2_pin + entry->next; \ + } \ + FINAL; \ +} + +#ifdef CONFIG_SMP +static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) +{ + unsigned long flags; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + tmp = TARGET_CPUS; + + cpus_and(mask, tmp, CPU_MASK_ALL); + + dest = cpu_mask_to_apicid(mask); + + /* + * Only the high 8 bits are valid. + */ + dest = SET_APIC_LOGICAL_ID(dest); + + spin_lock_irqsave(&ioapic_lock, flags); + __DO_ACTION(1, = dest, ) + set_irq_info(irq, mask); + spin_unlock_irqrestore(&ioapic_lock, flags); +} +#endif + /* * The common case is 1:1 IRQ<->pin mappings. Sometimes there are * shared ISA-space IRQs, so we have to support them. 
We are super @@ -101,26 +149,6 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) entry->pin = pin; } -#define __DO_ACTION(R, ACTION, FINAL) \ - \ -{ \ - int pin; \ - struct irq_pin_list *entry = irq_2_pin + irq; \ - \ - for (;;) { \ - unsigned int reg; \ - pin = entry->pin; \ - if (pin == -1) \ - break; \ - reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \ - reg ACTION; \ - io_apic_modify(entry->apic, reg); \ - if (!entry->next) \ - break; \ - entry = irq_2_pin + entry->next; \ - } \ - FINAL; \ -} #define DO_ACTION(name,R,ACTION, FINAL) \ \ @@ -767,6 +795,7 @@ static void __init setup_IO_APIC_irqs(void) spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); } } @@ -1314,6 +1343,7 @@ static unsigned int startup_edge_ioapic_irq(unsigned int irq) */ static void ack_edge_ioapic_irq(unsigned int irq) { + move_irq(irq); if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED)) == (IRQ_PENDING | IRQ_DISABLED)) mask_IO_APIC_irq(irq); @@ -1343,26 +1373,10 @@ static unsigned int startup_level_ioapic_irq (unsigned int irq) static void end_level_ioapic_irq (unsigned int irq) { + move_irq(irq); ack_APIC_irq(); } -static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) -{ - unsigned long flags; - unsigned int dest; - - dest = cpu_mask_to_apicid(mask); - - /* - * Only the high 8 bits are valid. 
- */ - dest = SET_APIC_LOGICAL_ID(dest); - - spin_lock_irqsave(&ioapic_lock, flags); - __DO_ACTION(1, = dest, ) - spin_unlock_irqrestore(&ioapic_lock, flags); -} - #ifdef CONFIG_PCI_MSI static unsigned int startup_edge_ioapic_vector(unsigned int vector) { @@ -1375,6 +1389,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -1389,6 +1404,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } @@ -1406,14 +1422,17 @@ static void unmask_IO_APIC_vector (unsigned int vector) unmask_IO_APIC_irq(irq); } +#ifdef CONFIG_SMP static void set_ioapic_affinity_vector (unsigned int vector, cpumask_t cpu_mask) { int irq = vector_to_irq(vector); + set_native_irq_info(vector, cpu_mask); set_ioapic_affinity_irq(irq, cpu_mask); } -#endif +#endif // CONFIG_SMP +#endif // CONFIG_PCI_MSI /* * Level and edge triggered IO-APIC interrupts need different handling, @@ -1432,7 +1451,9 @@ static struct hw_interrupt_type ioapic_edge_type = { .disable = disable_edge_ioapic, .ack = ack_edge_ioapic, .end = end_edge_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static struct hw_interrupt_type ioapic_level_type = { @@ -1443,7 +1464,9 @@ static struct hw_interrupt_type ioapic_level_type = { .disable = disable_level_ioapic, .ack = mask_and_ack_level_ioapic, .end = end_level_ioapic, +#ifdef CONFIG_SMP .set_affinity = set_ioapic_affinity, +#endif }; static inline void init_IO_APIC_traps(void) @@ -1918,6 +1941,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); + set_native_irq_info(use_pci_vector() ? 
entry.vector : irq, TARGET_CPUS); spin_unlock_irqrestore(&ioapic_lock, flags); return 0; @@ -1931,6 +1955,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a * we need to reprogram the ioredtbls to cater for the cpus which have come online * so mask in all cases should simply be TARGET_CPUS */ +#ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; @@ -1949,3 +1974,4 @@ void __init setup_ioapic_dest(void) } } +#endif diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2b85aa39f954..532f73bb2224 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -91,6 +91,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) { struct msi_desc *entry; struct msg_address address; + unsigned int irq = vector; entry = (struct msi_desc *)msi_desc[vector]; if (!entry || !entry->dev) @@ -112,6 +113,7 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); pci_write_config_dword(entry->dev, msi_lower_address_reg(pos), address.lo_address.value); + set_native_irq_info(irq, cpu_mask); break; } case PCI_CAP_ID_MSIX: @@ -125,22 +127,13 @@ static void set_msi_affinity(unsigned int vector, cpumask_t cpu_mask) MSI_TARGET_CPU_SHIFT); entry->msi_attrib.current_cpu = cpu_mask_to_apicid(cpu_mask); writel(address.lo_address.value, entry->mask_base + offset); + set_native_irq_info(irq, cpu_mask); break; } default: break; } } - -#ifdef CONFIG_IRQBALANCE -static inline void move_msi(int vector) -{ - if (!cpus_empty(pending_irq_balance_cpumask[vector])) { - set_msi_affinity(vector, pending_irq_balance_cpumask[vector]); - cpus_clear(pending_irq_balance_cpumask[vector]); - } -} -#endif /* CONFIG_IRQBALANCE */ #endif /* CONFIG_SMP */ static void mask_MSI_irq(unsigned int vector) @@ -191,13 +184,13 @@ static void shutdown_msi_irq(unsigned int vector) static void end_msi_irq_wo_maskbit(unsigned int vector) { - move_msi(vector); + 
move_native_irq(vector); ack_APIC_irq(); } static void end_msi_irq_w_maskbit(unsigned int vector) { - move_msi(vector); + move_native_irq(vector); unmask_MSI_irq(vector); ack_APIC_irq(); } diff --git a/drivers/pci/msi.h b/drivers/pci/msi.h index 390f1851c0f1..402136a5c9e4 100644 --- a/drivers/pci/msi.h +++ b/drivers/pci/msi.h @@ -19,7 +19,6 @@ #define NR_HP_RESERVED_VECTORS 20 extern int vector_irq[NR_VECTORS]; -extern cpumask_t pending_irq_balance_cpumask[NR_IRQS]; extern void (*interrupt[NR_IRQS])(void); extern int pci_vector_resources(int last, int nr_released); @@ -29,10 +28,6 @@ extern int pci_vector_resources(int last, int nr_released); #define set_msi_irq_affinity NULL #endif -#ifndef CONFIG_IRQBALANCE -static inline void move_msi(int vector) {} -#endif - /* * MSI-X Address Register */ diff --git a/include/asm-ia64/hw_irq.h b/include/asm-ia64/hw_irq.h index 041ab8c51a64..0cf119b42f7d 100644 --- a/include/asm-ia64/hw_irq.h +++ b/include/asm-ia64/hw_irq.h @@ -116,13 +116,6 @@ __ia64_local_vector_to_irq (ia64_vector vec) * and to obtain the irq descriptor for a given irq number. */ -/* Return a pointer to the irq descriptor for IRQ. */ -static inline irq_desc_t * -irq_descp (int irq) -{ - return irq_desc + irq; -} - /* Extract the IA-64 vector that corresponds to IRQ. 
*/ static inline ia64_vector irq_to_vector (int irq) diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index bd07d11d9f37..5d930fdc0bea 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -30,12 +30,6 @@ extern void disable_irq_nosync (unsigned int); extern void enable_irq (unsigned int); extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); -#ifdef CONFIG_SMP -extern void move_irq(int irq); -#else -#define move_irq(irq) -#endif - struct irqaction; struct pt_regs; int handle_IRQ_event(unsigned int, struct pt_regs *, struct irqaction *); diff --git a/include/linux/irq.h b/include/linux/irq.h index 069d3b84d311..4a362b9ec966 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -71,16 +71,139 @@ typedef struct irq_desc { unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irqs_unhandled; spinlock_t lock; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) + unsigned int move_irq; /* Flag need to re-target intr dest*/ +#endif } ____cacheline_aligned irq_desc_t; extern irq_desc_t irq_desc [NR_IRQS]; +/* Return a pointer to the irq descriptor for IRQ. 
*/ +static inline irq_desc_t * +irq_descp (int irq) +{ + return irq_desc + irq; +} + #include /* the arch dependent stuff */ extern int setup_irq(unsigned int irq, struct irqaction * new); #ifdef CONFIG_GENERIC_HARDIRQS extern cpumask_t irq_affinity[NR_IRQS]; + +#ifdef CONFIG_SMP +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ + irq_affinity[irq] = mask; +} +#else +static inline void set_native_irq_info(int irq, cpumask_t mask) +{ +} +#endif + +#ifdef CONFIG_SMP + +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +extern cpumask_t pending_irq_cpumask[NR_IRQS]; + +static inline void set_pending_irq(unsigned int irq, cpumask_t mask) +{ + irq_desc_t *desc = irq_desc + irq; + unsigned long flags; + + spin_lock_irqsave(&desc->lock, flags); + desc->move_irq = 1; + pending_irq_cpumask[irq] = mask; + spin_unlock_irqrestore(&desc->lock, flags); +} + +static inline void +move_native_irq(int irq) +{ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (likely (!desc->move_irq)) + return; + + desc->move_irq = 0; + + if (likely(cpus_empty(pending_irq_cpumask[irq]))) + return; + + if (!desc->handler->set_affinity) + return; + + /* note - we hold the desc->lock */ + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + + /* + * If there was a valid mask to work with, please + * do the disable, re-program, enable sequence. + * This is *not* particularly important for level triggered + * but in a edge trigger case, we might be setting rte + * when an active trigger is comming in. This could + * cause some ioapics to mal-function. + * Being paranoid i guess! + */ + if (unlikely(!cpus_empty(tmp))) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq,tmp); + desc->handler->enable(irq); + } + cpus_clear(pending_irq_cpumask[irq]); +} + +#ifdef CONFIG_PCI_MSI +/* + * Wonder why these are dummies? 
+ * For e.g the set_ioapic_affinity_vector() calls the set_ioapic_affinity_irq() + * counter part after translating the vector to irq info. We need to perform + * this operation on the real irq, when we dont use vector, i.e when + * pci_use_vector() is false. + */ +static inline void move_irq(int irq) +{ +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ +} + +#else // CONFIG_PCI_MSI + +static inline void move_irq(int irq) +{ + move_native_irq(irq); +} + +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} +#endif // CONFIG_PCI_MSI + +#else // CONFIG_GENERIC_PENDING_IRQ || CONFIG_IRQBALANCE + +#define move_irq(x) +#define move_native_irq(x) +#define set_pending_irq(x,y) +static inline void set_irq_info(int irq, cpumask_t mask) +{ + set_native_irq_info(irq, mask); +} + +#endif // CONFIG_GENERIC_PENDING_IRQ + +#else // CONFIG_SMP + +#define move_irq(x) +#define move_native_irq(x) + +#endif // CONFIG_SMP + extern int no_irq_affinity; extern int noirqdebug_setup(char *str); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index ac6700985705..1cfdb08ddf20 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -18,6 +18,10 @@ cpumask_t irq_affinity[NR_IRQS] = { [0 ... 
NR_IRQS-1] = CPU_MASK_ALL }; +#if defined (CONFIG_GENERIC_PENDING_IRQ) || defined (CONFIG_IRQBALANCE) +cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; +#endif + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 85d08daa6600..f26e534c6585 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -19,12 +19,22 @@ static struct proc_dir_entry *root_irq_dir, *irq_dir[NR_IRQS]; */ static struct proc_dir_entry *smp_affinity_entry[NR_IRQS]; -void __attribute__((weak)) -proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) +#ifdef CONFIG_GENERIC_PENDING_IRQ +void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) +{ + /* + * Save these away for later use. Re-progam when the + * interrupt is pending + */ + set_pending_irq(irq, mask_val); +} +#else +void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) { irq_affinity[irq] = mask_val; irq_desc[irq].handler->set_affinity(irq, mask_val); } +#endif static int irq_affinity_read_proc(char *page, char **start, off_t off, int count, int *eof, void *data) -- cgit v1.2.3 From 4732efbeb997189d9f9b04708dc26bf8613ed721 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Tue, 6 Sep 2005 15:16:25 -0700 Subject: [PATCH] FUTEX_WAKE_OP: pthread_cond_signal() speedup ATM pthread_cond_signal is unnecessarily slow, because it wakes one waiter (which at least on UP usually means an immediate context switch to one of the waiter threads). This waiter wakes up and after a few instructions it attempts to acquire the cv internal lock, but that lock is still held by the thread calling pthread_cond_signal. So it goes to sleep and eventually the signalling thread is scheduled in, unlocks the internal lock and wakes the waiter again. 
Now, before 2003-09-21 NPTL was using FUTEX_REQUEUE in pthread_cond_signal to avoid this performance issue, but it was removed when locks were redesigned to the 3-state scheme (unlocked, locked uncontended, locked contended). The following scenario shows why simply using FUTEX_REQUEUE in pthread_cond_signal together with using lll_mutex_unlock_force in place of lll_mutex_unlock is not enough and probably why it was disabled at that time: The number is the value in cv->__data.__lock. thr1 thr2 thr3 0 pthread_cond_wait 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) 0 lll_futex_wait (&cv->__data.__futex, futexval) 0 pthread_cond_signal 1 lll_mutex_lock (cv->__data.__lock) 1 pthread_cond_signal 2 lll_mutex_lock (cv->__data.__lock) 2 lll_futex_wait (&cv->__data.__lock, 2) 2 lll_futex_requeue (&cv->__data.__futex, 0, 1, &cv->__data.__lock) # FUTEX_REQUEUE, not FUTEX_CMP_REQUEUE 2 lll_mutex_unlock_force (cv->__data.__lock) 0 cv->__data.__lock = 0 0 lll_futex_wake (&cv->__data.__lock, 1) 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) # Here, lll_mutex_unlock doesn't know there are threads waiting # on the internal cv's lock Now, I believe it is possible to use FUTEX_REQUEUE in pthread_cond_signal, but it will cost us not one, but 2 extra syscalls and, what's worse, one of these extra syscalls will be done for every single waiting loop in pthread_cond_*wait. We would need to use lll_mutex_unlock_force in pthread_cond_signal after requeue and lll_mutex_cond_lock in pthread_cond_*wait after lll_futex_wait. Another alternative is to do the unlocking pthread_cond_signal needs to do (the lock can't be unlocked before lll_futex_wake, as that is racy) in the kernel. I have implemented both variants: futex-requeue-glibc.patch is the first one and futex-wake_op{,-glibc}.patch is the unlocking inside of the kernel. 
The kernel interface allows userland to specify exactly what an unlocking operation should look like (some atomic arithmetic operation with an optional constant argument and a comparison of the previous futex value with another constant). It has been implemented just for ppc*, x86_64 and i?86; for other architectures I'm including just a stub header which can be used as a starting point by maintainers to write support for their arches and which ATM will just return -ENOSYS for FUTEX_WAKE_OP. The requeue patch has been (lightly) tested just on x86_64, the wake_op patch on a ppc64 kernel running 32-bit and 64-bit NPTL and an x86_64 kernel running 32-bit and 64-bit NPTL. With the following benchmark on UP x86-64 I get: for i in nptl-orig nptl-requeue nptl-wake_op; do echo time elf/ld.so --library-path .:$i /tmp/bench; \ for j in 1 2; do echo ( time elf/ld.so --library-path .:$i /tmp/bench ) 2>&1; done; done time elf/ld.so --library-path .:nptl-orig /tmp/bench real 0m0.655s user 0m0.253s sys 0m0.403s real 0m0.657s user 0m0.269s sys 0m0.388s time elf/ld.so --library-path .:nptl-requeue /tmp/bench real 0m0.496s user 0m0.225s sys 0m0.271s real 0m0.531s user 0m0.242s sys 0m0.288s time elf/ld.so --library-path .:nptl-wake_op /tmp/bench real 0m0.380s user 0m0.176s sys 0m0.204s real 0m0.382s user 0m0.175s sys 0m0.207s The benchmark is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00001.txt Older futex-requeue-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00002.txt Older futex-wake_op-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00003.txt I will post a new version (just x86-64 fixes so that the patch applies against pthread_cond_signal.S) to libc-hacker ml soon. 
Attached is the kernel FUTEX_WAKE_OP patch as well as a simple-minded testcase that will not test the atomicity of the operation, but at least check if the threads that should have been woken up are woken up and whether the arithmetic operation in the kernel gave the expected results. Acked-by: Ingo Molnar Cc: Ulrich Drepper Cc: Jamie Lokier Cc: Rusty Russell Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/futex.h | 53 +++++++++++++++++++ include/asm-arm/futex.h | 53 +++++++++++++++++++ include/asm-arm26/futex.h | 53 +++++++++++++++++++ include/asm-cris/futex.h | 53 +++++++++++++++++++ include/asm-frv/futex.h | 53 +++++++++++++++++++ include/asm-h8300/futex.h | 53 +++++++++++++++++++ include/asm-i386/futex.h | 108 +++++++++++++++++++++++++++++++++++++++ include/asm-ia64/futex.h | 53 +++++++++++++++++++ include/asm-m32r/futex.h | 53 +++++++++++++++++++ include/asm-m68k/futex.h | 53 +++++++++++++++++++ include/asm-m68knommu/futex.h | 53 +++++++++++++++++++ include/asm-mips/futex.h | 53 +++++++++++++++++++ include/asm-parisc/futex.h | 53 +++++++++++++++++++ include/asm-ppc/futex.h | 53 +++++++++++++++++++ include/asm-ppc64/futex.h | 83 ++++++++++++++++++++++++++++++ include/asm-ppc64/memory.h | 2 + include/asm-s390/futex.h | 53 +++++++++++++++++++ include/asm-sh/futex.h | 53 +++++++++++++++++++ include/asm-sh64/futex.h | 53 +++++++++++++++++++ include/asm-sparc/futex.h | 53 +++++++++++++++++++ include/asm-sparc64/futex.h | 53 +++++++++++++++++++ include/asm-um/futex.h | 53 +++++++++++++++++++ include/asm-v850/futex.h | 53 +++++++++++++++++++ include/asm-x86_64/futex.h | 98 +++++++++++++++++++++++++++++++++++ include/linux/futex.h | 36 +++++++++++-- kernel/futex.c | 116 ++++++++++++++++++++++++++++++++++++++++++ 26 files changed, 1498 insertions(+), 5 deletions(-) create mode 100644 include/asm-alpha/futex.h create mode 100644 include/asm-arm/futex.h create mode 100644 include/asm-arm26/futex.h create mode 
100644 include/asm-cris/futex.h create mode 100644 include/asm-frv/futex.h create mode 100644 include/asm-h8300/futex.h create mode 100644 include/asm-i386/futex.h create mode 100644 include/asm-ia64/futex.h create mode 100644 include/asm-m32r/futex.h create mode 100644 include/asm-m68k/futex.h create mode 100644 include/asm-m68knommu/futex.h create mode 100644 include/asm-mips/futex.h create mode 100644 include/asm-parisc/futex.h create mode 100644 include/asm-ppc/futex.h create mode 100644 include/asm-ppc64/futex.h create mode 100644 include/asm-s390/futex.h create mode 100644 include/asm-sh/futex.h create mode 100644 include/asm-sh64/futex.h create mode 100644 include/asm-sparc/futex.h create mode 100644 include/asm-sparc64/futex.h create mode 100644 include/asm-um/futex.h create mode 100644 include/asm-v850/futex.h create mode 100644 include/asm-x86_64/futex.h (limited to 'include/linux') diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-alpha/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-arm/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-arm26/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-cris/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-frv/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-h8300/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h new file mode 100644 index 000000000000..44b9db806474 --- /dev/null +++ b/include/asm-i386/futex.h @@ -0,0 +1,108 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include +#include + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile ( \ +"1: " insn "\n" \ +"2: .section .fixup,\"ax\"\n\ +3: mov %3, %1\n\ + jmp 2b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 8\n\ + .long 1b,3b\n\ + .previous" \ + : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \ + : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0)) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile ( \ +"1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ + jnz 1b\n\ +3: .section .fixup,\"ax\"\n\ +4: mov %5, %1\n\ + jmp 3b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 8\n\ + .long 1b,4b,2b,4b\n\ + .previous" \ + : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \ + "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0)) + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 
15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + if (op == FUTEX_OP_SET) + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + else { +#ifndef CONFIG_X86_BSWAP + if (boot_cpu_data.x86 == 3) + ret = -ENOSYS; + else +#endif + switch (op) { + case FUTEX_OP_ADD: + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, + oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, + oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, + ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, + oparg); + break; + default: + ret = -ENOSYS; + } + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-ia64/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << 
oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-m32r/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-m68k/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-m68knommu/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h new file mode 100644 index 000000000000..9feff4ce1424 --- /dev/null +++ b/include/asm-mips/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-parisc/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-ppc/futex.h b/include/asm-ppc/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-ppc/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-ppc64/futex.h b/include/asm-ppc64/futex.h new file mode 100644 index 000000000000..cb2640b3a408 --- /dev/null +++ b/include/asm-ppc64/futex.h @@ -0,0 +1,83 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile (SYNC_ON_SMP \ +"1: lwarx %0,0,%2\n" \ + insn \ +"2: stwcx. %1,0,%2\n\ + bne- 1b\n\ + li %1,0\n\ +3: .section .fixup,\"ax\"\n\ +4: li %1,%3\n\ + b 3b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 3\n\ + .llong 1b,4b,2b,4b\n\ + .previous" \ + : "=&r" (oldval), "=&r" (ret) \ + : "b" (uaddr), "i" (-EFAULT), "1" (oparg) \ + : "cr0", "memory") + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("add %1,%0,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("or %1,%0,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("andc %1,%0,%1\n", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("xor %1,%0,%1\n", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-ppc64/memory.h b/include/asm-ppc64/memory.h index 56e09face9a8..af53ffb55726 100644 --- a/include/asm-ppc64/memory.h +++ b/include/asm-ppc64/memory.h @@ -18,9 +18,11 @@ #ifdef CONFIG_SMP #define EIEIO_ON_SMP "eieio\n" #define ISYNC_ON_SMP "\n\tisync" +#define SYNC_ON_SMP "lwsync\n\t" #else #define EIEIO_ON_SMP #define ISYNC_ON_SMP +#define SYNC_ON_SMP #endif static inline void eieio(void) diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-s390/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 
20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-sh/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-sh64/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-sparc/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-sparc64/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-um/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h new file mode 100644 index 000000000000..2cac5ecd9d00 --- /dev/null +++ b/include/asm-v850/futex.h @@ -0,0 +1,53 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! 
access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + case FUTEX_OP_ADD: + case FUTEX_OP_OR: + case FUTEX_OP_ANDN: + case FUTEX_OP_XOR: + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h new file mode 100644 index 000000000000..8602c09bf89e --- /dev/null +++ b/include/asm-x86_64/futex.h @@ -0,0 +1,98 @@ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile ( \ +"1: " insn "\n" \ +"2: .section .fixup,\"ax\"\n\ +3: mov %3, %1\n\ + jmp 2b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 8\n\ + .quad 1b,3b\n\ + .previous" \ + : "=r" (oldval), "=r" (ret), "=m" (*uaddr) \ + : "i" (-EFAULT), "m" (*uaddr), "0" (oparg), "1" (0)) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ + __asm__ __volatile ( \ +"1: movl %2, %0\n\ + movl %0, %3\n" \ + insn "\n" \ +"2: " LOCK_PREFIX "cmpxchgl %3, %2\n\ + jnz 1b\n\ +3: .section .fixup,\"ax\"\n\ +4: mov %5, %1\n\ + jmp 3b\n\ + .previous\n\ + .section __ex_table,\"a\"\n\ + .align 8\n\ + .quad 1b,4b,2b,4b\n\ + .previous" \ + : "=&a" (oldval), "=&r" (ret), "=m" (*uaddr), \ + "=&r" (tem) \ + : "r" (oparg), "i" (-EFAULT), "m" (*uaddr), "1" (0)) + +static inline int +futex_atomic_op_inuser (int encoded_op, int __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + 
int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret, tem; + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + inc_preempt_count(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op1(LOCK_PREFIX "xaddl %0, %2", ret, oldval, + uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + dec_preempt_count(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; + case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; + case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; + case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; + case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; + case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; + default: ret = -ENOSYS; + } + } + return ret; +} + +#endif +#endif diff --git a/include/linux/futex.h b/include/linux/futex.h index 65d6cfdb6d39..10f96c31971e 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,14 +4,40 @@ /* Second argument to futex syscall */ -#define FUTEX_WAIT (0) -#define FUTEX_WAKE (1) -#define FUTEX_FD (2) -#define FUTEX_REQUEUE (3) -#define FUTEX_CMP_REQUEUE (4) +#define FUTEX_WAIT 0 +#define FUTEX_WAKE 1 +#define FUTEX_FD 2 +#define FUTEX_REQUEUE 3 +#define FUTEX_CMP_REQUEUE 4 +#define FUTEX_WAKE_OP 5 long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); +#define FUTEX_OP_SET 0 /* *(int *)UADDR2 = OPARG; */ +#define FUTEX_OP_ADD 1 /* *(int *)UADDR2 += OPARG; */ +#define 
FUTEX_OP_OR 2 /* *(int *)UADDR2 |= OPARG; */ +#define FUTEX_OP_ANDN 3 /* *(int *)UADDR2 &= ~OPARG; */ +#define FUTEX_OP_XOR 4 /* *(int *)UADDR2 ^= OPARG; */ + +#define FUTEX_OP_OPARG_SHIFT 8 /* Use (1 << OPARG) instead of OPARG. */ + +#define FUTEX_OP_CMP_EQ 0 /* if (oldval == CMPARG) wake */ +#define FUTEX_OP_CMP_NE 1 /* if (oldval != CMPARG) wake */ +#define FUTEX_OP_CMP_LT 2 /* if (oldval < CMPARG) wake */ +#define FUTEX_OP_CMP_LE 3 /* if (oldval <= CMPARG) wake */ +#define FUTEX_OP_CMP_GT 4 /* if (oldval > CMPARG) wake */ +#define FUTEX_OP_CMP_GE 5 /* if (oldval >= CMPARG) wake */ + +/* FUTEX_WAKE_OP will perform atomically + int oldval = *(int *)UADDR2; + *(int *)UADDR2 = oldval OP OPARG; + if (oldval CMP CMPARG) + wake UADDR2; */ + +#define FUTEX_OP(op, oparg, cmp, cmparg) \ + (((op & 0xf) << 28) | ((cmp & 0xf) << 24) \ + | ((oparg & 0xfff) << 12) | (cmparg & 0xfff)) + #endif diff --git a/kernel/futex.c b/kernel/futex.c index c7130f86106c..07ba87de9658 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -40,6 +40,7 @@ #include #include #include +#include #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 
4 : 8) @@ -326,6 +327,118 @@ out: return ret; } +/* + * Wake up all waiters hashed on the physical page that is mapped + * to this virtual address: + */ +static int futex_wake_op(unsigned long uaddr1, unsigned long uaddr2, int nr_wake, int nr_wake2, int op) +{ + union futex_key key1, key2; + struct futex_hash_bucket *bh1, *bh2; + struct list_head *head; + struct futex_q *this, *next; + int ret, op_ret, attempt = 0; + +retryfull: + down_read(&current->mm->mmap_sem); + + ret = get_futex_key(uaddr1, &key1); + if (unlikely(ret != 0)) + goto out; + ret = get_futex_key(uaddr2, &key2); + if (unlikely(ret != 0)) + goto out; + + bh1 = hash_futex(&key1); + bh2 = hash_futex(&key2); + +retry: + if (bh1 < bh2) + spin_lock(&bh1->lock); + spin_lock(&bh2->lock); + if (bh1 > bh2) + spin_lock(&bh1->lock); + + op_ret = futex_atomic_op_inuser(op, (int __user *)uaddr2); + if (unlikely(op_ret < 0)) { + int dummy; + + spin_unlock(&bh1->lock); + if (bh1 != bh2) + spin_unlock(&bh2->lock); + + /* futex_atomic_op_inuser needs to both read and write + * *(int __user *)uaddr2, but we can't modify it + * non-atomically. Therefore, if get_user below is not + * enough, we need to handle the fault ourselves, while + * still holding the mmap_sem. */ + if (attempt++) { + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + + ret = -EFAULT; + if (attempt >= 2 || + !(vma = find_vma(mm, uaddr2)) || + vma->vm_start > uaddr2 || + !(vma->vm_flags & VM_WRITE)) + goto out; + + switch (handle_mm_fault(mm, vma, uaddr2, 1)) { + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + default: + goto out; + } + goto retry; + } + + /* If we would have faulted, release mmap_sem, + * fault it in and start all over again.
*/ + up_read(&current->mm->mmap_sem); + + ret = get_user(dummy, (int __user *)uaddr2); + if (ret) + return ret; + + goto retryfull; + } + + head = &bh1->chain; + + list_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key1)) { + wake_futex(this); + if (++ret >= nr_wake) + break; + } + } + + if (op_ret > 0) { + head = &bh2->chain; + + op_ret = 0; + list_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key2)) { + wake_futex(this); + if (++op_ret >= nr_wake2) + break; + } + } + ret += op_ret; + } + + spin_unlock(&bh1->lock); + if (bh1 != bh2) + spin_unlock(&bh2->lock); +out: + up_read(&current->mm->mmap_sem); + return ret; +} + /* * Requeue all waiters hashed on one physical page to another * physical page. @@ -740,6 +853,9 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, case FUTEX_CMP_REQUEUE: ret = futex_requeue(uaddr, uaddr2, val, val2, &val3); break; + case FUTEX_WAKE_OP: + ret = futex_wake_op(uaddr, uaddr2, val, val2, val3); + break; default: ret = -ENOSYS; } -- cgit v1.2.3 From 8446f1d391f3d27e6bf9c43d4cbcdac0ca720417 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 6 Sep 2005 15:16:27 -0700 Subject: [PATCH] detect soft lockups This patch adds a new kernel debug feature: CONFIG_DETECT_SOFTLOCKUP. When enabled then per-CPU watchdog threads are started, which try to run once per second. If they get delayed for more than 10 seconds then a callback from the timer interrupt detects this condition and prints out a warning message and a stack dump (once per lockup incident). The feature is otherwise non-intrusive, it doesnt try to unlock the box in any way, it only gets the debug info out, automatically, and on all CPUs affected by the lockup.
Signed-off-by: Ingo Molnar Signed-off-by: Nishanth Aravamudan Signed-Off-By: Matthias Urlichs Signed-off-by: Richard Purdie Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/nmi.c | 5 ++ arch/i386/kernel/time.c | 1 + arch/x86_64/kernel/nmi.c | 2 + arch/x86_64/kernel/time.c | 1 + drivers/mtd/nand/nand_base.c | 1 + include/linux/sched.h | 17 +++++ init/main.c | 1 + kernel/Makefile | 1 + kernel/power/swsusp.c | 1 + kernel/softlockup.c | 151 +++++++++++++++++++++++++++++++++++++++++++ kernel/timer.c | 1 + lib/Kconfig.debug | 19 ++++++ 12 files changed, 201 insertions(+) create mode 100644 kernel/softlockup.c (limited to 'include/linux') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 8bbdbda07a2d..0178457db721 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -478,6 +478,11 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) alert_counter[i] = 0; + + /* + * Tickle the softlockup detector too: + */ + touch_softlockup_watchdog(); } extern void die_nmi(struct pt_regs *, const char *msg); diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 6f794a78ee1e..b0c5ee2b3446 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -422,6 +422,7 @@ static int timer_resume(struct sys_device *dev) last_timer->resume(); cur_timer = last_timer; last_timer = NULL; + touch_softlockup_watchdog(); return 0; } diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 64a8e05d5811..84cae81fff8b 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c @@ -463,6 +463,8 @@ void touch_nmi_watchdog (void) */ for (i = 0; i < NR_CPUS; i++) per_cpu(nmi_touch, i) = 1; + + touch_softlockup_watchdog(); } void nmi_watchdog_tick (struct pt_regs * regs, unsigned reason) diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 66bf6ddeb0c3..2b5d9da912a2 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -1041,6 +1041,7 @@ static int 
timer_resume(struct sys_device *dev) write_sequnlock_irqrestore(&xtime_lock,flags); jiffies += sleep_length; wall_jiffies += sleep_length; + touch_softlockup_watchdog(); return 0; } diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index eee5115658c8..04e54318bc6a 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -526,6 +526,7 @@ static void nand_wait_ready(struct mtd_info *mtd) do { if (this->dev_ready(mtd)) return; + touch_softlockup_watchdog(); } while (time_before(jiffies, timeo)); } diff --git a/include/linux/sched.h b/include/linux/sched.h index dec5827c7742..5fb31bede103 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,23 @@ extern void trap_init(void); extern void update_process_times(int user); extern void scheduler_tick(void); +#ifdef CONFIG_DETECT_SOFTLOCKUP +extern void softlockup_tick(struct pt_regs *regs); +extern void spawn_softlockup_task(void); +extern void touch_softlockup_watchdog(void); +#else +static inline void softlockup_tick(struct pt_regs *regs) +{ +} +static inline void spawn_softlockup_task(void) +{ +} +static inline void touch_softlockup_watchdog(void) +{ +} +#endif + + /* Attach to any functions which should be ignored in wchan output. */ #define __sched __attribute__((__section__(".sched.text"))) /* Is this address in the __sched functions? 
*/ diff --git a/init/main.c b/init/main.c index ff410063e4e1..a29fb2ac7240 100644 --- a/init/main.c +++ b/init/main.c @@ -614,6 +614,7 @@ static void do_pre_smp_initcalls(void) migration_init(); #endif spawn_ksoftirqd(); + spawn_softlockup_task(); } static void run_init_process(char *init_filename) diff --git a/kernel/Makefile b/kernel/Makefile index cb05cd05d237..8d57a2f1226b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_AUDIT) += audit.o obj-$(CONFIG_AUDITSYSCALL) += auditsc.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_SYSFS) += ksysfs.o +obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_SECCOMP) += seccomp.o diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index eaacd5cb5889..d967e875ee82 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -1059,6 +1059,7 @@ int swsusp_resume(void) BUG_ON(!error); restore_processor_state(); restore_highmem(); + touch_softlockup_watchdog(); device_power_up(); local_irq_enable(); return error; diff --git a/kernel/softlockup.c b/kernel/softlockup.c new file mode 100644 index 000000000000..75976209cea7 --- /dev/null +++ b/kernel/softlockup.c @@ -0,0 +1,151 @@ +/* + * Detect Soft Lockups + * + * started by Ingo Molnar, (C) 2005, Red Hat + * + * this code detects soft lockups: incidents in where on a CPU + * the kernel does not reschedule for 10 seconds or more. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_SPINLOCK(print_lock); + +static DEFINE_PER_CPU(unsigned long, timestamp) = 0; +static DEFINE_PER_CPU(unsigned long, print_timestamp) = 0; +static DEFINE_PER_CPU(struct task_struct *, watchdog_task); + +static int did_panic = 0; +static int softlock_panic(struct notifier_block *this, unsigned long event, + void *ptr) +{ + did_panic = 1; + + return NOTIFY_DONE; +} + +static struct notifier_block panic_block = { + .notifier_call = softlock_panic, +}; + +void touch_softlockup_watchdog(void) +{ + per_cpu(timestamp, raw_smp_processor_id()) = jiffies; +} +EXPORT_SYMBOL(touch_softlockup_watchdog); + +/* + * This callback runs from the timer interrupt, and checks + * whether the watchdog thread has hung or not: + */ +void softlockup_tick(struct pt_regs *regs) +{ + int this_cpu = smp_processor_id(); + unsigned long timestamp = per_cpu(timestamp, this_cpu); + + if (per_cpu(print_timestamp, this_cpu) == timestamp) + return; + + /* Do not cause a second panic when there already was one */ + if (did_panic) + return; + + if (time_after(jiffies, timestamp + 10*HZ)) { + per_cpu(print_timestamp, this_cpu) = timestamp; + + spin_lock(&print_lock); + printk(KERN_ERR "BUG: soft lockup detected on CPU#%d!\n", + this_cpu); + show_regs(regs); + spin_unlock(&print_lock); + } +} + +/* + * The watchdog thread - runs every second and touches the timestamp. 
+ */ +static int watchdog(void * __bind_cpu) +{ + struct sched_param param = { .sched_priority = 99 }; + int this_cpu = (long) __bind_cpu; + + printk("softlockup thread %d started up.\n", this_cpu); + + sched_setscheduler(current, SCHED_FIFO, &param); + current->flags |= PF_NOFREEZE; + + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Run briefly once per second - if this gets delayed for + * more than 10 seconds then the debug-printout triggers + * in softlockup_tick(): + */ + while (!kthread_should_stop()) { + msleep_interruptible(1000); + touch_softlockup_watchdog(); + } + __set_current_state(TASK_RUNNING); + + return 0; +} + +/* + * Create/destroy watchdog threads as CPUs come and go: + */ +static int __devinit +cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) +{ + int hotcpu = (unsigned long)hcpu; + struct task_struct *p; + + switch (action) { + case CPU_UP_PREPARE: + BUG_ON(per_cpu(watchdog_task, hotcpu)); + p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu); + if (IS_ERR(p)) { + printk("watchdog for %i failed\n", hotcpu); + return NOTIFY_BAD; + } + per_cpu(watchdog_task, hotcpu) = p; + kthread_bind(p, hotcpu); + break; + case CPU_ONLINE: + + wake_up_process(per_cpu(watchdog_task, hotcpu)); + break; +#ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + /* Unbind so it can run. Fall thru.
*/ + kthread_bind(per_cpu(watchdog_task, hotcpu), smp_processor_id()); + case CPU_DEAD: + p = per_cpu(watchdog_task, hotcpu); + per_cpu(watchdog_task, hotcpu) = NULL; + kthread_stop(p); + break; +#endif /* CONFIG_HOTPLUG_CPU */ + } + return NOTIFY_OK; +} + +static struct notifier_block __devinitdata cpu_nfb = { + .notifier_call = cpu_callback +}; + +__init void spawn_softlockup_task(void) +{ + void *cpu = (void *)(long)smp_processor_id(); + + cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); + cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); + register_cpu_notifier(&cpu_nfb); + + notifier_chain_register(&panic_notifier_list, &panic_block); +} + diff --git a/kernel/timer.c b/kernel/timer.c index 5377f40723ff..1433d87f46b3 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -950,6 +950,7 @@ void do_timer(struct pt_regs *regs) { jiffies_64++; update_times(); + softlockup_tick(regs); } #ifdef __ARCH_WANT_SYS_ALARM diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 299f7f3b5b08..3754c9a8f5c8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -46,6 +46,25 @@ config LOG_BUF_SHIFT 13 => 8 KB 12 => 4 KB +config DETECT_SOFTLOCKUP + bool "Detect Soft Lockups" + depends on DEBUG_KERNEL + default y + help + Say Y here to enable the kernel to detect "soft lockups", + which are bugs that cause the kernel to loop in kernel + mode for more than 10 seconds, without giving other tasks a + chance to run. + + When a soft-lockup is detected, the kernel will print the + current stack trace (which you should report), but the + system will stay locked up. This feature has negligible + overhead. + + (Note that "hard lockups" are separate type of bugs that + can be detected via the NMI-watchdog, on platforms that + support it.) 
+ config SCHEDSTATS bool "Collect scheduler statistics" depends on DEBUG_KERNEL && PROC_FS -- cgit v1.2.3 From e82894f84dbba130ab46c97748c03647f8204f92 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Tue, 6 Sep 2005 15:16:30 -0700 Subject: [PATCH] relayfs Here's the latest version of relayfs, against linux-2.6.11-mm2. I'm hoping you'll consider putting this version back into your tree - the previous rounds of comment seem to have shaken out all the API issues and the number of comments on the code itself have also steadily dwindled. This patch is essentially the same as the relayfs redux part 5 patch, with some minor changes based on reviewer comments. Thanks again to Pekka Enberg for those. The patch size without documentation is now a little smaller at just over 40k. Here's a detailed list of the changes: - removed the attribute_flags in relay open and changed it to a boolean specifying either overwrite or no-overwrite mode, and removed everything referencing the attribute flags. - added a check for NULL names in relayfs_create_entry() - got rid of the unnecessary multiple labels in relay_create_buf() - some minor simplification of relay_alloc_buf() which got rid of a couple params - updated the Documentation In addition, this version (through code contained in the relay-apps tarball linked to below, not as part of the relayfs patch) tries to make it as easy as possible to create the cooperating kernel/user pieces of a typical and common type of logging application, one where kernel logging is kicked off when a user space data collection app starts and stops when the collection app exits, with the data being automatically logged to disk in between. 
To create this type of application, you basically just include a header file (relay-app.h, included in the relay-apps tarball) in your kernel module, define a couple of callbacks and call an initialization function, and on the user side call a single function that sets up and continuously monitors the buffers, and writes data to files as it becomes available. Channels are created when the collection app is started and destroyed when it exits, not when the kernel module is inserted, so different channel buffer sizes can be specified for each separate run via command-line options. See the README in the relay-apps tarball for details. Also included in the relay-apps tarball are a couple examples demonstrating how you can use this to create quick and dirty kernel logging/debugging applications. They are: - tprintk, short for 'tee printk', which temporarily puts a kprobe on printk() and writes a duplicate stream of printk output to a relayfs channel. This could be used anywhere there's printk() debugging code in the kernel which you'd like to exercise, but would rather not have your system logs cluttered with debugging junk. You'd probably want to kill klogd while you do this, otherwise there wouldn't be much point (since putting a kprobe on printk() doesn't change the output of printk()). I've used this method to temporarily divert the packet logging output of the iptables LOG target from the system logs to relayfs files instead, for instance. - klog, which just provides a printk-like formatted logging function on top of relayfs. Again, you can use this to keep stuff out of your system logs if used in place of printk. 
The example applications can be found here: http://prdownloads.sourceforge.net/dprobes/relay-apps.tar.gz?download From: Christoph Hellwig avoid lookup_hash usage in relayfs Signed-off-by: Tom Zanussi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/relayfs.txt | 362 ++++++++++++++++++++ fs/Kconfig | 12 + fs/Makefile | 1 + fs/relayfs/Makefile | 4 + fs/relayfs/buffers.c | 189 +++++++++++ fs/relayfs/buffers.h | 12 + fs/relayfs/inode.c | 609 ++++++++++++++++++++++++++++++++++ fs/relayfs/relay.c | 431 ++++++++++++++++++++++++ fs/relayfs/relay.h | 12 + include/linux/relayfs_fs.h | 255 ++++++++++++++ 10 files changed, 1887 insertions(+) create mode 100644 Documentation/filesystems/relayfs.txt create mode 100644 fs/relayfs/Makefile create mode 100644 fs/relayfs/buffers.c create mode 100644 fs/relayfs/buffers.h create mode 100644 fs/relayfs/inode.c create mode 100644 fs/relayfs/relay.c create mode 100644 fs/relayfs/relay.h create mode 100644 include/linux/relayfs_fs.h (limited to 'include/linux') diff --git a/Documentation/filesystems/relayfs.txt b/Documentation/filesystems/relayfs.txt new file mode 100644 index 000000000000..d24e1b0d4f39 --- /dev/null +++ b/Documentation/filesystems/relayfs.txt @@ -0,0 +1,362 @@ + +relayfs - a high-speed data relay filesystem +============================================ + +relayfs is a filesystem designed to provide an efficient mechanism for +tools and facilities to relay large and potentially sustained streams +of data from kernel space to user space. + +The main abstraction of relayfs is the 'channel'. A channel consists +of a set of per-cpu kernel buffers each represented by a file in the +relayfs filesystem. Kernel clients write into a channel using +efficient write functions which automatically log to the current cpu's +channel buffer. User space applications mmap() the per-cpu files and +retrieve the data as it becomes available. 
+ +The format of the data logged into the channel buffers is completely +up to the relayfs client; relayfs does however provide hooks which +allow clients to impose some structure on the buffer data. Nor does +relayfs implement any form of data filtering - this also is left to +the client. The purpose is to keep relayfs as simple as possible. + +This document provides an overview of the relayfs API. The details of +the function parameters are documented along with the functions in the +filesystem code - please see that for details. + +Semantics +========= + +Each relayfs channel has one buffer per CPU, each buffer has one or +more sub-buffers. Messages are written to the first sub-buffer until +it is too full to contain a new message, in which case it is +written to the next (if available). Messages are never split across +sub-buffers. At this point, userspace can be notified so it empties +the first sub-buffer, while the kernel continues writing to the next. + +When notified that a sub-buffer is full, the kernel knows how many +bytes of it are padding i.e. unused. Userspace can use this knowledge +to copy only valid data. + +After copying it, userspace can notify the kernel that a sub-buffer +has been consumed. + +relayfs can operate in a mode where it will overwrite data not yet +collected by userspace, and not wait for it to consume it. + +relayfs itself does not provide for communication of such data between +userspace and kernel, allowing the kernel side to remain simple and not +impose a single interface on userspace. It does provide a separate +helper though, described below. + +klog, relay-app & librelay +========================== + +relayfs itself is ready to use, but to make things easier, two +additional systems are provided.
klog is a simple wrapper to make +writing formatted text or raw data to a channel simpler, regardless of +whether a channel to write into exists or not, or whether relayfs is +compiled into the kernel or is configured as a module. relay-app is +the kernel counterpart of userspace librelay.c; combined, these two +files provide glue to easily stream data to disk, without having to +bother with housekeeping. klog and relay-app can be used together, +with klog providing high-level logging functions to the kernel and +relay-app taking care of kernel-user control and disk-logging chores. + +It is possible to use relayfs without relay-app & librelay, but you'll +have to implement communication between userspace and kernel, allowing +both to convey the state of buffers (full, empty, amount of padding). + +klog, relay-app and librelay can be found in the relay-apps tarball on +http://relayfs.sourceforge.net + +The relayfs user space API +========================== + +relayfs implements basic file operations for user space access to +relayfs channel buffer data. Here are the file operations that are +available and some comments regarding their behavior: + +open() enables user to open an _existing_ buffer. + +mmap() results in channel buffer being mapped into the caller's + memory space. Note that you can't do a partial mmap - you must + map the entire file, which is NRBUF * SUBBUFSIZE. + +read() read the contents of a channel buffer. The bytes read are + 'consumed' by the reader i.e. they won't be available again + to subsequent reads. If the channel is being used in + no-overwrite mode (the default), it can be read at any time + even if there's an active kernel writer. If the channel is + being used in overwrite mode and there are active channel + writers, results may be unpredictable - users should make + sure that all logging to the channel has ended before using + read() with overwrite mode. + +poll() POLLIN/POLLRDNORM/POLLERR supported.
User applications are + notified when sub-buffer boundaries are crossed. + +close() decrements the channel buffer's refcount. When the refcount + reaches 0 i.e. when no process or kernel client has the buffer + open, the channel buffer is freed. + + +In order for a user application to make use of relayfs files, the +relayfs filesystem must be mounted. For example, + + mount -t relayfs relayfs /mnt/relay + +NOTE: relayfs doesn't need to be mounted for kernel clients to create + or use channels - it only needs to be mounted when user space + applications need access to the buffer data. + + +The relayfs kernel API +====================== + +Here's a summary of the API relayfs provides to in-kernel clients: + + + channel management functions: + + relay_open(base_filename, parent, subbuf_size, n_subbufs, + callbacks) + relay_close(chan) + relay_flush(chan) + relay_reset(chan) + relayfs_create_dir(name, parent) + relayfs_remove_dir(dentry) + + channel management typically called on instigation of userspace: + + relay_subbufs_consumed(chan, cpu, subbufs_consumed) + + write functions: + + relay_write(chan, data, length) + __relay_write(chan, data, length) + relay_reserve(chan, length) + + callbacks: + + subbuf_start(buf, subbuf, prev_subbuf, prev_padding) + buf_mapped(buf, filp) + buf_unmapped(buf, filp) + + helper functions: + + relay_buf_full(buf) + subbuf_start_reserve(buf, length) + + +Creating a channel +------------------ + +relay_open() is used to create a channel, along with its per-cpu +channel buffers. Each channel buffer will have an associated file +created for it in the relayfs filesystem, which can be opened and +mmapped from user space if desired. The files are named +basename0...basenameN-1 where N is the number of online cpus, and by +default will be created in the root of the filesystem. If you want a +directory structure to contain your relayfs files, you can create it +with relayfs_create_dir() and pass the parent directory to +relay_open(). 
Clients are responsible for cleaning up any directory +structure they create when the channel is closed - use +relayfs_remove_dir() for that. + +The total size of each per-cpu buffer is calculated by multiplying the +number of sub-buffers by the sub-buffer size passed into relay_open(). +The idea behind sub-buffers is that they're basically an extension of +double-buffering to N buffers, and they also allow applications to +easily implement random-access-on-buffer-boundary schemes, which can +be important for some high-volume applications. The number and size +of sub-buffers is completely dependent on the application and even for +the same application, different conditions will warrant different +values for these parameters at different times. Typically, the right +values to use are best decided after some experimentation; in general, +though, it's safe to assume that having only 1 sub-buffer is a bad +idea - you're guaranteed to either overwrite data or lose events +depending on the channel mode being used. + +Channel 'modes' +--------------- + +relayfs channels can be used in either of two modes - 'overwrite' or +'no-overwrite'. The mode is entirely determined by the implementation +of the subbuf_start() callback, as described below. In 'overwrite' +mode, also known as 'flight recorder' mode, writes continuously cycle +around the buffer and will never fail, but will unconditionally +overwrite old data regardless of whether it's actually been consumed. +In no-overwrite mode, writes will fail i.e. data will be lost, if the +number of unconsumed sub-buffers equals the total number of +sub-buffers in the channel. It should be clear that if there is no +consumer or if the consumer can't consume sub-buffers fast enough, +data will be lost in either case; the only difference is whether data +is lost from the beginning or the end of a buffer.
+ +As explained above, a relayfs channel is made up of one or more +per-cpu channel buffers, each implemented as a circular buffer +subdivided into one or more sub-buffers. Messages are written into +the current sub-buffer of the channel's current per-cpu buffer via the +write functions described below. Whenever a message can't fit into +the current sub-buffer, because there's no room left for it, the +client is notified via the subbuf_start() callback that a switch to a +new sub-buffer is about to occur. The client uses this callback to 1) +initialize the next sub-buffer if appropriate 2) finalize the previous +sub-buffer if appropriate and 3) return a boolean value indicating +whether or not to actually go ahead with the sub-buffer switch. + +To implement 'no-overwrite' mode, the userspace client would provide +an implementation of the subbuf_start() callback something like the +following: + +static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + if (relay_buf_full(buf)) + return 0; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; +} + +If the current buffer is full i.e. all sub-buffers remain unconsumed, +the callback returns 0 to indicate that the buffer switch should not +occur yet i.e. until the consumer has had a chance to read the current +set of ready sub-buffers. For the relay_buf_full() function to make +sense, the consumer is responsible for notifying relayfs when +sub-buffers have been consumed via relay_subbufs_consumed(). Any +subsequent attempts to write into the buffer will again invoke the +subbuf_start() callback with the same parameters; only when the +consumer has consumed one or more of the ready sub-buffers will +relay_buf_full() return 0, in which case the buffer switch can +continue.
+ +The implementation of the subbuf_start() callback for 'overwrite' mode +would be very similar: + +static int subbuf_start(struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + unsigned int prev_padding) +{ + if (prev_subbuf) + *((unsigned *)prev_subbuf) = prev_padding; + + subbuf_start_reserve(buf, sizeof(unsigned int)); + + return 1; +} + +In this case, the relay_buf_full() check is meaningless and the +callback always returns 1, causing the buffer switch to occur +unconditionally. It's also meaningless for the client to use the +relay_subbufs_consumed() function in this mode, as it's never +consulted. + +The default subbuf_start() implementation, used if the client doesn't +define any callbacks, or doesn't define the subbuf_start() callback, +implements the simplest possible 'no-overwrite' mode i.e. it +returns 0 while the buffer is full and 1 otherwise. + +Header information can be reserved at the beginning of each sub-buffer +by calling the subbuf_start_reserve() helper function from within the +subbuf_start() callback. This reserved area can be used to store +whatever information the client wants. In the example above, room is +reserved in each sub-buffer to store the padding count for that +sub-buffer. This is filled in for the previous sub-buffer in the +subbuf_start() implementation; the padding value for the previous +sub-buffer is passed into the subbuf_start() callback along with a +pointer to the previous sub-buffer, since the padding value isn't +known until a sub-buffer is filled. The subbuf_start() callback is +also called for the first sub-buffer when the channel is opened, to +give the client a chance to reserve space in it. In this case the +previous sub-buffer pointer passed into the callback will be NULL, so +the client should check the value of the prev_subbuf pointer before +writing into the previous sub-buffer.
+ +Writing to a channel +-------------------- + +kernel clients write data into the current cpu's channel buffer using +relay_write() or __relay_write(). relay_write() is the main logging +function - it uses local_irqsave() to protect the buffer and should be +used if you might be logging from interrupt context. If you know +you'll never be logging from interrupt context, you can use +__relay_write(), which only disables preemption. These functions +don't return a value, so you can't determine whether or not they +failed - the assumption is that you wouldn't want to check a return +value in the fast logging path anyway, and that they'll always succeed +unless the buffer is full and no-overwrite mode is being used, in +which case you can detect a failed write in the subbuf_start() +callback by calling the relay_buf_full() helper function. + +relay_reserve() is used to reserve a slot in a channel buffer which +can be written to later. This would typically be used in applications +that need to write directly into a channel buffer without having to +stage data in a temporary buffer beforehand. Because the actual write +may not happen immediately after the slot is reserved, applications +using relay_reserve() can keep a count of the number of bytes actually +written, either in space reserved in the sub-buffers themselves or as +a separate array. See the 'reserve' example in the relay-apps tarball +at http://relayfs.sourceforge.net for an example of how this can be +done. Because the write is under control of the client and is +separated from the reserve, relay_reserve() doesn't protect the buffer +at all - it's up to the client to provide the appropriate +synchronization when using relay_reserve(). + +Closing a channel +----------------- + +The client calls relay_close() when it's finished using the channel. +The channel and its associated buffers are destroyed when there are no +longer any references to any of the channel buffers. 
relay_flush() +forces a sub-buffer switch on all the channel buffers, and can be used +to finalize and process the last sub-buffers before the channel is +closed. + +Misc +---- + +Some applications may want to keep a channel around and re-use it +rather than open and close a new channel for each use. relay_reset() +can be used for this purpose - it resets a channel to its initial +state without reallocating channel buffer memory or destroying +existing mappings. It should however only be called when it's safe to +do so i.e. when the channel isn't currently being written to. + +Finally, there are a couple of utility callbacks that can be used for +different purposes. buf_mapped() is called whenever a channel buffer +is mmapped from user space and buf_unmapped() is called when it's +unmapped. The client can use this notification to trigger actions +within the kernel application, such as enabling/disabling logging to +the channel. + + +Resources +========= + +For news, example code, mailing list, etc. see the relayfs homepage: + + http://relayfs.sourceforge.net + + +Credits +======= + +The ideas and specs for relayfs came about as a result of discussions +on tracing involving the following: + +Michel Dagenais +Richard Moore +Bob Wisniewski +Karim Yaghmour +Tom Zanussi + +Also thanks to Hubertus Franke for a lot of useful suggestions and bug +reports. diff --git a/fs/Kconfig b/fs/Kconfig index ed78d24ee426..740d6ff0367d 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -816,6 +816,18 @@ config RAMFS To compile this as a module, choose M here: the module will be called ramfs. +config RELAYFS_FS + tristate "Relayfs file system support" + ---help--- + Relayfs is a high-speed data relay filesystem designed to provide + an efficient mechanism for tools and facilities to relay large + amounts of data from kernel space to user space. + + To compile this code as a module, choose M here: the module will be + called relayfs. + + If unsure, say N. 
+ + endmenu menu "Miscellaneous filesystems" diff --git a/fs/Makefile b/fs/Makefile index cf95eb894fd5..15158309dee4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -90,6 +90,7 @@ obj-$(CONFIG_AUTOFS_FS) += autofs/ obj-$(CONFIG_AUTOFS4_FS) += autofs4/ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_UDF_FS) += udf/ +obj-$(CONFIG_RELAYFS_FS) += relayfs/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ diff --git a/fs/relayfs/Makefile b/fs/relayfs/Makefile new file mode 100644 index 000000000000..e76e182cdb38 --- /dev/null +++ b/fs/relayfs/Makefile @@ -0,0 +1,4 @@ +obj-$(CONFIG_RELAYFS_FS) += relayfs.o + +relayfs-y := relay.o inode.o buffers.o + diff --git a/fs/relayfs/buffers.c b/fs/relayfs/buffers.c new file mode 100644 index 000000000000..2aa8e2719999 --- /dev/null +++ b/fs/relayfs/buffers.c @@ -0,0 +1,189 @@ +/* + * RelayFS buffer management code. + * + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +/* + * close() vm_op implementation for relayfs file mapping. + * + * Fetches the channel buffer stored in vma->vm_private_data and + * invokes the channel's buf_unmapped() callback with it and the + * mapped file. + */ +static void relay_file_mmap_close(struct vm_area_struct *vma) +{ + struct rchan_buf *buf = vma->vm_private_data; + buf->chan->cb->buf_unmapped(buf, vma->vm_file); +} + +/* + * nopage() vm_op implementation for relayfs file mapping.
+ */ +static struct page *relay_buf_nopage(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct page *page; + struct rchan_buf *buf = vma->vm_private_data; + unsigned long offset = address - vma->vm_start; + + if (address > vma->vm_end) + return NOPAGE_SIGBUS; /* Disallow mremap */ + if (!buf) + return NOPAGE_OOM; + + page = vmalloc_to_page(buf->start + offset); + if (!page) + return NOPAGE_OOM; + get_page(page); + + if (type) + *type = VM_FAULT_MINOR; + + return page; +} + +/* + * vm_ops for relay file mappings. + */ +static struct vm_operations_struct relay_file_mmap_ops = { + .nopage = relay_buf_nopage, + .close = relay_file_mmap_close, +}; + +/** + * relay_mmap_buf: - mmap channel buffer to process address space + * @buf: relay channel buffer + * @vma: vm_area_struct describing memory to be mapped + * + * Returns 0 if ok, negative on error + * + * Caller should already have grabbed mmap_sem. + */ +int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) +{ + unsigned long length = vma->vm_end - vma->vm_start; + struct file *filp = vma->vm_file; + + if (!buf) + return -EBADF; + + if (length != (unsigned long)buf->chan->alloc_size) + return -EINVAL; + + vma->vm_ops = &relay_file_mmap_ops; + vma->vm_private_data = buf; + buf->chan->cb->buf_mapped(buf, filp); + + return 0; +} + +/** + * relay_alloc_buf - allocate a channel buffer + * @buf: the buffer struct + * @size: total size of the buffer + * + * Returns a pointer to the resulting buffer, NULL if unsuccessful + */ +static void *relay_alloc_buf(struct rchan_buf *buf, unsigned long size) +{ + void *mem; + unsigned int i, j, n_pages; + + size = PAGE_ALIGN(size); + n_pages = size >> PAGE_SHIFT; + + buf->page_array = kcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); + if (!buf->page_array) + return NULL; + + for (i = 0; i < n_pages; i++) { + buf->page_array[i] = alloc_page(GFP_KERNEL); + if (unlikely(!buf->page_array[i])) + goto depopulate; + } + mem = vmap(buf->page_array, 
n_pages, GFP_KERNEL, PAGE_KERNEL); + if (!mem) + goto depopulate; + + memset(mem, 0, size); + buf->page_count = n_pages; + return mem; + +depopulate: + for (j = 0; j < i; j++) + __free_page(buf->page_array[j]); + kfree(buf->page_array); + return NULL; +} + +/** + * relay_create_buf - allocate and initialize a channel buffer + * @alloc_size: size of the buffer to allocate + * @n_subbufs: number of sub-buffers in the channel + * + * Returns channel buffer if successful, NULL otherwise + */ +struct rchan_buf *relay_create_buf(struct rchan *chan) +{ + struct rchan_buf *buf = kcalloc(1, sizeof(struct rchan_buf), GFP_KERNEL); + if (!buf) + return NULL; + + buf->padding = kmalloc(chan->n_subbufs * sizeof(size_t *), GFP_KERNEL); + if (!buf->padding) + goto free_buf; + + buf->start = relay_alloc_buf(buf, chan->alloc_size); + if (!buf->start) + goto free_buf; + + buf->chan = chan; + kref_get(&buf->chan->kref); + return buf; + +free_buf: + kfree(buf->padding); + kfree(buf); + return NULL; +} + +/** + * relay_destroy_buf - destroy an rchan_buf struct and associated buffer + * @buf: the buffer struct + */ +void relay_destroy_buf(struct rchan_buf *buf) +{ + struct rchan *chan = buf->chan; + unsigned int i; + + if (likely(buf->start)) { + vunmap(buf->start); + for (i = 0; i < buf->page_count; i++) + __free_page(buf->page_array[i]); + kfree(buf->page_array); + } + kfree(buf->padding); + kfree(buf); + kref_put(&chan->kref, relay_destroy_channel); +} + +/** + * relay_remove_buf - remove a channel buffer + * + * Removes the file from the relayfs fileystem, which also frees the + * rchan_buf_struct and the channel buffer. Should only be called from + * kref_put(). 
+ */ +void relay_remove_buf(struct kref *kref) +{ + struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); + relayfs_remove(buf->dentry); +} diff --git a/fs/relayfs/buffers.h b/fs/relayfs/buffers.h new file mode 100644 index 000000000000..37a12493f641 --- /dev/null +++ b/fs/relayfs/buffers.h @@ -0,0 +1,12 @@ +#ifndef _BUFFERS_H +#define _BUFFERS_H + +/* Round x up to a multiple of PAGE_SIZE. This is inspired by rtai/shmem */ +#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE) + +extern int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma); +extern struct rchan_buf *relay_create_buf(struct rchan *chan); +extern void relay_destroy_buf(struct rchan_buf *buf); +extern void relay_remove_buf(struct kref *kref); + +#endif/* _BUFFERS_H */ diff --git a/fs/relayfs/inode.c b/fs/relayfs/inode.c new file mode 100644 index 000000000000..0f7f88d067ad --- /dev/null +++ b/fs/relayfs/inode.c @@ -0,0 +1,609 @@ +/* + * VFS-related code for RelayFS, a high-speed data relay filesystem. + * + * Copyright (C) 2003-2005 - Tom Zanussi , IBM Corp + * Copyright (C) 2003-2005 - Karim Yaghmour + * + * Based on ramfs, Copyright (C) 2002 - Linus Torvalds + * + * This file is released under the GPL.
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +#define RELAYFS_MAGIC 0xF0B4A981 + +static struct vfsmount * relayfs_mount; +static int relayfs_mount_count; +static kmem_cache_t * relayfs_inode_cachep; + +static struct backing_dev_info relayfs_backing_dev_info = { + .ra_pages = 0, /* No readahead */ + .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK, +}; + +/* + * relayfs_get_inode - allocate and set up an inode for a file or directory + * + * For regular files a channel buffer is created from @chan (which must + * not be NULL) and attached to the inode; directories carry no buffer. + * Returns the new inode, or NULL on allocation failure. + */ +static struct inode *relayfs_get_inode(struct super_block *sb, int mode, + struct rchan *chan) +{ + struct rchan_buf *buf = NULL; + struct inode *inode; + + if (S_ISREG(mode)) { + BUG_ON(!chan); + buf = relay_create_buf(chan); + if (!buf) + return NULL; + } + + inode = new_inode(sb); + if (!inode) { + /* buf is NULL for directories; relay_destroy_buf() dereferences it */ + if (buf) + relay_destroy_buf(buf); + return NULL; + } + + inode->i_mode = mode; + inode->i_uid = 0; + inode->i_gid = 0; + inode->i_blksize = PAGE_CACHE_SIZE; + inode->i_blocks = 0; + inode->i_mapping->backing_dev_info = &relayfs_backing_dev_info; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + case S_IFREG: + inode->i_fop = &relayfs_file_operations; + RELAYFS_I(inode)->buf = buf; + break; + case S_IFDIR: + inode->i_op = &simple_dir_inode_operations; + inode->i_fop = &simple_dir_operations; + + /* directory inodes start off with i_nlink == 2 (for "." entry) */ + inode->i_nlink++; + break; + default: + break; + } + + return inode; +} + +/** + * relayfs_create_entry - create a relayfs directory or file + * @name: the name of the file to create + * @parent: parent directory + * @mode: mode + * @chan: relay channel associated with the file + * + * Returns the new dentry, NULL on failure + * + * Creates a file or directory with the specified permissions.
+ */ +static struct dentry *relayfs_create_entry(const char *name, + struct dentry *parent, + int mode, + struct rchan *chan) +{ + struct dentry *d; + struct inode *inode; + int error = 0; + + BUG_ON(!name || !(S_ISREG(mode) || S_ISDIR(mode))); + + error = simple_pin_fs("relayfs", &relayfs_mount, &relayfs_mount_count); + if (error) { + printk(KERN_ERR "Couldn't mount relayfs: errcode %d\n", error); + return NULL; + } + + if (!parent && relayfs_mount && relayfs_mount->mnt_sb) + parent = relayfs_mount->mnt_sb->s_root; + + if (!parent) { + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + return NULL; + } + + parent = dget(parent); + down(&parent->d_inode->i_sem); + d = lookup_one_len(name, parent, strlen(name)); + if (IS_ERR(d)) { + d = NULL; + goto release_mount; + } + + if (d->d_inode) { + d = NULL; + goto release_mount; + } + + inode = relayfs_get_inode(parent->d_inode->i_sb, mode, chan); + if (!inode) { + d = NULL; + goto release_mount; + } + + d_instantiate(d, inode); + dget(d); /* Extra count - pin the dentry in core */ + + if (S_ISDIR(mode)) + parent->d_inode->i_nlink++; + + goto exit; + +release_mount: + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + +exit: + up(&parent->d_inode->i_sem); + dput(parent); + return d; +} + +/** + * relayfs_create_file - create a file in the relay filesystem + * @name: the name of the file to create + * @parent: parent directory + * @mode: mode, if not specied the default perms are used + * @chan: channel associated with the file + * + * Returns file dentry if successful, NULL otherwise. + * + * The file will be created user r on behalf of current user. 
+ */ +struct dentry *relayfs_create_file(const char *name, struct dentry *parent, + int mode, struct rchan *chan) +{ + if (!mode) + mode = S_IRUSR; /* default: readable by the owner only */ + mode = (mode & S_IALLUGO) | S_IFREG; /* keep the permission bits, force a regular file */ + + return relayfs_create_entry(name, parent, mode, chan); +} + +/** + * relayfs_create_dir - create a directory in the relay filesystem + * @name: the name of the directory to create + * @parent: parent directory, NULL if parent should be fs root + * + * Returns directory dentry if successful, NULL otherwise. + * + * The directory will be created rwx for the owner and r-x for group + * and others (S_IRWXU | S_IRUGO | S_IXUGO). + */ +struct dentry *relayfs_create_dir(const char *name, struct dentry *parent) +{ + int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + return relayfs_create_entry(name, parent, mode, NULL); +} + +/** + * relayfs_remove - remove a file or directory in the relay filesystem + * @dentry: file or directory dentry + * + * Removes the entry from its parent while holding the parent's i_sem, + * drops a reference on @dentry and, on success, releases the + * filesystem pin with simple_release_fs(). + * + * Returns 0 if successful, negative otherwise (-EINVAL if @dentry or + * its parent is NULL). + */ +int relayfs_remove(struct dentry *dentry) +{ + struct dentry *parent; + int error = 0; + + if (!dentry) + return -EINVAL; + parent = dentry->d_parent; + if (!parent) + return -EINVAL; + + parent = dget(parent); + down(&parent->d_inode->i_sem); + if (dentry->d_inode) { + if (S_ISDIR(dentry->d_inode->i_mode)) + error = simple_rmdir(parent->d_inode, dentry); + else + error = simple_unlink(parent->d_inode, dentry); + if (!error) + d_delete(dentry); + } + if (!error) + dput(dentry); + up(&parent->d_inode->i_sem); + dput(parent); + + if (!error) + simple_release_fs(&relayfs_mount, &relayfs_mount_count); + + return error; +} + +/** + * relayfs_remove_dir - remove a directory in the relay filesystem + * @dentry: directory dentry + * + * Thin wrapper around relayfs_remove(). + * + * Returns 0 if successful, negative otherwise. + */ +int relayfs_remove_dir(struct dentry *dentry) +{ + return relayfs_remove(dentry); +} + +/** + * relayfs_open - open file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Increments the channel buffer refcount.
+ */ +static int relayfs_open(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + kref_get(&buf->kref); /* dropped again in relayfs_release() */ + + return 0; +} + +/** + * relayfs_mmap - mmap file op for relayfs files + * @filp: the file + * @vma: the vma describing what to map + * + * Calls upon relay_mmap_buf to map the file into user space. + */ +static int relayfs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct inode *inode = filp->f_dentry->d_inode; + return relay_mmap_buf(RELAYFS_I(inode)->buf, vma); +} + +/** + * relayfs_poll - poll file op for relayfs files + * @filp: the file + * @wait: poll table + * + * Poll implementation. Returns POLLERR once the buffer is finalized; + * otherwise, for files opened for reading, reports + * POLLIN | POLLRDNORM when the channel buffer is not empty. + */ +static unsigned int relayfs_poll(struct file *filp, poll_table *wait) +{ + unsigned int mask = 0; + struct inode *inode = filp->f_dentry->d_inode; + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + + if (buf->finalized) + return POLLERR; + + if (filp->f_mode & FMODE_READ) { + poll_wait(filp, &buf->read_wait, wait); + if (!relay_buf_empty(buf)) + mask |= POLLIN | POLLRDNORM; + } + + return mask; +} + +/** + * relayfs_release - release file op for relayfs files + * @inode: the inode + * @filp: the file + * + * Decrements the channel buffer refcount, as the filesystem is + * no longer using it.
+ */ +static int relayfs_release(struct inode *inode, struct file *filp) +{ + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + kref_put(&buf->kref, relay_remove_buf); /* relay_remove_buf() is the release function for the buffer kref */ + + return 0; +} + +/** + * relayfs_read_consume - update the consumed count for the buffer + * @buf: channel buffer + * @read_pos: buffer position the bytes were read from + * @bytes_consumed: number of bytes just read + * + * Adds @bytes_consumed to buf->bytes_consumed and reports one + * sub-buffer as consumed via relay_subbufs_consumed() once the bytes + * consumed plus that sub-buffer's padding add up to a whole sub-buffer. + */ +static void relayfs_read_consume(struct rchan_buf *buf, + size_t read_pos, + size_t bytes_consumed) +{ + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t read_subbuf; + + if (buf->bytes_consumed + bytes_consumed > subbuf_size) { + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } + + buf->bytes_consumed += bytes_consumed; + read_subbuf = read_pos / buf->chan->subbuf_size; + if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { + /* the sub-buffer being written is exactly full: leave it unconsumed */ + if ((read_subbuf == buf->subbufs_produced % n_subbufs) && + (buf->offset == subbuf_size)) + return; + relay_subbufs_consumed(buf->chan, buf->cpu, 1); + buf->bytes_consumed = 0; + } +} + +/** + * relayfs_read_avail - boolean, are there unconsumed bytes available? + * @buf: channel buffer + * @read_pos: current read position + * + * Returns 0 when the bytes produced equal the bytes consumed, + * 1 otherwise. + */ +static int relayfs_read_avail(struct rchan_buf *buf, size_t read_pos) +{ + size_t bytes_produced, bytes_consumed, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + size_t produced = buf->subbufs_produced % n_subbufs; + size_t consumed = buf->subbufs_consumed % n_subbufs; + + write_offset = buf->offset > subbuf_size ?
subbuf_size : buf->offset; + + if (consumed > produced) { + if ((produced > n_subbufs) && + (produced + n_subbufs - consumed <= n_subbufs)) /* NOTE(review): produced was reduced modulo n_subbufs above, so produced > n_subbufs looks unreachable - confirm intent */ + produced += n_subbufs; + } else if (consumed == produced) { + if (buf->offset > subbuf_size) { + produced += n_subbufs; + if (buf->subbufs_produced == buf->subbufs_consumed) + consumed += n_subbufs; + } + } + + if (buf->offset > subbuf_size) + bytes_produced = (produced - 1) * subbuf_size + write_offset; + else + bytes_produced = produced * subbuf_size + write_offset; + bytes_consumed = consumed * subbuf_size + buf->bytes_consumed; + + if (bytes_produced == bytes_consumed) + return 0; + + relayfs_read_consume(buf, read_pos, 0); /* zero-byte consume: only updates the sub-buffer accounting */ + + return 1; +} + +/** + * relayfs_read_subbuf_avail - return bytes available in sub-buffer + * @read_pos: current read position + * @buf: channel buffer + * + * In the sub-buffer currently being written, the bytes between the + * read offset (plus padding) and the write offset are available; in + * any other sub-buffer, everything up to the start of its padding is. + */ +static size_t relayfs_read_subbuf_avail(size_t read_pos, + struct rchan_buf *buf) +{ + size_t padding, avail = 0; + size_t read_subbuf, read_offset, write_subbuf, write_offset; + size_t subbuf_size = buf->chan->subbuf_size; + + write_subbuf = (buf->data - buf->start) / subbuf_size; + write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; + read_subbuf = read_pos / subbuf_size; + read_offset = read_pos % subbuf_size; + padding = buf->padding[read_subbuf]; + + if (read_subbuf == write_subbuf) { + if (read_offset + padding < write_offset) + avail = write_offset - (read_offset + padding); + } else + avail = (subbuf_size - padding) - read_offset; + + return avail; +} + +/** + * relayfs_read_start_pos - find the first available byte to read + * + * If the read_pos is in the middle of padding, return the + * position of the first actually available byte, otherwise + * return the original value.
+ */ +static size_t relayfs_read_start_pos(size_t read_pos, + struct rchan_buf *buf) +{ + size_t read_subbuf, padding, padding_start, padding_end; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + padding_start = (read_subbuf + 1) * subbuf_size - padding; /* padding occupies the tail of the sub-buffer */ + padding_end = (read_subbuf + 1) * subbuf_size; + if (read_pos >= padding_start && read_pos < padding_end) { + read_subbuf = (read_subbuf + 1) % n_subbufs; /* skip to the next sub-buffer, wrapping around */ + read_pos = read_subbuf * subbuf_size; + } + + return read_pos; +} + +/** + * relayfs_read_end_pos - return the new read position + * @buf: channel buffer + * @read_pos: position the read started from + * @count: number of bytes read + * + * If the read reached the start of the sub-buffer's padding, the new + * position is the start of the next sub-buffer; a position at or past + * the end of the whole buffer wraps to 0. + */ +static size_t relayfs_read_end_pos(struct rchan_buf *buf, + size_t read_pos, + size_t count) +{ + size_t read_subbuf, padding, end_pos; + size_t subbuf_size = buf->chan->subbuf_size; + size_t n_subbufs = buf->chan->n_subbufs; + + read_subbuf = read_pos / subbuf_size; + padding = buf->padding[read_subbuf]; + if (read_pos % subbuf_size + count + padding == subbuf_size) + end_pos = (read_subbuf + 1) * subbuf_size; + else + end_pos = read_pos + count; + if (end_pos >= subbuf_size * n_subbufs) + end_pos = 0; + + return end_pos; +} + +/** + * relayfs_read - read file op for relayfs files + * @filp: the file + * @buffer: the userspace buffer + * @count: number of bytes to read + * @ppos: position to read from + * + * Reads count bytes or the number of bytes available in the + * current sub-buffer being read, whichever is smaller.
+ */ +static ssize_t relayfs_read(struct file *filp, + char __user *buffer, + size_t count, + loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct rchan_buf *buf = RELAYFS_I(inode)->buf; + size_t read_start, avail; + ssize_t ret = 0; + void *from; + + down(&inode->i_sem); /* held until the out: label */ + if(!relayfs_read_avail(buf, *ppos)) + goto out; + + read_start = relayfs_read_start_pos(*ppos, buf); + avail = relayfs_read_subbuf_avail(read_start, buf); + if (!avail) + goto out; + + from = buf->start + read_start; + ret = count = min(count, avail); /* never read past the current sub-buffer's data */ + if (copy_to_user(buffer, from, count)) { + ret = -EFAULT; + goto out; + } + relayfs_read_consume(buf, read_start, count); + *ppos = relayfs_read_end_pos(buf, read_start, count); +out: + up(&inode->i_sem); + return ret; +} + +/** + * relayfs alloc_inode() implementation + * + * Allocates a relayfs_inode_info from relayfs_inode_cachep with no + * channel buffer attached and returns its embedded VFS inode, or NULL + * if the allocation fails. + */ +static struct inode *relayfs_alloc_inode(struct super_block *sb) +{ + struct relayfs_inode_info *p = kmem_cache_alloc(relayfs_inode_cachep, SLAB_KERNEL); + if (!p) + return NULL; + p->buf = NULL; + + return &p->vfs_inode; +} + +/** + * relayfs destroy_inode() implementation + * + * Destroys the channel buffer attached to the inode, if any, then + * returns the relayfs_inode_info to relayfs_inode_cachep. + */ +static void relayfs_destroy_inode(struct inode *inode) +{ + if (RELAYFS_I(inode)->buf) + relay_destroy_buf(RELAYFS_I(inode)->buf); + + kmem_cache_free(relayfs_inode_cachep, RELAYFS_I(inode)); +} + +static void init_once(void *p, kmem_cache_t *cachep, unsigned long flags) +{ + struct relayfs_inode_info *i = p; + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) + inode_init_once(&i->vfs_inode); /* slab constructor: initialize the embedded VFS inode */ +} + +struct file_operations relayfs_file_operations = { + .open = relayfs_open, + .poll = relayfs_poll, + .mmap = relayfs_mmap, + .read = relayfs_read, + .llseek = no_llseek, + .release = relayfs_release, +}; + +static struct super_operations relayfs_ops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .alloc_inode = relayfs_alloc_inode, + .destroy_inode = relayfs_destroy_inode, +}; + +static int relayfs_fill_super(struct super_block
* sb, void * data, int silent) +{ + struct inode *inode; + struct dentry *root; + int mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; + + sb->s_blocksize = PAGE_CACHE_SIZE; + sb->s_blocksize_bits = PAGE_CACHE_SHIFT; + sb->s_magic = RELAYFS_MAGIC; + sb->s_op = &relayfs_ops; + inode = relayfs_get_inode(sb, mode, NULL); /* root directory inode: no channel */ + + if (!inode) + return -ENOMEM; + + root = d_alloc_root(inode); + if (!root) { + iput(inode); + return -ENOMEM; + } + sb->s_root = root; + + return 0; +} + +static struct super_block * relayfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_single(fs_type, flags, data, relayfs_fill_super); +} + +static struct file_system_type relayfs_fs_type = { + .owner = THIS_MODULE, + .name = "relayfs", + .get_sb = relayfs_get_sb, + .kill_sb = kill_litter_super, +}; + +static int __init init_relayfs_fs(void) +{ + int err; + + relayfs_inode_cachep = kmem_cache_create("relayfs_inode_cache", + sizeof(struct relayfs_inode_info), 0, + 0, init_once, NULL); + if (!relayfs_inode_cachep) + return -ENOMEM; + + err = register_filesystem(&relayfs_fs_type); + if (err) + kmem_cache_destroy(relayfs_inode_cachep); /* undo the cache creation when registration fails */ + + return err; +} + +static void __exit exit_relayfs_fs(void) +{ + unregister_filesystem(&relayfs_fs_type); + kmem_cache_destroy(relayfs_inode_cachep); +} + +module_init(init_relayfs_fs) +module_exit(exit_relayfs_fs) + +EXPORT_SYMBOL_GPL(relayfs_file_operations); +EXPORT_SYMBOL_GPL(relayfs_create_dir); +EXPORT_SYMBOL_GPL(relayfs_remove_dir); + +MODULE_AUTHOR("Tom Zanussi and Karim Yaghmour "); +MODULE_DESCRIPTION("Relay Filesystem"); +MODULE_LICENSE("GPL"); + diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c new file mode 100644 index 000000000000..16446a15c96d --- /dev/null +++ b/fs/relayfs/relay.c @@ -0,0 +1,431 @@ +/* + * Public API and common code for RelayFS. + * + * See Documentation/filesystems/relayfs.txt for an overview of relayfs.
+ * + * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) + * + * This file is released under the GPL. + */ + +#include +#include +#include +#include +#include +#include +#include "relay.h" +#include "buffers.h" + +/** + * relay_buf_empty - boolean, is the channel buffer empty? + * @buf: channel buffer + * + * Returns 1 if the buffer is empty, 0 otherwise. + */ +int relay_buf_empty(struct rchan_buf *buf) +{ + return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; +} + +/** + * relay_buf_full - boolean, is the channel buffer full? + * @buf: channel buffer + * + * Returns 1 if the buffer is full, 0 otherwise. + */ +int relay_buf_full(struct rchan_buf *buf) +{ + size_t ready = buf->subbufs_produced - buf->subbufs_consumed; + return (ready >= buf->chan->n_subbufs) ? 1 : 0; +} + +/* + * High-level relayfs kernel API and associated functions. + */ + +/* + * rchan_callback implementations defining default channel behavior. Used + * in place of corresponding NULL values in client callback struct. + */ + +/* + * subbuf_start() default callback. Does nothing. + */ +static int subbuf_start_default_callback (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding) +{ + if (relay_buf_full(buf)) + return 0; + + return 1; +} + +/* + * buf_mapped() default callback. Does nothing. + */ +static void buf_mapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* + * buf_unmapped() default callback. Does nothing. 
+ */ +static void buf_unmapped_default_callback(struct rchan_buf *buf, + struct file *filp) +{ +} + +/* relay channel default callbacks */ +static struct rchan_callbacks default_channel_callbacks = { + .subbuf_start = subbuf_start_default_callback, + .buf_mapped = buf_mapped_default_callback, + .buf_unmapped = buf_unmapped_default_callback, +}; + +/** + * wakeup_readers - wake up readers waiting on a channel + * @private: the channel buffer + * + * This is the work function used to defer reader waking. The + * reason waking is deferred is that calling directly from write + * causes problems if you're writing from say the scheduler. + */ +static void wakeup_readers(void *private) +{ + struct rchan_buf *buf = private; + wake_up_interruptible(&buf->read_wait); +} + +/** + * __relay_reset - reset a channel buffer + * @buf: the channel buffer + * @init: 1 if this is a first-time initialization + * + * See relay_reset for description of effect. + */ +static inline void __relay_reset(struct rchan_buf *buf, unsigned int init) +{ + size_t i; + + if (init) { + init_waitqueue_head(&buf->read_wait); + kref_init(&buf->kref); + INIT_WORK(&buf->wake_readers, NULL, NULL); + } else { + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + } + + buf->subbufs_produced = 0; + buf->subbufs_consumed = 0; + buf->bytes_consumed = 0; + buf->finalized = 0; + buf->data = buf->start; + buf->offset = 0; + + for (i = 0; i < buf->chan->n_subbufs; i++) + buf->padding[i] = 0; + + buf->chan->cb->subbuf_start(buf, buf->data, NULL, 0); +} + +/** + * relay_reset - reset the channel + * @chan: the channel + * + * This has the effect of erasing all data from all channel buffers + * and restarting the channel in its initial state. The buffers + * are not freed, so any mappings are still in effect. + * + * NOTE: Care should be taken that the channel isn't actually + * being used by anything when this call is made. 
+ */ +void relay_reset(struct rchan *chan) +{ + unsigned int i; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + continue; + __relay_reset(chan->buf[i], 0); + } +} + +/** + * relay_open_buf - create a new channel buffer in relayfs + * + * Internal - used by relay_open(). + */ +static struct rchan_buf *relay_open_buf(struct rchan *chan, + const char *filename, + struct dentry *parent) +{ + struct rchan_buf *buf; + struct dentry *dentry; + + /* Create file in fs */ + dentry = relayfs_create_file(filename, parent, S_IRUSR, chan); + if (!dentry) + return NULL; + + buf = RELAYFS_I(dentry->d_inode)->buf; + buf->dentry = dentry; + __relay_reset(buf, 1); + + return buf; +} + +/** + * relay_close_buf - close a channel buffer + * @buf: channel buffer + * + * Marks the buffer finalized and restores the default callbacks. + * The channel buffer and channel buffer data structure are then freed + * automatically when the last reference is given up. + */ +static inline void relay_close_buf(struct rchan_buf *buf) +{ + buf->finalized = 1; + buf->chan->cb = &default_channel_callbacks; + cancel_delayed_work(&buf->wake_readers); + flush_scheduled_work(); + kref_put(&buf->kref, relay_remove_buf); +} + +static inline void setup_callbacks(struct rchan *chan, + struct rchan_callbacks *cb) +{ + if (!cb) { + chan->cb = &default_channel_callbacks; + return; + } + + if (!cb->subbuf_start) + cb->subbuf_start = subbuf_start_default_callback; + if (!cb->buf_mapped) + cb->buf_mapped = buf_mapped_default_callback; + if (!cb->buf_unmapped) + cb->buf_unmapped = buf_unmapped_default_callback; + chan->cb = cb; +} + +/** + * relay_open - create a new relayfs channel + * @base_filename: base name of files to create + * @parent: dentry of parent directory, NULL for root directory + * @subbuf_size: size of sub-buffers + * @n_subbufs: number of sub-buffers + * @cb: client callback functions + * + * Returns channel pointer if successful, NULL otherwise. 
+ *
+ * Creates a channel buffer for each cpu using the sizes and
+ * attributes specified. The created channel buffer files
+ * will be named base_filename0...base_filenameN-1. File
+ * permissions will be S_IRUSR.
+ *
+ * NULL members of @cb are filled in with the default callbacks.
+ * If any per-cpu buffer cannot be created, every buffer opened so
+ * far is closed again and NULL is returned.
+ */
+struct rchan *relay_open(const char *base_filename,
+			 struct dentry *parent,
+			 size_t subbuf_size,
+			 size_t n_subbufs,
+			 struct rchan_callbacks *cb)
+{
+	unsigned int i;
+	struct rchan *chan;
+	char *tmpname;
+
+	if (!base_filename)
+		return NULL;
+
+	if (!(subbuf_size && n_subbufs))
+		return NULL;
+
+	/* zeroed allocation: every chan->buf[] slot starts out NULL */
+	chan = kcalloc(1, sizeof(struct rchan), GFP_KERNEL);
+	if (!chan)
+		return NULL;
+
+	chan->version = RELAYFS_CHANNEL_VERSION;
+	chan->n_subbufs = n_subbufs;
+	chan->subbuf_size = subbuf_size;
+	chan->alloc_size = FIX_SIZE(subbuf_size * n_subbufs);
+	setup_callbacks(chan, cb);
+	kref_init(&chan->kref);
+
+	tmpname = kmalloc(NAME_MAX + 1, GFP_KERNEL);
+	if (!tmpname)
+		goto free_chan;
+
+	for_each_online_cpu(i) {
+		/* tmpname is NAME_MAX + 1 bytes; never write past its end */
+		snprintf(tmpname, NAME_MAX + 1, "%s%d", base_filename, i);
+		chan->buf[i] = relay_open_buf(chan, tmpname, parent);
+		/* relay_open_buf() returns NULL on failure: check before use */
+		if (!chan->buf[i])
+			goto free_bufs;
+		chan->buf[i]->cpu = i;
+	}
+
+	kfree(tmpname);
+	return chan;
+
+free_bufs:
+	/*
+	 * Buffers were opened per online cpu, so the used slots may not
+	 * be contiguous: skip empty slots instead of stopping at the
+	 * first one, otherwise later buffers would be leaked.
+	 */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			continue;
+		relay_close_buf(chan->buf[i]);
+	}
+	kfree(tmpname);
+
+free_chan:
+	kref_put(&chan->kref, relay_destroy_channel);
+	return NULL;
+}
+
+/**
+ * relay_switch_subbuf - switch to a new sub-buffer
+ * @buf: channel buffer
+ * @length: size of current event
+ *
+ * Returns either the length passed in or 0 if full.
+ *
+ * Performs sub-buffer-switch tasks such as invoking callbacks,
+ * updating padding counts, waking up readers, etc.
+ */
+size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
+{
+	void *old, *new;
+	size_t old_subbuf, new_subbuf;
+
+	/* an event larger than a whole sub-buffer can never be stored */
+	if (unlikely(length > buf->chan->subbuf_size))
+		goto toobig;
+
+	/*
+	 * offset == subbuf_size + 1 is the marker stored further down when
+	 * subbuf_start() refused the switch.  The sub-buffer was already
+	 * counted as produced on that earlier call, so the accounting is
+	 * skipped until a switch succeeds again.
+	 */
+	if (buf->offset != buf->chan->subbuf_size + 1) {
+		/* record the unused tail of the sub-buffer being left */
+		buf->prev_padding = buf->chan->subbuf_size - buf->offset;
+		old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
+		buf->padding[old_subbuf] = buf->prev_padding;
+		buf->subbufs_produced++;
+		/* readers are woken from a work item, not directly from here */
+		if (waitqueue_active(&buf->read_wait)) {
+			PREPARE_WORK(&buf->wake_readers, wakeup_readers, buf);
+			schedule_delayed_work(&buf->wake_readers, 1);
+		}
+	}
+
+	old = buf->data;
+	/* sub-buffers are used round-robin: index = produced % n_subbufs */
+	new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs;
+	new = buf->start + new_subbuf * buf->chan->subbuf_size;
+	buf->offset = 0;
+	/* the callback returns 0 to stop logging (the default does so when full) */
+	if (!buf->chan->cb->subbuf_start(buf, new, old, buf->prev_padding)) {
+		/* leave the "switch refused" marker for the next call */
+		buf->offset = buf->chan->subbuf_size + 1;
+		return 0;
+	}
+	buf->data = new;
+	buf->padding[new_subbuf] = 0;
+
+	/*
+	 * buf->offset was reset to 0 above, so it is non-zero here only if
+	 * the callback changed it (e.g. via subbuf_start_reserve());
+	 * re-check that the event still fits.
+	 */
+	if (unlikely(length + buf->offset > buf->chan->subbuf_size))
+		goto toobig;
+
+	return length;
+
+toobig:
+	printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
+	WARN_ON(1);
+	return 0;
+}
+
+/**
+ * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count
+ * @chan: the channel
+ * @cpu: the cpu associated with the channel buffer to update
+ * @subbufs_consumed: number of sub-buffers to add to current buf's count
+ *
+ * Adds to the channel buffer's consumed sub-buffer count.
+ * subbufs_consumed should be the number of sub-buffers newly consumed,
+ * not the total consumed.
+ *
+ * NOTE: kernel clients don't need to call this function if the channel
+ * mode is 'overwrite'.
+ */
+void relay_subbufs_consumed(struct rchan *chan,
+			    unsigned int cpu,
+			    size_t subbufs_consumed)
+{
+	struct rchan_buf *buf;
+
+	if (!chan)
+		return;
+
+	/* silently ignore out-of-range cpus and cpus without a buffer */
+	if (cpu >= NR_CPUS || !chan->buf[cpu])
+		return;
+
+	buf = chan->buf[cpu];
+	buf->subbufs_consumed += subbufs_consumed;
+	/* clamp: the consumed count may never run ahead of the produced count */
+	if (buf->subbufs_consumed > buf->subbufs_produced)
+		buf->subbufs_consumed = buf->subbufs_produced;
+}
+
+/**
+ * relay_destroy_channel - free the channel struct
+ * @kref: the kref embedded in the struct rchan to free
+ *
+ * Should only be called from kref_put().
+ */
+void relay_destroy_channel(struct kref *kref)
+{
+	struct rchan *chan = container_of(kref, struct rchan, kref);
+	kfree(chan);
+}
+
+/**
+ * relay_close - close the channel
+ * @chan: the channel, may be NULL (no-op)
+ *
+ * Closes all channel buffers and drops the caller's reference on the
+ * channel; the channel struct is freed by relay_destroy_channel()
+ * once its refcount reaches zero.
+ */
+void relay_close(struct rchan *chan)
+{
+	unsigned int i;
+
+	if (!chan)
+		return;
+
+	/* slots without a buffer are NULL and are skipped */
+	for (i = 0; i < NR_CPUS; i++) {
+		if (!chan->buf[i])
+			continue;
+		relay_close_buf(chan->buf[i]);
+	}
+
+	kref_put(&chan->kref, relay_destroy_channel);
+}
+
+/**
+ * relay_flush - flush the channel
+ * @chan: the channel
+ *
+ * Flushes all channel buffers i.e. forces buffer switch.
+ */ +void relay_flush(struct rchan *chan) +{ + unsigned int i; + + if (!chan) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!chan->buf[i]) + continue; + relay_switch_subbuf(chan->buf[i], 0); + } +} + +EXPORT_SYMBOL_GPL(relay_open); +EXPORT_SYMBOL_GPL(relay_close); +EXPORT_SYMBOL_GPL(relay_flush); +EXPORT_SYMBOL_GPL(relay_reset); +EXPORT_SYMBOL_GPL(relay_subbufs_consumed); +EXPORT_SYMBOL_GPL(relay_switch_subbuf); +EXPORT_SYMBOL_GPL(relay_buf_full); diff --git a/fs/relayfs/relay.h b/fs/relayfs/relay.h new file mode 100644 index 000000000000..703503fa22b6 --- /dev/null +++ b/fs/relayfs/relay.h @@ -0,0 +1,12 @@ +#ifndef _RELAY_H +#define _RELAY_H + +struct dentry *relayfs_create_file(const char *name, + struct dentry *parent, + int mode, + struct rchan *chan); +extern int relayfs_remove(struct dentry *dentry); +extern int relay_buf_empty(struct rchan_buf *buf); +extern void relay_destroy_channel(struct kref *kref); + +#endif /* _RELAY_H */ diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h new file mode 100644 index 000000000000..cfafc3e76bc2 --- /dev/null +++ b/include/linux/relayfs_fs.h @@ -0,0 +1,255 @@ +/* + * linux/include/linux/relayfs_fs.h + * + * Copyright (C) 2002, 2003 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp + * Copyright (C) 1999, 2000, 2001, 2002 - Karim Yaghmour (karim@opersys.com) + * + * RelayFS definitions and declarations + */ + +#ifndef _LINUX_RELAYFS_FS_H +#define _LINUX_RELAYFS_FS_H + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Tracks changes to rchan_buf struct + */ +#define RELAYFS_CHANNEL_VERSION 5 + +/* + * Per-cpu relay channel buffer + */ +struct rchan_buf +{ + void *start; /* start of channel buffer */ + void *data; /* start of current sub-buffer */ + size_t offset; /* current offset into sub-buffer */ + size_t subbufs_produced; /* count of sub-buffers produced */ + size_t subbufs_consumed; /* count of sub-buffers consumed */ + struct rchan *chan; /* associated 
channel */ + wait_queue_head_t read_wait; /* reader wait queue */ + struct work_struct wake_readers; /* reader wake-up work struct */ + struct dentry *dentry; /* channel file dentry */ + struct kref kref; /* channel buffer refcount */ + struct page **page_array; /* array of current buffer pages */ + unsigned int page_count; /* number of current buffer pages */ + unsigned int finalized; /* buffer has been finalized */ + size_t *padding; /* padding counts per sub-buffer */ + size_t prev_padding; /* temporary variable */ + size_t bytes_consumed; /* bytes consumed in cur read subbuf */ + unsigned int cpu; /* this buf's cpu */ +} ____cacheline_aligned; + +/* + * Relay channel data structure + */ +struct rchan +{ + u32 version; /* the version of this struct */ + size_t subbuf_size; /* sub-buffer size */ + size_t n_subbufs; /* number of sub-buffers per buffer */ + size_t alloc_size; /* total buffer size allocated */ + struct rchan_callbacks *cb; /* client callbacks */ + struct kref kref; /* channel refcount */ + void *private_data; /* for user-defined data */ + struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */ +}; + +/* + * Relayfs inode + */ +struct relayfs_inode_info +{ + struct inode vfs_inode; + struct rchan_buf *buf; +}; + +static inline struct relayfs_inode_info *RELAYFS_I(struct inode *inode) +{ + return container_of(inode, struct relayfs_inode_info, vfs_inode); +} + +/* + * Relay channel client callbacks + */ +struct rchan_callbacks +{ + /* + * subbuf_start - called on buffer-switch to a new sub-buffer + * @buf: the channel buffer containing the new sub-buffer + * @subbuf: the start of the new sub-buffer + * @prev_subbuf: the start of the previous sub-buffer + * @prev_padding: unused space at the end of previous sub-buffer + * + * The client should return 1 to continue logging, 0 to stop + * logging. + * + * NOTE: subbuf_start will also be invoked when the buffer is + * created, so that the first sub-buffer can be initialized + * if necessary. 
In this case, prev_subbuf will be NULL. + * + * NOTE: the client can reserve bytes at the beginning of the new + * sub-buffer by calling subbuf_start_reserve() in this callback. + */ + int (*subbuf_start) (struct rchan_buf *buf, + void *subbuf, + void *prev_subbuf, + size_t prev_padding); + + /* + * buf_mapped - relayfs buffer mmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully mmapped + */ + void (*buf_mapped)(struct rchan_buf *buf, + struct file *filp); + + /* + * buf_unmapped - relayfs buffer unmap notification + * @buf: the channel buffer + * @filp: relayfs file pointer + * + * Called when a relayfs file is successfully unmapped + */ + void (*buf_unmapped)(struct rchan_buf *buf, + struct file *filp); +}; + +/* + * relayfs kernel API, fs/relayfs/relay.c + */ + +struct rchan *relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb); +extern void relay_close(struct rchan *chan); +extern void relay_flush(struct rchan *chan); +extern void relay_subbufs_consumed(struct rchan *chan, + unsigned int cpu, + size_t consumed); +extern void relay_reset(struct rchan *chan); +extern int relay_buf_full(struct rchan_buf *buf); + +extern size_t relay_switch_subbuf(struct rchan_buf *buf, + size_t length); +extern struct dentry *relayfs_create_dir(const char *name, + struct dentry *parent); +extern int relayfs_remove_dir(struct dentry *dentry); + +/** + * relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling interrupts. Use this + * if you might be logging from interrupt context. Try + * __relay_write() if you know you won't be logging from + * interrupt context. 
+ */ +static inline void relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + unsigned long flags; + struct rchan_buf *buf; + + local_irq_save(flags); + buf = chan->buf[smp_processor_id()]; + if (unlikely(buf->offset + length > chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + local_irq_restore(flags); +} + +/** + * __relay_write - write data into the channel + * @chan: relay channel + * @data: data to be written + * @length: number of bytes to write + * + * Writes data into the current cpu's channel buffer. + * + * Protects the buffer by disabling preemption. Use + * relay_write() if you might be logging from interrupt + * context. + */ +static inline void __relay_write(struct rchan *chan, + const void *data, + size_t length) +{ + struct rchan_buf *buf; + + buf = chan->buf[get_cpu()]; + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) + length = relay_switch_subbuf(buf, length); + memcpy(buf->data + buf->offset, data, length); + buf->offset += length; + put_cpu(); +} + +/** + * relay_reserve - reserve slot in channel buffer + * @chan: relay channel + * @length: number of bytes to reserve + * + * Returns pointer to reserved slot, NULL if full. + * + * Reserves a slot in the current cpu's channel buffer. + * Does not protect the buffer at all - caller must provide + * appropriate synchronization. 
+ */ +static inline void *relay_reserve(struct rchan *chan, size_t length) +{ + void *reserved; + struct rchan_buf *buf = chan->buf[smp_processor_id()]; + + if (unlikely(buf->offset + length > buf->chan->subbuf_size)) { + length = relay_switch_subbuf(buf, length); + if (!length) + return NULL; + } + reserved = buf->data + buf->offset; + buf->offset += length; + + return reserved; +} + +/** + * subbuf_start_reserve - reserve bytes at the start of a sub-buffer + * @buf: relay channel buffer + * @length: number of bytes to reserve + * + * Helper function used to reserve bytes at the beginning of + * a sub-buffer in the subbuf_start() callback. + */ +static inline void subbuf_start_reserve(struct rchan_buf *buf, + size_t length) +{ + BUG_ON(length >= buf->chan->subbuf_size - 1); + buf->offset = length; +} + +/* + * exported relayfs file operations, fs/relayfs/inode.c + */ + +extern struct file_operations relayfs_file_operations; + +#endif /* _LINUX_RELAYFS_FS_H */ + -- cgit v1.2.3 From 202e5979af4d91c7ca05892641131dee22653259 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 6 Sep 2005 15:16:40 -0700 Subject: [PATCH] compat: be more consistent about [ug]id_t When I first wrote the compat layer patches, I was somewhat cavalier about the definition of compat_uid_t and compat_gid_t (or maybe I just misunderstood :-)). This patch makes the compat types much more consistent with the types we are being compatible with and hopefully will fix a few bugs along the way. compat type type in compat arch __compat_[ug]id_t __kernel_[ug]id_t __compat_[ug]id32_t __kernel_[ug]id32_t compat_[ug]id_t [ug]id_t The difference is that compat_uid_t is always 32 bits (for the archs we care about) but __compat_uid_t may be 16 bits on some. 
Signed-off-by: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/linux32.c | 16 ++++++++-------- fs/compat.c | 16 ++++++++-------- include/asm-ia64/compat.h | 20 ++++++++++---------- include/asm-mips/compat.h | 10 ++++++---- include/asm-parisc/compat.h | 10 ++++++---- include/asm-ppc64/compat.h | 18 ++++++++++-------- include/asm-s390/compat.h | 20 ++++++++++---------- include/asm-sparc64/compat.h | 18 ++++++++++-------- include/asm-x86_64/compat.h | 20 ++++++++++---------- include/linux/compat.h | 3 +++ ipc/compat.c | 12 ++++++------ 11 files changed, 87 insertions(+), 76 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/kernel/linux32.c b/arch/mips/kernel/linux32.c index 4613219dd73e..ece4564919d8 100644 --- a/arch/mips/kernel/linux32.c +++ b/arch/mips/kernel/linux32.c @@ -546,20 +546,20 @@ struct msgbuf32 { s32 mtype; char mtext[1]; }; struct ipc_perm32 { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; struct ipc64_perm32 { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; unsigned short __pad1; diff --git a/fs/compat.c b/fs/compat.c index 6b06b6bae35e..8e03d31eec3b 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -720,14 +720,14 @@ compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) struct compat_ncp_mount_data { compat_int_t version; compat_uint_t ncp_fd; - compat_uid_t mounted_uid; + __compat_uid_t mounted_uid; compat_pid_t wdog_pid; unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; compat_uint_t time_out; compat_uint_t retry_count; compat_uint_t flags; - compat_uid_t uid; - compat_gid_t gid; + __compat_uid_t uid; + __compat_gid_t gid; 
compat_mode_t file_mode; compat_mode_t dir_mode; }; @@ -784,9 +784,9 @@ static void *do_ncp_super_data_conv(void *raw_data) struct compat_smb_mount_data { compat_int_t version; - compat_uid_t mounted_uid; - compat_uid_t uid; - compat_gid_t gid; + __compat_uid_t mounted_uid; + __compat_uid_t uid; + __compat_gid_t gid; compat_mode_t file_mode; compat_mode_t dir_mode; }; @@ -1808,8 +1808,8 @@ struct compat_nfsctl_export { compat_dev_t ex32_dev; compat_ino_t ex32_ino; compat_int_t ex32_flags; - compat_uid_t ex32_anon_uid; - compat_gid_t ex32_anon_gid; + __compat_uid_t ex32_anon_uid; + __compat_gid_t ex32_anon_gid; }; struct compat_nfsctl_fdparm { diff --git a/include/asm-ia64/compat.h b/include/asm-ia64/compat.h index 0c05e5bad8a0..aaf11f4e9169 100644 --- a/include/asm-ia64/compat.h +++ b/include/asm-ia64/compat.h @@ -13,10 +13,10 @@ typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_key_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -50,8 +50,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; @@ -120,10 +120,10 @@ typedef u32 compat_sigset_word; struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; diff --git a/include/asm-mips/compat.h b/include/asm-mips/compat.h index d78002afb1e1..2c084cd4bc0a 100644 --- 
a/include/asm-mips/compat.h +++ b/include/asm-mips/compat.h @@ -15,8 +15,10 @@ typedef s32 compat_clock_t; typedef s32 compat_suseconds_t; typedef s32 compat_pid_t; -typedef s32 compat_uid_t; -typedef s32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u32 compat_mode_t; typedef u32 compat_ino_t; typedef u32 compat_dev_t; @@ -52,8 +54,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; compat_dev_t st_rdev; s32 st_pad2[2]; compat_off_t st_size; diff --git a/include/asm-parisc/compat.h b/include/asm-parisc/compat.h index 7630d1ad2391..38b918feead9 100644 --- a/include/asm-parisc/compat.h +++ b/include/asm-parisc/compat.h @@ -13,8 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 compat_uid_t; -typedef u32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u32 compat_dev_t; @@ -67,8 +69,8 @@ struct compat_stat { compat_dev_t st_realdev; u16 st_basemode; u16 st_spareshort; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; u32 st_spare4[3]; }; diff --git a/include/asm-ppc64/compat.h b/include/asm-ppc64/compat.h index 12414f5fc666..6ec62cd2d1d1 100644 --- a/include/asm-ppc64/compat.h +++ b/include/asm-ppc64/compat.h @@ -13,8 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u32 compat_uid_t; -typedef u32 compat_gid_t; +typedef u32 __compat_uid_t; +typedef u32 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u32 compat_mode_t; typedef u32 compat_ino_t; 
typedef u32 compat_dev_t; @@ -48,8 +50,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid32_t st_uid; + __compat_gid32_t st_gid; compat_dev_t st_rdev; compat_off_t st_size; compat_off_t st_blksize; @@ -144,10 +146,10 @@ static inline void __user *compat_alloc_user_space(long len) */ struct compat_ipc64_perm { compat_key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned int seq; unsigned int __pad2; diff --git a/include/asm-s390/compat.h b/include/asm-s390/compat.h index 7f8f544eb262..a007715f4aea 100644 --- a/include/asm-s390/compat.h +++ b/include/asm-s390/compat.h @@ -13,10 +13,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -51,8 +51,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; u32 st_size; @@ -140,10 +140,10 @@ static inline void __user *compat_alloc_user_space(long len) struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; compat_mode_t mode; unsigned short __pad1; unsigned short seq; diff --git a/include/asm-sparc64/compat.h b/include/asm-sparc64/compat.h index 
b59122dd176d..c73935dc7ba1 100644 --- a/include/asm-sparc64/compat.h +++ b/include/asm-sparc64/compat.h @@ -12,8 +12,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -47,8 +49,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; compat_off_t st_size; compat_time_t st_atime; @@ -177,10 +179,10 @@ static __inline__ void __user *compat_alloc_user_space(long len) struct compat_ipc64_perm { compat_key_t key; - __kernel_uid_t uid; - __kernel_gid_t gid; - __kernel_uid_t cuid; - __kernel_gid_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short __pad1; compat_mode_t mode; unsigned short __pad2; diff --git a/include/asm-x86_64/compat.h b/include/asm-x86_64/compat.h index d0f453c5adfc..f0155c38f639 100644 --- a/include/asm-x86_64/compat.h +++ b/include/asm-x86_64/compat.h @@ -14,10 +14,10 @@ typedef s32 compat_ssize_t; typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; -typedef u16 compat_uid_t; -typedef u16 compat_gid_t; -typedef u32 compat_uid32_t; -typedef u32 compat_gid32_t; +typedef u16 __compat_uid_t; +typedef u16 __compat_gid_t; +typedef u32 __compat_uid32_t; +typedef u32 __compat_gid32_t; typedef u16 compat_mode_t; typedef u32 compat_ino_t; typedef u16 compat_dev_t; @@ -52,8 +52,8 @@ struct compat_stat { compat_ino_t st_ino; compat_mode_t st_mode; compat_nlink_t st_nlink; - compat_uid_t st_uid; - compat_gid_t st_gid; + __compat_uid_t st_uid; + __compat_gid_t st_gid; compat_dev_t st_rdev; u16 __pad2; 
u32 st_size; @@ -122,10 +122,10 @@ typedef u32 compat_sigset_word; struct compat_ipc64_perm { compat_key_t key; - compat_uid32_t uid; - compat_gid32_t gid; - compat_uid32_t cuid; - compat_gid32_t cgid; + __compat_uid32_t uid; + __compat_gid32_t gid; + __compat_uid32_t cuid; + __compat_gid32_t cgid; unsigned short mode; unsigned short __pad1; unsigned short seq; diff --git a/include/linux/compat.h b/include/linux/compat.h index b58b7d6f2fdb..f9ca534787e2 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -18,6 +18,9 @@ #define compat_jiffies_to_clock_t(x) \ (((unsigned long)(x) * COMPAT_USER_HZ) / HZ) +typedef __compat_uid32_t compat_uid_t; +typedef __compat_gid32_t compat_gid_t; + struct rusage; struct compat_itimerspec { diff --git a/ipc/compat.c b/ipc/compat.c index 3881d564c668..1fe95f6659dd 100644 --- a/ipc/compat.c +++ b/ipc/compat.c @@ -42,10 +42,10 @@ struct compat_msgbuf { struct compat_ipc_perm { key_t key; - compat_uid_t uid; - compat_gid_t gid; - compat_uid_t cuid; - compat_gid_t cgid; + __compat_uid_t uid; + __compat_gid_t gid; + __compat_uid_t cuid; + __compat_gid_t cgid; compat_mode_t mode; unsigned short seq; }; @@ -174,8 +174,8 @@ static inline int __put_compat_ipc_perm(struct ipc64_perm *p, struct compat_ipc_perm __user *up) { int err; - compat_uid_t u; - compat_gid_t g; + __compat_uid_t u; + __compat_gid_t g; err = __put_user(p->key, &up->key); SET_UID(u, p->uid); -- cgit v1.2.3 From 022a4a7bbdefdedc2706a13c81c832d8c3173c6d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:41 -0700 Subject: [PATCH] fs/jbd/: cleanups This patch contains the following cleanups: - make needlessly global functions static - journal.c: remove the unused global function __journal_internal_check and move the check to journal_init - remove the following write-only global variable: - journal.c: current_journal - remove the following unneeded EXPORT_SYMBOL: - journal.c: journal_recover Signed-off-by: Adrian Bunk Acked-by: Andreas Dilger 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/jbd/journal.c | 34 ++++++++++++++-------------------- fs/jbd/revoke.c | 3 ++- include/linux/jbd.h | 1 - 3 files changed, 16 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5e7b43949517..71cfe25d716e 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -65,7 +65,6 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_recover); EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); @@ -81,6 +80,7 @@ EXPORT_SYMBOL(journal_try_to_free_buffers); EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); +static void __journal_abort_soft (journal_t *journal, int errno); /* * Helper function used to manage commit timeouts @@ -93,16 +93,6 @@ static void commit_timeout(unsigned long __data) wake_up_process(p); } -/* Static check for data structure consistency. There's no code - * invoked --- we'll just get a linker failure if things aren't right. - */ -void __journal_internal_check(void) -{ - extern void journal_bad_superblock_size(void); - if (sizeof(struct journal_superblock_s) != 1024) - journal_bad_superblock_size(); -} - /* * kjournald: The main thread function used to manage a logging device * journal. @@ -119,16 +109,12 @@ void __journal_internal_check(void) * known as checkpointing, and this thread is responsible for that job. 
*/ -journal_t *current_journal; // AKPM: debug - -int kjournald(void *arg) +static int kjournald(void *arg) { journal_t *journal = (journal_t *) arg; transaction_t *transaction; struct timer_list timer; - current_journal = journal; - daemonize("kjournald"); /* Set up an interval timer which can be used to trigger a @@ -1439,7 +1425,7 @@ int journal_wipe(journal_t *journal, int write) * device this journal is present. */ -const char *journal_dev_name(journal_t *journal, char *buffer) +static const char *journal_dev_name(journal_t *journal, char *buffer) { struct block_device *bdev; @@ -1485,7 +1471,7 @@ void __journal_abort_hard(journal_t *journal) /* Soft abort: record the abort error status in the journal superblock, * but don't do any other IO. */ -void __journal_abort_soft (journal_t *journal, int errno) +static void __journal_abort_soft (journal_t *journal, int errno) { if (journal->j_flags & JFS_ABORT) return; @@ -1880,7 +1866,7 @@ EXPORT_SYMBOL(journal_enable_debug); static struct proc_dir_entry *proc_jbd_debug; -int read_jbd_debug(char *page, char **start, off_t off, +static int read_jbd_debug(char *page, char **start, off_t off, int count, int *eof, void *data) { int ret; @@ -1890,7 +1876,7 @@ int read_jbd_debug(char *page, char **start, off_t off, return ret; } -int write_jbd_debug(struct file *file, const char __user *buffer, +static int write_jbd_debug(struct file *file, const char __user *buffer, unsigned long count, void *data) { char buf[32]; @@ -1979,6 +1965,14 @@ static int __init journal_init(void) { int ret; +/* Static check for data structure consistency. There's no code + * invoked --- we'll just get a linker failure if things aren't right. 
+ */ + extern void journal_bad_superblock_size(void); + if (sizeof(struct journal_superblock_s) != 1024) + journal_bad_superblock_size(); + + ret = journal_init_caches(); if (ret != 0) journal_destroy_caches(); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index d327a598f861..93b9f45eebda 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -116,7 +116,8 @@ static inline int hash(journal_t *journal, unsigned long block) (block << (hash_shift - 12))) & (table->hash_size - 1); } -int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) +static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, + tid_t seq) { struct list_head *hash_list; struct jbd_revoke_record_s *record; diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 593407e865b1..84321a4cac93 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -914,7 +914,6 @@ extern int journal_wipe (journal_t *, int); extern int journal_skip_recovery (journal_t *); extern void journal_update_superblock (journal_t *, int); extern void __journal_abort_hard (journal_t *); -extern void __journal_abort_soft (journal_t *, int); extern void journal_abort (journal_t *, int); extern int journal_errno (journal_t *); extern void journal_ack_err (journal_t *); -- cgit v1.2.3 From 3676347a5e216a7fec7f8eedbbcf8bed6b9c4e40 Mon Sep 17 00:00:00 2001 From: Peter Osterlund Date: Tue, 6 Sep 2005 15:16:42 -0700 Subject: [PATCH] kill bio->bi_set Jens: ->bi_set is totally unnecessary bloat of struct bio. Just define a proper destructor for the bio and it already knows what bio_set it belongs too. Peter: Fixed the bugs. 
Signed-off-by: Jens Axboe Signed-off-by: Peter Osterlund Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-io.c | 6 ++++++ drivers/md/dm.c | 6 ++++++ fs/bio.c | 32 +++++++++++++++++++++----------- include/linux/bio.h | 2 +- 4 files changed, 34 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 45754bb6a799..9de000131a8a 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -239,6 +239,11 @@ static void vm_dp_init(struct dpages *dp, void *data) dp->context_ptr = data; } +static void dm_bio_destructor(struct bio *bio) +{ + bio_free(bio, _bios); +} + /*----------------------------------------------------------------- * IO routines that accept a list of pages. *---------------------------------------------------------------*/ @@ -263,6 +268,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where, bio->bi_bdev = where->bdev; bio->bi_end_io = endio; bio->bi_private = io; + bio->bi_destructor = dm_bio_destructor; bio_set_region(bio, region); /* diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d487d9deb98e..930b9fc27953 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -399,6 +399,11 @@ struct clone_info { unsigned short idx; }; +static void dm_bio_destructor(struct bio *bio) +{ + bio_free(bio, dm_set); +} + /* * Creates a little bio that is just does part of a bvec. 
*/ @@ -410,6 +415,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, struct bio_vec *bv = bio->bi_io_vec + idx; clone = bio_alloc_bioset(GFP_NOIO, 1, dm_set); + clone->bi_destructor = dm_bio_destructor; *clone->bi_io_vec = *bv; clone->bi_sector = sector; diff --git a/fs/bio.c b/fs/bio.c index 1f2d4649b188..bf3ec9d2b54c 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -104,18 +104,22 @@ static inline struct bio_vec *bvec_alloc_bs(unsigned int __nocast gfp_mask, int return bvl; } -/* - * default destructor for a bio allocated with bio_alloc_bioset() - */ -static void bio_destructor(struct bio *bio) +void bio_free(struct bio *bio, struct bio_set *bio_set) { const int pool_idx = BIO_POOL_IDX(bio); - struct bio_set *bs = bio->bi_set; BIO_BUG_ON(pool_idx >= BIOVEC_NR_POOLS); - mempool_free(bio->bi_io_vec, bs->bvec_pools[pool_idx]); - mempool_free(bio, bs->bio_pool); + mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); + mempool_free(bio, bio_set->bio_pool); +} + +/* + * default destructor for a bio allocated with bio_alloc_bioset() + */ +static void bio_fs_destructor(struct bio *bio) +{ + bio_free(bio, fs_bio_set); } inline void bio_init(struct bio *bio) @@ -171,8 +175,6 @@ struct bio *bio_alloc_bioset(unsigned int __nocast gfp_mask, int nr_iovecs, stru bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; } bio->bi_io_vec = bvl; - bio->bi_destructor = bio_destructor; - bio->bi_set = bs; } out: return bio; @@ -180,7 +182,12 @@ out: struct bio *bio_alloc(unsigned int __nocast gfp_mask, int nr_iovecs) { - return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); + + if (bio) + bio->bi_destructor = bio_fs_destructor; + + return bio; } void zero_fill_bio(struct bio *bio) @@ -273,8 +280,10 @@ struct bio *bio_clone(struct bio *bio, unsigned int __nocast gfp_mask) { struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); - if (b) + if (b) { + b->bi_destructor = bio_fs_destructor; 
__bio_clone(b, bio); + } return b; } @@ -1075,6 +1084,7 @@ subsys_initcall(init_bio); EXPORT_SYMBOL(bio_alloc); EXPORT_SYMBOL(bio_put); +EXPORT_SYMBOL(bio_free); EXPORT_SYMBOL(bio_endio); EXPORT_SYMBOL(bio_init); EXPORT_SYMBOL(__bio_clone); diff --git a/include/linux/bio.h b/include/linux/bio.h index 36ef29fa0d8b..69e047989f1c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -111,7 +111,6 @@ struct bio { void *bi_private; bio_destructor_t *bi_destructor; /* destructor */ - struct bio_set *bi_set; /* memory pools set */ }; /* @@ -280,6 +279,7 @@ extern void bioset_free(struct bio_set *); extern struct bio *bio_alloc(unsigned int __nocast, int); extern struct bio *bio_alloc_bioset(unsigned int __nocast, int, struct bio_set *); extern void bio_put(struct bio *); +extern void bio_free(struct bio *, struct bio_set *); extern void bio_endio(struct bio *, unsigned int, int); struct request_queue; -- cgit v1.2.3 From 5dd42c262bd742fa3602180bbe5550b4828de8f3 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 6 Sep 2005 15:16:43 -0700 Subject: [PATCH] remove register_ioctl32_conversion and unregister_ioctl32_conversion All users have been converted. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/feature-removal-schedule.txt | 8 --- fs/compat.c | 90 ------------------------------ include/linux/ioctl32.h | 22 -------- 3 files changed, 120 deletions(-) (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 363909056e46..95e744353120 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -51,14 +51,6 @@ Who: Adrian Bunk --------------------------- -What: register_ioctl32_conversion() / unregister_ioctl32_conversion() -When: April 2005 -Why: Replaced by ->compat_ioctl in file_operations and other method - vecors. 
-Who: Andi Kleen , Christoph Hellwig - ---------------------------- - What: RCU API moves to EXPORT_SYMBOL_GPL When: April 2006 Files: include/linux/rcupdate.h, kernel/rcupdate.c diff --git a/fs/compat.c b/fs/compat.c index 8e03d31eec3b..2eb03c49b07c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -310,96 +310,6 @@ static int __init init_sys32_ioctl(void) __initcall(init_sys32_ioctl); -int register_ioctl32_conversion(unsigned int cmd, - ioctl_trans_handler_t handler) -{ - struct ioctl_trans *t; - struct ioctl_trans *new_t; - unsigned long hash = ioctl32_hash(cmd); - - new_t = kmalloc(sizeof(*new_t), GFP_KERNEL); - if (!new_t) - return -ENOMEM; - - down_write(&ioctl32_sem); - for (t = ioctl32_hash_table[hash]; t; t = t->next) { - if (t->cmd == cmd) { - printk(KERN_ERR "Trying to register duplicated ioctl32 " - "handler %x\n", cmd); - up_write(&ioctl32_sem); - kfree(new_t); - return -EINVAL; - } - } - new_t->next = NULL; - new_t->cmd = cmd; - new_t->handler = handler; - ioctl32_insert_translation(new_t); - - up_write(&ioctl32_sem); - return 0; -} -EXPORT_SYMBOL(register_ioctl32_conversion); - -static inline int builtin_ioctl(struct ioctl_trans *t) -{ - return t >= ioctl_start && t < (ioctl_start + ioctl_table_size); -} - -/* Problem: - This function cannot unregister duplicate ioctls, because they are not - unique. - When they happen we need to extend the prototype to pass the handler too. 
*/ - -int unregister_ioctl32_conversion(unsigned int cmd) -{ - unsigned long hash = ioctl32_hash(cmd); - struct ioctl_trans *t, *t1; - - down_write(&ioctl32_sem); - - t = ioctl32_hash_table[hash]; - if (!t) { - up_write(&ioctl32_sem); - return -EINVAL; - } - - if (t->cmd == cmd) { - if (builtin_ioctl(t)) { - printk("%p tried to unregister builtin ioctl %x\n", - __builtin_return_address(0), cmd); - } else { - ioctl32_hash_table[hash] = t->next; - up_write(&ioctl32_sem); - kfree(t); - return 0; - } - } - while (t->next) { - t1 = t->next; - if (t1->cmd == cmd) { - if (builtin_ioctl(t1)) { - printk("%p tried to unregister builtin " - "ioctl %x\n", - __builtin_return_address(0), cmd); - goto out; - } else { - t->next = t1->next; - up_write(&ioctl32_sem); - kfree(t1); - return 0; - } - } - t = t1; - } - printk(KERN_ERR "Trying to free unknown 32bit ioctl handler %x\n", - cmd); -out: - up_write(&ioctl32_sem); - return -EINVAL; -} -EXPORT_SYMBOL(unregister_ioctl32_conversion); - static void compat_ioctl_error(struct file *filp, unsigned int fd, unsigned int cmd, unsigned long arg) { diff --git a/include/linux/ioctl32.h b/include/linux/ioctl32.h index e8c4af32b3bb..948809d99917 100644 --- a/include/linux/ioctl32.h +++ b/include/linux/ioctl32.h @@ -14,26 +14,4 @@ struct ioctl_trans { struct ioctl_trans *next; }; -/* - * Register an 32bit ioctl translation handler for ioctl cmd. - * - * handler == NULL: use 64bit ioctl handler. - * arguments to handler: fd: file descriptor - * cmd: ioctl command. - * arg: ioctl argument - * struct file *file: file descriptor pointer. 
- */ - -#ifdef CONFIG_COMPAT -extern int __deprecated register_ioctl32_conversion(unsigned int cmd, - ioctl_trans_handler_t handler); -extern int __deprecated unregister_ioctl32_conversion(unsigned int cmd); - -#else - -#define register_ioctl32_conversion(cmd, handler) ({ 0; }) -#define unregister_ioctl32_conversion(cmd) ({ 0; }) - -#endif - #endif -- cgit v1.2.3 From 36d57ac4a818cb4aa3edbdf63ad2ebc31106f925 Mon Sep 17 00:00:00 2001 From: "H. J. Lu" Date: Tue, 6 Sep 2005 15:16:49 -0700 Subject: [PATCH] auxiliary vector cleanups The size of auxiliary vector is fixed at 42 in linux/sched.h. But it isn't very obvious when looking at linux/elf.h. This patch adds AT_VECTOR_SIZE so that we can change it if necessary when a new vector is added. Because of include file ordering problems, doing this necessitated the extraction of the AT_* symbols into a standalone header file. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-alpha/auxvec.h | 24 ++++++++++++++++++++++++ include/asm-alpha/elf.h | 22 ++-------------------- include/asm-arm/auxvec.h | 4 ++++ include/asm-arm26/auxvec.h | 4 ++++ include/asm-cris/auxvec.h | 4 ++++ include/asm-frv/auxvec.h | 4 ++++ include/asm-h8300/auxvec.h | 4 ++++ include/asm-i386/auxvec.h | 11 +++++++++++ include/asm-i386/elf.h | 8 +------- include/asm-ia64/auxvec.h | 11 +++++++++++ include/asm-ia64/elf.h | 8 +------- include/asm-m32r/auxvec.h | 4 ++++ include/asm-m68k/auxvec.h | 4 ++++ include/asm-m68knommu/auxvec.h | 4 ++++ include/asm-mips/auxvec.h | 4 ++++ include/asm-parisc/auxvec.h | 4 ++++ include/asm-ppc/auxvec.h | 14 ++++++++++++++ include/asm-ppc/elf.h | 11 +---------- include/asm-ppc64/auxvec.h | 19 +++++++++++++++++++ include/asm-ppc64/elf.h | 16 +--------------- include/asm-s390/auxvec.h | 4 ++++ include/asm-sh/auxvec.h | 4 ++++ include/asm-sh64/auxvec.h | 4 ++++ include/asm-sparc/auxvec.h | 4 ++++ include/asm-sparc64/auxvec.h | 4 ++++ include/asm-um/auxvec.h | 4 ++++ include/asm-v850/auxvec.h | 4 ++++ 
include/asm-x86_64/auxvec.h | 4 ++++ include/asm-xtensa/auxvec.h | 4 ++++ include/linux/auxvec.h | 31 +++++++++++++++++++++++++++++++ include/linux/elf.h | 24 +----------------------- include/linux/sched.h | 4 +++- 32 files changed, 196 insertions(+), 83 deletions(-) create mode 100644 include/asm-alpha/auxvec.h create mode 100644 include/asm-arm/auxvec.h create mode 100644 include/asm-arm26/auxvec.h create mode 100644 include/asm-cris/auxvec.h create mode 100644 include/asm-frv/auxvec.h create mode 100644 include/asm-h8300/auxvec.h create mode 100644 include/asm-i386/auxvec.h create mode 100644 include/asm-ia64/auxvec.h create mode 100644 include/asm-m32r/auxvec.h create mode 100644 include/asm-m68k/auxvec.h create mode 100644 include/asm-m68knommu/auxvec.h create mode 100644 include/asm-mips/auxvec.h create mode 100644 include/asm-parisc/auxvec.h create mode 100644 include/asm-ppc/auxvec.h create mode 100644 include/asm-ppc64/auxvec.h create mode 100644 include/asm-s390/auxvec.h create mode 100644 include/asm-sh/auxvec.h create mode 100644 include/asm-sh64/auxvec.h create mode 100644 include/asm-sparc/auxvec.h create mode 100644 include/asm-sparc64/auxvec.h create mode 100644 include/asm-um/auxvec.h create mode 100644 include/asm-v850/auxvec.h create mode 100644 include/asm-x86_64/auxvec.h create mode 100644 include/asm-xtensa/auxvec.h create mode 100644 include/linux/auxvec.h (limited to 'include/linux') diff --git a/include/asm-alpha/auxvec.h b/include/asm-alpha/auxvec.h new file mode 100644 index 000000000000..e96fe880e310 --- /dev/null +++ b/include/asm-alpha/auxvec.h @@ -0,0 +1,24 @@ +#ifndef __ASM_ALPHA_AUXVEC_H +#define __ASM_ALPHA_AUXVEC_H + +/* Reserve these numbers for any future use of a VDSO. */ +#if 0 +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 +#endif + +/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the + value is -1, then the cache doesn't exist. Otherwise: + + bit 0-3: Cache set-associativity; 0 means fully associative. 
+ bit 4-7: Log2 of cacheline size. + bit 8-31: Size of the entire cache >> 8. + bit 32-63: Reserved. +*/ + +#define AT_L1I_CACHESHAPE 34 +#define AT_L1D_CACHESHAPE 35 +#define AT_L2_CACHESHAPE 36 +#define AT_L3_CACHESHAPE 37 + +#endif /* __ASM_ALPHA_AUXVEC_H */ diff --git a/include/asm-alpha/elf.h b/include/asm-alpha/elf.h index e94a945a2314..6c2d78fba264 100644 --- a/include/asm-alpha/elf.h +++ b/include/asm-alpha/elf.h @@ -1,6 +1,8 @@ #ifndef __ASM_ALPHA_ELF_H #define __ASM_ALPHA_ELF_H +#include + /* Special values for the st_other field in the symbol table. */ #define STO_ALPHA_NOPV 0x80 @@ -142,26 +144,6 @@ extern int dump_elf_task_fp(elf_fpreg_t *dest, struct task_struct *task); : amask (AMASK_CIX) ? "ev6" : "ev67"); \ }) -/* Reserve these numbers for any future use of a VDSO. */ -#if 0 -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 -#endif - -/* More complete cache descriptions than AT_[DIU]CACHEBSIZE. If the - value is -1, then the cache doesn't exist. Otherwise: - - bit 0-3: Cache set-associativity; 0 means fully associative. - bit 4-7: Log2 of cacheline size. - bit 8-31: Size of the entire cache >> 8. - bit 32-63: Reserved. 
-*/ - -#define AT_L1I_CACHESHAPE 34 -#define AT_L1D_CACHESHAPE 35 -#define AT_L2_CACHESHAPE 36 -#define AT_L3_CACHESHAPE 37 - #ifdef __KERNEL__ #define SET_PERSONALITY(EX, IBCS2) \ diff --git a/include/asm-arm/auxvec.h b/include/asm-arm/auxvec.h new file mode 100644 index 000000000000..c0536f6b29a7 --- /dev/null +++ b/include/asm-arm/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMARM_AUXVEC_H +#define __ASMARM_AUXVEC_H + +#endif diff --git a/include/asm-arm26/auxvec.h b/include/asm-arm26/auxvec.h new file mode 100644 index 000000000000..c0536f6b29a7 --- /dev/null +++ b/include/asm-arm26/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMARM_AUXVEC_H +#define __ASMARM_AUXVEC_H + +#endif diff --git a/include/asm-cris/auxvec.h b/include/asm-cris/auxvec.h new file mode 100644 index 000000000000..cb30b01bf19f --- /dev/null +++ b/include/asm-cris/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMCRIS_AUXVEC_H +#define __ASMCRIS_AUXVEC_H + +#endif diff --git a/include/asm-frv/auxvec.h b/include/asm-frv/auxvec.h new file mode 100644 index 000000000000..07710778fa10 --- /dev/null +++ b/include/asm-frv/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __FRV_AUXVEC_H +#define __FRV_AUXVEC_H + +#endif diff --git a/include/asm-h8300/auxvec.h b/include/asm-h8300/auxvec.h new file mode 100644 index 000000000000..1d36fe38b088 --- /dev/null +++ b/include/asm-h8300/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMH8300_AUXVEC_H +#define __ASMH8300_AUXVEC_H + +#endif diff --git a/include/asm-i386/auxvec.h b/include/asm-i386/auxvec.h new file mode 100644 index 000000000000..395e13016bfb --- /dev/null +++ b/include/asm-i386/auxvec.h @@ -0,0 +1,11 @@ +#ifndef __ASMi386_AUXVEC_H +#define __ASMi386_AUXVEC_H + +/* + * Architecture-neutral AT_ values in 0-17, leave some room + * for more of them, start the x86-specific ones at 32. 
+ */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#endif diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 130bdc8c68cf..fa11117d3cfa 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h @@ -9,6 +9,7 @@ #include #include #include /* for savesegment */ +#include #include @@ -109,13 +110,6 @@ typedef struct user_fxsr_struct elf_fpxregset_t; #define ELF_PLATFORM (system_utsname.machine) -/* - * Architecture-neutral AT_ values in 0-17, leave some room - * for more of them, start the x86-specific ones at 32. - */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) do { } while (0) diff --git a/include/asm-ia64/auxvec.h b/include/asm-ia64/auxvec.h new file mode 100644 index 000000000000..23cebe5685b9 --- /dev/null +++ b/include/asm-ia64/auxvec.h @@ -0,0 +1,11 @@ +#ifndef _ASM_IA64_AUXVEC_H +#define _ASM_IA64_AUXVEC_H + +/* + * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of + * them, start the architecture-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#endif /* _ASM_IA64_AUXVEC_H */ diff --git a/include/asm-ia64/elf.h b/include/asm-ia64/elf.h index 7d4ccc4b976e..446fce036fd9 100644 --- a/include/asm-ia64/elf.h +++ b/include/asm-ia64/elf.h @@ -12,6 +12,7 @@ #include #include +#include /* * This is used to ensure we don't load something for the wrong architecture. @@ -177,13 +178,6 @@ extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); relevant until we have real hardware to play with... */ #define ELF_PLATFORM NULL -/* - * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of - * them, start the architecture-specific ones at 32. 
- */ -#define AT_SYSINFO 32 -#define AT_SYSINFO_EHDR 33 - #ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) set_personality(PER_LINUX) #define elf_read_implies_exec(ex, executable_stack) \ diff --git a/include/asm-m32r/auxvec.h b/include/asm-m32r/auxvec.h new file mode 100644 index 000000000000..f76dcc860fae --- /dev/null +++ b/include/asm-m32r/auxvec.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M32R__AUXVEC_H +#define _ASM_M32R__AUXVEC_H + +#endif /* _ASM_M32R__AUXVEC_H */ diff --git a/include/asm-m68k/auxvec.h b/include/asm-m68k/auxvec.h new file mode 100644 index 000000000000..844d6d52204b --- /dev/null +++ b/include/asm-m68k/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMm68k_AUXVEC_H +#define __ASMm68k_AUXVEC_H + +#endif diff --git a/include/asm-m68knommu/auxvec.h b/include/asm-m68knommu/auxvec.h new file mode 100644 index 000000000000..844d6d52204b --- /dev/null +++ b/include/asm-m68knommu/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMm68k_AUXVEC_H +#define __ASMm68k_AUXVEC_H + +#endif diff --git a/include/asm-mips/auxvec.h b/include/asm-mips/auxvec.h new file mode 100644 index 000000000000..7cf7f2d21943 --- /dev/null +++ b/include/asm-mips/auxvec.h @@ -0,0 +1,4 @@ +#ifndef _ASM_AUXVEC_H +#define _ASM_AUXVEC_H + +#endif /* _ASM_AUXVEC_H */ diff --git a/include/asm-parisc/auxvec.h b/include/asm-parisc/auxvec.h new file mode 100644 index 000000000000..9c3ac4b89dc9 --- /dev/null +++ b/include/asm-parisc/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMPARISC_AUXVEC_H +#define __ASMPARISC_AUXVEC_H + +#endif diff --git a/include/asm-ppc/auxvec.h b/include/asm-ppc/auxvec.h new file mode 100644 index 000000000000..172358df29c8 --- /dev/null +++ b/include/asm-ppc/auxvec.h @@ -0,0 +1,14 @@ +#ifndef __PPC_AUXVEC_H +#define __PPC_AUXVEC_H + +/* + * We need to put in some extra aux table entries to tell glibc what + * the cache block size is, so it can use the dcbz instruction safely. 
+ */ +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +/* A special ignored type value for PPC, for glibc compatibility. */ +#define AT_IGNOREPPC 22 + +#endif diff --git a/include/asm-ppc/elf.h b/include/asm-ppc/elf.h index 2c056966efd3..c25cc35e6ab5 100644 --- a/include/asm-ppc/elf.h +++ b/include/asm-ppc/elf.h @@ -7,6 +7,7 @@ #include #include #include +#include /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 @@ -122,16 +123,6 @@ extern int dump_task_fpu(struct task_struct *t, elf_fpregset_t *fpu); #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 - extern int dcache_bsize; extern int icache_bsize; extern int ucache_bsize; diff --git a/include/asm-ppc64/auxvec.h b/include/asm-ppc64/auxvec.h new file mode 100644 index 000000000000..ac6381a106e1 --- /dev/null +++ b/include/asm-ppc64/auxvec.h @@ -0,0 +1,19 @@ +#ifndef __PPC64_AUXVEC_H +#define __PPC64_AUXVEC_H + +/* + * We need to put in some extra aux table entries to tell glibc what + * the cache block size is, so it can use the dcbz instruction safely. + */ +#define AT_DCACHEBSIZE 19 +#define AT_ICACHEBSIZE 20 +#define AT_UCACHEBSIZE 21 +/* A special ignored type value for PPC, for glibc compatibility. */ +#define AT_IGNOREPPC 22 + +/* The vDSO location. 
We have to use the same value as x86 for glibc's + * sake :-) + */ +#define AT_SYSINFO_EHDR 33 + +#endif /* __PPC64_AUXVEC_H */ diff --git a/include/asm-ppc64/elf.h b/include/asm-ppc64/elf.h index 085eedb956fe..c919a89343db 100644 --- a/include/asm-ppc64/elf.h +++ b/include/asm-ppc64/elf.h @@ -4,6 +4,7 @@ #include #include #include +#include /* PowerPC relocations defined by the ABIs */ #define R_PPC_NONE 0 @@ -237,21 +238,6 @@ do { \ #endif -/* - * We need to put in some extra aux table entries to tell glibc what - * the cache block size is, so it can use the dcbz instruction safely. - */ -#define AT_DCACHEBSIZE 19 -#define AT_ICACHEBSIZE 20 -#define AT_UCACHEBSIZE 21 -/* A special ignored type value for PPC, for glibc compatibility. */ -#define AT_IGNOREPPC 22 - -/* The vDSO location. We have to use the same value as x86 for glibc's - * sake :-) - */ -#define AT_SYSINFO_EHDR 33 - extern int dcache_bsize; extern int icache_bsize; extern int ucache_bsize; diff --git a/include/asm-s390/auxvec.h b/include/asm-s390/auxvec.h new file mode 100644 index 000000000000..0d340720fd99 --- /dev/null +++ b/include/asm-s390/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMS390_AUXVEC_H +#define __ASMS390_AUXVEC_H + +#endif diff --git a/include/asm-sh/auxvec.h b/include/asm-sh/auxvec.h new file mode 100644 index 000000000000..fc21e4db5881 --- /dev/null +++ b/include/asm-sh/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SH_AUXVEC_H +#define __ASM_SH_AUXVEC_H + +#endif /* __ASM_SH_AUXVEC_H */ diff --git a/include/asm-sh64/auxvec.h b/include/asm-sh64/auxvec.h new file mode 100644 index 000000000000..1ad5a44bdc76 --- /dev/null +++ b/include/asm-sh64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SH64_AUXVEC_H +#define __ASM_SH64_AUXVEC_H + +#endif /* __ASM_SH64_AUXVEC_H */ diff --git a/include/asm-sparc/auxvec.h b/include/asm-sparc/auxvec.h new file mode 100644 index 000000000000..ad6f360261f6 --- /dev/null +++ b/include/asm-sparc/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASMSPARC_AUXVEC_H +#define 
__ASMSPARC_AUXVEC_H + +#endif /* !(__ASMSPARC_AUXVEC_H) */ diff --git a/include/asm-sparc64/auxvec.h b/include/asm-sparc64/auxvec.h new file mode 100644 index 000000000000..436a29129828 --- /dev/null +++ b/include/asm-sparc64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_SPARC64_AUXVEC_H +#define __ASM_SPARC64_AUXVEC_H + +#endif /* !(__ASM_SPARC64_AUXVEC_H) */ diff --git a/include/asm-um/auxvec.h b/include/asm-um/auxvec.h new file mode 100644 index 000000000000..1e5e1c2fc9b1 --- /dev/null +++ b/include/asm-um/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __UM_AUXVEC_H +#define __UM_AUXVEC_H + +#endif diff --git a/include/asm-v850/auxvec.h b/include/asm-v850/auxvec.h new file mode 100644 index 000000000000..f493232d0224 --- /dev/null +++ b/include/asm-v850/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __V850_AUXVEC_H__ +#define __V850_AUXVEC_H__ + +#endif /* __V850_AUXVEC_H__ */ diff --git a/include/asm-x86_64/auxvec.h b/include/asm-x86_64/auxvec.h new file mode 100644 index 000000000000..2403c4cfced2 --- /dev/null +++ b/include/asm-x86_64/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __ASM_X86_64_AUXVEC_H +#define __ASM_X86_64_AUXVEC_H + +#endif diff --git a/include/asm-xtensa/auxvec.h b/include/asm-xtensa/auxvec.h new file mode 100644 index 000000000000..257dec75c5af --- /dev/null +++ b/include/asm-xtensa/auxvec.h @@ -0,0 +1,4 @@ +#ifndef __XTENSA_AUXVEC_H +#define __XTENSA_AUXVEC_H + +#endif diff --git a/include/linux/auxvec.h b/include/linux/auxvec.h new file mode 100644 index 000000000000..9a7b374c9fb4 --- /dev/null +++ b/include/linux/auxvec.h @@ -0,0 +1,31 @@ +#ifndef _LINUX_AUXVEC_H +#define _LINUX_AUXVEC_H + +#include + +/* Symbolic values for the entries in the auxiliary table + put on the initial stack */ +#define AT_NULL 0 /* end of vector */ +#define AT_IGNORE 1 /* entry should be ignored */ +#define AT_EXECFD 2 /* file descriptor of program */ +#define AT_PHDR 3 /* program headers for program */ +#define AT_PHENT 4 /* size of program header entry */ +#define AT_PHNUM 5 /* number of program 
headers */ +#define AT_PAGESZ 6 /* system page size */ +#define AT_BASE 7 /* base address of interpreter */ +#define AT_FLAGS 8 /* flags */ +#define AT_ENTRY 9 /* entry point of program */ +#define AT_NOTELF 10 /* program is not ELF */ +#define AT_UID 11 /* real uid */ +#define AT_EUID 12 /* effective uid */ +#define AT_GID 13 /* real gid */ +#define AT_EGID 14 /* effective gid */ +#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ +#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ +#define AT_CLKTCK 17 /* frequency at which times() increments */ + +#define AT_SECURE 23 /* secure mode boolean */ + +#define AT_VECTOR_SIZE 42 /* Size of auxiliary table. */ + +#endif /* _LINUX_AUXVEC_H */ diff --git a/include/linux/elf.h b/include/linux/elf.h index f5b3ba5a317d..ff955dbf510d 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -2,6 +2,7 @@ #define _LINUX_ELF_H #include +#include #include #ifndef elf_read_implies_exec @@ -158,29 +159,6 @@ typedef __s64 Elf64_Sxword; #define ELF64_ST_BIND(x) ELF_ST_BIND(x) #define ELF64_ST_TYPE(x) ELF_ST_TYPE(x) -/* Symbolic values for the entries in the auxiliary table - put on the initial stack */ -#define AT_NULL 0 /* end of vector */ -#define AT_IGNORE 1 /* entry should be ignored */ -#define AT_EXECFD 2 /* file descriptor of program */ -#define AT_PHDR 3 /* program headers for program */ -#define AT_PHENT 4 /* size of program header entry */ -#define AT_PHNUM 5 /* number of program headers */ -#define AT_PAGESZ 6 /* system page size */ -#define AT_BASE 7 /* base address of interpreter */ -#define AT_FLAGS 8 /* flags */ -#define AT_ENTRY 9 /* entry point of program */ -#define AT_NOTELF 10 /* program is not ELF */ -#define AT_UID 11 /* real uid */ -#define AT_EUID 12 /* effective uid */ -#define AT_GID 13 /* real gid */ -#define AT_EGID 14 /* effective gid */ -#define AT_PLATFORM 15 /* string identifying CPU for optimizations */ -#define AT_HWCAP 16 /* arch dependent hints at CPU 
capabilities */ -#define AT_CLKTCK 17 /* frequency at which times() increments */ - -#define AT_SECURE 23 /* secure mode boolean */ - typedef struct dynamic{ Elf32_Sword d_tag; union{ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5fb31bede103..b5a22ea80045 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -35,6 +35,8 @@ #include #include +#include /* For AT_VECTOR_SIZE */ + struct exec_domain; /* @@ -261,7 +263,7 @@ struct mm_struct { mm_counter_t _rss; mm_counter_t _anon_rss; - unsigned long saved_auxv[42]; /* for /proc/PID/auxv */ + unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */ unsigned dumpable:2; cpumask_t cpu_vm_mask; -- cgit v1.2.3 From 8fc2751beb0941966d3a97b26544e8585e428c08 Mon Sep 17 00:00:00 2001 From: Mark Bellon Date: Tue, 6 Sep 2005 15:16:54 -0700 Subject: [PATCH] disk quotas fail when /etc/mtab is symlinked to /proc/mounts If /etc/mtab is a regular file all of the mount options (of a file system) are written to /etc/mtab by the mount command. The quota tools look there for the quota strings for their operation. If, however, /etc/mtab is a symlink to /proc/mounts (a "good thing" in some environments) the tools don't write anything - they assume the kernel will take care of things. While the quota options are sent down to the kernel via the mount system call and the file system codes handle them properly unfortunately there is no code to echo the quota strings into /proc/mounts and the quota tools fail in the symlink case. The attached patchs modify the EXT[2|3] and JFS codes to add the necessary hooks. The show_options function of each file system in these patches currently deal with only those things that seemed related to quotas; especially in the EXT3 case more can be done (later?). Jan Kara also noted the difficulty in moving these changes above the FS codes responding similarly to myself to Andrew's comment about possible VFS migration. 
Issue summary: - FS codes have to process the entire string of options anyway. - Only FS codes that use quotas must have a show_options function (for quotas to work properly) however quotas are only used in a small number of FS. - Since most of the quota using FS support other options these FS codes should have a show_options function to show those options - and the quota echoing becomes virtually negligible. Based on feedback I have modified my patches from the original: JFS a missing patch has been restored to the posting EXT[2|3] and JFS always use the show_options function - Each FS has at least one FS specific option displayed - QUOTA output is under a CONFIG_QUOTA ifdef - a follow-on patch will add a multitude of options for each FS EXT[2|3] and JFS "quota" is treated as "usrquota" EXT3 journalled data check for journalled quota removed EXT[2|3] mount when quota specified but not compiled in - no changes from my original patch. I tested the patch and the codes warn but - still mount. With all due respect I believe the comments otherwise were a - misread of the patch. Please reread/test and comment. XFS patch removed - the XFS team already made the necessary changes EXT3 mixing old and new quotas are handled differently (not purely exclusive) - if old and new quotas for the same type are used together the old type is silently deprecated for compatibility (e.g. usrquota and usrjquota) - mixing of old and new quotas is an error (e.g.
usrjquota and grpquota) Signed-off-by: Mark Bellon Acked-by: Dave Kleikamp Cc: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext2/super.c | 59 +++++++++++++++++++++++++++---- fs/ext3/super.c | 92 +++++++++++++++++++++++++++++++++++++++++++------ fs/jfs/jfs_filsys.h | 3 ++ fs/jfs/super.c | 48 ++++++++++++++++++++++++-- include/linux/ext2_fs.h | 3 ++ include/linux/ext3_fs.h | 2 ++ 6 files changed, 186 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/ext2/super.c b/fs/ext2/super.c index dcfe331dc4c4..3c0c7c6a5b44 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -27,6 +28,8 @@ #include #include #include +#include +#include #include #include "ext2.h" #include "xattr.h" @@ -201,6 +204,26 @@ static void ext2_clear_inode(struct inode *inode) #endif } +static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct ext2_sb_info *sbi = EXT2_SB(vfs->mnt_sb); + + if (sbi->s_mount_opt & EXT2_MOUNT_GRPID) + seq_puts(seq, ",grpid"); + else + seq_puts(seq, ",nogrpid"); + +#if defined(CONFIG_QUOTA) + if (sbi->s_mount_opt & EXT2_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT2_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif + + return 0; +} + #ifdef CONFIG_QUOTA static ssize_t ext2_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); static ssize_t ext2_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); @@ -218,6 +241,7 @@ static struct super_operations ext2_sops = { .statfs = ext2_statfs, .remount_fs = ext2_remount, .clear_inode = ext2_clear_inode, + .show_options = ext2_show_options, #ifdef CONFIG_QUOTA .quota_read = ext2_quota_read, .quota_write = ext2_quota_write, @@ -256,10 +280,11 @@ static unsigned long get_sb_block(void **data) enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, - Opt_resgid, Opt_resuid, 
Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, Opt_nobh, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, Opt_xip, - Opt_ignore, Opt_err, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, + Opt_err_ro, Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, + Opt_oldalloc, Opt_orlov, Opt_nobh, Opt_user_xattr, Opt_nouser_xattr, + Opt_acl, Opt_noacl, Opt_xip, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota }; static match_table_t tokens = { @@ -288,10 +313,10 @@ static match_table_t tokens = { {Opt_acl, "acl"}, {Opt_noacl, "noacl"}, {Opt_xip, "xip"}, - {Opt_ignore, "grpquota"}, + {Opt_grpquota, "grpquota"}, {Opt_ignore, "noquota"}, - {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, {Opt_err, NULL} }; @@ -406,6 +431,26 @@ static int parse_options (char * options, printk("EXT2 xip option not supported\n"); #endif break; + +#if defined(CONFIG_QUOTA) + case Opt_quota: + case Opt_usrquota: + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + + case Opt_grpquota: + set_opt(sbi->s_mount_opt, GRPQUOTA); + break; +#else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + printk(KERN_ERR + "EXT2-fs: quota operations not supported.\n"); + + break; +#endif + case Opt_ignore: break; default: diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3c3c6e399fb3..a93c3609025d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include "xattr.h" #include "acl.h" @@ -509,8 +510,41 @@ static void ext3_clear_inode(struct inode *inode) kfree(rsv); } -#ifdef CONFIG_QUOTA +static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb); + + if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA) + seq_puts(seq, ",data=journal"); + + if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA) + seq_puts(seq, ",data=ordered"); + 
+ if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA) + seq_puts(seq, ",data=writeback"); + +#if defined(CONFIG_QUOTA) + if (sbi->s_jquota_fmt) + seq_printf(seq, ",jqfmt=%s", + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); + + if (sbi->s_qf_names[USRQUOTA]) + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif + + return 0; +} + +#ifdef CONFIG_QUOTA #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) @@ -569,6 +603,7 @@ static struct super_operations ext3_sops = { .statfs = ext3_statfs, .remount_fs = ext3_remount, .clear_inode = ext3_clear_inode, + .show_options = ext3_show_options, #ifdef CONFIG_QUOTA .quota_read = ext3_quota_read, .quota_write = ext3_quota_write, @@ -590,7 +625,8 @@ enum { Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_grpquota }; static match_table_t tokens = { @@ -634,10 +670,10 @@ static match_table_t tokens = { {Opt_grpjquota, "grpjquota=%s"}, {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, - {Opt_quota, "grpquota"}, + {Opt_grpquota, "grpquota"}, {Opt_noquota, "noquota"}, {Opt_quota, "quota"}, - {Opt_quota, "usrquota"}, + {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, @@ -903,7 +939,13 @@ clear_qf_name: sbi->s_jquota_fmt = QFMT_VFS_V0; break; case Opt_quota: + case Opt_usrquota: set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + 
case Opt_grpquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, GRPQUOTA); break; case Opt_noquota: if (sb_any_quota_enabled(sb)) { @@ -912,8 +954,13 @@ clear_qf_name: return 0; } clear_opt(sbi->s_mount_opt, QUOTA); + clear_opt(sbi->s_mount_opt, USRQUOTA); + clear_opt(sbi->s_mount_opt, GRPQUOTA); break; #else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: case Opt_usrjquota: case Opt_grpjquota: case Opt_offusrjquota: @@ -924,7 +971,6 @@ clear_qf_name: "EXT3-fs: journalled quota options not " "supported.\n"); break; - case Opt_quota: case Opt_noquota: break; #endif @@ -962,14 +1008,38 @@ clear_qf_name: } } #ifdef CONFIG_QUOTA - if (!sbi->s_jquota_fmt && (sbi->s_qf_names[USRQUOTA] || - sbi->s_qf_names[GRPQUOTA])) { - printk(KERN_ERR - "EXT3-fs: journalled quota format not specified.\n"); - return 0; + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && + sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi->s_mount_opt, USRQUOTA); + + if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && + sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi->s_mount_opt, GRPQUOTA); + + if ((sbi->s_qf_names[USRQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || + (sbi->s_qf_names[GRPQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { + printk(KERN_ERR "EXT3-fs: old and new quota " + "format mixing.\n"); + return 0; + } + + if (!sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "not specified.\n"); + return 0; + } + } else { + if (sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "specified with no journalling " + "enabled.\n"); + return 0; + } } #endif - return 1; } diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 86ccac80f0ab..72a5588faeca 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -37,6 +37,9 @@ #define JFS_ERR_CONTINUE 0x00000004 /* continue */ #define JFS_ERR_PANIC 0x00000008 /* panic */ +#define JFS_USRQUOTA 0x00000010 
+#define JFS_GRPQUOTA 0x00000020 + /* platform option (conditional compilation) */ #define JFS_AIX 0x80000000 /* AIX support */ /* POSIX name/directory support */ diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 9ff89720f93b..71bc34b96b2b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -23,9 +23,11 @@ #include #include #include +#include #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -192,7 +194,8 @@ static void jfs_put_super(struct super_block *sb) enum { Opt_integrity, Opt_nointegrity, Opt_iocharset, Opt_resize, - Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, + Opt_resize_nosize, Opt_errors, Opt_ignore, Opt_err, Opt_quota, + Opt_usrquota, Opt_grpquota }; static match_table_t tokens = { @@ -204,8 +207,8 @@ static match_table_t tokens = { {Opt_errors, "errors=%s"}, {Opt_ignore, "noquota"}, {Opt_ignore, "quota"}, - {Opt_ignore, "usrquota"}, - {Opt_ignore, "grpquota"}, + {Opt_usrquota, "usrquota"}, + {Opt_grpquota, "grpquota"}, {Opt_err, NULL} }; @@ -293,6 +296,24 @@ static int parse_options(char *options, struct super_block *sb, s64 *newLVSize, } break; } + +#if defined(CONFIG_QUOTA) + case Opt_quota: + case Opt_usrquota: + *flag |= JFS_USRQUOTA; + break; + case Opt_grpquota: + *flag |= JFS_GRPQUOTA; + break; +#else + case Opt_usrquota: + case Opt_grpquota: + case Opt_quota: + printk(KERN_ERR + "JFS: quota operations not supported\n"); + break; +#endif + default: printk("jfs: Unrecognized mount option \"%s\" " " or missing value\n", p); @@ -539,6 +560,26 @@ static int jfs_sync_fs(struct super_block *sb, int wait) return 0; } +static int jfs_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct jfs_sb_info *sbi = JFS_SBI(vfs->mnt_sb); + + if (sbi->flag & JFS_NOINTEGRITY) + seq_puts(seq, ",nointegrity"); + else + seq_puts(seq, ",integrity"); + +#if defined(CONFIG_QUOTA) + if (sbi->flag & JFS_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->flag & JFS_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif 
+ + return 0; +} + static struct super_operations jfs_super_operations = { .alloc_inode = jfs_alloc_inode, .destroy_inode = jfs_destroy_inode, @@ -552,6 +593,7 @@ static struct super_operations jfs_super_operations = { .unlockfs = jfs_unlockfs, .statfs = jfs_statfs, .remount_fs = jfs_remount, + .show_options = jfs_show_options }; static struct export_operations jfs_export_operations = { diff --git a/include/linux/ext2_fs.h b/include/linux/ext2_fs.h index a657130ba03a..f7bd1c7ebefb 100644 --- a/include/linux/ext2_fs.h +++ b/include/linux/ext2_fs.h @@ -313,6 +313,9 @@ struct ext2_inode { #define EXT2_MOUNT_XATTR_USER 0x004000 /* Extended user attributes */ #define EXT2_MOUNT_POSIX_ACL 0x008000 /* POSIX Access Control Lists */ #define EXT2_MOUNT_XIP 0x010000 /* Execute in place */ +#define EXT2_MOUNT_USRQUOTA 0x020000 /* user quota */ +#define EXT2_MOUNT_GRPQUOTA 0x040000 /* group quota */ + #define clear_opt(o, opt) o &= ~EXT2_MOUNT_##opt #define set_opt(o, opt) o |= EXT2_MOUNT_##opt diff --git a/include/linux/ext3_fs.h b/include/linux/ext3_fs.h index c16662836c58..c0272d73ab20 100644 --- a/include/linux/ext3_fs.h +++ b/include/linux/ext3_fs.h @@ -373,6 +373,8 @@ struct ext3_inode { #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ #define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -- cgit v1.2.3 From 2865cf001878d22d5fd12e5215621dffbcad76dc Mon Sep 17 00:00:00 2001 From: Zhigang Huo Date: Tue, 6 Sep 2005 15:17:00 -0700 Subject: [PATCH] remove pipe definitions These no longer have any users. 
Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 36725e7c02c6..1767073df26f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -39,9 +39,6 @@ struct pipe_inode_info { #define PIPE_SEM(inode) (&(inode).i_sem) #define PIPE_WAIT(inode) (&(inode).i_pipe->wait) -#define PIPE_BASE(inode) ((inode).i_pipe->base) -#define PIPE_START(inode) ((inode).i_pipe->start) -#define PIPE_LEN(inode) ((inode).i_pipe->len) #define PIPE_READERS(inode) ((inode).i_pipe->readers) #define PIPE_WRITERS(inode) ((inode).i_pipe->writers) #define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers) -- cgit v1.2.3 From d2052c1676a39cae101a81f3da8a4ade8b668c88 Mon Sep 17 00:00:00 2001 From: Erik Waling Date: Tue, 6 Sep 2005 15:17:02 -0700 Subject: [PATCH] sonypi SPIC initialisation fix Newer Sony VAIO models (VGN-S480, VGN-S460, VGN-S3XP etc) use a new method to initialize the SPIC device. The new way to initialize (and disable) the device comes directly from the AML code in the _CRS, _SRS and _DIS methods from the DSDT table. This patch adds support for the new models. Signed-off-by: Erik Waling Signed-off-by: Stelian Pop Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/sonypi.txt | 10 ++++ drivers/char/sonypi.c | 117 +++++++++++++++++++++++++++++++++++++++-------- include/linux/sonypi.h | 2 + 3 files changed, 109 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/sonypi.txt b/Documentation/sonypi.txt index 0f3b2405d09e..c1237a925505 100644 --- a/Documentation/sonypi.txt +++ b/Documentation/sonypi.txt @@ -99,6 +99,7 @@ statically linked into the kernel). 
Those options are: SONYPI_MEYE_MASK 0x0400 SONYPI_MEMORYSTICK_MASK 0x0800 SONYPI_BATTERY_MASK 0x1000 + SONYPI_WIRELESS_MASK 0x2000 useinput: if set (which is the default) two input devices are created, one which interprets the jogdial events as @@ -137,6 +138,15 @@ Bugs: speed handling etc). Use ACPI instead of APM if it works on your laptop. + - sonypi lacks the ability to distinguish between certain key + events on some models. + + - some models with the nvidia card (geforce go 6200 tc) uses a + different way to adjust the backlighting of the screen. There + is a userspace utility to adjust the brightness on those models, + which can be downloaded from + http://www.acc.umu.se/~erikw/program/smartdimmer-0.1.tar.bz2 + - since all development was done by reverse engineering, there is _absolutely no guarantee_ that this driver will not crash your laptop. Permanently. diff --git a/drivers/char/sonypi.c b/drivers/char/sonypi.c index cefbe985e55c..35cf1edbc179 100644 --- a/drivers/char/sonypi.c +++ b/drivers/char/sonypi.c @@ -98,12 +98,13 @@ MODULE_PARM_DESC(useinput, #define SONYPI_DEVICE_MODEL_TYPE1 1 #define SONYPI_DEVICE_MODEL_TYPE2 2 +#define SONYPI_DEVICE_MODEL_TYPE3 3 /* type1 models use those */ #define SONYPI_IRQ_PORT 0x8034 #define SONYPI_IRQ_SHIFT 22 -#define SONYPI_BASE 0x50 -#define SONYPI_G10A (SONYPI_BASE+0x14) +#define SONYPI_TYPE1_BASE 0x50 +#define SONYPI_G10A (SONYPI_TYPE1_BASE+0x14) #define SONYPI_TYPE1_REGION_SIZE 0x08 #define SONYPI_TYPE1_EVTYPE_OFFSET 0x04 @@ -114,6 +115,13 @@ MODULE_PARM_DESC(useinput, #define SONYPI_TYPE2_REGION_SIZE 0x20 #define SONYPI_TYPE2_EVTYPE_OFFSET 0x12 +/* type3 series specifics */ +#define SONYPI_TYPE3_BASE 0x40 +#define SONYPI_TYPE3_GID2 (SONYPI_TYPE3_BASE+0x48) /* 16 bits */ +#define SONYPI_TYPE3_MISC (SONYPI_TYPE3_BASE+0x6d) /* 8 bits */ +#define SONYPI_TYPE3_REGION_SIZE 0x20 +#define SONYPI_TYPE3_EVTYPE_OFFSET 0x12 + /* battery / brightness addresses */ #define SONYPI_BAT_FLAGS 0x81 #define SONYPI_LCD_LIGHT 0x96 @@ 
-159,6 +167,10 @@ static struct sonypi_ioport_list sonypi_type2_ioport_list[] = { { 0x0, 0x0 } }; +/* same as in type 2 models */ +static struct sonypi_ioport_list *sonypi_type3_ioport_list = + sonypi_type2_ioport_list; + /* The set of possible interrupts */ struct sonypi_irq_list { u16 irq; @@ -180,6 +192,9 @@ static struct sonypi_irq_list sonypi_type2_irq_list[] = { { 0, 0x00 } /* no IRQ, 0x00 in SIRQ in AML */ }; +/* same as in type2 models */ +static struct sonypi_irq_list *sonypi_type3_irq_list = sonypi_type2_irq_list; + #define SONYPI_CAMERA_BRIGHTNESS 0 #define SONYPI_CAMERA_CONTRAST 1 #define SONYPI_CAMERA_HUE 2 @@ -223,6 +238,7 @@ static struct sonypi_irq_list sonypi_type2_irq_list[] = { #define SONYPI_MEYE_MASK 0x00000400 #define SONYPI_MEMORYSTICK_MASK 0x00000800 #define SONYPI_BATTERY_MASK 0x00001000 +#define SONYPI_WIRELESS_MASK 0x00002000 struct sonypi_event { u8 data; @@ -305,6 +321,13 @@ static struct sonypi_event sonypi_blueev[] = { { 0, 0 } }; +/* The set of possible wireless events */ +static struct sonypi_event sonypi_wlessev[] = { + { 0x59, SONYPI_EVENT_WIRELESS_ON }, + { 0x5a, SONYPI_EVENT_WIRELESS_OFF }, + { 0, 0 } +}; + /* The set of possible back button events */ static struct sonypi_event sonypi_backev[] = { { 0x20, SONYPI_EVENT_BACK_PRESSED }, @@ -391,6 +414,12 @@ static struct sonypi_eventtypes { { SONYPI_DEVICE_MODEL_TYPE2, 0x41, SONYPI_BATTERY_MASK, sonypi_batteryev }, { SONYPI_DEVICE_MODEL_TYPE2, 0x31, SONYPI_PKEY_MASK, sonypi_pkeyev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0, 0xffffffff, sonypi_releaseev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0x21, SONYPI_FNKEY_MASK, sonypi_fnkeyev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0x31, SONYPI_WIRELESS_MASK, sonypi_wlessev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0x31, SONYPI_MEMORYSTICK_MASK, sonypi_memorystickev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0x41, SONYPI_BATTERY_MASK, sonypi_batteryev }, + { SONYPI_DEVICE_MODEL_TYPE3, 0x31, SONYPI_PKEY_MASK, sonypi_pkeyev }, { 0 } }; @@ -563,6 +592,23 @@ static void 
sonypi_type2_srs(void) udelay(10); } +static void sonypi_type3_srs(void) +{ + u16 v16; + u8 v8; + + /* This model type uses the same initialiazation of + * the embedded controller as the type2 models. */ + sonypi_type2_srs(); + + /* Initialization of PCI config space of the LPC interface bridge. */ + v16 = (sonypi_device.ioport1 & 0xFFF0) | 0x01; + pci_write_config_word(sonypi_device.dev, SONYPI_TYPE3_GID2, v16); + pci_read_config_byte(sonypi_device.dev, SONYPI_TYPE3_MISC, &v8); + v8 = (v8 & 0xCF) | 0x10; + pci_write_config_byte(sonypi_device.dev, SONYPI_TYPE3_MISC, v8); +} + /* Disables the device - this comes from the AML code in the ACPI bios */ static void sonypi_type1_dis(void) { @@ -587,6 +633,13 @@ static void sonypi_type2_dis(void) printk(KERN_WARNING "ec_write failed\n"); } +static void sonypi_type3_dis(void) +{ + sonypi_type2_dis(); + udelay(10); + pci_write_config_word(sonypi_device.dev, SONYPI_TYPE3_GID2, 0); +} + static u8 sonypi_call1(u8 dev) { u8 v1, v2; @@ -1067,10 +1120,17 @@ static struct miscdevice sonypi_misc_device = { static void sonypi_enable(unsigned int camera_on) { - if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE2) - sonypi_type2_srs(); - else + switch (sonypi_device.model) { + case SONYPI_DEVICE_MODEL_TYPE1: sonypi_type1_srs(); + break; + case SONYPI_DEVICE_MODEL_TYPE2: + sonypi_type2_srs(); + break; + case SONYPI_DEVICE_MODEL_TYPE3: + sonypi_type3_srs(); + break; + } sonypi_call1(0x82); sonypi_call2(0x81, 0xff); @@ -1094,10 +1154,18 @@ static int sonypi_disable(void) if (!SONYPI_ACPI_ACTIVE && fnkeyinit) outb(0xf1, 0xb2); - if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE2) - sonypi_type2_dis(); - else + switch (sonypi_device.model) { + case SONYPI_DEVICE_MODEL_TYPE1: sonypi_type1_dis(); + break; + case SONYPI_DEVICE_MODEL_TYPE2: + sonypi_type2_dis(); + break; + case SONYPI_DEVICE_MODEL_TYPE3: + sonypi_type3_dis(); + break; + } + return 0; } @@ -1143,12 +1211,16 @@ static int __devinit sonypi_probe(void) struct sonypi_irq_list 
*irq_list; struct pci_dev *pcidev; - pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, - PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL))) + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE1; + else if ((pcidev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_ICH6_1, NULL))) + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE3; + else + sonypi_device.model = SONYPI_DEVICE_MODEL_TYPE2; sonypi_device.dev = pcidev; - sonypi_device.model = pcidev ? - SONYPI_DEVICE_MODEL_TYPE1 : SONYPI_DEVICE_MODEL_TYPE2; spin_lock_init(&sonypi_device.fifo_lock); sonypi_device.fifo = kfifo_alloc(SONYPI_BUF_SIZE, GFP_KERNEL, @@ -1176,16 +1248,22 @@ static int __devinit sonypi_probe(void) goto out_miscreg; } - if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE2) { + + if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) { + ioport_list = sonypi_type1_ioport_list; + sonypi_device.region_size = SONYPI_TYPE1_REGION_SIZE; + sonypi_device.evtype_offset = SONYPI_TYPE1_EVTYPE_OFFSET; + irq_list = sonypi_type1_irq_list; + } else if (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE2) { ioport_list = sonypi_type2_ioport_list; sonypi_device.region_size = SONYPI_TYPE2_REGION_SIZE; sonypi_device.evtype_offset = SONYPI_TYPE2_EVTYPE_OFFSET; irq_list = sonypi_type2_irq_list; } else { - ioport_list = sonypi_type1_ioport_list; - sonypi_device.region_size = SONYPI_TYPE1_REGION_SIZE; - sonypi_device.evtype_offset = SONYPI_TYPE1_EVTYPE_OFFSET; - irq_list = sonypi_type1_irq_list; + ioport_list = sonypi_type3_ioport_list; + sonypi_device.region_size = SONYPI_TYPE3_REGION_SIZE; + sonypi_device.evtype_offset = SONYPI_TYPE3_EVTYPE_OFFSET; + irq_list = sonypi_type3_irq_list; } for (i = 0; ioport_list[i].port1; i++) { @@ -1274,11 +1352,10 @@ static int __devinit sonypi_probe(void) printk(KERN_INFO "sonypi: Sony Programmable I/O Controller Driver" "v%s.\n", SONYPI_DRIVER_VERSION); - printk(KERN_INFO "sonypi: detected %s model, " + 
printk(KERN_INFO "sonypi: detected type%d model, " "verbose = %d, fnkeyinit = %s, camera = %s, " "compat = %s, mask = 0x%08lx, useinput = %s, acpi = %s\n", - (sonypi_device.model == SONYPI_DEVICE_MODEL_TYPE1) ? - "type1" : "type2", + sonypi_device.model, verbose, fnkeyinit ? "on" : "off", camera ? "on" : "off", diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 768cbba617d0..f56d24734950 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -99,6 +99,8 @@ #define SONYPI_EVENT_BATTERY_INSERT 57 #define SONYPI_EVENT_BATTERY_REMOVE 58 #define SONYPI_EVENT_FNKEY_RELEASED 59 +#define SONYPI_EVENT_WIRELESS_ON 60 +#define SONYPI_EVENT_WIRELESS_OFF 61 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) -- cgit v1.2.3 From e139aa595c5d3bd01699530cbe017dec75fdb07f Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Tue, 6 Sep 2005 15:17:05 -0700 Subject: [PATCH] PNP: make pnp_dbg conditional directly on CONFIG_PNP_DEBUG Seems pointless to require .c files to test CONFIG_PNP_DEBUG and conditionally define DEBUG before including <linux/pnp.h>. Just test CONFIG_PNP_DEBUG directly in pnp.h. 
Signed-off-by: Bjorn Helgaas Cc: Adam Belay Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pnp/card.c | 7 ------- drivers/pnp/driver.c | 7 ------- drivers/pnp/manager.c | 7 ------- drivers/pnp/pnpacpi/core.c | 1 + drivers/pnp/quirks.c | 7 ------- drivers/pnp/support.c | 7 ------- include/linux/pnp.h | 2 +- 7 files changed, 2 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c index 6e5229e92fbc..e95ed67d4f05 100644 --- a/drivers/pnp/card.c +++ b/drivers/pnp/card.c @@ -8,13 +8,6 @@ #include #include #include - -#ifdef CONFIG_PNP_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include "base.h" diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c index 1d037c2a82ac..33da25f3213f 100644 --- a/drivers/pnp/driver.c +++ b/drivers/pnp/driver.c @@ -11,13 +11,6 @@ #include #include #include - -#ifdef CONFIG_PNP_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include "base.h" diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c index 6c510c19ad7d..94442ffd4aed 100644 --- a/drivers/pnp/manager.c +++ b/drivers/pnp/manager.c @@ -11,13 +11,6 @@ #include #include #include - -#ifdef CONFIG_PNP_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include "base.h" diff --git a/drivers/pnp/pnpacpi/core.c b/drivers/pnp/pnpacpi/core.c index d3d292ea5876..1a8915e74160 100644 --- a/drivers/pnp/pnpacpi/core.c +++ b/drivers/pnp/pnpacpi/core.c @@ -19,6 +19,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 596a02d7e03d..8936b0cb2ec3 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -16,13 +16,6 @@ #include #include #include - -#ifdef CONFIG_PNP_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include "base.h" diff --git a/drivers/pnp/support.c b/drivers/pnp/support.c index b952aec49189..61fe998944bd 
100644 --- a/drivers/pnp/support.c +++ b/drivers/pnp/support.c @@ -8,13 +8,6 @@ #include #include #include - -#ifdef CONFIG_PNP_DEBUG - #define DEBUG -#else - #undef DEBUG -#endif - #include #include "base.h" diff --git a/include/linux/pnp.h b/include/linux/pnp.h index 5ec2bd0c2848..aadbac29103c 100644 --- a/include/linux/pnp.h +++ b/include/linux/pnp.h @@ -443,7 +443,7 @@ static inline void pnp_unregister_driver(struct pnp_driver *drv) { ; } #define pnp_info(format, arg...) printk(KERN_INFO "pnp: " format "\n" , ## arg) #define pnp_warn(format, arg...) printk(KERN_WARNING "pnp: " format "\n" , ## arg) -#ifdef DEBUG +#ifdef CONFIG_PNP_DEBUG #define pnp_dbg(format, arg...) printk(KERN_DEBUG "pnp: " format "\n" , ## arg) #else #define pnp_dbg(format, arg...) do {} while (0) -- cgit v1.2.3 From f35279d3f713e5c97b98cbdbf47d98f79942c11f Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Tue, 6 Sep 2005 15:17:08 -0700 Subject: [PATCH] sunrpc: cache_register can use wrong module reference When registering an RPC cache, cache_register() always sets the owner as the sunrpc module. However, there are RPC caches owned by other modules. With the incorrect owner setting, the real owning module can be removed potentially with an open reference to the cache from userspace. For example, if one were to stop the nfs server and unmount the nfsd filesystem, the nfsd module could be removed even though rpc.idmapd had references to the idtoname and nametoid caches (i.e. /proc/net/rpc/nfs4.<cachename>/channel is still open). This resulted in a system panic on one of our machines when attempting to restart the nfs services after reloading the nfsd module. The following patch adds a 'struct module *owner' field in struct cache_detail. The owner is further assigned to the struct proc_dir_entry in cache_register() so that the module cannot be unloaded while user-space daemons have an open reference on the associated file under /proc. 
Signed-off-by: Bruce Allan Cc: Trond Myklebust Cc: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfsd/export.c | 3 +++ fs/nfsd/nfs4idmap.c | 8 ++++++-- include/linux/sunrpc/cache.h | 1 + net/sunrpc/auth_gss/svcauth_gss.c | 8 ++++++-- net/sunrpc/cache.c | 8 ++++---- net/sunrpc/sunrpc_syms.c | 6 ++++-- net/sunrpc/svcauth.c | 1 + net/sunrpc/svcauth_unix.c | 1 + 8 files changed, 26 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9a11aa39e2e4..057aff745506 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -221,6 +222,7 @@ static int expkey_show(struct seq_file *m, } struct cache_detail svc_expkey_cache = { + .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .hash_table = expkey_table, .name = "nfsd.fh", @@ -456,6 +458,7 @@ static int svc_export_show(struct seq_file *m, return 0; } struct cache_detail svc_export_cache = { + .owner = THIS_MODULE, .hash_size = EXPORT_HASHMAX, .hash_table = export_table, .name = "nfsd.export", diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index 5605a26efc57..13369650cdf9 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -187,6 +187,7 @@ static int idtoname_parse(struct cache_detail *, char *, int); static struct ent *idtoname_lookup(struct ent *, int); static struct cache_detail idtoname_cache = { + .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .hash_table = idtoname_table, .name = "nfs4.idtoname", @@ -320,6 +321,7 @@ static struct ent *nametoid_lookup(struct ent *, int); static int nametoid_parse(struct cache_detail *, char *, int); static struct cache_detail nametoid_cache = { + .owner = THIS_MODULE, .hash_size = ENT_HASHMAX, .hash_table = nametoid_table, .name = "nfs4.nametoid", @@ -404,8 +406,10 @@ nfsd_idmap_init(void) void nfsd_idmap_shutdown(void) { - cache_unregister(&idtoname_cache); - cache_unregister(&nametoid_cache); + if 
(cache_unregister(&idtoname_cache)) + printk(KERN_ERR "nfsd: failed to unregister idtoname cache\n"); + if (cache_unregister(&nametoid_cache)) + printk(KERN_ERR "nfsd: failed to unregister nametoid cache\n"); } /* diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 6864063d1b9f..c4e3ea7cf154 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -60,6 +60,7 @@ struct cache_head { #define CACHE_NEW_EXPIRY 120 /* keep new things pending confirmation for 120 seconds */ struct cache_detail { + struct module * owner; int hash_size; struct cache_head ** hash_table; rwlock_t hash_lock; diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 5c8fe3bfc494..e3308195374e 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -250,6 +250,7 @@ out: } static struct cache_detail rsi_cache = { + .owner = THIS_MODULE, .hash_size = RSI_HASHMAX, .hash_table = rsi_table, .name = "auth.rpcsec.init", @@ -436,6 +437,7 @@ out: } static struct cache_detail rsc_cache = { + .owner = THIS_MODULE, .hash_size = RSC_HASHMAX, .hash_table = rsc_table, .name = "auth.rpcsec.context", @@ -1074,7 +1076,9 @@ gss_svc_init(void) void gss_svc_shutdown(void) { - cache_unregister(&rsc_cache); - cache_unregister(&rsi_cache); + if (cache_unregister(&rsc_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsc cache\n"); + if (cache_unregister(&rsi_cache)) + printk(KERN_ERR "auth_rpcgss: failed to unregister rsi cache\n"); svc_auth_unregister(RPC_AUTH_GSS); } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 900f5bc7e336..f509e9992767 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -177,7 +177,7 @@ void cache_register(struct cache_detail *cd) cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); if (cd->proc_ent) { struct proc_dir_entry *p; - cd->proc_ent->owner = THIS_MODULE; + cd->proc_ent->owner = cd->owner; cd->channel_ent = cd->content_ent = NULL; p = 
create_proc_entry("flush", S_IFREG|S_IRUSR|S_IWUSR, @@ -185,7 +185,7 @@ void cache_register(struct cache_detail *cd) cd->flush_ent = p; if (p) { p->proc_fops = &cache_flush_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } @@ -195,7 +195,7 @@ void cache_register(struct cache_detail *cd) cd->channel_ent = p; if (p) { p->proc_fops = &cache_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } @@ -205,7 +205,7 @@ void cache_register(struct cache_detail *cd) cd->content_ent = p; if (p) { p->proc_fops = &content_file_operations; - p->owner = THIS_MODULE; + p->owner = cd->owner; p->data = cd; } } diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index 62a073495276..ed48ff022d35 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -176,8 +176,10 @@ cleanup_sunrpc(void) { unregister_rpc_pipefs(); rpc_destroy_mempool(); - cache_unregister(&auth_domain_cache); - cache_unregister(&ip_map_cache); + if (cache_unregister(&auth_domain_cache)) + printk(KERN_ERR "sunrpc: failed to unregister auth_domain cache\n"); + if (cache_unregister(&ip_map_cache)) + printk(KERN_ERR "sunrpc: failed to unregister ip_map cache\n"); #ifdef RPC_DEBUG rpc_unregister_sysctl(); #endif diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index bde8147ef2db..dda4f0c63511 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -143,6 +143,7 @@ static void auth_domain_drop(struct cache_head *item, struct cache_detail *cd) struct cache_detail auth_domain_cache = { + .owner = THIS_MODULE, .hash_size = DN_HASHMAX, .hash_table = auth_domain_table, .name = "auth.domain", diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index d6baf6fdf8a9..cac2e774dd81 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -242,6 +242,7 @@ static int ip_map_show(struct seq_file *m, struct cache_detail ip_map_cache = { + .owner = THIS_MODULE, .hash_size = IP_HASHMAX, .hash_table = 
ip_table, .name = "auth.unix.ip", -- cgit v1.2.3 From 19b4946ca9d1e35d4c641dcebe27378de34f3ddd Mon Sep 17 00:00:00 2001 From: Mike Waychison Date: Tue, 6 Sep 2005 15:17:10 -0700 Subject: [PATCH] ipc: convert /proc/sysvipc/* to generic seq_file interface Change the /proc/sysvipc/shm|sem|msg files to use the generic seq_file implementation for struct ipc_ids. Signed-off-by: Mike Waychison Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/msg.h | 1 + include/linux/sem.h | 1 + ipc/msg.c | 82 +++++++++++++++++--------------------------------- ipc/sem.c | 73 ++++++++++++++------------------------------- ipc/shm.c | 86 +++++++++++++++++------------------------------------ 5 files changed, 80 insertions(+), 163 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msg.h b/include/linux/msg.h index 2c4c6aa643ff..903e0ab8101f 100644 --- a/include/linux/msg.h +++ b/include/linux/msg.h @@ -77,6 +77,7 @@ struct msg_msg { /* one msq_queue structure for each present queue on the system */ struct msg_queue { struct kern_ipc_perm q_perm; + int q_id; time_t q_stime; /* last msgsnd time */ time_t q_rtime; /* last msgrcv time */ time_t q_ctime; /* last change time */ diff --git a/include/linux/sem.h b/include/linux/sem.h index 2d8516be9fd7..106f9757339a 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -88,6 +88,7 @@ struct sem { /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { struct kern_ipc_perm sem_perm; /* permissions .. 
see ipc.h */ + int sem_id; time_t sem_otime; /* last semop time */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ diff --git a/ipc/msg.c b/ipc/msg.c index 27e516f96cdc..d035bd2aba96 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include "util.h" @@ -74,16 +75,16 @@ static struct ipc_ids msg_ids; static void freeque (struct msg_queue *msq, int id); static int newque (key_t key, int msgflg); #ifdef CONFIG_PROC_FS -static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); +static int sysvipc_msg_proc_show(struct seq_file *s, void *it); #endif void __init msg_init (void) { ipc_init_ids(&msg_ids,msg_ctlmni); - -#ifdef CONFIG_PROC_FS - create_proc_read_entry("sysvipc/msg", 0, NULL, sysvipc_msg_read_proc, NULL); -#endif + ipc_init_proc_interface("sysvipc/msg", + " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n", + &msg_ids, + sysvipc_msg_proc_show); } static int newque (key_t key, int msgflg) @@ -113,6 +114,7 @@ static int newque (key_t key, int msgflg) return -ENOSPC; } + msq->q_id = msg_buildid(id,msq->q_perm.seq); msq->q_stime = msq->q_rtime = 0; msq->q_ctime = get_seconds(); msq->q_cbytes = msq->q_qnum = 0; @@ -123,7 +125,7 @@ static int newque (key_t key, int msgflg) INIT_LIST_HEAD(&msq->q_senders); msg_unlock(msq); - return msg_buildid(id,msq->q_perm.seq); + return msq->q_id; } static inline void ss_add(struct msg_queue* msq, struct msg_sender* mss) @@ -808,55 +810,25 @@ out_unlock: } #ifdef CONFIG_PROC_FS -static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static int sysvipc_msg_proc_show(struct seq_file *s, void *it) { - off_t pos = 0; - off_t begin = 0; - int i, len = 0; - - down(&msg_ids.sem); - len += sprintf(buffer, " key msqid perms cbytes qnum lspid lrpid uid gid cuid cgid stime rtime ctime\n"); - - 
for(i = 0; i <= msg_ids.max_id; i++) { - struct msg_queue * msq; - msq = msg_lock(i); - if(msq != NULL) { - len += sprintf(buffer + len, "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", - msq->q_perm.key, - msg_buildid(i,msq->q_perm.seq), - msq->q_perm.mode, - msq->q_cbytes, - msq->q_qnum, - msq->q_lspid, - msq->q_lrpid, - msq->q_perm.uid, - msq->q_perm.gid, - msq->q_perm.cuid, - msq->q_perm.cgid, - msq->q_stime, - msq->q_rtime, - msq->q_ctime); - msg_unlock(msq); - - pos += len; - if(pos < offset) { - len = 0; - begin = pos; - } - if(pos > offset + length) - goto done; - } - - } - *eof = 1; -done: - up(&msg_ids.sem); - *start = buffer + (offset - begin); - len -= (offset - begin); - if(len > length) - len = length; - if(len < 0) - len = 0; - return len; + struct msg_queue *msq = it; + + return seq_printf(s, + "%10d %10d %4o %10lu %10lu %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n", + msq->q_perm.key, + msq->q_id, + msq->q_perm.mode, + msq->q_cbytes, + msq->q_qnum, + msq->q_lspid, + msq->q_lrpid, + msq->q_perm.uid, + msq->q_perm.gid, + msq->q_perm.cuid, + msq->q_perm.cgid, + msq->q_stime, + msq->q_rtime, + msq->q_ctime); } #endif diff --git a/ipc/sem.c b/ipc/sem.c index 70975ce0784a..19af028a3e38 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include "util.h" @@ -89,7 +90,7 @@ static struct ipc_ids sem_ids; static int newary (key_t, int, int); static void freeary (struct sem_array *sma, int id); #ifdef CONFIG_PROC_FS -static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); +static int sysvipc_sem_proc_show(struct seq_file *s, void *it); #endif #define SEMMSL_FAST 256 /* 512 bytes on stack */ @@ -116,10 +117,10 @@ void __init sem_init (void) { used_sems = 0; ipc_init_ids(&sem_ids,sc_semmni); - -#ifdef CONFIG_PROC_FS - create_proc_read_entry("sysvipc/sem", 0, NULL, sysvipc_sem_read_proc, NULL); -#endif + 
ipc_init_proc_interface("sysvipc/sem", + " key semid perms nsems uid gid cuid cgid otime ctime\n", + &sem_ids, + sysvipc_sem_proc_show); } /* @@ -193,6 +194,7 @@ static int newary (key_t key, int nsems, int semflg) } used_sems += nsems; + sma->sem_id = sem_buildid(id, sma->sem_perm.seq); sma->sem_base = (struct sem *) &sma[1]; /* sma->sem_pending = NULL; */ sma->sem_pending_last = &sma->sem_pending; @@ -201,7 +203,7 @@ static int newary (key_t key, int nsems, int semflg) sma->sem_ctime = get_seconds(); sem_unlock(sma); - return sem_buildid(id, sma->sem_perm.seq); + return sma->sem_id; } asmlinkage long sys_semget (key_t key, int nsems, int semflg) @@ -1328,50 +1330,21 @@ next_entry: } #ifdef CONFIG_PROC_FS -static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static int sysvipc_sem_proc_show(struct seq_file *s, void *it) { - off_t pos = 0; - off_t begin = 0; - int i, len = 0; - - len += sprintf(buffer, " key semid perms nsems uid gid cuid cgid otime ctime\n"); - down(&sem_ids.sem); - - for(i = 0; i <= sem_ids.max_id; i++) { - struct sem_array *sma; - sma = sem_lock(i); - if(sma) { - len += sprintf(buffer + len, "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", - sma->sem_perm.key, - sem_buildid(i,sma->sem_perm.seq), - sma->sem_perm.mode, - sma->sem_nsems, - sma->sem_perm.uid, - sma->sem_perm.gid, - sma->sem_perm.cuid, - sma->sem_perm.cgid, - sma->sem_otime, - sma->sem_ctime); - sem_unlock(sma); - - pos += len; - if(pos < offset) { - len = 0; - begin = pos; - } - if(pos > offset + length) - goto done; - } - } - *eof = 1; -done: - up(&sem_ids.sem); - *start = buffer + (offset - begin); - len -= (offset - begin); - if(len > length) - len = length; - if(len < 0) - len = 0; - return len; + struct sem_array *sma = it; + + return seq_printf(s, + "%10d %10d %4o %10lu %5u %5u %5u %5u %10lu %10lu\n", + sma->sem_perm.key, + sma->sem_id, + sma->sem_perm.mode, + sma->sem_nsems, + sma->sem_perm.uid, + sma->sem_perm.gid, 
+ sma->sem_perm.cuid, + sma->sem_perm.cgid, + sma->sem_otime, + sma->sem_ctime); } #endif diff --git a/ipc/shm.c b/ipc/shm.c index 1d6cf08d950b..dca90489e3b0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -23,12 +23,12 @@ #include #include #include -#include #include #include #include #include #include +#include #include @@ -51,7 +51,7 @@ static int newseg (key_t key, int shmflg, size_t size); static void shm_open (struct vm_area_struct *shmd); static void shm_close (struct vm_area_struct *shmd); #ifdef CONFIG_PROC_FS -static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); +static int sysvipc_shm_proc_show(struct seq_file *s, void *it); #endif size_t shm_ctlmax = SHMMAX; @@ -63,9 +63,10 @@ static int shm_tot; /* total number of shared memory pages */ void __init shm_init (void) { ipc_init_ids(&shm_ids, 1); -#ifdef CONFIG_PROC_FS - create_proc_read_entry("sysvipc/shm", 0, NULL, sysvipc_shm_read_proc, NULL); -#endif + ipc_init_proc_interface("sysvipc/shm", + " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n", + &shm_ids, + sysvipc_shm_proc_show); } static inline int shm_checkid(struct shmid_kernel *s, int id) @@ -869,63 +870,32 @@ asmlinkage long sys_shmdt(char __user *shmaddr) } #ifdef CONFIG_PROC_FS -static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data) +static int sysvipc_shm_proc_show(struct seq_file *s, void *it) { - off_t pos = 0; - off_t begin = 0; - int i, len = 0; - - down(&shm_ids.sem); - len += sprintf(buffer, " key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime\n"); + struct shmid_kernel *shp = it; + char *format; - for(i = 0; i <= shm_ids.max_id; i++) { - struct shmid_kernel* shp; - - shp = shm_lock(i); - if(shp!=NULL) { #define SMALL_STRING "%10d %10d %4o %10u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" #define BIG_STRING "%10d %10d %4o %21u %5u %5u %5d %5u %5u %5u %5u %10lu %10lu %10lu\n" - 
char *format; - if (sizeof(size_t) <= sizeof(int)) - format = SMALL_STRING; - else - format = BIG_STRING; - len += sprintf(buffer + len, format, - shp->shm_perm.key, - shm_buildid(i, shp->shm_perm.seq), - shp->shm_flags, - shp->shm_segsz, - shp->shm_cprid, - shp->shm_lprid, - is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch, - shp->shm_perm.uid, - shp->shm_perm.gid, - shp->shm_perm.cuid, - shp->shm_perm.cgid, - shp->shm_atim, - shp->shm_dtim, - shp->shm_ctim); - shm_unlock(shp); - - pos += len; - if(pos < offset) { - len = 0; - begin = pos; - } - if(pos > offset + length) - goto done; - } - } - *eof = 1; -done: - up(&shm_ids.sem); - *start = buffer + (offset - begin); - len -= (offset - begin); - if(len > length) - len = length; - if(len < 0) - len = 0; - return len; + if (sizeof(size_t) <= sizeof(int)) + format = SMALL_STRING; + else + format = BIG_STRING; + return seq_printf(s, format, + shp->shm_perm.key, + shp->id, + shp->shm_flags, + shp->shm_segsz, + shp->shm_cprid, + shp->shm_lprid, + is_file_hugepages(shp->shm_file) ? (file_count(shp->shm_file) - 1) : shp->shm_nattch, + shp->shm_perm.uid, + shp->shm_perm.gid, + shp->shm_perm.cuid, + shp->shm_perm.cgid, + shp->shm_atim, + shp->shm_dtim, + shp->shm_ctim); } #endif -- cgit v1.2.3 From 6e3eaab02028c4087a92711b20abb9e72cc803a7 Mon Sep 17 00:00:00 2001 From: Abhay Salunke Date: Tue, 6 Sep 2005 15:17:13 -0700 Subject: [PATCH] modified firmware_class.c to support no hotplug Upgrade the request_firmware_nowait function to not start the hotplug action on a firmware update. This patch is tested along with dell_rbu driver on i386 and x86-64 systems. 
Signed-off-by: Abhay Salunke Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 79 +++++++++++++++++++++++++++---------------- include/linux/firmware.h | 5 ++- 2 files changed, 54 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 652281402c92..5bfa2e9a7c26 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -28,6 +28,7 @@ enum { FW_STATUS_DONE, FW_STATUS_ABORT, FW_STATUS_READY, + FW_STATUS_READY_NOHOTPLUG, }; static int loading_timeout = 10; /* In seconds */ @@ -344,7 +345,7 @@ error_kfree: static int fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p, - const char *fw_name, struct device *device) + const char *fw_name, struct device *device, int hotplug) { struct class_device *class_dev; struct firmware_priv *fw_priv; @@ -376,7 +377,10 @@ fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p, goto error_unreg; } - set_bit(FW_STATUS_READY, &fw_priv->status); + if (hotplug) + set_bit(FW_STATUS_READY, &fw_priv->status); + else + set_bit(FW_STATUS_READY_NOHOTPLUG, &fw_priv->status); *class_dev_p = class_dev; goto out; @@ -386,21 +390,9 @@ out: return retval; } -/** - * request_firmware: - request firmware to hotplug and wait for it - * Description: - * @firmware will be used to return a firmware image by the name - * of @name for device @device. - * - * Should be called from user context where sleeping is allowed. - * - * @name will be use as $FIRMWARE in the hotplug environment and - * should be distinctive enough not to be confused with any other - * firmware image for this or any other device. 
- **/ -int -request_firmware(const struct firmware **firmware_p, const char *name, - struct device *device) +static int +_request_firmware(const struct firmware **firmware_p, const char *name, + struct device *device, int hotplug) { struct class_device *class_dev; struct firmware_priv *fw_priv; @@ -419,22 +411,25 @@ request_firmware(const struct firmware **firmware_p, const char *name, } memset(firmware, 0, sizeof (*firmware)); - retval = fw_setup_class_device(firmware, &class_dev, name, device); + retval = fw_setup_class_device(firmware, &class_dev, name, device, + hotplug); if (retval) goto error_kfree_fw; fw_priv = class_get_devdata(class_dev); - if (loading_timeout > 0) { - fw_priv->timeout.expires = jiffies + loading_timeout * HZ; - add_timer(&fw_priv->timeout); - } - - kobject_hotplug(&class_dev->kobj, KOBJ_ADD); - wait_for_completion(&fw_priv->completion); - set_bit(FW_STATUS_DONE, &fw_priv->status); + if (hotplug) { + if (loading_timeout > 0) { + fw_priv->timeout.expires = jiffies + loading_timeout * HZ; + add_timer(&fw_priv->timeout); + } - del_timer_sync(&fw_priv->timeout); + kobject_hotplug(&class_dev->kobj, KOBJ_ADD); + wait_for_completion(&fw_priv->completion); + set_bit(FW_STATUS_DONE, &fw_priv->status); + del_timer_sync(&fw_priv->timeout); + } else + wait_for_completion(&fw_priv->completion); down(&fw_lock); if (!fw_priv->fw->size || test_bit(FW_STATUS_ABORT, &fw_priv->status)) { @@ -454,6 +449,26 @@ out: return retval; } +/** + * request_firmware: - request firmware to hotplug and wait for it + * Description: + * @firmware will be used to return a firmware image by the name + * of @name for device @device. + * + * Should be called from user context where sleeping is allowed. + * + * @name will be use as $FIRMWARE in the hotplug environment and + * should be distinctive enough not to be confused with any other + * firmware image for this or any other device. 
+ **/ +int +request_firmware(const struct firmware **firmware_p, const char *name, + struct device *device) +{ + int hotplug = 1; + return _request_firmware(firmware_p, name, device, hotplug); +} + /** * release_firmware: - release the resource associated with a firmware image **/ @@ -491,6 +506,7 @@ struct firmware_work { struct device *device; void *context; void (*cont)(const struct firmware *fw, void *context); + int hotplug; }; static int @@ -503,7 +519,8 @@ request_firmware_work_func(void *arg) return 0; } daemonize("%s/%s", "firmware", fw_work->name); - request_firmware(&fw, fw_work->name, fw_work->device); + _request_firmware(&fw, fw_work->name, fw_work->device, + fw_work->hotplug); fw_work->cont(fw, fw_work->context); release_firmware(fw); module_put(fw_work->module); @@ -518,6 +535,9 @@ request_firmware_work_func(void *arg) * Asynchronous variant of request_firmware() for contexts where * it is not possible to sleep. * + * @hotplug invokes hotplug event to copy the firmware image if this flag + * is non-zero else the firmware copy must be done manually. + * * @cont will be called asynchronously when the firmware request is over. * * @context will be passed over to @cont. 
@@ -527,7 +547,7 @@ request_firmware_work_func(void *arg) **/ int request_firmware_nowait( - struct module *module, + struct module *module, int hotplug, const char *name, struct device *device, void *context, void (*cont)(const struct firmware *fw, void *context)) { @@ -548,6 +568,7 @@ request_firmware_nowait( .device = device, .context = context, .cont = cont, + .hotplug = hotplug, }; ret = kernel_thread(request_firmware_work_func, fw_work, diff --git a/include/linux/firmware.h b/include/linux/firmware.h index 886255b69bb9..2063c0839d4f 100644 --- a/include/linux/firmware.h +++ b/include/linux/firmware.h @@ -3,6 +3,9 @@ #include #include #define FIRMWARE_NAME_MAX 30 +#define FW_ACTION_NOHOTPLUG 0 +#define FW_ACTION_HOTPLUG 1 + struct firmware { size_t size; u8 *data; @@ -11,7 +14,7 @@ struct device; int request_firmware(const struct firmware **fw, const char *name, struct device *device); int request_firmware_nowait( - struct module *module, + struct module *module, int hotplug, const char *name, struct device *device, void *context, void (*cont)(const struct firmware *fw, void *context)); -- cgit v1.2.3 From f26fdd59929e1144c6caf72adcaf4561d6e682a4 Mon Sep 17 00:00:00 2001 From: Karsten Wiese Date: Tue, 6 Sep 2005 15:17:25 -0700 Subject: [PATCH] CHECK_IRQ_PER_CPU() to avoid dead code in __do_IRQ() IRQ_PER_CPU is not used by all architectures. This patch introduces the macros ARCH_HAS_IRQ_PER_CPU and CHECK_IRQ_PER_CPU() to avoid the generation of dead code in __do_IRQ(). ARCH_HAS_IRQ_PER_CPU is defined by architectures using IRQ_PER_CPU in their include/asm_ARCH/irq.h file. Through grepping the tree I found the following architectures currently use IRQ_PER_CPU: cris, ia64, ppc, ppc64 and parisc. 
Signed-off-by: Karsten Wiese Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-cris/irq.h | 5 +++++ include/asm-ia64/irq.h | 5 +++++ include/asm-parisc/irq.h | 5 +++++ include/asm-ppc/irq.h | 5 +++++ include/asm-ppc64/irq.h | 5 +++++ include/linux/irq.h | 7 ++++++- kernel/irq/handle.c | 2 +- 7 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/asm-cris/irq.h b/include/asm-cris/irq.h index 8e787fdaedd4..4fab5c3b2e15 100644 --- a/include/asm-cris/irq.h +++ b/include/asm-cris/irq.h @@ -1,6 +1,11 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H +/* + * IRQ line status macro IRQ_PER_CPU is used + */ +#define ARCH_HAS_IRQ_PER_CPU + #include extern __inline__ int irq_canonicalize(int irq) diff --git a/include/asm-ia64/irq.h b/include/asm-ia64/irq.h index 5d930fdc0bea..cd984d08fd15 100644 --- a/include/asm-ia64/irq.h +++ b/include/asm-ia64/irq.h @@ -14,6 +14,11 @@ #define NR_IRQS 256 #define NR_IRQ_VECTORS NR_IRQS +/* + * IRQ line status macro IRQ_PER_CPU is used + */ +#define ARCH_HAS_IRQ_PER_CPU + static __inline__ int irq_canonicalize (int irq) { diff --git a/include/asm-parisc/irq.h b/include/asm-parisc/irq.h index 75654ba93353..f876bdf22056 100644 --- a/include/asm-parisc/irq.h +++ b/include/asm-parisc/irq.h @@ -26,6 +26,11 @@ #define NR_IRQS (CPU_IRQ_MAX + 1) +/* + * IRQ line status macro IRQ_PER_CPU is used + */ +#define ARCH_HAS_IRQ_PER_CPU + static __inline__ int irq_canonicalize(int irq) { return (irq == 2) ? 
9 : irq; diff --git a/include/asm-ppc/irq.h b/include/asm-ppc/irq.h index a244d93ca953..b4b270457edd 100644 --- a/include/asm-ppc/irq.h +++ b/include/asm-ppc/irq.h @@ -19,6 +19,11 @@ #define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ #define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ +/* + * IRQ line status macro IRQ_PER_CPU is used + */ +#define ARCH_HAS_IRQ_PER_CPU + #if defined(CONFIG_40x) #include diff --git a/include/asm-ppc64/irq.h b/include/asm-ppc64/irq.h index 570678b1da95..99782afb4cde 100644 --- a/include/asm-ppc64/irq.h +++ b/include/asm-ppc64/irq.h @@ -33,6 +33,11 @@ #define IRQ_POLARITY_POSITIVE 0x2 /* high level or low->high edge */ #define IRQ_POLARITY_NEGATIVE 0x0 /* low level or high->low edge */ +/* + * IRQ line status macro IRQ_PER_CPU is used + */ +#define ARCH_HAS_IRQ_PER_CPU + #define get_irq_desc(irq) (&irq_desc[(irq)]) /* Define a way to iterate across irqs. */ diff --git a/include/linux/irq.h b/include/linux/irq.h index 4a362b9ec966..69681c3b1f05 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -32,7 +32,12 @@ #define IRQ_WAITING 32 /* IRQ not yet seen - for autodetection */ #define IRQ_LEVEL 64 /* IRQ level triggered */ #define IRQ_MASKED 128 /* IRQ masked - shouldn't be seen again */ -#define IRQ_PER_CPU 256 /* IRQ is per CPU */ +#if defined(ARCH_HAS_IRQ_PER_CPU) +# define IRQ_PER_CPU 256 /* IRQ is per CPU */ +# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) +#else +# define CHECK_IRQ_PER_CPU(var) 0 +#endif /* * Interrupt controller descriptor. 
This is all we need diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index c29f83c16497..3ff7b925c387 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -111,7 +111,7 @@ fastcall unsigned int __do_IRQ(unsigned int irq, struct pt_regs *regs) unsigned int status; kstat_this_cpu.irqs[irq]++; - if (desc->status & IRQ_PER_CPU) { + if (CHECK_IRQ_PER_CPU(desc->status)) { irqreturn_t action_ret; /* -- cgit v1.2.3 From f23ef184b486ac021b6a471b4e94cfa04860d3b0 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 6 Sep 2005 15:17:29 -0700 Subject: [PATCH] Delete unused do_nanosleep declaration There is no do_nanosleep function so kill its declaration in <linux/time.h>. Signed-off-by: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 5634497ff5df..c10d4c21c183 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -97,7 +97,6 @@ extern int do_settimeofday(struct timespec *tv); extern int do_sys_settimeofday(struct timespec *tv, struct timezone *tz); extern void clock_was_set(void); // call when ever the clock is set extern int do_posix_clock_monotonic_gettime(struct timespec *tp); -extern long do_nanosleep(struct timespec *t); extern long do_utimes(char __user * filename, struct timeval * times); struct itimerval; extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue); -- cgit v1.2.3 From 2832e9366a1fcd6f76957a42157be041240f994e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Sep 2005 15:17:38 -0700 Subject: [PATCH] remove file.f_maxcount struct file cleanup: f_maxcount has a unique value (INT_MAX). Just use the hard-wired value.
Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/file_table.c | 1 - fs/read_write.c | 2 +- include/linux/fs.h | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/file_table.c b/fs/file_table.c index 1d3de78e6bc9..43e9e1737de2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -89,7 +89,6 @@ struct file *get_empty_filp(void) rwlock_init(&f->f_owner.lock); /* f->f_version: 0 */ INIT_LIST_HEAD(&f->f_list); - f->f_maxcount = INT_MAX; return f; over: diff --git a/fs/read_write.c b/fs/read_write.c index 563abd09b5c8..b60324aaa2b6 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -188,7 +188,7 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count struct inode *inode; loff_t pos; - if (unlikely(count > file->f_maxcount)) + if (unlikely(count > INT_MAX)) goto Einval; pos = *ppos; if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) diff --git a/include/linux/fs.h b/include/linux/fs.h index 67e6732d4fdc..2036747c7d1f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -594,7 +594,6 @@ struct file { unsigned int f_uid, f_gid; struct file_ra_state f_ra; - size_t f_maxcount; unsigned long f_version; void *f_security; -- cgit v1.2.3 From b149ee2233edf08fb59b11e879a2c5941929bcb8 Mon Sep 17 00:00:00 2001 From: john stultz Date: Tue, 6 Sep 2005 15:17:46 -0700 Subject: [PATCH] NTP: ntp-helper functions This patch cleans up a commonly repeated set of changes to the NTP state variables by adding two helper inline functions: ntp_clear(): Clears the ntp state variables ntp_synced(): Returns 1 if the system is synced with a time server. This was compile tested for alpha, arm, i386, x86-64, ppc64, s390, sparc, sparc64. 
Signed-off-by: John Stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/time.c | 7 ++----- arch/arm/kernel/time.c | 7 ++----- arch/arm26/kernel/time.c | 7 ++----- arch/cris/arch-v10/kernel/time.c | 2 +- arch/cris/kernel/time.c | 5 +---- arch/frv/kernel/time.c | 7 ++----- arch/h8300/kernel/time.c | 5 +---- arch/i386/kernel/time.c | 7 ++----- arch/m32r/kernel/time.c | 7 ++----- arch/m68k/kernel/time.c | 5 +---- arch/m68knommu/kernel/time.c | 7 ++----- arch/mips/kernel/sysirix.c | 5 +---- arch/mips/kernel/time.c | 7 ++----- arch/mips/sgi-ip27/ip27-timer.c | 2 +- arch/parisc/kernel/time.c | 5 +---- arch/ppc/kernel/time.c | 7 ++----- arch/ppc64/kernel/time.c | 7 ++----- arch/s390/kernel/time.c | 5 +---- arch/sh/kernel/time.c | 7 ++----- arch/sh64/kernel/time.c | 7 ++----- arch/sparc/kernel/pcic.c | 5 +---- arch/sparc/kernel/time.c | 7 ++----- arch/sparc64/kernel/time.c | 2 +- arch/v850/kernel/time.c | 7 ++----- arch/x86_64/kernel/time.c | 7 ++----- arch/xtensa/kernel/time.c | 7 ++----- include/linux/timex.h | 23 +++++++++++++++++++++++ 27 files changed, 65 insertions(+), 111 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/time.c b/arch/alpha/kernel/time.c index 8226c5cd788c..67be50b7d80a 100644 --- a/arch/alpha/kernel/time.c +++ b/arch/alpha/kernel/time.c @@ -149,7 +149,7 @@ irqreturn_t timer_interrupt(int irq, void *dev, struct pt_regs * regs) * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. 
*/ - if ((time_status & STA_UNSYNC) == 0 + if (ntp_synced() && xtime.tv_sec > state.last_rtc_update + 660 && xtime.tv_nsec >= 500000 - ((unsigned) TICK_SIZE) / 2 && xtime.tv_nsec <= 500000 + ((unsigned) TICK_SIZE) / 2) { @@ -502,10 +502,7 @@ do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 8880482dcbff..69449a818dcc 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -102,7 +102,7 @@ static unsigned long next_rtc_update; */ static inline void do_set_rtc(void) { - if (time_status & STA_UNSYNC || set_rtc == NULL) + if (!ntp_synced() || set_rtc == NULL) return; if (next_rtc_update && @@ -292,10 +292,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/arm26/kernel/time.c b/arch/arm26/kernel/time.c index 549a6b2e177e..e66aedd02fad 100644 --- a/arch/arm26/kernel/time.c +++ b/arch/arm26/kernel/time.c @@ -114,7 +114,7 @@ static unsigned long next_rtc_update; */ static inline void do_set_rtc(void) { - if (time_status & STA_UNSYNC || set_rtc == NULL) + if (!ntp_synced() || set_rtc == NULL) return; //FIXME - timespec.tv_sec is a time_t not unsigned long @@ -189,10 +189,7 @@ int do_settimeofday(struct timespec *tv) xtime.tv_sec = tv->tv_sec; xtime.tv_nsec = tv->tv_nsec; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - 
time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/cris/arch-v10/kernel/time.c b/arch/cris/arch-v10/kernel/time.c index 6b7b4e0802e3..dc3dfe9b4a1a 100644 --- a/arch/cris/arch-v10/kernel/time.c +++ b/arch/cris/arch-v10/kernel/time.c @@ -240,7 +240,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * The division here is not time critical since it will run once in * 11 minutes */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - (tick_nsec / 1000) / 2 && (xtime.tv_nsec / 1000) <= 500000 + (tick_nsec / 1000) / 2) { diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index fa2d4323da25..a2d99b4aedcd 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -114,10 +114,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/frv/kernel/time.c b/arch/frv/kernel/time.c index 075db6644694..8d6558b00e44 100644 --- a/arch/frv/kernel/time.c +++ b/arch/frv/kernel/time.c @@ -85,7 +85,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. 
*/ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2 @@ -216,10 +216,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/h8300/kernel/time.c b/arch/h8300/kernel/time.c index 8a600218334d..af8c5d2057dd 100644 --- a/arch/h8300/kernel/time.c +++ b/arch/h8300/kernel/time.c @@ -116,10 +116,7 @@ int do_settimeofday(struct timespec *tv) xtime.tv_sec = tv->tv_sec; xtime.tv_nsec = tv->tv_nsec; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 9b94d84a6c3b..eefea7c55008 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -194,10 +194,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; @@ -347,7 +344,7 @@ static void sync_cmos_clock(unsigned long dummy) * This code is run on a timer. If the clock is set, that timer * may not expire at the correct time. Thus, we adjust... 
*/ - if ((time_status & STA_UNSYNC) != 0) + if (!ntp_synced()) /* * Not synced, exit, do not restart a timer (if one is * running, let it run out). diff --git a/arch/m32r/kernel/time.c b/arch/m32r/kernel/time.c index 8a2b77bc5749..539c562cd54d 100644 --- a/arch/m32r/kernel/time.c +++ b/arch/m32r/kernel/time.c @@ -171,10 +171,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -221,7 +218,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * called as close as possible to 500 ms before the new second starts. */ write_seqlock(&xtime_lock); - if ((time_status & STA_UNSYNC) == 0 + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned)TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned)TICK_SIZE) / 2) diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index e47e19588525..4ec95e3cb874 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -166,10 +166,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index 5c3ca671627c..b17c1ecba966 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -68,7 +68,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) * CMOS clock accordingly every ~11 minutes. 
Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { @@ -178,10 +178,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/mips/kernel/sysirix.c b/arch/mips/kernel/sysirix.c index f3bf0e43b8bb..b46595462717 100644 --- a/arch/mips/kernel/sysirix.c +++ b/arch/mips/kernel/sysirix.c @@ -632,10 +632,7 @@ asmlinkage int irix_stime(int value) write_seqlock_irq(&xtime_lock); xtime.tv_sec = value; xtime.tv_nsec = 0; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); return 0; diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 648c82292ed6..0dd0df7a3b04 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -223,10 +223,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -442,7 +439,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * called as close as possible to 500 ms before the new second starts. 
*/ write_seqlock(&xtime_lock); - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 8c1b96fffa76..cddf1cedf007 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -118,7 +118,7 @@ again: * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to when a second starts. */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 6cf7407344ba..7ff67f8e9f8c 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -188,10 +188,7 @@ do_settimeofday (struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); } write_sequnlock_irq(&xtime_lock); clock_was_set(); diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c index bf4ddca5e853..a3c5281a5d2d 100644 --- a/arch/ppc/kernel/time.c +++ b/arch/ppc/kernel/time.c @@ -169,7 +169,7 @@ void timer_interrupt(struct pt_regs * regs) * We should have an rtc call that only sets the minutes and * seconds like on Intel to avoid problems with non UTC clocks. 
*/ - if ( ppc_md.set_rtc_time && (time_status & STA_UNSYNC) == 0 && + if ( ppc_md.set_rtc_time && ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ && jiffies - wall_jiffies == 1) { @@ -271,10 +271,7 @@ int do_settimeofday(struct timespec *tv) */ last_rtc_update = new_sec - 658; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irqrestore(&xtime_lock, flags); clock_was_set(); return 0; diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 91ef95ccda4f..9939c206afa4 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -128,7 +128,7 @@ static __inline__ void timer_check_rtc(void) * We should have an rtc call that only sets the minutes and * seconds like on Intel to avoid problems with non UTC clocks. */ - if ( (time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && abs((xtime.tv_nsec/1000) - (1000000-1000000/HZ)) < 500000/HZ && jiffies - wall_jiffies == 1) { @@ -435,10 +435,7 @@ int do_settimeofday(struct timespec *tv) */ last_rtc_update = new_sec - 658; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); delta_xsec = mulhdu( (tb_last_stamp-do_gtod.varp->tb_orig_stamp), do_gtod.varp->tb_to_xs ); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 8ca485676780..2fd75da15495 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -139,10 +139,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); 
write_sequnlock_irq(&xtime_lock); clock_was_set(); return 0; diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index d5f5aedde0a3..02ca69918d7c 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -215,10 +215,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -252,7 +249,7 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs) * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { diff --git a/arch/sh64/kernel/time.c b/arch/sh64/kernel/time.c index 926c6fc0619c..f4a62a10053c 100644 --- a/arch/sh64/kernel/time.c +++ b/arch/sh64/kernel/time.c @@ -247,10 +247,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -328,7 +325,7 @@ static inline void do_timer_interrupt(int irq, struct pt_regs *regs) * RTC clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. 
*/ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { diff --git a/arch/sparc/kernel/pcic.c b/arch/sparc/kernel/pcic.c index 597d3ff6ad68..36a40697b8d6 100644 --- a/arch/sparc/kernel/pcic.c +++ b/arch/sparc/kernel/pcic.c @@ -840,10 +840,7 @@ static int pci_do_settimeofday(struct timespec *tv) xtime.tv_sec = tv->tv_sec; xtime.tv_nsec = tv->tv_nsec; - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); return 0; } diff --git a/arch/sparc/kernel/time.c b/arch/sparc/kernel/time.c index 3b759aefc170..bc015e980341 100644 --- a/arch/sparc/kernel/time.c +++ b/arch/sparc/kernel/time.c @@ -139,7 +139,7 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs * regs) /* Determine when to update the Mostek clock. */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { @@ -554,10 +554,7 @@ static int sbus_do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); return 0; } diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c index 362b9c26871b..3f08a32f51a1 100644 --- a/arch/sparc64/kernel/time.c +++ b/arch/sparc64/kernel/time.c @@ -449,7 +449,7 @@ static inline void timer_check_rtc(void) static long last_rtc_update; /* Determine when to update the Mostek clock. 
*/ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { diff --git a/arch/v850/kernel/time.c b/arch/v850/kernel/time.c index f722a268238a..ea3fd8844ff0 100644 --- a/arch/v850/kernel/time.c +++ b/arch/v850/kernel/time.c @@ -66,7 +66,7 @@ static irqreturn_t timer_interrupt (int irq, void *dummy, struct pt_regs *regs) * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be * called as close as possible to 500 ms before the new second starts. */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec > last_rtc_update + 660 && (xtime.tv_nsec / 1000) >= 500000 - ((unsigned) TICK_SIZE) / 2 && (xtime.tv_nsec / 1000) <= 500000 + ((unsigned) TICK_SIZE) / 2) { @@ -169,10 +169,7 @@ int do_settimeofday(struct timespec *tv) xtime.tv_sec = tv->tv_sec; xtime.tv_nsec = tv->tv_nsec; - time_adjust = 0; /* stop active adjtime () */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq (&xtime_lock); clock_was_set(); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2b5d9da912a2..7b6abe058253 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -176,10 +176,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); clock_was_set(); @@ -471,7 +468,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * off) isn't likely to go away much sooner anyway. 
*/ - if ((~time_status & STA_UNSYNC) && xtime.tv_sec > rtc_update && + if (ntp_synced() && xtime.tv_sec > rtc_update && abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) { set_rtc_mmss(xtime.tv_sec); rtc_update = xtime.tv_sec + 660; diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index e07287db5a40..1ac7d5ce7456 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -122,10 +122,7 @@ int do_settimeofday(struct timespec *tv) set_normalized_timespec(&xtime, sec, nsec); set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); - time_adjust = 0; /* stop active adjtime() */ - time_status |= STA_UNSYNC; - time_maxerror = NTP_PHASE_LIMIT; - time_esterror = NTP_PHASE_LIMIT; + ntp_clear(); write_sequnlock_irq(&xtime_lock); return 0; } @@ -184,7 +181,7 @@ again: next += CCOUNT_PER_JIFFY; do_timer (regs); /* Linux handler in kernel/timer.c */ - if ((time_status & STA_UNSYNC) == 0 && + if (ntp_synced() && xtime.tv_sec - last_rtc_update >= 659 && abs((xtime.tv_nsec/1000)-(1000000-1000000/HZ))<5000000/HZ && jiffies - wall_jiffies == 1) { diff --git a/include/linux/timex.h b/include/linux/timex.h index 74fdd07d3792..7e050a2cc35b 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -260,6 +260,29 @@ extern long pps_calcnt; /* calibration intervals */ extern long pps_errcnt; /* calibration errors */ extern long pps_stbcnt; /* stability limit exceeded */ +/** + * ntp_clear - Clears the NTP state variables + * + * Must be called while holding a write on the xtime_lock + */ +static inline void ntp_clear(void) +{ + time_adjust = 0; /* stop active adjtime() */ + time_status |= STA_UNSYNC; + time_maxerror = NTP_PHASE_LIMIT; + time_esterror = NTP_PHASE_LIMIT; +} + +/** + * ntp_synced - Returns 1 if the NTP status is not UNSYNC + * + */ +static inline int ntp_synced(void) +{ + return !(time_status & STA_UNSYNC); +} + + #ifdef CONFIG_TIME_INTERPOLATION #define TIME_SOURCE_CPU 0 -- cgit v1.2.3 From 
9c45817f41af987277353e463c78a1c6beb37da2 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Tue, 6 Sep 2005 15:17:47 -0700 Subject: [PATCH] Remove non-arch consumers of asm/segment.h asm/segment.h varies greatly on different architectures but is clearly deprecated. Removing all non-architecture consumers will make it easier for us to get rid of asm/segment.h altogether. Signed-off-by: Kumar Gala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/isdn/hisax/hisax.h | 1 - drivers/media/video/adv7170.c | 1 - drivers/media/video/adv7175.c | 1 - drivers/media/video/bt819.c | 1 - drivers/media/video/bt856.c | 1 - drivers/media/video/saa7111.c | 1 - drivers/media/video/saa7114.c | 1 - drivers/media/video/saa7185.c | 1 - drivers/serial/68328serial.c | 1 - drivers/serial/crisv10.c | 1 - drivers/serial/icom.c | 1 - drivers/serial/mcfserial.c | 1 - drivers/video/q40fb.c | 1 - include/linux/isdn.h | 1 - sound/oss/os.h | 3 --- 15 files changed, 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/isdn/hisax/hisax.h b/drivers/isdn/hisax/hisax.h index 17cf7663c582..6eb96cba4d29 100644 --- a/drivers/isdn/hisax/hisax.h +++ b/drivers/isdn/hisax/hisax.h @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c index 52e32f05d625..1ca2b67aedfb 100644 --- a/drivers/media/video/adv7170.c +++ b/drivers/media/video/adv7170.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c index b5ed9544bdea..173bca1e0295 100644 --- a/drivers/media/video/adv7175.c +++ b/drivers/media/video/adv7175.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c index c6cfa7c48b04..3ee0afca76a7 100644 --- a/drivers/media/video/bt819.c +++ b/drivers/media/video/bt819.c @@ -43,7 +43,6 @@ #include
#include #include -#include #include #include diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c index c13d28658868..8eb871d0e85b 100644 --- a/drivers/media/video/bt856.c +++ b/drivers/media/video/bt856.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c index f18df53d98ff..fe8a5e453969 100644 --- a/drivers/media/video/saa7111.c +++ b/drivers/media/video/saa7111.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c index e0c70f54f073..d9f50e2f7b92 100644 --- a/drivers/media/video/saa7114.c +++ b/drivers/media/video/saa7114.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c index e93412f4407c..132aa7943c16 100644 --- a/drivers/media/video/saa7185.c +++ b/drivers/media/video/saa7185.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include diff --git a/drivers/serial/68328serial.c b/drivers/serial/68328serial.c index 9097f2f7b12a..2efb317153ce 100644 --- a/drivers/serial/68328serial.c +++ b/drivers/serial/68328serial.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include diff --git a/drivers/serial/crisv10.c b/drivers/serial/crisv10.c index 5690594b257b..40d3e7139cfe 100644 --- a/drivers/serial/crisv10.c +++ b/drivers/serial/crisv10.c @@ -446,7 +446,6 @@ static char *serial_version = "$Revision: 1.25 $"; #include #include #include -#include #include #include diff --git a/drivers/serial/icom.c b/drivers/serial/icom.c index 79f8df4d66b7..eb31125c6a30 100644 --- a/drivers/serial/icom.c +++ b/drivers/serial/icom.c @@ -56,7 +56,6 @@ #include #include -#include #include #include #include diff --git a/drivers/serial/mcfserial.c b/drivers/serial/mcfserial.c index 8c40167778de..43b03c55f453 100644 --- a/drivers/serial/mcfserial.c 
+++ b/drivers/serial/mcfserial.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/video/q40fb.c b/drivers/video/q40fb.c index 71b69da0c40d..162012bb9264 100644 --- a/drivers/video/q40fb.c +++ b/drivers/video/q40fb.c @@ -21,7 +21,6 @@ #include #include -#include #include #include #include diff --git a/include/linux/isdn.h b/include/linux/isdn.h index 862083eb58ab..53eaee96065b 100644 --- a/include/linux/isdn.h +++ b/include/linux/isdn.h @@ -150,7 +150,6 @@ typedef struct { #include #include #include -#include #include #include #include diff --git a/sound/oss/os.h b/sound/oss/os.h index d6b96297835c..80dce329cc3a 100644 --- a/sound/oss/os.h +++ b/sound/oss/os.h @@ -19,9 +19,6 @@ #include #include #include -#ifdef __alpha__ -#include -#endif #include #include #include -- cgit v1.2.3 From 7ea6040b0eff07d3a9a4e2d248ac137c6ad02d42 Mon Sep 17 00:00:00 2001 From: John McCutchan Date: Tue, 6 Sep 2005 15:18:02 -0700 Subject: [PATCH] inotify: fix event loss on hardlinked files People have run into a problem when they do this: watch (file1, all_events); watch (file2, some_events); if file2 is a hard link to file1, some events will be missed because by default we replace the mask. The patch below adds a flag IN_MASK_ADD which will cause inotify to add to the existing mask if present. 
Signed-off-by: John McCutchan Signed-off-by: Robert Love Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 9 ++++++++- include/linux/inotify.h | 1 + 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/fs/inotify.c b/fs/inotify.c index 2fd97ef547ff..a37e9fb1da58 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -931,6 +931,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) struct nameidata nd; struct file *filp; int ret, fput_needed; + int mask_add = 0; filp = fget_light(fd, &fput_needed); if (unlikely(!filp)) @@ -953,6 +954,9 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) down(&inode->inotify_sem); down(&dev->sem); + if (mask & IN_MASK_ADD) + mask_add = 1; + /* don't let user-space set invalid bits: we don't want flags set */ mask &= IN_ALL_EVENTS; if (unlikely(!mask)) { @@ -966,7 +970,10 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) */ old = inode_find_dev(inode, dev); if (unlikely(old)) { - old->mask = mask; + if (mask_add) + old->mask |= mask; + else + old->mask = mask; ret = old->wd; goto out; } diff --git a/include/linux/inotify.h b/include/linux/inotify.h index 93bb3afe646b..ee5b239092ed 100644 --- a/include/linux/inotify.h +++ b/include/linux/inotify.h @@ -47,6 +47,7 @@ struct inotify_event { #define IN_MOVE (IN_MOVED_FROM | IN_MOVED_TO) /* moves */ /* special flags */ +#define IN_MASK_ADD 0x20000000 /* add to the mask of an already existing watch */ #define IN_ISDIR 0x40000000 /* event occurred against dir */ #define IN_ONESHOT 0x80000000 /* only send event once */ -- cgit v1.2.3 From f90b1d2f1aaaa40c6519a32e69615edc25bb97d5 Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:10 -0700 Subject: [PATCH] cpusets: new __GFP_HARDWALL flag Add another GFP flag: __GFP_HARDWALL. 
A subsequent "cpuset_zone_allowed" patch will use this flag to mark GFP_USER allocations, and distinguish them from GFP_KERNEL allocations. Allocations (such as GFP_USER) marked __GFP_HARDWALL are constrained to the current task's cpuset. Other allocations (such as GFP_KERNEL) can steal from the possibly larger nearest mem_exclusive cpuset ancestor, if memory is tight on every node in the current cpuset. This patch collides with Mel Gorman's patch to reduce fragmentation in the standard buddy allocator, which adds two GFP flags. This was discussed on linux-mm in July. Most likely, one of his flags for user reclaimable memory can be the same as my __GFP_HARDWALL flag, under some generic name meaning it is user address space memory. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 7c7400137e97..4dc990f3b5cc 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,6 +40,7 @@ struct vm_area_struct; #define __GFP_ZERO 0x8000u /* Return zeroed page on success */ #define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */ #define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */ +#define __GFP_HARDWALL 0x40000u /* Enforce hardwall cpuset memory allocs */ #define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) @@ -48,14 +49,15 @@ struct vm_area_struct; #define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \ __GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \ __GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \ - __GFP_NOMEMALLOC|__GFP_NORECLAIM) + __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL) #define GFP_ATOMIC (__GFP_HIGH) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_USER
(__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HIGHMEM) +#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \ + __GFP_HIGHMEM) /* Flag - indicates that the buffer will be suitable for DMA. Ignored on some platforms, used as appropriate on others */ -- cgit v1.2.3 From 9bf2229f8817677127a60c177aefce1badd22d7b Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:12 -0700 Subject: [PATCH] cpusets: formalize intermediate GFP_KERNEL containment This patch makes use of the previously underutilized cpuset flag 'mem_exclusive' to provide what amounts to another layer of memory placement resolution. With this patch, there are now the following four layers of memory placement available: 1) The whole system (interrupt and GFP_ATOMIC allocations can use this), 2) The nearest enclosing mem_exclusive cpuset (GFP_KERNEL allocations can use), 3) The current tasks cpuset (GFP_USER allocations constrained to here), and 4) Specific node placement, using mbind and set_mempolicy. These nest - each layer is a subset (same or within) of the previous. Layer (2) above is new, with this patch. The call used to check whether a zone (its node, actually) is in a cpuset (in its mems_allowed, actually) is extended to take a gfp_mask argument, and its logic is extended, in the case that __GFP_HARDWALL is not set in the flag bits, to look up the cpuset hierarchy for the nearest enclosing mem_exclusive cpuset, to determine if placement is allowed. The definition of GFP_USER, which used to be identical to GFP_KERNEL, is changed to also set the __GFP_HARDWALL bit, in the previous cpuset_gfp_hardwall_flag patch. GFP_ATOMIC and GFP_KERNEL allocations will stay within the current tasks cpuset, so long as any node therein is not too tight on memory, but will escape to the larger layer, if need be. 
The intended use is to allow something like a batch manager to handle several jobs, each job in its own cpuset, but using common kernel memory for caches and such. Swapper and oom_kill activity is also constrained to Layer (2). A task in or below one mem_exclusive cpuset should not cause swapping on nodes in another non-overlapping mem_exclusive cpuset, nor provoke oom_killing of a task in another such cpuset. Heavy use of kernel memory for i/o caching and such by one job should not impact the memory available to jobs in other non-overlapping mem_exclusive cpusets. This patch enables providing hardwall, inescapable cpusets for memory allocations of each job, while sharing kernel memory allocations between several jobs, in an enclosing mem_exclusive cpuset. Like Dinakar's patch earlier to enable administering sched domains using the cpu_exclusive flag, this patch also provides a useful meaning to a cpuset flag that had previously done nothing much useful other than restrict what cpuset configurations were allowed. Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cpusets.txt | 12 +++++++ include/linux/cpuset.h | 5 +-- kernel/cpuset.c | 80 ++++++++++++++++++++++++++++++++++++++++++----- mm/page_alloc.c | 16 ++++++---- mm/vmscan.c | 8 ++--- 5 files changed, 101 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt index ad944c060312..47f4114fbf54 100644 --- a/Documentation/cpusets.txt +++ b/Documentation/cpusets.txt @@ -60,6 +60,18 @@ all of the cpus in the system. This removes any overhead due to load balancing code trying to pull tasks outside of the cpu exclusive cpuset only to be prevented by the tasks' cpus_allowed mask. +A cpuset that is mem_exclusive restricts kernel allocations for +page, buffer and other data commonly shared by the kernel across +multiple users. 
All cpusets, whether mem_exclusive or not, restrict +allocations of memory for user space. This enables configuring a +system so that several independent jobs can share common kernel +data, such as file system pages, while isolating each jobs user +allocation in its own cpuset. To do this, construct a large +mem_exclusive cpuset to hold all the jobs, and construct child, +non-mem_exclusive cpusets for each individual job. Only a small +amount of typical kernel memory, such as requests from interrupt +handlers, is allowed to be taken outside even a mem_exclusive cpuset. + User level code may create and destroy cpusets by name in the cpuset virtual file system, manage the attributes and permissions of these cpusets and which CPUs and Memory Nodes are assigned to each cpuset, diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 3438233305a3..1fe1c3ebad30 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -23,7 +23,7 @@ void cpuset_init_current_mems_allowed(void); void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); -int cpuset_zone_allowed(struct zone *z); +extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -48,7 +48,8 @@ static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 1; } -static inline int cpuset_zone_allowed(struct zone *z) +static inline int cpuset_zone_allowed(struct zone *z, + unsigned int __nocast gfp_mask) { return 1; } diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 8ab1b4e518b8..214806deca99 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1611,17 +1611,81 @@ int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) return 0; } +/* + * nearest_exclusive_ancestor() - Returns the nearest mem_exclusive + * 
ancestor to the specified cpuset. Call while holding cpuset_sem. + * If no ancestor is mem_exclusive (an unusual configuration), then + * returns the root cpuset. + */ +static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) +{ + while (!is_mem_exclusive(cs) && cs->parent) + cs = cs->parent; + return cs; +} + /** - * cpuset_zone_allowed - is zone z allowed in current->mems_allowed - * @z: zone in question + * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? + * @z: is this zone on an allowed node? + * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) * - * Is zone z allowed in current->mems_allowed, or is - * the CPU in interrupt context? (zone is always allowed in this case) - */ -int cpuset_zone_allowed(struct zone *z) + * If we're in interrupt, yes, we can always allocate. If zone + * z's node is in our tasks mems_allowed, yes. If it's not a + * __GFP_HARDWALL request and this zone's nodes is in the nearest + * mem_exclusive cpuset ancestor to this tasks cpuset, yes. + * Otherwise, no. + * + * GFP_USER allocations are marked with the __GFP_HARDWALL bit, + * and do not allow allocations outside the current tasks cpuset. + * GFP_KERNEL allocations are not so marked, so can escape to the + * nearest mem_exclusive ancestor cpuset. + * + * Scanning up parent cpusets requires cpuset_sem. The __alloc_pages() + * routine only calls here with __GFP_HARDWALL bit _not_ set if + * it's a GFP_KERNEL allocation, and all nodes in the current tasks + * mems_allowed came up empty on the first pass over the zonelist. + * So only GFP_KERNEL allocations, if all nodes in the cpuset are + * short of memory, might require taking the cpuset_sem semaphore. + * + * The first loop over the zonelist in mm/page_alloc.c:__alloc_pages() + * calls here with __GFP_HARDWALL always set in gfp_mask, enforcing + * hardwall cpusets - no allocation on a node outside the cpuset is + * allowed (unless in interrupt, of course). 
+ * + * The second loop doesn't even call here for GFP_ATOMIC requests + * (if the __alloc_pages() local variable 'wait' is set). That check + * and the checks below have the combined affect in the second loop of + * the __alloc_pages() routine that: + * in_interrupt - any node ok (current task context irrelevant) + * GFP_ATOMIC - any node ok + * GFP_KERNEL - any node in enclosing mem_exclusive cpuset ok + * GFP_USER - only nodes in current tasks mems allowed ok. + **/ + +int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask) { - return in_interrupt() || - node_isset(z->zone_pgdat->node_id, current->mems_allowed); + int node; /* node that zone z is on */ + const struct cpuset *cs; /* current cpuset ancestors */ + int allowed = 1; /* is allocation in zone z allowed? */ + + if (in_interrupt()) + return 1; + node = z->zone_pgdat->node_id; + if (node_isset(node, current->mems_allowed)) + return 1; + if (gfp_mask & __GFP_HARDWALL) /* If hardwall request, stop here */ + return 0; + + /* Not hardwall and node outside mems_allowed: scan up cpusets */ + down(&cpuset_sem); + cs = current->cpuset; + if (!cs) + goto done; /* current task exiting */ + cs = nearest_exclusive_ancestor(cs); + allowed = node_isset(node, cs->mems_allowed); +done: + up(&cpuset_sem); + return allowed; } /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 14d7032c1d12..3974fd81d27c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -806,11 +806,14 @@ __alloc_pages(unsigned int __nocast gfp_mask, unsigned int order, classzone_idx = zone_idx(zones[0]); restart: - /* Go through the zonelist once, looking for a zone with enough free */ + /* + * Go through the zonelist once, looking for a zone with enough free. + * See also cpuset_zone_allowed() comment in kernel/cpuset.c. 
+ */ for (i = 0; (z = zones[i]) != NULL; i++) { int do_reclaim = should_reclaim_zone(z, gfp_mask); - if (!cpuset_zone_allowed(z)) + if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) continue; /* @@ -845,6 +848,7 @@ zone_reclaim_retry: * * This is the last chance, in general, before the goto nopage. * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. + * See also cpuset_zone_allowed() comment in kernel/cpuset.c. */ for (i = 0; (z = zones[i]) != NULL; i++) { if (!zone_watermark_ok(z, order, z->pages_min, @@ -852,7 +856,7 @@ zone_reclaim_retry: gfp_mask & __GFP_HIGH)) continue; - if (wait && !cpuset_zone_allowed(z)) + if (wait && !cpuset_zone_allowed(z, gfp_mask)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -867,7 +871,7 @@ zone_reclaim_retry: if (!(gfp_mask & __GFP_NOMEMALLOC)) { /* go through the zonelist yet again, ignoring mins */ for (i = 0; (z = zones[i]) != NULL; i++) { - if (!cpuset_zone_allowed(z)) + if (!cpuset_zone_allowed(z, gfp_mask)) continue; page = buffered_rmqueue(z, order, gfp_mask); if (page) @@ -903,7 +907,7 @@ rebalance: gfp_mask & __GFP_HIGH)) continue; - if (!cpuset_zone_allowed(z)) + if (!cpuset_zone_allowed(z, gfp_mask)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -922,7 +926,7 @@ rebalance: classzone_idx, 0, 0)) continue; - if (!cpuset_zone_allowed(z)) + if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) continue; page = buffered_rmqueue(z, order, gfp_mask); diff --git a/mm/vmscan.c b/mm/vmscan.c index 0095533cdde9..a740778f688d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -894,7 +894,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc) if (zone->present_pages == 0) continue; - if (!cpuset_zone_allowed(zone)) + if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; zone->temp_priority = sc->priority; @@ -940,7 +940,7 @@ int try_to_free_pages(struct zone **zones, unsigned int gfp_mask) for (i = 0; zones[i] != NULL; i++) { struct zone *zone = zones[i]; - if (!cpuset_zone_allowed(zone)) + if 
(!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; zone->temp_priority = DEF_PRIORITY; @@ -986,7 +986,7 @@ out: for (i = 0; zones[i] != 0; i++) { struct zone *zone = zones[i]; - if (!cpuset_zone_allowed(zone)) + if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; zone->prev_priority = zone->temp_priority; @@ -1256,7 +1256,7 @@ void wakeup_kswapd(struct zone *zone, int order) return; if (pgdat->kswapd_max_order < order) pgdat->kswapd_max_order = order; - if (!cpuset_zone_allowed(zone)) + if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) return; if (!waitqueue_active(&zone->zone_pgdat->kswapd_wait)) return; -- cgit v1.2.3 From ef08e3b4981aebf2ba9bd7025ef7210e8eec07ce Mon Sep 17 00:00:00 2001 From: Paul Jackson Date: Tue, 6 Sep 2005 15:18:13 -0700 Subject: [PATCH] cpusets: confine oom_killer to mem_exclusive cpuset Now the real motivation for this cpuset mem_exclusive patch series seems trivial. This patch keeps a task in or under one mem_exclusive cpuset from provoking an oom kill of a task under a non-overlapping mem_exclusive cpuset. Since only interrupt and GFP_ATOMIC allocations are allowed to escape mem_exclusive containment, there is little to gain from oom killing a task under a non-overlapping mem_exclusive cpuset, as almost all kernel and user memory allocation must come from disjoint memory nodes. This patch enables configuring a system so that a runaway job under one mem_exclusive cpuset cannot cause the killing of a job in another such cpuset that might be using very high compute and memory resources for a prolonged time. 
Signed-off-by: Paul Jackson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ kernel/cpuset.c | 33 +++++++++++++++++++++++++++++++++ mm/oom_kill.c | 5 +++++ 3 files changed, 44 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1fe1c3ebad30..24062a1dbf61 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -24,6 +24,7 @@ void cpuset_update_current_mems_allowed(void); void cpuset_restrict_to_mems_allowed(unsigned long *nodes); int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); extern int cpuset_zone_allowed(struct zone *z, unsigned int __nocast gfp_mask); +extern int cpuset_excl_nodes_overlap(const struct task_struct *p); extern struct file_operations proc_cpuset_operations; extern char *cpuset_task_status_allowed(struct task_struct *task, char *buffer); @@ -54,6 +55,11 @@ static inline int cpuset_zone_allowed(struct zone *z, return 1; } +static inline int cpuset_excl_nodes_overlap(const struct task_struct *p) +{ + return 1; +} + static inline char *cpuset_task_status_allowed(struct task_struct *task, char *buffer) { diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 214806deca99..40c6d801dd66 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -1688,6 +1688,39 @@ done: return allowed; } +/** + * cpuset_excl_nodes_overlap - Do we overlap @p's mem_exclusive ancestors? + * @p: pointer to task_struct of some other task. + * + * Description: Return true if the nearest mem_exclusive ancestor + * cpusets of tasks @p and current overlap. Used by oom killer to + * determine if task @p's memory usage might impact the memory + * available to the current task. + * + * Acquires cpuset_sem - not suitable for calling from a fast path. + **/ + +int cpuset_excl_nodes_overlap(const struct task_struct *p) +{ + const struct cpuset *cs1, *cs2; /* my and p's cpuset ancestors */ + int overlap = 0; /* do cpusets overlap? 
*/ + + down(&cpuset_sem); + cs1 = current->cpuset; + if (!cs1) + goto done; /* current task exiting */ + cs2 = p->cpuset; + if (!cs2) + goto done; /* task p is exiting */ + cs1 = nearest_exclusive_ancestor(cs1); + cs2 = nearest_exclusive_ancestor(cs2); + overlap = nodes_intersects(cs1->mems_allowed, cs2->mems_allowed); +done: + up(&cpuset_sem); + + return overlap; +} + /* * proc_cpuset_show() * - Print tasks cpuset path into seq_file. diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3a1d46502938..5ec8da12cfd9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -20,6 +20,7 @@ #include #include #include +#include /* #define DEBUG */ @@ -152,6 +153,10 @@ static struct task_struct * select_bad_process(void) continue; if (p->oomkilladj == OOM_DISABLE) continue; + /* If p's nodes don't overlap ours, it won't help to kill p. */ + if (!cpuset_excl_nodes_overlap(p)) + continue; + /* * This is in the process of releasing memory so for wait it * to finish before killing some other task by mistake. -- cgit v1.2.3 From 9c1cfda20a508b181bdda8c0045f7c0c333880a5 Mon Sep 17 00:00:00 2001 From: John Hawkes Date: Tue, 6 Sep 2005 15:18:14 -0700 Subject: [PATCH] cpusets: Move the ia64 domain setup code to the generic code Signed-off-by: John Hawkes Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/Makefile | 2 +- arch/ia64/kernel/domain.c | 444 ------------------------------------------- include/asm-ia64/processor.h | 3 - include/asm-ia64/topology.h | 23 --- include/linux/sched.h | 7 - include/linux/topology.h | 23 +++ kernel/sched.c | 290 ++++++++++++++++++++++------ 7 files changed, 260 insertions(+), 532 deletions(-) delete mode 100644 arch/ia64/kernel/domain.c (limited to 'include/linux') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index b242594be55b..307514f7a282 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -16,7 +16,7 @@ obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o 
obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o +obj-$(CONFIG_SMP) += smp.o smpboot.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o obj-$(CONFIG_IA64_CYCLONE) += cyclone.o diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c deleted file mode 100644 index e907109983f1..000000000000 --- a/arch/ia64/kernel/domain.c +++ /dev/null @@ -1,444 +0,0 @@ -/* - * arch/ia64/kernel/domain.c - * Architecture specific sched-domains builder. - * - * Copyright (C) 2004 Jesse Barnes - * Copyright (C) 2004 Silicon Graphics, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include - -#define SD_NODES_PER_DOMAIN 16 - -#ifdef CONFIG_NUMA -/** - * find_next_best_node - find the next node to include in a sched_domain - * @node: node whose sched_domain we're building - * @used_nodes: nodes already in the sched_domain - * - * Find the next node to include in a given scheduling domain. Simply - * finds the closest node not already in the @used_nodes map. - * - * Should use nodemask_t. - */ -static int find_next_best_node(int node, unsigned long *used_nodes) -{ - int i, n, val, min_val, best_node = 0; - - min_val = INT_MAX; - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Start at @node */ - n = (node + i) % MAX_NUMNODES; - - if (!nr_cpus_node(n)) - continue; - - /* Skip already used nodes */ - if (test_bit(n, used_nodes)) - continue; - - /* Simple min distance search */ - val = node_distance(node, n); - - if (val < min_val) { - min_val = val; - best_node = n; - } - } - - set_bit(best_node, used_nodes); - return best_node; -} - -/** - * sched_domain_node_span - get a cpumask for a node's sched_domain - * @node: node whose cpumask we're constructing - * @size: number of nodes to include in this span - * - * Given a node, construct a good cpumask for its sched_domain to span. 
It - * should be one that prevents unnecessary balancing, but also spreads tasks - * out optimally. - */ -static cpumask_t sched_domain_node_span(int node) -{ - int i; - cpumask_t span, nodemask; - DECLARE_BITMAP(used_nodes, MAX_NUMNODES); - - cpus_clear(span); - bitmap_zero(used_nodes, MAX_NUMNODES); - - nodemask = node_to_cpumask(node); - cpus_or(span, span, nodemask); - set_bit(node, used_nodes); - - for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { - int next_node = find_next_best_node(node, used_nodes); - nodemask = node_to_cpumask(next_node); - cpus_or(span, span, nodemask); - } - - return span; -} -#endif - -/* - * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we - * can switch it on easily if needed. - */ -#ifdef CONFIG_SCHED_SMT -static DEFINE_PER_CPU(struct sched_domain, cpu_domains); -static struct sched_group sched_group_cpus[NR_CPUS]; -static int cpu_to_cpu_group(int cpu) -{ - return cpu; -} -#endif - -static DEFINE_PER_CPU(struct sched_domain, phys_domains); -static struct sched_group sched_group_phys[NR_CPUS]; -static int cpu_to_phys_group(int cpu) -{ -#ifdef CONFIG_SCHED_SMT - return first_cpu(cpu_sibling_map[cpu]); -#else - return cpu; -#endif -} - -#ifdef CONFIG_NUMA -/* - * The init_sched_build_groups can't handle what we want to do with node - * groups, so roll our own. Now each node has its own list of groups which - * gets dynamically allocated. 
- */ -static DEFINE_PER_CPU(struct sched_domain, node_domains); -static struct sched_group **sched_group_nodes_bycpu[NR_CPUS]; - -static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); -static struct sched_group *sched_group_allnodes_bycpu[NR_CPUS]; - -static int cpu_to_allnodes_group(int cpu) -{ - return cpu_to_node(cpu); -} -#endif - -/* - * Build sched domains for a given set of cpus and attach the sched domains - * to the individual cpus - */ -void build_sched_domains(const cpumask_t *cpu_map) -{ - int i; -#ifdef CONFIG_NUMA - struct sched_group **sched_group_nodes = NULL; - struct sched_group *sched_group_allnodes = NULL; - - /* - * Allocate the per-node list of sched groups - */ - sched_group_nodes = kmalloc(sizeof(struct sched_group*)*MAX_NUMNODES, - GFP_ATOMIC); - if (!sched_group_nodes) { - printk(KERN_WARNING "Can not alloc sched group node list\n"); - return; - } - sched_group_nodes_bycpu[first_cpu(*cpu_map)] = sched_group_nodes; -#endif - - /* - * Set up domains for cpus specified by the cpu_map. 
- */ - for_each_cpu_mask(i, *cpu_map) { - int group; - struct sched_domain *sd = NULL, *p; - cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); - - cpus_and(nodemask, nodemask, *cpu_map); - -#ifdef CONFIG_NUMA - if (cpus_weight(*cpu_map) - > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { - if (!sched_group_allnodes) { - sched_group_allnodes - = kmalloc(sizeof(struct sched_group) - * MAX_NUMNODES, - GFP_KERNEL); - if (!sched_group_allnodes) { - printk(KERN_WARNING - "Can not alloc allnodes sched group\n"); - break; - } - sched_group_allnodes_bycpu[i] - = sched_group_allnodes; - } - sd = &per_cpu(allnodes_domains, i); - *sd = SD_ALLNODES_INIT; - sd->span = *cpu_map; - group = cpu_to_allnodes_group(i); - sd->groups = &sched_group_allnodes[group]; - p = sd; - } else - p = NULL; - - sd = &per_cpu(node_domains, i); - *sd = SD_NODE_INIT; - sd->span = sched_domain_node_span(cpu_to_node(i)); - sd->parent = p; - cpus_and(sd->span, sd->span, *cpu_map); -#endif - - p = sd; - sd = &per_cpu(phys_domains, i); - group = cpu_to_phys_group(i); - *sd = SD_CPU_INIT; - sd->span = nodemask; - sd->parent = p; - sd->groups = &sched_group_phys[group]; - -#ifdef CONFIG_SCHED_SMT - p = sd; - sd = &per_cpu(cpu_domains, i); - group = cpu_to_cpu_group(i); - *sd = SD_SIBLING_INIT; - sd->span = cpu_sibling_map[i]; - cpus_and(sd->span, sd->span, *cpu_map); - sd->parent = p; - sd->groups = &sched_group_cpus[group]; -#endif - } - -#ifdef CONFIG_SCHED_SMT - /* Set up CPU (sibling) groups */ - for_each_cpu_mask(i, *cpu_map) { - cpumask_t this_sibling_map = cpu_sibling_map[i]; - cpus_and(this_sibling_map, this_sibling_map, *cpu_map); - if (i != first_cpu(this_sibling_map)) - continue; - - init_sched_build_groups(sched_group_cpus, this_sibling_map, - &cpu_to_cpu_group); - } -#endif - - /* Set up physical groups */ - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - 
init_sched_build_groups(sched_group_phys, nodemask, - &cpu_to_phys_group); - } - -#ifdef CONFIG_NUMA - if (sched_group_allnodes) - init_sched_build_groups(sched_group_allnodes, *cpu_map, - &cpu_to_allnodes_group); - - for (i = 0; i < MAX_NUMNODES; i++) { - /* Set up node groups */ - struct sched_group *sg, *prev; - cpumask_t nodemask = node_to_cpumask(i); - cpumask_t domainspan; - cpumask_t covered = CPU_MASK_NONE; - int j; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) { - sched_group_nodes[i] = NULL; - continue; - } - - domainspan = sched_domain_node_span(i); - cpus_and(domainspan, domainspan, *cpu_map); - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - sched_group_nodes[i] = sg; - for_each_cpu_mask(j, nodemask) { - struct sched_domain *sd; - sd = &per_cpu(node_domains, j); - sd->groups = sg; - if (sd->groups == NULL) { - /* Turn off balancing if we have no groups */ - sd->flags = 0; - } - } - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", i); - continue; - } - sg->cpu_power = 0; - sg->cpumask = nodemask; - cpus_or(covered, covered, nodemask); - prev = sg; - - for (j = 0; j < MAX_NUMNODES; j++) { - cpumask_t tmp, notcovered; - int n = (i + j) % MAX_NUMNODES; - - cpus_complement(notcovered, covered); - cpus_and(tmp, notcovered, *cpu_map); - cpus_and(tmp, tmp, domainspan); - if (cpus_empty(tmp)) - break; - - nodemask = node_to_cpumask(n); - cpus_and(tmp, tmp, nodemask); - if (cpus_empty(tmp)) - continue; - - sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); - if (!sg) { - printk(KERN_WARNING - "Can not alloc domain group for node %d\n", j); - break; - } - sg->cpu_power = 0; - sg->cpumask = tmp; - cpus_or(covered, covered, tmp); - prev->next = sg; - prev = sg; - } - prev->next = sched_group_nodes[i]; - } -#endif - - /* Calculate CPU power for physical packages and nodes */ - for_each_cpu_mask(i, *cpu_map) { - int power; - struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = 
&per_cpu(cpu_domains, i); - power = SCHED_LOAD_SCALE; - sd->groups->cpu_power = power; -#endif - - sd = &per_cpu(phys_domains, i); - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - -#ifdef CONFIG_NUMA - sd = &per_cpu(allnodes_domains, i); - if (sd->groups) { - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - sd->groups->cpu_power = power; - } -#endif - } - -#ifdef CONFIG_NUMA - for (i = 0; i < MAX_NUMNODES; i++) { - struct sched_group *sg = sched_group_nodes[i]; - int j; - - if (sg == NULL) - continue; -next_sg: - for_each_cpu_mask(j, sg->cpumask) { - struct sched_domain *sd; - int power; - - sd = &per_cpu(phys_domains, j); - if (j != first_cpu(sd->groups->cpumask)) { - /* - * Only add "power" once for each - * physical package. - */ - continue; - } - power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * - (cpus_weight(sd->groups->cpumask)-1) / 10; - - sg->cpu_power += power; - } - sg = sg->next; - if (sg != sched_group_nodes[i]) - goto next_sg; - } -#endif - - /* Attach the domains */ - for_each_cpu_mask(i, *cpu_map) { - struct sched_domain *sd; -#ifdef CONFIG_SCHED_SMT - sd = &per_cpu(cpu_domains, i); -#else - sd = &per_cpu(phys_domains, i); -#endif - cpu_attach_domain(sd, i); - } -} -/* - * Set up scheduler domains and groups. Callers must hold the hotplug lock. - */ -void arch_init_sched_domains(const cpumask_t *cpu_map) -{ - cpumask_t cpu_default_map; - - /* - * Setup mask for cpus without special case scheduling requirements. - * For now this just excludes isolated cpus, but could be used to - * exclude other special cases in the future. 
- */ - cpus_andnot(cpu_default_map, *cpu_map, cpu_isolated_map); - - build_sched_domains(&cpu_default_map); -} - -void arch_destroy_sched_domains(const cpumask_t *cpu_map) -{ -#ifdef CONFIG_NUMA - int i; - int cpu; - - for_each_cpu_mask(cpu, *cpu_map) { - struct sched_group *sched_group_allnodes - = sched_group_allnodes_bycpu[cpu]; - struct sched_group **sched_group_nodes - = sched_group_nodes_bycpu[cpu]; - - if (sched_group_allnodes) { - kfree(sched_group_allnodes); - sched_group_allnodes_bycpu[cpu] = NULL; - } - - if (!sched_group_nodes) - continue; - - for (i = 0; i < MAX_NUMNODES; i++) { - cpumask_t nodemask = node_to_cpumask(i); - struct sched_group *oldsg, *sg = sched_group_nodes[i]; - - cpus_and(nodemask, nodemask, *cpu_map); - if (cpus_empty(nodemask)) - continue; - - if (sg == NULL) - continue; - sg = sg->next; -next_sg: - oldsg = sg; - sg = sg->next; - kfree(oldsg); - if (oldsg != sched_group_nodes[i]) - goto next_sg; - } - kfree(sched_group_nodes); - sched_group_nodes_bycpu[cpu] = NULL; - } -#endif -} diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h index 91bbd1f22461..94e07e727395 100644 --- a/include/asm-ia64/processor.h +++ b/include/asm-ia64/processor.h @@ -20,9 +20,6 @@ #include #include -/* Our arch specific arch_init_sched_domain is in arch/ia64/kernel/domain.c */ -#define ARCH_HAS_SCHED_DOMAIN - #define IA64_NUM_DBG_REGS 8 /* * Limits for PMC and PMD are set to less than maximum architected values diff --git a/include/asm-ia64/topology.h b/include/asm-ia64/topology.h index 399bc29729fd..a9f738bf18a7 100644 --- a/include/asm-ia64/topology.h +++ b/include/asm-ia64/topology.h @@ -98,29 +98,6 @@ void build_cpu_to_node_map(void); .nr_balance_failed = 0, \ } -/* sched_domains SD_ALLNODES_INIT for IA64 NUMA machines */ -#define SD_ALLNODES_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 64, \ - .max_interval = 64*num_online_cpus(), \ - .busy_factor = 128, \ - 
.imbalance_pct = 133, \ - .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ - .busy_idx = 3, \ - .idle_idx = 3, \ - .newidle_idx = 0, /* unused */ \ - .wake_idx = 0, /* unused */ \ - .forkexec_idx = 0, /* unused */ \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 64, \ - .nr_balance_failed = 0, \ -} - #endif /* CONFIG_NUMA */ #include diff --git a/include/linux/sched.h b/include/linux/sched.h index b5a22ea80045..ea1b5f32ec5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -564,13 +564,6 @@ struct sched_domain { extern void partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2); -#ifdef ARCH_HAS_SCHED_DOMAIN -/* Useful helpers that arch setup code may use. Defined in kernel/sched.c */ -extern cpumask_t cpu_isolated_map; -extern void init_sched_build_groups(struct sched_group groups[], - cpumask_t span, int (*group_fn)(int cpu)); -extern void cpu_attach_domain(struct sched_domain *sd, int cpu); -#endif /* ARCH_HAS_SCHED_DOMAIN */ #endif /* CONFIG_SMP */ diff --git a/include/linux/topology.h b/include/linux/topology.h index 0320225e96da..3df1d474e5c5 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -135,6 +135,29 @@ } #endif +/* sched_domains SD_ALLNODES_INIT for NUMA machines */ +#define SD_ALLNODES_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 64, \ + .max_interval = 64*num_online_cpus(), \ + .busy_factor = 128, \ + .imbalance_pct = 133, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .busy_idx = 3, \ + .idle_idx = 3, \ + .newidle_idx = 0, /* unused */ \ + .wake_idx = 0, /* unused */ \ + .forkexec_idx = 0, /* unused */ \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 64, \ + .nr_balance_failed = 0, \ +} + #ifdef CONFIG_NUMA #ifndef SD_NODE_INIT #error Please define an appropriate SD_NODE_INIT in 
include/asm/topology.h!!! diff --git a/kernel/sched.c b/kernel/sched.c index 5f889d0cbfcc..50860ad5b624 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4779,7 +4779,7 @@ static int sd_parent_degenerate(struct sched_domain *sd, * Attach the domain 'sd' to 'cpu' as its base domain. Callers must * hold the hotplug lock. */ -void cpu_attach_domain(struct sched_domain *sd, int cpu) +static void cpu_attach_domain(struct sched_domain *sd, int cpu) { runqueue_t *rq = cpu_rq(cpu); struct sched_domain *tmp; @@ -4802,7 +4802,7 @@ void cpu_attach_domain(struct sched_domain *sd, int cpu) } /* cpus with isolated domains */ -cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; +static cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE; /* Setup the mask of cpus configured for isolated domains */ static int __init isolated_cpu_setup(char *str) @@ -4830,8 +4830,8 @@ __setup ("isolcpus=", isolated_cpu_setup); * covered by the given span, and will set each group's ->cpumask correctly, * and ->cpu_power to 0. */ -void init_sched_build_groups(struct sched_group groups[], - cpumask_t span, int (*group_fn)(int cpu)) +static void init_sched_build_groups(struct sched_group groups[], cpumask_t span, + int (*group_fn)(int cpu)) { struct sched_group *first = NULL, *last = NULL; cpumask_t covered = CPU_MASK_NONE; @@ -4864,12 +4864,85 @@ void init_sched_build_groups(struct sched_group groups[], last->next = first; } +#define SD_NODES_PER_DOMAIN 16 -#ifdef ARCH_HAS_SCHED_DOMAIN -extern void build_sched_domains(const cpumask_t *cpu_map); -extern void arch_init_sched_domains(const cpumask_t *cpu_map); -extern void arch_destroy_sched_domains(const cpumask_t *cpu_map); -#else +#ifdef CONFIG_NUMA +/** + * find_next_best_node - find the next node to include in a sched_domain + * @node: node whose sched_domain we're building + * @used_nodes: nodes already in the sched_domain + * + * Find the next node to include in a given scheduling domain. 
Simply + * finds the closest node not already in the @used_nodes map. + * + * Should use nodemask_t. + */ +static int find_next_best_node(int node, unsigned long *used_nodes) +{ + int i, n, val, min_val, best_node = 0; + + min_val = INT_MAX; + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Start at @node */ + n = (node + i) % MAX_NUMNODES; + + if (!nr_cpus_node(n)) + continue; + + /* Skip already used nodes */ + if (test_bit(n, used_nodes)) + continue; + + /* Simple min distance search */ + val = node_distance(node, n); + + if (val < min_val) { + min_val = val; + best_node = n; + } + } + + set_bit(best_node, used_nodes); + return best_node; +} + +/** + * sched_domain_node_span - get a cpumask for a node's sched_domain + * @node: node whose cpumask we're constructing + * @size: number of nodes to include in this span + * + * Given a node, construct a good cpumask for its sched_domain to span. It + * should be one that prevents unnecessary balancing, but also spreads tasks + * out optimally. + */ +static cpumask_t sched_domain_node_span(int node) +{ + int i; + cpumask_t span, nodemask; + DECLARE_BITMAP(used_nodes, MAX_NUMNODES); + + cpus_clear(span); + bitmap_zero(used_nodes, MAX_NUMNODES); + + nodemask = node_to_cpumask(node); + cpus_or(span, span, nodemask); + set_bit(node, used_nodes); + + for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { + int next_node = find_next_best_node(node, used_nodes); + nodemask = node_to_cpumask(next_node); + cpus_or(span, span, nodemask); + } + + return span; +} +#endif + +/* + * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we + * can switch it on easily if needed. 
+ */ #ifdef CONFIG_SCHED_SMT static DEFINE_PER_CPU(struct sched_domain, cpu_domains); static struct sched_group sched_group_cpus[NR_CPUS]; @@ -4891,36 +4964,20 @@ static int cpu_to_phys_group(int cpu) } #ifdef CONFIG_NUMA - -static DEFINE_PER_CPU(struct sched_domain, node_domains); -static struct sched_group sched_group_nodes[MAX_NUMNODES]; -static int cpu_to_node_group(int cpu) -{ - return cpu_to_node(cpu); -} -#endif - -#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) /* - * The domains setup code relies on siblings not spanning - * multiple nodes. Make sure the architecture has a proper - * siblings map: + * The init_sched_build_groups can't handle what we want to do with node + * groups, so roll our own. Now each node has its own list of groups which + * gets dynamically allocated. */ -static void check_sibling_maps(void) -{ - int i, j; +static DEFINE_PER_CPU(struct sched_domain, node_domains); +static struct sched_group *sched_group_nodes[MAX_NUMNODES]; - for_each_online_cpu(i) { - for_each_cpu_mask(j, cpu_sibling_map[i]) { - if (cpu_to_node(i) != cpu_to_node(j)) { - printk(KERN_INFO "warning: CPU %d siblings map " - "to different node - isolating " - "them.\n", i); - cpu_sibling_map[i] = cpumask_of_cpu(i); - break; - } - } - } +static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); +static struct sched_group sched_group_allnodes[MAX_NUMNODES]; + +static int cpu_to_allnodes_group(int cpu) +{ + return cpu_to_node(cpu); } #endif @@ -4928,7 +4985,7 @@ static void check_sibling_maps(void) * Build sched domains for a given set of cpus and attach the sched domains * to the individual cpus */ -static void build_sched_domains(const cpumask_t *cpu_map) +void build_sched_domains(const cpumask_t *cpu_map) { int i; @@ -4943,11 +5000,22 @@ static void build_sched_domains(const cpumask_t *cpu_map) cpus_and(nodemask, nodemask, *cpu_map); #ifdef CONFIG_NUMA + if (num_online_cpus() + > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { + sd = &per_cpu(allnodes_domains, 
i); + *sd = SD_ALLNODES_INIT; + sd->span = *cpu_map; + group = cpu_to_allnodes_group(i); + sd->groups = &sched_group_allnodes[group]; + p = sd; + } else + p = NULL; + sd = &per_cpu(node_domains, i); - group = cpu_to_node_group(i); *sd = SD_NODE_INIT; - sd->span = *cpu_map; - sd->groups = &sched_group_nodes[group]; + sd->span = sched_domain_node_span(cpu_to_node(i)); + sd->parent = p; + cpus_and(sd->span, sd->span, *cpu_map); #endif p = sd; @@ -4972,7 +5040,7 @@ static void build_sched_domains(const cpumask_t *cpu_map) #ifdef CONFIG_SCHED_SMT /* Set up CPU (sibling) groups */ - for_each_online_cpu(i) { + for_each_cpu_mask(i, *cpu_map) { cpumask_t this_sibling_map = cpu_sibling_map[i]; cpus_and(this_sibling_map, this_sibling_map, *cpu_map); if (i != first_cpu(this_sibling_map)) @@ -4997,8 +5065,74 @@ static void build_sched_domains(const cpumask_t *cpu_map) #ifdef CONFIG_NUMA /* Set up node groups */ - init_sched_build_groups(sched_group_nodes, *cpu_map, - &cpu_to_node_group); + init_sched_build_groups(sched_group_allnodes, *cpu_map, + &cpu_to_allnodes_group); + + for (i = 0; i < MAX_NUMNODES; i++) { + /* Set up node groups */ + struct sched_group *sg, *prev; + cpumask_t nodemask = node_to_cpumask(i); + cpumask_t domainspan; + cpumask_t covered = CPU_MASK_NONE; + int j; + + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + + domainspan = sched_domain_node_span(i); + cpus_and(domainspan, domainspan, *cpu_map); + + sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); + sched_group_nodes[i] = sg; + for_each_cpu_mask(j, nodemask) { + struct sched_domain *sd; + sd = &per_cpu(node_domains, j); + sd->groups = sg; + if (sd->groups == NULL) { + /* Turn off balancing if we have no groups */ + sd->flags = 0; + } + } + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", i); + continue; + } + sg->cpu_power = 0; + sg->cpumask = nodemask; + cpus_or(covered, covered, nodemask); + prev = sg; + + for (j = 0; j < MAX_NUMNODES; 
j++) { + cpumask_t tmp, notcovered; + int n = (i + j) % MAX_NUMNODES; + + cpus_complement(notcovered, covered); + cpus_and(tmp, notcovered, *cpu_map); + cpus_and(tmp, tmp, domainspan); + if (cpus_empty(tmp)) + break; + + nodemask = node_to_cpumask(n); + cpus_and(tmp, tmp, nodemask); + if (cpus_empty(tmp)) + continue; + + sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); + if (!sg) { + printk(KERN_WARNING + "Can not alloc domain group for node %d\n", j); + break; + } + sg->cpu_power = 0; + sg->cpumask = tmp; + cpus_or(covered, covered, tmp); + prev->next = sg; + prev = sg; + } + prev->next = sched_group_nodes[i]; + } #endif /* Calculate CPU power for physical packages and nodes */ @@ -5017,14 +5151,46 @@ static void build_sched_domains(const cpumask_t *cpu_map) sd->groups->cpu_power = power; #ifdef CONFIG_NUMA - if (i == first_cpu(sd->groups->cpumask)) { - /* Only add "power" once for each physical package. */ - sd = &per_cpu(node_domains, i); - sd->groups->cpu_power += power; + sd = &per_cpu(allnodes_domains, i); + if (sd->groups) { + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + sd->groups->cpu_power = power; } #endif } +#ifdef CONFIG_NUMA + for (i = 0; i < MAX_NUMNODES; i++) { + struct sched_group *sg = sched_group_nodes[i]; + int j; + + if (sg == NULL) + continue; +next_sg: + for_each_cpu_mask(j, sg->cpumask) { + struct sched_domain *sd; + int power; + + sd = &per_cpu(phys_domains, j); + if (j != first_cpu(sd->groups->cpumask)) { + /* + * Only add "power" once for each + * physical package. 
+ */ + continue; + } + power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * + (cpus_weight(sd->groups->cpumask)-1) / 10; + + sg->cpu_power += power; + } + sg = sg->next; + if (sg != sched_group_nodes[i]) + goto next_sg; + } +#endif + /* Attach the domains */ for_each_cpu_mask(i, *cpu_map) { struct sched_domain *sd; @@ -5039,13 +5205,10 @@ static void build_sched_domains(const cpumask_t *cpu_map) /* * Set up scheduler domains and groups. Callers must hold the hotplug lock. */ -static void arch_init_sched_domains(cpumask_t *cpu_map) +static void arch_init_sched_domains(const cpumask_t *cpu_map) { cpumask_t cpu_default_map; -#if defined(CONFIG_SCHED_SMT) && defined(CONFIG_NUMA) - check_sibling_maps(); -#endif /* * Setup mask for cpus without special case scheduling requirements. * For now this just excludes isolated cpus, but could be used to @@ -5058,10 +5221,29 @@ static void arch_init_sched_domains(cpumask_t *cpu_map) static void arch_destroy_sched_domains(const cpumask_t *cpu_map) { - /* Do nothing: everything is statically allocated. */ -} +#ifdef CONFIG_NUMA + int i; + for (i = 0; i < MAX_NUMNODES; i++) { + cpumask_t nodemask = node_to_cpumask(i); + struct sched_group *oldsg, *sg = sched_group_nodes[i]; -#endif /* ARCH_HAS_SCHED_DOMAIN */ + cpus_and(nodemask, nodemask, *cpu_map); + if (cpus_empty(nodemask)) + continue; + + if (sg == NULL) + continue; + sg = sg->next; +next_sg: + oldsg = sg; + sg = sg->next; + kfree(oldsg); + if (oldsg != sched_group_nodes[i]) + goto next_sg; + sched_group_nodes[i] = NULL; + } +#endif +} /* * Detach sched domains from a group of cpus specified in cpu_map -- cgit v1.2.3 From 3f4bb1f4199b7dc0c958447b1e4898980013b884 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Sep 2005 15:18:16 -0700 Subject: [PATCH] struct dentry: place d_hash close to d_parent and d_name to speedup lookups dentry cache uses sophisticated RCU technology (and prefetching if available) but touches 2 cache lines per dentry during hlist lookup. 
This patch moves d_hash into the same cache line as the d_parent and d_name fields so that: 1) One cache line is needed instead of two. 2) The hlist_for_each_rcu() prefetching has a chance to bring all the needed data in advance, not only the part that includes d_hash.next. I also changed one old comment that was wrong for 64-bit platforms. A further optimisation would be to split dentry into two parts, one that is mostly read and one that is written (d_count/d_lock), to avoid false sharing on SMP/NUMA, but this would need different field placement depending on whether the platform is 32-bit or 64-bit. Signed-off-by: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 50be290d24d2..ab04b4f9b0db 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -88,8 +88,9 @@ struct dentry { * negative */ /* * The next three fields are touched by __d_lookup. Place them here - * so they all fit in a 16-byte range, with 16-byte alignment. + * so they all fit in a cache line. */ + struct hlist_node d_hash; /* lookup hash list */ struct dentry *d_parent; /* parent directory */ struct qstr d_name; @@ -103,7 +104,6 @@ struct dentry { void *d_fsdata; /* fs-specific data */ struct rcu_head d_rcu; struct dcookie_struct *d_cookie; /* cookie, if any */ - struct hlist_node d_hash; /* lookup hash list */ int d_mounted; unsigned char d_iname[DNAME_INLINE_LEN_MIN]; /* small names */ }; -- cgit v1.2.3 From e89bbd3a0b3c054d9a94feb0db7bbae1cdb99e54 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:21 -0700 Subject: [PATCH] remove iattr.ia_attr_flags Remove unused ia_attr_flags from struct iattr, and related defines.
Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hostfs/hostfs.h | 1 - include/linux/fs.h | 10 ---------- 2 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 67bca0d4a33b..cca3fb693f99 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h @@ -49,7 +49,6 @@ struct hostfs_iattr { struct timespec ia_atime; struct timespec ia_mtime; struct timespec ia_ctime; - unsigned int ia_attr_flags; }; extern int stat_file(const char *path, unsigned long long *inode_out, diff --git a/include/linux/fs.h b/include/linux/fs.h index 2036747c7d1f..57e294bf83f4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -281,18 +281,8 @@ struct iattr { struct timespec ia_atime; struct timespec ia_mtime; struct timespec ia_ctime; - unsigned int ia_attr_flags; }; -/* - * This is the inode attributes flag definitions - */ -#define ATTR_FLAG_SYNCRONOUS 1 /* Syncronous write */ -#define ATTR_FLAG_NOATIME 2 /* Don't update atime */ -#define ATTR_FLAG_APPEND 4 /* Append-only file */ -#define ATTR_FLAG_IMMUTABLE 8 /* Immutable file */ -#define ATTR_FLAG_NODIRATIME 16 /* Don't update atime for directory */ - /* * Includes for diskquotas. */ -- cgit v1.2.3 From ab8d11beb46f0bd0617e04205c01f5c1fe845b61 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:24 -0700 Subject: [PATCH] remove duplicated code from proc and ptrace Extract common code used by ptrace_attach() and may_ptrace_attach() into a separate function. 
Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 35 ++++------------------------------- include/linux/ptrace.h | 1 + kernel/ptrace.c | 41 ++++++++++++++++++++++++++++------------- 3 files changed, 33 insertions(+), 44 deletions(-) (limited to 'include/linux') diff --git a/fs/proc/base.c b/fs/proc/base.c index 24eed139e54e..84751f3f52d5 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -346,33 +346,6 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf (task->state == TASK_STOPPED || task->state == TASK_TRACED) && \ security_ptrace(current,task) == 0)) -static int may_ptrace_attach(struct task_struct *task) -{ - int retval = 0; - - task_lock(task); - - if (!task->mm) - goto out; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto out; - rmb(); - if (task->mm->dumpable != 1 && !capable(CAP_SYS_PTRACE)) - goto out; - if (security_ptrace(current, task)) - goto out; - - retval = 1; -out: - task_unlock(task); - return retval; -} - static int proc_pid_environ(struct task_struct *task, char * buffer) { int res = 0; @@ -382,7 +355,7 @@ static int proc_pid_environ(struct task_struct *task, char * buffer) if (len > PAGE_SIZE) len = PAGE_SIZE; res = access_process_vm(task, mm->env_start, buffer, len, 0); - if (!may_ptrace_attach(task)) + if (!ptrace_may_attach(task)) res = -ESRCH; mmput(mm); } @@ -685,7 +658,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, int ret = -ESRCH; struct mm_struct *mm; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) goto out; ret = -ENOMEM; @@ -711,7 +684,7 @@ static ssize_t mem_read(struct file * file, char __user * buf, this_len = (count > PAGE_SIZE) ? 
PAGE_SIZE : count; retval = access_process_vm(task, src, page, this_len, 0); - if (!retval || !MAY_PTRACE(task) || !may_ptrace_attach(task)) { + if (!retval || !MAY_PTRACE(task) || !ptrace_may_attach(task)) { if (!ret) ret = -EIO; break; @@ -749,7 +722,7 @@ static ssize_t mem_write(struct file * file, const char * buf, struct task_struct *task = proc_task(file->f_dentry->d_inode); unsigned long dst = *ppos; - if (!MAY_PTRACE(task) || !may_ptrace_attach(task)) + if (!MAY_PTRACE(task) || !ptrace_may_attach(task)) return -ESRCH; page = (char *)__get_free_page(GFP_USER); diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 2afdafb62123..dc6f3647bfbc 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -90,6 +90,7 @@ extern void __ptrace_link(struct task_struct *child, struct task_struct *new_parent); extern void __ptrace_unlink(struct task_struct *child); extern void ptrace_untrace(struct task_struct *child); +extern int ptrace_may_attach(struct task_struct *task); static inline void ptrace_link(struct task_struct *child, struct task_struct *new_parent) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8dcb8f6288bc..019e04ec065a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -118,6 +118,33 @@ int ptrace_check_attach(struct task_struct *child, int kill) return ret; } +static int may_attach(struct task_struct *task) +{ + if (!task->mm) + return -EPERM; + if (((current->uid != task->euid) || + (current->uid != task->suid) || + (current->uid != task->uid) || + (current->gid != task->egid) || + (current->gid != task->sgid) || + (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + return -EPERM; + smp_rmb(); + if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) + return -EPERM; + + return security_ptrace(current, task); +} + +int ptrace_may_attach(struct task_struct *task) +{ + int err; + task_lock(task); + err = may_attach(task); + task_unlock(task); + return !err; +} + int ptrace_attach(struct task_struct *task) { int retval; 
@@ -127,22 +154,10 @@ int ptrace_attach(struct task_struct *task) goto bad; if (task == current) goto bad; - if (!task->mm) - goto bad; - if(((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) - goto bad; - smp_rmb(); - if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE)) - goto bad; /* the same process cannot be attached many times */ if (task->ptrace & PT_PTRACED) goto bad; - retval = security_ptrace(current, task); + retval = may_attach(task); if (retval) goto bad; -- cgit v1.2.3 From e922efc342d565a38eed3af377ff403f52148864 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 6 Sep 2005 15:18:25 -0700 Subject: [PATCH] remove duplicated sys_open32() code from 64bit archs 64 bit architectures all implement their own compatibility sys_open(), when in fact the difference is simply not forcing the O_LARGEFILE flag. So use a common function instead.
Signed-off-by: Miklos Szeredi Cc: Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/ia32/ia32_entry.S | 2 +- arch/ia64/ia32/sys_ia32.c | 31 ------------------------------- arch/ppc64/kernel/misc.S | 2 +- arch/ppc64/kernel/sys_ppc32.c | 31 ------------------------------- arch/sparc64/kernel/sys_sparc32.c | 24 +----------------------- arch/x86_64/ia32/ia32entry.S | 2 +- arch/x86_64/ia32/sys_ia32.c | 26 -------------------------- fs/compat.c | 10 ++++++++++ fs/open.c | 19 +++++++++++-------- include/linux/fs.h | 1 + 10 files changed, 26 insertions(+), 122 deletions(-) (limited to 'include/linux') diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S index 829a6d80711c..0708edb06cc4 100644 --- a/arch/ia64/ia32/ia32_entry.S +++ b/arch/ia64/ia32/ia32_entry.S @@ -215,7 +215,7 @@ ia32_syscall_table: data8 sys32_fork data8 sys_read data8 sys_write - data8 sys32_open /* 5 */ + data8 compat_sys_open /* 5 */ data8 sys_close data8 sys32_waitpid data8 sys_creat diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index c1e20d65dd6c..e29a8a55486a 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -2359,37 +2359,6 @@ sys32_brk (unsigned int brk) return ret; } -/* - * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag. 
- */ -asmlinkage long -sys32_open (const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; -} - /* Structure for ia32 emulation on ia64 */ struct epoll_event32 { diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index 474df0a862bf..2164bd7b4ef6 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -957,7 +957,7 @@ _GLOBAL(sys_call_table32) .llong .ppc_fork .llong .sys_read .llong .sys_write - .llong .sys32_open /* 5 */ + .llong .compat_sys_open /* 5 */ .llong .sys_close .llong .sys32_waitpid .llong .sys32_creat diff --git a/arch/ppc64/kernel/sys_ppc32.c b/arch/ppc64/kernel/sys_ppc32.c index 206619080e66..214914a95a50 100644 --- a/arch/ppc64/kernel/sys_ppc32.c +++ b/arch/ppc64/kernel/sys_ppc32.c @@ -867,37 +867,6 @@ off_t ppc32_lseek(unsigned int fd, u32 offset, unsigned int origin) return sys_lseek(fd, (int)offset, origin); } -/* - * This is just a version for 32-bit applications which does - * not force O_LARGEFILE on. 
- */ -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file * f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; -} - /* Note: it is necessary to treat bufsiz as an unsigned int, * with the corresponding cast to a signed int to insure that the * proper conversion (sign extension) between the register representation of a signed int (msr in 32-bit mode) diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c index 1d3aa588df8a..7f6239ed2521 100644 --- a/arch/sparc64/kernel/sys_sparc32.c +++ b/arch/sparc64/kernel/sys_sparc32.c @@ -1002,29 +1002,7 @@ asmlinkage long sys32_adjtimex(struct timex32 __user *utp) asmlinkage long sparc32_open(const char __user *filename, int flags, int mode) { - char * tmp; - int fd, error; - - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file * f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) - goto out_error; - fd_install(fd, f); - } -out: - putname(tmp); - } - return fd; - -out_error: - put_unused_fd(fd); - fd = error; - goto out; + return do_sys_open(filename, flags, mode); } extern unsigned long do_mremap(unsigned long addr, diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index c45d6a05b984..f174083d5567 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S @@ -307,7 +307,7 @@ ia32_sys_call_table: .quad stub32_fork .quad sys_read .quad sys_write - .quad sys32_open /* 5 */ + .quad compat_sys_open /* 5 */ .quad sys_close .quad sys32_waitpid .quad sys_creat diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c 
index be996d1b691e..04d80406ce4f 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c @@ -969,32 +969,6 @@ long sys32_kill(int pid, int sig) return sys_kill(pid, sig); } -asmlinkage long sys32_open(const char __user * filename, int flags, int mode) -{ - char * tmp; - int fd, error; - - /* don't force O_LARGEFILE */ - tmp = getname(filename); - fd = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - fd = get_unused_fd(); - if (fd >= 0) { - struct file *f = filp_open(tmp, flags, mode); - error = PTR_ERR(f); - if (IS_ERR(f)) { - put_unused_fd(fd); - fd = error; - } else { - fsnotify_open(f->f_dentry); - fd_install(fd, f); - } - } - putname(tmp); - } - return fd; -} - extern asmlinkage long sys_timer_create(clockid_t which_clock, struct sigevent __user *timer_event_spec, diff --git a/fs/compat.c b/fs/compat.c index 2eb03c49b07c..8c665705c6a0 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1274,6 +1274,16 @@ out: return ret; } +/* + * Exactly like fs/open.c:sys_open(), except that it doesn't set the + * O_LARGEFILE flag. + */ +asmlinkage long +compat_sys_open(const char __user *filename, int flags, int mode) +{ + return do_sys_open(filename, flags, mode); +} + /* * compat_count() counts the number of arguments/envelopes. 
It is basically * a copy of count() from fs/exec.c, except that it works with 32 bit argv diff --git a/fs/open.c b/fs/open.c index 32bf05e2996d..4ee2dcc31c28 100644 --- a/fs/open.c +++ b/fs/open.c @@ -933,16 +933,11 @@ void fastcall fd_install(unsigned int fd, struct file * file) EXPORT_SYMBOL(fd_install); -asmlinkage long sys_open(const char __user * filename, int flags, int mode) +long do_sys_open(const char __user *filename, int flags, int mode) { - char * tmp; - int fd; + char *tmp = getname(filename); + int fd = PTR_ERR(tmp); - if (force_o_largefile()) - flags |= O_LARGEFILE; - - tmp = getname(filename); - fd = PTR_ERR(tmp); if (!IS_ERR(tmp)) { fd = get_unused_fd(); if (fd >= 0) { @@ -959,6 +954,14 @@ asmlinkage long sys_open(const char __user * filename, int flags, int mode) } return fd; } + +asmlinkage long sys_open(const char __user *filename, int flags, int mode) +{ + if (force_o_largefile()) + flags |= O_LARGEFILE; + + return do_sys_open(filename, flags, mode); +} EXPORT_SYMBOL_GPL(sys_open); #ifndef __alpha__ diff --git a/include/linux/fs.h b/include/linux/fs.h index 57e294bf83f4..7e1b589842af 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1280,6 +1280,7 @@ static inline int break_lease(struct inode *inode, unsigned int mode) /* fs/open.c */ extern int do_truncate(struct dentry *, loff_t start); +extern long do_sys_open(const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); extern int filp_close(struct file *, fl_owner_t id); -- cgit v1.2.3 From ebad6a4230bdb5927495e28bc7837f515bf667a7 Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Tue, 6 Sep 2005 15:18:29 -0700 Subject: [PATCH] dmi: add onboard devices discovery This patch adds onboard devices and IPMI BMC discovery into DMI scan code. Drivers can use dmi_find_device() function to search for devices by type and name. 
Signed-off-by: Andrey Panin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 102 ++++++++++++++++++++++++++++++++++++++------ include/linux/dmi.h | 36 ++++++++++++++-- 2 files changed, 123 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ae1a1aed2fc0..c4a73855e38c 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -6,13 +6,6 @@ #include -struct dmi_header { - u8 type; - u8 length; - u16 handle; -}; - - static char * __init dmi_string(struct dmi_header *dm, u8 s) { u8 *bp = ((u8 *) dm) + dm->length; @@ -88,6 +81,7 @@ static int __init dmi_checksum(u8 *buf) } static char *dmi_ident[DMI_STRING_MAX]; +static LIST_HEAD(dmi_devices); /* * Save a DMI string @@ -106,6 +100,58 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) dmi_ident[slot] = p; } +static void __init dmi_save_devices(struct dmi_header *dm) +{ + int i, count = (dm->length - sizeof(struct dmi_header)) / 2; + struct dmi_device *dev; + + for (i = 0; i < count; i++) { + char *d = ((char *) dm) + (i * 2); + + /* Skip disabled device */ + if ((*d & 0x80) == 0) + continue; + + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_devices: out of memory.\n"); + break; + } + + dev->type = *d++ & 0x7f; + dev->name = dmi_string(dm, *d); + dev->device_data = NULL; + + list_add(&dev->list, &dmi_devices); + } +} + +static void __init dmi_save_ipmi_device(struct dmi_header *dm) +{ + struct dmi_device *dev; + void * data; + + data = alloc_bootmem(dm->length); + if (data == NULL) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); + return; + } + + memcpy(data, dm, dm->length); + + dev = alloc_bootmem(sizeof(*dev)); + if (!dev) { + printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n"); + return; + } + + dev->type = DMI_DEV_TYPE_IPMI; + dev->name = "IPMI controller"; + dev->device_data = data; + + 
list_add(&dev->list, &dmi_devices); +} + /* * Process a DMI table entry. Right now all we care about are the BIOS * and machine entries. For 2.5 we should pull the smbus controller info @@ -113,25 +159,28 @@ static void __init dmi_save_ident(struct dmi_header *dm, int slot, int string) */ static void __init dmi_decode(struct dmi_header *dm) { - u8 *data __attribute__((__unused__)) = (u8 *)dm; - switch(dm->type) { - case 0: + case 0: /* BIOS Information */ dmi_save_ident(dm, DMI_BIOS_VENDOR, 4); dmi_save_ident(dm, DMI_BIOS_VERSION, 5); dmi_save_ident(dm, DMI_BIOS_DATE, 8); break; - case 1: + case 1: /* System Information */ dmi_save_ident(dm, DMI_SYS_VENDOR, 4); dmi_save_ident(dm, DMI_PRODUCT_NAME, 5); dmi_save_ident(dm, DMI_PRODUCT_VERSION, 6); dmi_save_ident(dm, DMI_PRODUCT_SERIAL, 7); break; - case 2: + case 2: /* Base Board Information */ dmi_save_ident(dm, DMI_BOARD_VENDOR, 4); dmi_save_ident(dm, DMI_BOARD_NAME, 5); dmi_save_ident(dm, DMI_BOARD_VERSION, 6); break; + case 10: /* Onboard Devices Information */ + dmi_save_devices(dm); + break; + case 38: /* IPMI Device Information */ + dmi_save_ipmi_device(dm); } } @@ -221,3 +270,32 @@ char *dmi_get_system_info(int field) return dmi_ident[field]; } EXPORT_SYMBOL(dmi_get_system_info); + +/** + * dmi_find_device - find onboard device by type/name + * @type: device type or %DMI_DEV_TYPE_ANY to match all device types + * @desc: device name string or %NULL to match all + * @from: previous device found in search, or %NULL for new search. + * + * Iterates through the list of known onboard devices. If a device is + * found with a matching @vendor and @device, a pointer to its device + * structure is returned. Otherwise, %NULL is returned. + * A new search is initiated by passing %NULL to the @from argument. + * If @from is not %NULL, searches continue from next device. + */ +struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) +{ + struct list_head *d, *head = from ? 
&from->list : &dmi_devices; + + for(d = head->next; d != &dmi_devices; d = d->next) { + struct dmi_device *dev = list_entry(d, struct dmi_device, list); + + if (((type == DMI_DEV_TYPE_ANY) || (dev->type == type)) && + ((name == NULL) || (strcmp(dev->name, name) == 0))) + return dev; + } + + return NULL; +} +EXPORT_SYMBOL(dmi_find_device); diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 5e93e6dce9a4..c30175e8dec6 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -1,6 +1,8 @@ #ifndef __DMI_H__ #define __DMI_H__ +#include + enum dmi_field { DMI_NONE, DMI_BIOS_VENDOR, @@ -16,6 +18,24 @@ enum dmi_field { DMI_STRING_MAX, }; +enum dmi_device_type { + DMI_DEV_TYPE_ANY = 0, + DMI_DEV_TYPE_OTHER, + DMI_DEV_TYPE_UNKNOWN, + DMI_DEV_TYPE_VIDEO, + DMI_DEV_TYPE_SCSI, + DMI_DEV_TYPE_ETHERNET, + DMI_DEV_TYPE_TOKENRING, + DMI_DEV_TYPE_SOUND, + DMI_DEV_TYPE_IPMI = -1 +}; + +struct dmi_header { + u8 type; + u8 length; + u16 handle; +}; + /* * DMI callbacks for problem boards */ @@ -26,22 +46,32 @@ struct dmi_strmatch { struct dmi_system_id { int (*callback)(struct dmi_system_id *); - char *ident; + const char *ident; struct dmi_strmatch matches[4]; void *driver_data; }; -#define DMI_MATCH(a,b) { a, b } +#define DMI_MATCH(a, b) { a, b } + +struct dmi_device { + struct list_head list; + int type; + const char *name; + void *device_data; /* Type specific data */ +}; #if defined(CONFIG_X86) && !defined(CONFIG_X86_64) extern int dmi_check_system(struct dmi_system_id *list); extern char * dmi_get_system_info(int field); - +extern struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from); #else static inline int dmi_check_system(struct dmi_system_id *list) { return 0; } static inline char * dmi_get_system_info(int field) { return NULL; } +static struct dmi_device * dmi_find_device(int type, const char *name, + struct dmi_device *from) { return NULL; } #endif -- cgit v1.2.3 From dd3927105b6f65afb7dac17682172cdfb86d3f00 Mon Sep 17 
00:00:00 2001 From: Pekka J Enberg Date: Tue, 6 Sep 2005 15:18:31 -0700 Subject: [PATCH] introduce and use kzalloc This patch introduces a kzalloc wrapper and converts kernel/ to use it. It saves a little program text. Signed-off-by: Pekka Enberg Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 16 +++++++++++++++- kernel/intermodule.c | 3 +-- kernel/params.c | 4 ++-- kernel/power/pm.c | 3 +-- kernel/resource.c | 3 +-- kernel/workqueue.c | 3 +-- mm/slab.c | 18 ++++++------------ 7 files changed, 27 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 80b2dfde2e80..42a6bea58af3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -99,7 +99,21 @@ found: return __kmalloc(size, flags); } -extern void *kcalloc(size_t, size_t, unsigned int __nocast); +extern void *kzalloc(size_t, unsigned int __nocast); + +/** + * kcalloc - allocate memory for an array. The memory is set to zero. + * @n: number of elements. + * @size: element size. + * @flags: the type of memory to allocate. 
+ */ +static inline void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +{ + if (n != 0 && size > INT_MAX / n) + return NULL; + return kzalloc(n * size, flags); +} + extern void kfree(const void *); extern unsigned int ksize(const void *); diff --git a/kernel/intermodule.c b/kernel/intermodule.c index 388977f3e9b7..0cbe633420fb 100644 --- a/kernel/intermodule.c +++ b/kernel/intermodule.c @@ -39,7 +39,7 @@ void inter_module_register(const char *im_name, struct module *owner, const void struct list_head *tmp; struct inter_module_entry *ime, *ime_new; - if (!(ime_new = kmalloc(sizeof(*ime), GFP_KERNEL))) { + if (!(ime_new = kzalloc(sizeof(*ime), GFP_KERNEL))) { /* Overloaded kernel, not fatal */ printk(KERN_ERR "Aiee, inter_module_register: cannot kmalloc entry for '%s'\n", @@ -47,7 +47,6 @@ void inter_module_register(const char *im_name, struct module *owner, const void kmalloc_failed = 1; return; } - memset(ime_new, 0, sizeof(*ime_new)); ime_new->im_name = im_name; ime_new->owner = owner; ime_new->userdata = userdata; diff --git a/kernel/params.c b/kernel/params.c index d586c35ef8fc..fbf173215fd2 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -542,8 +542,8 @@ static void __init kernel_param_sysfs_setup(const char *name, { struct module_kobject *mk; - mk = kmalloc(sizeof(struct module_kobject), GFP_KERNEL); - memset(mk, 0, sizeof(struct module_kobject)); + mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); + BUG_ON(!mk); mk->mod = THIS_MODULE; kobj_set_kset_s(mk, module_subsys); diff --git a/kernel/power/pm.c b/kernel/power/pm.c index 61deda04e39e..159149321b3c 100644 --- a/kernel/power/pm.c +++ b/kernel/power/pm.c @@ -60,9 +60,8 @@ struct pm_dev *pm_register(pm_dev_t type, unsigned long id, pm_callback callback) { - struct pm_dev *dev = kmalloc(sizeof(struct pm_dev), GFP_KERNEL); + struct pm_dev *dev = kzalloc(sizeof(struct pm_dev), GFP_KERNEL); if (dev) { - memset(dev, 0, sizeof(*dev)); dev->type = type; dev->id = id; dev->callback = 
callback; diff --git a/kernel/resource.c b/kernel/resource.c index 26967e042201..92285d822de6 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -430,10 +430,9 @@ EXPORT_SYMBOL(adjust_resource); */ struct resource * __request_region(struct resource *parent, unsigned long start, unsigned long n, const char *name) { - struct resource *res = kmalloc(sizeof(*res), GFP_KERNEL); + struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL); if (res) { - memset(res, 0, sizeof(*res)); res->name = name; res->start = start; res->end = start + n - 1; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index a3de837a8ddd..91bacb13a7e2 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -308,10 +308,9 @@ struct workqueue_struct *__create_workqueue(const char *name, struct workqueue_struct *wq; struct task_struct *p; - wq = kmalloc(sizeof(*wq), GFP_KERNEL); + wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) return NULL; - memset(wq, 0, sizeof(*wq)); wq->name = name; /* We don't need the distraction of CPUs appearing and vanishing. */ diff --git a/mm/slab.c b/mm/slab.c index a9ff4f7f9860..d7c4443991fe 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2558,24 +2558,18 @@ void kmem_cache_free(kmem_cache_t *cachep, void *objp) EXPORT_SYMBOL(kmem_cache_free); /** - * kcalloc - allocate memory for an array. The memory is set to zero. - * @n: number of elements. - * @size: element size. + * kzalloc - allocate memory. The memory is set to zero. + * @size: how many bytes of memory are required. * @flags: the type of memory to allocate. 
*/ -void *kcalloc(size_t n, size_t size, unsigned int __nocast flags) +void *kzalloc(size_t size, unsigned int __nocast flags) { - void *ret = NULL; - - if (n != 0 && size > INT_MAX / n) - return ret; - - ret = kmalloc(n * size, flags); + void *ret = kmalloc(size, flags); if (ret) - memset(ret, 0, n * size); + memset(ret, 0, size); return ret; } -EXPORT_SYMBOL(kcalloc); +EXPORT_SYMBOL(kzalloc); /** * kfree - free previously allocated memory -- cgit v1.2.3 From c14979b993021377228958498937bcdd9539cbce Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:38 -0700 Subject: [PATCH] ipmi: add per-channel IPMB addresses IPMI allows multiple IPMB channels on a single interface, and each channel might have a different IPMB address. However, the driver has only one IPMB address that it uses for everything. This patch adds new IOCTLS and a new internal interface for setting per-channel IPMB addresses and LUNs. New systems are coming out with support for multiple IPMB channels, and they are broken without this patch. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_devintf.c | 94 ++++++++++++++++++++++--- drivers/char/ipmi/ipmi_msghandler.c | 137 +++++++++++++++++++++++++----------- include/linux/ipmi.h | 30 ++++++-- 3 files changed, 201 insertions(+), 60 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_devintf.c b/drivers/char/ipmi/ipmi_devintf.c index e0a53570fea1..5571e92c520f 100644 --- a/drivers/char/ipmi/ipmi_devintf.c +++ b/drivers/char/ipmi/ipmi_devintf.c @@ -411,6 +411,7 @@ static int ipmi_ioctl(struct inode *inode, break; } + /* The next four are legacy, not per-channel. 
*/ case IPMICTL_SET_MY_ADDRESS_CMD: { unsigned int val; @@ -420,22 +421,25 @@ static int ipmi_ioctl(struct inode *inode, break; } - ipmi_set_my_address(priv->user, val); - rv = 0; + rv = ipmi_set_my_address(priv->user, 0, val); break; } case IPMICTL_GET_MY_ADDRESS_CMD: { - unsigned int val; + unsigned int val; + unsigned char rval; + + rv = ipmi_get_my_address(priv->user, 0, &rval); + if (rv) + break; - val = ipmi_get_my_address(priv->user); + val = rval; if (copy_to_user(arg, &val, sizeof(val))) { rv = -EFAULT; break; } - rv = 0; break; } @@ -448,24 +452,94 @@ static int ipmi_ioctl(struct inode *inode, break; } - ipmi_set_my_LUN(priv->user, val); - rv = 0; + rv = ipmi_set_my_LUN(priv->user, 0, val); break; } case IPMICTL_GET_MY_LUN_CMD: { - unsigned int val; + unsigned int val; + unsigned char rval; + + rv = ipmi_get_my_LUN(priv->user, 0, &rval); + if (rv) + break; - val = ipmi_get_my_LUN(priv->user); + val = rval; if (copy_to_user(arg, &val, sizeof(val))) { rv = -EFAULT; break; } - rv = 0; break; } + + case IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD: + { + struct ipmi_channel_lun_address_set val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv = -EFAULT; + break; + } + + return ipmi_set_my_address(priv->user, val.channel, val.value); + break; + } + + case IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD: + { + struct ipmi_channel_lun_address_set val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv = -EFAULT; + break; + } + + rv = ipmi_get_my_address(priv->user, val.channel, &val.value); + if (rv) + break; + + if (copy_to_user(arg, &val, sizeof(val))) { + rv = -EFAULT; + break; + } + break; + } + + case IPMICTL_SET_MY_CHANNEL_LUN_CMD: + { + struct ipmi_channel_lun_address_set val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv = -EFAULT; + break; + } + + rv = ipmi_set_my_LUN(priv->user, val.channel, val.value); + break; + } + + case IPMICTL_GET_MY_CHANNEL_LUN_CMD: + { + struct ipmi_channel_lun_address_set val; + + if (copy_from_user(&val, arg, sizeof(val))) { + rv 
= -EFAULT; + break; + } + + rv = ipmi_get_my_LUN(priv->user, val.channel, &val.value); + if (rv) + break; + + if (copy_to_user(arg, &val, sizeof(val))) { + rv = -EFAULT; + break; + } + break; + } + case IPMICTL_SET_TIMING_PARMS_CMD: { struct ipmi_timing_parms parms; diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e16c13fe698d..84d477c6f925 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -124,6 +124,14 @@ struct ipmi_channel { unsigned char medium; unsigned char protocol; + + /* My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, + but may be changed by the user. */ + unsigned char address; + + /* My LUN. This should generally stay the SMS LUN, but just in + case... */ + unsigned char lun; }; #ifdef CONFIG_PROC_FS @@ -135,7 +143,7 @@ struct ipmi_proc_entry #endif #define IPMI_IPMB_NUM_SEQ 64 -#define IPMI_MAX_CHANNELS 8 +#define IPMI_MAX_CHANNELS 16 struct ipmi_smi { /* What interface number are we? */ @@ -199,14 +207,6 @@ struct ipmi_smi this is registered. */ ipmi_user_t all_cmd_rcvr; - /* My slave address. This is initialized to IPMI_BMC_SLAVE_ADDR, - but may be changed by the user. */ - unsigned char my_address; - - /* My LUN. This should generally stay the SMS LUN, but just in - case... */ - unsigned char my_lun; - /* The event receiver for my BMC, only really used at panic shutdown as a place to store this. 
*/ unsigned char event_receiver; @@ -766,26 +766,44 @@ void ipmi_get_version(ipmi_user_t user, *minor = user->intf->version_minor; } -void ipmi_set_my_address(ipmi_user_t user, - unsigned char address) +int ipmi_set_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char address) { - user->intf->my_address = address; + if (channel >= IPMI_MAX_CHANNELS) + return -EINVAL; + user->intf->channels[channel].address = address; + return 0; } -unsigned char ipmi_get_my_address(ipmi_user_t user) +int ipmi_get_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char *address) { - return user->intf->my_address; + if (channel >= IPMI_MAX_CHANNELS) + return -EINVAL; + *address = user->intf->channels[channel].address; + return 0; } -void ipmi_set_my_LUN(ipmi_user_t user, - unsigned char LUN) +int ipmi_set_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char LUN) { - user->intf->my_lun = LUN & 0x3; + if (channel >= IPMI_MAX_CHANNELS) + return -EINVAL; + user->intf->channels[channel].lun = LUN & 0x3; + return 0; } -unsigned char ipmi_get_my_LUN(ipmi_user_t user) +int ipmi_get_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char *address) { - return user->intf->my_lun; + if (channel >= IPMI_MAX_CHANNELS) + return -EINVAL; + *address = user->intf->channels[channel].lun; + return 0; } int ipmi_set_gets_events(ipmi_user_t user, int val) @@ -1213,7 +1231,7 @@ static inline int i_ipmi_request(ipmi_user_t user, unsigned char ipmb_seq; long seqid; - if (addr->channel > IPMI_NUM_CHANNELS) { + if (addr->channel >= IPMI_NUM_CHANNELS) { spin_lock_irqsave(&intf->counter_lock, flags); intf->sent_invalid_commands++; spin_unlock_irqrestore(&intf->counter_lock, flags); @@ -1346,6 +1364,18 @@ static inline int i_ipmi_request(ipmi_user_t user, return rv; } +static int check_addr(ipmi_smi_t intf, + struct ipmi_addr *addr, + unsigned char *saddr, + unsigned char *lun) +{ + if (addr->channel >= IPMI_MAX_CHANNELS) + return -EINVAL; + *lun = 
intf->channels[addr->channel].lun; + *saddr = intf->channels[addr->channel].address; + return 0; +} + int ipmi_request_settime(ipmi_user_t user, struct ipmi_addr *addr, long msgid, @@ -1355,6 +1385,12 @@ int ipmi_request_settime(ipmi_user_t user, int retries, unsigned int retry_time_ms) { + unsigned char saddr, lun; + int rv; + + rv = check_addr(user->intf, addr, &saddr, &lun); + if (rv) + return rv; return i_ipmi_request(user, user->intf, addr, @@ -1363,8 +1399,8 @@ int ipmi_request_settime(ipmi_user_t user, user_msg_data, NULL, NULL, priority, - user->intf->my_address, - user->intf->my_lun, + saddr, + lun, retries, retry_time_ms); } @@ -1378,6 +1414,12 @@ int ipmi_request_supply_msgs(ipmi_user_t user, struct ipmi_recv_msg *supplied_recv, int priority) { + unsigned char saddr, lun; + int rv; + + rv = check_addr(user->intf, addr, &saddr, &lun); + if (rv) + return rv; return i_ipmi_request(user, user->intf, addr, @@ -1387,8 +1429,8 @@ int ipmi_request_supply_msgs(ipmi_user_t user, supplied_smi, supplied_recv, priority, - user->intf->my_address, - user->intf->my_lun, + saddr, + lun, -1, 0); } @@ -1397,8 +1439,15 @@ static int ipmb_file_read_proc(char *page, char **start, off_t off, { char *out = (char *) page; ipmi_smi_t intf = data; + int i; + int rv= 0; - return sprintf(out, "%x\n", intf->my_address); + for (i=0; ichannels[i].address); + out[rv-1] = '\n'; /* Replace the final space with a newline */ + out[rv] = '\0'; + rv++; + return rv; } static int version_file_read_proc(char *page, char **start, off_t off, @@ -1592,8 +1641,8 @@ send_channel_info_cmd(ipmi_smi_t intf, int chan) NULL, NULL, 0, - intf->my_address, - intf->my_lun, + intf->channels[0].address, + intf->channels[0].lun, -1, 0); } @@ -1696,11 +1745,13 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers, new_intf->intf_num = i; new_intf->version_major = version_major; new_intf->version_minor = version_minor; - if (slave_addr == 0) - new_intf->my_address = IPMI_BMC_SLAVE_ADDR; - else - 
new_intf->my_address = slave_addr; - new_intf->my_lun = 2; /* the SMS LUN. */ + for (j=0; jchannels[j].address + = IPMI_BMC_SLAVE_ADDR; + new_intf->channels[j].lun = 2; + } + if (slave_addr != 0) + new_intf->channels[0].address = slave_addr; rwlock_init(&(new_intf->users_lock)); INIT_LIST_HEAD(&(new_intf->users)); new_intf->handlers = handlers; @@ -1985,7 +2036,7 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t intf, msg->data[3] = msg->rsp[6]; msg->data[4] = ((netfn + 1) << 2) | (msg->rsp[7] & 0x3); msg->data[5] = ipmb_checksum(&(msg->data[3]), 2); - msg->data[6] = intf->my_address; + msg->data[6] = intf->channels[msg->rsp[3] & 0xf].address; /* rqseq/lun */ msg->data[7] = (msg->rsp[7] & 0xfc) | (msg->rsp[4] & 0x3); msg->data[8] = msg->rsp[8]; /* cmd */ @@ -2919,8 +2970,8 @@ static void send_panic_events(char *str) &smi_msg, &recv_msg, 0, - intf->my_address, - intf->my_lun, + intf->channels[0].address, + intf->channels[0].lun, 0, 1); /* Don't retry, and don't wait. */ } @@ -2965,8 +3016,8 @@ static void send_panic_events(char *str) &smi_msg, &recv_msg, 0, - intf->my_address, - intf->my_lun, + intf->channels[0].address, + intf->channels[0].lun, 0, 1); /* Don't retry, and don't wait. */ if (intf->local_event_generator) { @@ -2985,8 +3036,8 @@ static void send_panic_events(char *str) &smi_msg, &recv_msg, 0, - intf->my_address, - intf->my_lun, + intf->channels[0].address, + intf->channels[0].lun, 0, 1); /* no retry, and no wait. */ } intf->null_user_handler = NULL; @@ -2996,7 +3047,7 @@ static void send_panic_events(char *str) be zero, and it must not be my address. */ if (((intf->event_receiver & 1) == 0) && (intf->event_receiver != 0) - && (intf->event_receiver != intf->my_address)) + && (intf->event_receiver != intf->channels[0].address)) { /* The event receiver is valid, send an IPMB message. */ @@ -3031,7 +3082,7 @@ static void send_panic_events(char *str) data[0] = 0; data[1] = 0; data[2] = 0xf0; /* OEM event without timestamp. 
*/ - data[3] = intf->my_address; + data[3] = intf->channels[0].address; data[4] = j++; /* sequence # */ /* Always give 11 bytes, so strncpy will fill it with zeroes for me. */ @@ -3047,8 +3098,8 @@ static void send_panic_events(char *str) &smi_msg, &recv_msg, 0, - intf->my_address, - intf->my_lun, + intf->channels[0].address, + intf->channels[0].lun, 0, 1); /* no retry, and no wait. */ } } diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 596ca6130159..846b69899776 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -298,13 +298,19 @@ void ipmi_get_version(ipmi_user_t user, this user, so it will affect all users of this interface. This is so some initialization code can come in and do the OEM-specific things it takes to determine your address (if not the BMC) and set - it for everyone else. */ -void ipmi_set_my_address(ipmi_user_t user, - unsigned char address); -unsigned char ipmi_get_my_address(ipmi_user_t user); -void ipmi_set_my_LUN(ipmi_user_t user, - unsigned char LUN); -unsigned char ipmi_get_my_LUN(ipmi_user_t user); + it for everyone else. Note that each channel can have its own address. */ +int ipmi_set_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char address); +int ipmi_get_my_address(ipmi_user_t user, + unsigned int channel, + unsigned char *address); +int ipmi_set_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char LUN); +int ipmi_get_my_LUN(ipmi_user_t user, + unsigned int channel, + unsigned char *LUN); /* * Like ipmi_request, but lets you specify the number of retries and @@ -585,6 +591,16 @@ struct ipmi_cmdspec * things it takes to determine your address (if not the BMC) and set * it for everyone else. You should probably leave the LUN alone. 
*/ +struct ipmi_channel_lun_address_set +{ + unsigned short channel; + unsigned char value; +}; +#define IPMICTL_SET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 24, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 25, struct ipmi_channel_lun_address_set) +#define IPMICTL_SET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 26, struct ipmi_channel_lun_address_set) +#define IPMICTL_GET_MY_CHANNEL_LUN_CMD _IOR(IPMI_IOC_MAGIC, 27, struct ipmi_channel_lun_address_set) +/* Legacy interfaces, these only set IPMB 0. */ #define IPMICTL_SET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 17, unsigned int) #define IPMICTL_GET_MY_ADDRESS_CMD _IOR(IPMI_IOC_MAGIC, 18, unsigned int) #define IPMICTL_SET_MY_LUN_CMD _IOR(IPMI_IOC_MAGIC, 19, unsigned int) -- cgit v1.2.3 From 07766f241b54d67999907d529b99ffaa61d8b7d9 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:40 -0700 Subject: [PATCH] ipmi: allow userland to include ipmi.h The IPMI driver include file needs to include compiler.h so it has definitions for __user and such. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 846b69899776..dd30adedd07d 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -35,6 +35,7 @@ #define __LINUX_IPMI_H #include +#include /* * This file describes an interface to an IPMI driver. You have to -- cgit v1.2.3 From 56a55ec64806fb56e0cd43b0f726020b74c6689b Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:42 -0700 Subject: [PATCH] ipmi: fix panic ipmb response The "null message handler" in the IPMI driver is used in startup and panic situations to handle messages. 
It was only designed to work with messages from the local management controller, but in some cases it was used to get messages from remote management controllers, and the system would then panic. This patch makes the "null message handler" in the IPMI driver more general so it works with any kind of message. Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_msghandler.c | 107 ++++++++++++++++++++++-------------- include/linux/ipmi.h | 3 +- 2 files changed, 69 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 05293d0e6692..d0ed25278cbb 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -219,7 +219,7 @@ struct ipmi_smi interface comes in with a NULL user, call this routine with it. Note that the message will still be freed by the caller. This only works on the system interface. */ - void (*null_user_handler)(ipmi_smi_t intf, struct ipmi_smi_msg *msg); + void (*null_user_handler)(ipmi_smi_t intf, struct ipmi_recv_msg *msg); /* When we are scanning the channels for an SMI, this will tell which channel we are scanning. */ @@ -459,7 +459,27 @@ unsigned int ipmi_addr_length(int addr_type) static void deliver_response(struct ipmi_recv_msg *msg) { - msg->user->handler->ipmi_recv_hndl(msg, msg->user->handler_data); + if (! msg->user) { + ipmi_smi_t intf = msg->user_msg_data; + unsigned long flags; + + /* Special handling for NULL users. */ + if (intf->null_user_handler) { + intf->null_user_handler(intf, msg); + spin_lock_irqsave(&intf->counter_lock, flags); + intf->handled_local_responses++; + spin_unlock_irqrestore(&intf->counter_lock, flags); + } else { + /* No handler, so give up. 
*/ + spin_lock_irqsave(&intf->counter_lock, flags); + intf->unhandled_local_responses++; + spin_unlock_irqrestore(&intf->counter_lock, flags); + } + ipmi_free_recv_msg(msg); + } else { + msg->user->handler->ipmi_recv_hndl(msg, + msg->user->handler_data); + } } /* Find the next sequence number not being used and add the given @@ -1389,6 +1409,8 @@ int ipmi_request_settime(ipmi_user_t user, unsigned char saddr, lun; int rv; + if (! user) + return -EINVAL; rv = check_addr(user->intf, addr, &saddr, &lun); if (rv) return rv; @@ -1418,6 +1440,8 @@ int ipmi_request_supply_msgs(ipmi_user_t user, unsigned char saddr, lun; int rv; + if (! user) + return -EINVAL; rv = check_addr(user->intf, addr, &saddr, &lun); if (rv) return rv; @@ -1638,7 +1662,7 @@ send_channel_info_cmd(ipmi_smi_t intf, int chan) (struct ipmi_addr *) &si, 0, &msg, - NULL, + intf, NULL, NULL, 0, @@ -1648,19 +1672,20 @@ send_channel_info_cmd(ipmi_smi_t intf, int chan) } static void -channel_handler(ipmi_smi_t intf, struct ipmi_smi_msg *msg) +channel_handler(ipmi_smi_t intf, struct ipmi_recv_msg *msg) { int rv = 0; int chan; - if ((msg->rsp[0] == (IPMI_NETFN_APP_RESPONSE << 2)) - && (msg->rsp[1] == IPMI_GET_CHANNEL_INFO_CMD)) + if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) + && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) + && (msg->msg.cmd == IPMI_GET_CHANNEL_INFO_CMD)) { /* It's the one we want */ - if (msg->rsp[2] != 0) { + if (msg->msg.data[0] != 0) { /* Got an error from the channel, just go on. */ - if (msg->rsp[2] == IPMI_INVALID_COMMAND_ERR) { + if (msg->msg.data[0] == IPMI_INVALID_COMMAND_ERR) { /* If the MC does not support this command, that is legal. We just assume it has one IPMB at channel @@ -1677,13 +1702,13 @@ channel_handler(ipmi_smi_t intf, struct ipmi_smi_msg *msg) } goto next_channel; } - if (msg->rsp_size < 6) { + if (msg->msg.data_len < 4) { /* Message not big enough, just go on. 
*/ goto next_channel; } chan = intf->curr_channel; - intf->channels[chan].medium = msg->rsp[4] & 0x7f; - intf->channels[chan].protocol = msg->rsp[5] & 0x1f; + intf->channels[chan].medium = msg->msg.data[2] & 0x7f; + intf->channels[chan].protocol = msg->msg.data[3] & 0x1f; next_channel: intf->curr_channel++; @@ -2382,6 +2407,14 @@ static int handle_bmc_rsp(ipmi_smi_t intf, unsigned long flags; recv_msg = (struct ipmi_recv_msg *) msg->user_data; + if (recv_msg == NULL) + { + printk(KERN_WARNING"IPMI message received with no owner. This\n" + "could be because of a malformed message, or\n" + "because of a hardware error. Contact your\n" + "hardware vender for assistance\n"); + return 0; + } /* Make sure the user still exists. */ list_for_each_entry(user, &(intf->users), link) { @@ -2392,19 +2425,11 @@ static int handle_bmc_rsp(ipmi_smi_t intf, } } - if (!found) { - /* Special handling for NULL users. */ - if (!recv_msg->user && intf->null_user_handler){ - intf->null_user_handler(intf, msg); - spin_lock_irqsave(&intf->counter_lock, flags); - intf->handled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); - }else{ - /* The user for the message went away, so give up. */ - spin_lock_irqsave(&intf->counter_lock, flags); - intf->unhandled_local_responses++; - spin_unlock_irqrestore(&intf->counter_lock, flags); - } + if ((! found) && recv_msg->user) { + /* The user for the message went away, so give up. 
*/ + spin_lock_irqsave(&intf->counter_lock, flags); + intf->unhandled_local_responses++; + spin_unlock_irqrestore(&intf->counter_lock, flags); ipmi_free_recv_msg(recv_msg); } else { struct ipmi_system_interface_addr *smi_addr; @@ -2890,28 +2915,30 @@ static void dummy_recv_done_handler(struct ipmi_recv_msg *msg) } #ifdef CONFIG_IPMI_PANIC_STRING -static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_smi_msg *msg) +static void event_receiver_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) { - if ((msg->rsp[0] == (IPMI_NETFN_SENSOR_EVENT_RESPONSE << 2)) - && (msg->rsp[1] == IPMI_GET_EVENT_RECEIVER_CMD) - && (msg->rsp[2] == IPMI_CC_NO_ERROR)) + if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) + && (msg->msg.netfn == IPMI_NETFN_SENSOR_EVENT_RESPONSE) + && (msg->msg.cmd == IPMI_GET_EVENT_RECEIVER_CMD) + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { /* A get event receiver command, save it. */ - intf->event_receiver = msg->rsp[3]; - intf->event_receiver_lun = msg->rsp[4] & 0x3; + intf->event_receiver = msg->msg.data[1]; + intf->event_receiver_lun = msg->msg.data[2] & 0x3; } } -static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_smi_msg *msg) +static void device_id_fetcher(ipmi_smi_t intf, struct ipmi_recv_msg *msg) { - if ((msg->rsp[0] == (IPMI_NETFN_APP_RESPONSE << 2)) - && (msg->rsp[1] == IPMI_GET_DEVICE_ID_CMD) - && (msg->rsp[2] == IPMI_CC_NO_ERROR)) + if ((msg->addr.addr_type == IPMI_SYSTEM_INTERFACE_ADDR_TYPE) + && (msg->msg.netfn == IPMI_NETFN_APP_RESPONSE) + && (msg->msg.cmd == IPMI_GET_DEVICE_ID_CMD) + && (msg->msg.data[0] == IPMI_CC_NO_ERROR)) { /* A get device id command, save if we are an event receiver or generator. 
*/ - intf->local_sel_device = (msg->rsp[8] >> 2) & 1; - intf->local_event_generator = (msg->rsp[8] >> 5) & 1; + intf->local_sel_device = (msg->msg.data[6] >> 2) & 1; + intf->local_event_generator = (msg->msg.data[6] >> 5) & 1; } } #endif @@ -2967,7 +2994,7 @@ static void send_panic_events(char *str) &addr, 0, &msg, - NULL, + intf, &smi_msg, &recv_msg, 0, @@ -3013,7 +3040,7 @@ static void send_panic_events(char *str) &addr, 0, &msg, - NULL, + intf, &smi_msg, &recv_msg, 0, @@ -3033,7 +3060,7 @@ static void send_panic_events(char *str) &addr, 0, &msg, - NULL, + intf, &smi_msg, &recv_msg, 0, @@ -3095,7 +3122,7 @@ static void send_panic_events(char *str) &addr, 0, &msg, - NULL, + intf, &smi_msg, &recv_msg, 0, diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index dd30adedd07d..938d55b813a5 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -242,7 +242,8 @@ struct ipmi_recv_msg /* The user_msg_data is the data supplied when a message was sent, if this is a response to a sent message. If this is not a response to a sent message, then user_msg_data will - be NULL. */ + be NULL. If the user above is NULL, then this will be the + intf. */ void *user_msg_data; /* Call this when done with the message. It will presumably free -- cgit v1.2.3 From 8c702e16207c70119d03df924de35f8c3629a5c4 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Tue, 6 Sep 2005 15:18:46 -0700 Subject: [PATCH] ipmi poweroff: fix chassis control The IPMI power control function proc_write_chassctrl was badly written, it directly used userspace pointers, it assumed that strings were NULL terminated, and it used the evil sscanf function. This converts over to using the sysctl interface for this data and changes the semantics to be a little more logical. 
Signed-off-by: Corey Minyard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/IPMI.txt | 13 ++-- drivers/char/ipmi/ipmi_poweroff.c | 132 +++++++++++++++++--------------------- include/linux/sysctl.h | 6 ++ 3 files changed, 71 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/Documentation/IPMI.txt b/Documentation/IPMI.txt index 84d3d4d10c17..bf1cf98d2a27 100644 --- a/Documentation/IPMI.txt +++ b/Documentation/IPMI.txt @@ -605,12 +605,13 @@ is in the ipmi_poweroff module. When the system requests a powerdown, it will send the proper IPMI commands to do this. This is supported on several platforms. -There is a module parameter named "poweroff_control" that may either be zero -(do a power down) or 2 (do a power cycle, power the system off, then power -it on in a few seconds). Setting ipmi_poweroff.poweroff_control=x will do -the same thing on the kernel command line. The parameter is also available -via the proc filesystem in /proc/ipmi/poweroff_control. Note that if the -system does not support power cycling, it will always to the power off. +There is a module parameter named "poweroff_powercycle" that may +either be zero (do a power down) or non-zero (do a power cycle, power +the system off, then power it on in a few seconds). Setting +ipmi_poweroff.poweroff_control=x will do the same thing on the kernel +command line. The parameter is also available via the proc filesystem +in /proc/sys/dev/ipmi/poweroff_powercycle. Note that if the system +does not support power cycling, it will always do the power off. Note that if you have ACPI enabled, the system will prefer using ACPI to power off. 
diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c index 42ea9843b394..e82a96ba396b 100644 --- a/drivers/char/ipmi/ipmi_poweroff.c +++ b/drivers/char/ipmi/ipmi_poweroff.c @@ -52,11 +52,11 @@ extern void (*pm_power_off)(void); #define IPMI_CHASSIS_POWER_CYCLE 0x02 /* power cycle */ /* the IPMI data command */ -static int poweroff_control = IPMI_CHASSIS_POWER_DOWN; +static int poweroff_powercycle; /* parameter definition to allow user to flag power cycle */ -module_param(poweroff_control, int, IPMI_CHASSIS_POWER_DOWN); -MODULE_PARM_DESC(poweroff_control, " Set to 2 to enable power cycle instead of power down. Power cycle is contingent on hardware support, otherwise it defaults back to power down."); +module_param(poweroff_powercycle, int, 0); +MODULE_PARM_DESC(poweroff_powercycles, " Set to non-zero to enable power cycle instead of power down. Power cycle is contingent on hardware support, otherwise it defaults back to power down."); /* Stuff from the get device id command. */ static unsigned int mfg_id; @@ -385,37 +385,34 @@ static void ipmi_poweroff_chassis (ipmi_user_t user) powercyclefailed: printk(KERN_INFO PFX "Powering %s via IPMI chassis control command\n", - ((poweroff_control != IPMI_CHASSIS_POWER_CYCLE) ? "down" : "cycle")); + (poweroff_powercycle ? 
"cycle" : "down")); /* * Power down */ send_msg.netfn = IPMI_NETFN_CHASSIS_REQUEST; send_msg.cmd = IPMI_CHASSIS_CONTROL_CMD; - data[0] = poweroff_control; + if (poweroff_powercycle) + data[0] = IPMI_CHASSIS_POWER_CYCLE; + else + data[0] = IPMI_CHASSIS_POWER_DOWN; send_msg.data = data; send_msg.data_len = sizeof(data); rv = ipmi_request_in_rc_mode(user, (struct ipmi_addr *) &smi_addr, &send_msg); if (rv) { - switch (poweroff_control) { - case IPMI_CHASSIS_POWER_CYCLE: - /* power cycle failed, default to power down */ - printk(KERN_ERR PFX "Unable to send chassis power " \ - "cycle message, IPMI error 0x%x\n", rv); - poweroff_control = IPMI_CHASSIS_POWER_DOWN; - goto powercyclefailed; - - case IPMI_CHASSIS_POWER_DOWN: - default: - printk(KERN_ERR PFX "Unable to send chassis power " \ - "down message, IPMI error 0x%x\n", rv); - break; + if (poweroff_powercycle) { + /* power cycle failed, default to power down */ + printk(KERN_ERR PFX "Unable to send chassis power " \ + "cycle message, IPMI error 0x%x\n", rv); + poweroff_powercycle = 0; + goto powercyclefailed; } - } - return; + printk(KERN_ERR PFX "Unable to send chassis power " \ + "down message, IPMI error 0x%x\n", rv); + } } @@ -561,39 +558,35 @@ static struct ipmi_smi_watcher smi_watcher = #ifdef CONFIG_PROC_FS -/* displays properties to proc */ -static int proc_read_chassctrl(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - return sprintf(page, "%d\t[ 0=powerdown 2=powercycle ]\n", - poweroff_control); -} +#include + +static ctl_table ipmi_table[] = { + { .ctl_name = DEV_IPMI_POWEROFF_POWERCYCLE, + .procname = "poweroff_powercycle", + .data = &poweroff_powercycle, + .maxlen = sizeof(poweroff_powercycle), + .mode = 0644, + .proc_handler = &proc_dointvec }, + { } +}; -/* process property writes from proc */ -static int proc_write_chassctrl(struct file *file, const char *buffer, - unsigned long count, void *data) -{ - int rv = count; - unsigned int newval = 0; - - sscanf(buffer, "%d", 
&newval); - switch (newval) { - case IPMI_CHASSIS_POWER_CYCLE: - printk(KERN_INFO PFX "power cycle is now enabled\n"); - poweroff_control = newval; - break; - - case IPMI_CHASSIS_POWER_DOWN: - poweroff_control = IPMI_CHASSIS_POWER_DOWN; - break; - - default: - rv = -EINVAL; - break; - } +static ctl_table ipmi_dir_table[] = { + { .ctl_name = DEV_IPMI, + .procname = "ipmi", + .mode = 0555, + .child = ipmi_table }, + { } +}; - return rv; -} +static ctl_table ipmi_root_table[] = { + { .ctl_name = CTL_DEV, + .procname = "dev", + .mode = 0555, + .child = ipmi_dir_table }, + { } +}; + +static struct ctl_table_header *ipmi_table_header; #endif /* CONFIG_PROC_FS */ /* @@ -601,41 +594,32 @@ static int proc_write_chassctrl(struct file *file, const char *buffer, */ static int ipmi_poweroff_init (void) { - int rv; - struct proc_dir_entry *file; + int rv; printk ("Copyright (C) 2004 MontaVista Software -" " IPMI Powerdown via sys_reboot.\n"); - switch (poweroff_control) { - case IPMI_CHASSIS_POWER_CYCLE: - printk(KERN_INFO PFX "Power cycle is enabled.\n"); - break; + if (poweroff_powercycle) + printk(KERN_INFO PFX "Power cycle is enabled.\n"); - case IPMI_CHASSIS_POWER_DOWN: - default: - poweroff_control = IPMI_CHASSIS_POWER_DOWN; - break; +#ifdef CONFIG_PROC_FS + ipmi_table_header = register_sysctl_table(ipmi_root_table, 1); + if (!ipmi_table_header) { + printk(KERN_ERR PFX "Unable to register powercycle sysctl\n"); + rv = -ENOMEM; + goto out_err; } +#endif +#ifdef CONFIG_PROC_FS rv = ipmi_smi_watcher_register(&smi_watcher); +#endif if (rv) { + unregister_sysctl_table(ipmi_table_header); printk(KERN_ERR PFX "Unable to register SMI watcher: %d\n", rv); goto out_err; } -#ifdef CONFIG_PROC_FS - file = create_proc_entry("poweroff_control", 0, proc_ipmi_root); - if (!file) { - printk(KERN_ERR PFX "Unable to create proc power control\n"); - } else { - file->nlink = 1; - file->read_proc = proc_read_chassctrl; - file->write_proc = proc_write_chassctrl; - file->owner = THIS_MODULE; - } 
-#endif - out_err: return rv; } @@ -646,7 +630,7 @@ static __exit void ipmi_poweroff_cleanup(void) int rv; #ifdef CONFIG_PROC_FS - remove_proc_entry("poweroff_control", proc_ipmi_root); + unregister_sysctl_table(ipmi_table_header); #endif ipmi_smi_watcher_unregister(&smi_watcher); diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index e82be96d4906..532a6c5c24e9 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -711,6 +711,7 @@ enum { DEV_RAID=4, DEV_MAC_HID=5, DEV_SCSI=6, + DEV_IPMI=7, }; /* /proc/sys/dev/cdrom */ @@ -776,6 +777,11 @@ enum { DEV_SCSI_LOGGING_LEVEL=1, }; +/* /proc/sys/dev/ipmi */ +enum { + DEV_IPMI_POWEROFF_POWERCYCLE=1, +}; + /* /proc/sys/abi */ enum { -- cgit v1.2.3 From 335eadf2ef6a1122a720aea98e758e5d431da87d Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:50 -0700 Subject: [PATCH] sd: initialize SD cards Support for the Secure Digital protocol in the MMC layer. A summary of the legal issues surrounding SD cards, as understood by yours truly: Members of the Secure Digital Association, hereafter SDA, are required to sign a NDA[1] before given access to any specifications. It has been speculated that including an SD implementation would forbid these members to redistribute Linux. This is the basic problem with SD support so it is unclear if it even is a problem since it has no effect on those of us that aren't members. The SDA doesn't seem to enforce these rules though since the patches included here are based on documentation made public by some of the members. The most complete specs[2] are actually released by Sandisk, one of the founding companies of the SDA. Because of this the NDA is considered a non-issue by most involved in the discussions concerning these patches. It might be that the SDA is only interested in protecting the so called "secure" bits of SD, which so far hasn't been found in any public spec. 
(The card is split into two sections, one "normal" and one "secure" which has an access scheme similar to TPM:s). (As a side note, Microsoft is working to make things easier for us since they want to be able to include the source code for a SD driver in one of their development kits. HP is making sure that the new NDA will allow a Linux implementation. So far only the SDIO specs have been opened up[3]. More will hopefully follow.) [1] http://www.sdcard.org/membership/images/ippolicy.pdf [2] http://www.sandisk.com/pdf/oem/ProdManualSDCardv1.9.pdf [3] http://www.sdcard.org/sdio/Simplified%20SDIO%20Card%20Specification.pdf This patch contains the central parts of the SD support. If no MMC cards are found on a bus then the MMC layer proceeds looking for SD cards. Helper functions are extended to handle the special needs of SD cards. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/mmc.c | 326 ++++++++++++++++++++++++++++++++++++----------- include/linux/mmc/card.h | 3 + include/linux/mmc/host.h | 4 + include/linux/mmc/mmc.h | 2 + 4 files changed, 262 insertions(+), 73 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 0a8165974ba7..294961a102ca 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -172,7 +172,79 @@ int mmc_wait_for_cmd(struct mmc_host *host, struct mmc_command *cmd, int retries EXPORT_SYMBOL(mmc_wait_for_cmd); +/** + * mmc_wait_for_app_cmd - start an application command and wait for + completion + * @host: MMC host to start command + * @rca: RCA to send MMC_APP_CMD to + * @cmd: MMC command to start + * @retries: maximum number of retries + * + * Sends a MMC_APP_CMD, checks the card response, sends the command + * in the parameter and waits for it to complete. Return any error + * that occurred while the command was executing. Do not attempt to + * parse the response. 
+ */ +int mmc_wait_for_app_cmd(struct mmc_host *host, unsigned int rca, + struct mmc_command *cmd, int retries) +{ + struct mmc_request mrq; + struct mmc_command appcmd; + + int i, err; + + BUG_ON(host->card_busy == NULL); + BUG_ON(retries < 0); + + err = MMC_ERR_INVALID; + + /* + * We have to resend MMC_APP_CMD for each attempt so + * we cannot use the retries field in mmc_command. + */ + for (i = 0;i <= retries;i++) { + memset(&mrq, 0, sizeof(struct mmc_request)); + + appcmd.opcode = MMC_APP_CMD; + appcmd.arg = rca << 16; + appcmd.flags = MMC_RSP_R1; + appcmd.retries = 0; + memset(appcmd.resp, 0, sizeof(appcmd.resp)); + appcmd.data = NULL; + + mrq.cmd = &appcmd; + appcmd.data = NULL; + + mmc_wait_for_req(host, &mrq); + + if (appcmd.error) { + err = appcmd.error; + continue; + } + + /* Check that card supported application commands */ + if (!(appcmd.resp[0] & R1_APP_CMD)) + return MMC_ERR_FAILED; + + memset(&mrq, 0, sizeof(struct mmc_request)); + + memset(cmd->resp, 0, sizeof(cmd->resp)); + cmd->retries = 0; + + mrq.cmd = cmd; + cmd->data = NULL; + + mmc_wait_for_req(host, &mrq); + err = cmd->error; + if (cmd->error == MMC_ERR_NONE) + break; + } + + return err; +} + +EXPORT_SYMBOL(mmc_wait_for_app_cmd); /** * __mmc_claim_host - exclusively claim a host @@ -322,48 +394,70 @@ static void mmc_decode_cid(struct mmc_card *card) memset(&card->cid, 0, sizeof(struct mmc_cid)); - /* - * The selection of the format here is guesswork based upon - * information people have sent to date. - */ - switch (card->csd.mmca_vsn) { - case 0: /* MMC v1.? 
*/ - case 1: /* MMC v1.4 */ - card->cid.manfid = UNSTUFF_BITS(resp, 104, 24); - card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); - card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); - card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); - card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); - card->cid.prod_name[4] = UNSTUFF_BITS(resp, 64, 8); - card->cid.prod_name[5] = UNSTUFF_BITS(resp, 56, 8); - card->cid.prod_name[6] = UNSTUFF_BITS(resp, 48, 8); - card->cid.hwrev = UNSTUFF_BITS(resp, 44, 4); - card->cid.fwrev = UNSTUFF_BITS(resp, 40, 4); - card->cid.serial = UNSTUFF_BITS(resp, 16, 24); - card->cid.month = UNSTUFF_BITS(resp, 12, 4); - card->cid.year = UNSTUFF_BITS(resp, 8, 4) + 1997; - break; - - case 2: /* MMC v2.x ? */ - case 3: /* MMC v3.x ? */ - card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); - card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); - card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); - card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); - card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); - card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); - card->cid.prod_name[4] = UNSTUFF_BITS(resp, 64, 8); - card->cid.prod_name[5] = UNSTUFF_BITS(resp, 56, 8); - card->cid.serial = UNSTUFF_BITS(resp, 16, 32); - card->cid.month = UNSTUFF_BITS(resp, 12, 4); - card->cid.year = UNSTUFF_BITS(resp, 8, 4) + 1997; - break; - - default: - printk("%s: card has unknown MMCA version %d\n", - mmc_hostname(card->host), card->csd.mmca_vsn); - mmc_card_set_bad(card); - break; + if (mmc_card_sd(card)) { + /* + * SD doesn't currently have a version field so we will + * have to assume we can parse this. 
+ */ + card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); + card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); + card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); + card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); + card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); + card->cid.prod_name[4] = UNSTUFF_BITS(resp, 64, 8); + card->cid.hwrev = UNSTUFF_BITS(resp, 60, 4); + card->cid.fwrev = UNSTUFF_BITS(resp, 56, 4); + card->cid.serial = UNSTUFF_BITS(resp, 24, 32); + card->cid.year = UNSTUFF_BITS(resp, 12, 8); + card->cid.month = UNSTUFF_BITS(resp, 8, 4); + + card->cid.year += 2000; /* SD cards year offset */ + } + else { + /* + * The selection of the format here is based upon published + * specs from sandisk and from what people have reported. + */ + switch (card->csd.mmca_vsn) { + case 0: /* MMC v1.0 - v1.2 */ + case 1: /* MMC v1.4 */ + card->cid.manfid = UNSTUFF_BITS(resp, 104, 24); + card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); + card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); + card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); + card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); + card->cid.prod_name[4] = UNSTUFF_BITS(resp, 64, 8); + card->cid.prod_name[5] = UNSTUFF_BITS(resp, 56, 8); + card->cid.prod_name[6] = UNSTUFF_BITS(resp, 48, 8); + card->cid.hwrev = UNSTUFF_BITS(resp, 44, 4); + card->cid.fwrev = UNSTUFF_BITS(resp, 40, 4); + card->cid.serial = UNSTUFF_BITS(resp, 16, 24); + card->cid.month = UNSTUFF_BITS(resp, 12, 4); + card->cid.year = UNSTUFF_BITS(resp, 8, 4) + 1997; + break; + + case 2: /* MMC v2.0 - v2.2 */ + case 3: /* MMC v3.1 - v3.3 */ + card->cid.manfid = UNSTUFF_BITS(resp, 120, 8); + card->cid.oemid = UNSTUFF_BITS(resp, 104, 16); + card->cid.prod_name[0] = UNSTUFF_BITS(resp, 96, 8); + card->cid.prod_name[1] = UNSTUFF_BITS(resp, 88, 8); + card->cid.prod_name[2] = UNSTUFF_BITS(resp, 80, 8); + card->cid.prod_name[3] = UNSTUFF_BITS(resp, 72, 8); + card->cid.prod_name[4] = UNSTUFF_BITS(resp, 
64, 8); + card->cid.prod_name[5] = UNSTUFF_BITS(resp, 56, 8); + card->cid.serial = UNSTUFF_BITS(resp, 16, 32); + card->cid.month = UNSTUFF_BITS(resp, 12, 4); + card->cid.year = UNSTUFF_BITS(resp, 8, 4) + 1997; + break; + + default: + printk("%s: card has unknown MMCA version %d\n", + mmc_hostname(card->host), card->csd.mmca_vsn); + mmc_card_set_bad(card); + break; + } } } @@ -376,34 +470,61 @@ static void mmc_decode_csd(struct mmc_card *card) unsigned int e, m, csd_struct; u32 *resp = card->raw_csd; - /* - * We only understand CSD structure v1.1 and v2. - * v2 has extra information in bits 15, 11 and 10. - */ - csd_struct = UNSTUFF_BITS(resp, 126, 2); - if (csd_struct != 1 && csd_struct != 2) { - printk("%s: unrecognised CSD structure version %d\n", - mmc_hostname(card->host), csd_struct); - mmc_card_set_bad(card); - return; + if (mmc_card_sd(card)) { + csd_struct = UNSTUFF_BITS(resp, 126, 2); + if (csd_struct != 0) { + printk("%s: unrecognised CSD structure version %d\n", + mmc_hostname(card->host), csd_struct); + mmc_card_set_bad(card); + return; + } + + m = UNSTUFF_BITS(resp, 115, 4); + e = UNSTUFF_BITS(resp, 112, 3); + csd->tacc_ns = (tacc_exp[e] * tacc_mant[m] + 9) / 10; + csd->tacc_clks = UNSTUFF_BITS(resp, 104, 8) * 100; + + m = UNSTUFF_BITS(resp, 99, 4); + e = UNSTUFF_BITS(resp, 96, 3); + csd->max_dtr = tran_exp[e] * tran_mant[m]; + csd->cmdclass = UNSTUFF_BITS(resp, 84, 12); + + e = UNSTUFF_BITS(resp, 47, 3); + m = UNSTUFF_BITS(resp, 62, 12); + csd->capacity = (1 + m) << (e + 2); + + csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); } + else { + /* + * We only understand CSD structure v1.1 and v1.2. + * v1.2 has extra information in bits 15, 11 and 10. 
+ */ + csd_struct = UNSTUFF_BITS(resp, 126, 2); + if (csd_struct != 1 && csd_struct != 2) { + printk("%s: unrecognised CSD structure version %d\n", + mmc_hostname(card->host), csd_struct); + mmc_card_set_bad(card); + return; + } - csd->mmca_vsn = UNSTUFF_BITS(resp, 122, 4); - m = UNSTUFF_BITS(resp, 115, 4); - e = UNSTUFF_BITS(resp, 112, 3); - csd->tacc_ns = (tacc_exp[e] * tacc_mant[m] + 9) / 10; - csd->tacc_clks = UNSTUFF_BITS(resp, 104, 8) * 100; + csd->mmca_vsn = UNSTUFF_BITS(resp, 122, 4); + m = UNSTUFF_BITS(resp, 115, 4); + e = UNSTUFF_BITS(resp, 112, 3); + csd->tacc_ns = (tacc_exp[e] * tacc_mant[m] + 9) / 10; + csd->tacc_clks = UNSTUFF_BITS(resp, 104, 8) * 100; - m = UNSTUFF_BITS(resp, 99, 4); - e = UNSTUFF_BITS(resp, 96, 3); - csd->max_dtr = tran_exp[e] * tran_mant[m]; - csd->cmdclass = UNSTUFF_BITS(resp, 84, 12); + m = UNSTUFF_BITS(resp, 99, 4); + e = UNSTUFF_BITS(resp, 96, 3); + csd->max_dtr = tran_exp[e] * tran_mant[m]; + csd->cmdclass = UNSTUFF_BITS(resp, 84, 12); - e = UNSTUFF_BITS(resp, 47, 3); - m = UNSTUFF_BITS(resp, 62, 12); - csd->capacity = (1 + m) << (e + 2); + e = UNSTUFF_BITS(resp, 47, 3); + m = UNSTUFF_BITS(resp, 62, 12); + csd->capacity = (1 + m) << (e + 2); - csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); + csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); + } } /* @@ -536,6 +657,34 @@ static int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr) return err; } +static int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr) +{ + struct mmc_command cmd; + int i, err = 0; + + cmd.opcode = SD_APP_OP_COND; + cmd.arg = ocr; + cmd.flags = MMC_RSP_R3; + + for (i = 100; i; i--) { + err = mmc_wait_for_app_cmd(host, 0, &cmd, CMD_RETRIES); + if (err != MMC_ERR_NONE) + break; + + if (cmd.resp[0] & MMC_CARD_BUSY || ocr == 0) + break; + + err = MMC_ERR_TIMEOUT; + + mmc_delay(10); + } + + if (rocr) + *rocr = cmd.resp[0]; + + return err; +} + /* * Discover cards by requesting their CID. 
If this command * times out, it is not an error; there are no further cards @@ -579,13 +728,28 @@ static void mmc_discover_cards(struct mmc_host *host) card->state &= ~MMC_STATE_DEAD; - cmd.opcode = MMC_SET_RELATIVE_ADDR; - cmd.arg = card->rca << 16; - cmd.flags = MMC_RSP_R1; + if (host->mode == MMC_MODE_SD) { + mmc_card_set_sd(card); - err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); - if (err != MMC_ERR_NONE) - mmc_card_set_dead(card); + cmd.opcode = SD_SEND_RELATIVE_ADDR; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1; + + err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); + if (err != MMC_ERR_NONE) + mmc_card_set_dead(card); + else + card->rca = cmd.resp[0] >> 16; + } + else { + cmd.opcode = MMC_SET_RELATIVE_ADDR; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1; + + err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); + if (err != MMC_ERR_NONE) + mmc_card_set_dead(card); + } } } @@ -669,12 +833,25 @@ static void mmc_setup(struct mmc_host *host) int err; u32 ocr; + host->mode = MMC_MODE_MMC; + mmc_power_up(host); mmc_idle_cards(host); err = mmc_send_op_cond(host, 0, &ocr); + + /* + * If we fail to detect any cards then try + * searching for SD cards. + */ if (err != MMC_ERR_NONE) - return; + { + err = mmc_send_app_op_cond(host, 0, &ocr); + if (err != MMC_ERR_NONE) + return; + + host->mode = MMC_MODE_SD; + } host->ocr = mmc_select_voltage(host, ocr); @@ -714,7 +891,10 @@ static void mmc_setup(struct mmc_host *host) * all get the idea that they should be ready for CMD2. * (My SanDisk card seems to need this.) 
*/ - mmc_send_op_cond(host, host->ocr, NULL); + if (host->mode == MMC_MODE_SD) + mmc_send_app_op_cond(host, host->ocr, NULL); + else + mmc_send_op_cond(host, host->ocr, NULL); mmc_discover_cards(host); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index aefedf04b9bb..538e8c86336c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -47,6 +47,7 @@ struct mmc_card { #define MMC_STATE_PRESENT (1<<0) /* present in sysfs */ #define MMC_STATE_DEAD (1<<1) /* device no longer in stack */ #define MMC_STATE_BAD (1<<2) /* unrecognised device */ +#define MMC_STATE_SDCARD (1<<3) /* is an SD card */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ struct mmc_cid cid; /* card identification */ @@ -56,10 +57,12 @@ struct mmc_card { #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) #define mmc_card_dead(c) ((c)->state & MMC_STATE_DEAD) #define mmc_card_bad(c) ((c)->state & MMC_STATE_BAD) +#define mmc_card_sd(c) ((c)->state & MMC_STATE_SDCARD) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_dead(c) ((c)->state |= MMC_STATE_DEAD) #define mmc_card_set_bad(c) ((c)->state |= MMC_STATE_BAD) +#define mmc_card_set_sd(c) ((c)->state |= MMC_STATE_SDCARD) #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) ((c)->dev.bus_id) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 30f68c0c8c6e..845020d90c60 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -87,6 +87,10 @@ struct mmc_host { struct mmc_ios ios; /* current io bus settings */ u32 ocr; /* the current OCR setting */ + unsigned int mode; /* current card mode of host */ +#define MMC_MODE_MMC 0 +#define MMC_MODE_SD 1 + struct list_head cards; /* devices attached to this host */ wait_queue_head_t wq; diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h index 0d35d4ffb360..1ab78e8d6c53 100644 --- a/include/linux/mmc/mmc.h +++ b/include/linux/mmc/mmc.h @@ -88,6 +88,8 @@ 
struct mmc_card; extern int mmc_wait_for_req(struct mmc_host *, struct mmc_request *); extern int mmc_wait_for_cmd(struct mmc_host *, struct mmc_command *, int); +extern int mmc_wait_for_app_cmd(struct mmc_host *, unsigned int, + struct mmc_command *, int); extern int __mmc_claim_host(struct mmc_host *host, struct mmc_card *card); -- cgit v1.2.3 From a00fc09029f02ca833cf90e5d5625f08c4ac4f51 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:52 -0700 Subject: [PATCH] sd: read-only switch Support for the read-only switch on SD cards which must be enforced by the host. Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/mmc.c | 39 +++++++++++++++++++++++---------------- drivers/mmc/mmc_block.c | 9 +++++++-- include/linux/mmc/card.h | 3 +++ include/linux/mmc/host.h | 1 + 4 files changed, 34 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 294961a102ca..725c6ad3eb64 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -413,8 +413,7 @@ static void mmc_decode_cid(struct mmc_card *card) card->cid.month = UNSTUFF_BITS(resp, 8, 4); card->cid.year += 2000; /* SD cards year offset */ - } - else { + } else { /* * The selection of the format here is based upon published * specs from sandisk and from what people have reported. @@ -494,8 +493,7 @@ static void mmc_decode_csd(struct mmc_card *card) csd->capacity = (1 + m) << (e + 2); csd->read_blkbits = UNSTUFF_BITS(resp, 80, 4); - } - else { + } else { /* * We only understand CSD structure v1.1 and v1.2. * v1.2 has extra information in bits 15, 11 and 10. 
@@ -738,10 +736,20 @@ static void mmc_discover_cards(struct mmc_host *host) err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); if (err != MMC_ERR_NONE) mmc_card_set_dead(card); - else + else { card->rca = cmd.resp[0] >> 16; - } - else { + + if (!host->ops->get_ro) { + printk(KERN_WARNING "%s: host does not " + "support reading read-only " + "switch. assuming write-enable.\n", + mmc_hostname(host)); + } else { + if (host->ops->get_ro(host)) + mmc_card_set_readonly(card); + } + } + } else { cmd.opcode = MMC_SET_RELATIVE_ADDR; cmd.arg = card->rca << 16; cmd.flags = MMC_RSP_R1; @@ -833,24 +841,23 @@ static void mmc_setup(struct mmc_host *host) int err; u32 ocr; - host->mode = MMC_MODE_MMC; + host->mode = MMC_MODE_SD; mmc_power_up(host); mmc_idle_cards(host); - err = mmc_send_op_cond(host, 0, &ocr); + err = mmc_send_app_op_cond(host, 0, &ocr); /* - * If we fail to detect any cards then try - * searching for SD cards. + * If we fail to detect any SD cards then try + * searching for MMC cards. */ - if (err != MMC_ERR_NONE) - { - err = mmc_send_app_op_cond(host, 0, &ocr); + if (err != MMC_ERR_NONE) { + host->mode = MMC_MODE_MMC; + + err = mmc_send_op_cond(host, 0, &ocr); if (err != MMC_ERR_NONE) return; - - host->mode = MMC_MODE_SD; } host->ocr = mmc_select_voltage(host, ocr); diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index d4eee99c2bf6..fa83f15fdf16 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -95,6 +95,10 @@ static int mmc_blk_open(struct inode *inode, struct file *filp) if (md->usage == 2) check_disk_change(inode->i_bdev); ret = 0; + + if ((filp->f_mode & FMODE_WRITE) && + mmc_card_readonly(md->queue.card)) + ret = -EROFS; } return ret; @@ -403,9 +407,10 @@ static int mmc_blk_probe(struct mmc_card *card) if (err) goto out; - printk(KERN_INFO "%s: %s %s %dKiB\n", + printk(KERN_INFO "%s: %s %s %dKiB %s\n", md->disk->disk_name, mmc_card_id(card), mmc_card_name(card), - (card->csd.capacity << card->csd.read_blkbits) / 1024); + 
(card->csd.capacity << card->csd.read_blkbits) / 1024, + mmc_card_readonly(card)?"(ro)":""); mmc_set_drvdata(card, md); add_disk(md->disk); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 538e8c86336c..0e9ec01b9c5b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -48,6 +48,7 @@ struct mmc_card { #define MMC_STATE_DEAD (1<<1) /* device no longer in stack */ #define MMC_STATE_BAD (1<<2) /* unrecognised device */ #define MMC_STATE_SDCARD (1<<3) /* is an SD card */ +#define MMC_STATE_READONLY (1<<4) /* card is read-only */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ struct mmc_cid cid; /* card identification */ @@ -58,11 +59,13 @@ struct mmc_card { #define mmc_card_dead(c) ((c)->state & MMC_STATE_DEAD) #define mmc_card_bad(c) ((c)->state & MMC_STATE_BAD) #define mmc_card_sd(c) ((c)->state & MMC_STATE_SDCARD) +#define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY) #define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT) #define mmc_card_set_dead(c) ((c)->state |= MMC_STATE_DEAD) #define mmc_card_set_bad(c) ((c)->state |= MMC_STATE_BAD) #define mmc_card_set_sd(c) ((c)->state |= MMC_STATE_SDCARD) +#define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY) #define mmc_card_name(c) ((c)->cid.prod_name) #define mmc_card_id(c) ((c)->dev.bus_id) diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 845020d90c60..8c5f71376e41 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -62,6 +62,7 @@ struct mmc_ios { struct mmc_host_ops { void (*request)(struct mmc_host *host, struct mmc_request *req); void (*set_ios)(struct mmc_host *host, struct mmc_ios *ios); + int (*get_ro)(struct mmc_host *host); }; struct mmc_card; -- cgit v1.2.3 From b57c43ad81602589afca3948a5a7121e40026e17 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:53 -0700 Subject: [PATCH] sd: SCR register Read the SD specific SCR register from the card. 
Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/mmc.c | 143 +++++++++++++++++++++++++++++++++++++++++++---- include/linux/mmc/card.h | 9 +++ 2 files changed, 142 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 725c6ad3eb64..21d4fb3314f8 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -16,6 +16,8 @@ #include #include #include +#include +#include #include #include @@ -246,6 +248,8 @@ int mmc_wait_for_app_cmd(struct mmc_host *host, unsigned int rca, EXPORT_SYMBOL(mmc_wait_for_app_cmd); +static int mmc_select_card(struct mmc_host *host, struct mmc_card *card); + /** * __mmc_claim_host - exclusively claim a host * @host: mmc host to claim @@ -278,16 +282,10 @@ int __mmc_claim_host(struct mmc_host *host, struct mmc_card *card) spin_unlock_irqrestore(&host->lock, flags); remove_wait_queue(&host->wq, &wait); - if (card != (void *)-1 && host->card_selected != card) { - struct mmc_command cmd; - - host->card_selected = card; - - cmd.opcode = MMC_SELECT_CARD; - cmd.arg = card->rca << 16; - cmd.flags = MMC_RSP_R1; - - err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); + if (card != (void *)-1) { + err = mmc_select_card(host, card); + if (err != MMC_ERR_NONE) + return err; } return err; @@ -317,6 +315,29 @@ void mmc_release_host(struct mmc_host *host) EXPORT_SYMBOL(mmc_release_host); +static int mmc_select_card(struct mmc_host *host, struct mmc_card *card) +{ + int err; + struct mmc_command cmd; + + BUG_ON(host->card_busy == NULL); + + if (host->card_selected == card) + return MMC_ERR_NONE; + + host->card_selected = card; + + cmd.opcode = MMC_SELECT_CARD; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1; + + err = mmc_wait_for_cmd(host, &cmd, CMD_RETRIES); + if (err != MMC_ERR_NONE) + return err; + + return MMC_ERR_NONE; +} + /* * Ensure that no card is selected. 
*/ @@ -525,6 +546,32 @@ static void mmc_decode_csd(struct mmc_card *card) } } +/* + * Given a 64-bit response, decode to our card SCR structure. + */ +static void mmc_decode_scr(struct mmc_card *card) +{ + struct sd_scr *scr = &card->scr; + unsigned int scr_struct; + u32 resp[4]; + + BUG_ON(!mmc_card_sd(card)); + + resp[3] = card->raw_scr[1]; + resp[2] = card->raw_scr[0]; + + scr_struct = UNSTUFF_BITS(resp, 60, 4); + if (scr_struct != 0) { + printk("%s: unrecognised SCR structure version %d\n", + mmc_hostname(card->host), scr_struct); + mmc_card_set_bad(card); + return; + } + + scr->sda_vsn = UNSTUFF_BITS(resp, 56, 4); + scr->bus_widths = UNSTUFF_BITS(resp, 48, 4); +} + /* * Locate a MMC card on this MMC host given a raw CID. */ @@ -789,6 +836,79 @@ static void mmc_read_csds(struct mmc_host *host) } } +static void mmc_read_scrs(struct mmc_host *host) +{ + int err; + struct mmc_card *card; + + struct mmc_request mrq; + struct mmc_command cmd; + struct mmc_data data; + + struct scatterlist sg; + + list_for_each_entry(card, &host->cards, node) { + if (card->state & (MMC_STATE_DEAD|MMC_STATE_PRESENT)) + continue; + if (!mmc_card_sd(card)) + continue; + + err = mmc_select_card(host, card); + if (err != MMC_ERR_NONE) { + mmc_card_set_dead(card); + continue; + } + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = MMC_APP_CMD; + cmd.arg = card->rca << 16; + cmd.flags = MMC_RSP_R1; + + err = mmc_wait_for_cmd(host, &cmd, 0); + if ((err != MMC_ERR_NONE) || !(cmd.resp[0] & R1_APP_CMD)) { + mmc_card_set_dead(card); + continue; + } + + memset(&cmd, 0, sizeof(struct mmc_command)); + + cmd.opcode = SD_APP_SEND_SCR; + cmd.arg = 0; + cmd.flags = MMC_RSP_R1; + + memset(&data, 0, sizeof(struct mmc_data)); + + data.timeout_ns = card->csd.tacc_ns * 10; + data.timeout_clks = card->csd.tacc_clks * 10; + data.blksz_bits = 3; + data.blocks = 1; + data.flags = MMC_DATA_READ; + data.sg = &sg; + data.sg_len = 1; + + memset(&mrq, 0, sizeof(struct mmc_request)); + + mrq.cmd = &cmd; 
+ mrq.data = &data; + + sg_init_one(&sg, (u8*)card->raw_scr, 8); + + err = mmc_wait_for_req(host, &mrq); + if (err != MMC_ERR_NONE) { + mmc_card_set_dead(card); + continue; + } + + card->raw_scr[0] = ntohl(card->raw_scr[0]); + card->raw_scr[1] = ntohl(card->raw_scr[1]); + + mmc_decode_scr(card); + } + + mmc_deselect_cards(host); +} + static unsigned int mmc_calculate_clock(struct mmc_host *host) { struct mmc_card *card; @@ -912,6 +1032,9 @@ static void mmc_setup(struct mmc_host *host) host->ops->set_ios(host, &host->ios); mmc_read_csds(host); + + if (host->mode == MMC_MODE_SD) + mmc_read_scrs(host); } diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 0e9ec01b9c5b..18fc77f682de 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -33,6 +33,13 @@ struct mmc_csd { unsigned int capacity; }; +struct sd_scr { + unsigned char sda_vsn; + unsigned char bus_widths; +#define SD_SCR_BUS_WIDTH_1 (1<<0) +#define SD_SCR_BUS_WIDTH_4 (1<<2) +}; + struct mmc_host; /* @@ -51,8 +58,10 @@ struct mmc_card { #define MMC_STATE_READONLY (1<<4) /* card is read-only */ u32 raw_cid[4]; /* raw card CID */ u32 raw_csd[4]; /* raw card CSD */ + u32 raw_scr[2]; /* raw card SCR */ struct mmc_cid cid; /* card identification */ struct mmc_csd csd; /* card specific */ + struct sd_scr scr; /* extra SD information */ }; #define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT) -- cgit v1.2.3 From f218278a456b3c272b480443c89004c3d2a49f18 Mon Sep 17 00:00:00 2001 From: Pierre Ossman Date: Tue, 6 Sep 2005 15:18:55 -0700 Subject: [PATCH] sd: SD 4-bit bus Infrastructure for 4-bit bus transfers with SD cards. 
Signed-off-by: Pierre Ossman Cc: Russell King Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/mmc.c | 36 ++++++++++++++++++++++++++++++++++++ include/linux/mmc/host.h | 9 +++++++++ include/linux/mmc/protocol.h | 7 +++++++ 3 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 21d4fb3314f8..6414f071a2a4 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -335,6 +335,40 @@ static int mmc_select_card(struct mmc_host *host, struct mmc_card *card) if (err != MMC_ERR_NONE) return err; + /* + * Default bus width is 1 bit. + */ + host->ios.bus_width = MMC_BUS_WIDTH_1; + + /* + * We can only change the bus width of the selected + * card so therefore we have to put the handling + * here. + */ + if (host->caps & MMC_CAP_4_BIT_DATA) { + /* + * The card is in 1 bit mode by default so + * we only need to change if it supports the + * wider version. + */ + if (mmc_card_sd(card) && + (card->scr.bus_widths & SD_SCR_BUS_WIDTH_4)) { + struct mmc_command cmd; + cmd.opcode = SD_APP_SET_BUS_WIDTH; + cmd.arg = SD_BUS_WIDTH_4; + cmd.flags = MMC_RSP_R1; + + err = mmc_wait_for_app_cmd(host, card->rca, &cmd, + CMD_RETRIES); + if (err != MMC_ERR_NONE) + return err; + + host->ios.bus_width = MMC_BUS_WIDTH_4; + } + } + + host->ops->set_ios(host, &host->ios); + return MMC_ERR_NONE; } @@ -653,6 +687,7 @@ static void mmc_power_up(struct mmc_host *host) host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; host->ios.chip_select = MMC_CS_DONTCARE; host->ios.power_mode = MMC_POWER_UP; + host->ios.bus_width = MMC_BUS_WIDTH_1; host->ops->set_ios(host, &host->ios); mmc_delay(1); @@ -671,6 +706,7 @@ static void mmc_power_off(struct mmc_host *host) host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN; host->ios.chip_select = MMC_CS_DONTCARE; host->ios.power_mode = MMC_POWER_OFF; + host->ios.bus_width = MMC_BUS_WIDTH_1; host->ops->set_ios(host, &host->ios); } diff --git a/include/linux/mmc/host.h 
b/include/linux/mmc/host.h index 8c5f71376e41..6014160d9c06 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -57,6 +57,11 @@ struct mmc_ios { #define MMC_POWER_OFF 0 #define MMC_POWER_UP 1 #define MMC_POWER_ON 2 + + unsigned char bus_width; /* data bus width */ + +#define MMC_BUS_WIDTH_1 0 +#define MMC_BUS_WIDTH_4 2 }; struct mmc_host_ops { @@ -77,6 +82,10 @@ struct mmc_host { unsigned int f_max; u32 ocr_avail; + unsigned long caps; /* Host capabilities */ + +#define MMC_CAP_4_BIT_DATA (1 << 0) /* Can the host do 4 bit transfers */ + /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_hw_segs; /* see blk_queue_max_hw_segments */ diff --git a/include/linux/mmc/protocol.h b/include/linux/mmc/protocol.h index 896342817b97..f819cae92266 100644 --- a/include/linux/mmc/protocol.h +++ b/include/linux/mmc/protocol.h @@ -236,5 +236,12 @@ struct _mmc_csd { #define CSD_SPEC_VER_2 2 /* Implements system specification 2.0 - 2.2 */ #define CSD_SPEC_VER_3 3 /* Implements system specification 3.1 */ + +/* + * SD bus widths + */ +#define SD_BUS_WIDTH_1 0 +#define SD_BUS_WIDTH_4 2 + #endif /* MMC_MMC_PROTOCOL_H */ -- cgit v1.2.3 From 3158106685acac8f8d4e74a17b974f160fe77c0b Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Tue, 6 Sep 2005 15:19:06 -0700 Subject: [PATCH] Input: Add a new switch event type The corgi keyboard has need of a switch event type with slightly different behaviour from key events; this patch adds such a type to the input system as recommended by the input maintainer. 
Signed-off-by: Richard Purdie Cc: Vojtech Pavlik Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/input/evdev.c | 8 ++++++++ drivers/input/input.c | 11 +++++++++++ drivers/input/keyboard/corgikbd.c | 9 +++++---- include/linux/input.h | 25 +++++++++++++++++++++++++ 4 files changed, 49 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c index f8b278d3559b..19c14c4beb44 100644 --- a/drivers/input/evdev.c +++ b/drivers/input/evdev.c @@ -393,6 +393,7 @@ static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case EV_LED: bits = dev->ledbit; len = LED_MAX; break; case EV_SND: bits = dev->sndbit; len = SND_MAX; break; case EV_FF: bits = dev->ffbit; len = FF_MAX; break; + case EV_SW: bits = dev->swbit; len = SW_MAX; break; default: return -EINVAL; } len = NBITS(len) * sizeof(long); @@ -421,6 +422,13 @@ static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return copy_to_user(p, dev->snd, len) ? -EFAULT : len; } + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0))) { + int len; + len = NBITS(SW_MAX) * sizeof(long); + if (len > _IOC_SIZE(cmd)) len = _IOC_SIZE(cmd); + return copy_to_user(p, dev->sw, len) ? 
-EFAULT : len; + } + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) { int len; if (!dev->name) return -ENOENT; diff --git a/drivers/input/input.c b/drivers/input/input.c index a275211c8e1e..88636a204525 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -89,6 +89,15 @@ void input_event(struct input_dev *dev, unsigned int type, unsigned int code, in break; + case EV_SW: + + if (code > SW_MAX || !test_bit(code, dev->swbit) || !!test_bit(code, dev->sw) == value) + return; + + change_bit(code, dev->sw); + + break; + case EV_ABS: if (code > ABS_MAX || !test_bit(code, dev->absbit)) @@ -402,6 +411,7 @@ static void input_call_hotplug(char *verb, struct input_dev *dev) SPRINTF_BIT_A2(ledbit, "LED=", LED_MAX, EV_LED); SPRINTF_BIT_A2(sndbit, "SND=", SND_MAX, EV_SND); SPRINTF_BIT_A2(ffbit, "FF=", FF_MAX, EV_FF); + SPRINTF_BIT_A2(swbit, "SW=", SW_MAX, EV_SW); envp[i++] = NULL; @@ -490,6 +500,7 @@ static int input_devices_read(char *buf, char **start, off_t pos, int count, int SPRINTF_BIT_B2(ledbit, "LED=", LED_MAX, EV_LED); SPRINTF_BIT_B2(sndbit, "SND=", SND_MAX, EV_SND); SPRINTF_BIT_B2(ffbit, "FF=", FF_MAX, EV_FF); + SPRINTF_BIT_B2(swbit, "SW=", SW_MAX, EV_SW); len += sprintf(buf + len, "\n"); diff --git a/drivers/input/keyboard/corgikbd.c b/drivers/input/keyboard/corgikbd.c index 767e853dd766..cd4b6e795013 100644 --- a/drivers/input/keyboard/corgikbd.c +++ b/drivers/input/keyboard/corgikbd.c @@ -249,9 +249,8 @@ static void corgikbd_hinge_timer(unsigned long data) if (hinge_count >= HINGE_STABLE_COUNT) { spin_lock_irqsave(&corgikbd_data->lock, flags); - input_report_key(&corgikbd_data->input, corgikbd_data->keycode[125], (sharpsl_hinge_state == 0x00)); - input_report_key(&corgikbd_data->input, corgikbd_data->keycode[126], (sharpsl_hinge_state == 0x08)); - input_report_key(&corgikbd_data->input, corgikbd_data->keycode[127], (sharpsl_hinge_state == 0x0c)); + input_report_switch(&corgikbd_data->input, SW_0, ((sharpsl_hinge_state & CORGI_SCP_SWA) != 0)); + 
input_report_switch(&corgikbd_data->input, SW_1, ((sharpsl_hinge_state & CORGI_SCP_SWB) != 0)); input_sync(&corgikbd_data->input); spin_unlock_irqrestore(&corgikbd_data->lock, flags); @@ -321,7 +320,7 @@ static int __init corgikbd_probe(struct device *dev) corgikbd->input.id.vendor = 0x0001; corgikbd->input.id.product = 0x0001; corgikbd->input.id.version = 0x0100; - corgikbd->input.evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR); + corgikbd->input.evbit[0] = BIT(EV_KEY) | BIT(EV_REP) | BIT(EV_PWR) | BIT(EV_SW); corgikbd->input.keycode = corgikbd->keycode; corgikbd->input.keycodesize = sizeof(unsigned char); corgikbd->input.keycodemax = ARRAY_SIZE(corgikbd_keycode); @@ -330,6 +329,8 @@ static int __init corgikbd_probe(struct device *dev) for (i = 0; i < ARRAY_SIZE(corgikbd_keycode); i++) set_bit(corgikbd->keycode[i], corgikbd->input.keybit); clear_bit(0, corgikbd->input.keybit); + set_bit(SW_0, corgikbd->input.swbit); + set_bit(SW_1, corgikbd->input.swbit); input_register_device(&corgikbd->input); mod_timer(&corgikbd->htimer, jiffies + HINGE_SCAN_INTERVAL); diff --git a/include/linux/input.h b/include/linux/input.h index bdc53c6cc962..4767e5429534 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -66,6 +66,7 @@ struct input_absinfo { #define EVIOCGKEY(len) _IOC(_IOC_READ, 'E', 0x18, len) /* get global keystate */ #define EVIOCGLED(len) _IOC(_IOC_READ, 'E', 0x19, len) /* get all LEDs */ #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ +#define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ #define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ #define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ @@ -86,6 +87,7 @@ struct input_absinfo { #define EV_REL 0x02 #define EV_ABS 0x03 #define EV_MSC 0x04 +#define EV_SW 0x05 #define EV_LED 0x11 #define EV_SND 0x12 #define EV_REP 0x14 @@ -550,6 +552,20 @@ struct input_absinfo { 
#define ABS_MISC 0x28 #define ABS_MAX 0x3f +/* + * Switch events + */ + +#define SW_0 0x00 +#define SW_1 0x01 +#define SW_2 0x02 +#define SW_3 0x03 +#define SW_4 0x04 +#define SW_5 0x05 +#define SW_6 0x06 +#define SW_7 0x07 +#define SW_MAX 0x0f + /* * Misc events */ @@ -824,6 +840,7 @@ struct input_dev { unsigned long ledbit[NBITS(LED_MAX)]; unsigned long sndbit[NBITS(SND_MAX)]; unsigned long ffbit[NBITS(FF_MAX)]; + unsigned long swbit[NBITS(SW_MAX)]; int ff_effects_max; unsigned int keycodemax; @@ -844,6 +861,7 @@ struct input_dev { unsigned long key[NBITS(KEY_MAX)]; unsigned long led[NBITS(LED_MAX)]; unsigned long snd[NBITS(SND_MAX)]; + unsigned long sw[NBITS(SW_MAX)]; int absmax[ABS_MAX + 1]; int absmin[ABS_MAX + 1]; @@ -886,6 +904,7 @@ struct input_dev { #define INPUT_DEVICE_ID_MATCH_LEDBIT 0x200 #define INPUT_DEVICE_ID_MATCH_SNDBIT 0x400 #define INPUT_DEVICE_ID_MATCH_FFBIT 0x800 +#define INPUT_DEVICE_ID_MATCH_SWBIT 0x1000 #define INPUT_DEVICE_ID_MATCH_DEVICE\ (INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT) @@ -906,6 +925,7 @@ struct input_device_id { unsigned long ledbit[NBITS(LED_MAX)]; unsigned long sndbit[NBITS(SND_MAX)]; unsigned long ffbit[NBITS(FF_MAX)]; + unsigned long swbit[NBITS(SW_MAX)]; unsigned long driver_info; }; @@ -998,6 +1018,11 @@ static inline void input_report_ff_status(struct input_dev *dev, unsigned int co input_event(dev, EV_FF_STATUS, code, value); } +static inline void input_report_switch(struct input_dev *dev, unsigned int code, int value) +{ + input_event(dev, EV_SW, code, !!value); +} + static inline void input_regs(struct input_dev *dev, struct pt_regs *regs) { dev->regs = regs; -- cgit v1.2.3 From a7662236253374012d364106b6dc9161bd929e2e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 6 Sep 2005 15:19:10 -0700 Subject: [PATCH] Make ll_rw_block() wait for buffer lock Introduce new ll_rw_block() operation SWRITE meaning that block layer should wait for the buffer lock and write-out 
afterwards. Hence data in buffers at the time of call are guaranteed to be submitted to the disk. Signed-off-by: Jan Kara Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/buffer.c | 30 ++++++++++++++++-------------- include/linux/fs.h | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index a92b81403552..1c62203a4906 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -917,8 +917,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) * contents - it is a noop if I/O is still in * flight on potentially older contents. */ - wait_on_buffer(bh); - ll_rw_block(WRITE, 1, &bh); + ll_rw_block(SWRITE, 1, &bh); brelse(bh); spin_lock(lock); } @@ -2793,21 +2792,22 @@ int submit_bh(int rw, struct buffer_head * bh) /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * - * ll_rw_block() takes an array of pointers to &struct buffer_heads, - * and requests an I/O operation on them, either a %READ or a %WRITE. - * The third %READA option is described in the documentation for - * generic_make_request() which ll_rw_block() calls. + * ll_rw_block() takes an array of pointers to &struct buffer_heads, and + * requests an I/O operation on them, either a %READ or a %WRITE. The third + * %SWRITE is like %WRITE only we make sure that the *current* data in buffers + * are sent to disk. The fourth %READA option is described in the documentation + * for generic_make_request() which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit), any buffer that appears to be clean when doing a - * write request, and any buffer that appears to be up-to-date when doing - * read request. 
Further it marks as clean buffers that are processed for - * writing (the buffer cache won't assume that they are actually clean until - * the buffer gets unlocked). + * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be + * clean when doing a write request, and any buffer that appears to be + * up-to-date when doing read request. Further it marks as clean buffers that + * are processed for writing (the buffer cache won't assume that they are + * actually clean until the buffer gets unlocked). * * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2823,11 +2823,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (test_set_buffer_locked(bh)) + if (rw == SWRITE) + lock_buffer(bh); + else if (test_set_buffer_locked(bh)) continue; get_bh(bh); - if (rw == WRITE) { + if (rw == WRITE || rw == SWRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7e1b589842af..fd93ab7da905 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -69,6 +69,7 @@ extern int dir_notify_enable; #define READ 0 #define WRITE 1 #define READA 2 /* read-ahead - don't block if no resources */ +#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ #define SPECIAL 4 /* For non-blockdevice requests in request queue */ #define READ_SYNC (READ | (1 << BIO_RW_SYNC)) #define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -- cgit v1.2.3 From d0aaff9796c3310326d10da44fc0faed352a1d29 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Tue, 6 Sep 2005 15:19:26 -0700 Subject: [PATCH] Kprobes: prevent possible race conditions generic There are possible race conditions if probes are placed on routines within the kprobes files and routines used by the kprobes. 
For example, if you put a probe on the get_kprobe() routine, the system can hang while inserting probes on any routine such as do_fork(). This happens because, while inserting a probe on do_fork(), the register_kprobe() routine grabs the kprobes spin lock and executes the get_kprobe() routine; to handle the probe on get_kprobe(), kprobes_handler() gets executed, tries to grab the same kprobes spin lock, and spins forever. This patch avoids such possible race conditions by preventing probes on routines within the kprobes file and routines used by kprobes. I have modified the patches as per Andi Kleen's suggestion to move kprobes routines and other routines used by kprobes to a separate section, .kprobes.text. Also moved the page fault, exception and general protection fault handlers to the .kprobes.text section. These patches have been tested on i386, x86_64 and ppc64 architectures, and also compiled on ia64 and sparc64 architectures. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/sections.h | 1 + include/asm-generic/vmlinux.lds.h | 6 ++++ include/linux/kprobes.h | 3 ++ include/linux/linkage.h | 7 ++++ kernel/kprobes.c | 72 +++++++++++++++++++++++---------------- 5 files changed, 60 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h index 450eae22c39a..886dbd116899 100644 --- a/include/asm-generic/sections.h +++ b/include/asm-generic/sections.h @@ -12,5 +12,6 @@ extern char _sextratext[] __attribute__((weak)); extern char _eextratext[] __attribute__((weak)); extern char _end[]; extern char __per_cpu_start[], __per_cpu_end[]; +extern char __kprobes_text_start[], __kprobes_text_end[]; #endif /* _ASM_GENERIC_SECTIONS_H_ */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 3fa94288aa93..6f857be2b644 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -97,3 +97,9 @@ 
VMLINUX_SYMBOL(__lock_text_start) = .; \ *(.spinlock.text) \ VMLINUX_SYMBOL(__lock_text_end) = .; + +#define KPROBES_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__kprobes_text_start) = .; \ + *(.kprobes.text) \ + VMLINUX_SYMBOL(__kprobes_text_end) = .; diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index e050fc2d4c26..e30afdca7917 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -42,6 +42,9 @@ #define KPROBE_REENTER 0x00000004 #define KPROBE_HIT_SSDONE 0x00000008 +/* Attach to insert probes on any functions which should be ignored*/ +#define __kprobes __attribute__((__section__(".kprobes.text"))) + struct kprobe; struct pt_regs; struct kretprobe; diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 338f7795d8a0..147eb01e0d4b 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -33,6 +33,13 @@ ALIGN; \ name: +#define KPROBE_ENTRY(name) \ + .section .kprobes.text, "ax"; \ + .globl name; \ + ALIGN; \ + name: + + #endif #define NORET_TYPE /**/ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index b0237122b24e..3b7653f2e7ae 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -72,7 +73,7 @@ static struct hlist_head kprobe_insn_pages; * get_insn_slot() - Find a slot on an executable page for an instruction. * We allocate an executable page if there's no room on existing ones. 
*/ -kprobe_opcode_t *get_insn_slot(void) +kprobe_opcode_t __kprobes *get_insn_slot(void) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -117,7 +118,7 @@ kprobe_opcode_t *get_insn_slot(void) return kip->insns; } -void free_insn_slot(kprobe_opcode_t *slot) +void __kprobes free_insn_slot(kprobe_opcode_t *slot) { struct kprobe_insn_page *kip; struct hlist_node *pos; @@ -152,20 +153,20 @@ void free_insn_slot(kprobe_opcode_t *slot) } /* Locks kprobe: irqs must be disabled */ -void lock_kprobes(void) +void __kprobes lock_kprobes(void) { spin_lock(&kprobe_lock); kprobe_cpu = smp_processor_id(); } -void unlock_kprobes(void) +void __kprobes unlock_kprobes(void) { kprobe_cpu = NR_CPUS; spin_unlock(&kprobe_lock); } /* You have to be holding the kprobe_lock */ -struct kprobe *get_kprobe(void *addr) +struct kprobe __kprobes *get_kprobe(void *addr) { struct hlist_head *head; struct hlist_node *node; @@ -183,7 +184,7 @@ struct kprobe *get_kprobe(void *addr) * Aggregate handlers for multiple kprobes support - these handlers * take care of invoking the individual kprobe handlers on p->list */ -static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) +static int __kprobes aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp; @@ -198,8 +199,8 @@ static int aggr_pre_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, - unsigned long flags) +static void __kprobes aggr_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) { struct kprobe *kp; @@ -213,8 +214,8 @@ static void aggr_post_handler(struct kprobe *p, struct pt_regs *regs, return; } -static int aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, - int trapnr) +static int __kprobes aggr_fault_handler(struct kprobe *p, struct pt_regs *regs, + int trapnr) { /* * if we faulted "during" the execution of a user specified @@ -227,7 +228,7 @@ static int aggr_fault_handler(struct kprobe 
*p, struct pt_regs *regs, return 0; } -static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) +static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs) { struct kprobe *kp = curr_kprobe; if (curr_kprobe && kp->break_handler) { @@ -240,7 +241,7 @@ static int aggr_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) +struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp) { struct hlist_node *node; struct kretprobe_instance *ri; @@ -249,7 +250,8 @@ struct kretprobe_instance *get_free_rp_inst(struct kretprobe *rp) return NULL; } -static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) +static struct kretprobe_instance __kprobes *get_used_rp_inst(struct kretprobe + *rp) { struct hlist_node *node; struct kretprobe_instance *ri; @@ -258,7 +260,7 @@ static struct kretprobe_instance *get_used_rp_inst(struct kretprobe *rp) return NULL; } -void add_rp_inst(struct kretprobe_instance *ri) +void __kprobes add_rp_inst(struct kretprobe_instance *ri) { /* * Remove rp inst off the free list - @@ -276,7 +278,7 @@ void add_rp_inst(struct kretprobe_instance *ri) hlist_add_head(&ri->uflist, &ri->rp->used_instances); } -void recycle_rp_inst(struct kretprobe_instance *ri) +void __kprobes recycle_rp_inst(struct kretprobe_instance *ri) { /* remove rp inst off the rprobe_inst_table */ hlist_del(&ri->hlist); @@ -291,7 +293,7 @@ void recycle_rp_inst(struct kretprobe_instance *ri) kfree(ri); } -struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) +struct hlist_head __kprobes *kretprobe_inst_table_head(struct task_struct *tsk) { return &kretprobe_inst_table[hash_ptr(tsk, KPROBE_HASH_BITS)]; } @@ -302,7 +304,7 @@ struct hlist_head * kretprobe_inst_table_head(struct task_struct *tsk) * instances associated with this task. These left over instances represent * probed functions that have been called but will never return. 
*/ -void kprobe_flush_task(struct task_struct *tk) +void __kprobes kprobe_flush_task(struct task_struct *tk) { struct kretprobe_instance *ri; struct hlist_head *head; @@ -322,7 +324,8 @@ void kprobe_flush_task(struct task_struct *tk) * This kprobe pre_handler is registered with every kretprobe. When probe * hits it will set up the return probe. */ -static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs) +static int __kprobes pre_handler_kretprobe(struct kprobe *p, + struct pt_regs *regs) { struct kretprobe *rp = container_of(p, struct kretprobe, kp); @@ -353,7 +356,7 @@ static inline void copy_kprobe(struct kprobe *old_p, struct kprobe *p) * Add the new probe to old_p->list. Fail if this is the * second jprobe at the address - two jprobes can't coexist */ -static int add_new_kprobe(struct kprobe *old_p, struct kprobe *p) +static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p) { struct kprobe *kp; @@ -395,7 +398,8 @@ static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p) * the intricacies * TODO: Move kcalloc outside the spinlock */ -static int register_aggr_kprobe(struct kprobe *old_p, struct kprobe *p) +static int __kprobes register_aggr_kprobe(struct kprobe *old_p, + struct kprobe *p) { int ret = 0; struct kprobe *ap; @@ -434,15 +438,25 @@ static inline void cleanup_aggr_kprobe(struct kprobe *old_p, spin_unlock_irqrestore(&kprobe_lock, flags); } -int register_kprobe(struct kprobe *p) +static int __kprobes in_kprobes_functions(unsigned long addr) +{ + if (addr >= (unsigned long)__kprobes_text_start + && addr < (unsigned long)__kprobes_text_end) + return -EINVAL; + return 0; +} + +int __kprobes register_kprobe(struct kprobe *p) { int ret = 0; unsigned long flags = 0; struct kprobe *old_p; - if ((ret = arch_prepare_kprobe(p)) != 0) { + if ((ret = in_kprobes_functions((unsigned long) p->addr)) != 0) + return ret; + if ((ret = arch_prepare_kprobe(p)) != 0) goto rm_kprobe; - } + spin_lock_irqsave(&kprobe_lock, 
flags); old_p = get_kprobe(p->addr); p->nmissed = 0; @@ -466,7 +480,7 @@ rm_kprobe: return ret; } -void unregister_kprobe(struct kprobe *p) +void __kprobes unregister_kprobe(struct kprobe *p) { unsigned long flags; struct kprobe *old_p; @@ -487,7 +501,7 @@ static struct notifier_block kprobe_exceptions_nb = { .priority = 0x7fffffff /* we need to notified first */ }; -int register_jprobe(struct jprobe *jp) +int __kprobes register_jprobe(struct jprobe *jp) { /* Todo: Verify probepoint is a function entry point */ jp->kp.pre_handler = setjmp_pre_handler; @@ -496,14 +510,14 @@ int register_jprobe(struct jprobe *jp) return register_kprobe(&jp->kp); } -void unregister_jprobe(struct jprobe *jp) +void __kprobes unregister_jprobe(struct jprobe *jp) { unregister_kprobe(&jp->kp); } #ifdef ARCH_SUPPORTS_KRETPROBES -int register_kretprobe(struct kretprobe *rp) +int __kprobes register_kretprobe(struct kretprobe *rp) { int ret = 0; struct kretprobe_instance *inst; @@ -540,14 +554,14 @@ int register_kretprobe(struct kretprobe *rp) #else /* ARCH_SUPPORTS_KRETPROBES */ -int register_kretprobe(struct kretprobe *rp) +int __kprobes register_kretprobe(struct kretprobe *rp) { return -ENOSYS; } #endif /* ARCH_SUPPORTS_KRETPROBES */ -void unregister_kretprobe(struct kretprobe *rp) +void __kprobes unregister_kretprobe(struct kretprobe *rp) { unsigned long flags; struct kretprobe_instance *ri; -- cgit v1.2.3 From 333fad5364d6b457c8d837f7d05802d2aaf8a961 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 8 Sep 2005 09:59:17 +0900 Subject: [IPV6]: Support several new sockopt / ancillary data in Advanced API (RFC3542). Support several new socket options / ancillary data: IPV6_RECVPKTINFO, IPV6_PKTINFO, IPV6_RECVHOPOPTS, IPV6_HOPOPTS, IPV6_RECVDSTOPTS, IPV6_DSTOPTS, IPV6_RTHDRDSTOPTS, IPV6_RECVRTHDR, IPV6_RTHDR, IPV6_RECVHOPLIMIT, IPV6_HOPLIMIT. Old semantics are preserved as IPV6_2292xxxx so that we can maintain backward compatibility. 
Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 38 +++++++++-- include/linux/ipv6.h | 10 ++- include/net/ipv6.h | 4 ++ net/ipv6/datagram.c | 110 ++++++++++++++++++++++++++++--- net/ipv6/exthdrs.c | 107 ++++++++++++++++++++++++++++++- net/ipv6/ip6_flowlabel.c | 12 ++-- net/ipv6/ipv6_sockglue.c | 164 +++++++++++++++++++++++++++++++++++++++++++---- net/ipv6/raw.c | 3 +- net/ipv6/tcp_ipv6.c | 21 +++--- net/ipv6/udp.c | 3 +- 10 files changed, 423 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index dcf5720ffcbb..c11022f2f2ac 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -148,13 +148,13 @@ struct in6_flowlabel_req */ #define IPV6_ADDRFORM 1 -#define IPV6_PKTINFO 2 -#define IPV6_HOPOPTS 3 -#define IPV6_DSTOPTS 4 -#define IPV6_RTHDR 5 -#define IPV6_PKTOPTIONS 6 +#define IPV6_2292PKTINFO 2 +#define IPV6_2292HOPOPTS 3 +#define IPV6_2292DSTOPTS 4 +#define IPV6_2292RTHDR 5 +#define IPV6_2292PKTOPTIONS 6 #define IPV6_CHECKSUM 7 -#define IPV6_HOPLIMIT 8 +#define IPV6_2292HOPLIMIT 8 #define IPV6_NEXTHOP 9 #define IPV6_AUTHHDR 10 /* obsolete */ #define IPV6_FLOWINFO 11 @@ -198,4 +198,30 @@ struct in6_flowlabel_req * MCAST_MSFILTER 48 */ +/* RFC3542 advanced socket options (50-67) */ +#define IPV6_RECVPKTINFO 50 +#define IPV6_PKTINFO 51 +#if 0 +#define IPV6_RECVPATHMTU 52 +#define IPV6_PATHMTU 53 +#define IPV6_DONTFRAG 54 +#define IPV6_USE_MIN_MTU 55 +#endif +#define IPV6_RECVHOPOPTS 56 +#define IPV6_HOPOPTS 57 +#if 0 +#define IPV6_RECVRTHDRDSTOPTS 58 /* Unused, see net/ipv6/datagram.c */ +#endif +#define IPV6_RTHDRDSTOPTS 59 +#define IPV6_RECVRTHDR 60 +#define IPV6_RTHDR 61 +#define IPV6_RECVDSTOPTS 62 +#define IPV6_DSTOPTS 63 +#define IPV6_RECVHOPLIMIT 64 +#define IPV6_HOPLIMIT 65 +#if 0 +#define IPV6_RECVTCLASS 66 +#define IPV6_TCLASS 67 +#endif + #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 3c7dbc6a0a70..2581f1c94df5 100644 --- a/include/linux/ipv6.h +++ 
b/include/linux/ipv6.h @@ -189,6 +189,7 @@ struct inet6_skb_parm { __u16 dst0; __u16 srcrt; __u16 dst1; + __u16 lastopt; }; #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) @@ -234,14 +235,19 @@ struct ipv6_pinfo { /* pktoption flags */ union { struct { - __u8 srcrt:2, + __u16 srcrt:2, + osrcrt:2, rxinfo:1, + rxoinfo:1, rxhlim:1, + rxohlim:1, hopopts:1, + ohopopts:1, dstopts:1, + odstopts:1, rxflow:1; } bits; - __u8 all; + __u16 all; } rxopt; /* sockopt flags */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 3203eaff4bd4..8a9fe9434e94 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -233,6 +233,10 @@ extern int ip6_ra_control(struct sock *sk, int sel, extern int ipv6_parse_hopopts(struct sk_buff *skb, int); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); +extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, + int newoptlen); extern int ip6_frag_nqueues; extern atomic_t ip6_frag_mem; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 01468fab3d3d..832476bbc5cb 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -394,21 +394,85 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); } + + /* HbH is allowed only once */ if (np->rxopt.bits.hopopts && opt->hop) { u8 *ptr = skb->nh.raw + opt->hop; put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.dstopts && opt->dst0) { + + if (opt->lastopt && + (np->rxopt.bits.dstopts || np->rxopt.bits.srcrt)) { + /* + * Silly enough, but we need to reparse in order to + * report extension headers (except for HbH) + * in order. + * + * Also note that IPV6_RECVRTHDRDSTOPTS is NOT + * (and WILL NOT be) defined because + * IPV6_RECVDSTOPTS is more generic. 
--yoshfuji + */ + unsigned int off = sizeof(struct ipv6hdr); + u8 nexthdr = skb->nh.ipv6h->nexthdr; + + while (off <= opt->lastopt) { + unsigned len; + u8 *ptr = skb->nh.raw + off; + + switch(nexthdr) { + case IPPROTO_DSTOPTS: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.dstopts) + put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, len, ptr); + break; + case IPPROTO_ROUTING: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + if (np->rxopt.bits.srcrt) + put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, len, ptr); + break; + case IPPROTO_AH: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 2; + break; + default: + nexthdr = ptr[0]; + len = (ptr[1] + 1) << 3; + break; + } + + off += len; + } + } + + /* socket options in old style */ + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + + src_info.ipi6_ifindex = opt->iif; + ipv6_addr_copy(&src_info.ipi6_addr, &skb->nh.ipv6h->daddr); + put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = skb->nh.ipv6h->hop_limit; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } + if (np->rxopt.bits.ohopopts && opt->hop) { + u8 *ptr = skb->nh.raw + opt->hop; + put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr); + } + if (np->rxopt.bits.odstopts && opt->dst0) { u8 *ptr = skb->nh.raw + opt->dst0; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } - if (np->rxopt.bits.srcrt && opt->srcrt) { + if (np->rxopt.bits.osrcrt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = (struct ipv6_rt_hdr *)(skb->nh.raw + opt->srcrt); - put_cmsg(msg, SOL_IPV6, IPV6_RTHDR, (rthdr->hdrlen+1) << 3, rthdr); + put_cmsg(msg, SOL_IPV6, IPV6_2292RTHDR, (rthdr->hdrlen+1) << 3, rthdr); } - if (np->rxopt.bits.dstopts && opt->dst1) { + if (np->rxopt.bits.odstopts && opt->dst1) { u8 *ptr = skb->nh.raw + opt->dst1; - put_cmsg(msg, SOL_IPV6, IPV6_DSTOPTS, (ptr[1]+1)<<3, ptr); + put_cmsg(msg, SOL_IPV6, 
IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr); } return 0; } @@ -438,6 +502,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: + case IPV6_2292PKTINFO: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -492,6 +557,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->fl6_flowlabel = IPV6_FLOWINFO_MASK & *(u32 *)CMSG_DATA(cmsg); break; + case IPV6_2292HOPOPTS: case IPV6_HOPOPTS: if (opt->hopopt || cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; @@ -512,7 +578,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->hopopt = hdr; break; - case IPV6_DSTOPTS: + case IPV6_2292DSTOPTS: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { err = -EINVAL; goto exit_f; @@ -536,6 +602,33 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->dst1opt = hdr; break; + case IPV6_DSTOPTS: + case IPV6_RTHDRDSTOPTS: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_opt_hdr))) { + err = -EINVAL; + goto exit_f; + } + + hdr = (struct ipv6_opt_hdr *)CMSG_DATA(cmsg); + len = ((hdr->hdrlen + 1) << 3); + if (cmsg->cmsg_len < CMSG_LEN(len)) { + err = -EINVAL; + goto exit_f; + } + if (!capable(CAP_NET_RAW)) { + err = -EPERM; + goto exit_f; + } + if (cmsg->cmsg_type == IPV6_DSTOPTS) { + opt->opt_flen += len; + opt->dst1opt = hdr; + } else { + opt->opt_nflen += len; + opt->dst0opt = hdr; + } + break; + + case IPV6_2292RTHDR: case IPV6_RTHDR: if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct ipv6_rt_hdr))) { err = -EINVAL; @@ -568,7 +661,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, opt->opt_nflen += len; opt->srcrt = rthdr; - if (opt->dst1opt) { + if (cmsg->cmsg_type == IPV6_2292RTHDR && opt->dst1opt) { int dsthdrlen = ((opt->dst1opt->hdrlen+1)<<3); opt->opt_nflen += dsthdrlen; @@ -579,6 +672,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, break; + case IPV6_2292HOPLIMIT: case IPV6_HOPLIMIT: if 
(cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { err = -EINVAL; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 5be6da2584ee..ffcda45e2c1e 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -164,6 +164,7 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp) return -1; } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { @@ -243,6 +244,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp) looped_back: if (hdr->segments_left == 0) { + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; opt->dst0 = opt->dst1; @@ -539,10 +541,15 @@ void ipv6_push_nfrag_opts(struct sk_buff *skb, struct ipv6_txoptions *opt, u8 *proto, struct in6_addr **daddr) { - if (opt->srcrt) + if (opt->srcrt) { ipv6_push_rthdr(skb, proto, opt->srcrt, daddr); - if (opt->dst0opt) - ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + /* + * IPV6_RTHDRDSTOPTS is ignored + * unless IPV6_RTHDR is set (RFC3542). 
+ */ + if (opt->dst0opt) + ipv6_push_exthdr(skb, proto, NEXTHDR_DEST, opt->dst0opt); + } if (opt->hopopt) ipv6_push_exthdr(skb, proto, NEXTHDR_HOP, opt->hopopt); } @@ -573,3 +580,97 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) } return opt2; } + +static int ipv6_renew_option(void *ohdr, + struct ipv6_opt_hdr __user *newopt, int newoptlen, + int inherit, + struct ipv6_opt_hdr **hdr, + char **p) +{ + if (inherit) { + if (ohdr) { + memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr)); + *hdr = (struct ipv6_opt_hdr *)*p; + *p += CMSG_ALIGN(ipv6_optlen(*(struct ipv6_opt_hdr **)hdr)); + } + } else { + if (newopt) { + if (copy_from_user(*p, newopt, newoptlen)) + return -EFAULT; + *hdr = (struct ipv6_opt_hdr *)*p; + if (ipv6_optlen(*(struct ipv6_opt_hdr **)hdr) > newoptlen) + return -EINVAL; + *p += CMSG_ALIGN(newoptlen); + } + } + return 0; +} + +struct ipv6_txoptions * +ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, + int newtype, + struct ipv6_opt_hdr __user *newopt, int newoptlen) +{ + int tot_len = 0; + char *p; + struct ipv6_txoptions *opt2; + int err; + + if (newtype != IPV6_HOPOPTS && opt->hopopt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->hopopt)); + if (newtype != IPV6_RTHDRDSTOPTS && opt->dst0opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst0opt)); + if (newtype != IPV6_RTHDR && opt->srcrt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->srcrt)); + if (newtype != IPV6_DSTOPTS && opt->dst1opt) + tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt)); + if (newopt && newoptlen) + tot_len += CMSG_ALIGN(newoptlen); + + if (!tot_len) + return NULL; + + opt2 = sock_kmalloc(sk, tot_len, GFP_ATOMIC); + if (!opt2) + return ERR_PTR(-ENOBUFS); + + memset(opt2, 0, tot_len); + + opt2->tot_len = tot_len; + p = (char *)(opt2 + 1); + + err = ipv6_renew_option(opt->hopopt, newopt, newoptlen, + newtype != IPV6_HOPOPTS, + &opt2->hopopt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst0opt, newopt, newoptlen, + newtype != 
IPV6_RTHDRDSTOPTS, + &opt2->dst0opt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->srcrt, newopt, newoptlen, + newtype != IPV6_RTHDR, + (struct ipv6_opt_hdr **)opt2->srcrt, &p); + if (err) + goto out; + + err = ipv6_renew_option(opt->dst1opt, newopt, newoptlen, + newtype != IPV6_DSTOPTS, + &opt2->dst1opt, &p); + if (err) + goto out; + + opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) + + (opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) + + (opt2->srcrt ? ipv6_optlen(opt2->srcrt) : 0); + opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0); + + return opt2; +out: + sock_kfree_s(sk, p, tot_len); + return ERR_PTR(err); +} + diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index b6c73da5ff35..2d5ce376c265 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -225,16 +225,20 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space, struct ip6_flowlabel * fl, struct ipv6_txoptions * fopt) { - struct ipv6_txoptions * fl_opt = fl->opt; + struct ipv6_txoptions * fl_opt = fl ? fl->opt : NULL; - if (fopt == NULL || fopt->opt_flen == 0) - return fl_opt; + if (fopt == NULL || fopt->opt_flen == 0) { + if (!fl_opt || !fl_opt->dst0opt || fl_opt->srcrt) + return fl_opt; + } if (fl_opt != NULL) { opt_space->hopopt = fl_opt->hopopt; - opt_space->dst0opt = fl_opt->dst0opt; + opt_space->dst0opt = fl_opt->srcrt ? 
fl_opt->dst0opt : NULL; opt_space->srcrt = fl_opt->srcrt; opt_space->opt_nflen = fl_opt->opt_nflen; + if (fl_opt->dst0opt && !fl_opt->srcrt) + opt_space->opt_nflen -= ipv6_optlen(fl_opt->dst0opt); } else { if (fopt->opt_nflen == 0) return fopt; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 76466af8331e..dc1d9914bf7d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -210,39 +210,127 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: np->rxopt.bits.rxinfo = valbool; retv = 0; break; + + case IPV6_2292PKTINFO: + np->rxopt.bits.rxoinfo = valbool; + retv = 0; + break; - case IPV6_HOPLIMIT: + case IPV6_RECVHOPLIMIT: np->rxopt.bits.rxhlim = valbool; retv = 0; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + np->rxopt.bits.rxohlim = valbool; + retv = 0; + break; + + case IPV6_RECVRTHDR: if (val < 0 || val > 2) goto e_inval; np->rxopt.bits.srcrt = val; retv = 0; break; - case IPV6_HOPOPTS: + case IPV6_2292RTHDR: + if (val < 0 || val > 2) + goto e_inval; + np->rxopt.bits.osrcrt = val; + retv = 0; + break; + + case IPV6_RECVHOPOPTS: np->rxopt.bits.hopopts = valbool; retv = 0; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + np->rxopt.bits.ohopopts = valbool; + retv = 0; + break; + + case IPV6_RECVDSTOPTS: np->rxopt.bits.dstopts = valbool; retv = 0; break; + case IPV6_2292DSTOPTS: + np->rxopt.bits.odstopts = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; break; - case IPV6_PKTOPTIONS: + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + struct ipv6_txoptions *opt; + if (optlen == 0) + optval = 0; + + /* hop-by-hop / destination options are privileged option */ + retv = -EPERM; + if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) + break; + + retv = -EINVAL; + if (optlen & 0x7 || optlen > 8 * 255) + break; + + opt = ipv6_renew_options(sk, np->opt, optname, + 
(struct ipv6_opt_hdr __user *)optval, + optlen); + if (IS_ERR(opt)) { + retv = PTR_ERR(opt); + break; + } + + /* routing header option needs extra check */ + if (optname == IPV6_RTHDR && opt->srcrt) { + struct ipv6_rt_hdr *rthdr = opt->srcrt; + if (rthdr->type) + goto sticky_done; + if ((rthdr->hdrlen & 1) || + (rthdr->hdrlen >> 1) != rthdr->segments_left) + goto sticky_done; + } + + retv = 0; + if (sk->sk_type == SOCK_STREAM) { + if (opt) { + struct tcp_sock *tp = tcp_sk(sk); + if (!((1 << sk->sk_state) & + (TCPF_LISTEN | TCPF_CLOSE)) + && inet_sk(sk)->daddr != LOOPBACK4_IPV6) { + tp->ext_header_len = opt->opt_flen + opt->opt_nflen; + tcp_sync_mss(sk, tp->pmtu_cookie); + } + } + opt = xchg(&np->opt, opt); + sk_dst_reset(sk); + } else { + write_lock(&sk->sk_dst_lock); + opt = xchg(&np->opt, opt); + write_unlock(&sk->sk_dst_lock); + sk_dst_reset(sk); + } +sticky_done: + if (opt) + sock_kfree_s(sk, opt, opt->tot_len); + break; + } + + case IPV6_2292PKTOPTIONS: { struct ipv6_txoptions *opt = NULL; struct msghdr msg; @@ -529,6 +617,17 @@ e_inval: return -EINVAL; } +int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_opt_hdr *hdr, + char __user *optval, int len) +{ + if (!hdr) + return 0; + len = min_t(int, len, ipv6_optlen(hdr)); + if (copy_to_user(optval, hdr, ipv6_optlen(hdr))) + return -EFAULT; + return len; +} + int ipv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -567,7 +666,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, return err; } - case IPV6_PKTOPTIONS: + case IPV6_2292PKTOPTIONS: { struct msghdr msg; struct sk_buff *skb; @@ -601,6 +700,16 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxoinfo) { + struct in6_pktinfo src_info; + src_info.ipi6_ifindex = np->mcast_oif; + ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr); + put_cmsg(&msg, SOL_IPV6, 
IPV6_2292PKTINFO, sizeof(src_info), &src_info); + } + if (np->rxopt.bits.rxohlim) { + int hlim = np->mcast_hops; + put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -625,26 +734,59 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->ipv6only; break; - case IPV6_PKTINFO: + case IPV6_RECVPKTINFO: val = np->rxopt.bits.rxinfo; break; - case IPV6_HOPLIMIT: + case IPV6_2292PKTINFO: + val = np->rxopt.bits.rxoinfo; + break; + + case IPV6_RECVHOPLIMIT: val = np->rxopt.bits.rxhlim; break; - case IPV6_RTHDR: + case IPV6_2292HOPLIMIT: + val = np->rxopt.bits.rxohlim; + break; + + case IPV6_RECVRTHDR: val = np->rxopt.bits.srcrt; break; + case IPV6_2292RTHDR: + val = np->rxopt.bits.osrcrt; + break; + case IPV6_HOPOPTS: + case IPV6_RTHDRDSTOPTS: + case IPV6_RTHDR: + case IPV6_DSTOPTS: + { + + lock_sock(sk); + len = ipv6_getsockopt_sticky(sk, np->opt->hopopt, + optval, len); + release_sock(sk); + return put_user(len, optlen); + } + + case IPV6_RECVHOPOPTS: val = np->rxopt.bits.hopopts; break; - case IPV6_DSTOPTS: + case IPV6_2292HOPOPTS: + val = np->rxopt.bits.ohopopts; + break; + + case IPV6_RECVDSTOPTS: val = np->rxopt.bits.dstopts; break; + case IPV6_2292DSTOPTS: + val = np->rxopt.bits.odstopts; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ed3a76b30fd9..e527a1652d7c 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -755,8 +755,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl.proto = proto; rawv6_probe_proto_opt(&fl, msg); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 794734f1d230..246414b27d0e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -849,7 +849,7 @@ static int tcp_v6_send_synack(struct 
sock *sk, struct request_sock *req, if (dst == NULL) { opt = np->opt; if (opt == NULL && - np->rxopt.bits.srcrt == 2 && + np->rxopt.bits.osrcrt == 2 && treq->pktopts) { struct sk_buff *pktopts = treq->pktopts; struct inet6_skb_parm *rxopt = IP6CB(pktopts); @@ -915,11 +915,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb) struct inet6_skb_parm *opt = IP6CB(skb); if (np->rxopt.all) { - if ((opt->hop && np->rxopt.bits.hopopts) || - ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) && - np->rxopt.bits.rxflow) || - (opt->srcrt && np->rxopt.bits.srcrt) || - ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts)) + if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || + ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) || + (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || + ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return 1; } return 0; @@ -1190,8 +1189,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb->h.th); treq->pktopts = NULL; if (ipv6_opt_accepted(sk, skb) || - np->rxopt.bits.rxinfo || - np->rxopt.bits.rxhlim) { + np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { atomic_inc(&skb->users); treq->pktopts = skb; } @@ -1288,7 +1287,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, if (sk_acceptq_is_full(sk)) goto out_overflow; - if (np->rxopt.bits.srcrt == 2 && + if (np->rxopt.bits.osrcrt == 2 && opt == NULL && treq->pktopts) { struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts); if (rxopt->srcrt) @@ -1544,9 +1543,9 @@ ipv6_pktoptions: tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - if (np->rxopt.bits.rxinfo) + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) np->mcast_oif = inet6_iif(opt_skb); - if (np->rxopt.bits.rxhlim) + if 
(np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = opt_skb->nh.ipv6h->hop_limit; if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 390d750449ce..aa6eaf3f18a6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -773,8 +773,7 @@ do_udp_sendmsg: } if (opt == NULL) opt = np->opt; - if (flowlabel) - opt = fl6_merge_options(&opt_space, flowlabel, opt); + opt = fl6_merge_options(&opt_space, flowlabel, opt); fl->proto = IPPROTO_UDP; ipv6_addr_copy(&fl->fl6_dst, daddr); -- cgit v1.2.3 From 41a1f8ea4fbfcdc4232f023732584aae2220de31 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 8 Sep 2005 10:19:03 +0900 Subject: [IPV6]: Support IPV6_{RECV,}TCLASS socket options / ancillary data. Based on patch from David L Stevens Signed-off-by: David L Stevens Signed-off-by: YOSHIFUJI Hideaki --- include/linux/in6.h | 2 -- include/linux/ipv6.h | 5 ++++- include/net/ipv6.h | 1 + include/net/transp_v6.h | 2 +- net/ipv6/datagram.c | 25 ++++++++++++++++++++++++- net/ipv6/icmp.c | 15 ++++++++++++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ip6_output.c | 24 +++++++++++++++++------- net/ipv6/ipv6_sockglue.c | 22 +++++++++++++++++++++- net/ipv6/raw.c | 14 +++++++++++--- net/ipv6/udp.c | 16 ++++++++++++---- 11 files changed, 104 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/in6.h b/include/linux/in6.h index c11022f2f2ac..bd32b79d6295 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -219,9 +219,7 @@ struct in6_flowlabel_req #define IPV6_DSTOPTS 63 #define IPV6_RECVHOPLIMIT 64 #define IPV6_HOPLIMIT 65 -#if 0 #define IPV6_RECVTCLASS 66 #define IPV6_TCLASS 67 -#endif #endif diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2581f1c94df5..6c5f7b39a4b0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -245,7 +245,8 @@ struct ipv6_pinfo { ohopopts:1, dstopts:1, odstopts:1, - rxflow:1; + rxflow:1, + rxtclass:1; } bits; __u16 all; } 
rxopt; @@ -256,6 +257,7 @@ struct ipv6_pinfo { sndflow:1, pmtudisc:2, ipv6only:1; + __u8 tclass; __u32 dst_cookie; @@ -269,6 +271,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct rt6_info *rt; int hop_limit; + int tclass; } cork; }; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8a9fe9434e94..65ec86678a08 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -377,6 +377,7 @@ extern int ip6_append_data(struct sock *sk, int length, int transhdrlen, int hlimit, + int tclass, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 8b075ab7a26c..4e86f2de6638 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -37,7 +37,7 @@ extern int datagram_recv_ctl(struct sock *sk, extern int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit); + int *hlimit, int *tclass); #define LOOPBACK4_IPV6 __constant_htonl(0x7f000006) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 832476bbc5cb..157cec648032 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -390,6 +390,11 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxtclass) { + int tclass = (ntohl(*(u32 *)skb->nh.ipv6h) >> 20) & 0xff; + put_cmsg(msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); + } + if (np->rxopt.bits.rxflow && (*(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK)) { u32 flowinfo = *(u32*)skb->nh.raw & IPV6_FLOWINFO_MASK; put_cmsg(msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); @@ -479,7 +484,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, - int *hlimit) + int *hlimit, int *tclass) { struct in6_pktinfo *src_info; struct cmsghdr *cmsg; @@ -682,6 +687,24 @@ int 
datagram_send_ctl(struct msghdr *msg, struct flowi *fl, *hlimit = *(int *)CMSG_DATA(cmsg); break; + case IPV6_TCLASS: + { + int tc; + + err = -EINVAL; + if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) { + goto exit_f; + } + + tc = *(int *)CMSG_DATA(cmsg); + if (tc < 0 || tc > 0xff) + goto exit_f; + + err = 0; + *tclass = tc; + + break; + } default: LIMIT_NETDEBUG(KERN_DEBUG "invalid cmsg type: %d\n", cmsg->cmsg_type); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index fa8f1bb0aa52..34e99c55e856 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int iif = 0; int addr_type = 0; int len; - int hlimit; + int hlimit, tclass; int err = 0; if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) @@ -385,6 +385,10 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -400,7 +404,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, err = ip6_append_data(sk, icmpv6_getfrag, &msg, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr), - hlimit, NULL, &fl, (struct rt6_info*)dst, + hlimit, tclass, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT); if (err) { ip6_flush_pending_frames(sk); @@ -434,6 +438,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + int tclass; saddr = &skb->nh.ipv6h->daddr; @@ -475,13 +480,17 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + idev = in6_dev_get(skb->dev); msg.skb = skb; msg.offset = 0; err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, NULL, &fl, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl, (struct 
rt6_info*)dst, MSG_DONTWAIT); if (err) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 2d5ce376c265..a7db762de14a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -314,7 +314,7 @@ fl_create(struct in6_flowlabel_req *freq, char __user *optval, int optlen, int * msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk); + err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 01ef94f7c7f1..2f589f24c093 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -166,7 +166,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6hdr *hdr; u8 proto = fl->proto; int seg_len = skb->len; - int hlimit; + int hlimit, tclass; u32 mtu; if (opt) { @@ -202,7 +202,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, * Fill in the IPv6 header */ - *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel; hlimit = -1; if (np) hlimit = np->hop_limit; @@ -211,6 +210,14 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (hlimit < 0) hlimit = ipv6_get_hoplimit(dst->dev); + tclass = -1; + if (np) + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + + *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; @@ -762,10 +769,11 @@ out_err_release: return err; } -int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), - void *from, int length, int transhdrlen, - int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt, - unsigned int flags) +int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, + int offset, int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int 
tclass, struct ipv6_txoptions *opt, struct flowi *fl, + struct rt6_info *rt, unsigned int flags) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -803,6 +811,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offse np->cork.rt = rt; inet->cork.fl = *fl; np->cork.hop_limit = hlimit; + np->cork.tclass = tclass; inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path); if (dst_allfrag(rt->u.dst.path)) inet->cork.flags |= IPCORK_ALLFRAG; @@ -1084,7 +1093,8 @@ int ip6_push_pending_frames(struct sock *sk) skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr)); - *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000); + *(u32*)hdr = fl->fl6_flowlabel | + htonl(0x60000000 | ((int)np->cork.tclass << 20)); if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index dc1d9914bf7d..8567873d0dd8 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -264,6 +264,18 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_TCLASS: + if (val < 0 || val > 0xff) + goto e_inval; + np->tclass = val; + retv = 0; + break; + + case IPV6_RECVTCLASS: + np->rxopt.bits.rxtclass = valbool; + retv = 0; + break; + case IPV6_FLOWINFO: np->rxopt.bits.rxflow = valbool; retv = 0; @@ -364,7 +376,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk); + retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); if (retv) goto done; update: @@ -787,6 +799,14 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, val = np->rxopt.bits.odstopts; break; + case IPV6_TCLASS: + val = np->tclass; + break; + + case IPV6_RECVTCLASS: + val = np->rxopt.bits.rxtclass; + break; + case IPV6_FLOWINFO: val = np->rxopt.bits.rxflow; break; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 
e527a1652d7c..2ad37893334a 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -655,6 +655,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct flowi fl; int addr_len = msg->msg_namelen; int hlimit = -1; + int tclass = -1; u16 proto; int err; @@ -740,7 +741,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -797,6 +798,12 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->cork.tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; @@ -805,8 +812,9 @@ back_from_confirm: err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags); } else { lock_sock(sk); - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0, - hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, + len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst, + msg->msg_flags); if (err) ip6_flush_pending_frames(sk); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index aa6eaf3f18a6..dbd18a9d1669 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -637,6 +637,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int addr_len = msg->msg_namelen; int ulen = len; int hlimit = -1; + int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; @@ -758,7 +759,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit); + err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -814,6 +815,12 @@ do_udp_sendmsg: 
hlimit = ipv6_get_hoplimit(dst->dev); } + if (tclass < 0) { + tclass = np->tclass; + if (tclass < 0) + tclass = 0; + } + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -833,9 +840,10 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), - hlimit, opt, fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), hlimit, tclass, opt, fl, + (struct rt6_info*)dst, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) -- cgit v1.2.3 From 34bb61f9ddabd7a7f909cbfb05592eb775f6662a Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Tue, 6 Sep 2005 16:56:51 -0700 Subject: [PATCH] fix klist semantics for lists which have elements removed on traversal The problem is that klists claim to provide semantics for safe traversal of lists which are being modified. The failure case is when traversal of a list causes element removal (a fairly common case). The issue is that although the list node is refcounted, if it is embedded in an object (which is universally the case), then the object will be freed regardless of the klist refcount leading to slab corruption because the klist iterator refers to the prior element to get the next. The solution is to make the klist take and release references to the embedding object meaning that the embedding object won't be released until the list relinquishes the reference to it. 
(akpm: fast-track this because it's needed for the 2.6.13 scsi merge) Signed-off-by: James Bottomley Signed-off-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/base/bus.c | 34 ++++++++++++++++++++++++++++++++-- drivers/base/core.c | 17 ++++++++++++++++- drivers/base/driver.c | 15 ++++++++++++++- include/linux/klist.h | 8 +++++--- lib/klist.c | 18 +++++++++++++++++- 5 files changed, 84 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/bus.c b/drivers/base/bus.c index 17e96698410e..03204bfd17af 100644 --- a/drivers/base/bus.c +++ b/drivers/base/bus.c @@ -568,6 +568,36 @@ static void bus_remove_attrs(struct bus_type * bus) } } +static void klist_devices_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_bus); + + get_device(dev); +} + +static void klist_devices_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_bus); + + put_device(dev); +} + +static void klist_drivers_get(struct klist_node *n) +{ + struct device_driver *drv = container_of(n, struct device_driver, + knode_bus); + + get_driver(drv); +} + +static void klist_drivers_put(struct klist_node *n) +{ + struct device_driver *drv = container_of(n, struct device_driver, + knode_bus); + + put_driver(drv); +} + /** * bus_register - register a bus with the system. * @bus: bus. 
@@ -602,8 +632,8 @@ int bus_register(struct bus_type * bus) if (retval) goto bus_drivers_fail; - klist_init(&bus->klist_devices); - klist_init(&bus->klist_drivers); + klist_init(&bus->klist_devices, klist_devices_get, klist_devices_put); + klist_init(&bus->klist_drivers, klist_drivers_get, klist_drivers_put); bus_add_attrs(bus); pr_debug("bus type '%s' registered\n", bus->name); diff --git a/drivers/base/core.c b/drivers/base/core.c index c8a33df00761..6ab73f5c799a 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -191,6 +191,20 @@ void device_remove_file(struct device * dev, struct device_attribute * attr) } } +static void klist_children_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_parent); + + get_device(dev); +} + +static void klist_children_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_parent); + + put_device(dev); +} + /** * device_initialize - init device structure. @@ -207,7 +221,8 @@ void device_initialize(struct device *dev) { kobj_set_kset_s(dev, devices_subsys); kobject_init(&dev->kobj); - klist_init(&dev->klist_children); + klist_init(&dev->klist_children, klist_children_get, + klist_children_put); INIT_LIST_HEAD(&dev->dma_pools); init_MUTEX(&dev->sem); } diff --git a/drivers/base/driver.c b/drivers/base/driver.c index 291c5954a3af..ef3fe513e398 100644 --- a/drivers/base/driver.c +++ b/drivers/base/driver.c @@ -142,6 +142,19 @@ void put_driver(struct device_driver * drv) kobject_put(&drv->kobj); } +static void klist_devices_get(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_driver); + + get_device(dev); +} + +static void klist_devices_put(struct klist_node *n) +{ + struct device *dev = container_of(n, struct device, knode_driver); + + put_device(dev); +} /** * driver_register - register driver with bus @@ -157,7 +170,7 @@ void put_driver(struct device_driver * drv) */ int driver_register(struct device_driver * drv) { - 
klist_init(&drv->klist_devices); + klist_init(&drv->klist_devices, klist_devices_get, klist_devices_put); init_completion(&drv->unloaded); return bus_add_driver(drv); } diff --git a/include/linux/klist.h b/include/linux/klist.h index c4d1fae4dd89..74071254c9d3 100644 --- a/include/linux/klist.h +++ b/include/linux/klist.h @@ -17,15 +17,17 @@ #include #include - +struct klist_node; struct klist { spinlock_t k_lock; struct list_head k_list; + void (*get)(struct klist_node *); + void (*put)(struct klist_node *); }; -extern void klist_init(struct klist * k); - +extern void klist_init(struct klist * k, void (*get)(struct klist_node *), + void (*put)(struct klist_node *)); struct klist_node { struct klist * n_klist; diff --git a/lib/klist.c b/lib/klist.c index a70c836c5c4c..bb2f3551d50a 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -42,12 +42,23 @@ /** * klist_init - Initialize a klist structure. * @k: The klist we're initializing. + * @get: The get function for the embedding object (NULL if none) + * @put: The put function for the embedding object (NULL if none) + * + * Initialises the klist structure. If the klist_node structures are + * going to be embedded in refcounted objects (necessary for safe + * deletion) then the get/put arguments are used to initialise + * functions that take and release references on the embedding + * objects. 
*/ -void klist_init(struct klist * k) +void klist_init(struct klist * k, void (*get)(struct klist_node *), + void (*put)(struct klist_node *)) { INIT_LIST_HEAD(&k->k_list); spin_lock_init(&k->k_lock); + k->get = get; + k->put = put; } EXPORT_SYMBOL_GPL(klist_init); @@ -74,6 +85,8 @@ static void klist_node_init(struct klist * k, struct klist_node * n) init_completion(&n->n_removed); kref_init(&n->n_ref); n->n_klist = k; + if (k->get) + k->get(n); } @@ -110,9 +123,12 @@ EXPORT_SYMBOL_GPL(klist_add_tail); static void klist_release(struct kref * kref) { struct klist_node * n = container_of(kref, struct klist_node, n_ref); + void (*put)(struct klist_node *) = n->n_klist->put; list_del(&n->n_node); complete(&n->n_removed); n->n_klist = NULL; + if (put) + put(n); } static int klist_dec_and_del(struct klist_node * n) -- cgit v1.2.3 From 6df29debb7fc04ac3f92038c57437f40bab4e72d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 8 Sep 2005 16:04:41 +0100 Subject: [SERIAL] Use an enum for serial8250 platform device IDs Rather than hard-coding the platform device IDs, enumerate them. We don't particularly care about the actual ID we get, just as long as they're unique. 
Signed-off-by: Russell King --- arch/arm/mach-clps7500/core.c | 2 +- arch/arm/mach-ebsa110/core.c | 2 +- arch/arm/mach-epxa10db/arch.c | 2 +- arch/arm/mach-footbridge/isa.c | 2 +- arch/arm/mach-h720x/cpu-h7202.c | 2 +- arch/arm/mach-ixp2000/core.c | 2 +- arch/arm/mach-ixp4xx/coyote-setup.c | 2 +- arch/arm/mach-ixp4xx/gtwx5715-setup.c | 2 +- arch/arm/mach-ixp4xx/ixdp425-setup.c | 2 +- arch/arm/mach-omap1/board-voiceblue.c | 2 +- arch/arm/mach-omap1/serial.c | 2 +- arch/arm/mach-rpc/riscpc.c | 2 +- arch/arm/mach-s3c2410/mach-bast.c | 2 +- arch/arm/mach-s3c2410/mach-vr1000.c | 2 +- arch/arm/mach-shark/core.c | 2 +- arch/ppc/syslib/mpc10x_common.c | 4 ++-- arch/ppc/syslib/mpc83xx_devices.c | 2 +- arch/ppc/syslib/mpc85xx_devices.c | 2 +- arch/ppc64/kernel/setup.c | 2 +- drivers/serial/8250.c | 2 +- drivers/serial/8250_accent.c | 2 +- drivers/serial/8250_boca.c | 2 +- drivers/serial/8250_fourport.c | 2 +- drivers/serial/8250_hub6.c | 2 +- drivers/serial/8250_mca.c | 2 +- include/linux/serial_8250.h | 15 +++++++++++++++ 26 files changed, 41 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-clps7500/core.c b/arch/arm/mach-clps7500/core.c index 112f1d68fb2b..e216ab8b9e8f 100644 --- a/arch/arm/mach-clps7500/core.c +++ b/arch/arm/mach-clps7500/core.c @@ -354,7 +354,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index 23c4da10101b..5aeadfd72143 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -219,7 +219,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git 
a/arch/arm/mach-epxa10db/arch.c b/arch/arm/mach-epxa10db/arch.c index 7daa021676d0..44c56571d183 100644 --- a/arch/arm/mach-epxa10db/arch.c +++ b/arch/arm/mach-epxa10db/arch.c @@ -52,7 +52,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-footbridge/isa.c b/arch/arm/mach-footbridge/isa.c index aa3a1fef563e..28846c7edaaf 100644 --- a/arch/arm/mach-footbridge/isa.c +++ b/arch/arm/mach-footbridge/isa.c @@ -34,7 +34,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-h720x/cpu-h7202.c b/arch/arm/mach-h720x/cpu-h7202.c index 4b3199319e68..a4a7c0125d03 100644 --- a/arch/arm/mach-h720x/cpu-h7202.c +++ b/arch/arm/mach-h720x/cpu-h7202.c @@ -90,7 +90,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-ixp2000/core.c b/arch/arm/mach-ixp2000/core.c index 098c817a7fb8..74bd2fd602d4 100644 --- a/arch/arm/mach-ixp2000/core.c +++ b/arch/arm/mach-ixp2000/core.c @@ -174,7 +174,7 @@ static struct resource ixp2000_uart_resource = { static struct platform_device ixp2000_serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = ixp2000_serial_port, }, diff --git a/arch/arm/mach-ixp4xx/coyote-setup.c b/arch/arm/mach-ixp4xx/coyote-setup.c index 8b2f25322452..050c92768913 100644 --- a/arch/arm/mach-ixp4xx/coyote-setup.c +++ b/arch/arm/mach-ixp4xx/coyote-setup.c @@ -66,7 +66,7 @@ static struct plat_serial8250_port coyote_uart_data[] = { 
static struct platform_device coyote_uart = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = coyote_uart_data, }, diff --git a/arch/arm/mach-ixp4xx/gtwx5715-setup.c b/arch/arm/mach-ixp4xx/gtwx5715-setup.c index 3fd92c5cbaa8..29a6d02fa851 100644 --- a/arch/arm/mach-ixp4xx/gtwx5715-setup.c +++ b/arch/arm/mach-ixp4xx/gtwx5715-setup.c @@ -93,7 +93,7 @@ static struct plat_serial8250_port gtwx5715_uart_platform_data[] = { static struct platform_device gtwx5715_uart_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = gtwx5715_uart_platform_data, }, diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index 6c14ff3c23a0..ae1fa099d5fa 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -96,7 +96,7 @@ static struct plat_serial8250_port ixdp425_uart_data[] = { static struct platform_device ixdp425_uart = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev.platform_data = ixdp425_uart_data, .num_resources = 2, .resource = ixdp425_uart_resources diff --git a/arch/arm/mach-omap1/board-voiceblue.c b/arch/arm/mach-omap1/board-voiceblue.c index e42281988990..76176dd518ba 100644 --- a/arch/arm/mach-omap1/board-voiceblue.c +++ b/arch/arm/mach-omap1/board-voiceblue.c @@ -74,7 +74,7 @@ static struct plat_serial8250_port voiceblue_ports[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 1, + .id = PLAT8250_DEV_PLATFORM1, .dev = { .platform_data = voiceblue_ports, }, diff --git a/arch/arm/mach-omap1/serial.c b/arch/arm/mach-omap1/serial.c index 214e5d17c8b5..1446bf332e4c 100644 --- a/arch/arm/mach-omap1/serial.c +++ b/arch/arm/mach-omap1/serial.c @@ -94,7 +94,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = 
serial_platform_data, }, diff --git a/arch/arm/mach-rpc/riscpc.c b/arch/arm/mach-rpc/riscpc.c index a10268618f74..e3587efec4bf 100644 --- a/arch/arm/mach-rpc/riscpc.c +++ b/arch/arm/mach-rpc/riscpc.c @@ -140,7 +140,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index e9182242da95..1a3367da6408 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -381,7 +381,7 @@ static struct plat_serial8250_port bast_sio_data[] = { static struct platform_device bast_sio = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = &bast_sio_data, }, diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c index 924e8464c212..8f9ab2893df4 100644 --- a/arch/arm/mach-s3c2410/mach-vr1000.c +++ b/arch/arm/mach-s3c2410/mach-vr1000.c @@ -221,7 +221,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/arm/mach-shark/core.c b/arch/arm/mach-shark/core.c index e737eae4521f..946c0d11c73b 100644 --- a/arch/arm/mach-shark/core.c +++ b/arch/arm/mach-shark/core.c @@ -41,7 +41,7 @@ static struct plat_serial8250_port serial_platform_data[] = { static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_platform_data, }, diff --git a/arch/ppc/syslib/mpc10x_common.c b/arch/ppc/syslib/mpc10x_common.c index 87065e2e4c5f..3e039706bdbc 100644 --- a/arch/ppc/syslib/mpc10x_common.c +++ b/arch/ppc/syslib/mpc10x_common.c @@ -140,12 +140,12 @@ struct platform_device 
ppc_sys_platform_devices[] = { }, [MPC10X_UART0] = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev.platform_data = serial_plat_uart0, }, [MPC10X_UART1] = { .name = "serial8250", - .id = 1, + .id = PLAT8250_DEV_PLATFORM1, .dev.platform_data = serial_plat_uart1, }, diff --git a/arch/ppc/syslib/mpc83xx_devices.c b/arch/ppc/syslib/mpc83xx_devices.c index 5aaf0e58e1f9..95b3b8a7f0ba 100644 --- a/arch/ppc/syslib/mpc83xx_devices.c +++ b/arch/ppc/syslib/mpc83xx_devices.c @@ -165,7 +165,7 @@ struct platform_device ppc_sys_platform_devices[] = { }, [MPC83xx_DUART] = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev.platform_data = serial_platform_data, }, [MPC83xx_SEC2] = { diff --git a/arch/ppc/syslib/mpc85xx_devices.c b/arch/ppc/syslib/mpc85xx_devices.c index 8af322dd476a..bbc5ac0de878 100644 --- a/arch/ppc/syslib/mpc85xx_devices.c +++ b/arch/ppc/syslib/mpc85xx_devices.c @@ -282,7 +282,7 @@ struct platform_device ppc_sys_platform_devices[] = { }, [MPC85xx_DUART] = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev.platform_data = serial_platform_data, }, [MPC85xx_PERFMON] = { diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index d0bb68af0ea4..bfa8791c9807 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -1283,7 +1283,7 @@ void __init generic_find_legacy_serial_ports(u64 *physport, static struct platform_device serial_device = { .name = "serial8250", - .id = 0, + .id = PLAT8250_DEV_PLATFORM, .dev = { .platform_data = serial_ports, }, diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c index 30a0a3d10145..5b65e208893b 100644 --- a/drivers/serial/8250.c +++ b/drivers/serial/8250.c @@ -2536,7 +2536,7 @@ static int __init serial8250_init(void) goto out; serial8250_isa_devs = platform_device_register_simple("serial8250", - -1, NULL, 0); + PLAT8250_DEV_LEGACY, NULL, 0); if (IS_ERR(serial8250_isa_devs)) { ret = PTR_ERR(serial8250_isa_devs); goto unreg; diff --git 
a/drivers/serial/8250_accent.c b/drivers/serial/8250_accent.c index 1f2c276063ef..9c10262f2469 100644 --- a/drivers/serial/8250_accent.c +++ b/drivers/serial/8250_accent.c @@ -29,7 +29,7 @@ static struct plat_serial8250_port accent_data[] = { static struct platform_device accent_device = { .name = "serial8250", - .id = 2, + .id = PLAT8250_DEV_ACCENT, .dev = { .platform_data = accent_data, }, diff --git a/drivers/serial/8250_boca.c b/drivers/serial/8250_boca.c index 465c9ea1e7a3..3bfe0f7b26fb 100644 --- a/drivers/serial/8250_boca.c +++ b/drivers/serial/8250_boca.c @@ -43,7 +43,7 @@ static struct plat_serial8250_port boca_data[] = { static struct platform_device boca_device = { .name = "serial8250", - .id = 3, + .id = PLAT8250_DEV_BOCA, .dev = { .platform_data = boca_data, }, diff --git a/drivers/serial/8250_fourport.c b/drivers/serial/8250_fourport.c index e9b4d908ef42..6375d68b7913 100644 --- a/drivers/serial/8250_fourport.c +++ b/drivers/serial/8250_fourport.c @@ -35,7 +35,7 @@ static struct plat_serial8250_port fourport_data[] = { static struct platform_device fourport_device = { .name = "serial8250", - .id = 1, + .id = PLAT8250_DEV_FOURPORT, .dev = { .platform_data = fourport_data, }, diff --git a/drivers/serial/8250_hub6.c b/drivers/serial/8250_hub6.c index 77f396f84b4c..daf569cd3c8f 100644 --- a/drivers/serial/8250_hub6.c +++ b/drivers/serial/8250_hub6.c @@ -40,7 +40,7 @@ static struct plat_serial8250_port hub6_data[] = { static struct platform_device hub6_device = { .name = "serial8250", - .id = 4, + .id = PLAT8250_DEV_HUB6, .dev = { .platform_data = hub6_data, }, diff --git a/drivers/serial/8250_mca.c b/drivers/serial/8250_mca.c index f0c40d68b8c1..ac205256d5f3 100644 --- a/drivers/serial/8250_mca.c +++ b/drivers/serial/8250_mca.c @@ -44,7 +44,7 @@ static struct plat_serial8250_port mca_data[] = { static struct platform_device mca_device = { .name = "serial8250", - .id = 5, + .id = PLAT8250_DEV_MCA, .dev = { .platform_data = mca_data, }, diff --git 
a/include/linux/serial_8250.h b/include/linux/serial_8250.h index d8a023d804d4..317a979b24de 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -29,6 +29,21 @@ struct plat_serial8250_port { unsigned int flags; /* UPF_* flags */ }; +/* + * Allocate 8250 platform device IDs. Nothing is implied by + * the numbering here, except for the legacy entry being -1. + */ +enum { + PLAT8250_DEV_LEGACY = -1, + PLAT8250_DEV_PLATFORM, + PLAT8250_DEV_PLATFORM1, + PLAT8250_DEV_FOURPORT, + PLAT8250_DEV_ACCENT, + PLAT8250_DEV_BOCA, + PLAT8250_DEV_HUB6, + PLAT8250_DEV_MCA, +}; + /* * This should be used by drivers which want to register * their own 8250 ports without registering their own -- cgit v1.2.3 From 8dc003359cc3996abad9e53a7b2280b272610283 Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Thu, 8 Sep 2005 17:53:01 +0100 Subject: [MMC] Allow detection/removal to be delayed Change mmc_detect_change() to take a delay argument such that the detection of card insertions and removals can be delayed according to the requirements of the host driver or platform. Signed-off-by: Richard Purdie Signed-off-by: Russell King --- drivers/mmc/mmc.c | 12 ++++++++---- drivers/mmc/mmci.c | 2 +- drivers/mmc/pxamci.c | 2 +- drivers/mmc/wbsd.c | 4 ++-- include/linux/mmc/host.h | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/mmc.c b/drivers/mmc/mmc.c index 0a117c61cd18..ceae379a4d4c 100644 --- a/drivers/mmc/mmc.c +++ b/drivers/mmc/mmc.c @@ -1079,13 +1079,17 @@ static void mmc_setup(struct mmc_host *host) /** * mmc_detect_change - process change of state on a MMC socket * @host: host which changed state. + * @delay: optional delay to wait before detection (jiffies) * * All we know is that card(s) have been inserted or removed * from the socket(s). We don't know which socket or cards. 
*/ -void mmc_detect_change(struct mmc_host *host) +void mmc_detect_change(struct mmc_host *host, unsigned long delay) { - schedule_work(&host->detect); + if (delay) + schedule_delayed_work(&host->detect, delay); + else + schedule_work(&host->detect); } EXPORT_SYMBOL(mmc_detect_change); @@ -1189,7 +1193,7 @@ int mmc_add_host(struct mmc_host *host) ret = mmc_add_host_sysfs(host); if (ret == 0) { mmc_power_off(host); - mmc_detect_change(host); + mmc_detect_change(host, 0); } return ret; @@ -1259,7 +1263,7 @@ EXPORT_SYMBOL(mmc_suspend_host); */ int mmc_resume_host(struct mmc_host *host) { - mmc_detect_change(host); + mmc_detect_change(host, 0); return 0; } diff --git a/drivers/mmc/mmci.c b/drivers/mmc/mmci.c index 716c4ef4faf6..91c74843dc0d 100644 --- a/drivers/mmc/mmci.c +++ b/drivers/mmc/mmci.c @@ -442,7 +442,7 @@ static void mmci_check_status(unsigned long data) status = host->plat->status(mmc_dev(host->mmc)); if (status ^ host->oldstat) - mmc_detect_change(host->mmc); + mmc_detect_change(host->mmc, 0); host->oldstat = status; mod_timer(&host->timer, jiffies + HZ); diff --git a/drivers/mmc/pxamci.c b/drivers/mmc/pxamci.c index e99a53b09e32..5223cd3bfc1e 100644 --- a/drivers/mmc/pxamci.c +++ b/drivers/mmc/pxamci.c @@ -423,7 +423,7 @@ static void pxamci_dma_irq(int dma, void *devid, struct pt_regs *regs) static irqreturn_t pxamci_detect_irq(int irq, void *devid, struct pt_regs *regs) { - mmc_detect_change(devid); + mmc_detect_change(devid, 0); return IRQ_HANDLED; } diff --git a/drivers/mmc/wbsd.c b/drivers/mmc/wbsd.c index dec01d38c782..a62c86fef5cc 100644 --- a/drivers/mmc/wbsd.c +++ b/drivers/mmc/wbsd.c @@ -1122,7 +1122,7 @@ static void wbsd_detect_card(unsigned long data) DBG("Executing card detection\n"); - mmc_detect_change(host->mmc); + mmc_detect_change(host->mmc, 0); } /* @@ -1198,7 +1198,7 @@ static void wbsd_tasklet_card(unsigned long param) */ spin_unlock(&host->lock); - mmc_detect_change(host->mmc); + mmc_detect_change(host->mmc, 0); } else 
spin_unlock(&host->lock); diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 6014160d9c06..c5d73c0cf6bb 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -123,7 +123,7 @@ extern void mmc_free_host(struct mmc_host *); extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); -extern void mmc_detect_change(struct mmc_host *); +extern void mmc_detect_change(struct mmc_host *, unsigned long delay); extern void mmc_request_done(struct mmc_host *, struct mmc_request *); #endif -- cgit v1.2.3 From 0e4e4220f10bf8f58a8606f0cb28538088c64b1a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 8 Sep 2005 12:32:03 -0700 Subject: [NET]: Optimize pskb_trim_rcsum() Since packets almost never contain extra garbage at the end, it is worthwhile to optimize for that case. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index da7da9c0ed1b..2741c0c55e83 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1167,7 +1167,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { - if (len >= skb->len) + if (likely(len >= skb->len)) return 0; if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; -- cgit v1.2.3 From 7657ec1fcb69e266ab876af56332d0c484ca6d00 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Wed, 17 Aug 2005 15:17:26 +0400 Subject: [PATCH] lib/crc16: added crc16 algorithm. Add the crc16 routines, as used by w1 devices. 
Signed-off-by: Ben Gardner Signed-off-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- include/linux/crc16.h | 44 +++++++++++++++++++++++++++++++++ lib/Kconfig | 8 ++++++ lib/Makefile | 3 ++- lib/crc16.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 include/linux/crc16.h create mode 100644 lib/crc16.c (limited to 'include/linux') diff --git a/include/linux/crc16.h b/include/linux/crc16.h new file mode 100644 index 000000000000..bdedf825b04a --- /dev/null +++ b/include/linux/crc16.h @@ -0,0 +1,44 @@ +/* + * crc16.h - CRC-16 routine + * + * Implements the standard CRC-16, as used with 1-wire devices: + * Width 16 + * Poly 0x8005 (x^16 + x^15 + x^2 + 1) + * Init 0 + * + * For 1-wire devices, the CRC is stored inverted, LSB-first + * + * Example buffer with the CRC attached: + * 31 32 33 34 35 36 37 38 39 C2 44 + * + * The CRC over a buffer with the CRC attached is 0xB001. + * So, if (crc16(0, buf, size) == 0xB001) then the buffer is valid. + * + * Refer to "Application Note 937: Book of iButton Standards" for details. + * http://www.maxim-ic.com/appnotes.cfm/appnote_number/937 + * + * Copyright (c) 2005 Ben Gardner + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#ifndef __CRC16_H +#define __CRC16_H + +#include <linux/types.h> + +#define CRC16_INIT 0 +#define CRC16_VALID 0xb001 + +extern u16 const crc16_table[256]; + +extern u16 crc16(u16 crc, const u8 *buffer, size_t len); + +static inline u16 crc16_byte(u16 crc, const u8 data) +{ + return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; +} + +#endif /* __CRC16_H */ + diff --git a/lib/Kconfig b/lib/Kconfig index e43197efeb9c..3de93357f5ab 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -12,6 +12,14 @@ config CRC_CCITT the kernel tree does. Such modules that use library CRC-CCITT functions require M here. 
+config CRC16 + tristate "CRC16 functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC16 functions, but a module built outside + the kernel tree does. Such modules that use library CRC16 + functions require M here. + config CRC32 tristate "CRC32 functions" default y diff --git a/lib/Makefile b/lib/Makefile index 3e2bd0df23bb..d9c38ba05e7b 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,11 +23,12 @@ lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o -ifneq ($(CONFIG_HAVE_DEC_LOCK),y) +ifneq ($(CONFIG_HAVE_DEC_LOCK),y) lib-y += dec_and_lock.o endif obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o +obj-$(CONFIG_CRC16) += crc16.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_GENERIC_IOMAP) += iomap.o diff --git a/lib/crc16.c b/lib/crc16.c new file mode 100644 index 000000000000..011fe573c666 --- /dev/null +++ b/lib/crc16.c @@ -0,0 +1,67 @@ +/* + * crc16.c + * + * This source code is licensed under the GNU General Public License, + * Version 2. See the file COPYING for more details. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/crc16.h> + +/** CRC table for the CRC-16. 
The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */ +u16 const crc16_table[256] = { + 0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241, + 0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440, + 0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40, + 0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841, + 0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40, + 0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41, + 0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641, + 0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040, + 0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240, + 0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441, + 0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41, + 0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840, + 0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41, + 0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40, + 0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640, + 0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041, + 0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240, + 0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441, + 0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41, + 0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840, + 0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41, + 0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40, + 0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640, + 0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041, + 0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241, + 0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440, + 0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40, + 0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841, + 0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40, + 
0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41, + 0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641, + 0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040 +}; +EXPORT_SYMBOL(crc16_table); + +/** + * Compute the CRC-16 for the data buffer + * + * @param crc previous CRC value + * @param buffer data pointer + * @param len number of bytes in the buffer + * @return the updated CRC value + */ +u16 crc16(u16 crc, u8 const *buffer, size_t len) +{ + while (len--) + crc = crc16_byte(crc, *buffer++); + return crc; +} +EXPORT_SYMBOL(crc16); + +MODULE_DESCRIPTION("CRC16 calculations"); +MODULE_LICENSE("GPL"); + -- cgit v1.2.3 From 01357dcac62ac028de65a1c315eb75c530c8a5d6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 8 Sep 2005 22:46:00 +0100 Subject: [MMC] Ensure correct mmc_priv() behaviour mmc_priv() has some nasty effects if the wrong pointer type is passed to it. Introduce type checking, which also means we get the right type. Also add an additional member to mmc_host which is used to align host-private data appropriately. 
Signed-off-by: Russell King --- include/linux/mmc/host.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c5d73c0cf6bb..c1f021eddffa 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -109,6 +109,8 @@ struct mmc_host { struct mmc_card *card_selected; /* the selected MMC card */ struct work_struct detect; + + unsigned long private[0] ____cacheline_aligned; }; extern struct mmc_host *mmc_alloc_host(int extra, struct device *); @@ -116,7 +118,11 @@ extern int mmc_add_host(struct mmc_host *); extern void mmc_remove_host(struct mmc_host *); extern void mmc_free_host(struct mmc_host *); -#define mmc_priv(x) ((void *)((x) + 1)) +static inline void *mmc_priv(struct mmc_host *host) +{ + return (void *)host->private; +} + #define mmc_dev(x) ((x)->dev) #define mmc_hostname(x) ((x)->class_dev.class_id) -- cgit v1.2.3 From 4e1491847ef5ca1c5a661601d5f96dcb7d90d2f0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 8 Sep 2005 14:47:12 -0700 Subject: Fix up ARM serial driver compile failure Proud member of Uglyhacks'R'US. Acked-by: David S. Miller Acked-by: Russell King Signed-off-by: Linus Torvalds --- include/linux/serial_core.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 9b12fe731612..27db8da43aa4 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -401,6 +401,9 @@ uart_handle_sysrq_char(struct uart_port *port, unsigned int ch, #endif return 0; } +#ifndef SUPPORT_SYSRQ +#define uart_handle_sysrq_char(port,ch,regs) uart_handle_sysrq_char(port, 0, NULL) +#endif /* * We do the SysRQ and SAK checking like this... 
-- cgit v1.2.3 From d42c69972b853fd33a26c8c7405624be41a22136 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 6 Jul 2005 19:56:03 +0200 Subject: [PATCH] PCI: Run PCI driver initialization on local node Run PCI driver initialization on local node Instead of adding messy kmalloc_node()s everywhere run the PCI driver probe on the node local to the device. This would not have helped for IDE, but should for other more clean drivers that do more initialization in probe(). It won't help for drivers that do most of the work on first open (like many network drivers) Signed-off-by: Andi Kleen Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci-driver.c | 31 ++++++++++++++++++++++++++++++- include/linux/mempolicy.h | 1 + mm/mempolicy.c | 2 +- 3 files changed, 32 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e4115a0d5ba6..414c77299c7d 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "pci.h" /* @@ -163,6 +164,34 @@ const struct pci_device_id *pci_match_device(struct pci_driver *drv, return NULL; } +static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, + const struct pci_device_id *id) +{ + int error; +#ifdef CONFIG_NUMA + /* Execute driver initialization on node where the + device's bus is attached to. This way the driver likely + allocates its local memory on the right node without + any need to change it. 
*/ + struct mempolicy *oldpol; + cpumask_t oldmask = current->cpus_allowed; + int node = pcibus_to_node(dev->bus); + if (node >= 0 && node_online(node)) + set_cpus_allowed(current, node_to_cpumask(node)); + /* And set default memory allocation policy */ + oldpol = current->mempolicy; + current->mempolicy = &default_policy; + mpol_get(current->mempolicy); +#endif + error = drv->probe(dev, id); +#ifdef CONFIG_NUMA + set_cpus_allowed(current, oldmask); + mpol_free(current->mempolicy); + current->mempolicy = oldpol; +#endif + return error; +} + /** * __pci_device_probe() * @@ -180,7 +209,7 @@ __pci_device_probe(struct pci_driver *drv, struct pci_dev *pci_dev) id = pci_match_device(drv, pci_dev); if (id) - error = drv->probe(pci_dev, id); + error = pci_call_probe(drv, pci_dev, id); if (error >= 0) { pci_dev->driver = drv; error = 0; diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 94a46f38c532..58385ee1c0ac 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -155,6 +155,7 @@ struct mempolicy *get_vma_policy(struct task_struct *task, extern void numa_default_policy(void); extern void numa_policy_init(void); +extern struct mempolicy default_policy; #else diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 13492d66b7c8..afa06e184d88 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -88,7 +88,7 @@ static kmem_cache_t *sn_cache; policied. */ static int policy_zone; -static struct mempolicy default_policy = { +struct mempolicy default_policy = { .refcnt = ATOMIC_INIT(1), /* never free it */ .policy = MPOL_DEFAULT, }; -- cgit v1.2.3 From 74d863ee8a9da2b0f31e0f977daf127807b2e9d2 Mon Sep 17 00:00:00 2001 From: "akpm@osdl.org" Date: Mon, 25 Jul 2005 23:28:14 -0700 Subject: [PATCH] PCI: Move PCI fixup data into r/o section Make PCI fixup data const, so it'll end up in a r/o section. This also fixes the conversion into ECOFF which gets broken by too many changes between r/w and r/o sections. 
Call it a hack but it's a change that's correct by itself. Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index bc4c40000c0d..025bfc39771d 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1067,7 +1067,7 @@ enum pci_fixup_pass { /* Anonymous variables would be nice... */ #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, hook) \ - static struct pci_fixup __pci_fixup_##name __attribute_used__ \ + static const struct pci_fixup __pci_fixup_##name __attribute_used__ \ __attribute__((__section__(#section))) = { vendor, device, hook }; #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ -- cgit v1.2.3 From 982245f01734e9d5a3ab98b2b2e9761ae7719094 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 17 Jul 2005 04:22:20 +0200 Subject: [PATCH] PCI: remove CONFIG_PCI_NAMES This patch removes CONFIG_PCI_NAMES. 
Signed-off-by: Adrian Bunk Signed-off-by: Greg Kroah-Hartman --- Documentation/feature-removal-schedule.txt | 9 - MAINTAINERS | 7 - arch/alpha/kernel/sys_marvel.c | 5 +- arch/ppc64/kernel/eeh.c | 31 +- arch/ppc64/kernel/iSeries_VpdInfo.c | 5 +- arch/ppc64/kernel/pci.c | 1 - drivers/char/drm/drmP.h | 4 + drivers/infiniband/hw/mthca/mthca_main.c | 8 +- drivers/infiniband/hw/mthca/mthca_reset.c | 8 +- drivers/net/irda/vlsi_ir.h | 6 - drivers/pci/Kconfig | 17 - drivers/pci/Makefile | 18 +- drivers/pci/gen-devlist.c | 132 - drivers/pci/names.c | 137 - drivers/pci/pci.ids | 10180 --------------------------- drivers/pci/probe.c | 2 - drivers/pci/proc.c | 12 +- drivers/usb/core/hcd-pci.c | 4 - drivers/video/nvidia/nvidia.c | 4 - drivers/video/riva/fbdev.c | 4 - include/linux/pci.h | 14 - 21 files changed, 32 insertions(+), 10576 deletions(-) delete mode 100644 drivers/pci/gen-devlist.c delete mode 100644 drivers/pci/names.c delete mode 100644 drivers/pci/pci.ids (limited to 'include/linux') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 2e0a01b21fe0..5f95d4b3cab1 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -25,15 +25,6 @@ Who: Pavel Machek --------------------------- -What: PCI Name Database (CONFIG_PCI_NAMES) -When: July 2005 -Why: It bloats the kernel unnecessarily, and is handled by userspace better - (pciutils supports it.) Will eliminate the need to try to keep the - pci.ids file in sync with the sf.net database all of the time. 
-Who: Greg Kroah-Hartman - ---------------------------- - What: io_remap_page_range() (macro or function) When: September 2005 Why: Replaced by io_remap_pfn_range() which allows more memory space diff --git a/MAINTAINERS b/MAINTAINERS index 8e4e82921070..cb389067cf66 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1813,13 +1813,6 @@ M: hch@infradead.org L: linux-abi-devel@lists.sourceforge.net S: Maintained -PCI ID DATABASE -P: Martin Mares -M: mj@ucw.cz -L: pciids-devel@lists.sourceforge.net -W: http://pciids.sourceforge.net/ -S: Maintained - PCI SOUND DRIVERS (ES1370, ES1371 and SONICVIBES) P: Thomas Sailer M: sailer@ife.ee.ethz.ch diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 804727853d25..e32fee505220 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -373,12 +373,11 @@ marvel_map_irq(struct pci_dev *dev, u8 slot, u8 pin) irq += 0x80; /* offset for lsi */ #if 1 - printk("PCI:%d:%d:%d (hose %d) [%s] is using MSI\n", + printk("PCI:%d:%d:%d (hose %d) is using MSI\n", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn), - hose->index, - pci_pretty_name (dev)); + hose->index); printk(" %d message(s) from 0x%04x\n", 1 << ((msg_ctl & PCI_MSI_FLAGS_QSIZE) >> 4), msg_dat); diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c index af5272fedadf..4c857a6516fc 100644 --- a/arch/ppc64/kernel/eeh.c +++ b/arch/ppc64/kernel/eeh.c @@ -202,10 +202,9 @@ static void pci_addr_cache_print(struct pci_io_addr_cache *cache) while (n) { struct pci_io_addr_range *piar; piar = rb_entry(n, struct pci_io_addr_range, rb_node); - printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s %s\n", + printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s\n", (piar->flags & IORESOURCE_IO) ? 
"i/o" : "mem", cnt, - piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev), - pci_pretty_name(piar->pcidev)); + piar->addr_lo, piar->addr_hi, pci_name(piar->pcidev)); cnt++; n = rb_next(n); } @@ -260,8 +259,8 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); if (!dn) { - printk(KERN_WARNING "PCI: no pci dn found for dev=%s %s\n", - pci_name(dev), pci_pretty_name(dev)); + printk(KERN_WARNING "PCI: no pci dn found for dev=%s\n", + pci_name(dev)); return; } @@ -269,8 +268,8 @@ static void __pci_addr_cache_insert_device(struct pci_dev *dev) if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) || dn->eeh_mode & EEH_MODE_NOCHECK) { #ifdef DEBUG - printk(KERN_INFO "PCI: skip building address cache for=%s %s\n", - pci_name(dev), pci_pretty_name(dev)); + printk(KERN_INFO "PCI: skip building address cache for=%s\n", + pci_name(dev)); #endif return; } @@ -447,12 +446,12 @@ static void eeh_panic(struct pci_dev *dev, int reset_state) * in light of potential corruption, we can use it here. 
*/ if (panic_on_oops) - panic("EEH: MMIO failure (%d) on device:%s %s\n", reset_state, - pci_name(dev), pci_pretty_name(dev)); + panic("EEH: MMIO failure (%d) on device:%s\n", reset_state, + pci_name(dev)); else { __get_cpu_var(ignored_failures)++; - printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s %s\n", - reset_state, pci_name(dev), pci_pretty_name(dev)); + printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s\n", + reset_state, pci_name(dev)); } } @@ -482,8 +481,8 @@ static void eeh_event_handler(void *dummy) break; printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device " - "%s %s\n", event->reset_state, - pci_name(event->dev), pci_pretty_name(event->dev)); + "%s\n", event->reset_state, + pci_name(event->dev)); atomic_set(&eeh_fail_count, 0); notifier_call_chain (&eeh_notifier_chain, @@ -851,8 +850,7 @@ void eeh_add_device_late(struct pci_dev *dev) return; #ifdef DEBUG - printk(KERN_DEBUG "EEH: adding device %s %s\n", pci_name(dev), - pci_pretty_name(dev)); + printk(KERN_DEBUG "EEH: adding device %s\n", pci_name(dev)); #endif pci_addr_cache_insert_device (dev); @@ -873,8 +871,7 @@ void eeh_remove_device(struct pci_dev *dev) /* Unregister the device with the EEH/PCI address search system */ #ifdef DEBUG - printk(KERN_DEBUG "EEH: remove device %s %s\n", pci_name(dev), - pci_pretty_name(dev)); + printk(KERN_DEBUG "EEH: remove device %s\n", pci_name(dev)); #endif pci_addr_cache_remove_device(dev); } diff --git a/arch/ppc64/kernel/iSeries_VpdInfo.c b/arch/ppc64/kernel/iSeries_VpdInfo.c index d11c732daf81..5d921792571f 100644 --- a/arch/ppc64/kernel/iSeries_VpdInfo.c +++ b/arch/ppc64/kernel/iSeries_VpdInfo.c @@ -264,8 +264,5 @@ void __init iSeries_Device_Information(struct pci_dev *PciDev, int count) printk("%d. 
PCI: Bus%3d, Device%3d, Vendor %04X Frame%3d, Card %4s ", count, bus, PCI_SLOT(PciDev->devfn), PciDev->vendor, frame, card); - if (pci_class_name(PciDev->class >> 8) == 0) - printk("0x%04X\n", (int)(PciDev->class >> 8)); - else - printk("%s\n", pci_class_name(PciDev->class >> 8)); + printk("0x%04X\n", (int)(PciDev->class >> 8)); } diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index d0d55c7908ef..b5ca7d8347e2 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -84,7 +84,6 @@ static void fixup_broken_pcnet32(struct pci_dev* dev) if ((dev->class>>8 == PCI_CLASS_NETWORK_ETHERNET)) { dev->vendor = PCI_VENDOR_ID_AMD; pci_write_config_word(dev, PCI_VENDOR_ID, PCI_VENDOR_ID_AMD); - pci_name_device(dev); } } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TRIDENT, PCI_ANY_ID, fixup_broken_pcnet32); diff --git a/drivers/char/drm/drmP.h b/drivers/char/drm/drmP.h index 6f98701dfe15..121cc85f347e 100644 --- a/drivers/char/drm/drmP.h +++ b/drivers/char/drm/drmP.h @@ -1071,5 +1071,9 @@ extern void *drm_calloc(size_t nmemb, size_t size, int area); extern unsigned long drm_core_get_map_ofs(drm_map_t *map); extern unsigned long drm_core_get_reg_ofs(struct drm_device *dev); +#ifndef pci_pretty_name +#define pci_pretty_name(dev) "" +#endif + #endif /* __KERNEL__ */ #endif diff --git a/drivers/infiniband/hw/mthca/mthca_main.c b/drivers/infiniband/hw/mthca/mthca_main.c index 3241d6c9dc11..ffbcd40418d5 100644 --- a/drivers/infiniband/hw/mthca/mthca_main.c +++ b/drivers/infiniband/hw/mthca/mthca_main.c @@ -937,12 +937,12 @@ static int __devinit mthca_init_one(struct pci_dev *pdev, ++mthca_version_printed; } - printk(KERN_INFO PFX "Initializing %s (%s)\n", - pci_pretty_name(pdev), pci_name(pdev)); + printk(KERN_INFO PFX "Initializing %s\n", + pci_name(pdev)); if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) { - printk(KERN_ERR PFX "%s (%s) has invalid driver data %lx\n", - pci_pretty_name(pdev), pci_name(pdev), id->driver_data); + printk(KERN_ERR PFX "%s has 
invalid driver data %lx\n", + pci_name(pdev), id->driver_data); return -ENODEV; } diff --git a/drivers/infiniband/hw/mthca/mthca_reset.c b/drivers/infiniband/hw/mthca/mthca_reset.c index 8ea801271a41..4f995391dd1d 100644 --- a/drivers/infiniband/hw/mthca/mthca_reset.c +++ b/drivers/infiniband/hw/mthca/mthca_reset.c @@ -71,8 +71,8 @@ int mthca_reset(struct mthca_dev *mdev) bridge)) != NULL) { if (bridge->hdr_type == PCI_HEADER_TYPE_BRIDGE && bridge->subordinate == mdev->pdev->bus) { - mthca_dbg(mdev, "Found bridge: %s (%s)\n", - pci_pretty_name(bridge), pci_name(bridge)); + mthca_dbg(mdev, "Found bridge: %s\n", + pci_name(bridge)); break; } } @@ -83,8 +83,8 @@ int mthca_reset(struct mthca_dev *mdev) * assume we're in no-bridge mode and hope for * the best. */ - mthca_warn(mdev, "No bridge found for %s (%s)\n", - pci_pretty_name(mdev->pdev), pci_name(mdev->pdev)); + mthca_warn(mdev, "No bridge found for %s\n", + pci_name(mdev->pdev)); } } diff --git a/drivers/net/irda/vlsi_ir.h b/drivers/net/irda/vlsi_ir.h index 414694abf588..741aecc655df 100644 --- a/drivers/net/irda/vlsi_ir.h +++ b/drivers/net/irda/vlsi_ir.h @@ -69,14 +69,8 @@ typedef void irqreturn_t; #else /* 2.5 or later */ -/* recent 2.5/2.6 stores pci device names at varying places ;-) */ -#ifdef CONFIG_PCI_NAMES -/* human readable name */ -#define PCIDEV_NAME(pdev) ((pdev)->pretty_name) -#else /* whatever we get from the associated struct device - bus:slot:dev.fn id */ #define PCIDEV_NAME(pdev) (pci_name(pdev)) -#endif #endif diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 7f31991772ea..f187fd8aeed6 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -30,23 +30,6 @@ config PCI_LEGACY_PROC When in doubt, say N. -config PCI_NAMES - bool "PCI device name database" - depends on PCI - ---help--- - By default, the kernel contains a database of all known PCI device - names to make the information in /proc/pci, /proc/ioports and - similar files comprehensible to the user. 
- - This database increases size of the kernel image by about 80KB. This - memory is freed after the system boots up if CONFIG_HOTPLUG is not set. - - Anyway, if you are building an installation floppy or kernel for an - embedded system where kernel image size really matters, you can disable - this feature and you'll get device ID numbers instead of names. - - When in doubt, say Y. - config PCI_DEBUG bool "PCI Debugging" depends on PCI && DEBUG_KERNEL diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 3657f6199c48..87cbf2d5f9b1 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -3,8 +3,7 @@ # obj-y += access.o bus.o probe.o remove.o pci.o quirks.o \ - names.o pci-driver.o search.o pci-sysfs.o \ - rom.o + pci-driver.o search.o pci-sysfs.o rom.o obj-$(CONFIG_PROC_FS) += proc.o ifndef CONFIG_SPARC64 @@ -46,21 +45,6 @@ ifeq ($(CONFIG_PCI_DEBUG),y) EXTRA_CFLAGS += -DDEBUG endif -hostprogs-y := gen-devlist - -# Dependencies on generated files need to be listed explicitly -$(obj)/names.o: $(obj)/devlist.h $(obj)/classlist.h -$(obj)/classlist.h: $(obj)/devlist.h - -# And that's how to generate them -quiet_cmd_devlist = DEVLIST $@ - cmd_devlist = ( cd $(obj); ./gen-devlist ) < $< -$(obj)/devlist.h: $(src)/pci.ids $(obj)/gen-devlist - $(call cmd,devlist) - -# Files generated that shall be removed upon make clean -clean-files := devlist.h classlist.h - # Build PCI Express stuff if needed obj-$(CONFIG_PCIEPORTBUS) += pcie/ diff --git a/drivers/pci/gen-devlist.c b/drivers/pci/gen-devlist.c deleted file mode 100644 index 8abfc499fdef..000000000000 --- a/drivers/pci/gen-devlist.c +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Generate devlist.h and classlist.h from the PCI ID file. - * - * (c) 1999--2002 Martin Mares - */ - -#include -#include - -#define MAX_NAME_SIZE 200 - -static void -pq(FILE *f, const char *c, int len) -{ - int i = 1; - while (*c && i != len) { - if (*c == '"') - fprintf(f, "\\\""); - else { - fputc(*c, f); - if (*c == '?' 
&& c[1] == '?') { - /* Avoid trigraphs */ - fprintf(f, "\" \""); - } - } - c++; - i++; - } -} - -int -main(void) -{ - char line[1024], *c, *bra, vend[8]; - int vendors = 0; - int mode = 0; - int lino = 0; - int vendor_len = 0; - FILE *devf, *clsf; - - devf = fopen("devlist.h", "w"); - clsf = fopen("classlist.h", "w"); - if (!devf || !clsf) { - fprintf(stderr, "Cannot create output file!\n"); - return 1; - } - - while (fgets(line, sizeof(line)-1, stdin)) { - lino++; - if ((c = strchr(line, '\n'))) - *c = 0; - if (!line[0] || line[0] == '#') - continue; - if (line[1] == ' ') { - if (line[0] == 'C' && strlen(line) > 4 && line[4] == ' ') { - vend[0] = line[2]; - vend[1] = line[3]; - vend[2] = 0; - mode = 2; - } else goto err; - } - else if (line[0] == '\t') { - if (line[1] == '\t') - continue; - switch (mode) { - case 1: - if (strlen(line) > 5 && line[5] == ' ') { - c = line + 5; - while (*c == ' ') - *c++ = 0; - if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) { - /* Too long, try cutting off long description */ - bra = strchr(c, '['); - if (bra && bra > c && bra[-1] == ' ') - bra[-1] = 0; - if (vendor_len + strlen(c) + 1 > MAX_NAME_SIZE) { - fprintf(stderr, "Line %d: Device name too long. 
Name truncated.\n", lino); - fprintf(stderr, "%s\n", c); - /*return 1;*/ - } - } - fprintf(devf, "\tDEVICE(%s,%s,\"", vend, line+1); - pq(devf, c, MAX_NAME_SIZE - vendor_len - 1); - fputs("\")\n", devf); - } else goto err; - break; - case 2: - if (strlen(line) > 3 && line[3] == ' ') { - c = line + 3; - while (*c == ' ') - *c++ = 0; - fprintf(clsf, "CLASS(%s%s, \"%s\")\n", vend, line+1, c); - } else goto err; - break; - default: - goto err; - } - } else if (strlen(line) > 4 && line[4] == ' ') { - c = line + 4; - while (*c == ' ') - *c++ = 0; - if (vendors) - fputs("ENDVENDOR()\n\n", devf); - vendors++; - strcpy(vend, line); - vendor_len = strlen(c); - if (vendor_len + 24 > MAX_NAME_SIZE) { - fprintf(stderr, "Line %d: Vendor name too long\n", lino); - return 1; - } - fprintf(devf, "VENDOR(%s,\"", vend); - pq(devf, c, 0); - fputs("\")\n", devf); - mode = 1; - } else { - err: - fprintf(stderr, "Line %d: Syntax error in mode %d: %s\n", lino, mode, line); - return 1; - } - } - fputs("ENDVENDOR()\n\ -\n\ -#undef VENDOR\n\ -#undef DEVICE\n\ -#undef ENDVENDOR\n", devf); - fputs("\n#undef CLASS\n", clsf); - - fclose(devf); - fclose(clsf); - - return 0; -} diff --git a/drivers/pci/names.c b/drivers/pci/names.c deleted file mode 100644 index ad224aada7c9..000000000000 --- a/drivers/pci/names.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * PCI Class and Device Name Tables - * - * Copyright 1993--1999 Drew Eckhardt, Frederic Potter, - * David Mosberger-Tang, Martin Mares - */ - -#include -#include -#include -#include -#include - -#ifdef CONFIG_PCI_NAMES - -struct pci_device_info { - unsigned short device; - unsigned short seen; - const char *name; -}; - -struct pci_vendor_info { - unsigned short vendor; - unsigned short nr; - const char *name; - struct pci_device_info *devices; -}; - -/* - * This is ridiculous, but we want the strings in - * the .init section so that they don't take up - * real memory.. Parse the same file multiple times - * to get all the info. 
- */ -#define VENDOR( vendor, name ) static char __vendorstr_##vendor[] __devinitdata = name; -#define ENDVENDOR() -#define DEVICE( vendor, device, name ) static char __devicestr_##vendor##device[] __devinitdata = name; -#include "devlist.h" - - -#define VENDOR( vendor, name ) static struct pci_device_info __devices_##vendor[] __devinitdata = { -#define ENDVENDOR() }; -#define DEVICE( vendor, device, name ) { 0x##device, 0, __devicestr_##vendor##device }, -#include "devlist.h" - -static struct pci_vendor_info __devinitdata pci_vendor_list[] = { -#define VENDOR( vendor, name ) { 0x##vendor, sizeof(__devices_##vendor) / sizeof(struct pci_device_info), __vendorstr_##vendor, __devices_##vendor }, -#define ENDVENDOR() -#define DEVICE( vendor, device, name ) -#include "devlist.h" -}; - -#define VENDORS (sizeof(pci_vendor_list)/sizeof(struct pci_vendor_info)) - -void __devinit pci_name_device(struct pci_dev *dev) -{ - const struct pci_vendor_info *vendor_p = pci_vendor_list; - int i = VENDORS; - char *name = dev->pretty_name; - - do { - if (vendor_p->vendor == dev->vendor) - goto match_vendor; - vendor_p++; - } while (--i); - - /* Couldn't find either the vendor nor the device */ - sprintf(name, "PCI device %04x:%04x", dev->vendor, dev->device); - return; - - match_vendor: { - struct pci_device_info *device_p = vendor_p->devices; - int i = vendor_p->nr; - - while (i > 0) { - if (device_p->device == dev->device) - goto match_device; - device_p++; - i--; - } - - /* Ok, found the vendor, but unknown device */ - sprintf(name, "PCI device %04x:%04x (%." PCI_NAME_HALF "s)", - dev->vendor, dev->device, vendor_p->name); - return; - - /* Full match */ - match_device: { - char *n = name + sprintf(name, "%s %s", - vendor_p->name, device_p->name); - int nr = device_p->seen + 1; - device_p->seen = nr; - if (nr > 1) - sprintf(n, " (#%d)", nr); - } - } -} - -/* - * Class names. Not in .init section as they are needed in runtime. 
- */ - -static u16 pci_class_numbers[] = { -#define CLASS(x,y) 0x##x, -#include "classlist.h" -}; - -static char *pci_class_names[] = { -#define CLASS(x,y) y, -#include "classlist.h" -}; - -char * -pci_class_name(u32 class) -{ - int i; - - for(i=0; i and other volunteers from the -# Linux PCI ID's Project at http://pciids.sf.net/. New data are always -# welcome (if they are accurate), we're eagerly expecting new entries, -# so if you have anything to contribute, please visit the home page or -# send a diff -u against the most recent pci.ids to pci-ids@ucw.cz. -# -# Daily snapshot on Tue 2005-03-08 10:11:48 -# - -# Vendors, devices and subsystems. Please keep sorted. - -# Syntax: -# vendor vendor_name -# device device_name <-- single tab -# subvendor subdevice subsystem_name <-- two tabs - -0000 Gammagraphx, Inc. -001a Ascend Communications, Inc. -0033 Paradyne corp. -003d Lockheed Martin-Marietta Corp -# Real TJN ID is e159, but they got it wrong several times --mj -0059 Tiger Jet Network Inc. (Wrong ID) -0070 Hauppauge computer works Inc. - 4000 WinTV PVR-350 - 4001 WinTV PVR-250 (v1) - 4009 WinTV PVR-250 - 4801 WinTV PVR-250 MCE -0071 Nebula Electronics Ltd. -0095 Silicon Image, Inc. (Wrong ID) - 0680 Ultra ATA/133 IDE RAID CONTROLLER CARD -0100 Ncipher Corp Ltd -# 018a is not LevelOne but there is a board misprogrammed -018a LevelOne - 0106 FPC-0106TX misprogrammed [RTL81xx] -# 021b is not Compaq but there is a board misprogrammed -021b Compaq Computer Corporation - 8139 HNE-300 (RealTek RTL8139c) [iPaq Networking] -# http://www.davicom.com.tw/ -0291 Davicom Semiconductor, Inc. - 8212 DM9102A(DM9102AE, SM9102AF) Ethernet 100/10 MBit(Rev 40) -# SpeedStream is Efficient Networks, Inc, a Siemens Company -02ac SpeedStream - 1012 1012 PCMCIA 10/100 Ethernet Card [RTL81xx] -0357 TTTech AG - 000a TTP-Monitoring Card V2.0 -0432 SCM Microsystems, Inc. 
- 0001 Pluto2 DVB-T Receiver for PCMCIA [EasyWatch MobilSet] -05e3 CyberDoor - 0701 CBD516 -0675 Dynalink - 1700 IS64PH ISDN Adapter - 1702 IS64PH ISDN Adapter -# Wrong ID used in subsystem ID of VIA USB controllers. -0925 VIA Technologies, Inc. (Wrong ID) -09c1 Arris - 0704 CM 200E Cable Modem -0a89 BREA Technologies Inc -0b49 ASCII Corporation -# see http://homepage1.nifty.com/mcn/lab/machines/trance_vibrator/usbview.vib.txt - 064f Trance Vibrator -0e11 Compaq Computer Corporation - 0001 PCI to EISA Bridge - 0002 PCI to ISA Bridge - 0046 Smart Array 64xx - 0e11 409a Smart Array 641 - 0e11 409b Smart Array 642 - 0e11 409c Smart Array 6400 - 0e11 409d Smart Array 6400 EM - 0049 NC7132 Gigabit Upgrade Module - 004a NC6136 Gigabit Server Adapter - 007c NC7770 1000BaseTX - 007d NC6770 1000BaseTX - 0085 NC7780 1000BaseTX - 00bb NC7760 - 00ca NC7771 - 00cb NC7781 - 00cf NC7772 - 00d0 NC7782 - 00d1 NC7783 - 00e3 NC7761 - 0508 Netelligent 4/16 Token Ring - 1000 Triflex/Pentium Bridge, Model 1000 - 2000 Triflex/Pentium Bridge, Model 2000 - 3032 QVision 1280/p - 3033 QVision 1280/p - 3034 QVision 1280/p - 4000 4000 [Triflex] - 4030 SMART-2/P - 4031 SMART-2SL - 4032 Smart Array 3200 - 4033 Smart Array 3100ES - 4034 Smart Array 221 - 4040 Integrated Array - 4048 Compaq Raid LC2 - 4050 Smart Array 4200 - 4051 Smart Array 4250ES - 4058 Smart Array 431 - 4070 Smart Array 5300 - 4080 Smart Array 5i - 4082 Smart Array 532 - 4083 Smart Array 5312 - 4091 Smart Array 6i - 409a Smart Array 641 - 409b Smart Array 642 - 409c Smart Array 6400 - 409d Smart Array 6400 EM - 6010 HotPlug PCI Bridge 6010 - 7020 USB Controller - a0ec Fibre Channel Host Controller - a0f0 Advanced System Management Controller - a0f3 Triflex PCI to ISA Bridge - a0f7 PCI Hotplug Controller - 8086 002a PCI Hotplug Controller A - 8086 002b PCI Hotplug Controller B - a0f8 ZFMicro Chipset USB - a0fc FibreChannel HBA Tachyon - ae10 Smart-2/P RAID Controller - 0e11 4030 Smart-2/P Array Controller - 0e11 4031 Smart-2SL 
Array Controller - 0e11 4032 Smart Array Controller - 0e11 4033 Smart 3100ES Array Controller - ae29 MIS-L - ae2a MPC - ae2b MIS-E - ae31 System Management Controller - ae32 Netelligent 10/100 TX PCI UTP - ae33 Triflex Dual EIDE Controller - ae34 Netelligent 10 T PCI UTP - ae35 Integrated NetFlex-3/P - ae40 Netelligent Dual 10/100 TX PCI UTP - ae43 Netelligent Integrated 10/100 TX UTP - ae69 CETUS-L - ae6c Northstar - ae6d NorthStar CPU to PCI Bridge - b011 Netelligent 10/100 TX Embedded UTP - b012 Netelligent 10 T/2 PCI UTP/Coax - b01e NC3120 Fast Ethernet NIC - b01f NC3122 Fast Ethernet NIC - b02f NC1120 Ethernet NIC - b030 Netelligent 10/100 TX UTP - b04a 10/100 TX PCI Intel WOL UTP Controller - b060 Smart Array 5300 Controller - b0c6 NC3161 Fast Ethernet NIC - b0c7 NC3160 Fast Ethernet NIC - b0d7 NC3121 Fast Ethernet NIC - b0dd NC3131 Fast Ethernet NIC - b0de NC3132 Fast Ethernet Module - b0df NC6132 Gigabit Module - b0e0 NC6133 Gigabit Module - b0e1 NC3133 Fast Ethernet Module - b123 NC6134 Gigabit NIC - b134 NC3163 Fast Ethernet NIC - b13c NC3162 Fast Ethernet NIC - b144 NC3123 Fast Ethernet NIC - b163 NC3134 Fast Ethernet NIC - b164 NC3165 Fast Ethernet Upgrade Module - b178 Smart Array 5i/532 - 0e11 4080 Smart Array 5i - 0e11 4082 Smart Array 532 - 0e11 4083 Smart Array 5312 - b1a4 NC7131 Gigabit Server Adapter -# HP Memory Hot-Plug Controller - b200 Memory Hot-Plug Controller - b203 Integrated Lights Out Controller - b204 Integrated Lights Out Processor - f130 NetFlex-3/P ThunderLAN 1.0 - f150 NetFlex-3/P ThunderLAN 2.3 -0e55 HaSoTec GmbH -# Formerly NCR -1000 LSI Logic / Symbios Logic - 0001 53c810 - 1000 1000 LSI53C810AE PCI to SCSI I/O Processor - 0002 53c820 - 0003 53c825 - 1000 1000 LSI53C825AE PCI to SCSI I/O Processor (Ultra Wide) - 0004 53c815 - 0005 53c810AP - 0006 53c860 - 1000 1000 LSI53C860E PCI to Ultra SCSI I/O Processor - 000a 53c1510 - 1000 1000 LSI53C1510 PCI to Dual Channel Wide Ultra2 SCSI Controller (Nonintelligent mode) - 000b 
53C896/897 - 0e11 6004 EOB003 Series SCSI host adapter - 1000 1000 LSI53C896/7 PCI to Dual Channel Ultra2 SCSI Multifunction Controller - 1000 1010 LSI22910 PCI to Dual Channel Ultra2 SCSI host adapter - 1000 1020 LSI21002 PCI to Dual Channel Ultra2 SCSI host adapter -# multifunction PCI card: Dual U2W SCSI, dual 10/100TX, graphics - 13e9 1000 6221L-4U - 000c 53c895 - 1000 1010 LSI8951U PCI to Ultra2 SCSI host adapter - 1000 1020 LSI8952U PCI to Ultra2 SCSI host adapter - 1de1 3906 DC-390U2B SCSI adapter - 1de1 3907 DC-390U2W - 000d 53c885 - 000f 53c875 - 0e11 7004 Embedded Ultra Wide SCSI Controller - 1000 1000 LSI53C876/E PCI to Dual Channel SCSI Controller - 1000 1010 LSI22801 PCI to Dual Channel Ultra SCSI host adapter - 1000 1020 LSI22802 PCI to Dual Channel Ultra SCSI host adapter - 1092 8760 FirePort 40 Dual SCSI Controller - 1de1 3904 DC390F/U Ultra Wide SCSI Adapter - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 4c53 1050 CT7 mainboard - 0010 53C1510 - 0e11 4040 Integrated Array Controller - 0e11 4048 RAID LC2 Controller - 1000 1000 53C1510 PCI to Dual Channel Wide Ultra2 SCSI Controller (Intelligent mode) - 0012 53c895a - 1000 1000 LSI53C895A PCI to Ultra2 SCSI Controller - 0013 53c875a - 1000 1000 LSI53C875A PCI to Ultra SCSI Controller - 0020 53c1010 Ultra3 SCSI Adapter - 1000 1000 LSI53C1010-33 PCI to Dual Channel Ultra160 SCSI Controller - 1de1 1020 DC-390U3W - 0021 53c1010 66MHz Ultra3 SCSI Adapter - 1000 1000 LSI53C1000/1000R/1010R/1010-66 PCI to Ultra160 SCSI Controller - 1000 1010 Asus TR-DLS onboard 53C1010-66 - 124b 1070 PMC-USCSI3 - 4c53 1080 CT8 mainboard - 4c53 1300 P017 mezzanine (32-bit PMC) - 4c53 1310 P017 mezzanine (64-bit PMC) - 0030 53c1030 PCI-X Fusion-MPT Dual Ultra320 SCSI - 1028 0123 PowerEdge 2600 - 1028 014a PowerEdge 1750 - 1028 016c PowerEdge 1850 MPT Fusion SCSI/RAID (Perc 4) - 1028 0183 PowerEdge 1800 - 1028 1010 LSI U320 SCSI Controller - 0031 53c1030ZC PCI-X Fusion-MPT Dual Ultra320 SCSI - 0032 53c1035 PCI-X Fusion-MPT 
Dual Ultra320 SCSI - 1000 1000 LSI53C1020/1030 PCI-X to Ultra320 SCSI Controller - 0033 1030ZC_53c1035 PCI-X Fusion-MPT Dual Ultra320 SCSI - 0040 53c1035 PCI-X Fusion-MPT Dual Ultra320 SCSI - 1000 0033 MegaRAID SCSI 320-2XR - 1000 0066 MegaRAID SCSI 320-2XRWS - 0041 53C1035ZC PCI-X Fusion-MPT Dual Ultra320 SCSI - 008f 53c875J - 1092 8000 FirePort 40 SCSI Controller - 1092 8760 FirePort 40 Dual SCSI Host Adapter - 0407 MegaRAID - 1000 0530 MegaRAID 530 SCSI 320-0X RAID Controller - 1000 0531 MegaRAID 531 SCSI 320-4X RAID Controller - 1000 0532 MegaRAID 532 SCSI 320-2X RAID Controller - 1028 0531 PowerEdge Expandable RAID Controller 4/QC - 1028 0533 PowerEdge Expandable RAID Controller 4/QC - 8086 0530 MegaRAID Intel RAID Controller SRCZCRX - 8086 0532 MegaRAID Intel RAID Controller SRCU42X - 0408 MegaRAID - 1000 0001 MegaRAID SCSI 320-1E RAID Controller - 1000 0002 MegaRAID SCSI 320-2E RAID Controller - 1025 004d MegaRAID ACER ROMB-2E RAID Controller - 1028 0001 PowerEdge RAID Controller PERC4e/SC - 1028 0002 PowerEdge RAID Controller PERC4e/DC - 1734 1065 FSC MegaRAID PCI Express ROMB - 8086 0002 MegaRAID Intel RAID Controller SRCU42E - 0409 MegaRAID - 1000 3004 MegaRAID SATA 300-4X RAID Controller - 1000 3008 MegaRAID SATA 300-8X RAID Controller - 8086 3008 MegaRAID RAID Controller SRCS28X - 8086 3431 MegaRAID RAID Controller Alief SROMBU42E - 8086 3499 MegaRAID RAID Controller Harwich SROMBU42E - 0621 FC909 Fibre Channel Adapter - 0622 FC929 Fibre Channel Adapter - 1000 1020 44929 O Dual Fibre Channel card - 0623 FC929 LAN - 0624 FC919 Fibre Channel Adapter - 0625 FC919 LAN - 0626 FC929X Fibre Channel Adapter - 1000 1010 7202-XP-LC Dual Fibre Channel card - 0627 FC929X LAN - 0628 FC919X Fibre Channel Adapter - 0629 FC919X LAN - 0701 83C885 NT50 DigitalScape Fast Ethernet - 0702 Yellowfin G-NIC gigabit ethernet - 1318 0000 PEI100X - 0804 SA2010 - 0805 SA2010ZC - 0806 SA2020 - 0807 SA2020ZC - 0901 61C102 - 1000 63C815 - 1960 MegaRAID - 1000 0518 MegaRAID 518 SCSI 
320-2 Controller - 1000 0520 MegaRAID 520 SCSI 320-1 Controller - 1000 0522 MegaRAID 522 i4 133 RAID Controller - 1000 0523 MegaRAID SATA 150-6 RAID Controller - 1000 4523 MegaRAID SATA 150-4 RAID Controller - 1000 a520 MegaRAID ZCR SCSI 320-0 Controller - 1028 0518 MegaRAID 518 DELL PERC 4/DC RAID Controller - 1028 0520 MegaRAID 520 DELL PERC 4/SC RAID Controller - 1028 0531 PowerEdge Expandable RAID Controller 4/QC - 1028 0533 PowerEdge Expandable RAID Controller 4/QC - 8086 0520 MegaRAIDRAID Controller SRCU41L - 8086 0523 MegaRAID RAID Controller SRCS16 -1001 Kolter Electronic - 0010 PCI 1616 Measurement card with 32 digital I/O lines - 0011 OPTO-PCI Opto-Isolated digital I/O board - 0012 PCI-AD/DA Analogue I/O board - 0013 PCI-OPTO-RELAIS Digital I/O board with relay outputs - 0014 PCI-Counter/Timer Counter Timer board - 0015 PCI-DAC416 Analogue output board - 0016 PCI-MFB Analogue I/O board - 0017 PROTO-3 PCI Prototyping board - 9100 INI-9100/9100W SCSI Host -1002 ATI Technologies Inc - 3150 M24 1P [Radeon Mobility X600] - 3154 M24 1T [FireGL M24 GL] - 3e50 RV380 0x3e50 [Radeon X600] - 3e54 RV380 0x3e54 [FireGL V3200] - 3e70 RV380 [Radeon X600] Secondary - 4136 Radeon IGP 320 M - 4137 Radeon IGP330/340/350 - 4144 R300 AD [Radeon 9500 Pro] -# New PCI ID provided by ATI developer relations (correction to above) - 4145 R300 AE [Radeon 9700 Pro] -# New PCI ID provided by ATI developer relations (oops, correction to above) - 4146 R300 AF [Radeon 9700 Pro] - 4147 R300 AG [FireGL Z1/X1] - 4148 R350 AH [Radeon 9800] - 4149 R350 AI [Radeon 9800] - 414a R350 AJ [Radeon 9800] - 414b R350 AK [Fire GL X2] -# New PCI ID provided by ATI developer relations - 4150 RV350 AP [Radeon 9600] - 1002 0002 R9600 Pro primary (Asus OEM for HP) - 1002 0003 R9600 Pro secondary (Asus OEM for HP) - 1458 4024 Giga-Byte GV-R96128D Primary - 148c 2064 PowerColor R96A-C3N - 148c 2066 PowerColor R96A-C3N - 174b 7c19 Sapphire Atlantis Radeon 9600 Pro - 174b 7c29 GC-R9600PRO Primary [Sapphire] - 
17ee 2002 Radeon 9600 256Mb Primary - 18bc 0101 GC-R9600PRO Primary -# New PCI ID provided by ATI developer relations - 4151 RV350 AQ [Radeon 9600] - 1043 c004 A9600SE -# New PCI ID provided by ATI developer relations - 4152 RV350 AR [Radeon 9600] - 1002 0002 Radeon 9600XT - 1043 c002 Radeon 9600 XT TVD - 174b 7c29 Sapphire Radeon 9600XT - 1787 4002 Radeon 9600 XT - 4153 RV350 AS [Radeon 9600 AS] - 4154 RV350 AT [Fire GL T2] - 4155 RV350 AU [Fire GL T2] - 4156 RV350 AV [Fire GL T2] - 4157 RV350 AW [Fire GL T2] - 4158 68800AX [Mach32] -# The PCI ID is unrelated to any DVI output. - 4164 R300 AD [Radeon 9500 Pro] (Secondary) -# New PCI ID info provided by ATI developer relations - 4165 R300 AE [Radeon 9700 Pro] (Secondary) -# New PCI ID info provided by ATI developer relations - 4166 R300 AF [Radeon 9700 Pro] (Secondary) -# New PCI ID provided by ATI developer relations - 4168 Radeon R350 [Radeon 9800] (Secondary) -# New PCI ID provided by ATI developer relations (correction to above) - 4170 RV350 AP [Radeon 9600] (Secondary) - 1458 4025 Giga-Byte GV-R96128D Secondary - 148c 2067 PowerColor R96A-C3N (Secondary) - 174b 7c28 GC-R9600PRO Secondary [Sapphire] - 17ee 2003 Radeon 9600 256Mb Secondary - 18bc 0100 GC-R9600PRO Secondary -# New PCI ID provided by ATI developer relations (correction to above) - 4171 RV350 AQ [Radeon 9600] (Secondary) - 1043 c005 A9600SE (Secondary) -# New PCI ID provided by ATI developer relations (correction to above) - 4172 RV350 AR [Radeon 9600] (Secondary) - 1002 0003 Radeon 9600XT (Secondary) - 1043 c003 A9600XT (Secondary) - 174b 7c28 Sapphire Radeon 9600XT (Secondary) - 1787 4003 Radeon 9600 XT (Secondary) - 4173 RV350 ?? 
[Radeon 9550] (Secondary) - 4237 Radeon 7000 IGP - 4242 R200 BB [Radeon All in Wonder 8500DV] - 1002 02aa Radeon 8500 AIW DV Edition - 4243 R200 BC [Radeon All in Wonder 8500] - 4336 Radeon Mobility U1 - 103c 0024 Pavilion ze4400 builtin Video - 4337 Radeon IGP 330M/340M/350M - 1014 053a ThinkPad R40e (2684-HVG) builtin VGA controller - 103c 0850 Radeon IGP 345M - 4341 IXP150 AC'97 Audio Controller - 4345 EHCI USB Controller - 4347 OHCI USB Controller #1 - 4348 OHCI USB Controller #2 - 4349 ATI Dual Channel Bus Master PCI IDE Controller - 434d IXP AC'97 Modem - 4353 ATI SMBus - 4354 215CT [Mach64 CT] - 4358 210888CX [Mach64 CX] - 4363 ATI SMBus - 436e ATI 436E Serial ATA Controller - 4372 ATI SMBus - 4376 Standard Dual Channel PCI IDE Controller ATI - 4379 ATI 4379 Serial ATA Controller - 437a ATI 437A Serial ATA Controller - 4437 Radeon Mobility 7000 IGP - 4554 210888ET [Mach64 ET] - 4654 Mach64 VT - 4742 3D Rage Pro AGP 1X/2X - 1002 0040 Rage Pro Turbo AGP 2X - 1002 0044 Rage Pro Turbo AGP 2X - 1002 0061 Rage Pro AIW AGP 2X - 1002 0062 Rage Pro AIW AGP 2X - 1002 0063 Rage Pro AIW AGP 2X - 1002 0080 Rage Pro Turbo AGP 2X - 1002 0084 Rage Pro Turbo AGP 2X - 1002 4742 Rage Pro Turbo AGP 2X - 1002 8001 Rage Pro Turbo AGP 2X - 1028 0082 Rage Pro Turbo AGP 2X - 1028 4082 Optiplex GX1 Onboard Display Adapter - 1028 8082 Rage Pro Turbo AGP 2X - 1028 c082 Rage Pro Turbo AGP 2X - 8086 4152 Xpert 98D AGP 2X - 8086 464a Rage Pro Turbo AGP 2X - 4744 3D Rage Pro AGP 1X - 1002 4744 Rage Pro Turbo AGP - 4747 3D Rage Pro - 4749 3D Rage Pro - 1002 0061 Rage Pro AIW - 1002 0062 Rage Pro AIW - 474c Rage XC - 474d Rage XL AGP 2X - 1002 0004 Xpert 98 RXL AGP 2X - 1002 0008 Xpert 98 RXL AGP 2X - 1002 0080 Rage XL AGP 2X - 1002 0084 Xpert 98 AGP 2X - 1002 474d Rage XL AGP - 1033 806a Rage XL AGP - 474e Rage XC AGP - 1002 474e Rage XC AGP - 474f Rage XL - 1002 0008 Rage XL - 1002 474f Rage XL - 4750 3D Rage Pro 215GP - 1002 0040 Rage Pro Turbo - 1002 0044 Rage Pro Turbo - 1002 0080 Rage 
Pro Turbo - 1002 0084 Rage Pro Turbo - 1002 4750 Rage Pro Turbo - 4751 3D Rage Pro 215GQ - 4752 Rage XL - 1002 0008 Rage XL - 1002 4752 Rage XL - 1002 8008 Rage XL - 1028 00ce PowerEdge 1400 - 1028 00d1 PowerEdge 2550 - 1028 00d9 PowerEdge 2500 - 8086 3411 SDS2 Mainboard - 8086 3427 S875WP1-E mainboard - 4753 Rage XC - 1002 4753 Rage XC - 4754 3D Rage I/II 215GT [Mach64 GT] - 4755 3D Rage II+ 215GTB [Mach64 GTB] - 4756 3D Rage IIC 215IIC [Mach64 GT IIC] - 1002 4756 Rage IIC - 4757 3D Rage IIC AGP - 1002 4757 Rage IIC AGP - 1028 0089 Rage 3D IIC - 1028 4082 Rage 3D IIC - 1028 8082 Rage 3D IIC - 1028 c082 Rage 3D IIC - 4758 210888GX [Mach64 GX] - 4759 3D Rage IIC - 475a 3D Rage IIC AGP - 1002 0084 Rage 3D Pro AGP 2x XPERT 98 - 1002 0087 Rage 3D IIC - 1002 475a Rage IIC AGP - 4964 Radeon RV250 Id [Radeon 9000] - 4965 Radeon RV250 Ie [Radeon 9000] - 4966 Radeon RV250 If [Radeon 9000] - 10f1 0002 RV250 If [Tachyon G9000 PRO] - 148c 2039 RV250 If [Radeon 9000 Pro "Evil Commando"] - 1509 9a00 RV250 If [Radeon 9000 "AT009"] -# New subdevice - 3D Prophet 9000 PCI by Hercules. AGP version probably would have same ID, so not specified. 
- 1681 0040 RV250 If [3D prophet 9000] - 174b 7176 RV250 If [Sapphire Radeon 9000 Pro] - 174b 7192 RV250 If [Radeon 9000 "Atlantis"] - 17af 2005 RV250 If [Excalibur Radeon 9000 Pro] - 17af 2006 RV250 If [Excalibur Radeon 9000] - 4967 Radeon RV250 Ig [Radeon 9000] - 496e Radeon RV250 [Radeon 9000] (Secondary) - 4a48 R420 JH [Radeon X800] - 4a49 R420 JI [Radeon X800PRO] - 4a4a R420 JJ [Radeon X800SE] - 4a4b R420 JK [Radeon X800] - 4a4c R420 JL [Radeon X800] - 4a4d R420 JM [FireGL X3] - 4a4e M18 JN [Radeon Mobility 9800] - 4a50 R420 JP [Radeon X800XT] - 4a70 R420 [X800XT-PE] (Secondary) - 4c42 3D Rage LT Pro AGP-133 - 0e11 b0e7 Rage LT Pro (Compaq Presario 5240) - 0e11 b0e8 Rage 3D LT Pro - 0e11 b10e 3D Rage LT Pro (Compaq Armada 1750) - 1002 0040 Rage LT Pro AGP 2X - 1002 0044 Rage LT Pro AGP 2X - 1002 4c42 Rage LT Pro AGP 2X - 1002 8001 Rage LT Pro AGP 2X - 1028 0085 Rage 3D LT Pro - 4c44 3D Rage LT Pro AGP-66 - 4c45 Rage Mobility M3 AGP - 4c46 Rage Mobility M3 AGP 2x - 1028 00b1 Latitude C600 - 4c47 3D Rage LT-G 215LG - 4c49 3D Rage LT Pro - 1002 0004 Rage LT Pro - 1002 0040 Rage LT Pro - 1002 0044 Rage LT Pro - 1002 4c49 Rage LT Pro - 4c4d Rage Mobility P/M AGP 2x - 0e11 b111 Armada M700 - 0e11 b160 Armada E500 - 1002 0084 Xpert 98 AGP 2X (Mobility) - 1014 0154 ThinkPad A20m - 1028 00aa Latitude CPt - 1028 00bb Latitude CPx - 4c4e Rage Mobility L AGP 2x - 4c50 3D Rage LT Pro - 1002 4c50 Rage LT Pro - 4c51 3D Rage LT Pro - 4c52 Rage Mobility P/M - 1033 8112 Versa Note VXi - 4c53 Rage Mobility L - 4c54 264LT [Mach64 LT] - 4c57 Radeon Mobility M7 LW [Radeon Mobility 7500] - 1014 0517 ThinkPad T30 - 1028 00e6 Radeon Mobility M7 LW (Dell Inspiron 8100) - 1028 012a Latitude C640 - 144d c006 Radeon Mobility M7 LW in vpr Matrix 170B4 - 4c58 Radeon RV200 LX [Mobility FireGL 7800 M7] - 4c59 Radeon Mobility M6 LY - 1014 0235 ThinkPad A30/A30p (2652/2653) - 1014 0239 ThinkPad X22/X23/X24 - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 4c5a Radeon Mobility M6 LZ 
- 4c64 Radeon R250 Ld [Radeon Mobility 9000 M9] - 4c65 Radeon R250 Le [Radeon Mobility 9000 M9] - 4c66 Radeon R250 Lf [FireGL 9000] - 4c67 Radeon R250 Lg [Radeon Mobility 9000 M9] -# Secondary chip to the Lf - 4c6e Radeon R250 Ln [Radeon Mobility 9000 M9] [Secondary] - 4d46 Rage Mobility M4 AGP - 4d4c Rage Mobility M4 AGP - 4e44 Radeon R300 ND [Radeon 9700 Pro] - 4e45 Radeon R300 NE [Radeon 9500 Pro] - 1002 0002 Radeon R300 NE [Radeon 9500 Pro] - 1681 0002 Hercules 3D Prophet 9500 PRO [Radeon 9500 Pro] -# New PCI ID provided by ATI developer relations (correction to above) - 4e46 RV350 NF [Radeon 9600] - 4e47 Radeon R300 NG [FireGL X1] -# (added pro) - 4e48 Radeon R350 [Radeon 9800 Pro] -# New PCI ID provided by ATI developer relations - 4e49 Radeon R350 [Radeon 9800] - 4e4a RV350 NJ [Radeon 9800 XT] - 4e4b R350 NK [Fire GL X2] -# New PCI ID provided by ATI developer relations - 4e50 RV350 [Mobility Radeon 9600 M10] - 1025 005a TravelMate 290 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1734 1055 Amilo M1420W - 4e51 M10 NQ [Radeon Mobility 9600] - 4e52 RV350 [Mobility Radeon 9600 M10] - 4e53 M10 NS [Radeon Mobility 9600] - 4e54 M10 NT [FireGL Mobility T2] - 4e56 M11 NV [FireGL Mobility T2e] - 4e64 Radeon R300 [Radeon 9700 Pro] (Secondary) - 4e65 Radeon R300 [Radeon 9500 Pro] (Secondary) - 1002 0003 Radeon R300 NE [Radeon 9500 Pro] - 1681 0003 Hercules 3D Prophet 9500 PRO [Radeon 9500 Pro] (Secondary) -# New PCI ID provided by ATI developer relations (correction to above) - 4e66 RV350 NF [Radeon 9600] (Secondary) - 4e67 Radeon R300 [FireGL X1] (Secondary) -# (added pro) - 4e68 Radeon R350 [Radeon 9800 Pro] (Secondary) -# New PCI ID provided by ATI developer relations - 4e69 Radeon R350 [Radeon 9800] (Secondary) - 4e6a RV350 NJ [Radeon 9800 XT] (Secondary) - 1002 4e71 ATI Technologies Inc M10 NQ [Radeon Mobility 9600] - 5041 Rage 128 PA/PRO - 5042 Rage 128 PB/PRO AGP 2x - 5043 Rage 128 PC/PRO AGP 4x - 5044 Rage 128 PD/PRO TMDS - 1002 0028 Rage 128 AIW - 1002 
0029 Rage 128 AIW - 5045 Rage 128 PE/PRO AGP 2x TMDS - 5046 Rage 128 PF/PRO AGP 4x TMDS - 1002 0004 Rage Fury Pro - 1002 0008 Rage Fury Pro/Xpert 2000 Pro - 1002 0014 Rage Fury Pro - 1002 0018 Rage Fury Pro/Xpert 2000 Pro - 1002 0028 Rage 128 Pro AIW AGP - 1002 002a Rage 128 Pro AIW AGP - 1002 0048 Rage Fury Pro - 1002 2000 Rage Fury MAXX AGP 4x (TMDS) (VGA device) - 1002 2001 Rage Fury MAXX AGP 4x (TMDS) (Extra device?!) - 5047 Rage 128 PG/PRO - 5048 Rage 128 PH/PRO AGP 2x - 5049 Rage 128 PI/PRO AGP 4x - 504a Rage 128 PJ/PRO TMDS - 504b Rage 128 PK/PRO AGP 2x TMDS - 504c Rage 128 PL/PRO AGP 4x TMDS - 504d Rage 128 PM/PRO - 504e Rage 128 PN/PRO AGP 2x - 504f Rage 128 PO/PRO AGP 4x - 5050 Rage 128 PP/PRO TMDS [Xpert 128] - 1002 0008 Xpert 128 - 5051 Rage 128 PQ/PRO AGP 2x TMDS - 5052 Rage 128 PR/PRO AGP 4x TMDS - 5053 Rage 128 PS/PRO - 5054 Rage 128 PT/PRO AGP 2x - 5055 Rage 128 PU/PRO AGP 4x - 5056 Rage 128 PV/PRO TMDS - 5057 Rage 128 PW/PRO AGP 2x TMDS - 5058 Rage 128 PX/PRO AGP 4x TMDS - 5144 Radeon R100 QD [Radeon 7200] - 1002 0008 Radeon 7000/Radeon VE - 1002 0009 Radeon 7000/Radeon - 1002 000a Radeon 7000/Radeon - 1002 001a Radeon 7000/Radeon - 1002 0029 Radeon AIW - 1002 0038 Radeon 7000/Radeon - 1002 0039 Radeon 7000/Radeon - 1002 008a Radeon 7000/Radeon - 1002 00ba Radeon 7000/Radeon - 1002 0139 Radeon 7000/Radeon - 1002 028a Radeon 7000/Radeon - 1002 02aa Radeon AIW - 1002 053a Radeon 7000/Radeon - 5145 Radeon R100 QE - 5146 Radeon R100 QF - 5147 Radeon R100 QG - 5148 Radeon R200 QH [Radeon 8500] - 1002 010a FireGL 8800 64Mb - 1002 0152 FireGL 8800 128Mb - 1002 0162 FireGL 8700 32Mb - 1002 0172 FireGL 8700 64Mb - 5149 Radeon R200 QI - 514a Radeon R200 QJ - 514b Radeon R200 QK - 514c Radeon R200 QL [Radeon 8500 LE] - 1002 003a Radeon R200 QL [Radeon 8500 LE] - 1002 013a Radeon 8500 - 148c 2026 R200 QL [Radeon 8500 Evil Master II Multi Display Edition] - 1681 0010 Radeon 8500 [3D Prophet 8500 128Mb] - 174b 7149 Radeon R200 QL [Sapphire Radeon 8500 LE] - 514d 
Radeon R200 QM [Radeon 9100] - 514e Radeon R200 QN [Radeon 8500LE] - 514f Radeon R200 QO [Radeon 8500LE] - 5154 R200 QT [Radeon 8500] - 5155 R200 QU [Radeon 9100] - 5157 Radeon RV200 QW [Radeon 7500] - 1002 013a Radeon 7500 - 1002 103a Dell Optiplex GX260 - 1458 4000 RV200 QW [RADEON 7500 PRO MAYA AR] - 148c 2024 RV200 QW [Radeon 7500LE Dual Display] - 148c 2025 RV200 QW [Radeon 7500 Evil Master Multi Display Edition] - 148c 2036 RV200 QW [Radeon 7500 PCI Dual Display] - 174b 7146 RV200 QW [Radeon 7500 LE] - 174b 7147 RV200 QW [Sapphire Radeon 7500LE] - 174b 7161 Radeon RV200 QW [Radeon 7500 LE] - 17af 0202 RV200 QW [Excalibur Radeon 7500LE] - 5158 Radeon RV200 QX [Radeon 7500] - 5159 Radeon RV100 QY [Radeon 7000/VE] - 1002 000a Radeon 7000/Radeon VE - 1002 000b Radeon 7000 - 1002 0038 Radeon 7000/Radeon VE - 1002 003a Radeon 7000/Radeon VE - 1002 00ba Radeon 7000/Radeon VE - 1002 013a Radeon 7000/Radeon VE - 1458 4002 RV100 QY [RADEON 7000 PRO MAYA AV Series] - 148c 2003 RV100 QY [Radeon 7000 Multi-Display Edition] - 148c 2023 RV100 QY [Radeon 7000 Evil Master Multi-Display] - 174b 7112 RV100 QY [Sapphire Radeon VE 7000] - 174b 7c28 Sapphire Radeon VE 7000 DDR - 1787 0202 RV100 QY [Excalibur Radeon 7000] - 515a Radeon RV100 QZ [Radeon 7000/VE] - 5168 Radeon R200 Qh - 5169 Radeon R200 Qi - 516a Radeon R200 Qj - 516b Radeon R200 Qk -# This one is not in ATI documentation, but is in XFree86 source code - 516c Radeon R200 Ql - 5245 Rage 128 RE/SG - 1002 0008 Xpert 128 - 1002 0028 Rage 128 AIW - 1002 0029 Rage 128 AIW - 1002 0068 Rage 128 AIW - 5246 Rage 128 RF/SG AGP - 1002 0004 Magnum/Xpert 128/Xpert 99 - 1002 0008 Magnum/Xpert128/X99/Xpert2000 - 1002 0028 Rage 128 AIW AGP - 1002 0044 Rage Fury/Xpert 128/Xpert 2000 - 1002 0068 Rage 128 AIW AGP - 1002 0448 Rage Fury - 5247 Rage 128 RG - 524b Rage 128 RK/VR - 524c Rage 128 RL/VR AGP - 1002 0008 Xpert 99/Xpert 2000 - 1002 0088 Xpert 99 - 5345 Rage 128 SE/4x - 5346 Rage 128 SF/4x AGP 2x - 1002 0048 RAGE 128 16MB VGA 
TVOUT AMC PAL - 5347 Rage 128 SG/4x AGP 4x - 5348 Rage 128 SH - 534b Rage 128 SK/4x - 534c Rage 128 SL/4x AGP 2x - 534d Rage 128 SM/4x AGP 4x - 1002 0008 Xpert 99/Xpert 2000 - 1002 0018 Xpert 2000 - 534e Rage 128 4x - 5354 Mach 64 VT - 1002 5654 Mach 64 reference - 5446 Rage 128 Pro Ultra TF - 1002 0004 Rage Fury Pro - 1002 0008 Rage Fury Pro/Xpert 2000 Pro - 1002 0018 Rage Fury Pro/Xpert 2000 Pro - 1002 0028 Rage 128 AIW Pro AGP - 1002 0029 Rage 128 AIW - 1002 002a Rage 128 AIW Pro AGP - 1002 002b Rage 128 AIW - 1002 0048 Xpert 2000 Pro - 544c Rage 128 Pro Ultra TL - 5452 Rage 128 Pro Ultra TR - 1002 001c Rage 128 Pro 4XL - 103c 1279 Rage 128 Pro 4XL - 5453 Rage 128 Pro Ultra TS - 5454 Rage 128 Pro Ultra TT - 5455 Rage 128 Pro Ultra TU - 5460 M22 [Radeon Mobility M300] - 5464 M22 [FireGL GL] - 5548 R423 UH [Radeon X800 (PCIE)] - 5549 R423 UI [Radeon X800PRO (PCIE)] - 554a R423 UJ [Radeon X800LE (PCIE)] - 554b R423 UK [Radeon X800SE (PCIE)] - 5551 R423 UQ [FireGL V7200 (PCIE)] - 5552 R423 UR [FireGL V5100 (PCIE)] - 5554 R423 UT [FireGL V7100 (PCIE)] - 556b Radeon R423 UK (PCIE) [X800 SE] (Secondary) - 5654 264VT [Mach64 VT] - 1002 5654 Mach64VT Reference - 5655 264VT3 [Mach64 VT3] - 5656 264VT4 [Mach64 VT4] - 5830 RS300 Host Bridge - 5831 RS300 Host Bridge - 5832 RS300 Host Bridge - 5833 Radeon 9100 IGP Host Bridge - 5834 Radeon 9100 IGP - 5835 RS300M AGP [Radeon Mobility 9100IGP] - 5838 Radeon 9100 IGP AGP Bridge - 5941 RV280 [Radeon 9200] (Secondary) - 1458 4019 Gigabyte Radeon 9200 - 174b 7c12 Sapphire Radeon 9200 -# http://www.hightech.com.hk/html/9200.htm - 17af 200d Excalibur Radeon 9200 - 18bc 0050 GeXcube GC-R9200-C3 (Secondary) - 5944 RV280 [Radeon 9200 SE (PCI)] - 5960 RV280 [Radeon 9200 PRO] - 5961 RV280 [Radeon 9200] - 1002 2f72 All-in-Wonder 9200 Series - 1019 4c30 Radeon 9200 VIVO - 12ab 5961 YUAN SMARTVGA Radeon 9200 - 1458 4018 Gigabyte Radeon 9200 - 174b 7c13 Sapphire Radeon 9200 -# http://www.hightech.com.hk/html/9200.htm - 17af 200c Excalibur 
Radeon 9200 - 18bc 0050 Radeon 9200 Game Buster - 18bc 0051 GeXcube GC-R9200-C3 - 18bc 0053 Radeon 9200 Game Buster VIVO - 5962 RV280 [Radeon 9200] - 5964 RV280 [Radeon 9200 SE] - 1043 c006 ASUS Radeon 9200 SE / TD / 128M - 1458 4018 Radeon 9200 SE - 148c 2073 CN-AG92E - 174b 7c13 Sapphire Radeon 9200 SE - 1787 5964 Excalibur 9200SE VIVO 128M - 17af 2012 Radeon 9200 SE Excalibur - 18bc 0170 Sapphire Radeon 9200 SE 128MB Game Buster -# 128MB DDR, DVI/VGA/TV out - 18bc 0173 GC-R9200L(SE)-C3H [Radeon 9200 Game Buster] - 5b60 RV370 5B60 [Radeon X300 (PCIE)] - 1043 002a Extreme AX300SE-X - 1043 032e Extreme AX300/TD - 5b62 RV370 5B62 [Radeon X600 (PCIE)] - 5b64 RV370 5B64 [FireGL V3100 (PCIE)] - 5b65 RV370 5B65 [FireGL D1100 (PCIE)] - 5c61 M9+ 5C61 [Radeon Mobility 9200 (AGP)] - 5c63 M9+ 5C63 [Radeon Mobility 9200 (AGP)] - 5d44 RV280 [Radeon 9200 SE] (Secondary) - 1458 4019 Radeon 9200 SE (Secondary) - 174b 7c12 Sapphire Radeon 9200 SE (Secondary) - 1787 5965 Excalibur 9200SE VIVO 128M (Secondary) - 17af 2013 Radeon 9200 SE Excalibur (Secondary) - 18bc 0171 Radeon 9200 SE 128MB Game Buster (Secondary) - 18bc 0172 GC-R9200L(SE)-C3H [Radeon 9200 Game Buster] - 5d4d R480 [Radeon X850XT Platinum] - 5d57 R423 5F57 [Radeon X800XT (PCIE)] - 700f PCI Bridge [IGP 320M] - 7010 PCI Bridge [IGP 340M] - 7834 Radeon 9100 PRO IGP - 7835 Radeon Mobility 9200 IGP - 7c37 RV350 AQ [Radeon 9600 SE] - cab0 AGP Bridge [IGP 320M] - cab2 RS200/RS200M AGP Bridge [IGP 340M] - cbb2 RS200/RS200M AGP Bridge [IGP 340M] -1003 ULSI Systems - 0201 US201 -1004 VLSI Technology Inc - 0005 82C592-FC1 - 0006 82C593-FC1 - 0007 82C594-AFC2 - 0008 82C596/7 [Wildcat] - 0009 82C597-AFC2 - 000c 82C541 [Lynx] - 000d 82C543 [Lynx] - 0101 82C532 - 0102 82C534 [Eagle] - 0103 82C538 - 0104 82C535 - 0105 82C147 - 0200 82C975 - 0280 82C925 - 0304 QSound ThunderBird PCI Audio - 1004 0304 QSound ThunderBird PCI Audio - 122d 1206 DSP368 Audio - 1483 5020 XWave Thunder 3D Audio - 0305 QSound ThunderBird PCI Audio Gameport - 
1004 0305 QSound ThunderBird PCI Audio Gameport - 122d 1207 DSP368 Audio Gameport - 1483 5021 XWave Thunder 3D Audio Gameport - 0306 QSound ThunderBird PCI Audio Support Registers - 1004 0306 QSound ThunderBird PCI Audio Support Registers - 122d 1208 DSP368 Audio Support Registers - 1483 5022 XWave Thunder 3D Audio Support Registers - 0307 Thunderbird - 0308 Thunderbird - 0702 VAS96011 [Golden Gate II] - 0703 Tollgate -1005 Avance Logic Inc. [ALI] - 2064 ALG2032/2064 - 2128 ALG2364A - 2301 ALG2301 - 2302 ALG2302 - 2364 ALG2364 - 2464 ALG2364A - 2501 ALG2564A/25128A -1006 Reply Group -1007 NetFrame Systems Inc -1008 Epson -100a Phoenix Technologies -100b National Semiconductor Corporation - 0001 DP83810 - 0002 87415/87560 IDE - 000e 87560 Legacy I/O - 000f FireWire Controller - 0011 NS87560 National PCI System I/O - 0012 USB Controller - 0020 DP83815 (MacPhyter) Ethernet Controller - 103c 0024 Pavilion ze4400 builtin Network - 1385 f311 FA311 / FA312 (FA311 with WoL HW) - 0022 DP83820 10/100/1000 Ethernet Controller - 0028 Geode GX2 Host Bridge - 002a CS5535 South Bridge - 002b CS5535 ISA bridge - 002d CS5535 IDE - 002e CS5535 Audio - 002f CS5535 USB - 0030 Geode GX2 Graphics Processor - 0035 DP83065 [Saturn] 10/100/1000 Ethernet Controller - 0500 SCx200 Bridge - 0501 SCx200 SMI - 0502 SCx200 IDE - 0503 SCx200 Audio - 0504 SCx200 Video - 0505 SCx200 XBus - 0510 SC1100 Bridge - 0511 SC1100 SMI - 0515 SC1100 XBus - d001 87410 IDE -100c Tseng Labs Inc - 3202 ET4000/W32p rev A - 3205 ET4000/W32p rev B - 3206 ET4000/W32p rev C - 3207 ET4000/W32p rev D - 3208 ET6000 - 4702 ET6300 -100d AST Research Inc -100e Weitek - 9000 P9000 Viper - 9001 P9000 Viper - 9002 P9000 Viper - 9100 P9100 Viper Pro/SE -1010 Video Logic, Ltd. 
-1011 Digital Equipment Corporation - 0001 DECchip 21050 - 0002 DECchip 21040 [Tulip] - 0004 DECchip 21030 [TGA] - 0007 NVRAM [Zephyr NVRAM] - 0008 KZPSA [KZPSA] - 0009 DECchip 21140 [FasterNet] - 1025 0310 21140 Fast Ethernet - 10b8 2001 SMC9332BDT EtherPower 10/100 - 10b8 2002 SMC9332BVT EtherPower T4 10/100 - 10b8 2003 SMC9334BDT EtherPower 10/100 (1-port) - 1109 2400 ANA-6944A/TX Fast Ethernet - 1112 2300 RNS2300 Fast Ethernet - 1112 2320 RNS2320 Fast Ethernet - 1112 2340 RNS2340 Fast Ethernet - 1113 1207 EN-1207-TX Fast Ethernet - 1186 1100 DFE-500TX Fast Ethernet - 1186 1112 DFE-570TX Fast Ethernet - 1186 1140 DFE-660 Cardbus Ethernet 10/100 - 1186 1142 DFE-660 Cardbus Ethernet 10/100 - 11f6 0503 Freedomline Fast Ethernet - 1282 9100 AEF-380TXD Fast Ethernet - 1385 1100 FA310TX Fast Ethernet - 2646 0001 KNE100TX Fast Ethernet - 000a 21230 Video Codec - 000d PBXGB [TGA2] - 000f DEFPA - 0014 DECchip 21041 [Tulip Pass 3] - 1186 0100 DE-530+ - 0016 DGLPB [OPPO] - 0017 PV-PCI Graphics Controller (ZLXp-L) - 0019 DECchip 21142/43 - 1011 500a DE500A Fast Ethernet - 1011 500b DE500B Fast Ethernet - 1014 0001 10/100 EtherJet Cardbus - 1025 0315 ALN315 Fast Ethernet - 1033 800c PC-9821-CS01 100BASE-TX Interface Card - 1033 800d PC-9821NR-B06 100BASE-TX Interface Card - 108d 0016 Rapidfire 2327 10/100 Ethernet - 108d 0017 GoCard 2250 Ethernet 10/100 Cardbus - 10b8 2005 SMC8032DT Extreme Ethernet 10/100 - 10b8 8034 SMC8034 Extreme Ethernet 10/100 - 10ef 8169 Cardbus Fast Ethernet - 1109 2a00 ANA-6911A/TX Fast Ethernet - 1109 2b00 ANA-6911A/TXC Fast Ethernet - 1109 3000 ANA-6922/TX Fast Ethernet - 1113 1207 Cheetah Fast Ethernet - 1113 2220 Cardbus Fast Ethernet - 115d 0002 Cardbus Ethernet 10/100 - 1179 0203 Fast Ethernet - 1179 0204 Cardbus Fast Ethernet - 1186 1100 DFE-500TX Fast Ethernet - 1186 1101 DFE-500TX Fast Ethernet - 1186 1102 DFE-500TX Fast Ethernet - 1186 1112 DFE-570TX Quad Fast Ethernet - 1259 2800 AT-2800Tx Fast Ethernet - 1266 0004 Eagle Fast EtherMAX - 
12af 0019 NetFlyer Cardbus Fast Ethernet - 1374 0001 Cardbus Ethernet Card 10/100 - 1374 0002 Cardbus Ethernet Card 10/100 - 1374 0007 Cardbus Ethernet Card 10/100 - 1374 0008 Cardbus Ethernet Card 10/100 - 1385 2100 FA510 - 1395 0001 10/100 Ethernet CardBus PC Card - 13d1 ab01 EtherFast 10/100 Cardbus (PCMPC200) - 14cb 0100 LNDL-100N 100Base-TX Ethernet PC Card - 8086 0001 EtherExpress PRO/100 Mobile CardBus 32 - 001a Farallon PN9000SX Gigabit Ethernet - 0021 DECchip 21052 - 0022 DECchip 21150 - 0023 DECchip 21150 - 0024 DECchip 21152 - 0025 DECchip 21153 - 0026 DECchip 21154 - 0034 56k Modem Cardbus - 1374 0003 56k Modem Cardbus - 0045 DECchip 21553 - 0046 DECchip 21554 - 0e11 4050 Integrated Smart Array - 0e11 4051 Integrated Smart Array - 0e11 4058 Integrated Smart Array - 103c 10c2 Hewlett-Packard NetRAID-4M - 12d9 000a IP Telephony card - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - 9005 0364 5400S (Mustang) - 9005 0365 5400S (Mustang) - 9005 1364 Dell PowerEdge RAID Controller 2 - 9005 1365 Dell PowerEdge RAID Controller 2 - e4bf 1000 CC8-1-BLUES - 1065 StrongARM DC21285 - 1069 0020 DAC960P / DAC1164P -1012 Micronics Computers Inc -1013 Cirrus Logic - 0038 GD 7548 - 0040 GD 7555 Flat Panel GUI Accelerator - 004c GD 7556 Video/Graphics LCD/CRT Ctrlr - 00a0 GD 5430/40 [Alpine] - 00a2 GD 5432 [Alpine] - 00a4 GD 5434-4 [Alpine] - 00a8 GD 5434-8 [Alpine] - 00ac GD 5436 [Alpine] - 00b0 GD 5440 - 00b8 GD 5446 - 00bc GD 5480 - 1013 00bc CL-GD5480 - 00d0 GD 5462 - 00d2 GD 5462 [Laguna I] - 00d4 GD 5464 [Laguna] - 00d5 GD 5464 BD [Laguna] - 00d6 GD 5465 [Laguna] - 13ce 8031 Barco Metheus 2 Megapixel, Dual Head - 13cf 8031 Barco Metheus 2 Megapixel, Dual Head - 00e8 GD 5436U - 1100 CL 6729 - 1110 PD 6832 PCMCIA/CardBus Ctrlr - 1112 PD 6834 PCMCIA/CardBus Ctrlr - 1113 PD 6833 PCMCIA/CardBus Ctrlr - 1200 GD 7542 [Nordic] - 1202 GD 7543 [Viking] - 1204 GD 7541 [Nordic Light] - 4000 MD 5620 [CLM Data Fax Voice] - 4400 CD 4400 - 6001 CS 4610/11 [CrystalClear 
SoundFusion Audio Accelerator] - 1014 1010 CS4610 SoundFusion Audio Accelerator - 6003 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator] - 1013 4280 Crystal SoundFusion PCI Audio Accelerator - 153b 1136 SiXPack 5.1+ - 1681 0050 Game Theater XP - 1681 a011 Fortissimo III 7.1 - 6004 CS 4614/22/24 [CrystalClear SoundFusion Audio Accelerator] - 6005 Crystal CS4281 PCI Audio - 1013 4281 Crystal CS4281 PCI Audio - 10cf 10a8 Crystal CS4281 PCI Audio - 10cf 10a9 Crystal CS4281 PCI Audio - 10cf 10aa Crystal CS4281 PCI Audio - 10cf 10ab Crystal CS4281 PCI Audio - 10cf 10ac Crystal CS4281 PCI Audio - 10cf 10ad Crystal CS4281 PCI Audio - 10cf 10b4 Crystal CS4281 PCI Audio - 1179 0001 Crystal CS4281 PCI Audio - 14c0 000c Crystal CS4281 PCI Audio -1014 IBM - 0002 PCI to MCA Bridge - 0005 Alta Lite - 0007 Alta MP - 000a Fire Coral - 0017 CPU to PCI Bridge - 0018 TR Auto LANstreamer - 001b GXT-150P - 001c Carrera - 001d 82G2675 - 0020 GXT1000 Graphics Adapter - 0022 IBM27-82351 - 002d Python -# [official name in AIX 5] - 002e SCSI RAID Adapter [ServeRAID] - 1014 002e ServeRAID-3x - 1014 022e ServeRAID-4H - 0031 2 Port Serial Adapter -# AS400 iSeries PCI sync serial card - 1014 0031 2721 WAN IOA - 2 Port Sync Serial Adapter - 0036 Miami - 0037 82660 CPU to PCI Bridge - 003a CPU to PCI Bridge - 003c GXT250P/GXT255P Graphics Adapter - 003e 16/4 Token ring UTP/STP controller - 1014 003e Token-Ring Adapter - 1014 00cd Token-Ring Adapter + Wake-On-LAN - 1014 00ce 16/4 Token-Ring Adapter 2 - 1014 00cf 16/4 Token-Ring Adapter Special - 1014 00e4 High-Speed 100/16/4 Token-Ring Adapter - 1014 00e5 16/4 Token-Ring Adapter 2 + Wake-On-LAN - 1014 016d iSeries 2744 Card - 0045 SSA Adapter - 0046 MPIC interrupt controller - 0047 PCI to PCI Bridge - 0048 PCI to PCI Bridge - 0049 Warhead SCSI Controller - 004e ATM Controller (14104e00) - 004f ATM Controller (14104f00) - 0050 ATM Controller (14105000) - 0053 25 MBit ATM Controller - 0054 GXT500P/GXT550P Graphics Adapter - 0057 MPEG PCI 
Bridge - 005c i82557B 10/100 - 005e GXT800P Graphics Adapter - 007c ATM Controller (14107c00) - 007d 3780IDSP [MWave] - 008b EADS PCI to PCI Bridge - 008e GXT3000P Graphics Adapter - 0090 GXT 3000P - 1014 008e GXT-3000P - 0091 SSA Adapter - 0095 20H2999 PCI Docking Bridge - 0096 Chukar chipset SCSI controller - 1014 0097 iSeries 2778 DASD IOA - 1014 0098 iSeries 2763 DASD IOA - 1014 0099 iSeries 2748 DASD IOA - 009f PCI 4758 Cryptographic Accelerator - 00a5 ATM Controller (1410a500) - 00a6 ATM 155MBPS MM Controller (1410a600) - 00b7 256-bit Graphics Rasterizer [Fire GL1] - 1092 00b8 FireGL1 AGP 32Mb - 00b8 GXT2000P Graphics Adapter - 00be ATM 622MBPS Controller (1410be00) - 00dc Advanced Systems Management Adapter (ASMA) - 00fc CPC710 Dual Bridge and Memory Controller (PCI-64) - 0104 Gigabit Ethernet-SX Adapter - 0105 CPC710 Dual Bridge and Memory Controller (PCI-32) - 010f Remote Supervisor Adapter (RSA) - 0142 Yotta Video Compositor Input - 1014 0143 Yotta Input Controller (ytin) - 0144 Yotta Video Compositor Output - 1014 0145 Yotta Output Controller (ytout) - 0156 405GP PLB to PCI Bridge - 015e 622Mbps ATM PCI Adapter - 0160 64bit/66MHz PCI ATM 155 MMF - 016e GXT4000P Graphics Adapter - 0170 GXT6000P Graphics Adapter - 017d GXT300P Graphics Adapter - 0180 Snipe chipset SCSI controller - 1014 0241 iSeries 2757 DASD IOA - 1014 0264 Quad Channel PCI-X U320 SCSI RAID Adapter (2780) - 0188 EADS-X PCI-X to PCI-X Bridge - 01a7 PCI-X to PCI-X Bridge - 01bd ServeRAID Controller - 1014 01be ServeRAID-4M - 1014 01bf ServeRAID-4L - 1014 0208 ServeRAID-4Mx - 1014 020e ServeRAID-4Lx - 1014 022e ServeRAID-4H - 1014 0258 ServeRAID-5i - 1014 0259 ServeRAID-5i - 01c1 64bit/66MHz PCI ATM 155 UTP - 01e6 Cryptographic Accelerator - 01ff 10/100 Mbps Ethernet - 0219 Multiport Serial Adapter - 1014 021a Dual RVX - 1014 0251 Internal Modem/RVX - 1014 0252 Quad Internal Modem - 021b GXT6500P Graphics Adapter - 021c GXT4500P Graphics Adapter - 0233 GXT135P Graphics Adapter - 0266 PCI-X 
Dual Channel SCSI - 0268 Gigabit Ethernet-SX Adapter (PCI-X) - 0269 10/100/1000 Base-TX Ethernet Adapter (PCI-X) - 028c Citrine chipset SCSI controller - 1014 028D Dual Channel PCI-X DDR SAS RAID Adapter (572E) - 1014 02BE Dual Channel PCI-X DDR U320 SCSI RAID Adapter (571B) - 1014 02C0 Dual Channel PCI-X DDR U320 SCSI Adapter (571A) - 0302 X-Architecture Bridge [Summit] - 0314 ZISC 036 Neural accelerator card - ffff MPIC-2 interrupt controller -1015 LSI Logic Corp of Canada -1016 ICL Personal Systems -1017 SPEA Software AG - 5343 SPEA 3D Accelerator -1018 Unisys Systems -1019 Elitegroup Computer Systems -101a AT&T GIS (NCR) - 0005 100VG ethernet -101b Vitesse Semiconductor -101c Western Digital - 0193 33C193A - 0196 33C196A - 0197 33C197A - 0296 33C296A - 3193 7193 - 3197 7197 - 3296 33C296A - 4296 34C296 - 9710 Pipeline 9710 - 9712 Pipeline 9712 - c24a 90C -101e American Megatrends Inc. - 1960 MegaRAID - 101e 0471 MegaRAID 471 Enterprise 1600 RAID Controller - 101e 0475 MegaRAID 475 Express 500/500LC RAID Controller - 101e 0477 MegaRAID 477 Elite 3100 RAID Controller - 101e 0493 MegaRAID 493 Elite 1600 RAID Controller - 101e 0494 MegaRAID 494 Elite 1650 RAID Controller - 101e 0503 MegaRAID 503 Enterprise 1650 RAID Controller - 101e 0511 MegaRAID 511 i4 IDE RAID Controller - 101e 0522 MegaRAID 522 i4133 RAID Controller - 1028 0471 PowerEdge RAID Controller 3/QC - 1028 0475 PowerEdge RAID Controller 3/SC - 1028 0493 PowerEdge RAID Controller 3/DC - 1028 0511 PowerEdge Cost Effective RAID Controller ATA100/4Ch - 9010 MegaRAID 428 Ultra RAID Controller - 9030 EIDE Controller - 9031 EIDE Controller - 9032 EIDE & SCSI Controller - 9033 SCSI Controller - 9040 Multimedia card - 9060 MegaRAID 434 Ultra GT RAID Controller - 9063 MegaRAC - 101e 0767 Dell Remote Assistant Card 2 -101f PictureTel -1020 Hitachi Computer Products -1021 OKI Electric Industry Co. Ltd. 
-1022 Advanced Micro Devices [AMD] - 1100 K8 [Athlon64/Opteron] HyperTransport Technology Configuration - 1101 K8 [Athlon64/Opteron] Address Map - 1102 K8 [Athlon64/Opteron] DRAM Controller - 1103 K8 [Athlon64/Opteron] Miscellaneous Control - 2000 79c970 [PCnet32 LANCE] - 1014 2000 NetFinity 10/100 Fast Ethernet - 1022 2000 PCnet - Fast 79C971 - 103c 104c Ethernet with LAN remote power Adapter - 103c 1064 Ethernet with LAN remote power Adapter - 103c 1065 Ethernet with LAN remote power Adapter - 103c 106c Ethernet with LAN remote power Adapter - 103c 106e Ethernet with LAN remote power Adapter - 103c 10ea Ethernet with LAN remote power Adapter - 1113 1220 EN1220 10/100 Fast Ethernet - 1259 2450 AT-2450 10/100 Fast Ethernet - 1259 2454 AT-2450v4 10Mb Ethernet Adapter - 1259 2700 AT-2700TX 10/100 Fast Ethernet - 1259 2701 AT-2700FX 100Mb Ethernet - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 4c53 1010 CP5/CR6 mainboard - 4c53 1020 VR6 mainboard - 4c53 1030 PC5 mainboard - 4c53 1040 CL7 mainboard - 4c53 1060 PC7 mainboard - 2001 79c978 [HomePNA] - 1092 0a78 Multimedia Home Network Adapter - 1668 0299 ActionLink Home Network Adapter - 2003 Am 1771 MBW [Alchemy] - 2020 53c974 [PCscsi] - 2040 79c974 - 3000 ELanSC520 Microcontroller - 7006 AMD-751 [Irongate] System Controller - 7007 AMD-751 [Irongate] AGP Bridge - 700a AMD-IGR4 AGP Host to PCI Bridge - 700b AMD-IGR4 PCI to PCI Bridge - 700c AMD-760 MP [IGD4-2P] System Controller - 700d AMD-760 MP [IGD4-2P] AGP Bridge - 700e AMD-760 [IGD4-1P] System Controller - 700f AMD-760 [IGD4-1P] AGP Bridge - 7400 AMD-755 [Cobra] ISA - 7401 AMD-755 [Cobra] IDE - 7403 AMD-755 [Cobra] ACPI - 7404 AMD-755 [Cobra] USB - 7408 AMD-756 [Viper] ISA - 7409 AMD-756 [Viper] IDE - 740b AMD-756 [Viper] ACPI - 740c AMD-756 [Viper] USB - 7410 AMD-766 [ViperPlus] ISA - 7411 AMD-766 [ViperPlus] IDE - 7413 AMD-766 [ViperPlus] ACPI - 7414 AMD-766 [ViperPlus] USB - 7440 AMD-768 [Opus] ISA - 1043 8044 A7M-D Mainboard - 7441 AMD-768 [Opus] IDE - 7443 
AMD-768 [Opus] ACPI - 1043 8044 A7M-D Mainboard - 7445 AMD-768 [Opus] Audio - 7446 AMD-768 [Opus] MC97 Modem (Smart Link HAMR5600 compatible) - 7448 AMD-768 [Opus] PCI - 7449 AMD-768 [Opus] USB - 7450 AMD-8131 PCI-X Bridge - 7451 AMD-8131 PCI-X APIC - 7454 AMD-8151 System Controller - 7455 AMD-8151 AGP Bridge - 7460 AMD-8111 PCI - 161f 3017 HDAMB - 7461 AMD-8111 USB - 7462 AMD-8111 Ethernet - 7464 AMD-8111 USB - 161f 3017 HDAMB - 7468 AMD-8111 LPC - 161f 3017 HDAMB - 7469 AMD-8111 IDE - 161f 3017 HDAMB - 746a AMD-8111 SMBus 2.0 - 746b AMD-8111 ACPI - 161f 3017 HDAMB - 746d AMD-8111 AC97 Audio - 161f 3017 HDAMB - 746e AMD-8111 MC97 Modem - 756b AMD-8111 ACPI -1023 Trident Microsystems - 0194 82C194 - 2000 4DWave DX - 2001 4DWave NX - 122d 1400 Trident PCI288-Q3DII (NX) - 2100 CyberBlade XP4m32 - 2200 XGI Volari XP5 - 8400 CyberBlade/i7 - 1023 8400 CyberBlade i7 AGP - 8420 CyberBlade/i7d - 0e11 b15a CyberBlade i7 AGP - 8500 CyberBlade/i1 - 8520 CyberBlade i1 - 0e11 b16e CyberBlade i1 AGP - 1023 8520 CyberBlade i1 AGP - 8620 CyberBlade/i1 - 1014 0502 ThinkPad R30/T30 - 8820 CyberBlade XPAi1 - 9320 TGUI 9320 - 9350 GUI Accelerator - 9360 Flat panel GUI Accelerator - 9382 Cyber 9382 [Reference design] - 9383 Cyber 9383 [Reference design] - 9385 Cyber 9385 [Reference design] - 9386 Cyber 9386 - 9388 Cyber 9388 - 9397 Cyber 9397 - 939a Cyber 9397DVD - 9420 TGUI 9420 - 9430 TGUI 9430 - 9440 TGUI 9440 - 9460 TGUI 9460 - 9470 TGUI 9470 - 9520 Cyber 9520 - 9525 Cyber 9525 - 10cf 1094 Lifebook C6155 - 9540 Cyber 9540 - 9660 TGUI 9660/938x/968x - 9680 TGUI 9680 - 9682 TGUI 9682 - 9683 TGUI 9683 - 9685 ProVIDIA 9685 - 9750 3DImage 9750 - 1014 9750 3DImage 9750 - 1023 9750 3DImage 9750 - 9753 TGUI 9753 - 9754 TGUI 9754 - 9759 TGUI 975 - 9783 TGUI 9783 - 9785 TGUI 9785 - 9850 3DImage 9850 - 9880 Blade 3D PCI/AGP - 1023 9880 Blade 3D - 9910 CyberBlade/XP - 9930 CyberBlade/XPm -1024 Zenith Data Systems -1025 Acer Incorporated [ALI] - 1435 M1435 - 1445 M1445 - 1449 M1449 - 1451 M1451 
- 1461 M1461 - 1489 M1489 - 1511 M1511 - 1512 ALI M1512 Aladdin - 1513 M1513 - 1521 ALI M1521 Aladdin III CPU Bridge - 10b9 1521 ALI M1521 Aladdin III CPU Bridge - 1523 ALI M1523 ISA Bridge - 10b9 1523 ALI M1523 ISA Bridge - 1531 M1531 Northbridge [Aladdin IV/IV+] - 1533 M1533 PCI-to-ISA Bridge - 10b9 1533 ALI M1533 Aladdin IV/V ISA South Bridge - 1535 M1535 PCI Bridge + Super I/O + FIR - 1541 M1541 Northbridge [Aladdin V] - 10b9 1541 ALI M1541 Aladdin V/V+ AGP+PCI North Bridge - 1542 M1542 Northbridge [Aladdin V] - 1543 M1543 PCI-to-ISA Bridge + Super I/O + FIR - 1561 M1561 Northbridge [Aladdin 7] - 1621 M1621 Northbridge [Aladdin-Pro II] - 1631 M1631 Northbridge+3D Graphics [Aladdin TNT2] - 1641 M1641 Northbridge [Aladdin-Pro IV] - 1647 M1647 [MaGiK1] PCI North Bridge - 1671 M1671 Northbridge [ALADDiN-P4] - 1672 Northbridge [CyberALADDiN-P4] - 3141 M3141 - 3143 M3143 - 3145 M3145 - 3147 M3147 - 3149 M3149 - 3151 M3151 - 3307 M3307 MPEG-I Video Controller - 3309 M3309 MPEG-II Video w/ Software Audio Decoder - 3321 M3321 MPEG-II Audio/Video Decoder - 5212 M4803 - 5215 ALI PCI EIDE Controller - 5217 M5217H - 5219 M5219 - 5225 M5225 - 5229 M5229 - 5235 M5235 - 5237 M5237 PCI USB Host Controller - 5240 EIDE Controller - 5241 PCMCIA Bridge - 5242 General Purpose Controller - 5243 PCI to PCI Bridge Controller - 5244 Floppy Disk Controller - 5247 M1541 PCI to PCI Bridge - 5251 M5251 P1394 Controller - 5427 PCI to AGP Bridge - 5451 M5451 PCI AC-Link Controller Audio Device - 5453 M5453 PCI AC-Link Controller Modem Device - 7101 M7101 PCI PMU Power Management Controller - 10b9 7101 M7101 PCI PMU Power Management Controller -1028 Dell - 0001 PowerEdge Expandable RAID Controller 2/Si - 1028 0001 PowerEdge 2400 - 0002 PowerEdge Expandable RAID Controller 3/Di - 1028 0002 PowerEdge 4400 - 0003 PowerEdge Expandable RAID Controller 3/Si - 1028 0003 PowerEdge 2450 - 0006 PowerEdge Expandable RAID Controller 3/Di - 0007 Remote Access Card III - 0008 Remote Access Card III - 0009 
Remote Access Card III: BMC/SMIC device not present - 000a PowerEdge Expandable RAID Controller 3/Di - 000c Embedded Remote Access or ERA/O - 000d Embedded Remote Access: BMC/SMIC device - 000e PowerEdge Expandable RAID controller 4/Di - 000f PowerEdge Expandable RAID controller 4/Di - 0010 Remote Access Card 4 - 0011 Remote Access Card 4 Daughter Card - 0012 Remote Access Card 4 Daughter Card Virtual UART - 0013 PowerEdge Expandable RAID controller 4 - 1028 016c PowerEdge Expandable RAID Controller 4e/Si - 1028 016d PowerEdge Expandable RAID Controller 4e/Di - 1028 016e PowerEdge Expandable RAID Controller 4e/Di - 1028 016f PowerEdge Expandable RAID Controller 4e/Di - 1028 0170 PowerEdge Expandable RAID Controller 4e/Di - 0014 Remote Access Card 4 Daughter Card SMIC interface -1029 Siemens Nixdorf IS -102a LSI Logic - 0000 HYDRA - 0010 ASPEN - 001f AHA-2940U2/U2W /7890/7891 SCSI Controllers - 9005 000f 2940U2W SCSI Controller - 9005 0106 2940U2W SCSI Controller - 9005 a180 2940U2W SCSI Controller - 00c5 AIC-7899 U160/m SCSI Controller - 1028 00c5 PowerEdge 2550/2650/4600 - 00cf AIC-7899P U160/m - 1028 0106 PowerEdge 4600 - 1028 0121 PowerEdge 2650 -102b Matrox Graphics, Inc. -# DJ: I've a suspicion that 0010 is a duplicate of 0d10. - 0010 MGA-I [Impression?] 
- 0100 MGA 1064SG [Mystique] - 0518 MGA-II [Athena] - 0519 MGA 2064W [Millennium] - 051a MGA 1064SG [Mystique] - 102b 0100 MGA-1064SG Mystique - 102b 1100 MGA-1084SG Mystique - 102b 1200 MGA-1084SG Mystique - 1100 102b MGA-1084SG Mystique - 110a 0018 Scenic Pro C5 (D1025) - 051b MGA 2164W [Millennium II] - 102b 051b MGA-2164W Millennium II - 102b 1100 MGA-2164W Millennium II - 102b 1200 MGA-2164W Millennium II - 051e MGA 1064SG [Mystique] AGP - 051f MGA 2164W [Millennium II] AGP - 0520 MGA G200 - 102b dbc2 G200 Multi-Monitor - 102b dbc8 G200 Multi-Monitor - 102b dbe2 G200 Multi-Monitor - 102b dbe8 G200 Multi-Monitor - 102b ff03 Millennium G200 SD - 102b ff04 Marvel G200 - 0521 MGA G200 AGP - 1014 ff03 Millennium G200 AGP - 102b 48e9 Mystique G200 AGP - 102b 48f8 Millennium G200 SD AGP - 102b 4a60 Millennium G200 LE AGP - 102b 4a64 Millennium G200 AGP - 102b c93c Millennium G200 AGP - 102b c9b0 Millennium G200 AGP - 102b c9bc Millennium G200 AGP - 102b ca60 Millennium G250 LE AGP - 102b ca6c Millennium G250 AGP - 102b dbbc Millennium G200 AGP - 102b dbc2 Millennium G200 MMS (Dual G200) - 102b dbc3 G200 Multi-Monitor - 102b dbc8 Millennium G200 MMS (Dual G200) - 102b dbd2 G200 Multi-Monitor - 102b dbd3 G200 Multi-Monitor - 102b dbd4 G200 Multi-Monitor - 102b dbd5 G200 Multi-Monitor - 102b dbd8 G200 Multi-Monitor - 102b dbd9 G200 Multi-Monitor - 102b dbe2 Millennium G200 MMS (Quad G200) - 102b dbe3 G200 Multi-Monitor - 102b dbe8 Millennium G200 MMS (Quad G200) - 102b dbf2 G200 Multi-Monitor - 102b dbf3 G200 Multi-Monitor - 102b dbf4 G200 Multi-Monitor - 102b dbf5 G200 Multi-Monitor - 102b dbf8 G200 Multi-Monitor - 102b dbf9 G200 Multi-Monitor - 102b f806 Mystique G200 Video AGP - 102b ff00 MGA-G200 AGP - 102b ff02 Mystique G200 AGP - 102b ff03 Millennium G200 AGP - 102b ff04 Marvel G200 AGP - 110a 0032 MGA-G200 AGP - 0525 MGA G400 AGP - 0e11 b16f MGA-G400 AGP - 102b 0328 Millennium G400 16Mb SDRAM - 102b 0338 Millennium G400 16Mb SDRAM - 102b 0378 Millennium G400 32Mb 
SDRAM - 102b 0541 Millennium G450 Dual Head - 102b 0542 Millennium G450 Dual Head LX - 102b 0543 Millennium G450 Single Head LX - 102b 0641 Millennium G450 32Mb SDRAM Dual Head - 102b 0642 Millennium G450 32Mb SDRAM Dual Head LX - 102b 0643 Millennium G450 32Mb SDRAM Single Head LX - 102b 07c0 Millennium G450 Dual Head LE - 102b 07c1 Millennium G450 SDR Dual Head LE - 102b 0d41 Millennium G450 Dual Head PCI - 102b 0d42 Millennium G450 Dual Head LX PCI - 102b 0d43 Millennium G450 32Mb Dual Head PCI - 102b 0e00 Marvel G450 eTV - 102b 0e01 Marvel G450 eTV - 102b 0e02 Marvel G450 eTV - 102b 0e03 Marvel G450 eTV - 102b 0f80 Millennium G450 Low Profile - 102b 0f81 Millennium G450 Low Profile - 102b 0f82 Millennium G450 Low Profile DVI - 102b 0f83 Millennium G450 Low Profile DVI - 102b 19d8 Millennium G400 16Mb SGRAM - 102b 19f8 Millennium G400 32Mb SGRAM - 102b 2159 Millennium G400 Dual Head 16Mb - 102b 2179 Millennium G400 MAX/Dual Head 32Mb - 102b 217d Millennium G400 Dual Head Max - 102b 23c0 Millennium G450 - 102b 23c1 Millennium G450 - 102b 23c2 Millennium G450 DVI - 102b 23c3 Millennium G450 DVI - 102b 2f58 Millennium G400 - 102b 2f78 Millennium G400 - 102b 3693 Marvel G400 AGP - 102b 5dd0 4Sight II - 102b 5f50 4Sight II - 102b 5f51 4Sight II - 102b 5f52 4Sight II - 102b 9010 Millennium G400 Dual Head - 1458 0400 GA-G400 - 1705 0001 Millennium G450 32MB SGRAM - 1705 0002 Millennium G450 16MB SGRAM - 1705 0003 Millennium G450 32MB - 1705 0004 Millennium G450 16MB - 0527 MGA Parhelia AGP - 102b 0840 Parhelia 128Mb - 0d10 MGA Ultima/Impression - 1000 MGA G100 [Productiva] - 102b ff01 Productiva G100 - 102b ff05 Productiva G100 Multi-Monitor - 1001 MGA G100 [Productiva] AGP - 102b 1001 MGA-G100 AGP - 102b ff00 MGA-G100 AGP - 102b ff01 MGA-G100 Productiva AGP - 102b ff03 Millennium G100 AGP - 102b ff04 MGA-G100 AGP - 102b ff05 MGA-G100 Productiva AGP Multi-Monitor - 110a 001e MGA-G100 AGP - 2007 MGA Mistral - 2527 MGA G550 AGP - 102b 0f83 Millennium G550 - 102b 0f84 
Millennium G550 Dual Head DDR 32Mb - 102b 1e41 Millennium G550 - 2537 MGA G650 AGP - 4536 VIA Framegrabber - 6573 Shark 10/100 Multiport SwitchNIC -102c Chips and Technologies - 00b8 F64310 - 00c0 F69000 HiQVideo - 102c 00c0 F69000 HiQVideo - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 4c53 1010 CP5/CR6 mainboard - 4c53 1020 VR6 mainboard - 4c53 1030 PC5 mainboard - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - 00d0 F65545 - 00d8 F65545 - 00dc F65548 - 00e0 F65550 - 00e4 F65554 - 00e5 F65555 HiQVPro - 0e11 b049 Armada 1700 Laptop Display Controller - 00f0 F68554 - 00f4 F68554 HiQVision - 00f5 F68555 - 0c30 F69030 - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard -# C5C project cancelled - 4c53 1080 CT8 mainboard -102d Wyse Technology Inc. - 50dc 3328 Audio -102e Olivetti Advanced Technology -102f Toshiba America - 0009 r4x00 - 000a TX3927 MIPS RISC PCI Controller - 0020 ATM Meteor 155 - 102f 00f8 ATM Meteor 155 - 0030 TC35815CF PCI 10/100 Mbit Ethernet Controller - 0031 TC35815CF PCI 10/100 Mbit Ethernet Controller with WOL - 0105 TC86C001 [goku-s] IDE - 0106 TC86C001 [goku-s] USB 1.1 Host - 0107 TC86C001 [goku-s] USB Device Controller - 0108 TC86C001 [goku-s] I2C/SIO/GPIO Controller - 0180 TX4927/38 MIPS RISC PCI Controller - 0181 TX4925 MIPS RISC PCI Controller - 0182 TX4937 MIPS RISC PCI Controller -1030 TMC Research -1031 Miro Computer Products AG - 5601 DC20 ASIC - 5607 Video I/O & motion JPEG compressor - 5631 Media 3D - 6057 MiroVideo DC10/DC30+ -1032 Compaq -1033 NEC Corporation - 0000 Vr4181A USB Host or Function Control Unit - 0001 PCI to 486-like bus Bridge - 0002 PCI to VL98 Bridge - 0003 ATM Controller - 0004 R4000 PCI Bridge - 0005 PCI to 486-like bus Bridge - 0006 PC-9800 Graphic Accelerator - 0007 PCI to UX-Bus Bridge - 0008 PC-9800 Graphic Accelerator - 0009 PCI to PC9800 Core-Graph Bridge - 0016 PCI to VL Bridge - 001a [Nile II] - 0021 Vrc4373 [Nile I] - 0029 PowerVR PCX1 - 002a PowerVR 3D - 
002c Star Alpha 2 - 002d PCI to C-bus Bridge - 0035 USB - 1179 0001 USB - 12ee 7000 Root Hub - 1799 0001 Root Hub - 807d 0035 PCI-USB2 (OHCI subsystem) - 003b PCI to C-bus Bridge - 003e NAPCCARD Cardbus Controller - 0046 PowerVR PCX2 [midas] - 005a Vrc5074 [Nile 4] - 0063 Firewarden - 0067 PowerVR Neon 250 Chipset - 1010 0020 PowerVR Neon 250 AGP 32Mb - 1010 0080 PowerVR Neon 250 AGP 16Mb - 1010 0088 PowerVR Neon 250 16Mb - 1010 0090 PowerVR Neon 250 AGP 16Mb - 1010 0098 PowerVR Neon 250 16Mb - 1010 00a0 PowerVR Neon 250 AGP 32Mb - 1010 00a8 PowerVR Neon 250 32Mb - 1010 0120 PowerVR Neon 250 AGP 32Mb - 0072 uPD72874 IEEE1394 OHCI 1.1 3-port PHY-Link Ctrlr - 0074 56k Voice Modem - 1033 8014 RCV56ACF 56k Voice Modem - 009b Vrc5476 - 00a5 VRC4173 - 00a6 VRC5477 AC97 - 00cd IEEE 1394 [OrangeLink] Host Controller - 12ee 8011 Root hub - 00ce IEEE 1394 Host Controller - 00df Vr4131 - 00e0 USB 2.0 - 0ee4 3383 Sitecom IEEE 1394 / USB2.0 Combo Card - 12ee 7001 Root hub - 1799 0002 Root Hub - 807d 1043 PCI-USB2 (EHCI subsystem) - 00e7 IEEE 1394 Host Controller - 00f2 uPD72874 IEEE1394 OHCI 1.1 3-port PHY-Link Ctrlr - 00f3 uPD6113x Multimedia Decoder/Processor [EMMA2] - 010c VR7701 -1034 Framatome Connectors USA Inc. -1035 Comp. & Comm. Research Lab -1036 Future Domain Corp. 
- 0000 TMC-18C30 [36C70] -1037 Hitachi Micro Systems -1038 AMP, Inc -1039 Silicon Integrated Systems [SiS] - 0001 Virtual PCI-to-PCI bridge (AGP) - 0002 SG86C202 - 0006 85C501/2/3 - 0008 SiS85C503/5513 (LPC Bridge) - 0009 ACPI -# source: http://members.datafast.net.au/dft0802/downloads/pcidevs.txt - 0016 SiS961/2 SMBus Controller - 0018 SiS85C503/5513 (LPC Bridge) -# Controller for 2 PATA and 2 SATA channels - 0180 RAID bus controller 180 SATA/PATA [SiS] - 0181 SiS SATA - 0200 5597/5598/6326 VGA - 1039 0000 SiS5597 SVGA (Shared RAM) - 0204 82C204 - 0205 SG86C205 - 0300 300/305 PCI/AGP VGA Display Adapter - 107d 2720 Leadtek WinFast VR300 - 0310 315H PCI/AGP VGA Display Adapter - 0315 315 PCI/AGP VGA Display Adapter - 0325 315PRO PCI/AGP VGA Display Adapter - 0330 330 [Xabre] PCI/AGP VGA Display Adapter - 0406 85C501/2 - 0496 85C496 - 0530 530 Host - 0540 540 Host - 0550 550 Host - 0597 5513C - 0601 85C601 - 0620 620 Host - 0630 630 Host - 0633 633 Host - 0635 635 Host - 0645 SiS645 Host & Memory & AGP Controller - 0646 SiS645DX Host & Memory & AGP Controller - 0648 SiS 645xx - 0650 650/M650 Host - 0651 651 Host - 0655 655 Host - 0660 660 Host - 0661 661FX/M661FX/M661MX Host - 0730 730 Host - 0733 733 Host - 0735 735 Host - 0740 740 Host - 0741 741/741GX/M741 Host - 0745 745 Host - 0746 746 Host - 0755 755 Host - 0760 760/M760 Host - 0900 SiS900 PCI Fast Ethernet - 1019 0a14 K7S5A motherboard - 1039 0900 SiS900 10/100 Ethernet Adapter - 1043 8035 CUSI-FX motherboard - 0961 SiS961 [MuTIOL Media IO] - 0962 SiS962 [MuTIOL Media IO] - 0963 SiS963 [MuTIOL Media IO] - 0964 SiS964 [MuTIOL Media IO] - 0965 SiS965 [MuTIOL Media IO] - 3602 83C602 - 5107 5107 - 5300 SiS540 PCI Display Adapter - 5315 550 PCI/AGP VGA Display Adapter - 5401 486 PCI Chipset - 5511 5511/5512 - 5513 5513 [IDE] - 1019 0970 P6STP-FL motherboard - 1039 5513 SiS5513 EIDE Controller (A,B step) - 1043 8035 CUSI-FX motherboard - 5517 5517 - 5571 5571 - 5581 5581 Pentium Chipset - 5582 5582 - 5591 5591/5592 
Host - 5596 5596 Pentium Chipset - 5597 5597 [SiS5582] - 5600 5600 Host - 6204 Video decoder & MPEG interface - 6205 VGA Controller - 6236 6236 3D-AGP - 6300 630/730 PCI/AGP VGA Display Adapter - 1019 0970 P6STP-FL motherboard - 1043 8035 CUSI-FX motherboard - 6306 530/620 PCI/AGP VGA Display Adapter - 1039 6306 SiS530,620 GUI Accelerator+3D - 6325 65x/M650/740 PCI/AGP VGA Display Adapter - 6326 86C326 5598/6326 - 1039 6326 SiS6326 GUI Accelerator - 1092 0a50 SpeedStar A50 - 1092 0a70 SpeedStar A70 - 1092 4910 SpeedStar A70 - 1092 4920 SpeedStar A70 - 1569 6326 SiS6326 GUI Accelerator - 6330 661/741/760 PCI/AGP VGA Display Adapter - 1039 6330 [M]661xX/[M]741[GX]/[M]760 PCI/AGP VGA Adapter - 7001 USB 1.0 Controller - 1019 0a14 K7S5A motherboard - 1039 7000 Onboard USB Controller - 7002 USB 2.0 Controller - 1509 7002 Onboard USB Controller - 7007 FireWire Controller - 7012 Sound Controller -# There are may be different modem codecs here (Intel537 compatible and incompatible) - 7013 AC'97 Modem Controller - 7016 SiS7016 PCI Fast Ethernet Adapter - 1039 7016 SiS7016 10/100 Ethernet Adapter - 7018 SiS PCI Audio Accelerator - 1014 01b6 SiS PCI Audio Accelerator - 1014 01b7 SiS PCI Audio Accelerator - 1019 7018 SiS PCI Audio Accelerator - 1025 000e SiS PCI Audio Accelerator - 1025 0018 SiS PCI Audio Accelerator - 1039 7018 SiS PCI Audio Accelerator - 1043 800b SiS PCI Audio Accelerator - 1054 7018 SiS PCI Audio Accelerator - 107d 5330 SiS PCI Audio Accelerator - 107d 5350 SiS PCI Audio Accelerator - 1170 3209 SiS PCI Audio Accelerator - 1462 400a SiS PCI Audio Accelerator - 14a4 2089 SiS PCI Audio Accelerator - 14cd 2194 SiS PCI Audio Accelerator - 14ff 1100 SiS PCI Audio Accelerator - 152d 8808 SiS PCI Audio Accelerator - 1558 1103 SiS PCI Audio Accelerator - 1558 2200 SiS PCI Audio Accelerator - 1563 7018 SiS PCI Audio Accelerator - 15c5 0111 SiS PCI Audio Accelerator - 270f a171 SiS PCI Audio Accelerator - a0a0 0022 SiS PCI Audio Accelerator - 7019 SiS7019 Audio 
Accelerator -103a Seiko Epson Corporation -103b Tatung Co. of America -103c Hewlett-Packard Company - 1005 A4977A Visualize EG - 1006 Visualize FX6 - 1008 Visualize FX4 - 100a Visualize FX2 - 1028 Tach TL Fibre Channel Host Adapter - 1029 Tach XL2 Fibre Channel Host Adapter - 107e 000f Interphase 5560 Fibre Channel Adapter - 9004 9210 1Gb/2Gb Family Fibre Channel Controller - 9004 9211 1Gb/2Gb Family Fibre Channel Controller - 102a Tach TS Fibre Channel Host Adapter - 107e 000e Interphase 5540/5541 Fibre Channel Adapter - 9004 9110 1Gb/2Gb Family Fibre Channel Controller - 9004 9111 1Gb/2Gb Family Fibre Channel Controller - 1030 J2585A DeskDirect 10/100VG NIC - 1031 J2585B HP 10/100VG PCI LAN Adapter - 103c 1040 J2973A DeskDirect 10BaseT NIC - 103c 1041 J2585B DeskDirect 10/100VG NIC - 103c 1042 J2970A DeskDirect 10BaseT/2 NIC - 1040 J2973A DeskDirect 10BaseT NIC - 1041 J2585B DeskDirect 10/100 NIC - 1042 J2970A DeskDirect 10BaseT/2 NIC - 1048 Diva Serial [GSP] Multiport UART - 103c 1049 Tosca Console - 103c 104a Tosca Secondary - 103c 104b Maestro SP2 - 103c 1223 Superdome Console - 103c 1226 Keystone SP2 - 103c 1227 Powerbar SP2 - 103c 1282 Everest SP2 - 103c 1301 Diva RMP3 - 1054 PCI Local Bus Adapter - 1064 79C970 PCnet Ethernet Controller - 108b Visualize FXe - 10c1 NetServer Smart IRQ Router - 10ed TopTools Remote Control - 10f0 rio System Bus Adapter - 10f1 rio I/O Controller - 1200 82557B 10/100 NIC - 1219 NetServer PCI Hot-Plug Controller - 121a NetServer SMIC Controller - 121b NetServer Legacy COM Port Decoder - 121c NetServer PCI COM Port Decoder - 1229 zx1 System Bus Adapter - 122a zx1 I/O Controller - 122e zx1 Local Bus Adapter - 127c sx1000 I/O Controller - 1290 Auxiliary Diva Serial Port - 12b4 zx1 QuickSilver AGP8x Local Bus Adapter - 2910 E2910A PCIBus Exerciser - 2925 E2925A 32 Bit, 33 MHzPCI Exerciser & Analyzer -103e Solliday Engineering -103f Synopsys/Logic Modeling Group -1040 Accelgraphics Inc. 
-1041 Computrend -1042 Micron - 1000 PC Tech RZ1000 - 1001 PC Tech RZ1001 - 3000 Samurai_0 - 3010 Samurai_1 - 3020 Samurai_IDE -1043 ASUSTeK Computer Inc. - 0675 ISDNLink P-IN100-ST-D - 4015 v7100 SDRAM [GeForce2 MX] - 4021 v7100 Combo Deluxe [GeForce2 MX + TV tuner] - 4057 v8200 GeForce 3 - 8043 v8240 PAL 128M [P4T] Motherboard - 807b v9280/TD [Geforce4 TI4200 8X With TV-Out and DVI] - 80bb v9180 Magic/T [GeForce4 MX440 AGP 8x 64MB TV-out] - 80c5 nForce3 chipset motherboard [SK8N] - 80df v9520 Magic/T -1044 Adaptec (formerly DPT) - 1012 Domino RAID Engine - a400 SmartCache/Raid I-IV Controller - a500 PCI Bridge - a501 SmartRAID V Controller - 1044 c001 PM1554U2 Ultra2 Single Channel - 1044 c002 PM1654U2 Ultra2 Single Channel - 1044 c003 PM1564U3 Ultra3 Single Channel - 1044 c004 PM1564U3 Ultra3 Dual Channel - 1044 c005 PM1554U2 Ultra2 Single Channel (NON ACPI) - 1044 c00a PM2554U2 Ultra2 Single Channel - 1044 c00b PM2654U2 Ultra2 Single Channel - 1044 c00c PM2664U3 Ultra3 Single Channel - 1044 c00d PM2664U3 Ultra3 Dual Channel - 1044 c00e PM2554U2 Ultra2 Single Channel (NON ACPI) - 1044 c00f PM2654U2 Ultra2 Single Channel (NON ACPI) - 1044 c014 PM3754U2 Ultra2 Single Channel (NON ACPI) - 1044 c015 PM3755U2B Ultra2 Single Channel (NON ACPI) - 1044 c016 PM3755F Fibre Channel (NON ACPI) - 1044 c01e PM3757U2 Ultra2 Single Channel - 1044 c01f PM3757U2 Ultra2 Dual Channel - 1044 c020 PM3767U3 Ultra3 Dual Channel - 1044 c021 PM3767U3 Ultra3 Quad Channel - 1044 c028 PM2865U3 Ultra3 Single Channel - 1044 c029 PM2865U3 Ultra3 Dual Channel - 1044 c02a PM2865F Fibre Channel - 1044 c03c 2000S Ultra3 Single Channel - 1044 c03d 2000S Ultra3 Dual Channel - 1044 c03e 2000F Fibre Channel - 1044 c046 3000S Ultra3 Single Channel - 1044 c047 3000S Ultra3 Dual Channel - 1044 c048 3000F Fibre Channel - 1044 c050 5000S Ultra3 Single Channel - 1044 c051 5000S Ultra3 Dual Channel - 1044 c052 5000F Fibre Channel - 1044 c05a 2400A UDMA Four Channel - 1044 c05b 2400A UDMA Four Channel DAC - 
1044 c064 3010S Ultra3 Dual Channel - 1044 c065 3410S Ultra160 Four Channel - 1044 c066 3010S Fibre Channel - a511 SmartRAID V Controller - 1044 c032 ASR-2005S I2O Zero Channel -1045 OPTi Inc. - a0f8 82C750 [Vendetta] USB Controller - c101 92C264 - c178 92C178 - c556 82X556 [Viper] - c557 82C557 [Viper-M] - c558 82C558 [Viper-M ISA+IDE] - c567 82C750 [Vendetta], device 0 - c568 82C750 [Vendetta], device 1 - c569 82C579 [Viper XPress+ Chipset] - c621 82C621 [Viper-M/N+] - c700 82C700 [FireStar] - c701 82C701 [FireStar Plus] - c814 82C814 [Firebridge 1] - c822 82C822 - c824 82C824 - c825 82C825 [Firebridge 2] - c832 82C832 - c861 82C861 - c895 82C895 - c935 EV1935 ECTIVA MachOne PCIAudio - d568 82C825 [Firebridge 2] - d721 IDE [FireStar] -1046 IPC Corporation, Ltd. -1047 Genoa Systems Corp -1048 Elsa AG - 0c60 Gladiac MX - 0d22 Quadro4 900XGL [ELSA GLoria4 900XGL] - 1000 QuickStep 1000 - 3000 QuickStep 3000 - 8901 Gloria XL -1049 Fountain Technologies, Inc. -# # nee SGS Thomson Microelectronics -104a STMicroelectronics - 0008 STG 2000X - 0009 STG 1764X - 0010 STG4000 [3D Prophet Kyro Series] - 0209 STPC Consumer/Industrial North- and Southbridge - 020a STPC Atlas/ConsumerS/Consumer IIA Northbridge -# From - 0210 STPC Atlas ISA Bridge - 021a STPC Consumer S Southbridge - 021b STPC Consumer IIA Southbridge - 0500 ST70137 [Unicorn] ADSL DMT Transceiver - 0564 STPC Client Northbridge - 0981 21x4x DEC-Tulip compatible 10/100 Ethernet - 1746 STG 1764X - 2774 21x4x DEC-Tulip compatible 10/100 Ethernet - 3520 MPEG-II decoder card - 55cc STPC Client Southbridge -104b BusLogic - 0140 BT-946C (old) [multimaster 01] - 1040 BT-946C (BA80C30) [MultiMaster 10] - 8130 Flashpoint LT -104c Texas Instruments - 0500 100 MBit LAN Controller - 0508 TMS380C2X Compressor Interface - 1000 Eagle i/f AS - 104c PCI1510 PC card Cardbus Controller - 3d04 TVP4010 [Permedia] - 3d07 TVP4020 [Permedia 2] - 1011 4d10 Comet - 1040 000f AccelStar II - 1040 0011 AccelStar II - 1048 0a31 WINNER 2000 - 
1048 0a32 GLoria Synergy - 1048 0a35 GLoria Synergy - 107d 2633 WinFast 3D L2300 - 1092 0127 FIRE GL 1000 PRO - 1092 0136 FIRE GL 1000 PRO - 1092 0141 FIRE GL 1000 PRO - 1092 0146 FIRE GL 1000 PRO - 1092 0148 FIRE GL 1000 PRO - 1092 0149 FIRE GL 1000 PRO - 1092 0152 FIRE GL 1000 PRO - 1092 0154 FIRE GL 1000 PRO - 1092 0155 FIRE GL 1000 PRO - 1092 0156 FIRE GL 1000 PRO - 1092 0157 FIRE GL 1000 PRO - 1097 3d01 Jeronimo Pro - 1102 100f Graphics Blaster Extreme - 3d3d 0100 Reference Permedia 2 3D - 8000 PCILynx/PCILynx2 IEEE 1394 Link Layer Controller - e4bf 1010 CF1-1-SNARE - e4bf 1020 CF1-2-SNARE - 8009 FireWire Controller - 104d 8032 8032 OHCI i.LINK (IEEE 1394) Controller - 8017 PCI4410 FireWire Controller - 8019 TSB12LV23 IEEE-1394 Controller - 11bd 000a Studio DV500-1394 - 11bd 000e Studio DV - e4bf 1010 CF2-1-CYMBAL - 8020 TSB12LV26 IEEE-1394 Controller (Link) - 11bd 000f Studio DV500-1394 - 8021 TSB43AA22 IEEE-1394 Controller (PHY/Link Integrated) - 104d 80df Vaio PCG-FX403 - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 8022 TSB43AB22 IEEE-1394a-2000 Controller (PHY/Link) - 8023 TSB43AB22/A IEEE-1394a-2000 Controller (PHY/Link) - 103c 088c nc8000 laptop - 8024 TSB43AB23 IEEE-1394a-2000 Controller (PHY/Link) - 8025 TSB82AA2 IEEE-1394b Link Layer Controller - 55aa 55aa FireWire 800 PCI Card - 8026 TSB43AB21 IEEE-1394a-2000 Controller (PHY/Link) - 8027 PCI4451 IEEE-1394 Controller - 1028 00e6 PCI4451 IEEE-1394 Controller (Dell Inspiron 8100) - 8029 PCI4510 IEEE-1394 Controller - 1028 0163 Latitude D505 - 1071 8160 MIM2900 - 802b PCI7410,7510,7610 OHCI-Lynx Controller - 1028 014e PCI7410,7510,7610 OHCI-Lynx Controller (Dell Latitude D800) - 802e PCI7x20 1394a-2000 OHCI Two-Port PHY/Link-Layer Controller - 8031 Texas Instruments PCIxx21/x515 Cardbus Controller - 8032 Texas Instruments OHCI Compliant IEEE 1394 Host Controller - 8033 Texas Instruments PCIxx21 Integrated FlashMedia Controller - 8034 Texas Instruments PCI6411, PCI6421, PCI6611, PCI6621, 
PCI7411, PCI7421, PCI7611, PCI7621 Secure Digital (SD) Controller - 8035 Texas Instruments PCI6411, PCI6421, PCI6611, PCI6621, PCI7411, PCI7421, PCI7611, PCI7621 Smart Card Controller (SMC) - 8201 PCI1620 Firmware Loading Function - 8204 PCI7410,7510,7610 PCI Firmware Loading Function - 1028 014e Latitude D800 - 8400 ACX 100 22Mbps Wireless Interface - 00fc 16ec U.S. Robotics 22 Mbps Wireless PC Card (model 2210) - 00fd 16ec U.S. Robotics 22Mbps Wireless PCI Adapter (model 2216) - 1186 3b00 DWL-650+ PC Card cardbus 22Mbs Wireless Adapter [AirPlus] - 1186 3b01 DWL-520+ 22Mbps PCI Wireless Adapter - 8401 ACX 100 22Mbps Wireless Interface -# OK, this info is almost useless as is, but at least it's known that it's a wireless card. More info requested from reporter (whi - 9000 Wireless Interface (of unknown type) - 9066 ACX 111 54Mbps Wireless Interface - a001 TDC1570 - a100 TDC1561 - a102 TNETA1575 HyperSAR Plus w/PCI Host i/f & UTOPIA i/f - a106 TMS320C6205 Fixed Point DSP - 175c 5000 ASI50xx Audio Adapter - 175c 8700 ASI87xx Radio Tuner card - ac10 PCI1050 - ac11 PCI1053 - ac12 PCI1130 - ac13 PCI1031 - ac15 PCI1131 - ac16 PCI1250 - 1014 0092 ThinkPad 600 - ac17 PCI1220 - ac18 PCI1260 - ac19 PCI1221 - ac1a PCI1210 - ac1b PCI1450 - 0e11 b113 Armada M700 - ac1c PCI1225 - 0e11 b121 Armada E500 - 1028 0088 Dell Computer Corporation Latitude CPi A400XT - ac1d PCI1251A - ac1e PCI1211 - ac1f PCI1251B - ac20 TI 2030 - ac21 PCI2031 - ac22 PCI2032 PCI Docking Bridge - ac23 PCI2250 PCI-to-PCI Bridge - ac28 PCI2050 PCI-to-PCI Bridge - ac30 PCI1260 PC card Cardbus Controller - ac40 PCI4450 PC card Cardbus Controller - ac41 PCI4410 PC card Cardbus Controller - ac42 PCI4451 PC card Cardbus Controller - 1028 00e6 PCI4451 PC card CardBus Controller (Dell Inspiron 8100) - ac44 PCI4510 PC card Cardbus Controller - 1028 0163 Latitude D505 - 1071 8160 MIM2000 - ac46 PCI4520 PC card Cardbus Controller - ac47 PCI7510 PC card Cardbus Controller - 1028 014e Latitude D800 - ac4a PCI7510,7610 
PC card Cardbus Controller - 1028 014e Latitude D800 - ac50 PCI1410 PC card Cardbus Controller - ac51 PCI1420 - 1014 023b ThinkPad T23 (2647-4MG) - 1028 00b1 Latitude C600 - 1028 012a Latitude C640 - 1033 80cd Versa Note VXi - 10cf 1095 Lifebook C6155 - e4bf 1000 CP2-2-HIPHOP - ac52 PCI1451 PC card Cardbus Controller - ac53 PCI1421 PC card Cardbus Controller - ac54 PCI1620 PC Card Controller - ac55 PCI1520 PC card Cardbus Controller - 1014 0512 ThinkPad T30/T40 - ac56 PCI1510 PC card Cardbus Controller - 1014 0528 ThinkPad R40e (2684-HVG) Cardbus Controller - ac60 PCI2040 PCI to DSP Bridge Controller - 175c 5100 ASI51xx Audio Adapter - 175c 6100 ASI61xx Audio Adapter - 175c 6200 ASI62xx Audio Adapter - ac8d PCI 7620 - ac8e PCI7420 CardBus Controller - ac8f PCI7420/PCI7620 Dual Socket CardBus and Smart Card Cont. w/ 1394a-2000 OHCI Two-Port PHY/Link-Layer Cont. and SD/MS-Pro Sockets - fe00 FireWire Host Controller - fe03 12C01A FireWire Host Controller -104d Sony Corporation - 8004 DTL-H2500 [Playstation development board] - 8009 CXD1947Q i.LINK Controller - 8039 CXD3222 i.LINK Controller - 8056 Rockwell HCF 56K modem - 808a Memory Stick Controller -104e Oak Technology, Inc - 0017 OTI-64017 - 0107 OTI-107 [Spitfire] - 0109 Video Adapter - 0111 OTI-64111 [Spitfire] - 0217 OTI-64217 - 0317 OTI-64317 -104f Co-time Computer Ltd -1050 Winbond Electronics Corp - 0000 NE2000 - 0001 W83769F - 0105 W82C105 - 0840 W89C840 - 1050 0001 W89C840 Ethernet Adapter - 1050 0840 W89C840 Ethernet Adapter - 0940 W89C940 - 5a5a W89C940F - 6692 W6692 - 9921 W99200F MPEG-1 Video Encoder - 9922 W99200F/W9922PF MPEG-1/2 Video Encoder - 9970 W9970CF -1051 Anigma, Inc. -1052 ?Young Micro Systems -1053 Young Micro Systems -1054 Hitachi, Ltd -1055 Efar Microsystems - 9130 SLC90E66 [Victory66] IDE - 9460 SLC90E66 [Victory66] ISA - 9462 SLC90E66 [Victory66] USB - 9463 SLC90E66 [Victory66] ACPI -1056 ICL -# Motorola made a mistake and used 1507 instead of 1057 in some chips. 
Please look at the 1507 entry as well when updating this. -1057 Motorola - 0001 MPC105 [Eagle] - 0002 MPC106 [Grackle] - 0003 MPC8240 [Kahlua] - 0004 MPC107 - 0006 MPC8245 [Unity] - 0008 MPC8540 - 0009 MPC8560 - 0100 MC145575 [HFC-PCI] - 0431 KTI829c 100VG - 1801 DSP56301 Digital Signal Processor - 14fb 0101 Transas Radar Imitator Board [RIM] - 14fb 0102 Transas Radar Imitator Board [RIM-2] - 14fb 0202 Transas Radar Integrator Board [RIB-2] - 14fb 0611 1 channel CAN bus Controller [CanPci-1] - 14fb 0612 2 channels CAN bus Controller [CanPci-2] - 14fb 0613 3 channels CAN bus Controller [CanPci-3] - 14fb 0614 4 channels CAN bus Controller [CanPci-4] - 14fb 0621 1 channel CAN bus Controller [CanPci2-1] - 14fb 0622 2 channels CAN bus Controller [CanPci2-2] - 14fb 0810 Transas VTS Radar Integrator Board [RIB-4] - 175c 4200 ASI4215 Audio Adapter - 175c 4300 ASI43xx Audio Adapter - 175c 4400 ASI4401 Audio Adapter - ecc0 0010 Darla - ecc0 0020 Gina - ecc0 0030 Layla rev.0 - ecc0 0031 Layla rev.1 - ecc0 0040 Darla24 rev.0 - ecc0 0041 Darla24 rev.1 - ecc0 0050 Gina24 rev.0 - ecc0 0051 Gina24 rev.1 - ecc0 0070 Mona rev.0 - ecc0 0071 Mona rev.1 - ecc0 0072 Mona rev.2 - 18c0 MPC8265A/MPC8266 - 18c1 MPC8271/MPC8272 - 3410 DSP56361 Digital Signal Processor - ecc0 0050 Gina24 rev.0 - ecc0 0051 Gina24 rev.1 - ecc0 0060 Layla24 - ecc0 0070 Mona rev.0 - ecc0 0071 Mona rev.1 - ecc0 0072 Mona rev.2 - ecc0 0080 Mia rev.0 - ecc0 0081 Mia rev.1 - ecc0 0090 Indigo - ecc0 00a0 Indigo IO - ecc0 00b0 Indigo DJ - ecc0 0100 3G - 4801 Raven - 4802 Falcon - 4803 Hawk - 4806 CPX8216 - 4d68 20268 - 5600 SM56 PCI Modem - 1057 0300 SM56 PCI Speakerphone Modem - 1057 0301 SM56 PCI Voice Modem - 1057 0302 SM56 PCI Fax Modem - 1057 5600 SM56 PCI Voice modem - 13d2 0300 SM56 PCI Speakerphone Modem - 13d2 0301 SM56 PCI Voice modem - 13d2 0302 SM56 PCI Fax Modem - 1436 0300 SM56 PCI Speakerphone Modem - 1436 0301 SM56 PCI Voice modem - 1436 0302 SM56 PCI Fax Modem - 144f 100c SM56 PCI Fax Modem - 1494 0300 
SM56 PCI Speakerphone Modem - 1494 0301 SM56 PCI Voice modem - 14c8 0300 SM56 PCI Speakerphone Modem - 14c8 0302 SM56 PCI Fax Modem - 1668 0300 SM56 PCI Speakerphone Modem - 1668 0302 SM56 PCI Fax Modem - 5803 MPC5200 - 6400 MPC190 Security Processor (S1 family, encryption) - 6405 MPC184 Security Processor (S1 family) -1058 Electronics & Telecommunications RSH -1059 Teknor Industrial Computers Inc -105a Promise Technology, Inc. -# more correct description from promise linux sources - 0d30 PDC20265 (FastTrak100 Lite/Ultra100) - 105a 4d33 Ultra100 - 0d38 20263 - 105a 4d39 Fasttrak66 - 1275 20275 - 3318 PDC20318 (SATA150 TX4) - 3319 PDC20319 (FastTrak S150 TX4) - 8086 3427 S875WP1-E mainboard - 3371 PDC20371 (FastTrak S150 TX2plus) - 3373 PDC20378 (FastTrak 378/SATA 378) - 1043 80f5 K8V Deluxe/PC-DL Deluxe motherboard - 1462 702e K8T NEO FIS2R motherboard - 3375 PDC20375 (SATA150 TX2plus) - 3376 PDC20376 (FastTrak 376) - 1043 809e A7V8X motherboard - 3574 PDC20579 SATAII 150 IDE Controller - 3d18 PDC20518/PDC40518 (SATAII 150 TX4) - 3d75 PDC20575 (SATAII150 TX2plus) - 4d30 PDC20267 (FastTrak100/Ultra100) - 105a 4d33 Ultra100 - 105a 4d39 FastTrak100 - 4d33 20246 - 105a 4d33 20246 IDE Controller - 4d38 PDC20262 (FastTrak66/Ultra66) - 105a 4d30 Ultra Device on SuperTrak - 105a 4d33 Ultra66 - 105a 4d39 FastTrak66 - 4d68 PDC20268 (Ultra100 TX2) - 105a 4d68 Ultra100TX2 - 4d69 20269 - 105a 4d68 Ultra133TX2 - 5275 PDC20276 (MBFastTrak133 Lite) - 105a 0275 SuperTrak SX6000 IDE - 105a 1275 MBFastTrak133 Lite (tm) Controller (RAID mode) - 1458 b001 MBUltra 133 - 5300 DC5300 - 6268 PDC20270 (FastTrak100 LP/TX2/TX4) - 105a 4d68 FastTrak100 TX2 - 6269 PDC20271 (FastTrak TX2000) - 105a 6269 FastTrak TX2/TX2000 - 6621 PDC20621 (FastTrak S150 SX4/FastTrak SX4000 lite) - 6622 PDC20621 [SATA150 SX4] 4 Channel IDE RAID Controller - 6626 PDC20618 (Ultra 618) - 6629 PDC20619 (FastTrak TX4000) - 7275 PDC20277 (SBFastTrak133 Lite) -105b Foxconn International, Inc. 
-105c Wipro Infotech Limited -105d Number 9 Computer Company - 2309 Imagine 128 - 2339 Imagine 128-II - 105d 0000 Imagine 128 series 2 4Mb VRAM - 105d 0001 Imagine 128 series 2 4Mb VRAM - 105d 0002 Imagine 128 series 2 4Mb VRAM - 105d 0003 Imagine 128 series 2 4Mb VRAM - 105d 0004 Imagine 128 series 2 4Mb VRAM - 105d 0005 Imagine 128 series 2 4Mb VRAM - 105d 0006 Imagine 128 series 2 4Mb VRAM - 105d 0007 Imagine 128 series 2 4Mb VRAM - 105d 0008 Imagine 128 series 2e 4Mb DRAM - 105d 0009 Imagine 128 series 2e 4Mb DRAM - 105d 000a Imagine 128 series 2 8Mb VRAM - 105d 000b Imagine 128 series 2 8Mb H-VRAM - 11a4 000a Barco Metheus 5 Megapixel - 13cc 0000 Barco Metheus 5 Megapixel - 13cc 0004 Barco Metheus 5 Megapixel - 13cc 0005 Barco Metheus 5 Megapixel - 13cc 0006 Barco Metheus 5 Megapixel - 13cc 0008 Barco Metheus 5 Megapixel - 13cc 0009 Barco Metheus 5 Megapixel - 13cc 000a Barco Metheus 5 Megapixel - 13cc 000c Barco Metheus 5 Megapixel - 493d Imagine 128 T2R [Ticket to Ride] - 11a4 000a Barco Metheus 5 Megapixel, Dual Head - 11a4 000b Barco Metheus 5 Megapixel, Dual Head - 13cc 0002 Barco Metheus 4 Megapixel, Dual Head - 13cc 0003 Barco Metheus 5 Megapixel, Dual Head - 13cc 0007 Barco Metheus 5 Megapixel, Dual Head - 13cc 0008 Barco Metheus 5 Megapixel, Dual Head - 13cc 0009 Barco Metheus 5 Megapixel, Dual Head - 13cc 000a Barco Metheus 5 Megapixel, Dual Head - 5348 Revolution 4 - 105d 0037 Revolution IV-FP AGP (For SGI 1600SW) -105e Vtech Computers Ltd -105f Infotronic America Inc -1060 United Microelectronics [UMC] - 0001 UM82C881 - 0002 UM82C886 - 0101 UM8673F - 0881 UM8881 - 0886 UM8886F - 0891 UM8891A - 1001 UM886A - 673a UM8886BF - 673b EIDE Master/DMA - 8710 UM8710 - 886a UM8886A - 8881 UM8881F - 8886 UM8886F - 888a UM8886A - 8891 UM8891A - 9017 UM9017F - 9018 UM9018 - 9026 UM9026 - e881 UM8881N - e886 UM8886N - e88a UM8886N - e891 UM8891N -1061 I.I.T. 
- 0001 AGX016 - 0002 IIT3204/3501 -1062 Maspar Computer Corp -1063 Ocean Office Automation -1064 Alcatel -1065 Texas Microsystems -1066 PicoPower Technology - 0000 PT80C826 - 0001 PT86C521 [Vesuvius v1] Host Bridge - 0002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Master - 0003 PT86C524 [Nile] PCI-to-PCI Bridge - 0004 PT86C525 [Nile-II] PCI-to-PCI Bridge - 0005 National PC87550 System Controller - 8002 PT86C523 [Vesuvius v3] PCI-ISA Bridge Slave -1067 Mitsubishi Electric - 0301 AccelGraphics AccelECLIPSE - 0304 AccelGALAXY A2100 [OEM Evans & Sutherland] - 0308 Tornado 3000 [OEM Evans & Sutherland] - 1002 VG500 [VolumePro Volume Rendering Accelerator] -1068 Diversified Technology -1069 Mylex Corporation - 0001 DAC960P - 0002 DAC960PD - 0010 DAC960PG - 0020 DAC960LA - 0050 AcceleRAID 352/170/160 support Device - b166 Gemstone chipset SCSI controller - 1014 0242 iSeries 2872 DASD IOA - 1014 0266 Dual Channel PCI-X U320 SCSI Adapter - 1014 0278 Dual Channel PCI-X U320 SCSI RAID Adapter - 1014 02d3 Dual Channel PCI-X U320 SCSI Adapter - 1014 02d4 Dual Channel PCI-X U320 SCSI RAID Adapter - ba55 eXtremeRAID 1100 support Device - ba56 eXtremeRAID 2000/3000 support Device -106a Aten Research Inc -106b Apple Computer Inc. 
- 0001 Bandit PowerPC host bridge - 0002 Grand Central I/O - 0003 Control Video - 0004 PlanB Video-In - 0007 O'Hare I/O - 000c DOS on Mac - 000e Hydra Mac I/O - 0010 Heathrow Mac I/O - 0017 Paddington Mac I/O - 0018 UniNorth FireWire - 0019 KeyLargo USB - 001e UniNorth Internal PCI - 001f UniNorth PCI - 0020 UniNorth AGP - 0021 UniNorth GMAC (Sun GEM) - 0022 KeyLargo Mac I/O - 0024 UniNorth/Pangea GMAC (Sun GEM) - 0025 KeyLargo/Pangea Mac I/O - 0026 KeyLargo/Pangea USB - 0027 UniNorth/Pangea AGP - 0028 UniNorth/Pangea PCI - 0029 UniNorth/Pangea Internal PCI - 002d UniNorth 1.5 AGP - 002e UniNorth 1.5 PCI - 002f UniNorth 1.5 Internal PCI - 0030 UniNorth/Pangea FireWire - 0031 UniNorth 2 FireWire - 0032 UniNorth 2 GMAC (Sun GEM) - 0033 UniNorth 2 ATA/100 - 0034 UniNorth 2 AGP - 0035 UniNorth 2 PCI - 0036 UniNorth 2 Internal PCI - 003b UniNorth/Intrepid ATA/100 - 003e KeyLargo/Intrepid Mac I/O - 003f KeyLargo/Intrepid USB - 0040 K2 KeyLargo USB - 0041 K2 KeyLargo Mac/IO - 0042 K2 FireWire - 0043 K2 ATA/100 - 0045 K2 HT-PCI Bridge - 0046 K2 HT-PCI Bridge - 0047 K2 HT-PCI Bridge - 0048 K2 HT-PCI Bridge - 0049 K2 HT-PCI Bridge - 004b U3 AGP - 004c K2 GMAC (Sun GEM) - 004f Shasta Mac I/O - 0050 Shasta IDE - 0051 Shasta (Sun GEM) - 0052 Shasta Firewire - 0053 Shasta PCI Bridge - 0054 Shasta PCI Bridge - 0055 Shasta PCI Bridge - 0058 U3L AGP Bridge - 1645 Tigon3 Gigabit Ethernet NIC (BCM5701) -106c Hynix Semiconductor - 8801 Dual Pentium ISA/PCI Motherboard - 8802 PowerPC ISA/PCI Motherboard - 8803 Dual Window Graphics Accelerator - 8804 LAN Controller - 8805 100-BaseT LAN -106d Sequent Computer Systems -106e DFI, Inc -106f City Gate Development Ltd -1070 Daewoo Telecom Ltd -1071 Mitac - 8160 Mitac 8060B Mobile Platform -1072 GIT Co Ltd -1073 Yamaha Corporation - 0001 3D GUI Accelerator - 0002 YGV615 [RPA3 3D-Graphics Controller] - 0003 YMF-740 - 0004 YMF-724 - 1073 0004 YMF724-Based PCI Audio Adapter - 0005 DS1 Audio - 1073 0005 DS-XG PCI Audio CODEC - 0006 DS1 Audio - 
0008 DS1 Audio - 1073 0008 DS-XG PCI Audio CODEC - 000a DS1L Audio - 1073 0004 DS-XG PCI Audio CODEC - 1073 000a DS-XG PCI Audio CODEC - 000c YMF-740C [DS-1L Audio Controller] - 107a 000c DS-XG PCI Audio CODEC - 000d YMF-724F [DS-1 Audio Controller] - 1073 000d DS-XG PCI Audio CODEC - 0010 YMF-744B [DS-1S Audio Controller] - 1073 0006 DS-XG PCI Audio CODEC - 1073 0010 DS-XG PCI Audio CODEC - 0012 YMF-754 [DS-1E Audio Controller] - 1073 0012 DS-XG PCI Audio Codec - 0020 DS-1 Audio - 2000 DS2416 Digital Mixing Card - 1073 2000 DS2416 Digital Mixing Card -1074 NexGen Microsystems - 4e78 82c500/1 -1075 Advanced Integrations Research -1076 Chaintech Computer Co. Ltd -1077 QLogic Corp. - 1016 ISP10160 Single Channel Ultra3 SCSI Processor - 1020 ISP1020 Fast-wide SCSI - 1022 ISP1022 Fast-wide SCSI - 1080 ISP1080 SCSI Host Adapter - 1216 ISP12160 Dual Channel Ultra3 SCSI Processor - 101e 8471 QLA12160 on AMI MegaRAID - 101e 8493 QLA12160 on AMI MegaRAID - 1240 ISP1240 SCSI Host Adapter - 1280 ISP1280 SCSI Host Adapter - 2020 ISP2020A Fast!SCSI Basic Adapter - 2100 QLA2100 64-bit Fibre Channel Adapter - 1077 0001 QLA2100 64-bit Fibre Channel Adapter - 2200 QLA2200 64-bit Fibre Channel Adapter - 1077 0002 QLA2200 - 2300 QLA2300 64-bit Fibre Channel Adapter - 2312 QLA2312 Fibre Channel Adapter -1078 Cyrix Corporation - 0000 5510 [Grappa] - 0001 PCI Master - 0002 5520 [Cognac] - 0100 5530 Legacy [Kahlua] - 0101 5530 SMI [Kahlua] - 0102 5530 IDE [Kahlua] - 0103 5530 Audio [Kahlua] - 0104 5530 Video [Kahlua] - 0400 ZFMicro PCI Bridge - 0401 ZFMicro Chipset SMI - 0402 ZFMicro Chipset IDE - 0403 ZFMicro Expansion Bus -1079 I-Bus -107a NetWorth -107b Gateway 2000 -107c LG Electronics [Lucky Goldstar Co. Ltd] -107d LeadTek Research Inc. 
- 0000 P86C850 - 2134 WinFast 3D S320 II - 2971 [GeForce FX 5900] WinFast A350 TDH MyViVo -107e Interphase Corporation - 0001 5515 ATM Adapter [Flipper] - 0002 100 VG AnyLan Controller - 0004 5526 Fibre Channel Host Adapter - 0005 x526 Fibre Channel Host Adapter - 0008 5525/5575 ATM Adapter (155 Mbit) [Atlantic] - 9003 5535-4P-BRI-ST - 9007 5535-4P-BRI-U - 9008 5535-1P-SR - 900c 5535-1P-SR-ST - 900e 5535-1P-SR-U - 9011 5535-1P-PRI - 9013 5535-2P-PRI - 9023 5536-4P-BRI-ST - 9027 5536-4P-BRI-U - 9031 5536-1P-PRI - 9033 5536-2P-PRI -107f Data Technology Corporation - 0802 SL82C105 -1080 Contaq Microsystems - 0600 82C599 - c691 Cypress CY82C691 - c693 82c693 -1081 Supermac Technology - 0d47 Radius PCI to NuBUS Bridge -1082 EFA Corporation of America -1083 Forex Computer Corporation - 0001 FR710 -1084 Parador -1085 Tulip Computers Int.B.V. -1086 J. Bond Computer Systems -1087 Cache Computer -1088 Microcomputer Systems (M) Son -1089 Data General Corporation -# Formerly Bit3 Computer Corp. -108a SBS Technologies - 0001 VME Bridge Model 617 - 0010 VME Bridge Model 618 - 0040 dataBLIZZARD - 3000 VME Bridge Model 2706 -108c Oakleigh Systems Inc. -108d Olicom - 0001 Token-Ring 16/4 PCI Adapter (3136/3137) - 0002 16/4 Token Ring - 0004 RapidFire 3139 Token-Ring 16/4 PCI Adapter - 108d 0004 OC-3139/3140 RapidFire Token-Ring 16/4 Adapter - 0005 GoCard 3250 Token-Ring 16/4 CardBus PC Card - 0006 OC-3530 RapidFire Token-Ring 100 - 0007 RapidFire 3141 Token-Ring 16/4 PCI Fiber Adapter - 108d 0007 OC-3141 RapidFire Token-Ring 16/4 Adapter - 0008 RapidFire 3540 HSTR 100/16/4 PCI Adapter - 108d 0008 OC-3540 RapidFire HSTR 100/16/4 Adapter - 0011 OC-2315 - 0012 OC-2325 - 0013 OC-2183/2185 - 0014 OC-2326 - 0019 OC-2327/2250 10/100 Ethernet Adapter - 108d 0016 OC-2327 Rapidfire 10/100 Ethernet Adapter - 108d 0017 OC-2250 GoCard 10/100 Ethernet Adapter - 0021 OC-6151/6152 [RapidFire ATM 155] - 0022 ATM Adapter -108e Sun Microsystems Computer Corp. 
- 0001 EBUS - 1000 EBUS - 1001 Happy Meal - 1100 RIO EBUS - 1101 RIO GEM - 1102 RIO 1394 - 1103 RIO USB - 1648 [bge] Gigabit Ethernet - 2bad GEM - 5000 Simba Advanced PCI Bridge - 5043 SunPCI Co-processor - 8000 Psycho PCI Bus Module - 8001 Schizo PCI Bus Module - 8002 Schizo+ PCI Bus Module - a000 Ultra IIi - a001 Ultra IIe - a801 Tomatillo PCI Bus Module - abba Cassini 10/100/1000 -108f Systemsoft -1090 Encore Computer Corporation -1091 Intergraph Corporation - 0020 3D graphics processor - 0021 3D graphics processor w/Texturing - 0040 3D graphics frame buffer - 0041 3D graphics frame buffer - 0060 Proprietary bus bridge - 00e4 Powerstorm 4D50T - 0720 Motion JPEG codec - 07a0 Sun Expert3D-Lite Graphics Accelerator - 1091 Sun Expert3D Graphics Accelerator -1092 Diamond Multimedia Systems - 00a0 Speedstar Pro SE - 00a8 Speedstar 64 - 0550 Viper V550 - 08d4 Supra 2260 Modem - 094c SupraExpress 56i Pro - 1092 Viper V330 - 6120 Maximum DVD - 8810 Stealth SE - 8811 Stealth 64/SE - 8880 Stealth - 8881 Stealth - 88b0 Stealth 64 - 88b1 Stealth 64 - 88c0 Stealth 64 - 88c1 Stealth 64 - 88d0 Stealth 64 - 88d1 Stealth 64 - 88f0 Stealth 64 - 88f1 Stealth 64 - 9999 DMD-I0928-1 "Monster sound" sound chip -1093 National Instruments - 0160 PCI-DIO-96 - 0162 PCI-MIO-16XE-50 - 1170 PCI-MIO-16XE-10 - 1180 PCI-MIO-16E-1 - 1190 PCI-MIO-16E-4 - 1310 PCI-6602 - 1330 PCI-6031E - 1350 PCI-6071E - 14e0 PCI-6110 - 14f0 PCI-6111 - 17d0 PCI-6503 - 1870 PCI-6713 - 1880 PCI-6711 - 18b0 PCI-6052E - 2410 PCI-6733 - 2890 PCI-6036E - 2a60 PCI-6023E - 2a70 PCI-6024E - 2a80 PCI-6025E - 2c80 PCI-6035E - 2ca0 PCI-6034E - 70b8 PCI-6251 [M Series - High Speed Multifunction DAQ] - b001 IMAQ-PCI-1408 - b011 IMAQ-PXI-1408 - b021 IMAQ-PCI-1424 - b031 IMAQ-PCI-1413 - b041 IMAQ-PCI-1407 - b051 IMAQ-PXI-1407 - b061 IMAQ-PCI-1411 - b071 IMAQ-PCI-1422 - b081 IMAQ-PXI-1422 - b091 IMAQ-PXI-1411 - c801 PCI-GPIB - c831 PCI-GPIB bridge -1094 First International Computers [FIC] -1095 Silicon Image, Inc. 
(formerly CMD Technology Inc) - 0240 Adaptec AAR-1210SA SATA HostRAID Controller - 0640 PCI0640 - 0643 PCI0643 - 0646 PCI0646 - 0647 PCI0647 - 0648 PCI0648 - 0649 SiI 0649 Ultra ATA/100 PCI to ATA Host Controller - 0e11 005d Integrated Ultra ATA-100 Dual Channel Controller - 0e11 007e Integrated Ultra ATA-100 IDE RAID Controller - 101e 0649 AMI MegaRAID IDE 100 Controller - 0650 PBC0650A - 0670 USB0670 - 1095 0670 USB0670 - 0673 USB0673 - 0680 PCI0680 Ultra ATA-133 Host Controller - 1095 3680 Winic W-680 (Silicon Image 680 based) - 3112 SiI 3112 [SATALink/SATARaid] Serial ATA Controller - 1095 3112 SiI 3112 SATALink Controller - 1095 6112 SiI 3112 SATARaid Controller - 3114 SiI 3114 [SATALink/SATARaid] Serial ATA Controller - 1095 3114 SiI 3114 SATALink Controller - 1095 6114 SiI 3114 SATARaid Controller - 3124 SiI 3124 PCI-X Serial ATA Controller - 1095 3124 SiI 3124 PCI-X Serial ATA Controller - 3512 SiI 3512 [SATALink/SATARaid] Serial ATA Controller - 1095 3512 SiI 3512 SATALink Controller - 1095 6512 SiI 3512 SATARaid Controller -1096 Alacron -1097 Appian Technology -1098 Quantum Designs (H.K.) Ltd - 0001 QD-8500 - 0002 QD-8580 -1099 Samsung Electronics Co., Ltd -109a Packard Bell -109b Gemlight Computer Ltd. -109c Megachips Corporation -109d Zida Technologies Ltd. -109e Brooktree Corporation - 0350 Bt848 Video Capture - 0351 Bt849A Video capture - 0369 Bt878 Video Capture - 1002 0001 TV-Wonder - 1002 0003 TV-Wonder/VE - 036c Bt879(??) 
Video Capture - 13e9 0070 Win/TV (Video Section) - 036e Bt878 Video Capture - 0070 13eb WinTV Series - 0070 ff01 Viewcast Osprey 200 - 0071 0101 DigiTV PCI - 107d 6606 WinFast TV 2000 - 11bd 0012 PCTV pro (TV + FM stereo receiver) - 11bd 001c PCTV Sat (DBC receiver) - 127a 0001 Bt878 Mediastream Controller NTSC - 127a 0002 Bt878 Mediastream Controller PAL BG - 127a 0003 Bt878a Mediastream Controller PAL BG - 127a 0048 Bt878/832 Mediastream Controller - 144f 3000 MagicTView CPH060 - Video - 1461 0002 TV98 Series (TV/No FM/Remote) - 1461 0003 AverMedia UltraTV PCI 350 - 1461 0004 AVerTV WDM Video Capture - 1461 0761 AverTV DVB-T - 14f1 0001 Bt878 Mediastream Controller NTSC - 14f1 0002 Bt878 Mediastream Controller PAL BG - 14f1 0003 Bt878a Mediastream Controller PAL BG - 14f1 0048 Bt878/832 Mediastream Controller - 1822 0001 VisionPlus DVB card - 1851 1850 FlyVideo'98 - Video - 1851 1851 FlyVideo II - 1852 1852 FlyVideo'98 - Video (with FM Tuner) - 270f fc00 Digitop DTT-1000 - bd11 1200 PCTV pro (TV + FM stereo receiver) - 036f Bt879 Video Capture - 127a 0044 Bt879 Video Capture NTSC - 127a 0122 Bt879 Video Capture PAL I - 127a 0144 Bt879 Video Capture NTSC - 127a 0222 Bt879 Video Capture PAL BG - 127a 0244 Bt879a Video Capture NTSC - 127a 0322 Bt879 Video Capture NTSC - 127a 0422 Bt879 Video Capture NTSC - 127a 1122 Bt879 Video Capture PAL I - 127a 1222 Bt879 Video Capture PAL BG - 127a 1322 Bt879 Video Capture NTSC - 127a 1522 Bt879a Video Capture PAL I - 127a 1622 Bt879a Video Capture PAL BG - 127a 1722 Bt879a Video Capture NTSC - 14f1 0044 Bt879 Video Capture NTSC - 14f1 0122 Bt879 Video Capture PAL I - 14f1 0144 Bt879 Video Capture NTSC - 14f1 0222 Bt879 Video Capture PAL BG - 14f1 0244 Bt879a Video Capture NTSC - 14f1 0322 Bt879 Video Capture NTSC - 14f1 0422 Bt879 Video Capture NTSC - 14f1 1122 Bt879 Video Capture PAL I - 14f1 1222 Bt879 Video Capture PAL BG - 14f1 1322 Bt879 Video Capture NTSC - 14f1 1522 Bt879a Video Capture PAL I - 14f1 1622 Bt879a Video 
Capture PAL BG - 14f1 1722 Bt879a Video Capture NTSC - 1851 1850 FlyVideo'98 - Video - 1851 1851 FlyVideo II - 1852 1852 FlyVideo'98 - Video (with FM Tuner) - 0370 Bt880 Video Capture - 1851 1850 FlyVideo'98 - 1851 1851 FlyVideo'98 EZ - video - 1852 1852 FlyVideo'98 (with FM Tuner) - 0878 Bt878 Audio Capture - 0070 13eb WinTV Series - 0070 ff01 Viewcast Osprey 200 - 0071 0101 DigiTV PCI - 1002 0001 TV-Wonder - 1002 0003 TV-Wonder/VE - 11bd 0012 PCTV pro (TV + FM stereo receiver, audio section) - 11bd 001c PCTV Sat (DBC receiver) - 127a 0001 Bt878 Video Capture (Audio Section) - 127a 0002 Bt878 Video Capture (Audio Section) - 127a 0003 Bt878 Video Capture (Audio Section) - 127a 0048 Bt878 Video Capture (Audio Section) - 13e9 0070 Win/TV (Audio Section) - 144f 3000 MagicTView CPH060 - Audio - 1461 0004 AVerTV WDM Audio Capture - 1461 0761 AVerTV DVB-T - 14f1 0001 Bt878 Video Capture (Audio Section) - 14f1 0002 Bt878 Video Capture (Audio Section) - 14f1 0003 Bt878 Video Capture (Audio Section) - 14f1 0048 Bt878 Video Capture (Audio Section) - 1822 0001 VisionPlus DVB Card - 270f fc00 Digitop DTT-1000 - bd11 1200 PCTV pro (TV + FM stereo receiver, audio section) - 0879 Bt879 Audio Capture - 127a 0044 Bt879 Video Capture (Audio Section) - 127a 0122 Bt879 Video Capture (Audio Section) - 127a 0144 Bt879 Video Capture (Audio Section) - 127a 0222 Bt879 Video Capture (Audio Section) - 127a 0244 Bt879 Video Capture (Audio Section) - 127a 0322 Bt879 Video Capture (Audio Section) - 127a 0422 Bt879 Video Capture (Audio Section) - 127a 1122 Bt879 Video Capture (Audio Section) - 127a 1222 Bt879 Video Capture (Audio Section) - 127a 1322 Bt879 Video Capture (Audio Section) - 127a 1522 Bt879 Video Capture (Audio Section) - 127a 1622 Bt879 Video Capture (Audio Section) - 127a 1722 Bt879 Video Capture (Audio Section) - 14f1 0044 Bt879 Video Capture (Audio Section) - 14f1 0122 Bt879 Video Capture (Audio Section) - 14f1 0144 Bt879 Video Capture (Audio Section) - 14f1 0222 Bt879 Video 
Capture (Audio Section) - 14f1 0244 Bt879 Video Capture (Audio Section) - 14f1 0322 Bt879 Video Capture (Audio Section) - 14f1 0422 Bt879 Video Capture (Audio Section) - 14f1 1122 Bt879 Video Capture (Audio Section) - 14f1 1222 Bt879 Video Capture (Audio Section) - 14f1 1322 Bt879 Video Capture (Audio Section) - 14f1 1522 Bt879 Video Capture (Audio Section) - 14f1 1622 Bt879 Video Capture (Audio Section) - 14f1 1722 Bt879 Video Capture (Audio Section) - 0880 Bt880 Audio Capture - 2115 BtV 2115 Mediastream controller - 2125 BtV 2125 Mediastream controller - 2164 BtV 2164 - 2165 BtV 2165 - 8230 Bt8230 ATM Segment/Reassembly Ctrlr (SRC) - 8472 Bt8472 - 8474 Bt8474 -109f Trigem Computer Inc. -10a0 Meidensha Corporation -10a1 Juko Electronics Ind. Co. Ltd -10a2 Quantum Corporation -10a3 Everex Systems Inc -10a4 Globe Manufacturing Sales -10a5 Smart Link Ltd. - 3052 SmartPCI562 56K Modem - 5449 SmartPCI561 modem -10a6 Informtech Industrial Ltd. -10a7 Benchmarq Microelectronics -10a8 Sierra Semiconductor - 0000 STB Horizon 64 -10a9 Silicon Graphics, Inc. 
- 0001 Crosstalk to PCI Bridge - 0002 Linc I/O controller - 0003 IOC3 I/O controller - 0004 O2 MACE - 0005 RAD Audio - 0006 HPCEX - 0007 RPCEX - 0008 DiVO VIP - 0009 AceNIC Gigabit Ethernet - 10a9 8002 AceNIC Gigabit Ethernet - 0010 AMP Video I/O - 0011 GRIP - 0012 SGH PSHAC GSN - 1001 Magic Carpet - 1002 Lithium - 1003 Dual JPEG 1 - 1004 Dual JPEG 2 - 1005 Dual JPEG 3 - 1006 Dual JPEG 4 - 1007 Dual JPEG 5 - 1008 Cesium - 100a IOC4 I/O controller - 2001 Fibre Channel - 2002 ASDE - 8001 O2 1394 - 8002 G-net NT -10aa ACC Microelectronics - 0000 ACCM 2188 -10ab Digicom -10ac Honeywell IAC -10ad Symphony Labs - 0001 W83769F - 0003 SL82C103 - 0005 SL82C105 - 0103 SL82c103 - 0105 SL82c105 - 0565 W83C553 -10ae Cornerstone Technology -10af Micro Computer Systems Inc -10b0 CardExpert Technology -10b1 Cabletron Systems Inc -10b2 Raytheon Company -10b3 Databook Inc - 3106 DB87144 - b106 DB87144 -10b4 STB Systems Inc - 1b1d Velocity 128 3D - 10b4 237e Velocity 4400 -10b5 PLX Technology, Inc. - 0001 i960 PCI bus interface - 1076 VScom 800 8 port serial adaptor - 1077 VScom 400 4 port serial adaptor - 1078 VScom 210 2 port serial and 1 port parallel adaptor - 1103 VScom 200 2 port serial adaptor - 1146 VScom 010 1 port parallel adaptor - 1147 VScom 020 2 port parallel adaptor - 2724 Thales PCSM Security Card - 8516 PEX 8516 Versatile PCI Express Switch - 8532 PEX 8532 Versatile PCI Express Switch - 9030 PCI <-> IOBus Bridge Hot Swap - 10b5 2862 Alpermann+Velte PCL PCI LV (3V/5V): Timecode Reader Board - 10b5 2906 Alpermann+Velte PCI TS (3V/5V): Time Synchronisation Board - 10b5 2940 Alpermann+Velte PCL PCI D (3V/5V): Timecode Reader Board - 10b5 3025 Alpermann+Velte PCL PCI L (3V/5V): Timecode Reader Board - 10b5 3068 Alpermann+Velte PCL PCI HD (3V/5V): Timecode Reader Board - 15ed 1002 MCCS 8-port Serial Hot Swap - 15ed 1003 MCCS 16-port Serial Hot Swap - 9036 9036 - 9050 PCI <-> IOBus Bridge - 10b5 1067 IXXAT CAN i165 - 10b5 1172 IK220 (Heidenhain) - 10b5 2036 SatPak GPS - 
10b5 2221 Alpermann+Velte PCL PCI LV: Timecode Reader Board - 10b5 2273 SH-ARC SoHard ARCnet card - 10b5 2431 Alpermann+Velte PCL PCI D: Timecode Reader Board - 10b5 2905 Alpermann+Velte PCI TS: Time Synchronisation Board - 10b5 9050 MP9050 - 1498 0362 TPMC866 8 Channel Serial Card - 1522 0001 RockForce 4 Port V.90 Data/Fax/Voice Modem - 1522 0002 RockForce 2 Port V.90 Data/Fax/Voice Modem - 1522 0003 RockForce 6 Port V.90 Data/Fax/Voice Modem - 1522 0004 RockForce 8 Port V.90 Data/Fax/Voice Modem - 1522 0010 RockForce2000 4 Port V.90 Data/Fax/Voice Modem - 1522 0020 RockForce2000 2 Port V.90 Data/Fax/Voice Modem - 15ed 1000 Macrolink MCCS 8-port Serial - 15ed 1001 Macrolink MCCS 16-port Serial - 15ed 1002 Macrolink MCCS 8-port Serial Hot Swap - 15ed 1003 Macrolink MCCS 16-port Serial Hot Swap -# Sorry, there was a typo - 5654 2036 OpenSwitch 6 Telephony card -# Sorry, there was a typo - 5654 3132 OpenSwitch 12 Telephony card - 5654 5634 OpenLine4 Telephony Card - d531 c002 PCIntelliCAN 2xSJA1000 CAN bus - d84d 4006 EX-4006 1P - d84d 4008 EX-4008 1P EPP/ECP - d84d 4014 EX-4014 2P - d84d 4018 EX-4018 3P EPP/ECP - d84d 4025 EX-4025 1S(16C550) RS-232 - d84d 4027 EX-4027 1S(16C650) RS-232 - d84d 4028 EX-4028 1S(16C850) RS-232 - d84d 4036 EX-4036 2S(16C650) RS-232 - d84d 4037 EX-4037 2S(16C650) RS-232 - d84d 4038 EX-4038 2S(16C850) RS-232 - d84d 4052 EX-4052 1S(16C550) RS-422/485 - d84d 4053 EX-4053 2S(16C550) RS-422/485 - d84d 4055 EX-4055 4S(16C550) RS-232 - d84d 4058 EX-4055 4S(16C650) RS-232 - d84d 4065 EX-4065 8S(16C550) RS-232 - d84d 4068 EX-4068 8S(16C650) RS-232 - d84d 4078 EX-4078 2S(16C552) RS-232+1P - 9054 PCI <-> IOBus Bridge - 10b5 2455 Wessex Techology PHIL-PCI - 10b5 2696 Innes Corp AM Radcap card - 10b5 2717 Innes Corp Auricon card - 10b5 2844 Innes Corp TVS Encoder card - 12d9 0002 PCI Prosody Card rev 1.5 - 16df 0011 PIKA PrimeNet MM PCI - 16df 0012 PIKA PrimeNet MM cPCI 8 - 16df 0013 PIKA PrimeNet MM cPCI 8 (without CAS Signaling Option) - 16df 0014 
PIKA PrimeNet MM cPCI 4 - 16df 0015 PIKA Daytona MM - 16df 0016 PIKA InLine MM - 9056 Francois - 10b5 2979 CellinkBlade 11 - CPCI board VoATM AAL1 - 9060 9060 - 906d 9060SD - 125c 0640 Aries 16000P - 906e 9060ES - 9080 9080 - 103c 10eb (Agilent) E2777B 83K Series PCI based Optical Communication Interface - 103c 10ec (Agilent) E6978-66442 PCI CIC - 10b5 9080 9080 [real subsystem ID not set] - 129d 0002 Aculab PCI Prosidy card - 12d9 0002 PCI Prosody Card - 12df 4422 4422PCI ["Do-All" Telemetry Data Aquisition System] - bb04 B&B 3PCIOSD1A Isolated PCI Serial -10b6 Madge Networks - 0001 Smart 16/4 PCI Ringnode - 0002 Smart 16/4 PCI Ringnode Mk2 - 10b6 0002 Smart 16/4 PCI Ringnode Mk2 - 10b6 0006 16/4 CardBus Adapter - 0003 Smart 16/4 PCI Ringnode Mk3 - 0e11 b0fd Compaq NC4621 PCI, 4/16, WOL - 10b6 0003 Smart 16/4 PCI Ringnode Mk3 - 10b6 0007 Presto PCI Plus Adapter - 0004 Smart 16/4 PCI Ringnode Mk1 - 0006 16/4 Cardbus Adapter - 10b6 0006 16/4 CardBus Adapter - 0007 Presto PCI Adapter - 10b6 0007 Presto PCI - 0009 Smart 100/16/4 PCI-HS Ringnode - 10b6 0009 Smart 100/16/4 PCI-HS Ringnode - 000a Smart 100/16/4 PCI Ringnode - 10b6 000a Smart 100/16/4 PCI Ringnode - 000b 16/4 CardBus Adapter Mk2 - 10b6 0008 16/4 CardBus Adapter Mk2 - 10b6 000b 16/4 Cardbus Adapter Mk2 - 000c RapidFire 3140V2 16/4 TR Adapter - 10b6 000c RapidFire 3140V2 16/4 TR Adapter - 1000 Collage 25/155 ATM Client Adapter - 1001 Collage 155 ATM Server Adapter -10b7 3Com Corporation - 0001 3c985 1000BaseSX (SX/TX) - 0013 AR5212 802.11abg NIC (3CRDAG675) - 10b7 2031 3CRDAG675 11a/b/g Wireless PCI Adapter - 0910 3C910-A01 - 1006 MINI PCI type 3B Data Fax Modem - 1007 Mini PCI 56k Winmodem - 10b7 615c Mini PCI 56K Modem - 1201 3c982-TXM 10/100baseTX Dual Port A [Hydra] - 1202 3c982-TXM 10/100baseTX Dual Port B [Hydra] - 1700 3c940 10/100/1000Base-T [Marvell] - 1043 80eb P4P800/K8V Deluxe motherboard - 10b7 0010 3C940 Gigabit LOM Ethernet Adapter - 10b7 0020 3C941 Gigabit LOM Ethernet Adapter - 147b 1407 
KV8-MAX3 motherboard - 3390 3c339 TokenLink Velocity - 3590 3c359 TokenLink Velocity XL - 10b7 3590 TokenLink Velocity XL Adapter (3C359/359B) - 4500 3c450 HomePNA [Tornado] - 5055 3c555 Laptop Hurricane - 5057 3c575 Megahertz 10/100 LAN CardBus [Boomerang] - 10b7 5a57 3C575 Megahertz 10/100 LAN Cardbus PC Card - 5157 3cCFE575BT Megahertz 10/100 LAN CardBus [Cyclone] - 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card - 5257 3cCFE575CT CardBus [Cyclone] - 10b7 5c57 FE575C-3Com 10/100 LAN CardBus-Fast Ethernet - 5900 3c590 10BaseT [Vortex] - 5920 3c592 EISA 10mbps Demon/Vortex - 5950 3c595 100BaseTX [Vortex] - 5951 3c595 100BaseT4 [Vortex] - 5952 3c595 100Base-MII [Vortex] - 5970 3c597 EISA Fast Demon/Vortex - 5b57 3c595 Megahertz 10/100 LAN CardBus [Boomerang] - 10b7 5b57 3C575 Megahertz 10/100 LAN Cardbus PC Card - 6000 3CRSHPW796 [OfficeConnect Wireless CardBus] - 6001 3com 3CRWE154G72 [Office Connect Wireless LAN Adapter] - 6055 3c556 Hurricane CardBus [Cyclone] - 6056 3c556B CardBus [Tornado] - 10b7 6556 10/100 Mini PCI Ethernet Adapter - 6560 3cCFE656 CardBus [Cyclone] - 10b7 656a 3CCFEM656 10/100 LAN+56K Modem CardBus - 6561 3cCFEM656 10/100 LAN+56K Modem CardBus - 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus - 6562 3cCFEM656B 10/100 LAN+Winmodem CardBus [Cyclone] - 10b7 656b 3CCFEM656B 10/100 LAN+56K Modem CardBus - 6563 3cCFEM656B 10/100 LAN+56K Modem CardBus - 10b7 656b 3CCFEM656 10/100 LAN+56K Modem CardBus - 6564 3cXFEM656C 10/100 LAN+Winmodem CardBus [Tornado] - 7646 3cSOHO100-TX Hurricane - 7770 3CRWE777 PCI(PLX) Wireless Adaptor [Airconnect] - 7940 3c803 FDDILink UTP Controller - 7980 3c804 FDDILink SAS Controller - 7990 3c805 FDDILink DAS Controller - 80eb 3c940B 10/100/1000Base-T - 8811 Token ring - 9000 3c900 10BaseT [Boomerang] - 9001 3c900 10Mbps Combo [Boomerang] - 9004 3c900B-TPO Etherlink XL [Cyclone] - 10b7 9004 3C900B-TPO Etherlink XL TPO 10Mb - 9005 3c900B-Combo Etherlink XL [Cyclone] - 10b7 9005 3C900B-Combo Etherlink XL Combo - 
9006 3c900B-TPC Etherlink XL [Cyclone] - 900a 3c900B-FL 10base-FL [Cyclone] - 9050 3c905 100BaseTX [Boomerang] - 9051 3c905 100BaseT4 [Boomerang] - 9055 3c905B 100BaseTX [Cyclone] - 1028 0080 3C905B Fast Etherlink XL 10/100 - 1028 0081 3C905B Fast Etherlink XL 10/100 - 1028 0082 3C905B Fast Etherlink XL 10/100 - 1028 0083 3C905B Fast Etherlink XL 10/100 - 1028 0084 3C905B Fast Etherlink XL 10/100 - 1028 0085 3C905B Fast Etherlink XL 10/100 - 1028 0086 3C905B Fast Etherlink XL 10/100 - 1028 0087 3C905B Fast Etherlink XL 10/100 - 1028 0088 3C905B Fast Etherlink XL 10/100 - 1028 0089 3C905B Fast Etherlink XL 10/100 - 1028 0090 3C905B Fast Etherlink XL 10/100 - 1028 0091 3C905B Fast Etherlink XL 10/100 - 1028 0092 3C905B Fast Etherlink XL 10/100 - 1028 0093 3C905B Fast Etherlink XL 10/100 - 1028 0094 3C905B Fast Etherlink XL 10/100 - 1028 0095 3C905B Fast Etherlink XL 10/100 - 1028 0096 3C905B Fast Etherlink XL 10/100 - 1028 0097 3C905B Fast Etherlink XL 10/100 - 1028 0098 3C905B Fast Etherlink XL 10/100 - 1028 0099 3C905B Fast Etherlink XL 10/100 - 10b7 9055 3C905B Fast Etherlink XL 10/100 - 9056 3c905B-T4 Fast EtherLink XL [Cyclone] - 9058 3c905B Deluxe Etherlink 10/100/BNC [Cyclone] - 905a 3c905B-FX Fast Etherlink XL FX 100baseFx [Cyclone] - 9200 3c905C-TX/TX-M [Tornado] - 1028 0095 3C920 Integrated Fast Ethernet Controller - 1028 0097 3C920 Integrated Fast Ethernet Controller - 1028 00fe Optiplex GX240 - 1028 012a 3C920 Integrated Fast Ethernet Controller [Latitude C640] - 10b7 1000 3C905C-TX Fast Etherlink for PC Management NIC - 10b7 7000 10/100 Mini PCI Ethernet Adapter - 10f1 2466 Tiger MPX S2466 (3C920 Integrated Fast Ethernet Controller) - 9201 3C920B-EMB Integrated Fast Ethernet Controller [Tornado] - 1043 80ab A7N8X Deluxe onboard 3C920B-EMB Integrated Fast Ethernet Controller - 9202 3Com 3C920B-EMB-WNM Integrated Fast Ethernet Controller - 9210 3C920B-EMB-WNM Integrated Fast Ethernet Controller - 9300 3CSOHO100B-TX 910-A01 [tulip] - 9800 3c980-TX Fast 
Etherlink XL Server Adapter [Cyclone] - 10b7 9800 3c980-TX Fast Etherlink XL Server Adapter - 9805 3c980-C 10/100baseTX NIC [Python-T] - 10b7 1201 EtherLink Server 10/100 Dual Port A - 10b7 1202 EtherLink Server 10/100 Dual Port B - 10b7 9805 3c980 10/100baseTX NIC [Python-T] - 10f1 2462 Thunder K7 S2462 - 9900 3C990-TX [Typhoon] - 9902 3CR990-TX-95 [Typhoon 56-bit] - 9903 3CR990-TX-97 [Typhoon 168-bit] - 9904 3C990B-TX-M/3C990BSVR [Typhoon2] - 10b7 1000 3CR990B-TX-M [Typhoon2] - 10b7 2000 3CR990BSVR [Typhoon2 Server] - 9905 3CR990-FX-95/97/95 [Typhon Fiber] - 10b7 1101 3CR990-FX-95 [Typhoon Fiber 56-bit] - 10b7 1102 3CR990-FX-97 [Typhoon Fiber 168-bit] - 10b7 2101 3CR990-FX-95 Server [Typhoon Fiber 56-bit] - 10b7 2102 3CR990-FX-97 Server [Typhoon Fiber 168-bit] - 9908 3CR990SVR95 [Typhoon Server 56-bit] - 9909 3CR990SVR97 [Typhoon Server 168-bit] - 990a 3C990SVR [Typhoon Server] - 990b 3C990SVR [Typhoon Server] -10b8 Standard Microsystems Corp [SMC] - 0005 83c170 EPIC/100 Fast Ethernet Adapter - 1055 e000 LANEPIC 10/100 [EVB171Q-PCI] - 1055 e002 LANEPIC 10/100 [EVB171G-PCI] - 10b8 a011 EtherPower II 10/100 - 10b8 a014 EtherPower II 10/100 - 10b8 a015 EtherPower II 10/100 - 10b8 a016 EtherPower II 10/100 - 10b8 a017 EtherPower II 10/100 - 0006 83c175 EPIC/100 Fast Ethernet Adapter - 1055 e100 LANEPIC Cardbus Fast Ethernet Adapter - 1055 e102 LANEPIC Cardbus Fast Ethernet Adapter - 1055 e300 LANEPIC Cardbus Fast Ethernet Adapter - 1055 e302 LANEPIC Cardbus Fast Ethernet Adapter - 10b8 a012 LANEPIC Cardbus Fast Ethernet Adapter - 13a2 8002 LANEPIC Cardbus Fast Ethernet Adapter - 13a2 8006 LANEPIC Cardbus Fast Ethernet Adapter - 1000 FDC 37c665 - 1001 FDC 37C922 -# 802.11g card - 2802 SMC2802W [EZ Connect g] - a011 83C170QF - b106 SMC34C90 -10b9 ALi Corporation - 0101 CMI8338/C3DX PCI Audio Device - 0111 C-Media CMI8738/C3DX Audio Device (OEM) - 10b9 0111 C-Media CMI8738/C3DX Audio Device (OEM) - 0780 Multi-IO Card - 0782 Multi-IO Card - 1435 M1435 - 1445 M1445 - 1449 
M1449 - 1451 M1451 - 1461 M1461 - 1489 M1489 - 1511 M1511 [Aladdin] - 1512 M1512 [Aladdin] - 1513 M1513 [Aladdin] - 1521 M1521 [Aladdin III] - 10b9 1521 ALI M1521 Aladdin III CPU Bridge - 1523 M1523 - 10b9 1523 ALI M1523 ISA Bridge - 1531 M1531 [Aladdin IV] - 1533 M1533 PCI to ISA Bridge [Aladdin IV] - 1014 053b ThinkPad R40e (2684-HVG) PCI to ISA Bridge - 10b9 1533 ALI M1533 Aladdin IV ISA Bridge - 1541 M1541 - 10b9 1541 ALI M1541 Aladdin V/V+ AGP System Controller - 1543 M1543 - 1563 M1563 HyperTransport South Bridge - 1621 M1621 - 1631 ALI M1631 PCI North Bridge Aladdin Pro III - 1632 M1632M Northbridge+Trident - 1641 ALI M1641 PCI North Bridge Aladdin Pro IV - 1644 M1644/M1644T Northbridge+Trident - 1646 M1646 Northbridge+Trident - 1647 M1647 Northbridge [MAGiK 1 / MobileMAGiK 1] - 1651 M1651/M1651T Northbridge [Aladdin-Pro 5/5M,Aladdin-Pro 5T/5TM] - 1671 M1671 Super P4 Northbridge [AGP4X,PCI and SDR/DDR] - 1672 M1672 Northbridge [CyberALADDiN-P4] - 1681 M1681 P4 Northbridge [AGP8X,HyperTransport and SDR/DDR] - 1687 M1687 K8 Northbridge [AGP8X and HyperTransport] - 1689 M1689 K8 Northbridge [Super K8 Single Chip] - 3141 M3141 - 3143 M3143 - 3145 M3145 - 3147 M3147 - 3149 M3149 - 3151 M3151 - 3307 M3307 - 3309 M3309 - 3323 M3325 Video/Audio Decoder - 5212 M4803 - 5215 MS4803 - 5217 M5217H - 5219 M5219 - 5225 M5225 - 5228 M5228 ALi ATA/RAID Controller - 5229 M5229 IDE - 1014 050f ThinkPad R30 - 1014 053d ThinkPad R40e (2684-HVG) builtin IDE - 103c 0024 Pavilion ze4400 builtin IDE - 1043 8053 A7A266 Motherboard IDE - 5235 M5225 - 5237 USB 1.1 Controller - 1014 0540 ThinkPad R40e (2684-HVG) builtin USB - 103c 0024 Pavilion ze4400 builtin USB - 5239 USB 2.0 Controller - 5243 M1541 PCI to AGP Controller - 5246 AGP8X Controller - 5247 PCI to AGP Controller - 5249 M5249 HTT to PCI Bridge - 5251 M5251 P1394 OHCI 1.0 Controller - 5253 M5253 P1394 OHCI 1.1 Controller - 5261 M5261 Ethernet Controller - 5263 M5263 Ethernet Controller - 5281 ALi M5281 Serial ATA / RAID Host 
Controller - 5287 ULi 5287 SATA - 5289 ULi 5289 SATA - 5450 Lucent Technologies Soft Modem AMR - 5451 M5451 PCI AC-Link Controller Audio Device - 1014 0506 ThinkPad R30 - 1014 053e ThinkPad R40e (2684-HVG) builtin Audio - 103c 0024 Pavilion ze4400 builtin Audio - 10b9 5451 HP Compaq nc4010 (DY885AA#ABN) - 5453 M5453 PCI AC-Link Controller Modem Device - 5455 M5455 PCI AC-Link Controller Audio Device - 5457 M5457 AC'97 Modem Controller - 1014 0535 ThinkPad R40e (2684-HVG) builtin modem - 103c 0024 Pavilion ze4400 builtin Modem Device -# Same but more usefull for driver's lookup - 5459 SmartLink SmartPCI561 56K Modem -# SmartLink PCI SoftModem - 545a SmartLink SmartPCI563 56K Modem - 5471 M5471 Memory Stick Controller - 5473 M5473 SD-MMC Controller - 7101 M7101 Power Management Controller [PMU] - 1014 0510 ThinkPad R30 - 1014 053c ThinkPad R40e (2684-HVG) Power Management Controller - 103c 0024 Pavilion ze4400 -10ba Mitsubishi Electric Corp. - 0301 AccelGraphics AccelECLIPSE - 0304 AccelGALAXY A2100 [OEM Evans & Sutherland] - 0308 Tornado 3000 [OEM Evans & Sutherland] - 1002 VG500 [VolumePro Volume Rendering Accelerator] -10bb Dapha Electronics Corporation -10bc Advanced Logic Research -10bd Surecom Technology - 0e34 NE-34 -10be Tseng Labs International Co. -10bf Most Inc -10c0 Boca Research Inc. -10c1 ICM Co., Ltd. -10c2 Auspex Systems Inc. -10c3 Samsung Semiconductors, Inc. - 1100 Smartether100 SC1100 LAN Adapter (i82557B) -10c4 Award Software International Inc. -10c5 Xerox Corporation -10c6 Rambus Inc. 
-10c7 Media Vision -10c8 Neomagic Corporation - 0001 NM2070 [MagicGraph 128] - 0002 NM2090 [MagicGraph 128V] - 0003 NM2093 [MagicGraph 128ZV] - 0004 NM2160 [MagicGraph 128XD] - 1014 00ba MagicGraph 128XD - 1025 1007 MagicGraph 128XD - 1028 0074 MagicGraph 128XD - 1028 0075 MagicGraph 128XD - 1028 007d MagicGraph 128XD - 1028 007e MagicGraph 128XD - 1033 802f MagicGraph 128XD - 104d 801b MagicGraph 128XD - 104d 802f MagicGraph 128XD - 104d 830b MagicGraph 128XD - 10ba 0e00 MagicGraph 128XD - 10c8 0004 MagicGraph 128XD - 10cf 1029 MagicGraph 128XD - 10f7 8308 MagicGraph 128XD - 10f7 8309 MagicGraph 128XD - 10f7 830b MagicGraph 128XD - 10f7 830d MagicGraph 128XD - 10f7 8312 MagicGraph 128XD - 0005 NM2200 [MagicGraph 256AV] - 1014 00dd ThinkPad 570 - 1028 0088 Latitude CPi A - 0006 NM2360 [MagicMedia 256ZX] - 0016 NM2380 [MagicMedia 256XL+] - 10c8 0016 MagicMedia 256XL+ - 0025 NM2230 [MagicGraph 256AV+] - 0083 NM2093 [MagicGraph 128ZV+] - 8005 NM2200 [MagicMedia 256AV Audio] - 0e11 b0d1 MagicMedia 256AV Audio Device on Discovery - 0e11 b126 MagicMedia 256AV Audio Device on Durango - 1014 00dd MagicMedia 256AV Audio Device on BlackTip Thinkpad - 1025 1003 MagicMedia 256AV Audio Device on TravelMate 720 - 1028 0088 Latitude CPi A - 1028 008f MagicMedia 256AV Audio Device on Colorado Inspiron - 103c 0007 MagicMedia 256AV Audio Device on Voyager II - 103c 0008 MagicMedia 256AV Audio Device on Voyager III - 103c 000d MagicMedia 256AV Audio Device on Omnibook 900 - 10c8 8005 MagicMedia 256AV Audio Device on FireAnt - 110a 8005 MagicMedia 256AV Audio Device - 14c0 0004 MagicMedia 256AV Audio Device - 8006 NM2360 [MagicMedia 256ZX Audio] - 8016 NM2380 [MagicMedia 256XL+ Audio] -10c9 Dataexpert Corporation -10ca Fujitsu Microelectr., Inc. 
-10cb Omron Corporation -# nee Mentor ARC Inc -10cc Mai Logic Incorporated - 0660 Articia S Host Bridge - 0661 Articia S PCI Bridge -10cd Advanced System Products, Inc - 1100 ASC1100 - 1200 ASC1200 [(abp940) Fast SCSI-II] - 1300 ABP940-U / ABP960-U - 10cd 1310 ASC1300 SCSI Adapter - 2300 ABP940-UW - 2500 ABP940-U2W -10ce Radius -# nee Citicorp TTI -10cf Fujitsu Limited. - 2001 mb86605 -10d1 FuturePlus Systems Corp. -10d2 Molex Incorporated -10d3 Jabil Circuit Inc -10d4 Hualon Microelectronics -10d5 Autologic Inc. -10d6 Cetia -10d7 BCM Advanced Research -10d8 Advanced Peripherals Labs -10d9 Macronix, Inc. [MXIC] - 0431 MX98715 - 0512 MX98713 - 0531 MX987x5 - 1186 1200 DFE-540TX ProFAST 10/100 Adapter - 8625 MX86250 - 8888 MX86200 -10da Compaq IPG-Austin - 0508 TC4048 Token Ring 4/16 - 3390 Tl3c3x9 -10db Rohm LSI Systems, Inc. -10dc CERN/ECP/EDU - 0001 STAR/RD24 SCI-PCI (PMC) - 0002 TAR/RD24 SCI-PCI (PMC) - 0021 HIPPI destination - 0022 HIPPI source - 10dc ATT2C15-3 FPGA -10dd Evans & Sutherland -10de nVidia Corporation - 0008 NV1 [EDGE 3D] - 0009 NV1 [EDGE 3D] - 0010 NV2 [Mutara V08] - 0020 NV4 [RIVA TNT] - 1043 0200 V3400 TNT - 1048 0c18 Erazor II SGRAM - 1048 0c1b Erazor II - 1092 0550 Viper V550 - 1092 0552 Viper V550 - 1092 4804 Viper V550 - 1092 4808 Viper V550 - 1092 4810 Viper V550 - 1092 4812 Viper V550 - 1092 4815 Viper V550 - 1092 4820 Viper V550 with TV out - 1092 4822 Viper V550 - 1092 4904 Viper V550 - 1092 4914 Viper V550 - 1092 8225 Viper V550 - 10b4 273d Velocity 4400 - 10b4 273e Velocity 4400 - 10b4 2740 Velocity 4400 - 10de 0020 Riva TNT - 1102 1015 Graphics Blaster CT6710 - 1102 1016 Graphics Blaster RIVA TNT - 0028 NV5 [RIVA TNT2/TNT2 Pro] - 1043 0200 AGP-V3800 SGRAM - 1043 0201 AGP-V3800 SDRAM - 1043 0205 PCI-V3800 - 1043 4000 AGP-V3800PRO - 1048 0c21 Synergy II - 1048 0c31 Erazor III - 107d 2134 WinFast 3D S320 II + TV-Out - 1092 4804 Viper V770 - 1092 4a00 Viper V770 - 1092 4a02 Viper V770 Ultra - 1092 5a00 RIVA TNT2/TNT2 Pro - 1092 6a02 Viper 
V770 Ultra - 1092 7a02 Viper V770 Ultra - 10de 0005 RIVA TNT2 Pro - 10de 000f Compaq NVIDIA TNT2 Pro - 1102 1020 3D Blaster RIVA TNT2 - 1102 1026 3D Blaster RIVA TNT2 Digital - 14af 5810 Maxi Gamer Xentor - 0029 NV5 [RIVA TNT2 Ultra] - 1043 0200 AGP-V3800 Deluxe - 1043 0201 AGP-V3800 Ultra SDRAM - 1043 0205 PCI-V3800 Ultra - 1102 1021 3D Blaster RIVA TNT2 Ultra - 1102 1029 3D Blaster RIVA TNT2 Ultra - 1102 102f 3D Blaster RIVA TNT2 Ultra - 14af 5820 Maxi Gamer Xentor 32 - 002a NV5 [Riva TnT2] - 002b NV5 [Riva TnT2] - 002c NV6 [Vanta/Vanta LT] - 1043 0200 AGP-V3800 Combat SDRAM - 1043 0201 AGP-V3800 Combat - 1092 6820 Viper V730 - 1102 1031 CT6938 VANTA 8MB - 1102 1034 CT6894 VANTA 16MB - 14af 5008 Maxi Gamer Phoenix 2 - 002d NV5M64 [RIVA TNT2 Model 64/Model 64 Pro] - 1043 0200 AGP-V3800M - 1043 0201 AGP-V3800M - 1048 0c3a Erazor III LT - 10de 001e M64 AGP4x - 1102 1023 CT6892 RIVA TNT2 Value - 1102 1024 CT6932 RIVA TNT2 Value 32Mb - 1102 102c CT6931 RIVA TNT2 Value [Jumper] - 1462 8808 MSI-8808 - 1554 1041 Pixelview RIVA TNT2 M64 - 1569 002d Palit Microsystems Daytona TNT2 M64 - 002e NV6 [Vanta] - 002f NV6 [Vanta] - 0034 MCP04 SMBus - 0035 MCP04 IDE - 0036 MCP04 Serial ATA Controller - 0037 MCP04 Ethernet Controller - 0038 MCP04 Ethernet Controller - 003a MCP04 AC'97 Audio Controller - 003b MCP04 USB Controller - 003c MCP04 USB Controller - 003d MCP04 PCI Bridge - 003e MCP04 Serial ATA Controller - 0040 nv40 [GeForce 6800 Ultra] - 0041 NV40 [GeForce 6800] - 0042 NV40.2 - 0043 NV40.3 - 0045 NV40 [GeForce 6800 GT] - 0049 NV40GL - 004e NV40GL [Quadro FX 4000] - 0051 CK804 ISA Bridge - 0052 CK804 SMBus - 0053 CK804 IDE - 0054 CK804 Serial ATA Controller - 0055 CK804 Serial ATA Controller - 0056 CK804 Ethernet Controller - 0057 CK804 Ethernet Controller - 0059 CK804 AC'97 Audio Controller - 005a CK804 USB Controller - 005b CK804 USB Controller - 005c CK804 PCI Bridge - 005d CK804 PCIE Bridge - 005e CK804 Memory Controller - 0060 nForce2 ISA Bridge - 1043 80ad A7N8X 
Mainboard - 0064 nForce2 SMBus (MCP) - 0065 nForce2 IDE - 0066 nForce2 Ethernet Controller - 1043 80a7 A7N8X Mainboard onboard nForce2 Ethernet - 0067 nForce2 USB Controller - 1043 0c11 A7N8X Mainboard - 0068 nForce2 USB Controller - 1043 0c11 A7N8X Mainboard - 006a nForce2 AC97 Audio Controler (MCP) - 006b nForce Audio Processing Unit - 10de 006b nForce2 MCP Audio Processing Unit - 006c nForce2 External PCI Bridge - 006d nForce2 PCI Bridge - 006e nForce2 FireWire (IEEE 1394) Controller - 0084 MCP2A SMBus - 0085 MCP2A IDE - 0086 MCP2A Ethernet Controller - 0087 MCP2A USB Controller - 0088 MCP2A USB Controller - 008a MCP2S AC'97 Audio Controller - 008b MCP2A PCI Bridge - 008c MCP2A Ethernet Controller - 008e nForce2 Serial ATA Controller - 00a0 NV5 [Aladdin TNT2] - 14af 5810 Maxi Gamer Xentor - 00c0 NV41.0 - 00c1 NV41.1 - 00c2 NV41.2 - 00c8 NV41.8 - 00ce NV41GL - 00d0 nForce3 LPC Bridge - 00d1 nForce3 Host Bridge - 00d2 nForce3 AGP Bridge - 00d3 CK804 Memory Controller - 00d4 nForce3 SMBus - 00d5 nForce3 IDE - 00d6 nForce3 Ethernet - 00d7 nForce3 USB 1.1 - 00d8 nForce3 USB 2.0 - 00da nForce3 Audio - 00dd nForce3 PCI Bridge - 00df CK8S Ethernet Controller - 00e0 nForce3 250Gb LPC Bridge - 00e1 nForce3 250Gb Host Bridge - 00e2 nForce3 250Gb AGP Host to PCI Bridge - 00e3 CK8S Serial ATA Controller (v2.5) - 00e4 nForce 250Gb PCI System Management - 00e5 CK8S Parallel ATA Controller (v2.5) - 00e6 CK8S Ethernet Controller - 00e7 CK8S USB Controller - 00e8 nForce3 EHCI USB 2.0 Controller - 00ea nForce3 250Gb AC'97 Audio Controller - 00ed nForce3 250Gb PCI-to-PCI Bridge - 00ee CK8S Serial ATA Controller (v2.5) - 00f0 NV40 [GeForce 6800/GeForce 6800 Ultra] - 00f1 NV43 [GeForce 6600/GeForce 6600 GT] - 00f2 NV43 [GeForce 6600 GT] - 00f8 NV45GL [Quadro FX 3400] - 00f9 NV40 [GeForce 6800 Ultra/GeForce 6800 GT] - 1682 2120 GEFORCE 6800 GT PCI-E - 00fa NV36 [GeForce PCX 5750] - 00fb NV35 [GeForce PCX 5900] - 00fc NV37GL [Quadro FX 330/GeForce PCX 5300] - 00fd NV37GL [Quadro FX 
330] - 00fe NV38GL [Quadro FX 1300] - 00ff NV18 [GeForce PCX 4300] - 0100 NV10 [GeForce 256 SDR] - 1043 0200 AGP-V6600 SGRAM - 1043 0201 AGP-V6600 SDRAM - 1043 4008 AGP-V6600 SGRAM - 1043 4009 AGP-V6600 SDRAM - 1102 102d CT6941 GeForce 256 - 14af 5022 3D Prophet SE - 0101 NV10DDR [GeForce 256 DDR] - 1043 0202 AGP-V6800 DDR - 1043 400a AGP-V6800 DDR SGRAM - 1043 400b AGP-V6800 DDR SDRAM - 107d 2822 WinFast GeForce 256 - 1102 102e CT6971 GeForce 256 DDR - 14af 5021 3D Prophet DDR-DVI - 0103 NV10GL [Quadro] - 0110 NV11 [GeForce2 MX/MX 400] - 1043 4015 AGP-V7100 Pro - 1043 4031 V7100 Pro with TV output - 10de 0091 Dell OEM GeForce 2 MX 400 - 1462 8817 MSI GeForce2 MX400 Pro32S [MS-8817] - 14af 7102 3D Prophet II MX - 14af 7103 3D Prophet II MX Dual-Display - 0111 NV11DDR [GeForce2 MX 100 DDR/200 DDR] - 0112 NV11 [GeForce2 Go] - 0113 NV11GL [Quadro2 MXR/EX] - 0140 NV43 [MSI NX6600GT-TD128E] - 014f NV43 [GeForce 6200] - 0150 NV15 [GeForce2 GTS/Pro] - 1043 4016 V7700 AGP Video Card - 107d 2840 WinFast GeForce2 GTS with TV output - 107d 2842 WinFast GeForce 2 Pro - 1462 8831 Creative GeForce2 Pro - 0151 NV15DDR [GeForce2 Ti] - 1043 405f V7700Ti - 1462 5506 Creative 3D Blaster Geforce2 Titanium - 0152 NV15BR [GeForce2 Ultra, Bladerunner] - 1048 0c56 GLADIAC Ultra - 0153 NV15GL [Quadro2 Pro] - 0170 NV17 [GeForce4 MX 460] - 0171 NV17 [GeForce4 MX 440] - 10b0 0002 Gainward Pro/600 TV - 1462 8661 G4MX440-VTP - 1462 8730 MX440SES-T (MS-8873) - 147b 8f00 Abit Siluro GeForce4MX440 - 0172 NV17 [GeForce4 MX 420] - 0173 NV17 [GeForce4 MX 440-SE] - 0174 NV17 [GeForce4 440 Go] - 0175 NV17 [GeForce4 420 Go] - 0176 NV17 [GeForce4 420 Go 32M] - 4c53 1090 Cx9 / Vx9 mainboard - 0177 NV17 [GeForce4 460 Go] - 0178 NV17GL [Quadro4 550 XGL] - 0179 NV17 [GeForce4 440 Go 64M] - 10de 0179 GeForce4 MX (Mac) - 017a NV17GL [Quadro4 200/400 NVS] - 017b NV17GL [Quadro4 550 XGL] - 017c NV17GL [Quadro4 550 GoGL] - 017d NV17 [GeForce4 410 Go 16M] - 0181 NV18 [GeForce4 MX 440 AGP 8x] - 1043 806f V9180 
Magic - 1462 8880 MS-StarForce GeForce4 MX 440 with AGP8X - 1462 8900 MS-8890 GeForce 4 MX440 AGP8X - 1462 9350 MSI Geforce4 MX T8X with AGP8X - 147b 8f0d Siluro GF4 MX-8X - 0182 NV18 [GeForce4 MX 440SE AGP 8x] - 0183 NV18 [GeForce4 MX 420 AGP 8x] - 0185 NV18 [GeForce4 MX 4000 AGP 8x] - 0186 NV18M [GeForce4 448 Go] - 0187 NV18M [GeForce4 488 Go] - 0188 NV18GL [Quadro4 580 XGL] - 018a NV18GL [Quadro4 NVS AGP 8x] - 018b NV18GL [Quadro4 380 XGL] - 018d NV18M [GeForce4 448 Go] - 01a0 NVCrush11 [GeForce2 MX Integrated Graphics] - 01a4 nForce CPU bridge - 01ab nForce 420 Memory Controller (DDR) - 01ac nForce 220/420 Memory Controller - 01ad nForce 220/420 Memory Controller - 01b0 nForce Audio - 01b1 nForce Audio - 01b2 nForce ISA Bridge - 01b4 nForce PCI System Management - 01b7 nForce AGP to PCI Bridge - 01b8 nForce PCI-to-PCI bridge - 01bc nForce IDE - 01c1 nForce AC'97 Modem Controller - 01c2 nForce USB Controller - 01c3 nForce Ethernet Controller - 01e0 nForce2 AGP (different version?) - 01e8 nForce2 AGP - 01ea nForce2 Memory Controller 0 - 01eb nForce2 Memory Controller 1 - 01ec nForce2 Memory Controller 2 - 01ed nForce2 Memory Controller 3 - 01ee nForce2 Memory Controller 4 - 01ef nForce2 Memory Controller 5 - 01f0 NV18 [GeForce4 MX - nForce GPU] - 0200 NV20 [GeForce3] - 1043 402f AGP-V8200 DDR - 0201 NV20 [GeForce3 Ti 200] - 0202 NV20 [GeForce3 Ti 500] - 1043 405b V8200 T5 - 1545 002f Xtasy 6964 - 0203 NV20DCC [Quadro DCC] - 0240 C51 PCI Express Bridge - 0241 C51 PCI Express Bridge - 0242 C51 PCI Express Bridge - 0243 C51 PCI Express Bridge - 0244 C51 PCI Express Bridge - 0245 C51 PCI Express Bridge - 0246 C51 PCI Express Bridge - 0247 C51 PCI Express Bridge - 0248 C51 PCI Express Bridge - 0249 C51 PCI Express Bridge - 024a C51 PCI Express Bridge - 024b C51 PCI Express Bridge - 024c C51 PCI Express Bridge - 024d C51 PCI Express Bridge - 024e C51 PCI Express Bridge - 024f C51 PCI Express Bridge - 0250 NV25 [GeForce4 Ti 4600] - 0251 NV25 [GeForce4 Ti 4400] - 1043 
8023 v8440 GeForce 4 Ti4400 - 0252 NV25 [GeForce4 Ti] - 0253 NV25 [GeForce4 Ti 4200] - 107d 2896 WinFast A250 LE TD (Dual VGA/TV-out/DVI) - 147b 8f09 Siluro (Dual VGA/TV-out/DVI) - 0258 NV25GL [Quadro4 900 XGL] - 0259 NV25GL [Quadro4 750 XGL] - 025b NV25GL [Quadro4 700 XGL] - 0260 MCP51 LPC Bridge - 0261 MCP51 LPC Bridge - 0262 MCP51 LPC Bridge - 0263 MCP51 LPC Bridge - 0264 MCP51 SMBus - 0265 MCP51 IDE - 0266 MCP51 Serial ATA Controller - 0267 MCP51 Serial ATA Controller - 0268 MCP51 Ethernet Controller - 0269 MCP51 Ethernet Controller - 026a MCP51 MCI - 026b MCP51 AC97 Audio Controller - 026c MCP51 High Definition Audio - 026d MCP51 USB Controller - 026e MCP51 USB Controller - 026f MCP51 PCI Bridge - 0270 MCP51 Host Bridge - 0271 MCP51 PMU - 0272 MCP51 Memory Controller 0 - 027e C51 Memory Controller 2 - 027f C51 Memory Controller 3 - 0280 NV28 [GeForce4 Ti 4800] - 0281 NV28 [GeForce4 Ti 4200 AGP 8x] - 0282 NV28 [GeForce4 Ti 4800 SE] - 0286 NV28 [GeForce4 Ti 4200 Go AGP 8x] - 0288 NV28GL [Quadro4 980 XGL] - 0289 NV28GL [Quadro4 780 XGL] - 028c NV28GLM [Quadro4 700 GoGL] - 02f0 C51 Host Bridge - 02f1 C51 Host Bridge - 02f2 C51 Host Bridge - 02f3 C51 Host Bridge - 02f4 C51 Host Bridge - 02f5 C51 Host Bridge - 02f6 C51 Host Bridge - 02f7 C51 Host Bridge - 02f8 C51 Memory Controller 5 - 02f9 C51 Memory Controller 4 - 02fa C51 Memory Controller 0 - 02fb C51 PCI Express Bridge - 02fc C51 PCI Express Bridge - 02fd C51 PCI Express Bridge - 02fe C51 Memory Controller 1 - 02ff C51 Host Bridge - 0300 NV30 [GeForce FX] - 0301 NV30 [GeForce FX 5800 Ultra] - 0302 NV30 [GeForce FX 5800] - 0308 NV30GL [Quadro FX 2000] - 0309 NV30GL [Quadro FX 1000] - 0311 NV31 [GeForce FX 5600 Ultra] - 0312 NV31 [GeForce FX 5600] - 0313 NV31 - 0314 NV31 [GeForce FX 5600XT] - 1043 814a V9560XT/TD - 0316 NV31 - 0317 NV31 - 031a NV31M [GeForce FX Go 5600] - 031b NV31M [GeForce FX Go5650] - 031c NVIDIA Quadro FX 700 Go - 031d NV31 - 031e NV31 - 031f NV31 - 0320 NV34 [GeForce FX 5200] - 0321 NV34 
[GeForce FX 5200 Ultra] - 0322 NV34 [GeForce FX 5200] - 1462 9171 MS-8917 (FX5200-T128) - 0323 NV34 [GeForce FX 5200LE] - 0324 NV34M [GeForce FX Go 5200] - 1071 8160 MIM2000 - 0325 NV34M [GeForce FX Go5250] - 0326 NV34 [GeForce FX 5500] - 0327 NV34 [GeForce FX 5100] - 0328 NV34M [GeForce FX Go 5200] - 0329 NV34M [GeForce FX Go5200] - 032a NV34GL [Quadro NVS 280 PCI] - 032b NV34GL [Quadro FX 500/600 PCI] - 032c NV34GLM [GeForce FX Go 5300] - 032d NV34 [GeForce FX Go5100] - 032f NV34 - 0330 NV35 [GeForce FX 5900 Ultra] - 0331 NV35 [GeForce FX 5900] - 1043 8145 V9950GE - 0332 NV35 [GeForce FX 5900XT] - 0333 NV38 [GeForce FX 5950 Ultra] - 0334 NV35 [GeForce FX 5900ZT] - 0338 NV35GL [Quadro FX 3000] - 033f NV35GL [Quadro FX 700] - 0341 NV36.1 [GeForce FX 5700 Ultra] - 0342 NV36.2 [GeForce FX 5700] - 0343 NV36 [GeForce FX 5700LE] - 0344 NV36.4 [GeForce FX 5700VE] - 0345 NV36.5 - 0347 NV36 [GeForce FX Go5700] - 0348 NV36 [GeForce FX Go5700] - 0349 NV36 - 034b NV36 - 034c NV36 [Quadro FX Go1000] - 034e NV36GL [Quadro FX 1100] - 034f NV36GL -10df Emulex Corporation - 1ae5 LP6000 Fibre Channel Host Adapter - 1ae6 LP 8000 Fibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:1-2) - 1ae7 LP 8000 Fibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:2-3) - f005 LP1150e Fibre Channel Host Adapter - f085 LP850 Fibre Channel Host Adapter - f095 LP952 Fibre Channel Host Adapter - f098 LP982 Fibre Channel Host Adapter - f0a5 LP1050 Fibre Channel Host Adapter - f0d5 LP1150 Fibre Channel Host Adapter - f100 LP11000e Fibre Channel Host Adapter - f700 LP7000 Fibre Channel Host Adapter - f701 LP 7000EFibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:1-2) - f800 LP8000 Fibre Channel Host Adapter - f801 LP 8000 Fibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:1-2) - f900 LP9000 Fibre Channel Host Adapter - f901 LP 9000 Fibre Channel Host Adapter Alternate ID (JX1:2-3, JX2:1-2) - f980 LP9802 Fibre Channel Host Adapter - f981 LP 9802 Fibre Channel Host Adapter Alternate ID - f982 LP 
9802 Fibre Channel Host Adapter Alternate ID - fa00 LP10000 Fibre Channel Host Adapter - fa01 LP101 Fibre Channel Host Adapter - fd00 LP11000 Fibre Channel Host Adapter -10e0 Integrated Micro Solutions Inc. - 5026 IMS5026/27/28 - 5027 IMS5027 - 5028 IMS5028 - 8849 IMS8849 - 8853 IMS8853 - 9128 IMS9128 [Twin turbo 128] -10e1 Tekram Technology Co.,Ltd. - 0391 TRM-S1040 - 10e1 0391 DC-315U SCSI-3 Host Adapter - 690c DC-690c - dc29 DC-290 -10e2 Aptix Corporation -10e3 Tundra Semiconductor Corp. - 0000 CA91C042 [Universe] - 0860 CA91C860 [QSpan] - 0862 CA91C862A [QSpan-II] - 8260 CA91L8200B [Dual PCI PowerSpan II] - 8261 CA91L8260B [Single PCI PowerSpan II] -10e4 Tandem Computers -10e5 Micro Industries Corporation -10e6 Gainbery Computer Products Inc. -10e7 Vadem -10e8 Applied Micro Circuits Corp. - 1072 INES GPIB-PCI (AMCC5920 based) - 2011 Q-Motion Video Capture/Edit board - 4750 S5930 [Matchmaker] - 5920 S5920 - 8043 LANai4.x [Myrinet LANai interface chip] - 8062 S5933_PARASTATION - 807d S5933 [Matchmaker] - 8088 Kongsberg Spacetec Format Synchronizer - 8089 Kongsberg Spacetec Serial Output Board - 809c S5933_HEPC3 - 80d7 PCI-9112 - 80d9 PCI-9118 - 80da PCI-9812 - 811a PCI-IEEE1355-DS-DE Interface - 814c Fastcom ESCC-PCI (Commtech, Inc.) - 8170 S5933 [Matchmaker] (Chipset Development Tool) -# sold with Roper Scientifc(Photometrics) CoolSnap HQ camera - 81e6 Multimedia video controller - 8291 Fastcom 232/8-PCI (Commtech, Inc.) - 82c4 Fastcom 422/4-PCI (Commtech, Inc.) - 82c5 Fastcom 422/2-PCI (Commtech, Inc.) - 82c6 Fastcom IG422/1-PCI (Commtech, Inc.) - 82c7 Fastcom IG232/2-PCI (Commtech, Inc.) - 82ca Fastcom 232/4-PCI (Commtech, Inc.) - 82db AJA HDNTV HD SDI Framestore - 82e2 Fastcom DIO24H-PCI (Commtech, Inc.) - 8851 S5933 on Innes Corp FM Radio Capture card -10e9 Alps Electric Co., Ltd. 
-10ea Intergraphics Systems - 1680 IGA-1680 - 1682 IGA-1682 - 1683 IGA-1683 - 2000 CyberPro 2000 - 2010 CyberPro 2000A - 5000 CyberPro 5000 - 5050 CyberPro 5050 - 5202 CyberPro 5202 -# CyberPro5202 Audio Function - 5252 CyberPro5252 -10eb Artists Graphics - 0101 3GA - 8111 Twist3 Frame Grabber -10ec Realtek Semiconductor Co., Ltd. - 8029 RTL-8029(AS) - 10b8 2011 EZ-Card (SMC1208) - 10ec 8029 RTL-8029(AS) - 1113 1208 EN1208 - 1186 0300 DE-528 - 1259 2400 AT-2400 - 8129 RTL-8129 - 10ec 8129 RT8129 Fast Ethernet Adapter - 8138 RT8139 (B/C) Cardbus Fast Ethernet Adapter - 10ec 8138 RT8139 (B/C) Fast Ethernet Adapter - 8139 RTL-8139/8139C/8139C+ - 0357 000a TTP-Monitoring Card V2.0 - 1025 005a TravelMate 290 - 1025 8920 ALN-325 - 1025 8921 ALN-325 - 1071 8160 MIM2000 - 10bd 0320 EP-320X-R - 10ec 8139 RT8139 - 1113 ec01 FNC-0107TX - 1186 1300 DFE-538TX - 1186 1320 SN5200 - 1186 8139 DRN-32TX - 11f6 8139 FN22-3(A) LinxPRO Ethernet Adapter - 1259 2500 AT-2500TX - 1259 2503 AT-2500TX/ACPI - 1429 d010 ND010 - 1432 9130 EN-9130TX - 1436 8139 RT8139 - 1458 e000 GA-7VM400M/7VT600 Motherboard - 146c 1439 FE-1439TX - 1489 6001 GF100TXRII - 1489 6002 GF100TXRA - 149c 139a LFE-8139ATX - 149c 8139 LFE-8139TX - 14cb 0200 LNR-100 Family 10/100 Base-TX Ethernet - 1799 5000 F5D5000 PCI Card/Desktop Network PCI Card - 2646 0001 EtheRx - 8e2e 7000 KF-230TX - 8e2e 7100 KF-230TX/2 - a0a0 0007 ALN-325C - 8169 RTL-8169 Gigabit Ethernet - 1259 c107 CG-LAPCIGT - 1371 434e ProG-2000L - 1458 e000 GA-K8VT800 Pro Motherboard - 1462 702c K8T NEO 2 motherboard - 8180 RTL8180L 802.11b MAC - 8197 SmartLAN56 56K Modem -10ed Ascii Corporation - 7310 V7310 -10ee Xilinx Corporation - 3fc0 RME Digi96 - 3fc1 RME Digi96/8 - 3fc2 RME Digi96/8 Pro - 3fc3 RME Digi96/8 Pad - 3fc4 RME Digi9652 (Hammerfall) - 3fc5 RME Hammerfall DSP - 3fc6 RME Hammerfall DSP MADI - 8381 Ellips Santos Frame Grabber -10ef Racore Computer Products, Inc. 
- 8154 M815x Token Ring Adapter -10f0 Peritek Corporation -10f1 Tyan Computer -10f2 Achme Computer, Inc. -10f3 Alaris, Inc. -10f4 S-MOS Systems, Inc. -10f5 NKK Corporation - a001 NDR4000 [NR4600 Bridge] -10f6 Creative Electronic Systems SA -10f7 Matsushita Electric Industrial Co., Ltd. -10f8 Altos India Ltd -10f9 PC Direct -10fa Truevision - 000c TARGA 1000 -10fb Thesys Gesellschaft für Mikroelektronik mbH - 186f TH 6255 -10fc I-O Data Device, Inc. -# What's in the cardbus end of a Sony ACR-A01 card, comes with newer Vaio CD-RW drives - 0003 Cardbus IDE Controller - 0005 Cardbus SCSI CBSC II -10fd Soyo Computer, Inc -10fe Fast Multimedia AG -10ff NCube -1100 Jazz Multimedia -1101 Initio Corporation - 1060 INI-A100U2W - 9100 INI-9100/9100W - 9400 INI-940 - 9401 INI-950 - 9500 360P - 9502 Initio INI-9100UW Ultra Wide SCSI Controller INIC-950P chip -1102 Creative Labs - 0002 SB Live! EMU10k1 - 1102 0020 CT4850 SBLive! Value - 1102 0021 CT4620 SBLive! - 1102 002f SBLive! mainboard implementation - 1102 4001 E-mu APS - 1102 8022 CT4780 SBLive! Value - 1102 8023 CT4790 SoundBlaster PCI512 - 1102 8024 CT4760 SBLive! - 1102 8025 SBLive! Mainboard Implementation - 1102 8026 CT4830 SBLive! Value - 1102 8027 CT4832 SBLive! Value - 1102 8028 CT4760 SBLive! OEM version - 1102 8031 CT4831 SBLive! Value - 1102 8040 CT4760 SBLive! - 1102 8051 CT4850 SBLive! Value - 1102 8061 SBLive! Player 5.1 - 1102 8064 SB Live! 5.1 Model SB0100 - 1102 8065 SBLive! 5.1 Digital Model SB0220 - 1102 8067 SBLive! 5.1 eMicro 28028 - 0004 SB Audigy - 1102 0051 SB0090 Audigy Player - 1102 0053 SB0090 Audigy Player/OEM - 1102 0058 SB0090 Audigy Player/OEM - 1102 1007 SB0240 Audigy 2 Platinum 6.1 - 1102 2002 SB Audigy 2 ZS (SB0350) - 0006 [SB Live! Value] EMU10k1X - 0007 SB Audigy LS - 1102 1001 SB0310 Audigy LS - 1102 1002 SB0312 Audigy LS - 1102 1006 SB0410 SBLive! 24-bit - 0008 SB0400 Audigy2 Value - 4001 SB Audigy FireWire Port - 1102 0010 SB Audigy FireWire Port - 7002 SB Live! 
MIDI/Game Port - 1102 0020 Gameport Joystick - 7003 SB Audigy MIDI/Game port - 1102 0040 SB Audigy MIDI/Game Port - 7004 [SB Live! Value] Input device controller - 7005 SB Audigy LS MIDI/Game port - 1102 1001 SB0310 Audigy LS MIDI/Game port - 1102 1002 SB0312 Audigy LS MIDI/Game port - 8064 SB0100 [SBLive! 5.1 OEM] - 8938 Ectiva EV1938 - 1033 80e5 SlimTower-Jim (NEC) - 1071 7150 Mitac 7150 - 110a 5938 Siemens Scenic Mobile 510PIII - 13bd 100c Ceres-C (Sharp, Intel BX) - 13bd 100d Sharp, Intel Banister - 13bd 100e TwinHead P09S/P09S3 (Sharp) - 13bd f6f1 Marlin (Sharp) - 14ff 0e70 P88TE (TWINHEAD INTERNATIONAL Corp) - 14ff c401 Notebook 9100/9200/2000 (TWINHEAD INTERNATIONAL Corp) - 156d b400 G400 - Geo (AlphaTop (Taiwan)) - 156d b550 G560 (AlphaTop (Taiwan)) - 156d b560 G560 (AlphaTop (Taiwan)) - 156d b700 G700/U700 (AlphaTop (Taiwan)) - 156d b795 G795 (AlphaTop (Taiwan)) - 156d b797 G797 (AlphaTop (Taiwan)) -1103 Triones Technologies, Inc. - 0003 HPT343 - 0004 HPT366/368/370/370A/372/372N - 1103 0001 HPT370A - 1103 0003 HPT343 / HPT345 / HPT363 UDMA33 - 1103 0004 HPT366 UDMA66 (r1) / HPT368 UDMA66 (r2) / HPT370 UDMA100 (r3) / HPT370 UDMA100 RAID (r4) - 1103 0005 HPT370 UDMA100 - 1103 0006 HPT302 - 1103 0007 HPT371 UDMA133 - 1103 0008 HPT374 UDMA/ATA133 RAID Controller - 0005 HPT372A/372N - 0006 HPT302 - 0007 HPT371/371N - 0008 HPT374 - 0009 HPT372N -1104 RasterOps Corp. -1105 Sigma Designs, Inc. - 1105 REALmagic Xcard MPEG 1/2/3/4 DVD Decoder - 8300 REALmagic Hollywood Plus DVD Decoder - 8400 EM840x REALmagic DVD/MPEG-2 Audio/Video Decoder - 8401 EM8401 REALmagic DVD/MPEG-2 A/V Decoder - 8470 EM8470 REALmagic DVD/MPEG-4 A/V Decoder - 8471 EM8471 REALmagic DVD/MPEG-4 A/V Decoder - 8475 EM8475 REALmagic DVD/MPEG-4 A/V Decoder - 8476 EM8476 REALmagic DVD/MPEG-4 A/V Decoder - 8485 EM8485 REALmagic DVD/MPEG-4 A/V Decoder - 8486 EM8486 REALmagic DVD/MPEG-4 A/V Decoder -1106 VIA Technologies, Inc. 
- 0102 Embedded VIA Ethernet Controller - 0130 VT6305 1394.A Controller - 0305 VT8363/8365 [KT133/KM133] - 1043 8033 A7V Mainboard - 1043 803e A7V-E Mainboard - 1043 8042 A7V133/A7V133-C Mainboard - 147b a401 KT7/KT7-RAID/KT7A/KT7A-RAID Mainboard - 0391 VT8371 [KX133] - 0501 VT8501 [Apollo MVP4] - 0505 VT82C505 -# Shares chip with :0576. The VT82C576M has :1571 instead of :0561. - 0561 VT82C576MV - 0571 VT82C586A/B/VT82C686/A/B/VT823x/A/C PIPC Bus Master IDE - 1019 0985 P6VXA Motherboard - 1019 0a81 L7VTA v1.0 Motherboard (KT400-8235) - 1043 8052 VT8233A Bus Master ATA100/66/33 IDE - 1043 808c A7V8X motherboard - 1043 80a1 A7V8X-X motherboard rev. 1.01 - 1043 80ed A7V600 motherboard - 1106 0571 VT82C586/B/VT82C686/A/B/VT8233/A/C/VT8235 PIPC Bus Master IDE - 1179 0001 Magnia Z310 - 1297 f641 FX41 motherboard - 1458 5002 GA-7VAX Mainboard - 1462 7020 K8T NEO 2 motherboard - 147b 1407 KV8-MAX3 motherboard - 1849 0571 K7VT2 motherboard - 0576 VT82C576 3V [Apollo Master] - 0585 VT82C585VP [Apollo VP1/VPX] - 0586 VT82C586/A/B PCI-to-ISA [Apollo VP] - 1106 0000 MVP3 ISA Bridge - 0595 VT82C595 [Apollo VP2] - 0596 VT82C596 ISA [Mobile South] - 1106 0000 VT82C596/A/B PCI to ISA Bridge - 1458 0596 VT82C596/A/B PCI to ISA Bridge - 0597 VT82C597 [Apollo VP3] - 0598 VT82C598 [Apollo MVP3] - 0601 VT8601 [Apollo ProMedia] - 0605 VT8605 [ProSavage PM133] - 1043 802c CUV4X mainboard - 0680 VT82C680 [Apollo P6] - 0686 VT82C686 [Apollo Super South] - 1019 0985 P6VXA Motherboard - 1043 802c CUV4X mainboard - 1043 8033 A7V Mainboard - 1043 803e A7V-E Mainboard - 1043 8040 A7M266 Mainboard - 1043 8042 A7V133/A7V133-C Mainboard - 1106 0000 VT82C686/A PCI to ISA Bridge - 1106 0686 VT82C686/A PCI to ISA Bridge - 1179 0001 Magnia Z310 - 147b a702 KG7-Lite Mainboard - 0691 VT82C693A/694x [Apollo PRO133x] - 1019 0985 P6VXA Motherboard - 1179 0001 Magnia Z310 - 1458 0691 VT82C691 Apollo Pro System Controller - 0693 VT82C693 [Apollo Pro Plus] - 0698 VT82C693A [Apollo Pro133 AGP] - 0926 VT82C926 
[Amazon] - 1000 VT82C570MV - 1106 VT82C570MV - 1571 VT82C576M/VT82C586 - 1595 VT82C595/97 [Apollo VP2/97] - 3022 CLE266 -# This is *not* USB 2.0 as the existing entry suggests - 3038 VT82xxxxx UHCI USB 1.1 Controller - 0925 1234 USB Controller - 1019 0985 P6VXA Motherboard - 1019 0a81 L7VTA v1.0 Motherboard (KT400-8235) - 1043 808c VT6202 USB2.0 4 port controller - 1043 80a1 A7V8X-X motherboard - 1043 80ed A7V600 motherboard - 1179 0001 Magnia Z310 - 1458 5004 GA-7VAX Mainboard - 1462 7020 K8T NEO 2 motherboard - 147b 1407 KV8-MAX3 motherboard - 182d 201d CN-029 USB2.0 4 port PCI Card - 3040 VT82C586B ACPI - 3043 VT86C100A [Rhine] - 10bd 0000 VT86C100A Fast Ethernet Adapter - 1106 0100 VT86C100A Fast Ethernet Adapter - 1186 1400 DFE-530TX rev A - 3044 IEEE 1394 Host Controller - 1025 005a TravelMate 290 - 1458 1000 GA-7VT600-1394 Motherboard - 1462 702d K8T NEO 2 motherboard - 3050 VT82C596 Power Management - 3051 VT82C596 Power Management - 3053 VT6105M [Rhine-III] - 3057 VT82C686 [Apollo Super ACPI] - 1019 0985 P6VXA Motherboard - 1043 8033 A7V Mainboard - 1043 803e A7V-E Mainboard - 1043 8040 A7M266 Mainboard - 1043 8042 A7V133/A7V133-C Mainboard - 1179 0001 Magnia Z310 - 3058 VT82C686 AC97 Audio Controller - 0e11 0097 SoundMax Digital Integrated Audio - 0e11 b194 Soundmax integrated digital audio - 1019 0985 P6VXA Motherboard - 1043 1106 A7V133/A7V133-C Mainboard - 1106 4511 Onboard Audio on EP7KXA - 1458 7600 Onboard Audio - 1462 3091 MS-6309 Onboard Audio - 1462 3300 MS-6330 Onboard Audio - 15dd 7609 Onboard Audio - 3059 VT8233/A/8235/8237 AC97 Audio Controller - 1019 0a81 L7VTA v1.0 Motherboard (KT400-8235) - 1043 8095 A7V8X Motherboard (Realtek ALC650 codec) - 1043 80a1 A7V8X-X Motherboard - 1043 80b0 A7V600/K8V Deluxe motherboard (ADI AD1980 codec [SoundMAX]) - 1106 3059 L7VMM2 Motherboard - 1106 4161 K7VT2 motherboard - 1297 c160 FX41 motherboard (Realtek ALC650 codec) - 1458 a002 GA-7VAX Onboard Audio (Realtek ALC650) - 1462 0080 K8T NEO 2 motherboard - 
1462 3800 KT266 onboard audio - 147b 1407 KV8-MAX3 motherboard - 3065 VT6102 [Rhine-II] - 1043 80a1 A7V8X-X Motherboard - 1106 0102 VT6102 [Rhine II] Embeded Ethernet Controller on VT8235 - 1186 1400 DFE-530TX rev A - 1186 1401 DFE-530TX rev B - 13b9 1421 LD-10/100AL PCI Fast Ethernet Adapter (rev.B) -# This hosts more than just the Intel 537 codec, it also hosts PCtel (SIL33) and SmartLink (SIL34) codecs - 3068 AC'97 Modem Controller - 1462 309e MS-6309 Saturn Motherboard - 3074 VT8233 PCI to ISA Bridge - 1043 8052 VT8233A - 3091 VT8633 [Apollo Pro266] - 3099 VT8366/A/7 [Apollo KT266/A/333] - 1043 8064 A7V266-E Mainboard - 1043 807f A7V333 Mainboard - 1849 3099 K7VT2 motherboard - 3101 VT8653 Host Bridge - 3102 VT8662 Host Bridge - 3103 VT8615 Host Bridge - 3104 USB 2.0 - 1019 0a81 L7VTA v1.0 Motherboard (KT400-8235) - 1043 808c A7V8X motherboard - 1043 80a1 A7V8X-X motherboard rev 1.01 - 1043 80ed A7V600 motherboard - 1297 f641 FX41 motherboard - 1458 5004 GA-7VAX Mainboard - 1462 7020 K8T NEO 2 motherboard - 147b 1407 KV8-MAX3 motherboard - 182d 201d CN-029 USB 2.0 4 port PCI Card - 3106 VT6105 [Rhine-III] - 1186 1403 DFE-530TX rev C - 3108 S3 Unichrome Pro VGA Adapter - 3109 VT8233C PCI to ISA Bridge - 3112 VT8361 [KLE133] Host Bridge - 3116 VT8375 [KM266/KL266] Host Bridge - 1297 f641 FX41 motherboard - 3118 S3 Unichrome Pro VGA Adapter - 3119 VT6120/VT6121/VT6122 Gigabit Ethernet Adapter -# found on EPIA M6000/9000 mainboard - 3122 VT8623 [Apollo CLE266] integrated CastleRock graphics -# found on EPIA M6000/9000 mainboard - 3123 VT8623 [Apollo CLE266] - 3128 VT8753 [P4X266 AGP] - 3133 VT3133 Host Bridge - 3147 VT8233A ISA Bridge - 3148 P4M266 Host Bridge - 3149 VIA VT6420 SATA RAID Controller - 1043 80ed A7V600/K8V Deluxe motherboard - 1458 b003 GA-7VM400AM(F) Motherboard - 1462 7020 K8T Neo 2 Motherboard - 147b 1407 KV8-MAX3 motherboard - 3156 P/KN266 Host Bridge -# on ASUS P4P800 - 3164 VT6410 ATA133 RAID controller - 3168 VT8374 P4X400 Host Controller/AGP 
Bridge - 3177 VT8235 ISA Bridge - 1019 0a81 L7VTA v1.0 Motherboard (KT400-8235) - 1043 808c A7V8X motherboard - 1043 80a1 A7V8X-X motherboard - 1297 f641 FX41 motherboard - 1458 5001 GA-7VAX Mainboard - 1849 3177 K7VT2 motherboard - 3178 ProSavageDDR P4N333 Host Bridge - 3188 VT8385 [K8T800 AGP] Host Bridge - 1043 80a3 K8V Deluxe motherboard - 147b 1407 KV8-MAX3 motherboard - 3189 VT8377 [KT400/KT600 AGP] Host Bridge - 1043 807f A7V8X motherboard - 1458 5000 GA-7VAX Mainboard - 3204 K8M800 - 3205 VT8378 [KM400/A] Chipset Host Bridge - 1458 5000 GA-7VM400M Motherboard - 3218 K8T800M Host Bridge - 3227 VT8237 ISA bridge [KT600/K8T800 South] - 1043 80ed A7V600 motherboard - 1106 3227 DFI KT600-AL Motherboard - 1458 5001 GA-7VT600 Motherboard - 147b 1407 KV8-MAX3 motherboard - 3249 VT6421 IDE RAID Controller - 4149 VIA VT6420 (ATA133) Controller - 5030 VT82C596 ACPI [Apollo PRO] - 6100 VT85C100A [Rhine II] - 7204 K8M800 -# S3 Graphics UniChromeâ„¢ 2D/3D Graphics with motion compensation - 7205 VT8378 [S3 UniChrome] Integrated Video - 1458 d000 Gigabyte GA-7VM400(A)M(F) Motherboard - 8231 VT8231 [PCI-to-ISA Bridge] - 8235 VT8235 ACPI - 8305 VT8363/8365 [KT133/KM133 AGP] - 8391 VT8371 [KX133 AGP] - 8501 VT8501 [Apollo MVP4 AGP] - 8596 VT82C596 [Apollo PRO AGP] - 8597 VT82C597 [Apollo VP3 AGP] - 8598 VT82C598/694x [Apollo MVP3/Pro133x AGP] - 1019 0985 P6VXA Motherboard - 8601 VT8601 [Apollo ProMedia AGP] - 8605 VT8605 [PM133 AGP] - 8691 VT82C691 [Apollo Pro] - 8693 VT82C693 [Apollo Pro Plus] PCI Bridge - b091 VT8633 [Apollo Pro266 AGP] - b099 VT8366/A/7 [Apollo KT266/A/333 AGP] - b101 VT8653 AGP Bridge - b102 VT8362 AGP Bridge - b103 VT8615 AGP Bridge - b112 VT8361 [KLE133] AGP Bridge - b168 VT8235 PCI Bridge - b188 VT8237 PCI bridge [K8T800 South] - 147b 1407 KV8-MAX3 motherboard - b198 VT8237 PCI Bridge -# 32-Bit PCI bus master Ethernet MAC with standard MII interface - d104 VT8237 Integrated Fast Ethernet Controller -1107 Stratus Computers - 0576 VIA VT82C570MV 
[Apollo] (Wrong vendor ID!) -1108 Proteon, Inc. - 0100 p1690plus_AA - 0101 p1690plus_AB - 0105 P1690Plus - 0108 P1690Plus - 0138 P1690Plus - 0139 P1690Plus - 013c P1690Plus - 013d P1690Plus -1109 Cogent Data Technologies, Inc. - 1400 EM110TX [EX110TX] -110a Siemens Nixdorf AG - 0002 Pirahna 2-port - 0005 Tulip controller, power management, switch extender - 0006 FSC PINC (I/O-APIC) - 0015 FSC Multiprocessor Interrupt Controller - 001d FSC Copernicus Management Controller - 007b FSC Remote Service Controller, mailbox device - 007c FSC Remote Service Controller, shared memory device - 007d FSC Remote Service Controller, SMIC device -# Superfastcom-PCI (Commtech, Inc.) or DSCC4 WAN Adapter - 2102 DSCC4 PEB/PEF 20534 DMA Supported Serial Communication Controller with 4 Channels - 2104 Eicon Diva 2.02 compatible passive ISDN card - 3142 SIMATIC NET CP 5613A1 (Profibus Adapter) - 4021 SIMATIC NET CP 5512 (Profibus and MPI Cardbus Adapter) - 4029 SIMATIC NET CP 5613A2 (Profibus Adapter) - 4942 FPGA I-Bus Tracer for MBD - 6120 SZB6120 -110b Chromatic Research Inc. - 0001 Mpact Media Processor - 0004 Mpact 2 -110c Mini-Max Technology, Inc. -110d Znyx Advanced Systems -110e CPU Technology -110f Ross Technology -1110 Powerhouse Systems - 6037 Firepower Powerized SMP I/O ASIC - 6073 Firepower Powerized SMP I/O ASIC -1111 Santa Cruz Operation -# Also claimed to be RNS or Rockwell International, current PCISIG records list Osicom -1112 Osicom Technologies Inc - 2200 FDDI Adapter - 2300 Fast Ethernet Adapter - 2340 4 Port Fast Ethernet Adapter - 2400 ATM Adapter -1113 Accton Technology Corporation - 1211 SMC2-1211TX - 103c 1207 EN-1207D Fast Ethernet Adapter - 1113 1211 EN-1207D Fast Ethernet Adapter - 1216 EN-1216 Ethernet Adapter - 1113 2242 EN2242 10/100 Ethernet Mini-PCI Card - 111a 1020 SpeedStream 1020 PCI 10/100 Ethernet Adaptor [EN-1207F-TX ?] 
- 1217 EN-1217 Ethernet Adapter - 5105 10Mbps Network card - 9211 EN-1207D Fast Ethernet Adapter - 1113 9211 EN-1207D Fast Ethernet Adapter - 9511 21x4x DEC-Tulip compatible Fast Ethernet - d301 CPWNA100 (Philips wireless PCMCIA) - ec02 SMC 1244TX v3 -1114 Atmel Corporation - 0506 802.11b Wireless Network Adaptor (at76c506) -1115 3D Labs -1116 Data Translation - 0022 DT3001 - 0023 DT3002 - 0024 DT3003 - 0025 DT3004 - 0026 DT3005 - 0027 DT3001-PGL - 0028 DT3003-PGL -1117 Datacube, Inc - 9500 Max-1C SVGA card - 9501 Max-1C image processing -1118 Berg Electronics -1119 ICP Vortex Computersysteme GmbH - 0000 GDT 6000/6020/6050 - 0001 GDT 6000B/6010 - 0002 GDT 6110/6510 - 0003 GDT 6120/6520 - 0004 GDT 6530 - 0005 GDT 6550 - 0006 GDT 6117/6517 - 0007 GDT 6127/6527 - 0008 GDT 6537 - 0009 GDT 6557/6557-ECC - 000a GDT 6115/6515 - 000b GDT 6125/6525 - 000c GDT 6535 - 000d GDT 6555 - 0010 GDT 6115/6515 - 0011 GDT 6125/6525 - 0012 GDT 6535 - 0013 GDT 6555/6555-ECC - 0100 GDT 6117RP/6517RP - 0101 GDT 6127RP/6527RP - 0102 GDT 6537RP - 0103 GDT 6557RP - 0104 GDT 6111RP/6511RP - 0105 GDT 6121RP/6521RP - 0110 GDT 6117RD/6517RD - 0111 GDT 6127RD/6527RD - 0112 GDT 6537RD - 0113 GDT 6557RD - 0114 GDT 6111RD/6511RD - 0115 GDT 6121RD/6521RD - 0118 GDT 6118RD/6518RD/6618RD - 0119 GDT 6128RD/6528RD/6628RD - 011a GDT 6538RD/6638RD - 011b GDT 6558RD/6658RD - 0120 GDT 6117RP2/6517RP2 - 0121 GDT 6127RP2/6527RP2 - 0122 GDT 6537RP2 - 0123 GDT 6557RP2 - 0124 GDT 6111RP2/6511RP2 - 0125 GDT 6121RP2/6521RP2 - 0136 GDT 6113RS/6513RS - 0137 GDT 6123RS/6523RS - 0138 GDT 6118RS/6518RS/6618RS - 0139 GDT 6128RS/6528RS/6628RS - 013a GDT 6538RS/6638RS - 013b GDT 6558RS/6658RS - 013c GDT 6533RS/6633RS - 013d GDT 6543RS/6643RS - 013e GDT 6553RS/6653RS - 013f GDT 6563RS/6663RS - 0166 GDT 7113RN/7513RN/7613RN - 0167 GDT 7123RN/7523RN/7623RN - 0168 GDT 7118RN/7518RN/7518RN - 0169 GDT 7128RN/7528RN/7628RN - 016a GDT 7538RN/7638RN - 016b GDT 7558RN/7658RN - 016c GDT 7533RN/7633RN - 016d GDT 7543RN/7643RN - 016e 
GDT 7553RN/7653RN - 016f GDT 7563RN/7663RN - 01d6 GDT 4x13RZ - 01d7 GDT 4x23RZ - 01f6 GDT 8x13RZ - 01f7 GDT 8x23RZ - 01fc GDT 8x33RZ - 01fd GDT 8x43RZ - 01fe GDT 8x53RZ - 01ff GDT 8x63RZ - 0210 GDT 6519RD/6619RD - 0211 GDT 6529RD/6629RD - 0260 GDT 7519RN/7619RN - 0261 GDT 7529RN/7629RN - 02ff GDT MAXRP - 0300 GDT NEWRX -111a Efficient Networks, Inc - 0000 155P-MF1 (FPGA) - 0002 155P-MF1 (ASIC) - 0003 ENI-25P ATM - 111a 0000 ENI-25p Miniport ATM Adapter - 0005 SpeedStream (LANAI) - 111a 0001 ENI-3010 ATM - 111a 0009 ENI-3060 ADSL (VPI=0) - 111a 0101 ENI-3010 ATM - 111a 0109 ENI-3060CO ADSL (VPI=0) - 111a 0809 ENI-3060 ADSL (VPI=0 or 8) - 111a 0909 ENI-3060CO ADSL (VPI=0 or 8) - 111a 0a09 ENI-3060 ADSL (VPI=<0..15>) - 0007 SpeedStream ADSL - 111a 1001 ENI-3061 ADSL [ASIC] - 1203 SpeedStream 1023 Wireless PCI Adapter -111b Teledyne Electronic Systems -111c Tricord Systems Inc. - 0001 Powerbis Bridge -111d Integrated Device Technology, Inc. - 0001 IDT77201/77211 155Mbps ATM SAR Controller [NICStAR] - 0003 IDT77222/77252 155Mbps ATM MICRO ABR SAR Controller - 0004 IDT77V252 155Mbps ATM MICRO ABR SAR Controller - 0005 IDT77V222 155Mbps ATM MICRO ABR SAR Controller -111e Eldec -111f Precision Digital Images - 4a47 Precision MX Video engine interface - 5243 Frame capture bus interface -1120 EMC Corporation -1121 Zilog -1122 Multi-tech Systems, Inc. -1123 Excellent Design, Inc. -1124 Leutron Vision AG -1125 Eurocore -1126 Vigra -1127 FORE Systems Inc - 0200 ForeRunner PCA-200 ATM - 0210 PCA-200PC - 0250 ATM - 0300 ForeRunner PCA-200EPC ATM - 0310 ATM - 0400 ForeRunnerHE ATM Adapter - 1127 0400 ForeRunnerHE ATM -1129 Firmworks -112a Hermes Electronics Company, Ltd. -112b Linotype - Hell AG -112c Zenith Data Systems -112d Ravicad -112e Infomedia Microelectronics Inc. 
-112f Imaging Technology Inc - 0000 MVC IC-PCI - 0001 MVC IM-PCI Video frame grabber/processor -1130 Computervision -1131 Philips Semiconductors - 1561 USB 1.1 Host Controller - 1562 USB 2.0 Host Controller - 3400 SmartPCI56(UCB1500) 56K Modem - 5400 TriMedia TM1000/1100 - 5402 TriMedia TM-1300 - 1244 0f00 Fritz!Card DSL - 7130 SAA7130 Video Broadcast Decoder - 5168 0138 LiveView FlyVideo 2000 - 7133 SAA713X Audio+video broadcast decoder - 5168 0138 LifeView FlyVideo 3000 - 5168 0212 LifeView FlyTV Platinum mini - 5168 0502 LifeView FlyDVB-T Duo CardBus -# PCI audio and video broadcast decoder (http://www.semiconductors.philips.com/pip/saa7134hl) - 7134 SAA7134 - 1043 4842 TV-FM Card 7134 - 7135 SAA7135 Audio+video broadcast decoder - 7145 SAA7145 - 7146 SAA7146 - 110a 0000 Fujitsu/Siemens DVB-C card rev1.5 - 110a ffff Fujitsu/Siemens DVB-C card rev1.5 - 1131 4f56 KNC1 DVB-S Budget - 1131 4f61 Fujitsu-Siemens Activy DVB-S Budget - 114b 2003 DVRaptor Video Edit/Capture Card - 11bd 0006 DV500 Overlay - 11bd 000a DV500 Overlay - 11bd 000f DV500 Overlay - 13c2 0000 Siemens/Technotrend/Hauppauge DVB card rev1.3 or rev1.5 - 13c2 0001 Technotrend/Hauppauge DVB card rev1.3 or rev1.6 - 13c2 0002 Technotrend/Hauppauge DVB card rev2.1 - 13c2 0003 Technotrend/Hauppauge DVB card rev2.1 - 13c2 0004 Technotrend/Hauppauge DVB card rev2.1 - 13c2 0006 Technotrend/Hauppauge DVB card rev1.3 or rev1.6 - 13c2 0008 Technotrend/Hauppauge DVB-T - 13c2 000a Octal/Technotrend DVB-C for iTV - 13c2 1003 Technotrend-Budget / Hauppauge WinTV-NOVA-S DVB card - 13c2 1004 Technotrend-Budget / Hauppauge WinTV-NOVA-C DVB card - 13c2 1005 Technotrend-Budget / Hauppauge WinTV-NOVA-T DVB card - 13c2 100c Technotrend-Budget / Hauppauge WinTV-NOVA-CI DVB card - 13c2 100f Technotrend-Budget / Hauppauge WinTV-NOVA-CI DVB card - 13c2 1011 Technotrend-Budget / Hauppauge WinTV-NOVA-T DVB card - 13c2 1013 SATELCO Multimedia DVB - 13c2 1102 Technotrend/Hauppauge DVB card rev2.1 -1132 Mitel Corp. 
-# This is the new official company name. See disclaimer on www.eicon.com for details! -1133 Eicon Networks Corporation - 7901 EiconCard S90 - 7902 EiconCard S90 - 7911 EiconCard S91 - 7912 EiconCard S91 - 7941 EiconCard S94 - 7942 EiconCard S94 - 7943 EiconCard S94 - 7944 EiconCard S94 - b921 EiconCard P92 - b922 EiconCard P92 - b923 EiconCard P92 - e001 Diva Pro 2.0 S/T - e002 Diva 2.0 S/T PCI - e003 Diva Pro 2.0 U - e004 Diva 2.0 U PCI - e005 Diva 2.01 S/T PCI - e006 Diva CT S/T PCI - e007 Diva CT U PCI - e008 Diva CT Lite S/T PCI - e009 Diva CT Lite U PCI - e00a Diva ISDN+V.90 PCI - e00b Diva 2.02 PCI S/T - e00c Diva 2.02 PCI U - e00d Diva ISDN Pro 3.0 PCI - e00e Diva ISDN+CT S/T PCI Rev 2 - e010 Diva Server BRI-2M PCI - 110a 0021 Fujitsu Siemens ISDN S0 - 8001 0014 Diva Server BRI-2M PCI Cornet NQ - e011 Diva Server BRI S/T Rev 2 - e012 Diva Server 4BRI-8M PCI - 8001 0014 Diva Server 4BRI-8M PCI Cornet NQ - e013 Diva Server 4BRI Rev 2 - 1133 1300 Diva Server V-4BRI-8 - 1133 e013 Diva Server 4BRI-8M 2.0 PCI - 8001 0014 Diva Server 4BRI-8M 2.0 PCI Cornet NQ - e014 Diva Server PRI-30M PCI - 0008 0100 Diva Server PRI-30M PCI - 8001 0014 Diva Server PRI-30M PCI Cornet NQ - e015 DIVA Server PRI Rev 2 - 1133 e015 Diva Server PRI 2.0 PCI - 8001 0014 Diva Server PRI 2.0 PCI Cornet NQ - e016 Diva Server Voice 4BRI PCI - 8001 0014 Diva Server PRI Cornet NQ - e017 Diva Server Voice 4BRI Rev 2 - 1133 e017 Diva Server Voice 4BRI-8M 2.0 PCI - 8001 0014 Diva Server Voice 4BRI-8M 2.0 PCI Cornet NQ - e018 Diva Server BRI-2M 2.0 PCI - 1133 1800 Diva Server V-BRI-2 - 1133 e018 Diva Server BRI-2M 2.0 PCI - 8001 0014 Diva Server BRI-2M 2.0 PCI Cornet NQ - e019 Diva Server Voice PRI Rev 2 - 1133 e019 Diva Server Voice PRI 2.0 PCI - 8001 0014 Diva Server Voice PRI 2.0 PCI Cornet NQ - e01a Diva Server 2FX - e01b Diva Server Voice BRI-2M 2.0 PCI - 1133 e01b Diva Server Voice BRI-2M 2.0 PCI - 8001 0014 Diva Server Voice BRI-2M 2.0 PCI Cornet NQ - e01c Diva Server PRI Rev 3 - 1133 1c01 
Diva Server PRI/E1/T1-8 - 1133 1c02 Diva Server PRI/T1-24 - 1133 1c03 Diva Server PRI/E1-30 - 1133 1c04 Diva Server PRI/E1/T1 - 1133 1c05 Diva Server V-PRI/T1-24 - 1133 1c06 Diva Server V-PRI/E1-30 - 1133 1c07 Diva Server PRI/E1/T1-8 Cornet NQ - 1133 1c08 Diva Server PRI/T1-24 Cornet NQ - 1133 1c09 Diva Server PRI/E1-30 Cornet NQ - 1133 1c0a Diva Server PRI/E1/T1 Cornet NQ - 1133 1c0b Diva Server V-PRI/T1-24 Cornet NQ - 1133 1c0c Diva Server V-PRI/E1-30 Cornet NQ - e01e Diva Server 2PRI - 1133 1e00 Diva Server V-2PRI/E1-60 - 1133 1e01 Diva Server V-2PRI/T1-48 - 1133 1e02 Diva Server 2PRI/E1-60 - 1133 1e03 Diva Server 2PRI/T1-48 - e020 Diva Server 4PRI - 1133 2000 Diva Server V-4PRI/E1-120 - 1133 2001 Diva Server V-4PRI/T1-96 - 1133 2002 Diva Server 4PRI/E1-120 - 1133 2003 Diva Server 4PRI/T1-96 - e024 Diva Server Analog-4P - 1133 2400 Diva Server V-Analog-4P - 1133 e024 Diva Server Analog-4P - e028 Diva Server Analog-8P - 1133 2800 Diva Server V-Analog-8P - 1133 e028 Diva Server Analog-8P -1134 Mercury Computer Systems - 0001 Raceway Bridge - 0002 Dual PCI to RapidIO Bridge -1135 Fuji Xerox Co Ltd - 0001 Printer controller -1136 Momentum Data Systems -1137 Cisco Systems Inc -1138 Ziatech Corporation - 8905 8905 [STD 32 Bridge] -1139 Dynamic Pictures, Inc - 0001 VGA Compatable 3D Graphics -113a FWB Inc -113b Network Computing Devices -113c Cyclone Microsystems, Inc. - 0000 PCI-9060 i960 Bridge - 0001 PCI-SDK [PCI i960 Evaluation Platform] - 0911 PCI-911 [i960Jx-based Intelligent I/O Controller] - 0912 PCI-912 [i960CF-based Intelligent I/O Controller] - 0913 PCI-913 - 0914 PCI-914 [I/O Controller w/ secondary PCI bus] -113d Leading Edge Products Inc -113e Sanyo Electric Co - Computer Engineering Dept -113f Equinox Systems, Inc. 
- 0808 SST-64P Adapter - 1010 SST-128P Adapter - 80c0 SST-16P DB Adapter - 80c4 SST-16P RJ Adapter - 80c8 SST-16P Adapter - 8888 SST-4P Adapter - 9090 SST-8P Adapter -1140 Intervoice Inc -1141 Crest Microsystem Inc -1142 Alliance Semiconductor Corporation - 3210 AP6410 - 6422 ProVideo 6422 - 6424 ProVideo 6424 - 6425 ProMotion AT25 - 643d ProMotion AT3D -1143 NetPower, Inc -1144 Cincinnati Milacron - 0001 Noservo controller -1145 Workbit Corporation - 8007 NinjaSCSI-32 Workbit - f007 NinjaSCSI-32 KME - f010 NinjaSCSI-32 Workbit - f012 NinjaSCSI-32 Logitec - f013 NinjaSCSI-32 Logitec - f015 NinjaSCSI-32 Melco -1146 Force Computers -1147 Interface Corp -# Formerly (Schneider & Koch) -1148 SysKonnect - 4000 FDDI Adapter - 0e11 b03b Netelligent 100 FDDI DAS Fibre SC - 0e11 b03c Netelligent 100 FDDI SAS Fibre SC - 0e11 b03d Netelligent 100 FDDI DAS UTP - 0e11 b03e Netelligent 100 FDDI SAS UTP - 0e11 b03f Netelligent 100 FDDI SAS Fibre MIC - 1148 5521 FDDI SK-5521 (SK-NET FDDI-UP) - 1148 5522 FDDI SK-5522 (SK-NET FDDI-UP DAS) - 1148 5541 FDDI SK-5541 (SK-NET FDDI-FP) - 1148 5543 FDDI SK-5543 (SK-NET FDDI-LP) - 1148 5544 FDDI SK-5544 (SK-NET FDDI-LP DAS) - 1148 5821 FDDI SK-5821 (SK-NET FDDI-UP64) - 1148 5822 FDDI SK-5822 (SK-NET FDDI-UP64 DAS) - 1148 5841 FDDI SK-5841 (SK-NET FDDI-FP64) - 1148 5843 FDDI SK-5843 (SK-NET FDDI-LP64) - 1148 5844 FDDI SK-5844 (SK-NET FDDI-LP64 DAS) - 4200 Token Ring adapter - 4300 SK-98xx Gigabit Ethernet Server Adapter - 1148 9821 SK-9821 Gigabit Ethernet Server Adapter (SK-NET GE-T) - 1148 9822 SK-9822 Gigabit Ethernet Server Adapter (SK-NET GE-T dual link) - 1148 9841 SK-9841 Gigabit Ethernet Server Adapter (SK-NET GE-LX) - 1148 9842 SK-9842 Gigabit Ethernet Server Adapter (SK-NET GE-LX dual link) - 1148 9843 SK-9843 Gigabit Ethernet Server Adapter (SK-NET GE-SX) - 1148 9844 SK-9844 Gigabit Ethernet Server Adapter (SK-NET GE-SX dual link) - 1148 9861 SK-9861 Gigabit Ethernet Server Adapter (SK-NET GE-SX Volition) - 1148 9862 SK-9862 
Gigabit Ethernet Server Adapter (SK-NET GE-SX Volition dual link) - 1148 9871 SK-9871 Gigabit Ethernet Server Adapter (SK-NET GE-ZX) - 1148 9872 SK-9872 Gigabit Ethernet Server Adapter (SK-NET GE-ZX dual link) - 1259 2970 AT-2970SX Gigabit Ethernet Adapter - 1259 2971 AT-2970LX Gigabit Ethernet Adapter - 1259 2972 AT-2970TX Gigabit Ethernet Adapter - 1259 2973 AT-2971SX Gigabit Ethernet Adapter - 1259 2974 AT-2971T Gigabit Ethernet Adapter - 1259 2975 AT-2970SX/2SC Gigabit Ethernet Adapter - 1259 2976 AT-2970LX/2SC Gigabit Ethernet Adapter - 1259 2977 AT-2970TX/2TX Gigabit Ethernet Adapter - 4320 SK-98xx V2.0 Gigabit Ethernet Adapter - 1148 0121 Marvell RDK-8001 Adapter - 1148 0221 Marvell RDK-8002 Adapter - 1148 0321 Marvell RDK-8003 Adapter - 1148 0421 Marvell RDK-8004 Adapter - 1148 0621 Marvell RDK-8006 Adapter - 1148 0721 Marvell RDK-8007 Adapter - 1148 0821 Marvell RDK-8008 Adapter - 1148 0921 Marvell RDK-8009 Adapter - 1148 1121 Marvell RDK-8011 Adapter - 1148 1221 Marvell RDK-8012 Adapter - 1148 3221 SK-9521 V2.0 10/100/1000Base-T Adapter - 1148 5021 SK-9821 V2.0 Gigabit Ethernet 10/100/1000Base-T Adapter - 1148 5041 SK-9841 V2.0 Gigabit Ethernet 1000Base-LX Adapter - 1148 5043 SK-9843 V2.0 Gigabit Ethernet 1000Base-SX Adapter - 1148 5051 SK-9851 V2.0 Gigabit Ethernet 1000Base-SX Adapter - 1148 5061 SK-9861 V2.0 Gigabit Ethernet 1000Base-SX Adapter - 1148 5071 SK-9871 V2.0 Gigabit Ethernet 1000Base-ZX Adapter - 1148 9521 SK-9521 10/100/1000Base-T Adapter - 4400 SK-9Dxx Gigabit Ethernet Adapter - 4500 SK-9Mxx Gigabit Ethernet Adapter - 9000 SK-9Sxx Gigabit Ethernet Server Adapter PCI-X - 9843 [Fujitsu] Gigabit Ethernet - 9e00 SK-9Exx 10/100/1000Base-T Adapter - 1148 2100 SK-9E21 Server Adapter - 1148 21d0 SK-9E21D 10/100/1000Base-T Adapter - 1148 2200 SK-9E22 Server Adapter - 1148 8100 SK-9E81 Server Adapter - 1148 8200 SK-9E82 Server Adapter - 1148 9100 SK-9E91 Server Adapter - 1148 9200 SK-9E92 Server Adapter -1149 Win System Corporation -114a VMIC - 5579 
VMIPCI-5579 (Reflective Memory Card) - 5587 VMIPCI-5587 (Reflective Memory Card) - 6504 VMIC PCI 7755 FPGA - 7587 VMIVME-7587 -114b Canopus Co., Ltd -114c Annabooks -114d IC Corporation -114e Nikon Systems Inc -114f Digi International - 0002 AccelePort EPC - 0003 RightSwitch SE-6 - 0004 AccelePort Xem - 0005 AccelePort Xr - 0006 AccelePort Xr,C/X - 0009 AccelePort Xr/J - 000a AccelePort EPC/J - 000c DataFirePRIme T1 (1-port) - 000d SyncPort 2-Port (x.25/FR) - 0011 AccelePort 8r EIA-232 (IBM) - 0012 AccelePort 8r EIA-422 - 0013 AccelePort Xr - 0014 AccelePort 8r EIA-422 - 0015 AccelePort Xem - 0016 AccelePort EPC/X - 0017 AccelePort C/X - 001a DataFirePRIme E1 (1-port) - 001b AccelePort C/X (IBM) - 001d DataFire RAS T1/E1/PRI - 114f 0050 DataFire RAS E1 Adapter - 114f 0051 DataFire RAS Dual E1 Adapter - 114f 0052 DataFire RAS T1 Adapter - 114f 0053 DataFire RAS Dual T1 Adapter - 0023 AccelePort RAS - 0024 DataFire RAS B4 ST/U - 114f 0030 DataFire RAS BRI U Adapter - 114f 0031 DataFire RAS BRI S/T Adapter - 0026 AccelePort 4r 920 - 0027 AccelePort Xr 920 - 0028 ClassicBoard 4 - 0029 ClassicBoard 8 - 0034 AccelePort 2r 920 - 0035 DataFire DSP T1/E1/PRI cPCI - 0040 AccelePort Xp - 0042 AccelePort 2p - 0043 AccelePort 4p - 0044 AccelePort 8p - 0045 AccelePort 16p - 004e AccelePort 32p - 0070 Datafire Micro V IOM2 (Europe) - 0071 Datafire Micro V (Europe) - 0072 Datafire Micro V IOM2 (North America) - 0073 Datafire Micro V (North America) - 00b0 Digi Neo 4 - 00b1 Digi Neo 8 - 00c8 Digi Neo 2 DB9 - 00c9 Digi Neo 2 DB9 PRI - 00ca Digi Neo 2 RJ45 - 00cb Digi Neo 2 RJ45 PRI - 00d0 ClassicBoard 4 422 - 00d1 ClassicBoard 8 422 - 6001 Avanstar -1150 Thinking Machines Corp -1151 JAE Electronics Inc. 
-1152 Megatek -1153 Land Win Electronic Corp -1154 Melco Inc -1155 Pine Technology Ltd -1156 Periscope Engineering -1157 Avsys Corporation -1158 Voarx R & D Inc - 3011 Tokenet/vg 1001/10m anylan - 9050 Lanfleet/Truevalue - 9051 Lanfleet/Truevalue -1159 Mutech Corp - 0001 MV-1000 -115a Harlequin Ltd -115b Parallax Graphics -115c Photron Ltd. -115d Xircom - 0003 Cardbus Ethernet 10/100 - 1014 0181 10/100 EtherJet Cardbus Adapter - 1014 1181 10/100 EtherJet Cardbus Adapter - 1014 8181 10/100 EtherJet Cardbus Adapter - 1014 9181 10/100 EtherJet Cardbus Adapter - 115d 0181 Cardbus Ethernet 10/100 - 115d 1181 Cardbus Ethernet 10/100 - 1179 0181 Cardbus Ethernet 10/100 - 8086 8181 EtherExpress PRO/100 Mobile CardBus 32 Adapter - 8086 9181 EtherExpress PRO/100 Mobile CardBus 32 Adapter - 0005 Cardbus Ethernet 10/100 - 1014 0182 10/100 EtherJet Cardbus Adapter - 1014 1182 10/100 EtherJet Cardbus Adapter - 115d 0182 Cardbus Ethernet 10/100 - 115d 1182 Cardbus Ethernet 10/100 - 0007 Cardbus Ethernet 10/100 - 1014 0182 10/100 EtherJet Cardbus Adapter - 1014 1182 10/100 EtherJet Cardbus Adapter - 115d 0182 Cardbus Ethernet 10/100 - 115d 1182 Cardbus Ethernet 10/100 - 000b Cardbus Ethernet 10/100 - 1014 0183 10/100 EtherJet Cardbus Adapter - 115d 0183 Cardbus Ethernet 10/100 - 000c Mini-PCI V.90 56k Modem - 000f Cardbus Ethernet 10/100 - 1014 0183 10/100 EtherJet Cardbus Adapter - 115d 0183 Cardbus Ethernet 10/100 - 00d4 Mini-PCI K56Flex Modem - 0101 Cardbus 56k modem - 115d 1081 Cardbus 56k Modem - 0103 Cardbus Ethernet + 56k Modem - 1014 9181 Cardbus 56k Modem - 1115 1181 Cardbus Ethernet 100 + 56k Modem - 115d 1181 CBEM56G-100 Ethernet + 56k Modem - 8086 9181 PRO/100 LAN + Modem56 CardBus -115e Peer Protocols Inc -115f Maxtor Corporation -1160 Megasoft Inc -1161 PFU Limited -1162 OA Laboratory Co Ltd -1163 Rendition - 0001 Verite 1000 - 2000 Verite V2000/V2100/V2200 - 1092 2000 Stealth II S220 -1164 Advanced Peripherals Technologies -1165 Imagraph Corporation - 0001 Motion 
TPEG Recorder/Player with audio -1166 ServerWorks - 0000 CMIC-LE - 0005 CNB20-LE Host Bridge - 0006 CNB20HE Host Bridge - 0007 CNB20-LE Host Bridge - 0008 CNB20HE Host Bridge - 0009 CNB20LE Host Bridge - 0010 CIOB30 - 0011 CMIC-HE - 0012 CMIC-WS Host Bridge (GC-LE chipset) - 0013 CNB20-HE Host Bridge - 0014 CMIC-LE Host Bridge (GC-LE chipset) - 0015 CMIC-GC Host Bridge - 0016 CMIC-GC Host Bridge - 0017 GCNB-LE Host Bridge - 0101 CIOB-X2 PCI-X I/O Bridge - 0110 CIOB-E I/O Bridge with Gigabit Ethernet - 0200 OSB4 South Bridge - 0201 CSB5 South Bridge - 4c53 1080 CT8 mainboard - 0203 CSB6 South Bridge - 0211 OSB4 IDE Controller - 0212 CSB5 IDE Controller - 4c53 1080 CT8 mainboard - 0213 CSB6 RAID/IDE Controller - 0217 CSB6 IDE Controller - 0220 OSB4/CSB5 OHCI USB Controller - 4c53 1080 CT8 mainboard - 0221 CSB6 OHCI USB Controller - 0225 CSB5 LPC bridge -# cancelled - 4c53 1080 CT8 mainboard - 0227 GCLE-2 Host Bridge - 0230 CSB5 LPC bridge - 4c53 1080 CT8 mainboard - 0240 K2 SATA - 0241 K2 SATA - 0242 K2 SATA -1167 Mutoh Industries Inc -1168 Thine Electronics Inc -1169 Centre for Development of Advanced Computing -116a Polaris Communications - 6100 Bus/Tag Channel - 6800 Escon Channel - 7100 Bus/Tag Channel - 7800 Escon Channel -116b Connectware Inc -116c Intelligent Resources Integrated Systems -116d Martin-Marietta -116e Electronics for Imaging -116f Workstation Technology -1170 Inventec Corporation -1171 Loughborough Sound Images Plc -1172 Altera Corporation -1173 Adobe Systems, Inc -1174 Bridgeport Machines -1175 Mitron Computer Inc. -1176 SBE Incorporated -1177 Silicon Engineering -1178 Alfa, Inc. 
- afa1 Fast Ethernet Adapter -1179 Toshiba America Info Systems - 0103 EX-IDE Type-B - 0404 DVD Decoder card - 0406 Tecra Video Capture device - 0407 DVD Decoder card (Version 2) - 0601 CPU to PCI bridge - 0603 ToPIC95 PCI to CardBus Bridge for Notebooks - 060a ToPIC95 - 060f ToPIC97 - 0617 ToPIC100 PCI to Cardbus Bridge with ZV Support - 0618 CPU to PCI and PCI to ISA bridge -# Claimed to be Lucent DSP1645 [Mars], but that's apparently incorrect. Does anyone know the correct ID? - 0701 FIR Port - 0804 TC6371AF SmartMedia Controller - 0805 SD TypA Controller - 0d01 FIR Port Type-DO - 1179 0001 FIR Port Type-DO -117a A-Trend Technology -117b L G Electronics, Inc. -117c Atto Technology -117d Becton & Dickinson -117e T/R Systems -117f Integrated Circuit Systems -1180 Ricoh Co Ltd - 0465 RL5c465 - 0466 RL5c466 - 0475 RL5c475 - 144d c006 vpr Matrix 170B4 CardBus bridge - 0476 RL5c476 II - 1014 0185 ThinkPad A/T/X Series - 104d 80df Vaio PCG-FX403 - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 14ef 0220 PCD-RP-220S - 0477 RL5c477 - 0478 RL5c478 - 1014 0184 ThinkPad A30p (2653-64G) - 0522 R5C522 IEEE 1394 Controller - 1014 01cf ThinkPad A30p (2653-64G) - 0551 R5C551 IEEE 1394 Controller - 144d c006 vpr Matrix 170B4 - 0552 R5C552 IEEE 1394 Controller - 1014 0511 ThinkPad A/T/X Series - 0576 R5C576 SD Bus Host Adapter - 0592 R5C592 Memory Stick Bus Host Adapter -1181 Telmatics International -1183 Fujikura Ltd -1184 Forks Inc -1185 Dataworld International Ltd -1186 D-Link System Inc - 0100 DC21041 - 1002 DL10050 Sundance Ethernet - 1186 1002 DFE-550TX - 1186 1012 DFE-580TX - 1025 AirPlus Xtreme G DWL-G650 Adapter - 1026 AirXpert DWL-AG650 Wireless Cardbus Adapter - 1043 AirXpert DWL-AG650 Wireless Cardbus Adapter - 1300 RTL8139 Ethernet - 1186 1300 DFE-538TX 10/100 Ethernet Adapter - 1186 1301 DFE-530TX+ 10/100 Ethernet Adapter - 1340 DFE-690TXD CardBus PC Card - 1541 DFE-680TXD CardBus PC Card - 1561 DRP-32TXD Cardbus PC Card - 2027 AirPlus Xtreme G DWL-G520 
Adapter - 3203 AirPlus Xtreme G DWL-G520 Adapter - 3300 DWL-510 2.4GHz Wireless PCI Adapter - 3a03 AirPro DWL-A650 Wireless Cardbus Adapter(rev.B) - 3a04 AirPro DWL-AB650 Multimode Wireless Cardbus Adapter - 3a05 AirPro DWL-AB520 Multimode Wireless PCI Adapter - 3a07 AirXpert DWL-AG650 Wireless Cardbus Adapter - 3a08 AirXpert DWL-AG520 Wireless PCI Adapter - 3a10 AirXpert DWL-AG650 Wireless Cardbus Adapter(rev.B) - 3a11 AirXpert DWL-AG520 Wireless PCI Adapter(rev.B) - 3a12 AirPlus DWL-G650 Wireless Cardbus Adapter(rev.C) - 3a13 AirPlus DWL-G520 Wireless PCI Adapter(rev.B) - 3a14 AirPremier DWL-AG530 Wireless PCI Adapter - 3a63 AirXpert DWL-AG660 Wireless Cardbus Adapter - 3b05 DWL-G650+ CardBus PC Card - 4000 DL2000-based Gigabit Ethernet - 4300 DGE-528T Gigabit Ethernet Adapter - 4c00 Gigabit Ethernet Adapter - 1186 4c00 DGE-530T Gigabit Ethernet Adapter - 8400 D-Link DWL-650+ CardBus PC Card -1187 Advanced Technology Laboratories, Inc. -1188 Shima Seiki Manufacturing Ltd. -1189 Matsushita Electronics Co Ltd -118a Hilevel Technology -118b Hypertec Pty Limited -118c Corollary, Inc - 0014 PCIB [C-bus II to PCI bus host bridge chip] - 1117 Intel 8-way XEON Profusion Chipset [Cache Coherency Filter] -118d BitFlow Inc - 0001 Raptor-PCI framegrabber - 0012 Model 12 Road Runner Frame Grabber - 0014 Model 14 Road Runner Frame Grabber - 0024 Model 24 Road Runner Frame Grabber - 0044 Model 44 Road Runner Frame Grabber - 0112 Model 12 Road Runner Frame Grabber - 0114 Model 14 Road Runner Frame Grabber - 0124 Model 24 Road Runner Frame Grabber - 0144 Model 44 Road Runner Frame Grabber - 0212 Model 12 Road Runner Frame Grabber - 0214 Model 14 Road Runner Frame Grabber - 0224 Model 24 Road Runner Frame Grabber - 0244 Model 44 Road Runner Frame Grabber - 0312 Model 12 Road Runner Frame Grabber - 0314 Model 14 Road Runner Frame Grabber - 0324 Model 24 Road Runner Frame Grabber - 0344 Model 44 Road Runner Frame Grabber -118e Hermstedt GmbH -118f Green Logic -1190 Tripace - c731 
TP-910/920/940 PCI Ultra(Wide) SCSI Adapter -1191 Artop Electronic Corp - 0003 SCSI Cache Host Adapter - 0004 ATP8400 - 0005 ATP850UF - 0006 ATP860 NO-BIOS - 0007 ATP860 - 0008 ATP865 NO-ROM - 0009 ATP865 - 8002 AEC6710 SCSI-2 Host Adapter - 8010 AEC6712UW SCSI - 8020 AEC6712U SCSI - 8030 AEC6712S SCSI - 8040 AEC6712D SCSI - 8050 AEC6712SUW SCSI - 8060 AEC6712 SCSI - 8080 AEC67160 SCSI - 8081 AEC67160S SCSI - 808a AEC67162 2-ch. LVD SCSI -1192 Densan Company Ltd -1193 Zeitnet Inc. - 0001 1221 - 0002 1225 -1194 Toucan Technology -1195 Ratoc System Inc -1196 Hytec Electronics Ltd -1197 Gage Applied Sciences, Inc. - 010c CompuScope 82G 8bit 2GS/s Analog Input Card -1198 Lambda Systems Inc -1199 Attachmate Corporation -119a Mind Share, Inc. -119b Omega Micro Inc. - 1221 82C092G -119c Information Technology Inst. -119d Bug, Inc. Sapporo Japan -119e Fujitsu Microelectronics Ltd. - 0001 FireStream 155 - 0003 FireStream 50 -119f Bull HN Information Systems -11a0 Convex Computer Corporation -11a1 Hamamatsu Photonics K.K. -11a2 Sierra Research and Technology -11a3 Deuretzbacher GmbH & Co. Eng. KG -11a4 Barco Graphics NV -11a5 Microunity Systems Eng. Inc -11a6 Pure Data Ltd. -11a7 Power Computing Corp. -11a8 Systech Corp. -11a9 InnoSys Inc. - 4240 AMCC S933Q Intelligent Serial Card -11aa Actel -# Formerly Galileo Technology, Inc. -11ab Marvell Technology Group Ltd. 
- 0146 GT-64010/64010A System Controller - 138f W8300 802.11 Adapter (rev 07) - 1fa6 Marvell W8300 802.11 Adapter - 1fa7 88W8310 and 88W8000G [Libertas] 802.11g client chipset - 4320 Gigabit Ethernet Controller - 1019 0f38 Marvell 88E8001 Gigabit Ethernet Controller (ECS) - 1019 8001 Marvell 88E8001 Gigabit Ethernet Controller (ECS) - 1043 173c Marvell 88E8001 Gigabit Ethernet Controller (Asus) - 1043 811a Marvell 88E8001 Gigabit Ethernet Controller (Asus) - 105b 0c19 Marvell 88E8001 Gigabit Ethernet Controller (Foxconn) - 10b8 b452 SMC EZ Card 1000 (SMC9452TXV.2) - 11ab 0121 Marvell RDK-8001 - 11ab 0321 Marvell RDK-8003 - 11ab 1021 Marvell RDK-8010 - 11ab 5021 Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (64 bit) - 11ab 9521 Marvell Yukon Gigabit Ethernet 10/100/1000Base-T Controller (32 bit) - 1458 e000 Marvell 88E8001 Gigabit Ethernet Controller (Gigabyte) - 147b 1406 Marvell 88E8001 Gigabit Ethernet Controller (Abit) - 15d4 0047 Marvell 88E8001 Gigabit Ethernet Controller (Iwill) - 1695 9025 Marvell 88E8001 Gigabit Ethernet Controller (Epox) - 17f2 1c03 Marvell 88E8001 Gigabit Ethernet Controller (Albatron) - 270f 2803 Marvell 88E8001 Gigabit Ethernet Controller (Chaintech) - 4350 Fast Ethernet Controller - 1179 0001 Marvell 88E8035 Fast Ethernet Controller (Toshiba) - 11ab 3521 Marvell RDK-8035 - 1854 000d Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 000e Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 000f Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0011 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0012 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0016 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0017 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0018 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0019 Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 001c Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 001e Marvell 88E8035 Fast Ethernet Controller (LGE) - 1854 0020 
Marvell 88E8035 Fast Ethernet Controller (LGE) - 4351 Fast Ethernet Controller - 107b 4009 Marvell 88E8036 Fast Ethernet Controller (Wistron) - 10f7 8338 Marvell 88E8036 Fast Ethernet Controller (Panasonic) - 1179 0001 Marvell 88E8036 Fast Ethernet Controller (Toshiba) - 1179 ff00 Marvell 88E8036 Fast Ethernet Controller (Compal) - 1179 ff10 Marvell 88E8036 Fast Ethernet Controller (Inventec) - 11ab 3621 Marvell RDK-8036 - 13d1 ac12 Abocom EFE3K - 10/100 Ethernet Expresscard - 161f 203d Marvell 88E8036 Fast Ethernet Controller (Arima) - 1854 000d Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 000e Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 000f Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0011 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0012 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0016 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0017 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0018 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0019 Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 001c Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 001e Marvell 88E8036 Fast Ethernet Controller (LGE) - 1854 0020 Marvell 88E8036 Fast Ethernet Controller (LGE) - 4360 Gigabit Ethernet Controller - 1043 8134 Marvell 88E8052 Gigabit Ethernet Controller (Asus) - 107b 4009 Marvell 88E8052 Gigabit Ethernet Controller (Wistron) - 11ab 5221 Marvell RDK-8052 - 1458 e000 Marvell 88E8052 Gigabit Ethernet Controller (Gigabyte) - 1462 052c Marvell 88E8052 Gigabit Ethernet Controller (MSI) - 1849 8052 Marvell 88E8052 Gigabit Ethernet Controller (ASRock) - 1940 e000 Marvell 88E8052 Gigabit Ethernet Controller (Gigabyte) - a0a0 0509 Marvell 88E8052 Gigabit Ethernet Controller (Aopen) - 4361 Gigabit Ethernet Controller - 107b 3015 Marvell 88E8050 Gigabit Ethernet Controller (Gateway) - 11ab 5021 Marvell 88E8050 Gigabit Ethernet Controller (Intel) - 8086 3063 D925XCVLK mainboard - 4362 Gigabit Ethernet Controller 
- 103c 2a0d Marvell 88E8053 Gigabit Ethernet Controller (Asus) - 1043 8142 Marvell 88E8053 Gigabit Ethernet Controller (Asus) - 109f 3197 Marvell 88E8053 Gigabit Ethernet Controller (Trigem) - 10f7 8338 Marvell 88E8053 Gigabit Ethernet Controller (Panasonic) - 10fd a430 Marvell 88E8053 Gigabit Ethernet Controller (SOYO) - 1179 0001 Marvell 88E8053 Gigabit Ethernet Controller (Toshiba) - 1179 ff00 Marvell 88E8053 Gigabit Ethernet Controller (Compal) - 1179 ff10 Marvell 88E8053 Gigabit Ethernet Controller (Inventec) - 11ab 5321 Marvell RDK-8053 - 1297 c240 Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) - 1297 c241 Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) - 1297 c242 Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) - 1297 c243 Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) - 1297 c244 Marvell 88E8053 Gigabit Ethernet Controller (Shuttle) - 13d1 ac11 Abocom EGE5K - Giga Ethernet Expresscard - 1458 e000 Marvell 88E8053 Gigabit Ethernet Controller (Gigabyte) - 1462 058c Marvell 88E8053 Gigabit Ethernet Controller (MSI) - 14c0 0012 Marvell 88E8053 Gigabit Ethernet Controller (Compal) - 1558 04a0 Marvell 88E8053 Gigabit Ethernet Controller (Clevo) - 15bd 1003 Marvell 88E8053 Gigabit Ethernet Controller (DFI) - 161f 203c Marvell 88E8053 Gigabit Ethernet Controller (Arima) - 161f 203d Marvell 88E8053 Gigabit Ethernet Controller (Arima) - 1695 9029 Marvell 88E8053 Gigabit Ethernet Controller (Epox) - 17f2 2c08 Marvell 88E8053 Gigabit Ethernet Controller (Albatron) - 17ff 0585 Marvell 88E8053 Gigabit Ethernet Controller (Quanta) - 1849 8053 Marvell 88E8053 Gigabit Ethernet Controller (ASRock) - 1854 000b Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 000c Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0010 Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0013 Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0014 Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0015 Marvell 88E8053 Gigabit Ethernet 
Controller (LGE) - 1854 001a Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 001b Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 001d Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 001f Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0021 Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1854 0022 Marvell 88E8053 Gigabit Ethernet Controller (LGE) - 1940 e000 Marvell 88E8053 Gigabit Ethernet Controller (Gigabyte) - 270f 2801 Marvell 88E8053 Gigabit Ethernet Controller (Chaintech) - a0a0 0506 Marvell 88E8053 Gigabit Ethernet Controller (Aopen) - 4611 GT-64115 System Controller - 4620 GT-64120/64120A/64121A System Controller - 4801 GT-48001 - 5005 Belkin F5D5005 Gigabit Desktop Network PCI Card - 5040 MV88SX5040 4-port SATA I PCI-X Controller - 5041 MV88SX5041 4-port SATA I PCI-X Controller - 5080 MV88SX5080 8-port SATA I PCI-X Controller - 5081 MV88SX5081 8-port SATA I PCI-X Controller - 6041 MV88SX6041 4-port SATA II PCI-X Controller - 6081 MV88SX6081 8-port SATA II PCI-X Controller - 6460 MV64360/64361/64362 System Controller - f003 GT-64010 Primary Image Piranha Image Generator -11ac Canon Information Systems Research Aust. -11ad Lite-On Communications Inc - 0002 LNE100TX - 11ad 0002 LNE100TX - 11ad 0003 LNE100TX - 11ad f003 LNE100TX - 11ad ffff LNE100TX - 1385 f004 FA310TX - c115 LNE100TX [Linksys EtherFast 10/100] - 11ad c001 LNE100TX [ver 2.0] -11ae Aztech System Ltd -11af Avid Technology Inc. - 0001 [Cinema] -11b0 V3 Semiconductor Inc. - 0002 V300PSC - 0292 V292PBC [Am29030/40 Bridge] - 0960 V96xPBC - c960 V96DPC -11b1 Apricot Computers -11b2 Eastman Kodak -11b3 Barr Systems Inc. -11b4 Leitch Technology International -11b5 Radstone Technology Plc -11b6 United Video Corp -11b7 Motorola -11b8 XPoint Technologies, Inc - 0001 Quad PeerMaster -11b9 Pathlight Technology Inc. - c0ed SSA Controller -11ba Videotron Corp -11bb Pyramid Technology -11bc Network Peripherals Inc - 0001 NP-PCI -11bd Pinnacle Systems Inc. 
-11be International Microcircuits Inc -11bf Astrodesign, Inc. -11c0 Hewlett Packard -11c1 Agere Systems (former Lucent Microelectronics) - 0440 56k WinModem - 1033 8015 LT WinModem 56k Data+Fax+Voice+Dsvd - 1033 8047 LT WinModem 56k Data+Fax+Voice+Dsvd - 1033 804f LT WinModem 56k Data+Fax+Voice+Dsvd - 10cf 102c LB LT Modem V.90 56k - 10cf 104a BIBLO LT Modem 56k - 10cf 105f LB2 LT Modem V.90 56k - 1179 0001 Internal V.90 Modem - 11c1 0440 LT WinModem 56k Data+Fax+Voice+Dsvd - 122d 4101 MDP7800-U Modem - 122d 4102 MDP7800SP-U Modem - 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 0440 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 0441 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 0450 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 f100 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 f101 LT WinModem 56k Data+Fax+Voice+Dsvd - 144d 2101 LT56PV Modem - 149f 0440 LT WinModem 56k Data+Fax+Voice+Dsvd - 0441 56k WinModem - 1033 804d LT WinModem 56k Data+Fax - 1033 8065 LT WinModem 56k Data+Fax - 1092 0440 Supra 56i - 1179 0001 Internal V.90 Modem - 11c1 0440 LT WinModem 56k Data+Fax - 11c1 0441 LT WinModem 56k Data+Fax - 122d 4100 MDP7800-U Modem - 13e0 0040 LT WinModem 56k Data+Fax - 13e0 0100 LT WinModem 56k Data+Fax - 13e0 0410 LT WinModem 56k Data+Fax - 13e0 0420 TelePath Internet 56k WinModem - 13e0 0440 LT WinModem 56k Data+Fax - 13e0 0443 LT WinModem 56k Data+Fax - 13e0 f102 LT WinModem 56k Data+Fax - 1416 9804 CommWave 56k Modem - 141d 0440 LT WinModem 56k Data+Fax - 144f 0441 Lucent 56k V.90 DF Modem - 144f 0449 Lucent 56k V.90 DF Modem - 144f 110d Lucent Win Modem - 1468 0441 Presario 56k V.90 DF Modem - 1668 0440 Lucent Win Modem - 0442 56k WinModem - 11c1 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 11c1 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 13e0 0412 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 13e0 0442 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 13fc 2471 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 144d 2104 LT56PT Modem - 144f 1104 
LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 149f 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 1668 0440 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 0443 LT WinModem - 0444 LT WinModem - 0445 LT WinModem - 8086 2203 PRO/100+ MiniPCI (probably an Ambit U98.003.C.00 combo card) - 8086 2204 PRO/100+ MiniPCI on Armada E500 - 0446 LT WinModem - 0447 LT WinModem - 0448 WinModem 56k - 1014 0131 Lucent Win Modem - 1033 8066 LT WinModem 56k Data+Fax+Voice+Dsvd - 13e0 0030 56k Voice Modem - 13e0 0040 LT WinModem 56k Data+Fax+Voice+Dsvd -# Actiontech eth+modem card as used by Dell &c. - 1668 2400 LT WinModem 56k (MiniPCI Ethernet+Modem) - 0449 WinModem 56k - 0e11 b14d 56k V.90 Modem - 13e0 0020 LT WinModem 56k Data+Fax - 13e0 0041 TelePath Internet 56k WinModem - 1436 0440 Lucent Win Modem - 144f 0449 Lucent 56k V.90 DFi Modem - 1468 0410 IBM ThinkPad T23 (2647-4MG) - 1468 0440 Lucent Win Modem - 1468 0449 Presario 56k V.90 DFi Modem - 044a F-1156IV WinModem (V90, 56KFlex) - 10cf 1072 LB Global LT Modem - 13e0 0012 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 13e0 0042 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 144f 1005 LT WinModem 56k Data+Fax+Voice+VoiceView+Dsvd - 044b LT WinModem - 044c LT WinModem - 044d LT WinModem - 044e LT WinModem - 044f V90 WildWire Modem - 0450 LT WinModem - 1033 80a8 Versa Note Vxi - 144f 4005 Magnia SG20 - 0451 LT WinModem - 0452 LT WinModem - 0453 LT WinModem - 0454 LT WinModem - 0455 LT WinModem - 0456 LT WinModem - 0457 LT WinModem - 0458 LT WinModem - 0459 LT WinModem - 045a LT WinModem - 045c LT WinModem - 0461 V90 WildWire Modem - 0462 V90 WildWire Modem - 0480 Venus Modem (V90, 56KFlex) - 048c V.92 56K WinModem -# InPorte Home Internal 56k Modem/fax/answering machine/SMS Features - 048f V.92 56k WinModem - 5801 USB - 5802 USS-312 USB Controller -# 4 port PCI USB Controller made by Agere (formely Lucent) - 5803 USS-344S USB Controller - 5811 FW323 - 8086 524c D865PERL mainboard - dead 0800 FireWire Host Bus Adapter - 
ab10 WL60010 Wireless LAN MAC - ab11 WL60040 Multimode Wireles LAN MAC - 11c1 ab12 WaveLAN 11abg Cardbus card (Model 1102) - 11c1 ab13 WaveLAN 11abg MiniPCI card (Model 0512) - 11c1 ab15 WaveLAN 11abg Cardbus card (Model 1106) - 11c1 ab16 WaveLAN 11abg MiniPCI card (Model 0516) - ab20 ORiNOCO PCI Adapter - ab21 Agere Wireless PCI Adapter - ab30 Hermes2 Mini-PCI WaveLAN a/b/g - 14cd 2012 Hermes2 Mini-PCI WaveLAN a/b/g -11c2 Sand Microelectronics -11c3 NEC Corporation -11c4 Document Technologies, Inc -11c5 Shiva Corporation -11c6 Dainippon Screen Mfg. Co. Ltd -11c7 D.C.M. Data Systems -11c8 Dolphin Interconnect Solutions AS - 0658 PSB32 SCI-Adapter D31x - d665 PSB64 SCI-Adapter D32x - d667 PSB66 SCI-Adapter D33x -11c9 Magma - 0010 16-line serial port w/- DMA - 0011 4-line serial port w/- DMA -11ca LSI Systems, Inc -11cb Specialix Research Ltd. - 2000 PCI_9050 - 11cb 0200 SX - 11cb b008 I/O8+ - 4000 SUPI_1 - 8000 T225 -11cc Michels & Kleberhoff Computer GmbH -11cd HAL Computer Systems, Inc. -11ce Netaccess -11cf Pioneer Electronic Corporation -11d0 Lockheed Martin Federal Systems-Manassas -11d1 Auravision - 01f7 VxP524 -11d2 Intercom Inc. -11d3 Trancell Systems Inc -11d4 Analog Devices - 1535 Blackfin BF535 processor - 1805 SM56 PCI modem - 1889 AD1889 sound chip -11d5 Ikon Corporation - 0115 10115 - 0117 10117 -11d6 Tekelec Telecom -11d7 Trenton Technology, Inc. -11d8 Image Technologies Development -11d9 TEC Corporation -11da Novell -11db Sega Enterprises Ltd -11dc Questra Corporation -11dd Crosfield Electronics Limited -11de Zoran Corporation - 6057 ZR36057PQC Video cutting chipset - 1031 7efe DC10 Plus - 1031 fc00 MiroVIDEO DC50, Motion JPEG Capture/CODEC Board - 13ca 4231 JPEG/TV Card - 6120 ZR36120 - 1328 f001 Cinemaster C DVD Decoder -11df New Wave PDG -11e0 Cray Communications A/S -11e1 GEC Plessey Semi Inc. 
-11e2 Samsung Information Systems America -11e3 Quicklogic Corporation - 5030 PC Watchdog -11e4 Second Wave Inc -11e5 IIX Consulting -11e6 Mitsui-Zosen System Research -11e7 Toshiba America, Elec. Company -11e8 Digital Processing Systems Inc. -11e9 Highwater Designs Ltd. -11ea Elsag Bailey -11eb Formation Inc. -11ec Coreco Inc -11ed Mediamatics -11ee Dome Imaging Systems Inc -11ef Nicolet Technologies B.V. -11f0 Compu-Shack - 4231 FDDI - 4232 FASTline UTP Quattro - 4233 FASTline FO - 4234 FASTline UTP - 4235 FASTline-II UTP - 4236 FASTline-II FO - 4731 GIGAline -11f1 Symbios Logic Inc -11f2 Picture Tel Japan K.K. -11f3 Keithley Metrabyte -11f4 Kinetic Systems Corporation - 2915 CAMAC controller -11f5 Computing Devices International -11f6 Compex - 0112 ENet100VG4 - 0113 FreedomLine 100 - 1401 ReadyLink 2000 - 2011 RL100-ATX 10/100 - 11f6 2011 RL100-ATX - 2201 ReadyLink 100TX (Winbond W89C840) - 11f6 2011 ReadyLink 100TX - 9881 RL100TX Fast Ethernet -11f7 Scientific Atlanta -11f8 PMC-Sierra Inc. - 7375 PM7375 [LASAR-155 ATM SAR] -11f9 I-Cube Inc -11fa Kasan Electronics Company, Ltd. 
-11fb Datel Inc -11fc Silicon Magic -11fd High Street Consultants -11fe Comtrol Corporation - 0001 RocketPort 32 port w/external I/F - 0002 RocketPort 8 port w/external I/F - 0003 RocketPort 16 port w/external I/F - 0004 RocketPort 4 port w/quad cable - 0005 RocketPort 8 port w/octa cable - 0006 RocketPort 8 port w/RJ11 connectors - 0007 RocketPort 4 port w/RJ11 connectors - 0008 RocketPort 8 port w/ DB78 SNI (Siemens) connector - 0009 RocketPort 16 port w/ DB78 SNI (Siemens) connector - 000a RocketPort Plus 4 port - 000b RocketPort Plus 8 port - 000c RocketModem 6 port - 000d RocketModem 4-port - 000e RocketPort Plus 2 port RS232 - 000f RocketPort Plus 2 port RS422 - 0801 RocketPort UPCI 32 port w/external I/F - 0802 RocketPort UPCI 8 port w/external I/F - 0803 RocketPort UPCI 16 port w/external I/F - 0805 RocketPort UPCI 8 port w/octa cable - 080c RocketModem III 8 port - 080d RocketModem III 4 port - 0903 RocketPort Compact PCI 16 port w/external I/F - 8015 RocketPort 4-port UART 16954 -11ff Scion Corporation - 0003 AG-5 -1200 CSS Corporation -1201 Vista Controls Corp -1202 Network General Corp. - 4300 Gigabit Ethernet Adapter - 1202 9841 SK-9841 LX - 1202 9842 SK-9841 LX dual link - 1202 9843 SK-9843 SX - 1202 9844 SK-9843 SX dual link -1203 Bayer Corporation, Agfa Division -1204 Lattice Semiconductor Corporation -1205 Array Corporation -1206 Amdahl Corporation -1208 Parsytec GmbH - 4853 HS-Link Device -1209 SCI Systems Inc -120a Synaptel -120b Adaptive Solutions -120c Technical Corp. -120d Compression Labs, Inc. 
-120e Cyclades Corporation - 0100 Cyclom-Y below first megabyte - 0101 Cyclom-Y above first megabyte - 0102 Cyclom-4Y below first megabyte - 0103 Cyclom-4Y above first megabyte - 0104 Cyclom-8Y below first megabyte - 0105 Cyclom-8Y above first megabyte - 0200 Cyclades-Z below first megabyte - 0201 Cyclades-Z above first megabyte - 0300 PC300/RSV or /X21 (2 ports) - 0301 PC300/RSV or /X21 (1 port) - 0310 PC300/TE (2 ports) - 0311 PC300/TE (1 port) - 0320 PC300/TE-M (2 ports) - 0321 PC300/TE-M (1 port) - 0400 PC400 -120f Essential Communications - 0001 Roadrunner serial HIPPI -1210 Hyperparallel Technologies -1211 Braintech Inc -1212 Kingston Technology Corp. -1213 Applied Intelligent Systems, Inc. -1214 Performance Technologies, Inc. -1215 Interware Co., Ltd -1216 Purup Prepress A/S -1217 O2 Micro, Inc. - 6729 OZ6729 - 673a OZ6730 - 6832 OZ6832/6833 CardBus Controller - 6836 OZ6836/6860 CardBus Controller - 6872 OZ6812 CardBus Controller - 6925 OZ6922 CardBus Controller - 6933 OZ6933/711E1 CardBus/SmartCardBus Controller - 1025 1016 Travelmate 612 TX - 6972 OZ601/6912/711E0 CardBus/SmartCardBus Controller - 1014 020c ThinkPad R30 - 1179 0001 Magnia Z310 - 7110 OZ711Mx 4-in-1 MemoryCardBus Accelerator - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 7112 OZ711EC1/M1 SmartCardBus/MemoryCardBus Controller - 7113 OZ711EC1 SmartCardBus Controller - 7114 OZ711M1/MC1 4-in-1 MemoryCardBus Controller - 7134 OZ711MP1/MS1 MemoryCardBus Controller - 71e2 OZ711E2 SmartCardBus Controller - 7212 OZ711M2 4-in-1 MemoryCardBus Controller - 7213 OZ6933E CardBus Controller - 7223 OZ711M3/MC3 4-in-1 MemoryCardBus Controller - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 7233 OZ711MP3/MS3 4-in-1 MemoryCardBus Controller -1218 Hybricon Corp. -1219 First Virtual Corporation -121a 3Dfx Interactive, Inc. 
- 0001 Voodoo - 0002 Voodoo 2 - 0003 Voodoo Banshee - 1092 0003 Monster Fusion - 1092 4000 Monster Fusion - 1092 4002 Monster Fusion - 1092 4801 Monster Fusion AGP - 1092 4803 Monster Fusion AGP - 1092 8030 Monster Fusion - 1092 8035 Monster Fusion AGP - 10b0 0001 Dragon 4000 - 1102 1018 3D Blaster Banshee VE - 121a 0001 Voodoo Banshee AGP - 121a 0003 Voodoo Banshee AGP SGRAM - 121a 0004 Voodoo Banshee - 139c 0016 Raven - 139c 0017 Raven - 14af 0002 Maxi Gamer Phoenix - 0004 Voodoo Banshee [Velocity 100] - 0005 Voodoo 3 - 121a 0004 Voodoo3 AGP - 121a 0030 Voodoo3 AGP - 121a 0031 Voodoo3 AGP - 121a 0034 Voodoo3 AGP - 121a 0036 Voodoo3 2000 PCI - 121a 0037 Voodoo3 AGP - 121a 0038 Voodoo3 AGP - 121a 003a Voodoo3 AGP - 121a 0044 Voodoo3 - 121a 004b Velocity 100 - 121a 004c Velocity 200 - 121a 004d Voodoo3 AGP - 121a 004e Voodoo3 AGP - 121a 0051 Voodoo3 AGP - 121a 0052 Voodoo3 AGP - 121a 0060 Voodoo3 3500 TV (NTSC) - 121a 0061 Voodoo3 3500 TV (PAL) - 121a 0062 Voodoo3 3500 TV (SECAM) - 0009 Voodoo 4 / Voodoo 5 - 121a 0003 Voodoo5 PCI 5500 - 121a 0009 Voodoo5 AGP 5500/6000 - 0057 Voodoo 3/3000 [Avenger] -121b Advanced Telecommunications Modules -121c Nippon Texaco., Ltd -121d Lippert Automationstechnik GmbH -121e CSPI -121f Arcus Technology, Inc. -1220 Ariel Corporation - 1220 AMCC 5933 TMS320C80 DSP/Imaging board -1221 Contec Co., Ltd -1222 Ancor Communications, Inc. -1223 Artesyn Communication Products - 0003 PM/Link - 0004 PM/T1 - 0005 PM/E1 - 0008 PM/SLS - 0009 BajaSpan Resource Target - 000a BajaSpan Section 0 - 000b BajaSpan Section 1 - 000c BajaSpan Section 2 - 000d BajaSpan Section 3 - 000e PM/PPC -1224 Interactive Images -1225 Power I/O, Inc. -1227 Tech-Source - 0006 Raptor GFX 8P -1228 Norsk Elektro Optikk A/S -1229 Data Kinesis Inc. 
-122a Integrated Telecom -122b LG Industrial Systems Co., Ltd -122c Sican GmbH -122d Aztech System Ltd - 1206 368DSP - 1400 Trident PCI288-Q3DII (NX) - 50dc 3328 Audio - 122d 0001 3328 Audio - 80da 3328 Audio - 122d 0001 3328 Audio -122e Xyratex -122f Andrew Corporation -1230 Fishcamp Engineering -1231 Woodward McCoach, Inc. -1232 GPT Limited -1233 Bus-Tech, Inc. -1234 Technical Corp. -1235 Risq Modular Systems, Inc. -1236 Sigma Designs Corporation - 0000 RealMagic64/GX - 6401 REALmagic 64/GX (SD 6425) -1237 Alta Technology Corporation -1238 Adtran -1239 3DO Company -123a Visicom Laboratories, Inc. -123b Seeq Technology, Inc. -123c Century Systems, Inc. -123d Engineering Design Team, Inc. - 0000 EasyConnect 8/32 - 0002 EasyConnect 8/64 - 0003 EasyIO -123e Simutech, Inc. -123f C-Cube Microsystems - 00e4 MPEG - 8120 E4? - 11bd 0006 DV500 E4 - 11bd 000a DV500 E4 - 11bd 000f DV500 E4 - 8888 Cinemaster C 3.0 DVD Decoder - 1002 0001 Cinemaster C 3.0 DVD Decoder - 1002 0002 Cinemaster C 3.0 DVD Decoder - 1328 0001 Cinemaster C 3.0 DVD Decoder -1240 Marathon Technologies Corp. -1241 DSC Communications -# Formerly Jaycor Networks, Inc. -1242 JNI Corporation - 1560 JNIC-1560 PCI-X Fibre Channel Controller - 1242 6562 FCX2-6562 Dual Channel PCI-X Fibre Channel Adapter - 1242 656a FCX-6562 PCI-X Fibre Channel Adapter - 4643 FCI-1063 Fibre Channel Adapter - 6562 FCX2-6562 Dual Channel PCI-X Fibre Channel Adapter - 656a FCX-6562 PCI-X Fibre Channel Adapter -1243 Delphax -1244 AVM Audiovisuelles MKTG & Computer System GmbH - 0700 B1 ISDN - 0800 C4 ISDN - 0a00 A1 ISDN [Fritz] - 1244 0a00 FRITZ!Card ISDN Controller - 0e00 Fritz!PCI v2.0 ISDN - 1100 C2 ISDN - 1200 T1 ISDN - 2700 Fritz!Card DSL SL - 2900 Fritz!Card DSL v2.0 -1245 A.P.D., S.A. -1246 Dipix Technologies, Inc. -1247 Xylon Research, Inc. -1248 Central Data Corporation -1249 Samsung Electronics Co., Ltd. 
-124a AEG Electrocom GmbH -124b SBS/Greenspring Modular I/O - 0040 PCI-40A or cPCI-200 Quad IndustryPack carrier - 124b 9080 PCI9080 Bridge -124c Solitron Technologies, Inc. -124d Stallion Technologies, Inc. - 0000 EasyConnection 8/32 - 0002 EasyConnection 8/64 - 0003 EasyIO - 0004 EasyConnection/RA -124e Cylink -124f Infortrend Technology, Inc. - 0041 IFT-2000 Series RAID Controller -1250 Hitachi Microcomputer System Ltd -1251 VLSI Solutions Oy -1253 Guzik Technical Enterprises -1254 Linear Systems Ltd. -1255 Optibase Ltd - 1110 MPEG Forge - 1210 MPEG Fusion - 2110 VideoPlex - 2120 VideoPlex CC - 2130 VideoQuest -1256 Perceptive Solutions, Inc. - 4201 PCI-2220I - 4401 PCI-2240I - 5201 PCI-2000 -1257 Vertex Networks, Inc. -1258 Gilbarco, Inc. -1259 Allied Telesyn International - 2560 AT-2560 Fast Ethernet Adapter (i82557B) - a117 RTL81xx Fast Ethernet - a120 21x4x DEC-Tulip compatible 10/100 Ethernet -125a ABB Power Systems -125b Asix Electronics Corporation - 1400 ALFA GFC2204 Fast Ethernet -125c Aurora Technologies, Inc. - 0101 Saturn 4520P - 0640 Aries 16000P -125d ESS Technology - 0000 ES336H Fax Modem (Early Model) - 1948 Solo? 
- 1968 ES1968 Maestro 2 - 1028 0085 ES1968 Maestro-2 PCI - 1033 8051 ES1968 Maestro-2 Audiodrive - 1969 ES1969 Solo-1 Audiodrive - 1014 0166 ES1969 SOLO-1 AudioDrive on IBM Aptiva Mainboard - 125d 8888 Solo-1 Audio Adapter - 153b 111b Terratec 128i PCI - 1978 ES1978 Maestro 2E - 0e11 b112 Armada M700/E500 - 1033 803c ES1978 Maestro-2E Audiodrive - 1033 8058 ES1978 Maestro-2E Audiodrive - 1092 4000 Monster Sound MX400 - 1179 0001 ES1978 Maestro-2E Audiodrive - 1988 ES1988 Allegro-1 - 1092 4100 Sonic Impact S100 - 125d 1988 ESS Allegro-1 Audiodrive - 1989 ESS Modem - 125d 1989 ESS Modem - 1998 ES1983S Maestro-3i PCI Audio Accelerator - 1028 00b1 Latitude C600 - 1028 00e6 ES1983S Maestro-3i (Dell Inspiron 8100) - 1999 ES1983S Maestro-3i PCI Modem Accelerator - 199a ES1983S Maestro-3i PCI Audio Accelerator - 199b ES1983S Maestro-3i PCI Modem Accelerator - 2808 ES336H Fax Modem (Later Model) - 2838 ES2838/2839 SuperLink Modem - 2898 ES2898 Modem - 125d 0424 ES56-PI Data Fax Modem - 125d 0425 ES56T-PI Data Fax Modem - 125d 0426 ES56V-PI Data Fax Modem - 125d 0427 VW-PI Data Fax Modem - 125d 0428 ES56ST-PI Data Fax Modem - 125d 0429 ES56SV-PI Data Fax Modem - 147a c001 ES56-PI Data Fax Modem - 14fe 0428 ES56-PI Data Fax Modem - 14fe 0429 ES56-PI Data Fax Modem -125e Specialvideo Engineering SRL -125f Concurrent Technologies, Inc. 
-1260 Intersil Corporation - 3872 Prism 2.5 Wavelan chipset - 1468 0202 LAN-Express IEEE 802.11b Wireless LAN - 3873 Prism 2.5 Wavelan chipset - 1186 3501 DWL-520 Wireless PCI Adapter - 1186 3700 DWL-520 Wireless PCI Adapter, Rev E1 - 1385 4105 MA311 802.11b wireless adapter - 1668 0414 HWP01170-01 802.11b PCI Wireless Adapter - 16a5 1601 AIR.mate PC-400 PCI Wireless LAN Adapter - 1737 3874 WMP11 Wireless 802.11b PCI Adapter - 8086 2513 Wireless 802.11b MiniPCI Adapter - 3886 ISL3886 [Prism Javelin/Prism Xbow] - 17cf 0037 Z-Com XG-901 and clones Wireless Adapter - 3890 Intersil ISL3890 [Prism GT/Prism Duette] - 10b8 2802 SMC2802W Wireless PCI Adapter - 10b8 2835 SMC2835W Wireless Cardbus Adapter - 10b8 a835 SMC2835W V2 Wireless Cardbus Adapter - 1113 ee03 SMC2802W V2 Wireless PCI Adapter - 1113 ee08 SMC2835W V3 EU Wireless Cardbus Adapter - 1186 3202 DWL-G650 A1 Wireless Adapter - 1259 c104 CG-WLCB54GT Wireless Adapter - 1385 4800 WG511 Wireless Adapter - 16a5 1605 ALLNET ALL0271 Wireless PCI Adapter - 17cf 0014 Z-Com XG-600 and clones Wireless Adapter - 17cf 0020 Z-Com XG-900 and clones Wireless Adapter - 8130 HMP8130 NTSC/PAL Video Decoder - 8131 HMP8131 NTSC/PAL Video Decoder -1261 Matsushita-Kotobuki Electronics Industries, Ltd. -1262 ES Computer Company, Ltd. -1263 Sonic Solutions -1264 Aval Nagasaki Corporation -1265 Casio Computer Co., Ltd. -1266 Microdyne Corporation - 0001 NE10/100 Adapter (i82557B) - 1910 NE2000Plus (RT8029) Ethernet Adapter - 1266 1910 NE2000Plus Ethernet Adapter -1267 S. A. Telecommunications - 5352 PCR2101 - 5a4b Telsat Turbo -1268 Tektronix -1269 Thomson-CSF/TTM -126a Lexmark International, Inc. -126b Adax, Inc. -126c Northern Telecom - 1211 10/100BaseTX [RTL81xx] - 126c 802.11b Wireless Ethernet Adapter -126d Splash Technology, Inc. -126e Sumitomo Metal Industries, Ltd. -126f Silicon Motion, Inc. 
- 0501 SM501 VoyagerGX - 0710 SM710 LynxEM - 0712 SM712 LynxEM+ - 0720 SM720 Lynx3DM - 0730 SM731 Cougar3DR - 0810 SM810 LynxE - 0811 SM811 LynxE - 0820 SM820 Lynx3D - 0910 SM910 -1270 Olympus Optical Co., Ltd. -1271 GW Instruments -1272 Telematics International -1273 Hughes Network Systems - 0002 DirecPC -1274 Ensoniq - 1171 ES1373 [AudioPCI] (also Creative Labs CT5803) - 1371 ES1371 [AudioPCI-97] - 0e11 0024 AudioPCI on Motherboard Compaq Deskpro - 0e11 b1a7 ES1371, ES1373 AudioPCI - 1033 80ac ES1371, ES1373 AudioPCI - 1042 1854 Tazer - 107b 8054 Tabor2 - 1274 1371 Creative Sound Blaster AudioPCI64V, AudioPCI128 - 1462 6470 ES1371, ES1373 AudioPCI On Motherboard MS-6147 1.1A - 1462 6560 ES1371, ES1373 AudioPCI On Motherboard MS-6156 1.10 - 1462 6630 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 1.0A - 1462 6631 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 1.0A - 1462 6632 ES1371, ES1373 AudioPCI On Motherboard MS-6163BX 2.0A - 1462 6633 ES1371, ES1373 AudioPCI On Motherboard MS-6163VIA 2.0A - 1462 6820 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00 - 1462 6822 ES1371, ES1373 AudioPCI On Motherboard MS-6182 1.00A - 1462 6830 ES1371, ES1373 AudioPCI On Motherboard MS-6183 1.00 - 1462 6880 ES1371, ES1373 AudioPCI On Motherboard MS-6188 1.00 - 1462 6900 ES1371, ES1373 AudioPCI On Motherboard MS-6190 1.00 - 1462 6910 ES1371, ES1373 AudioPCI On Motherboard MS-6191 - 1462 6930 ES1371, ES1373 AudioPCI On Motherboard MS-6193 - 1462 6990 ES1371, ES1373 AudioPCI On Motherboard MS-6199BX 2.0A - 1462 6991 ES1371, ES1373 AudioPCI On Motherboard MS-6199VIA 2.0A - 14a4 2077 ES1371, ES1373 AudioPCI On Motherboard KR639 - 14a4 2105 ES1371, ES1373 AudioPCI On Motherboard MR800 - 14a4 2107 ES1371, ES1373 AudioPCI On Motherboard MR801 - 14a4 2172 ES1371, ES1373 AudioPCI On Motherboard DR739 - 1509 9902 ES1371, ES1373 AudioPCI On Motherboard KW11 - 1509 9903 ES1371, ES1373 AudioPCI On Motherboard KW31 - 1509 9904 ES1371, ES1373 AudioPCI On Motherboard KA11 - 1509 9905 
ES1371, ES1373 AudioPCI On Motherboard KC13 - 152d 8801 ES1371, ES1373 AudioPCI On Motherboard CP810E - 152d 8802 ES1371, ES1373 AudioPCI On Motherboard CP810 - 152d 8803 ES1371, ES1373 AudioPCI On Motherboard P3810E - 152d 8804 ES1371, ES1373 AudioPCI On Motherboard P3810-S - 152d 8805 ES1371, ES1373 AudioPCI On Motherboard P3820-S - 270f 2001 ES1371, ES1373 AudioPCI On Motherboard 6CTR - 270f 2200 ES1371, ES1373 AudioPCI On Motherboard 6WTX - 270f 3000 ES1371, ES1373 AudioPCI On Motherboard 6WSV - 270f 3100 ES1371, ES1373 AudioPCI On Motherboard 6WIV2 - 270f 3102 ES1371, ES1373 AudioPCI On Motherboard 6WIV - 270f 7060 ES1371, ES1373 AudioPCI On Motherboard 6ASA2 - 8086 4249 ES1371, ES1373 AudioPCI On Motherboard BI440ZX - 8086 424c ES1371, ES1373 AudioPCI On Motherboard BL440ZX - 8086 425a ES1371, ES1373 AudioPCI On Motherboard BZ440ZX - 8086 4341 ES1371, ES1373 AudioPCI On Motherboard Cayman - 8086 4343 ES1371, ES1373 AudioPCI On Motherboard Cape Cod - 8086 4649 ES1371, ES1373 AudioPCI On Motherboard Fire Island - 8086 464a ES1371, ES1373 AudioPCI On Motherboard FJ440ZX - 8086 4d4f ES1371, ES1373 AudioPCI On Motherboard Montreal - 8086 4f43 ES1371, ES1373 AudioPCI On Motherboard OC440LX - 8086 5243 ES1371, ES1373 AudioPCI On Motherboard RC440BX - 8086 5352 ES1371, ES1373 AudioPCI On Motherboard SunRiver - 8086 5643 ES1371, ES1373 AudioPCI On Motherboard Vancouver - 8086 5753 ES1371, ES1373 AudioPCI On Motherboard WS440BX - 5000 ES1370 [AudioPCI] - 5880 5880 AudioPCI - 1274 2000 Creative Sound Blaster AudioPCI128 - 1274 2003 Creative SoundBlaster AudioPCI 128 - 1274 5880 Creative Sound Blaster AudioPCI128 - 1274 8001 Sound Blaster 16PCI 4.1ch - 1458 a000 5880 AudioPCI On Motherboard 6OXET - 1462 6880 5880 AudioPCI On Motherboard MS-6188 1.00 - 270f 2001 5880 AudioPCI On Motherboard 6CTR - 270f 2200 5880 AudioPCI On Motherboard 6WTX - 270f 7040 5880 AudioPCI On Motherboard 6ATA4 -1275 Network Appliance Corporation -1276 Switched Network Technologies, Inc. 
-1277 Comstream -1278 Transtech Parallel Systems Ltd. - 0701 TPE3/TM3 PowerPC Node - 0710 TPE5 PowerPC PCI board -1279 Transmeta Corporation - 0295 Northbridge - 0395 LongRun Northbridge - 0396 SDRAM controller - 0397 BIOS scratchpad -127a Rockwell International - 1002 HCF 56k Data/Fax Modem - 1092 094c SupraExpress 56i PRO [Diamond SUP2380] - 122d 4002 HPG / MDP3858-U - 122d 4005 MDP3858-E - 122d 4007 MDP3858-A/-NZ - 122d 4012 MDP3858-SA - 122d 4017 MDP3858-W - 122d 4018 MDP3858-W - 127a 1002 Rockwell 56K D/F HCF Modem - 1003 HCF 56k Data/Fax Modem - 0e11 b0bc 229-DF Zephyr - 0e11 b114 229-DF Cheetah - 1033 802b 229-DF - 13df 1003 PCI56RX Modem - 13e0 0117 IBM - 13e0 0147 IBM F-1156IV+/R3 Spain V.90 Modem - 13e0 0197 IBM - 13e0 01c7 IBM F-1156IV+/R3 WW V.90 Modem - 13e0 01f7 IBM - 1436 1003 IBM - 1436 1103 IBM 5614PM3G V.90 Modem - 1436 1602 Compaq 229-DF Ducati - 1004 HCF 56k Data/Fax/Voice Modem - 1048 1500 MicroLink 56k Modem - 10cf 1059 Fujitsu 229-DFRT - 1005 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 1005 127a AOpen FM56-P - 1033 8029 229-DFSV - 1033 8054 Modem - 10cf 103c Fujitsu - 10cf 1055 Fujitsu 229-DFSV - 10cf 1056 Fujitsu 229-DFSV - 122d 4003 MDP3858SP-U - 122d 4006 Packard Bell MDP3858V-E - 122d 4008 MDP3858SP-A/SP-NZ - 122d 4009 MDP3858SP-E - 122d 4010 MDP3858V-U - 122d 4011 MDP3858SP-SA - 122d 4013 MDP3858V-A/V-NZ - 122d 4015 MDP3858SP-W - 122d 4016 MDP3858V-W - 122d 4019 MDP3858V-SA - 13df 1005 PCI56RVP Modem - 13e0 0187 IBM - 13e0 01a7 IBM - 13e0 01b7 IBM DF-1156IV+/R3 Spain V.90 Modem - 13e0 01d7 IBM DF-1156IV+/R3 WW V.90 Modem - 1436 1005 IBM - 1436 1105 IBM - 1437 1105 IBM 5614PS3G V.90 Modem - 1022 HCF 56k Modem - 1436 1303 M3-5614PM3G V.90 Modem - 1023 HCF 56k Data/Fax Modem - 122d 4020 Packard Bell MDP3858-WE - 122d 4023 MDP3858-UE - 13e0 0247 IBM F-1156IV+/R6 Spain V.90 Modem - 13e0 0297 IBM - 13e0 02c7 IBM F-1156IV+/R6 WW V.90 Modem - 1436 1203 IBM - 1436 1303 IBM - 1024 HCF 56k Data/Fax/Voice Modem - 1025 HCF 56k Data/Fax/Voice/Spkp 
(w/Handset) Modem - 10cf 106a Fujitsu 235-DFSV - 122d 4021 Packard Bell MDP3858V-WE - 122d 4022 MDP3858SP-WE - 122d 4024 MDP3858V-UE - 122d 4025 MDP3858SP-UE - 1026 HCF 56k PCI Speakerphone Modem - 1032 HCF 56k Modem - 1033 HCF 56k Modem - 1034 HCF 56k Modem - 1035 HCF 56k PCI Speakerphone Modem - 1036 HCF 56k Modem - 1085 HCF 56k Volcano PCI Modem - 2005 HCF 56k Data/Fax Modem - 104d 8044 229-DFSV - 104d 8045 229-DFSV - 104d 8055 PBE/Aztech 235W-DFSV - 104d 8056 235-DFSV - 104d 805a Modem - 104d 805f Modem - 104d 8074 Modem - 2013 HSF 56k Data/Fax Modem - 1179 0001 Modem - 1179 ff00 Modem - 2014 HSF 56k Data/Fax/Voice Modem - 10cf 1057 Fujitsu Citicorp III - 122d 4050 MSP3880-U - 122d 4055 MSP3880-W - 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 10cf 1063 Fujitsu - 10cf 1064 Fujitsu - 1468 2015 Fujitsu - 2016 HSF 56k Data/Fax/Voice/Spkp Modem - 122d 4051 MSP3880V-W - 122d 4052 MSP3880SP-W - 122d 4054 MSP3880V-U - 122d 4056 MSP3880SP-U - 122d 4057 MSP3880SP-A - 4311 Riptide HSF 56k PCI Modem - 127a 4311 Ring Modular? Riptide HSF RT HP Dom - 13e0 0210 HP-GVC - 4320 Riptide PCI Audio Controller - 1235 4320 Riptide PCI Audio Controller - 4321 Riptide HCF 56k PCI Modem - 1235 4321 Hewlett Packard DF - 1235 4324 Hewlett Packard DF - 13e0 0210 Hewlett Packard DF - 144d 2321 Riptide - 4322 Riptide PCI Game Controller - 1235 4322 Riptide PCI Game Controller - 8234 RapidFire 616X ATM155 Adapter - 108d 0022 RapidFire 616X ATM155 Adapter - 108d 0027 RapidFire 616X ATM155 Adapter -127b Pixera Corporation -127c Crosspoint Solutions, Inc. -127d Vela Research -127e Winnov, L.P. -127f Fujifilm -1280 Photoscript Group Ltd. -1281 Yokogawa Electric Corporation -1282 Davicom Semiconductor, Inc. - 9009 Ethernet 100/10 MBit - 9100 21x4x DEC-Tulip compatible 10/100 Ethernet - 9102 21x4x DEC-Tulip compatible 10/100 Ethernet - 9132 Ethernet 100/10 MBit -1283 Integrated Technology Express, Inc. 
- 673a IT8330G - 8212 IT/ITE8212 Dual channel ATA RAID controller (PCI version seems to be IT8212, embedded seems to be ITE8212) - 1283 0001 IT/ITE8212 Dual channel ATA RAID controller - 8330 IT8330G - 8872 IT8874F PCI Dual Serial Port Controller - 8888 IT8888F PCI to ISA Bridge with SMB - 8889 IT8889F PCI to ISA Bridge - e886 IT8330G -1284 Sahara Networks, Inc. -1285 Platform Technologies, Inc. - 0100 AGOGO sound chip (aka ESS Maestro 1) -1286 Mazet GmbH -1287 M-Pact, Inc. - 001e LS220D DVD Decoder - 001f LS220C DVD Decoder -1288 Timestep Corporation -1289 AVC Technology, Inc. -128a Asante Technologies, Inc. -128b Transwitch Corporation -128c Retix Corporation -128d G2 Networks, Inc. - 0021 ATM155 Adapter -128e Hoontech Corporation/Samho Multi Tech Ltd. - 0008 ST128 WSS/SB - 0009 ST128 SAM9407 - 000a ST128 Game Port - 000b ST128 MPU Port - 000c ST128 Ctrl Port -128f Tateno Dennou, Inc. -1290 Sord Computer Corporation -1291 NCS Computer Italia -1292 Tritech Microelectronics Inc -1293 Media Reality Technology -1294 Rhetorex, Inc. -1295 Imagenation Corporation -1296 Kofax Image Products -1297 Holco Enterprise Co, Ltd/Shuttle Computer -1298 Spellcaster Telecommunications Inc. -1299 Knowledge Technology Lab. -129a VMetro, inc. - 0615 PBT-615 PCI-X Bus Analyzer -129b Image Access -129c Jaycor -129d Compcore Multimedia, Inc. -129e Victor Company of Japan, Ltd. -129f OEC Medical Systems, Inc. -12a0 Allen-Bradley Company -12a1 Simpact Associates, Inc. -12a2 Newgen Systems Corporation -12a3 Lucent Technologies - 8105 T8105 H100 Digital Switch -12a4 NTT Electronics Technology Company -12a5 Vision Dynamics Ltd. -12a6 Scalable Networks, Inc. -12a7 AMO GmbH -12a8 News Datacom -12a9 Xiotech Corporation -12aa SDL Communications, Inc. -12ab Yuan Yuan Enterprise Co., Ltd. - 0002 AU8830 [Vortex2] Based Sound Card With A3D Support - 3000 MPG-200C PCI DVD Decoder Card -12ac Measurex Corporation -12ad Multidata GmbH -12ae Alteon Networks Inc. 
- 0001 AceNIC Gigabit Ethernet - 1014 0104 Gigabit Ethernet-SX PCI Adapter - 12ae 0001 Gigabit Ethernet-SX (Universal) - 1410 0104 Gigabit Ethernet-SX PCI Adapter - 0002 AceNIC Gigabit Ethernet (Copper) - 10a9 8002 Acenic Gigabit Ethernet - 12ae 0002 Gigabit Ethernet-T (3C986-T) - 00fa Farallon PN9100-T Gigabit Ethernet -12af TDK USA Corp -12b0 Jorge Scientific Corp -12b1 GammaLink -12b2 General Signal Networks -12b3 Inter-Face Co Ltd -12b4 FutureTel Inc -12b5 Granite Systems Inc. -12b6 Natural Microsystems -12b7 Cognex Modular Vision Systems Div. - Acumen Inc. -12b8 Korg -12b9 3Com Corp, Modem Division (formerly US Robotics) - 1006 WinModem - 12b9 005c USR 56k Internal Voice WinModem (Model 3472) - 12b9 005e USR 56k Internal WinModem (Models 662975) - 12b9 0062 USR 56k Internal Voice WinModem (Model 662978) - 12b9 0068 USR 56k Internal Voice WinModem (Model 5690) - 12b9 007a USR 56k Internal Voice WinModem (Model 662974) - 12b9 007f USR 56k Internal WinModem (Models 5698, 5699) - 12b9 0080 USR 56k Internal WinModem (Models 2975, 3528) - 12b9 0081 USR 56k Internal Voice WinModem (Models 2974, 3529) - 12b9 0091 USR 56k Internal Voice WinModem (Model 2978) - 1007 USR 56k Internal WinModem - 12b9 00a3 USR 56k Internal WinModem (Model 3595) - 1008 56K FaxModem Model 5610 - 12b9 00a2 USR 56k Internal FAX Modem (Model 2977) - 12b9 00aa USR 56k Internal Voice Modem (Model 2976) - 12b9 00ab USR 56k Internal Voice Modem (Model 5609) - 12b9 00ac USR 56k Internal Voice Modem (Model 3298) - 12b9 00ad USR 56k Internal FAX Modem (Model 5610) -12ba BittWare, Inc. -12bb Nippon Unisoft Corporation -12bc Array Microsystems -12bd Computerm Corp. -12be Anchor Chips Inc. 
- 3041 AN3041Q CO-MEM - 3042 AN3042Q CO-MEM Lite - 12be 3042 Anchor Chips Lite Evaluation Board -12bf Fujifilm Microdevices -12c0 Infimed -12c1 GMM Research Corp -12c2 Mentec Limited -12c3 Holtek Microelectronics Inc - 0058 PCI NE2K Ethernet - 5598 PCI NE2K Ethernet -12c4 Connect Tech Inc - 0001 Blue HEAT/PCI 8 (RS232/CL/RJ11) - 0002 Blue HEAT/PCI 4 (RS232) - 0003 Blue HEAT/PCI 2 (RS232) - 0004 Blue HEAT/PCI 8 (UNIV, RS485) - 0005 Blue HEAT/PCI 4+4/6+2 (UNIV, RS232/485) - 0006 Blue HEAT/PCI 4 (OPTO, RS485) - 0007 Blue HEAT/PCI 2+2 (RS232/485) - 0008 Blue HEAT/PCI 2 (OPTO, Tx, RS485) - 0009 Blue HEAT/PCI 2+6 (RS232/485) - 000a Blue HEAT/PCI 8 (Tx, RS485) - 000b Blue HEAT/PCI 4 (Tx, RS485) - 000c Blue HEAT/PCI 2 (20 MHz, RS485) - 000d Blue HEAT/PCI 2 PTM - 0100 NT960/PCI - 0201 cPCI Titan - 2 Port - 0202 cPCI Titan - 4 Port - 0300 CTI PCI UART 2 (RS232) - 0301 CTI PCI UART 4 (RS232) - 0302 CTI PCI UART 8 (RS232) - 0310 CTI PCI UART 1+1 (RS232/485) - 0311 CTI PCI UART 2+2 (RS232/485) - 0312 CTI PCI UART 4+4 (RS232/485) - 0320 CTI PCI UART 2 - 0321 CTI PCI UART 4 - 0322 CTI PCI UART 8 - 0330 CTI PCI UART 2 (RS485) - 0331 CTI PCI UART 4 (RS485) - 0332 CTI PCI UART 8 (RS485) -12c5 Picture Elements Incorporated - 007e Imaging/Scanning Subsystem Engine - 007f Imaging/Scanning Subsystem Engine - 0081 PCIVST [Grayscale Thresholding Engine] - 0085 Video Simulator/Sender - 0086 THR2 Multi-scale Thresholder -12c6 Mitani Corporation -12c7 Dialogic Corp -12c8 G Force Co, Ltd -12c9 Gigi Operations -12ca Integrated Computing Engines -12cb Antex Electronics Corporation -12cc Pluto Technologies International -12cd Aims Lab -12ce Netspeed Inc. -12cf Prophet Systems, Inc. -12d0 GDE Systems, Inc. 
-12d1 PSITech -12d2 NVidia / SGS Thomson (Joint Venture) - 0008 NV1 - 0009 DAC64 - 0018 Riva128 - 1048 0c10 VICTORY Erazor - 107b 8030 STB Velocity 128 - 1092 0350 Viper V330 - 1092 1092 Viper V330 - 10b4 1b1b STB Velocity 128 - 10b4 1b1d STB Velocity 128 - 10b4 1b1e STB Velocity 128, PAL TV-Out - 10b4 1b20 STB Velocity 128 Sapphire - 10b4 1b21 STB Velocity 128 - 10b4 1b22 STB Velocity 128 AGP, NTSC TV-Out - 10b4 1b23 STB Velocity 128 AGP, PAL TV-Out - 10b4 1b27 STB Velocity 128 DVD - 10b4 1b88 MVP Pro 128 - 10b4 222a STB Velocity 128 AGP - 10b4 2230 STB Velocity 128 - 10b4 2232 STB Velocity 128 - 10b4 2235 STB Velocity 128 AGP - 2a15 54a3 3DVision-SAGP / 3DexPlorer 3000 - 0019 Riva128ZX - 0020 TNT - 0028 TNT2 - 0029 UTNT2 - 002c VTNT2 - 00a0 ITNT2 -12d3 Vingmed Sound A/S -12d4 Ulticom (Formerly DGM&S) - 0200 T1 Card -12d5 Equator Technologies Inc - 0003 BSP16 - 1000 BSP15 -12d6 Analogic Corp -12d7 Biotronic SRL -12d8 Pericom Semiconductor -12d9 Aculab PLC - 0002 PCI Prosody - 0004 cPCI Prosody - 0005 Aculab E1/T1 PCI card -12da True Time Inc. -12db Annapolis Micro Systems, Inc -12dc Symicron Computer Communication Ltd. -12dd Management Graphics -12de Rainbow Technologies - 0200 CryptoSwift CS200 -12df SBS Technologies Inc -12e0 Chase Research - 0010 ST16C654 Quad UART - 0020 ST16C654 Quad UART - 0030 ST16C654 Quad UART -12e1 Nintendo Co, Ltd -12e2 Datum Inc. 
Bancomm-Timing Division -12e3 Imation Corp - Medical Imaging Systems -12e4 Brooktrout Technology Inc -12e5 Apex Semiconductor Inc -12e6 Cirel Systems -12e7 Sunsgroup Corporation -12e8 Crisc Corp -12e9 GE Spacenet -12ea Zuken -12eb Aureal Semiconductor - 0001 Vortex 1 - 104d 8036 AU8820 Vortex Digital Audio Processor - 1092 2000 Sonic Impact A3D - 1092 2100 Sonic Impact A3D - 1092 2110 Sonic Impact A3D - 1092 2200 Sonic Impact A3D - 122d 1002 AU8820 Vortex Digital Audio Processor - 12eb 0001 AU8820 Vortex Digital Audio Processor - 5053 3355 Montego - 0002 Vortex 2 - 104d 8049 AU8830 Vortex 3D Digital Audio Processor - 104d 807b AU8830 Vortex 3D Digital Audio Processor - 1092 3000 Monster Sound II - 1092 3001 Monster Sound II - 1092 3002 Monster Sound II - 1092 3003 Monster Sound II - 1092 3004 Monster Sound II - 12eb 0001 AU8830 Vortex 3D Digital Audio Processor - 12eb 0002 AU8830 Vortex 3D Digital Audio Processor - 12eb 0088 AU8830 Vortex 3D Digital Audio Processor - 144d 3510 AU8830 Vortex 3D Digital Audio Processor - 5053 3356 Montego II - 0003 AU8810 Vortex Digital Audio Processor - 104d 8049 AU8810 Vortex Digital Audio Processor - 104d 8077 AU8810 Vortex Digital Audio Processor - 109f 1000 AU8810 Vortex Digital Audio Processor - 12eb 0003 AU8810 Vortex Digital Audio Processor - 1462 6780 AU8810 Vortex Digital Audio Processor - 14a4 2073 AU8810 Vortex Digital Audio Processor - 14a4 2091 AU8810 Vortex Digital Audio Processor - 14a4 2104 AU8810 Vortex Digital Audio Processor - 14a4 2106 AU8810 Vortex Digital Audio Processor - 8803 Vortex 56k Software Modem - 12eb 8803 Vortex 56k Software Modem -12ec 3A International, Inc. -12ed Optivision Inc. -12ee Orange Micro -12ef Vienna Systems -12f0 Pentek -12f1 Sorenson Vision Inc -12f2 Gammagraphx, Inc. 
-12f3 Radstone Technology -12f4 Megatel -12f5 Forks -12f6 Dawson France -12f7 Cognex -12f8 Electronic Design GmbH - 0002 VideoMaker -12f9 Four Fold Ltd -12fb Spectrum Signal Processing -12fc Capital Equipment Corp -12fd I2S -12fe ESD Electronic System Design GmbH -12ff Lexicon -1300 Harman International Industries Inc -1302 Computer Sciences Corp -1303 Innovative Integration -1304 Juniper Networks -1305 Netphone, Inc -1306 Duet Technologies -# Formerly ComputerBoards -1307 Measurement Computing - 0001 PCI-DAS1602/16 - 000b PCI-DIO48H - 000c PCI-PDISO8 - 000d PCI-PDISO16 - 000f PCI-DAS1200 - 0010 PCI-DAS1602/12 - 0014 PCI-DIO24H - 0015 PCI-DIO24H/CTR3 - 0016 PCI-DIO48H/CTR15 - 0017 PCI-DIO96H - 0018 PCI-CTR05 - 0019 PCI-DAS1200/JR - 001a PCI-DAS1001 - 001b PCI-DAS1002 - 001c PCI-DAS1602JR/16 - 001d PCI-DAS6402/16 - 001e PCI-DAS6402/12 - 001f PCI-DAS16/M1 - 0020 PCI-DDA02/12 - 0021 PCI-DDA04/12 - 0022 PCI-DDA08/12 - 0023 PCI-DDA02/16 - 0024 PCI-DDA04/16 - 0025 PCI-DDA08/16 - 0026 PCI-DAC04/12-HS - 0027 PCI-DAC04/16-HS - 0028 PCI-DIO24 - 0029 PCI-DAS08 - 002c PCI-INT32 - 0033 PCI-DUAL-AC5 - 0034 PCI-DAS-TC - 0035 PCI-DAS64/M1/16 - 0036 PCI-DAS64/M2/16 - 0037 PCI-DAS64/M3/16 - 004c PCI-DAS1000 - 004d PCI-QUAD04 - 0052 PCI-DAS4020/12 - 005e PCI-DAS6025 -1308 Jato Technologies Inc. - 0001 NetCelerator Adapter - 1308 0001 NetCelerator Adapter -1309 AB Semiconductor Ltd -130a Mitsubishi Electric Microcomputer -130b Colorgraphic Communications Corp -130c Ambex Technologies, Inc -130d Accelerix Inc -130e Yamatake-Honeywell Co. Ltd -130f Advanet Inc -1310 Gespac -1311 Videoserver, Inc -1312 Acuity Imaging, Inc -1313 Yaskawa Electric Co. 
-1316 Teradyne Inc -1317 Linksys - 0981 21x4x DEC-Tulip compatible 10/100 Ethernet - 0985 NC100 Network Everywhere Fast Ethernet 10/100 - 1985 21x4x DEC-Tulip compatible 10/100 Ethernet - 2850 HSP MicroModem 56 - 8201 ADMtek ADM8211 802.11b Wireless Interface - 10b8 2635 SMC2635W 802.11b (11Mbps) wireless lan pcmcia (cardbus) card - 1317 8201 SMC2635W 802.11b (11mbps) wireless lan pcmcia (cardbus) card - 8211 ADMtek ADM8211 802.11b Wireless Interface - 9511 21x4x DEC-Tulip compatible 10/100 Ethernet -1318 Packet Engines Inc. - 0911 GNIC-II PCI Gigabit Ethernet [Hamachi] -1319 Fortemedia, Inc - 0801 Xwave QS3000A [FM801] - 0802 Xwave QS3000A [FM801 game port] - 1000 FM801 PCI Audio - 1001 FM801 PCI Joystick -131a Finisar Corp. -131c Nippon Electro-Sensory Devices Corp -131d Sysmic, Inc. -131e Xinex Networks Inc -131f Siig Inc - 1000 CyberSerial (1-port) 16550 - 1001 CyberSerial (1-port) 16650 - 1002 CyberSerial (1-port) 16850 - 1010 Duet 1S(16550)+1P - 1011 Duet 1S(16650)+1P - 1012 Duet 1S(16850)+1P - 1020 CyberParallel (1-port) - 1021 CyberParallel (2-port) - 1030 CyberSerial (2-port) 16550 - 1031 CyberSerial (2-port) 16650 - 1032 CyberSerial (2-port) 16850 - 1034 Trio 2S(16550)+1P - 1035 Trio 2S(16650)+1P - 1036 Trio 2S(16850)+1P - 1050 CyberSerial (4-port) 16550 - 1051 CyberSerial (4-port) 16650 - 1052 CyberSerial (4-port) 16850 - 2000 CyberSerial (1-port) 16550 - 2001 CyberSerial (1-port) 16650 - 2002 CyberSerial (1-port) 16850 - 2010 Duet 1S(16550)+1P - 2011 Duet 1S(16650)+1P - 2012 Duet 1S(16850)+1P - 2020 CyberParallel (1-port) - 2021 CyberParallel (2-port) - 2030 CyberSerial (2-port) 16550 - 131f 2030 PCI Serial Card - 2031 CyberSerial (2-port) 16650 - 2032 CyberSerial (2-port) 16850 - 2040 Trio 1S(16550)+2P - 2041 Trio 1S(16650)+2P - 2042 Trio 1S(16850)+2P - 2050 CyberSerial (4-port) 16550 - 2051 CyberSerial (4-port) 16650 - 2052 CyberSerial (4-port) 16850 - 2060 Trio 2S(16550)+1P - 2061 Trio 2S(16650)+1P - 2062 Trio 2S(16850)+1P - 2081 CyberSerial (8-port) 
ST16654 -1320 Crypto AG -1321 Arcobel Graphics BV -1322 MTT Co., Ltd -1323 Dome Inc -1324 Sphere Communications -1325 Salix Technologies, Inc -1326 Seachange international -1327 Voss scientific -1328 quadrant international -1329 Productivity Enhancement -132a Microcom Inc. -132b Broadband Technologies -132c Micrel Inc -132d Integrated Silicon Solution, Inc. -1330 MMC Networks -1331 Radisys Corp. - 0030 ENP-2611 - 8200 82600 Host Bridge - 8201 82600 IDE - 8202 82600 USB - 8210 82600 PCI Bridge -1332 Micro Memory - 5415 MM-5415CN PCI Memory Module with Battery Backup - 5425 MM-5425CN PCI 64/66 Memory Module with Battery Backup -1334 Redcreek Communications, Inc -1335 Videomail, Inc -1337 Third Planet Publishing -1338 BT Electronics -133a Vtel Corp -133b Softcom Microsystems -133c Holontech Corp -133d SS Technologies -133e Virtual Computer Corp -133f SCM Microsystems -1340 Atalla Corp -1341 Kyoto Microcomputer Co -1342 Promax Systems Inc -1343 Phylon Communications Inc -1344 Crucial Technology -1345 Arescom Inc -1347 Odetics -1349 Sumitomo Electric Industries, Ltd. -134a DTC Technology Corp. - 0001 Domex 536 - 0002 Domex DMX3194UP SCSI Adapter -134b ARK Research Corp. -134c Chori Joho System Co. Ltd -134d PCTel Inc - 2189 HSP56 MicroModem - 2486 2304WT V.92 MDC Modem - 7890 HSP MicroModem 56 - 134d 0001 PCT789 adapter - 7891 HSP MicroModem 56 - 134d 0001 HSP MicroModem 56 - 7892 HSP MicroModem 56 - 7893 HSP MicroModem 56 - 7894 HSP MicroModem 56 - 7895 HSP MicroModem 56 - 7896 HSP MicroModem 56 - 7897 HSP MicroModem 56 -134e CSTI -134f Algo System Co Ltd -1350 Systec Co. 
Ltd -1351 Sonix Inc -1353 Thales Idatys - 0002 Proserver - 0003 PCI-FUT - 0004 PCI-S0 - 0005 PCI-FUT-S0 -1354 Dwave System Inc -1355 Kratos Analytical Ltd -1356 The Logical Co -1359 Prisa Networks -135a Brain Boxes -135b Giganet Inc -135c Quatech Inc - 0010 QSC-100 - 0020 DSC-100 - 0030 DSC-200/300 - 0040 QSC-200/300 - 0050 ESC-100D - 0060 ESC-100M - 00f0 MPAC-100 Syncronous Serial Card (Zilog 85230) - 0170 QSCLP-100 - 0180 DSCLP-100 - 0190 SSCLP-100 - 01a0 QSCLP-200/300 - 01b0 DSCLP-200/300 - 01c0 SSCLP-200/300 -135d ABB Network Partner AB -135e Sealevel Systems Inc - 5101 Route 56.PCI - Multi-Protocol Serial Interface (Zilog Z16C32) - 7101 Single Port RS-232/422/485/530 - 7201 Dual Port RS-232/422/485 Interface - 7202 Dual Port RS-232 Interface - 7401 Four Port RS-232 Interface - 7402 Four Port RS-422/485 Interface - 7801 Eight Port RS-232 Interface - 7804 Eight Port RS-232/422/485 Interface - 8001 8001 Digital I/O Adapter -135f I-Data International A-S -1360 Meinberg Funkuhren - 0101 PCI32 DCF77 Radio Clock - 0102 PCI509 DCF77 Radio Clock - 0103 PCI510 DCF77 Radio Clock - 0201 GPS167PCI GPS Receiver - 0202 GPS168PCI GPS Receiver - 0203 GPS169PCI GPS Receiver - 0301 TCR510PCI IRIG Receiver -1361 Soliton Systems K.K. -1362 Fujifacom Corporation -1363 Phoenix Technology Ltd -1364 ATM Communications Inc -1365 Hypercope GmbH -1366 Teijin Seiki Co. Ltd -1367 Hitachi Zosen Corporation -1368 Skyware Corporation -1369 Digigram -136a High Soft Tech -136b Kawasaki Steel Corporation - ff01 KL5A72002 Motion JPEG -136c Adtek System Science Co Ltd -136d Gigalabs Inc -136f Applied Magic Inc -1370 ATL Products -1371 CNet Technology Inc - 434e GigaCard Network Adapter - 1371 434e N-Way PCI-Bus Giga-Card 1000/100/10Mbps(L) -1373 Silicon Vision Inc -1374 Silicom Ltd -1375 Argosystems Inc -1376 LMC -1377 Electronic Equipment Production & Distribution GmbH -1378 Telemann Co. 
Ltd -1379 Asahi Kasei Microsystems Co Ltd -137a Mark of the Unicorn Inc - 0001 PCI-324 Audiowire Interface -137b PPT Vision -137c Iwatsu Electric Co Ltd -137d Dynachip Corporation -137e Patriot Scientific Corporation -137f Japan Satellite Systems Inc -1380 Sanritz Automation Co Ltd -1381 Brains Co. Ltd -1382 Marian - Electronic & Software - 0001 ARC88 audio recording card - 2008 Prodif 96 Pro sound system - 2088 Marc 8 Midi sound system - 20c8 Marc A sound system - 4008 Marc 2 sound system - 4010 Marc 2 Pro sound system - 4048 Marc 4 MIDI sound system - 4088 Marc 4 Digi sound system - 4248 Marc X sound system -1383 Controlnet Inc -1384 Reality Simulation Systems Inc -1385 Netgear -# Note: This lists as Atheros Communications, Inc. AR5212 802.11abg NIC because of Madwifi - 0013 WG311T - 311a GA511 Gigabit Ethernet - 4100 802.11b Wireless Adapter (MA301) - 4105 MA311 802.11b wireless adapter - 4400 WAG511 802.11a/b/g Dual Band Wireless PC Card - 4600 WAG511 802.11a/b/g Dual Band Wireless PC Card - 4601 WAG511 802.11a/b/g Dual Band Wireless PC Card - 4610 WAG511 802.11a/b/g Dual Band Wireless PC Card - 4a00 WAG311 802.11a/g Wireless PCI Adapter - 4c00 WG311v2 54 Mbps Wireless PCI Adapter - 620a GA620 Gigabit Ethernet - 622a GA622 - 630a GA630 Gigabit Ethernet - f004 FA310TX -1386 Video Domain Technologies -1387 Systran Corp -1388 Hitachi Information Technology Co Ltd -1389 Applicom International - 0001 PCI1500PFB [Intelligent fieldbus adaptor] -138a Fusion Micromedia Corp -138b Tokimec Inc -138c Silicon Reality -138d Future Techno Designs pte Ltd -138e Basler GmbH -138f Patapsco Designs Inc -1390 Concept Development Inc -1391 Development Concepts Inc -1392 Medialight Inc -1393 Moxa Technologies Co Ltd - 1040 Smartio C104H/PCI - 1141 Industrio CP-114 - 1680 Smartio C168H/PCI - 2040 Intellio CP-204J - 2180 Intellio C218 Turbo PCI - 3200 Intellio C320 Turbo PCI -1394 Level One Communications - 0001 LXT1001 Gigabit Ethernet - 1394 0001 NetCelerator Adapter -1395 Ambicom 
Inc -1396 Cipher Systems Inc -1397 Cologne Chip Designs GmbH - 2bd0 ISDN network controller [HFC-PCI] - 1397 2bd0 ISDN Board - e4bf 1000 CI1-1-Harp -1398 Clarion co. Ltd -1399 Rios systems Co Ltd -139a Alacritech Inc - 0001 Quad Port 10/100 Server Accelerator - 0003 Single Port 10/100 Server Accelerator - 0005 Single Port Gigabit Server Accelerator -139b Mediasonic Multimedia Systems Ltd -139c Quantum 3d Inc -139d EPL limited -139e Media4 -139f Aethra s.r.l. -13a0 Crystal Group Inc -13a1 Kawasaki Heavy Industries Ltd -13a2 Ositech Communications Inc -13a3 Hifn Inc. - 0005 7751 Security Processor - 0006 6500 Public Key Processor - 0007 7811 Security Processor - 0012 7951 Security Processor - 0014 78XX Security Processor - 0016 8065 Security Processor - 0017 8165 Security Processor - 0018 8154 Security Processor - 001d 7956 Security Processor - 0020 7955 Security Processor -13a4 Rascom Inc -13a5 Audio Digital Imaging Inc -13a6 Videonics Inc -13a7 Teles AG -13a8 Exar Corp. - 0154 XR17C154 Quad UART - 0158 XR17C158 Octal UART -13a9 Siemens Medical Systems, Ultrasound Group -13aa Broadband Networks Inc -13ab Arcom Control Systems Ltd -13ac Motion Media Technology Ltd -13ad Nexus Inc -13ae ALD Technology Ltd -13af T.Sqware -13b0 Maxspeed Corp -13b1 Tamura corporation -13b2 Techno Chips Co. Ltd -13b3 Lanart Corporation -13b4 Wellbean Co Inc -13b5 ARM -13b6 Dlog GmbH -13b7 Logic Devices Inc -13b8 Nokia Telecommunications oy -13b9 Elecom Co Ltd -13ba Oxford Instruments -13bb Sanyo Technosound Co Ltd -13bc Bitran Corporation -13bd Sharp corporation -13be Miroku Jyoho Service Co. 
Ltd -13bf Sharewave Inc -13c0 Microgate Corporation - 0010 SyncLink Adapter v1 - 0020 SyncLink SCC Adapter - 0030 SyncLink Multiport Adapter - 0210 SyncLink Adapter v2 -13c1 3ware Inc - 1000 3ware Inc 3ware 5xxx/6xxx-series PATA-RAID - 1001 3ware Inc 3ware 7xxx/8xxx-series PATA/SATA-RAID - 13c1 1001 3ware Inc 3ware 7xxx/8xxx-series PATA/SATA-RAID - 1002 3ware Inc 3ware 9xxx-series SATA-RAID -13c2 Technotrend Systemtechnik GmbH -13c3 Janz Computer AG -13c4 Phase Metrics -13c5 Alphi Technology Corp -13c6 Condor Engineering Inc - 0520 CEI-520 A429 Card - 0620 CEI-620 A429 Card - 0820 CEI-820 A429 Card -13c7 Blue Chip Technology Ltd -13c8 Apptech Inc -13c9 Eaton Corporation -13ca Iomega Corporation -13cb Yano Electric Co Ltd -13cc Metheus Corporation -13cd Compatible Systems Corporation -13ce Cocom A/S -13cf Studio Audio & Video Ltd -13d0 Techsan Electronics Co Ltd - 2103 B2C2 FlexCopII DVB chip / Technisat SkyStar2 DVB card - 2200 B2C2 FlexCopIII DVB chip / Technisat SkyStar2 DVB card -13d1 Abocom Systems Inc - ab02 ADMtek Centaur-C rev 17 [D-Link DFE-680TX] CardBus Fast Ethernet Adapter - ab03 21x4x DEC-Tulip compatible 10/100 Ethernet - ab06 RTL8139 [FE2000VX] CardBus Fast Ethernet Attached Port Adapter - ab08 21x4x DEC-Tulip compatible 10/100 Ethernet -13d2 Shark Multimedia Inc -13d3 IMC Networks -13d4 Graphics Microsystems Inc -13d5 Media 100 Inc -13d6 K.I. 
Technology Co Ltd -13d7 Toshiba Engineering Corporation -13d8 Phobos corporation -13d9 Apex PC Solutions Inc -13da Intresource Systems pte Ltd -13db Janich & Klass Computertechnik GmbH -13dc Netboost Corporation -13dd Multimedia Bundle Inc -13de ABB Robotics Products AB -13df E-Tech Inc - 0001 PCI56RVP Modem - 13df 0001 PCI56RVP Modem -13e0 GVC Corporation -13e1 Silicom Multimedia Systems Inc -13e2 Dynamics Research Corporation -13e3 Nest Inc -13e4 Calculex Inc -13e5 Telesoft Design Ltd -13e6 Argosy research Inc -13e7 NAC Incorporated -13e8 Chip Express Corporation -13e9 Intraserver Technology Inc -13ea Dallas Semiconductor -13eb Hauppauge Computer Works Inc -13ec Zydacron Inc -13ed Raytheion E-Systems -13ee Hayes Microcomputer Products Inc -13ef Coppercom Inc -13f0 Sundance Technology Inc - 0201 ST201 Sundance Ethernet -13f1 Oce' - Technologies B.V. -13f2 Ford Microelectronics Inc -13f3 Mcdata Corporation -13f4 Troika Networks, Inc. - 1401 Zentai Fibre Channel Adapter -13f5 Kansai Electric Co. Ltd -13f6 C-Media Electronics Inc - 0011 CMI8738 - 0100 CM8338A - 13f6 ffff CMI8338/C3DX PCI Audio Device - 0101 CM8338B - 13f6 0101 CMI8338-031 PCI Audio Device - 0111 CM8738 - 1019 0970 P6STP-FL motherboard - 1043 8035 CUSI-FX motherboard - 1043 8077 CMI8738 6-channel audio controller - 1043 80e2 CMI8738 6ch-MX - 13f6 0111 CMI8738/C3DX PCI Audio Device - 1681 a000 Gamesurround MUSE XL - 0211 CM8738 -13f7 Wildfire Communications -13f8 Ad Lib Multimedia Inc -13f9 NTT Advanced Technology Corp. -13fa Pentland Systems Ltd -13fb Aydin Corp -13fc Computer Peripherals International -13fd Micro Science Inc -13fe Advantech Co. Ltd - 1240 PCI-1240 4-channel stepper motor controller card w. 
Nova Electronics MCX314 - 1600 PCI-1612 4-port RS-232/422/485 PCI Communication Card - 1752 PCI-1752 - 1754 PCI-1754 - 1756 PCI-1756 -13ff Silicon Spice Inc -1400 Artx Inc - 1401 9432 TX -1401 CR-Systems A/S -1402 Meilhaus Electronic GmbH -1403 Ascor Inc -1404 Fundamental Software Inc -1405 Excalibur Systems Inc -1406 Oce' Printing Systems GmbH -1407 Lava Computer mfg Inc - 0100 Lava Dual Serial - 0101 Lava Quatro A - 0102 Lava Quatro B - 0110 Lava DSerial-PCI Port A - 0111 Lava DSerial-PCI Port B - 0120 Quattro-PCI A - 0121 Quattro-PCI B - 0180 Lava Octo A - 0181 Lava Octo B - 0200 Lava Port Plus - 0201 Lava Quad A - 0202 Lava Quad B - 0220 Lava Quattro PCI Ports A/B - 0221 Lava Quattro PCI Ports C/D - 0500 Lava Single Serial - 0600 Lava Port 650 - 8000 Lava Parallel - 8001 Dual parallel port controller A - 8002 Lava Dual Parallel port A - 8003 Lava Dual Parallel port B - 8800 BOCA Research IOPPAR -1408 Aloka Co. Ltd -1409 Timedia Technology Co Ltd - 7168 PCI2S550 (Dual 16550 UART) -140a DSP Research Inc -140b Ramix Inc -140c Elmic Systems Inc -140d Matsushita Electric Works Ltd -140e Goepel Electronic GmbH -140f Salient Systems Corp -1410 Midas lab Inc -1411 Ikos Systems Inc -# formerly IC Ensemble Inc. -1412 VIA Technologies Inc. 
- 1712 ICE1712 [Envy24] PCI Multi-Channel I/O Controller - 1412 1712 Hoontech ST Audio DSP 24 - 1412 d630 M-Audio Delta 1010 - 1412 d631 M-Audio Delta DiO - 1412 d632 M-Audio Delta 66 - 1412 d633 M-Audio Delta 44 - 1412 d634 M-Audio Delta Audiophile - 1412 d635 M-Audio Delta TDIF - 1412 d637 M-Audio Delta RBUS - 1412 d638 M-Audio Delta 410 - 1412 d63b M-Audio Delta 1010LT - 1412 d63c Digigram VX442 - 1416 1712 Hoontech ST Audio DSP 24 Media 7.1 - 153b 1115 EWS88 MT - 153b 1125 EWS88 MT (Master) - 153b 112b EWS88 D - 153b 112c EWS88 D (Master) - 153b 1130 EWX 24/96 - 153b 1138 DMX 6fire 24/96 - 153b 1151 PHASE88 - 16ce 1040 Edirol DA-2496 - 1724 VT1720/24 [Envy24PT/HT] PCI Multi-Channel Audio Controller - 1412 1724 AMP Ltd AUDIO2000 - 1412 3630 M-Audio Revolution 7.1 - 153b 1145 Aureon 7.1 Space - 153b 1147 Aureon 5.1 Sky - 153b 1153 Aureon 7.1 Universe - 270f f641 ZNF3-150 - 270f f645 ZNF3-250 -1413 Addonics -1414 Microsoft Corporation -1415 Oxford Semiconductor Ltd - 8403 VScom 011H-EP1 1 port parallel adaptor - 9501 OX16PCI954 (Quad 16950 UART) function 0 - 131f 2050 CyberPro (4-port) -# Model IO1085, Part No: JJ-P46012 - 131f 2051 CyberSerial 4S Plus - 15ed 2000 MCCR Serial p0-3 of 8 - 15ed 2001 MCCR Serial p0-3 of 16 - 950a EXSYS EX-41092 Dual 16950 Serial adapter - 950b OXCB950 Cardbus 16950 UART - 9510 OX16PCI954 (Quad 16950 UART) function 1 (Disabled) - 9511 OX16PCI954 (Quad 16950 UART) function 1 - 15ed 2000 MCCR Serial p4-7 of 8 - 15ed 2001 MCCR Serial p4-15 of 16 - 9521 OX16PCI952 (Dual 16950 UART) -1416 Multiwave Innovation pte Ltd -1417 Convergenet Technologies Inc -1418 Kyushu electronics systems Inc -1419 Excel Switching Corp -141a Apache Micro Peripherals Inc -141b Zoom Telephonics Inc -141d Digitan Systems Inc -141e Fanuc Ltd -141f Visiontech Ltd -1420 Psion Dacom plc - 8002 Gold Card NetGlobal 56k+10/100Mb CardBus (Ethernet part) - 8003 Gold Card NetGlobal 56k+10/100Mb CardBus (Modem part) -1421 Ads Technologies Inc -1422 Ygrec Systems Co Ltd -1423 
Custom Technology Corp. -1424 Videoserver Connections -1425 Chelsio Communications Inc -1426 Storage Technology Corp. -1427 Better On-Line Solutions -1428 Edec Co Ltd -1429 Unex Technology Corp. -142a Kingmax Technology Inc -142b Radiolan -142c Minton Optic Industry Co Ltd -142d Pix stream Inc -142e Vitec Multimedia - 4020 VM2-2 [Video Maker 2] MPEG1/2 Encoder -142f Radicom Research Inc -1430 ITT Aerospace/Communications Division -1431 Gilat Satellite Networks -1432 Edimax Computer Co. - 9130 RTL81xx Fast Ethernet -1433 Eltec Elektronik GmbH -1435 Real Time Devices US Inc. -1436 CIS Technology Inc -1437 Nissin Inc Co -1438 Atmel-dream -1439 Outsource Engineering & Mfg. Inc -143a Stargate Solutions Inc -143b Canon Research Center, America -143c Amlogic Inc -143d Tamarack Microelectronics Inc -143e Jones Futurex Inc -143f Lightwell Co Ltd - Zax Division -1440 ALGOL Corp. -1441 AGIE Ltd -1442 Phoenix Contact GmbH & Co. -1443 Unibrain S.A. -1444 TRW -1445 Logical DO Ltd -1446 Graphin Co Ltd -1447 AIM GmBH -1448 Alesis Studio Electronics -1449 TUT Systems Inc -144a Adlink Technology - 7296 PCI-7296 - 7432 PCI-7432 - 7433 PCI-7433 - 7434 PCI-7434 - 7841 PCI-7841 - 8133 PCI-8133 - 8164 PCI-8164 - 8554 PCI-8554 - 9111 PCI-9111 - 9113 PCI-9113 - 9114 PCI-9114 -144b Loronix Information Systems Inc -144c Catalina Research Inc -144d Samsung Electronics Co Ltd -144e OLITEC -144f Askey Computer Corp. -1450 Octave Communications Ind. -1451 SP3D Chip Design GmBH -1453 MYCOM Inc -1454 Altiga Networks -1455 Logic Plus Plus Inc -1456 Advanced Hardware Architectures -1457 Nuera Communications Inc -1458 Giga-byte Technology - 0c11 K8NS Pro Mainboard -1459 DOOIN Electronics -145a Escalate Networks Inc -145b PRAIM SRL -145c Cryptek -145d Gallant Computer Inc -145e Aashima Technology B.V. -145f Baldor Electric Company - 0001 NextMove PCI -1460 DYNARC INC -1461 Avermedia Technologies Inc -1462 Micro-Star International Co., Ltd. 
-# MSI CB54G Wireless PC Card that seems to use the Broadcom 4306 Chipset - 6819 Broadcom Corporation BCM4306 802.11b/g Wireless LAN Controller [MSI CB54G] - 6825 PCI Card wireless 11g [PC54G] - 8725 NVIDIA NV25 [GeForce4 Ti 4600] VGA Adapter -# MSI G4Ti4800, 128MB DDR SDRAM, TV-Out, DVI-I - 9000 NVIDIA NV28 [GeForce4 Ti 4800] VGA Adapter - 9110 GeFORCE FX5200 - 9119 NVIDIA NV31 [GeForce FX 5600XT] VGA Adapter - 9591 nVidia Corporation NV36 [GeForce FX 5700LE] -1463 Fast Corporation -1464 Interactive Circuits & Systems Ltd -1465 GN NETTEST Telecom DIV. -1466 Designpro Inc. -1467 DIGICOM SPA -1468 AMBIT Microsystem Corp. -1469 Cleveland Motion Controls -146a IFR -146b Parascan Technologies Ltd -146c Ruby Tech Corp. - 1430 FE-1430TX Fast Ethernet PCI Adapter -146d Tachyon, INC. -146e Williams Electronics Games, Inc. -146f Multi Dimensional Consulting Inc -1470 Bay Networks -1471 Integrated Telecom Express Inc -1472 DAIKIN Industries, Ltd -1473 ZAPEX Technologies Inc -1474 Doug Carson & Associates -1475 PICAZO Communications -1476 MORTARA Instrument Inc -1477 Net Insight -1478 DIATREND Corporation -1479 TORAY Industries Inc -147a FORMOSA Industrial Computing -147b ABIT Computer Corp. -147c AWARE, Inc. -147d Interworks Computer Products -147e Matsushita Graphic Communication Systems, Inc. -147f NIHON UNISYS, Ltd. -1480 SCII Telecom -1481 BIOPAC Systems Inc -1482 ISYTEC - Integrierte Systemtechnik GmBH -1483 LABWAY Corporation -1484 Logic Corporation -1485 ERMA - Electronic GmBH -1486 L3 Communications Telemetry & Instrumentation -1487 MARQUETTE Medical Systems -1488 KONTRON Electronik GmBH -1489 KYE Systems Corporation -148a OPTO -148b INNOMEDIALOGIC Inc. -148c C.P. Technology Co. Ltd -148d DIGICOM Systems, Inc. - 1003 HCF 56k Data/Fax Modem -148e OSI Plus Corporation -148f Plant Equipment, Inc. -1490 Stone Microsystems PTY Ltd. -1491 ZEAL Corporation -1492 Time Logic Corporation -1493 MAKER Communications -1494 WINTOP Technology, Inc. 
-1495 TOKAI Communications Industry Co. Ltd -1496 JOYTECH Computer Co., Ltd. -1497 SMA Regelsysteme GmBH -1498 TEWS Datentechnik GmBH - 30c8 TPCI200 -1499 EMTEC CO., Ltd -149a ANDOR Technology Ltd -149b SEIKO Instruments Inc -149c OVISLINK Corp. -149d NEWTEK Inc - 0001 Video Toaster for PC -149e Mapletree Networks Inc. -149f LECTRON Co Ltd -14a0 SOFTING GmBH -14a1 Systembase Co Ltd -14a2 Millennium Engineering Inc -14a3 Maverick Networks -14a4 GVC/BCM Advanced Research -14a5 XIONICS Document Technologies Inc -14a6 INOVA Computers GmBH & Co KG -14a7 MYTHOS Systems Inc -14a8 FEATRON Technologies Corporation -14a9 HIVERTEC Inc -14aa Advanced MOS Technology Inc -14ab Mentor Graphics Corp. -14ac Novaweb Technologies Inc -14ad Time Space Radio AB -14ae CTI, Inc -14af Guillemot Corporation - 7102 3D Prophet II MX -14b0 BST Communication Technology Ltd -14b1 Nextcom K.K. -14b2 ENNOVATE Networks Inc -14b3 XPEED Inc - 0000 DSL NIC -14b4 PHILIPS Business Electronics B.V. -14b5 Creamware GmBH - 0200 Scope - 0300 Pulsar - 0400 PulsarSRB - 0600 Pulsar2 - 0800 DSP-Board - 0900 DSP-Board - 0a00 DSP-Board - 0b00 DSP-Board -14b6 Quantum Data Corp. -14b7 PROXIM Inc - 0001 Symphony 4110 -14b8 Techsoft Technology Co Ltd -14b9 AIRONET Wireless Communications - 0001 PC4800 - 0340 PC4800 - 0350 PC4800 - 4500 PC4500 - 4800 Cisco Aironet 340 802.11b Wireless LAN Adapter/Aironet PC4800 - a504 Cisco Aironet Wireless 802.11b - a505 Cisco Aironet CB20a 802.11a Wireless LAN Adapter - a506 Cisco Aironet Mini PCI b/g -14ba INTERNIX Inc. -14bb SEMTECH Corporation -14bc Globespan Semiconductor Inc. -14bd CARDIO Control N.V. -14be L3 Communications -14bf SPIDER Communications Inc. -14c0 COMPAL Electronics Inc -14c1 MYRICOM Inc. - 8043 Myrinet 2000 Scalable Cluster Interconnect -14c2 DTK Computer -14c3 MEDIATEK Corp. -14c4 IWASAKI Information Systems Co Ltd -14c5 Automation Products AB -14c6 Data Race Inc -14c7 Modular Technology Holdings Ltd -14c8 Turbocomm Tech. Inc. 
-14c9 ODIN Telesystems Inc -14ca PE Logic Corp. -14cb Billionton Systems Inc -14cc NAKAYO Telecommunications Inc -14cd Universal Scientific Ind. -14ce Whistle Communications -14cf TEK Microsystems Inc. -14d0 Ericsson Axe R & D -14d1 Computer Hi-Tech Co Ltd -14d2 Titan Electronics Inc - 8001 VScom 010L 1 port parallel adaptor - 8002 VScom 020L 2 port parallel adaptor - 8010 VScom 100L 1 port serial adaptor - 8011 VScom 110L 1 port serial and 1 port parallel adaptor - 8020 VScom 200L 1 port serial adaptor - 8021 VScom 210L 2 port serial and 1 port parallel adaptor - 8040 VScom 400L 4 port serial adaptor - 8080 VScom 800L 8 port serial adaptor - a000 VScom 010H 1 port parallel adaptor - a001 VScom 100H 1 port serial adaptor - a003 VScom 400H 4 port serial adaptor - a004 VScom 400HF1 4 port serial adaptor - a005 VScom 200H 2 port serial adaptor - e001 VScom 010HV2 1 port parallel adaptor - e010 VScom 100HV2 1 port serial adaptor - e020 VScom 200HV2 2 port serial adaptor -14d3 CIRTECH (UK) Ltd -14d4 Panacom Technology Corp -14d5 Nitsuko Corporation -14d6 Accusys Inc -14d7 Hirakawa Hewtech Corp -14d8 HOPF Elektronik GmBH -# Formerly SiPackets, Inc., formerly API NetWorks, Inc., formerly Alpha Processor, Inc. 
-14d9 Alliance Semiconductor Corporation - 0010 AP1011/SP1011 HyperTransport-PCI Bridge [Sturgeon] - 9000 AS90L10204/10208 HyperTransport to PCI-X Bridge -14da National Aerospace Laboratories -14db AFAVLAB Technology Inc - 2120 TK9902 -14dc Amplicon Liveline Ltd - 0000 PCI230 - 0001 PCI242 - 0002 PCI244 - 0003 PCI247 - 0004 PCI248 - 0005 PCI249 - 0006 PCI260 - 0007 PCI224 - 0008 PCI234 - 0009 PCI236 - 000a PCI272 - 000b PCI215 -14dd Boulder Design Labs Inc -14de Applied Integration Corporation -14df ASIC Communications Corp -14e1 INVERTEX -14e2 INFOLIBRIA -14e3 AMTELCO -14e4 Broadcom Corporation - 0800 Sentry5 Chipcommon I/O Controller - 0804 Sentry5 PCI Bridge - 0805 Sentry5 MIPS32 CPU - 0806 Sentry5 Ethernet Controller - 080b Sentry5 Crypto Accelerator - 080f Sentry5 DDR/SDR RAM Controller - 0811 Sentry5 External Interface Core - 0816 BCM3302 Sentry5 MIPS32 CPU - 1600 NetXtreme BCM5752 Gigabit Ethernet PCI Express - 1644 NetXtreme BCM5700 Gigabit Ethernet - 1014 0277 Broadcom Vigil B5700 1000Base-T - 1028 00d1 Broadcom BCM5700 - 1028 0106 Broadcom BCM5700 - 1028 0109 Broadcom BCM5700 1000Base-T - 1028 010a Broadcom BCM5700 1000BaseTX - 10b7 1000 3C996-T 1000Base-T - 10b7 1001 3C996B-T 1000Base-T - 10b7 1002 3C996C-T 1000Base-T - 10b7 1003 3C997-T 1000Base-T Dual Port - 10b7 1004 3C996-SX 1000Base-SX - 10b7 1005 3C997-SX 1000Base-SX Dual Port - 10b7 1008 3C942 Gigabit LOM (31X31) - 14e4 0002 NetXtreme 1000Base-SX - 14e4 0003 NetXtreme 1000Base-SX - 14e4 0004 NetXtreme 1000Base-T - 14e4 1028 NetXtreme 1000BaseTX - 14e4 1644 BCM5700 1000Base-T - 1645 NetXtreme BCM5701 Gigabit Ethernet - 0e11 007c NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T) - 0e11 007d NC6770 Gigabit Server Adapter (PCI-X, 1000-SX) - 0e11 0085 NC7780 Gigabit Server Adapter (embedded, WOL) - 0e11 0099 NC7780 Gigabit Server Adapter (embedded, WOL) - 0e11 009a NC7770 Gigabit Server Adapter (PCI-X, 10/100/1000-T) - 0e11 00c1 NC6770 Gigabit Server Adapter (PCI-X, 1000-SX) - 1028 0121 Broadcom 
BCM5701 1000Base-T - 103c 128a HP 1000Base-T (PCI) [A7061A] - 103c 128b HP 1000Base-SX (PCI) [A7073A] - 103c 12a4 HP Core Lan 1000Base-T - 103c 12c1 HP IOX Core Lan 1000Base-T [A7109AX] - 10a9 8010 SGI IO9 Gigabit Ethernet (Copper) - 10a9 8011 SGI Gigabit Ethernet (Copper) - 10a9 8012 SGI Gigabit Ethernet (Fiber) - 10b7 1004 3C996-SX 1000Base-SX - 10b7 1006 3C996B-T 1000Base-T - 10b7 1007 3C1000-T 1000Base-T - 10b7 1008 3C940-BR01 1000Base-T - 14e4 0001 BCM5701 1000Base-T - 14e4 0005 BCM5701 1000Base-T - 14e4 0006 BCM5701 1000Base-T - 14e4 0007 BCM5701 1000Base-SX - 14e4 0008 BCM5701 1000Base-T - 14e4 8008 BCM5701 1000Base-T - 1646 NetXtreme BCM5702 Gigabit Ethernet - 0e11 00bb NC7760 1000BaseTX - 1028 0126 Broadcom BCM5702 1000BaseTX - 14e4 8009 BCM5702 1000BaseTX - 1647 NetXtreme BCM5703 Gigabit Ethernet - 0e11 0099 NC7780 1000BaseTX - 0e11 009a NC7770 1000BaseTX - 10a9 8010 SGI IO9 Gigabit Ethernet (Copper) - 14e4 0009 BCM5703 1000BaseTX - 14e4 000a BCM5703 1000BaseSX - 14e4 000b BCM5703 1000BaseTX - 14e4 8009 BCM5703 1000BaseTX - 14e4 800a BCM5703 1000BaseTX - 1648 NetXtreme BCM5704 Gigabit Ethernet - 0e11 00cf NC7772 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 0e11 00d0 NC7782 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 0e11 00d1 NC7783 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 10b7 2000 3C998-T Dual Port 10/100/1000 PCI-X - 10b7 3000 3C999-T Quad Port 10/100/1000 PCI-X - 1166 1648 NetXtreme CIOB-E 1000Base-T - 164a NetXtreme II BCM5706 Gigabit Ethernet - 164d NetXtreme BCM5702FE Gigabit Ethernet - 1653 NetXtreme BCM5705 Gigabit Ethernet - 0e11 00e3 NC7761 Gigabit Server Adapter - 1654 NetXtreme BCM5705_2 Gigabit Ethernet - 0e11 00e3 NC7761 Gigabit Server Adapter - 103c 3100 NC1020 HP ProLiant Gigabit Server Adapter 32 PCI - 1659 NetXtreme BCM5721 Gigabit Ethernet PCI Express - 165d NetXtreme BCM5705M Gigabit Ethernet - 165e NetXtreme BCM5705M_2 Gigabit Ethernet - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 166e 570x 10/100 Integrated 
Controller - 1677 NetXtreme BCM5751 Gigabit Ethernet PCI Express - 1028 0179 Optiplex GX280 - 167d NetXtreme BCM5751M Gigabit Ethernet PCI Express - 167e NetXtreme BCM5751F Fast Ethernet PCI Express - 1696 NetXtreme BCM5782 Gigabit Ethernet - 103c 12bc HP d530 CMT (DG746A) - 14e4 000d NetXtreme BCM5782 1000Base-T - 169c NetXtreme BCM5788 Gigabit Ethernet - 169d NetLink BCM5789 Gigabit Ethernet PCI Express - 16a6 NetXtreme BCM5702X Gigabit Ethernet - 0e11 00bb NC7760 Gigabit Server Adapter (PCI-X, 10/100/1000-T) - 1028 0126 BCM5702 1000Base-T - 14e4 000c BCM5702 1000Base-T - 14e4 8009 BCM5702 1000Base-T - 16a7 NetXtreme BCM5703X Gigabit Ethernet - 0e11 00ca NC7771 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 0e11 00cb NC7781 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 14e4 0009 NetXtreme BCM5703 1000Base-T - 14e4 000a NetXtreme BCM5703 1000Base-SX - 14e4 000b NetXtreme BCM5703 1000Base-T - 14e4 800a NetXtreme BCM5703 1000Base-T - 16a8 NetXtreme BCM5704S Gigabit Ethernet - 10b7 2001 3C998-SX Dual Port 1000-SX PCI-X - 16aa NetXtreme II BCM5706S Gigabit Ethernet - 16c6 NetXtreme BCM5702A3 Gigabit Ethernet - 10b7 1100 3C1000B-T 10/100/1000 PCI - 14e4 000c BCM5702 1000Base-T - 14e4 8009 BCM5702 1000Base-T - 16c7 NetXtreme BCM5703 Gigabit Ethernet - 0e11 00ca NC7771 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 0e11 00cb NC7781 Gigabit Server Adapter (PCI-X, 10,100,1000-T) - 103c 12c3 HP Combo FC/GigE-SX [A9782A] - 103c 12ca HP Combo FC/GigE-T [A9784A] - 14e4 0009 NetXtreme BCM5703 1000Base-T - 14e4 000a NetXtreme BCM5703 1000Base-SX - 16dd NetLink BCM5781 Gigabit Ethernet PCI Express - 16f7 NetXtreme BCM5753 Gigabit Ethernet PCI Express - 16fd NetXtreme BCM5753M Gigabit Ethernet PCI Express - 16fe NetXtreme BCM5753F Fast Ethernet PCI Express - 170c BCM4401-B0 100Base-TX - 170d NetXtreme BCM5901 100Base-TX - 1014 0545 ThinkPad R40e (2684-HVG) builtin ethernet controller - 170e NetXtreme BCM5901 100Base-TX - 3352 BCM3352 - 3360 BCM3360 - 4210 BCM4210 iLine10 
HomePNA 2.0 - 4211 BCM4211 iLine10 HomePNA 2.0 + V.90 56k modem - 4212 BCM4212 v.90 56k modem - 4301 BCM4303 802.11b Wireless LAN Controller - 1028 0407 TrueMobile 1180 Onboard WLAN - 1043 0120 WL-103b Wireless LAN PC Card - 4305 BCM4307 V.90 56k Modem - 4306 BCM4307 Ethernet Controller - 4307 BCM4307 802.11b Wireless LAN Controller - 4310 BCM4310 Chipcommon I/OController - 4312 BCM4310 UART - 4313 BCM4310 Ethernet Controller - 4315 BCM4310 USB Controller - 4320 BCM4306 802.11b/g Wireless LAN Controller - 1028 0001 TrueMobile 1300 WLAN Mini-PCI Card - 1028 0003 Wireless 1350 WLAN Mini-PCI Card - 1043 100f WL-100G - 14e4 4320 Linksys WMP54G PCI - 1737 4320 WPC54G - 1799 7010 Belkin F5D7010 54g Wireless Network card - 4321 BCM4306 802.11a Wireless LAN Controller - 4322 BCM4306 UART - 4324 BCM4309 802.11a/b/g - 1028 0001 Truemobile 1400 - 1028 0003 Truemobile 1450 MiniPCI - 4325 BCM43xG 802.11b/g - 1414 0003 Wireless Notebook Adapter MN-720 - 1414 0004 Wireless PCI Adapter MN-730 -# probably this is a correct ID... - 4326 BCM4307 Chipcommon I/O Controller? 
- 4401 BCM4401 100Base-T - 1043 80a8 A7V8X motherboard - 4402 BCM4402 Integrated 10/100BaseT - 4403 BCM4402 V.90 56k Modem - 4410 BCM4413 iLine32 HomePNA 2.0 - 4411 BCM4413 V.90 56k modem - 4412 BCM4412 10/100BaseT - 4430 BCM44xx CardBus iLine32 HomePNA 2.0 - 4432 BCM4432 CardBus 10/100BaseT - 4610 BCM4610 Sentry5 PCI to SB Bridge - 4611 BCM4610 Sentry5 iLine32 HomePNA 1.0 - 4612 BCM4610 Sentry5 V.90 56k Modem - 4613 BCM4610 Sentry5 Ethernet Controller - 4614 BCM4610 Sentry5 External Interface - 4615 BCM4610 Sentry5 USB Controller - 4704 BCM4704 PCI to SB Bridge - 4705 BCM4704 Sentry5 802.11b Wireless LAN Controller - 4706 BCM4704 Sentry5 Ethernet Controller - 4707 BCM4704 Sentry5 USB Controller - 4708 BCM4704 Crypto Accelerator - 4710 BCM4710 Sentry5 PCI to SB Bridge - 4711 BCM47xx Sentry5 iLine32 HomePNA 2.0 - 4712 BCM47xx V.92 56k modem - 4713 Sentry5 Ethernet Controller - 4714 BCM47xx Sentry5 External Interface - 4715 Sentry5 USB Controller - 4716 BCM47xx Sentry5 USB Host Controller - 4717 BCM47xx Sentry5 USB Device Controller - 4718 Sentry5 Crypto Accelerator - 4720 BCM4712 MIPS CPU - 5365 BCM5365P Sentry5 Host Bridge - 5600 BCM5600 StrataSwitch 24+2 Ethernet Switch Controller - 5605 BCM5605 StrataSwitch 24+2 Ethernet Switch Controller - 5615 BCM5615 StrataSwitch 24+2 Ethernet Switch Controller - 5625 BCM5625 StrataSwitch 24+2 Ethernet Switch Controller - 5645 BCM5645 StrataSwitch 24+2 Ethernet Switch Controller - 5670 BCM5670 8-Port 10GE Ethernet Switch Fabric - 5680 BCM5680 G-Switch 8 Port Gigabit Ethernet Switch Controller - 5690 BCM5690 12-port Multi-Layer Gigabit Ethernet Switch - 5691 BCM5691 GE/10GE 8+2 Gigabit Ethernet Switch Controller - 5820 BCM5820 Crypto Accelerator - 5821 BCM5821 Crypto Accelerator - 5822 BCM5822 Crypto Accelerator - 5823 BCM5823 Crypto Accelerator - 5824 BCM5824 Crypto Accelerator - 5840 BCM5840 Crypto Accelerator - 5841 BCM5841 Crypto Accelerator - 5850 BCM5850 Crypto Accelerator -14e5 Pixelfusion Ltd -14e6 SHINING Technology 
Inc -14e7 3CX -14e8 RAYCER Inc -14e9 GARNETS System CO Ltd -14ea Planex Communications, Inc - ab06 FNW-3603-TX CardBus Fast Ethernet - ab07 RTL81xx RealTek Ethernet -14eb SEIKO EPSON Corp -14ec ACQIRIS -14ed DATAKINETICS Ltd -14ee MASPRO KENKOH Corp -14ef CARRY Computer ENG. CO Ltd -14f0 CANON RESEACH CENTRE FRANCE -14f1 Conexant - 1002 HCF 56k Modem - 1003 HCF 56k Modem - 1004 HCF 56k Modem - 1005 HCF 56k Modem - 1006 HCF 56k Modem - 1022 HCF 56k Modem - 1023 HCF 56k Modem - 1024 HCF 56k Modem - 1025 HCF 56k Modem - 1026 HCF 56k Modem - 1032 HCF 56k Modem - 1033 HCF 56k Data/Fax Modem - 1033 8077 NEC - 122d 4027 Dell Zeus - MDP3880-W(B) Data Fax Modem - 122d 4030 Dell Mercury - MDP3880-U(B) Data Fax Modem - 122d 4034 Dell Thor - MDP3880-W(U) Data Fax Modem - 13e0 020d Dell Copper - 13e0 020e Dell Silver - 13e0 0261 IBM - 13e0 0290 Compaq Goldwing - 13e0 02a0 IBM - 13e0 02b0 IBM - 13e0 02c0 Compaq Scooter - 13e0 02d0 IBM - 144f 1500 IBM P85-DF (1) - 144f 1501 IBM P85-DF (2) - 144f 150a IBM P85-DF (3) - 144f 150b IBM P85-DF Low Profile (1) - 144f 1510 IBM P85-DF Low Profile (2) - 1034 HCF 56k Data/Fax/Voice Modem - 1035 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 10cf 1098 Fujitsu P85-DFSV - 1036 HCF 56k Data/Fax/Voice/Spkp Modem - 104d 8067 HCF 56k Modem - 122d 4029 MDP3880SP-W - 122d 4031 MDP3880SP-U - 13e0 0209 Dell Titanium - 13e0 020a Dell Graphite - 13e0 0260 Gateway Red Owl - 13e0 0270 Gateway White Horse - 1052 HCF 56k Data/Fax Modem (Worldwide) - 1053 HCF 56k Data/Fax Modem (Worldwide) - 1054 HCF 56k Data/Fax/Voice Modem (Worldwide) - 1055 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (Worldwide) - 1056 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide) - 1057 HCF 56k Data/Fax/Voice/Spkp Modem (Worldwide) - 1059 HCF 56k Data/Fax/Voice Modem (Worldwide) - 1063 HCF 56k Data/Fax Modem - 1064 HCF 56k Data/Fax/Voice Modem - 1065 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 1066 HCF 56k Data/Fax/Voice/Spkp Modem - 122d 4033 Dell Athena - MDP3900V-U - 1433 HCF 
56k Data/Fax Modem - 1434 HCF 56k Data/Fax/Voice Modem - 1435 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 1436 HCF 56k Data/Fax Modem - 1453 HCF 56k Data/Fax Modem - 13e0 0240 IBM - 13e0 0250 IBM - 144f 1502 IBM P95-DF (1) - 144f 1503 IBM P95-DF (2) - 1454 HCF 56k Data/Fax/Voice Modem - 1455 HCF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 1456 HCF 56k Data/Fax/Voice/Spkp Modem - 122d 4035 Dell Europa - MDP3900V-W - 122d 4302 Dell MP3930V-W(C) MiniPCI - 1610 ADSL AccessRunner PCI Arbitration Device - 1611 AccessRunner PCI ADSL Interface Device - 1620 ADSL AccessRunner V2 PCI Arbitration Device - 1621 AccessRunner V2 PCI ADSL Interface Device - 1622 AccessRunner V2 PCI ADSL Yukon WAN Adapter - 1803 HCF 56k Modem - 0e11 0023 623-LAN Grizzly - 0e11 0043 623-LAN Yogi - 1815 HCF 56k Modem - 0e11 0022 Grizzly - 0e11 0042 Yogi - 2003 HSF 56k Data/Fax Modem - 2004 HSF 56k Data/Fax/Voice Modem - 2005 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 2006 HSF 56k Data/Fax/Voice/Spkp Modem - 2013 HSF 56k Data/Fax Modem - 0e11 b195 Bear - 0e11 b196 Seminole 1 - 0e11 b1be Seminole 2 - 1025 8013 Acer - 1033 809d NEC - 1033 80bc NEC - 155d 6793 HP - 155d 8850 E Machines - 2014 HSF 56k Data/Fax/Voice Modem - 2015 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem - 2016 HSF 56k Data/Fax/Voice/Spkp Modem - 2043 HSF 56k Data/Fax Modem (WorldW SmartDAA) - 2044 HSF 56k Data/Fax/Voice Modem (WorldW SmartDAA) - 2045 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (WorldW SmartDAA) - 2046 HSF 56k Data/Fax/Voice/Spkp Modem (WorldW SmartDAA) - 2063 HSF 56k Data/Fax Modem (SmartDAA) - 2064 HSF 56k Data/Fax/Voice Modem (SmartDAA) - 2065 HSF 56k Data/Fax/Voice/Spkp (w/Handset) Modem (SmartDAA) - 2066 HSF 56k Data/Fax/Voice/Spkp Modem (SmartDAA) - 2093 HSF 56k Modem - 155d 2f07 Legend - 2143 HSF 56k Data/Fax/Cell Modem (Mob WorldW SmartDAA) - 2144 HSF 56k Data/Fax/Voice/Cell Modem (Mob WorldW SmartDAA) - 2145 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob WorldW SmartDAA) - 2146 HSF 56k 
Data/Fax/Voice/Spkp/Cell Modem (Mob WorldW SmartDAA) - 2163 HSF 56k Data/Fax/Cell Modem (Mob SmartDAA) - 2164 HSF 56k Data/Fax/Voice/Cell Modem (Mob SmartDAA) - 2165 HSF 56k Data/Fax/Voice/Spkp (w/HS)/Cell Modem (Mob SmartDAA) - 2166 HSF 56k Data/Fax/Voice/Spkp/Cell Modem (Mob SmartDAA) - 2343 HSF 56k Data/Fax CardBus Modem (Mob WorldW SmartDAA) - 2344 HSF 56k Data/Fax/Voice CardBus Modem (Mob WorldW SmartDAA) - 2345 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob WorldW SmartDAA) - 2346 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob WorldW SmartDAA) - 2363 HSF 56k Data/Fax CardBus Modem (Mob SmartDAA) - 2364 HSF 56k Data/Fax/Voice CardBus Modem (Mob SmartDAA) - 2365 HSF 56k Data/Fax/Voice/Spkp (w/HS) CardBus Modem (Mob SmartDAA) - 2366 HSF 56k Data/Fax/Voice/Spkp CardBus Modem (Mob SmartDAA) - 2443 HSF 56k Data/Fax Modem (Mob WorldW SmartDAA) - 104d 8075 Modem - 104d 8083 Modem - 104d 8097 Modem - 2444 HSF 56k Data/Fax/Voice Modem (Mob WorldW SmartDAA) - 2445 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob WorldW SmartDAA) - 2446 HSF 56k Data/Fax/Voice/Spkp Modem (Mob WorldW SmartDAA) - 2463 HSF 56k Data/Fax Modem (Mob SmartDAA) - 2464 HSF 56k Data/Fax/Voice Modem (Mob SmartDAA) - 2465 HSF 56k Data/Fax/Voice/Spkp (w/HS) Modem (Mob SmartDAA) - 2466 HSF 56k Data/Fax/Voice/Spkp Modem (Mob SmartDAA) - 2f00 HSF 56k HSFi Modem - 13e0 8d84 IBM HSFi V.90 - 13e0 8d85 Compaq Stinger - 14f1 2004 Dynalink 56PMi - 2f02 HSF 56k HSFi Data/Fax - 2f11 HSF 56k HSFi Modem - 8234 RS8234 ATM SAR Controller [ServiceSAR Plus] - 8800 CX22702 DVB-T 2k/8k - 17de 08a1 XPert DVB-T PCI BDA DVBT 23880 Video Capture - 8802 CX23883 Broadcast Decoder - 17de 08a1 Xpert DVB-T PCI 2388x Transport Stream Capture -14f2 MOBILITY Electronics - 0120 EV1000 bridge - 0121 EV1000 Parallel port - 0122 EV1000 Serial port - 0123 EV1000 Keyboard controller - 0124 EV1000 Mouse controller -14f3 BroadLogic - 2030 2030 DVB-S Satellite Reciever - 2050 2050 DVB-T Terrestrial (Cable) Reciever - 2060 2060 ATSC 
Terrestrial (Cable) Reciever -14f4 TOKYO Electronic Industry CO Ltd -14f5 SOPAC Ltd -14f6 COYOTE Technologies LLC -14f7 WOLF Technology Inc -14f8 AUDIOCODES Inc - 2077 TP-240 dual span E1 VoIP PCI card -14f9 AG COMMUNICATIONS -14fa WANDEL & GOCHERMANN -14fb TRANSAS MARINE (UK) Ltd -14fc Quadrics Ltd - 0000 QsNet Elan3 Network Adapter - 0001 QsNetII Elan4 Network Adapter -14fd JAPAN Computer Industry Inc -14fe ARCHTEK TELECOM Corp -14ff TWINHEAD INTERNATIONAL Corp -1500 DELTA Electronics, Inc - 1360 RTL81xx RealTek Ethernet -1501 BANKSOFT CANADA Ltd -1502 MITSUBISHI ELECTRIC LOGISTICS SUPPORT Co Ltd -1503 KAWASAKI LSI USA Inc -1504 KAISER Electronics -1505 ITA INGENIEURBURO FUR TESTAUFGABEN GmbH -1506 CHAMELEON Systems Inc -# Should be HTEC Ltd, but there are no known HTEC chips and 1507 is already used by mistake by Motorola (see vendor ID 1057). -1507 Motorola ?? / HTEC - 0001 MPC105 [Eagle] - 0002 MPC106 [Grackle] - 0003 MPC8240 [Kahlua] - 0100 MC145575 [HFC-PCI] - 0431 KTI829c 100VG - 4801 Raven - 4802 Falcon - 4803 Hawk - 4806 CPX8216 -1508 HONDA CONNECTORS/MHOTRONICS Inc -1509 FIRST INTERNATIONAL Computer Inc -150a FORVUS RESEARCH Inc -150b YAMASHITA Systems Corp -150c KYOPAL CO Ltd -150d WARPSPPED Inc -150e C-PORT Corp -150f INTEC GmbH -1510 BEHAVIOR TECH Computer Corp -1511 CENTILLIUM Technology Corp -1512 ROSUN Technologies Inc -1513 Raychem -1514 TFL LAN Inc -1515 Advent design -1516 MYSON Technology Inc - 0800 MTD-8xx 100/10M Ethernet PCI Adapter - 0803 SURECOM EP-320X-S 100/10M Ethernet PCI Adapter - 1320 10bd SURECOM EP-320X-S 100/10M Ethernet PCI Adapter - 0891 MTD-8xx 100/10M Ethernet PCI Adapter -1517 ECHOTEK Corp -1518 PEP MODULAR Computers GmbH -1519 TELEFON AKTIEBOLAGET LM Ericsson -151a Globetek - 1002 PCI-1002 - 1004 PCI-1004 - 1008 PCI-1008 -151b COMBOX Ltd -151c DIGITAL AUDIO LABS Inc - 0003 Prodif T 2496 - 4000 Prodif 88 -151d Fujitsu Computer Products Of America -151e MATRIX Corp -151f TOPIC SEMICONDUCTOR Corp - 0000 TP560 Data/Fax/Voice 56k 
modem -1520 CHAPLET System Inc -1521 BELL Corp -1522 MainPine Ltd - 0100 PCI <-> IOBus Bridge - 1522 0200 RockForceDUO 2 Port V.92/V.44 Data/Fax/Voice Modem - 1522 0300 RockForceQUATRO 4 Port V.92/V.44 Data/Fax/Voice Modem - 1522 0400 RockForceDUO+ 2 Port V.92/V.44 Data/Fax/Voice Modem - 1522 0500 RockForceQUATRO+ 4 Port V.92/V.44 Data/Fax/Voice Modem - 1522 0600 RockForce+ 2 Port V.90 Data/Fax/Voice Modem - 1522 0700 RockForce+ 4 Port V.90 Data/Fax/Voice Modem - 1522 0800 RockForceOCTO+ 8 Port V.92/V.44 Data/Fax/Voice Modem - 1522 0c00 RockForceDUO+ 2 Port V.92/V.44 Data, V.34 Super-G3 Fax, Voice Modem - 1522 0d00 RockForceQUATRO+ 4 Port V.92/V.44 Data, V.34 Super-G3 Fax, Voice Modem -# this is a correction to a recent entry. 1522:0E00 should be 1522:1D00 - 1522 1d00 RockForceOCTO+ 8 Port V.92/V.44 Data, V.34 Super-G3 Fax, Voice Modem -1523 MUSIC Semiconductors -1524 ENE Technology Inc - 0510 CB710 Memory Card Reader Controller - 0610 PCI Smart Card Reader Controller - 1211 CB1211 Cardbus Controller - 1225 CB1225 Cardbus Controller - 1410 CB1410 Cardbus Controller - 1025 005a TravelMate 290 - 1411 CB-710/2/4 Cardbus Controller - 1412 CB-712/4 Cardbus Controller - 1420 CB1420 Cardbus Controller - 1421 CB-720/2/4 Cardbus Controller - 1422 CB-722/4 Cardbus Controller -1525 IMPACT Technologies -1526 ISS, Inc -1527 SOLECTRON -1528 ACKSYS -1529 AMERICAN MICROSystems Inc -152a QUICKTURN DESIGN Systems -152b FLYTECH Technology CO Ltd -152c MACRAIGOR Systems LLC -152d QUANTA Computer Inc -152e MELEC Inc -152f PHILIPS - CRYPTO -1530 ACQIS Technology Inc -1531 CHRYON Corp -1532 ECHELON Corp -1533 BALTIMORE -1534 ROAD Corp -1535 EVERGREEN Technologies Inc -1537 DATALEX COMMUNCATIONS -1538 ARALION Inc - 0303 ARS106S Ultra ATA 133/100/66 Host Controller -1539 ATELIER INFORMATIQUES et ELECTRONIQUE ETUDES S.A. -153a ONO SOKKI -153b TERRATEC Electronic GmbH - 1144 Aureon 5.1 -# Terratec seems to use several IDs for the same card. 
- 1147 Aureon 5.1 Sky - 1158 Philips Semiconductors SAA7134 (rev 01) [Terratec Cinergy 600 TV] -153c ANTAL Electronic -153d FILANET Corp -153e TECHWELL Inc -153f MIPS DENMARK -1540 PROVIDEO MULTIMEDIA Co Ltd -1541 MACHONE Communications -1542 VIVID Technology Inc -1543 SILICON Laboratories - 3052 Intel 537 [Winmodem] - 4c22 Si3036 MC'97 DAA -1544 DCM DATA Systems -1545 VISIONTEK -1546 IOI Technology Corp -1547 MITUTOYO Corp -1548 JET PROPULSION Laboratory -1549 INTERCONNECT Systems Solutions -154a MAX Technologies Inc -154b COMPUTEX Co Ltd -154c VISUAL Technology Inc -154d PAN INTERNATIONAL Industrial Corp -154e SERVOTEST Ltd -154f STRATABEAM Technology -1550 OPEN NETWORK Co Ltd -1551 SMART Electronic DEVELOPMENT GmBH -1552 RACAL AIRTECH Ltd -1553 CHICONY Electronics Co Ltd -1554 PROLINK Microsystems Corp -1555 GESYTEC GmBH -1556 PLD APPLICATIONS -1557 MEDIASTAR Co Ltd -1558 CLEVO/KAPOK Computer -1559 SI LOGIC Ltd -155a INNOMEDIA Inc -155b PROTAC INTERNATIONAL Corp -155c Cemax-Icon Inc -155d Mac System Co Ltd -155e LP Elektronik GmbH -155f Perle Systems Ltd -1560 Terayon Communications Systems -1561 Viewgraphics Inc -1562 Symbol Technologies -1563 A-Trend Technology Co Ltd -1564 Yamakatsu Electronics Industry Co Ltd -1565 Biostar Microtech Int'l Corp -1566 Ardent Technologies Inc -1567 Jungsoft -1568 DDK Electronics Inc -1569 Palit Microsystems Inc. 
-156a Avtec Systems -156b 2wire Inc -156c Vidac Electronics GmbH -156d Alpha-Top Corp -156e Alfa Inc -156f M-Systems Flash Disk Pioneers Ltd -1570 Lecroy Corp -1571 Contemporary Controls - a001 CCSI PCI20-485 ARCnet - a002 CCSI PCI20-485D ARCnet - a003 CCSI PCI20-485X ARCnet - a004 CCSI PCI20-CXB ARCnet - a005 CCSI PCI20-CXS ARCnet - a006 CCSI PCI20-FOG-SMA ARCnet - a007 CCSI PCI20-FOG-ST ARCnet - a008 CCSI PCI20-TB5 ARCnet - a009 CCSI PCI20-5-485 5Mbit ARCnet - a00a CCSI PCI20-5-485D 5Mbit ARCnet - a00b CCSI PCI20-5-485X 5Mbit ARCnet - a00c CCSI PCI20-5-FOG-ST 5Mbit ARCnet - a00d CCSI PCI20-5-FOG-SMA 5Mbit ARCnet - a201 CCSI PCI22-485 10Mbit ARCnet - a202 CCSI PCI22-485D 10Mbit ARCnet - a203 CCSI PCI22-485X 10Mbit ARCnet - a204 CCSI PCI22-CHB 10Mbit ARCnet - a205 CCSI PCI22-FOG_ST 10Mbit ARCnet - a206 CCSI PCI22-THB 10Mbit ARCnet -1572 Otis Elevator Company -1573 Lattice - Vantis -1574 Fairchild Semiconductor -1575 Voltaire Advanced Data Security Ltd -1576 Viewcast COM -1578 HITT - 5615 VPMK3 [Video Processor Mk III] -1579 Dual Technology Corp -157a Japan Elecronics Ind Inc -157b Star Multimedia Corp -157c Eurosoft (UK) - 8001 Fix2000 PCI Y2K Compliance Card -157d Gemflex Networks -157e Transition Networks -157f PX Instruments Technology Ltd -1580 Primex Aerospace Co -1581 SEH Computertechnik GmbH -1582 Cytec Corp -1583 Inet Technologies Inc -1584 Uniwill Computer Corp -1585 Logitron -1586 Lancast Inc -1587 Konica Corp -1588 Solidum Systems Corp -1589 Atlantek Microsystems Pty Ltd -158a Digalog Systems Inc -158b Allied Data Technologies -158c Hitachi Semiconductor & Devices Sales Co Ltd -158d Point Multimedia Systems -158e Lara Technology Inc -158f Ditect Coop -1590 3pardata Inc -1591 ARN -1592 Syba Tech Ltd - 0781 Multi-IO Card - 0782 Parallel Port Card 2xEPP - 0783 Multi-IO Card - 0785 Multi-IO Card - 0786 Multi-IO Card - 0787 Multi-IO Card - 0788 Multi-IO Card - 078a Multi-IO Card -1593 Bops Inc -1594 Netgame Ltd -1595 Diva Systems Corp -1596 Folsom Research 
Inc -1597 Memec Design Services -1598 Granite Microsystems -1599 Delta Electronics Inc -159a General Instrument -159b Faraday Technology Corp -159c Stratus Computer Systems -159d Ningbo Harrison Electronics Co Ltd -159e A-Max Technology Co Ltd -159f Galea Network Security -15a0 Compumaster SRL -15a1 Geocast Network Systems -15a2 Catalyst Enterprises Inc - 0001 TA700 PCI Bus Analyzer/Exerciser -15a3 Italtel -15a4 X-Net OY -15a5 Toyota Macs Inc -15a6 Sunlight Ultrasound Technologies Ltd -15a7 SSE Telecom Inc -15a8 Shanghai Communications Technologies Center -15aa Moreton Bay -15ab Bluesteel Networks Inc -15ac North Atlantic Instruments -15ad VMware Inc - 0405 [VMware SVGA II] PCI Display Adapter - 0710 Virtual SVGA - 0720 VMware High-Speed Virtual NIC [vmxnet] -15ae Amersham Pharmacia Biotech -15b0 Zoltrix International Ltd -15b1 Source Technology Inc -15b2 Mosaid Technologies Inc -15b3 Mellanox Technologies - 5274 MT21108 InfiniBridge - 5a44 MT23108 InfiniHost - 5a45 MT23108 [Infinihost HCA Flash Recovery] - 5a46 MT23108 PCI Bridge - 5e8c MT24204 [InfiniHost III Lx HCA] - 5e8d MT24204 [InfiniHost III Lx HCA Flash Recovery] - 6278 MT25208 InfiniHost III Ex (Tavor compatibility mode) - 6279 MT25208 [InfiniHost III Ex HCA Flash Recovery] - 6282 MT25208 InfiniHost III Ex -15b4 CCI/TRIAD -15b5 Cimetrics Inc -15b6 Texas Memory Systems Inc -15b7 Sandisk Corp -15b8 ADDI-DATA GmbH -15b9 Maestro Digital Communications -15ba Impacct Technology Corp -15bb Portwell Inc -15bc Agilent Technologies - 2922 64 Bit, 133MHz PCI-X Exerciser & Protocol Checker - 2928 64 Bit, 66MHz PCI Exerciser & Analyzer - 2929 64 Bit, 133MHz PCI-X Analyzer & Exerciser -15bd DFI Inc -15be Sola Electronics -15bf High Tech Computer Corp (HTC) -15c0 BVM Ltd -15c1 Quantel -15c2 Newer Technology Inc -15c3 Taiwan Mycomp Co Ltd -15c4 EVSX Inc -15c5 Procomp Informatics Ltd - 8010 1394b - 1394 Firewire 3-Port Host Adapter Card -15c6 Technical University of Budapest -15c7 Tateyama System Laboratory Co Ltd - 0349 
Tateyama C-PCI PLC/NC card Rev.01A -15c8 Penta Media Co Ltd -15c9 Serome Technology Inc -15ca Bitboys OY -15cb AG Electronics Ltd -15cc Hotrail Inc -15cd Dreamtech Co Ltd -15ce Genrad Inc -15cf Hilscher GmbH -15d1 Infineon Technologies AG -15d2 FIC (First International Computer Inc) -15d3 NDS Technologies Israel Ltd -15d4 Iwill Corp -15d5 Tatung Co -15d6 Entridia Corp -15d7 Rockwell-Collins Inc -15d8 Cybernetics Technology Co Ltd -15d9 Super Micro Computer Inc -15da Cyberfirm Inc -15db Applied Computing Systems Inc -15dc Litronic Inc - 0001 Argus 300 PCI Cryptography Module -15dd Sigmatel Inc -15de Malleable Technologies Inc -15df Infinilink Corp -15e0 Cacheflow Inc -15e1 Voice Technologies Group Inc -15e2 Quicknet Technologies Inc -15e3 Networth Technologies Inc -15e4 VSN Systemen BV -15e5 Valley technologies Inc -15e6 Agere Inc -15e7 Get Engineering Corp -15e8 National Datacomm Corp - 0130 Wireless PCI Card -15e9 Pacific Digital Corp - 1841 ADMA-100 DiscStaQ ATA Controller -15ea Tokyo Denshi Sekei K.K. -15eb Drsearch GmbH -15ec Beckhoff GmbH - 3101 FC3101 Profibus DP 1 Channel PCI - 5102 FC5102 -15ed Macrolink Inc -15ee In Win Development Inc -15ef Intelligent Paradigm Inc -15f0 B-Tree Systems Inc -15f1 Times N Systems Inc -15f2 Diagnostic Instruments Inc -15f3 Digitmedia Corp -15f4 Valuesoft -15f5 Power Micro Research -15f6 Extreme Packet Device Inc -15f7 Banctec -15f8 Koga Electronics Co -15f9 Zenith Electronics Corp -15fa J.P. Axzam Corp -15fb Zilog Inc -15fc Techsan Electronics Co Ltd -15fd N-CUBED.NET -15fe Kinpo Electronics Inc -15ff Fastpoint Technologies Inc -1600 Northrop Grumman - Canada Ltd -1601 Tenta Technology -1602 Prosys-tec Inc -1603 Nokia Wireless Communications -1604 Central System Research Co Ltd -1605 Pairgain Technologies -1606 Europop AG -1607 Lava Semiconductor Manufacturing Inc -1608 Automated Wagering International -1609 Scimetric Instruments Inc -1612 Telesynergy Research Inc. 
-1619 FarSite Communications Ltd - 0400 FarSync T2P (2 port X.21/V.35/V.24) - 0440 FarSync T4P (4 port X.21/V.35/V.24) -# www.rioworks.com -161f Rioworks -1626 TDK Semiconductor Corp. - 8410 RTL81xx Fast Ethernet -1629 Kongsberg Spacetec AS - 1003 Format synchronizer v3.0 - 2002 Fast Universal Data Output -# This seems to occur on their 802.11b Wireless card WMP-11 -1637 Linksys - 3874 Linksys 802.11b WMP11 PCI Wireless card -1638 Standard Microsystems Corp [SMC] - 1100 SMC2602W EZConnect / Addtron AWA-100 / Eumitcom PCI WL11000 -163c Smart Link Ltd. - 3052 SmartLink SmartPCI562 56K Modem - 5449 SmartPCI561 Modem -1657 Brocade Communications Systems, Inc. -165a Epix Inc - c100 PIXCI(R) CL1 Camera Link Video Capture Board [custom QL5232] - d200 PIXCI(R) D2X Digital Video Capture Board [custom QL5232] - d300 PIXCI(R) D3X Digital Video Capture Board [custom QL5232] -165d Hsing Tech. Enterprise Co., Ltd. -1661 Worldspace Corp. -1668 Actiontec Electronics Inc - 0100 Mini-PCI bridge -# Formerly SiByte, Inc. -166d Broadcom Corporation - 0001 SiByte BCM1125/1125H/1250 System-on-a-Chip PCI - 0002 SiByte BCM1125H/1250 System-on-a-Chip HyperTransport -1677 Bernecker + Rainer - 104e 5LS172.6 B&R Dual CAN Interface Card - 12d7 5LS172.61 B&R Dual CAN Interface Card -167b ZyDAS Technology Corp. - 2102 ZyDAS ZD1202 - 187e 3406 ZyAIR B-122 CardBus 11Mbs Wireless LAN Card -1681 Hercules -# More specs, more accurate desc. - 0010 Hercules 3d Prophet II Ultra 64MB [ 350 MHz NV15BR core, 128-bit DDR @ 460 MHz, 1.5v AGP4x ] -1682 XFX Pine Group Inc. -1688 CastleNet Technology Inc. - 1170 WLAN 802.11b card -168c Atheros Communications, Inc. - 0007 AR5000 802.11a Wireless Adapter - 0011 AR5210 802.11a NIC - 0012 AR5211 802.11ab NIC - 0013 AR5212 802.11abg NIC - 1113 d301 Philips CPWNA100 Wireless CardBus adapter - 1186 3202 D-link DWL-G650 B3 Wireless cardbus adapter - 1186 3203 DWL-G520 Wireless PCI Adapter - 1186 3a13 DWL-G520 Wireless PCI Adapter rev. 
B - 1186 3a94 C54C Wireless 801.11g cardbus - 1385 4d00 Netgear WG311T Wireless PCI Adapter - 14b7 0a60 8482-WD ORiNOCO 11a/b/g Wireless PCI Adapter - 168c 0013 WG511T Wireless CardBus Adapter - 168c 1025 DWL-G650B2 Wireless CardBus Adapter - 168c 1027 Netgate NL-3054CB ARIES b/g CardBus Adapter - 168c 2026 Netgate 5354MP ARIES a(108Mb turbo)/b/g MiniPCI Adapter - 168c 2041 Netgate 5354MP Plus ARIES2 b/g MiniPCI Adapter - 168c 2042 Netgate 5354MP Plus ARIES2 a/b/g MiniPCI Adapter - 1014 AR5212 802.11abg NIC -169c Netcell Corporation - 0044 SyncRAID SR3000/5000 Series SATA RAID Controllers -16a5 Tekram Technology Co.,Ltd. -16ab Global Sun Technology Inc - 1100 GL24110P - 1101 PLX9052 PCMCIA-to-PCI Wireless LAN - 1102 PCMCIA-to-PCI Wireless Network Bridge - 8501 WL-8305 Wireless LAN PCI Adapter -16ae Safenet Inc - 1141 SafeXcel-1141 -16b4 Aspex Semiconductor Ltd -16be Creatix Polymedia GmbH -16ca CENATEK Inc - 0001 Rocket Drive DL -16cd Densitron Technologies -16ce Roland Corp. -# www.pikatechnologies.com -16df PIKA Technologies Inc. -16e3 European Space Agency - 1e0f LEON2FT Processor -16ec U.S. Robotics - 00ff USR997900 10/100 Mbps PCI Network Card - 0116 USR997902 10/100/1000 Mbps PCI Network Card - 3685 Wireless Access PCI Adapter Model 022415 -16ed Sycron N. V. - 1001 UMIO communication card -16f3 Jetway Information Co., Ltd. -16f4 Vweb Corp - 8000 VW2010 -16f6 VideoTele.com, Inc. -# www.internetmachines.com -1702 Internet Machines Corporation (IMC) -1705 Digital First, Inc. -170b NetOctave - 0100 NSP2000-SSL crypto accelerator -170c YottaYotta Inc. 
-# Seems to be a 2nd ID for Vitesse Semiconductor -1725 Vitesse Semiconductor - 7174 VSC7174 PCI/PCI-X Serial ATA Host Bus Controller -172a Accelerated Encryption -1734 Fujitsu Siemens Computer GmbH -1737 Linksys - 0013 WMP54G Wireless Pci Card - 0015 WMP54GS Wireless Pci Card - 1032 Gigabit Network Adapter - 1737 0015 EG1032 v2 Instant Gigabit Network Adapter - 1064 Gigabit Network Adapter - 1737 0016 EG1064 v2 Instant Gigabit Network Adapter - ab08 21x4x DEC-Tulip compatible 10/100 Ethernet - ab09 21x4x DEC-Tulip compatible 10/100 Ethernet -173b Altima (nee Broadcom) - 03e8 AC1000 Gigabit Ethernet - 03e9 AC1001 Gigabit Ethernet - 03ea AC9100 Gigabit Ethernet - 173b 0001 AC1002 - 03eb AC1003 Gigabit Ethernet -1743 Peppercon AG - 8139 ROL/F-100 Fast Ethernet Adapter with ROL -1749 RLX Technologies -174b PC Partner Limited -174d WellX Telecom SA -175c AudioScience Inc -175e Sanera Systems, Inc. -1787 Hightech Information System Ltd. -# also used by Struck Innovative Systeme for joint developments -1796 Research Centre Juelich - 0001 SIS1100 [Gigabit link] - 0002 HOTlink - 0003 Counter Timer - 0004 CAMAC Controller - 0005 PROFIBUS - 0006 AMCC HOTlink -1797 JumpTec h, GMBH -1799 Belkin - 6001 Wireless PCI Card - F5D6001 - 6020 Wireless PCMCIA Card - F5D6020 - 6060 Wireless PDA Card - F5D6060 - 7000 Wireless PCI Card - F5D7000 -17a0 Genesys Logic, Inc - 8033 GL880S USB 1.1 controller - 8034 GL880S USB 2.0 controller -17af Hightech Information System Ltd. -17b3 Hawking Technologies - ab08 PN672TX 10/100 Ethernet -17b4 Indra Networks, Inc. - 0011 WebEnhance 100 GZIP Compression Card -17c0 Wistron Corp. -17c2 Newisys, Inc. -17cc NetChip Technology, Inc - 2280 USB 2.0 -17d3 Areca Technology Corp. 
- 1110 ARC-1110 4-Port PCI-X to SATA RAID Controller - 1120 ARC-1120 8-Port PCI-X to SATA RAID Controller - 1130 ARC-1130 12-Port PCI-X to SATA RAID Controller - 1160 ARC-1160 16-Port PCI-X to SATA RAID Controller - 1210 ARC-1210 4-Port PCI-Express to SATA RAID Controller - 1220 ARC-1220 8-Port PCI-Express to SATA RAID Controller - 1230 ARC-1230 12-Port PCI-Express to SATA RAID Controller - 1260 ARC-1260 16-Port PCI-Express to SATA RAID Controller -# S2io ships 10Gb PCI-X Ethernet adapters www.s2io.com -17d5 S2io Inc. - 5831 Xframe 10 Gigabit Ethernet PCI-X - 103c 12d5 HP PCI-X 133MHz 10GbE SR Fiber [AB287A] -17de KWorld Computer Co. Ltd. -# http://www.connect3d.com -17ee Connect Components Ltd -17fe Linksys, A Division of Cisco Systems - 2120 WMP11v4 802.11b PCI card - 2220 [AirConn] INPROCOMM IPN 2220 Wireless LAN Adapter (rev 01) -1813 Ambient Technologies Inc - 4000 HaM controllerless modem - 16be 0001 V9x HAM Data Fax Modem - 4100 HaM plus Data Fax Modem - 16be 0002 V9x HAM 1394 -1814 RaLink - 0101 Wireless PCI Adpator RT2400 / RT2460 - 3306 1113 Quidway WL100M - 0201 Ralink RT2500 802.11 Cardbus Reference Card - 1371 001e CWC-854 Wireless-G CardBus Adapter - 1371 001f CWM-854 Wireless-G Mini PCI Adapter - 1371 0020 CWP-854 Wireless-G PCI Adapter - 1458 e381 GN-WMKG 802.11b/g Wireless CardBus Adapter -1820 InfiniCon Systems Inc. -1822 Twinhan Technology Co. Ltd -182d SiteCom Europe BV -# HFC-based ISDN card - 3069 ISDN PCI DC-105V2 - 9790 WL-121 Wireless Network Adapter 100g+ [Ver.3] -1830 Credence Systems Corporation -183b MikroM GmbH - 08a7 MVC100 DVI - 08a8 MVC101 SDI - 08a9 MVC102 DVI+Audio -1849 ASRock Incorporation -1851 Microtune, Inc. -1852 Anritsu Corp. -185f Wistron NeWeb Corp. 
-1867 Topspin Communications - 5a44 MT23108 PCI-X HCA - 5a45 MT23108 PCI-X HCA flash recovery - 5a46 MT23108 PCI-X HCA bridge - 6278 MT25208 InfiniHost III Ex (Tavor compatibility mode) - 6282 MT25208 InfiniHost III Ex -187e ZyXEL Communication Corporation -1888 Varisys Ltd - 0301 VMFX1 FPGA PMC module - 0601 VSM2 dual PMC carrier - 0710 VS14x series PowerPC PCI board - 0720 VS24x series PowerPC PCI board -# found e.g. on KNC DVB-S card -1894 KNC One -1896 B&B Electronics Manufacturing Company, Inc. -18a1 Astute Networks Inc. -18ac DViCO Corporation - d810 FusionHDTV 3 Gold -18b8 Ammasso - b001 AMSO 1100 iWARP/RDMA Gigabit Ethernet Coprocessor -18bc Info-Tek Corp. -# assigned to Octigabay System, which has been acquired by Cray -18c8 Cray Inc -18c9 ARVOO Engineering BV -18ca XGI - Xabre Graphics Inc - 0040 Volari V8 -18e6 MPL AG - 0001 OSCI [Octal Serial Communication Interface] -18f7 Commtech, Inc. - 0001 Fastcom ESCC-PCI-335 - 0002 Fastcom 422/4-PCI-335 - 0004 Fastcom 422/2-PCI-335 - 0005 Fastcom IGESCC-PCI-ISO/1 - 000a Fastcom 232/4-PCI-335 -18fb Resilience Corporation -1924 Level 5 Networks Inc. -1966 Orad Hi-Tec Systems - 1975 DVG64 family -1993 Innominate Security Technologies AG -# http://www.progeny.net -19ae Progeny Systems Corporation -1a08 Sierra semiconductor - 0000 SC15064 -1b13 Jaton Corp -1c1c Symphony - 0001 82C101 -1d44 DPT - a400 PM2x24/PM3224 -1de1 Tekram Technology Co.,Ltd. - 0391 TRM-S1040 - 2020 DC-390 - 690c 690c - dc29 DC290 -1fc0 Tumsan Oy - 0300 E2200 Dual E1/Rawpipe Card -2000 Smart Link Ltd. -2001 Temporal Research Ltd -2003 Smart Link Ltd. -2004 Smart Link Ltd. -21c3 21st Century Computer Corp. -2348 Racore - 2010 8142 100VG/AnyLAN -2646 Kingston Technologies -270b Xantel Corporation -270f Chaintech Computer Co. Ltd -2711 AVID Technology Inc. -2a15 3D Vision(???) -3000 Hansol Electronics Inc. -3142 Post Impression Systems. 
-3388 Hint Corp - 0013 HiNT HC4 PCI to ISDN bridge, Multimedia audio controller - 0014 HiNT HC4 PCI to ISDN bridge, Network controller - 0020 HB6 Universal PCI-PCI bridge (transparent mode) - 0021 HB6 Universal PCI-PCI bridge (non-transparent mode) - 4c53 1050 CT7 mainboard - 4c53 1080 CT8 mainboard - 4c53 10a0 CA3/CR3 mainboard - 4c53 3010 PPCI mezzanine (32-bit PMC) - 4c53 3011 PPCI mezzanine (64-bit PMC) - 0022 HiNT HB4 PCI-PCI Bridge (PCI6150) - 0026 HB2 PCI-PCI Bridge - 101a E.Band [AudioTrak Inca88] - 101b E.Band [AudioTrak Inca88] - 8011 VXPro II Chipset - 3388 8011 VXPro II Chipset CPU to PCI Bridge - 8012 VXPro II Chipset - 3388 8012 VXPro II Chipset PCI to ISA Bridge - 8013 VXPro II IDE - 3388 8013 VXPro II Chipset EIDE Controller -3411 Quantum Designs (H.K.) Inc -3513 ARCOM Control Systems Ltd -3842 eVga.com. Corp. -38ef 4Links -3d3d 3DLabs - 0001 GLINT 300SX - 0002 GLINT 500TX - 0003 GLINT Delta - 0004 Permedia - 0005 Permedia - 0006 GLINT MX - 0007 3D Extreme - 0008 GLINT Gamma G1 - 0009 Permedia II 2D+3D - 1040 0011 AccelStar II - 13e9 1000 6221L-4U - 3d3d 0100 AccelStar II 3D Accelerator - 3d3d 0111 Permedia 3:16 - 3d3d 0114 Santa Ana - 3d3d 0116 Oxygen GVX1 - 3d3d 0119 Scirocco - 3d3d 0120 Santa Ana PCL - 3d3d 0125 Oxygen VX1 - 3d3d 0127 Permedia3 Create! - 000a GLINT R3 - 3d3d 0121 Oxygen VX1 - 000c GLINT R3 [Oxygen VX1] - 3d3d 0144 Oxygen VX1-4X AGP [Permedia 4] - 000d GLint R4 rev A - 0011 GLint R4 rev B - 0012 GLint R5 rev A - 0013 GLint R5 rev B - 0020 VP10 visual processor -# P10 generic II - 0022 VP10 visual processor - 0024 VP9 visual processor - 0100 Permedia II 2D+3D - 07a1 Wildcat III 6210 - 07a2 Sun XVR-500 Graphics Accelerator - 07a3 Wildcat IV 7210 - 1004 Permedia - 3d04 Permedia - ffff Glint VGA -4005 Avance Logic Inc. 
- 0300 ALS300 PCI Audio Device - 0308 ALS300+ PCI Audio Device - 0309 PCI Input Controller - 1064 ALG-2064 - 2064 ALG-2064i - 2128 ALG-2364A GUI Accelerator - 2301 ALG-2301 - 2302 ALG-2302 - 2303 AVG-2302 GUI Accelerator - 2364 ALG-2364A - 2464 ALG-2464 - 2501 ALG-2564A/25128A - 4000 ALS4000 Audio Chipset - 4005 4000 ALS4000 Audio Chipset - 4710 ALC200/200P -4033 Addtron Technology Co, Inc. - 1360 RTL8139 Ethernet -4143 Digital Equipment Corp -4144 Alpha Data - 0044 ADM-XRCIIPro -416c Aladdin Knowledge Systems - 0100 AladdinCARD - 0200 CPC -4444 Internext Compression Inc - 0016 iTVC16 (CX23416) MPEG-2 Encoder - 0070 4009 WinTV PVR 250 - 0070 8003 WinTV PVR 150 - 0803 iTVC15 MPEG-2 Encoder - 0070 4000 WinTV PVR-350 - 0070 4001 WinTV PVR-250 -# video capture card - 1461 a3cf M179 -4468 Bridgeport machines -4594 Cogetec Informatique Inc -45fb Baldor Electric Company -4680 Umax Computer Corp -4843 Hercules Computer Technology Inc -4916 RedCreek Communications Inc - 1960 RedCreek PCI adapter -4943 Growth Networks -494f ACCES I/O Products, Inc. - 10e8 LPCI-COM-8SM -4978 Axil Computer Inc -4a14 NetVin - 5000 NV5000SC - 4a14 5000 RT8029-Based Ethernet Adapter -4b10 Buslogic Inc. -4c48 LUNG HWA Electronics -4c53 SBS Technologies - 0000 PLUSTEST device - 4c53 3000 PLUSTEST card (PC104+) - 4c53 3001 PLUSTEST card (PMC) - 0001 PLUSTEST-MM device - 4c53 3002 PLUSTEST-MM card (PMC) -4ca1 Seanix Technology Inc -4d51 MediaQ Inc. 
- 0200 MQ-200 -4d54 Microtechnica Co Ltd -4ddc ILC Data Device Corp - 0100 DD-42924I5-300 (ARINC 429 Data Bus) - 0801 BU-65570I1 MIL-STD-1553 Test and Simulation - 0802 BU-65570I2 MIL-STD-1553 Test and Simulation - 0811 BU-65572I1 MIL-STD-1553 Test and Simulation - 0812 BU-65572I2 MIL-STD-1553 Test and Simulation - 0881 BU-65570T1 MIL-STD-1553 Test and Simulation - 0882 BU-65570T2 MIL-STD-1553 Test and Simulation - 0891 BU-65572T1 MIL-STD-1553 Test and Simulation - 0892 BU-65572T2 MIL-STD-1553 Test and Simulation - 0901 BU-65565C1 MIL-STD-1553 Data Bus - 0902 BU-65565C2 MIL-STD-1553 Data Bus - 0903 BU-65565C3 MIL-STD-1553 Data Bus - 0904 BU-65565C4 MIL-STD-1553 Data Bus - 0b01 BU-65569I1 MIL-STD-1553 Data Bus - 0b02 BU-65569I2 MIL-STD-1553 Data Bus - 0b03 BU-65569I3 MIL-STD-1553 Data Bus - 0b04 BU-65569I4 MIL-STD-1553 Data Bus -5046 GemTek Technology Corporation - 1001 PCI Radio -5053 Voyetra Technologies - 2010 Daytona Audio Adapter -5136 S S Technologies -5143 Qualcomm Inc -5145 Ensoniq (Old) - 3031 Concert AudioPCI -5168 Animation Technologies Inc. -5301 Alliance Semiconductor Corp. - 0001 ProMotion aT3D -5333 S3 Inc. 
- 0551 Plato/PX (system) - 5631 86c325 [ViRGE] - 8800 86c866 [Vision 866] - 8801 86c964 [Vision 964] - 8810 86c764_0 [Trio 32 vers 0] - 8811 86c764/765 [Trio32/64/64V+] - 8812 86cM65 [Aurora64V+] - 8813 86c764_3 [Trio 32/64 vers 3] - 8814 86c767 [Trio 64UV+] - 8815 86cM65 [Aurora 128] - 883d 86c988 [ViRGE/VX] - 8870 FireGL - 8880 86c868 [Vision 868 VRAM] vers 0 - 8881 86c868 [Vision 868 VRAM] vers 1 - 8882 86c868 [Vision 868 VRAM] vers 2 - 8883 86c868 [Vision 868 VRAM] vers 3 - 88b0 86c928 [Vision 928 VRAM] vers 0 - 88b1 86c928 [Vision 928 VRAM] vers 1 - 88b2 86c928 [Vision 928 VRAM] vers 2 - 88b3 86c928 [Vision 928 VRAM] vers 3 - 88c0 86c864 [Vision 864 DRAM] vers 0 - 88c1 86c864 [Vision 864 DRAM] vers 1 - 88c2 86c864 [Vision 864-P DRAM] vers 2 - 88c3 86c864 [Vision 864-P DRAM] vers 3 - 88d0 86c964 [Vision 964 VRAM] vers 0 - 88d1 86c964 [Vision 964 VRAM] vers 1 - 88d2 86c964 [Vision 964-P VRAM] vers 2 - 88d3 86c964 [Vision 964-P VRAM] vers 3 - 88f0 86c968 [Vision 968 VRAM] rev 0 - 88f1 86c968 [Vision 968 VRAM] rev 1 - 88f2 86c968 [Vision 968 VRAM] rev 2 - 88f3 86c968 [Vision 968 VRAM] rev 3 - 8900 86c755 [Trio 64V2/DX] - 5333 8900 86C775 Trio64V2/DX - 8901 86c775/86c785 [Trio 64V2/DX or /GX] - 5333 8901 86C775 Trio64V2/DX, 86C785 Trio64V2/GX - 8902 Plato/PX - 8903 Trio 3D business multimedia - 8904 Trio 64 3D - 1014 00db Integrated Trio3D - 5333 8904 86C365 Trio3D AGP - 8905 Trio 64V+ family - 8906 Trio 64V+ family - 8907 Trio 64V+ family - 8908 Trio 64V+ family - 8909 Trio 64V+ family - 890a Trio 64V+ family - 890b Trio 64V+ family - 890c Trio 64V+ family - 890d Trio 64V+ family - 890e Trio 64V+ family - 890f Trio 64V+ family - 8a01 ViRGE/DX or /GX - 0e11 b032 ViRGE/GX - 10b4 1617 Nitro 3D - 10b4 1717 Nitro 3D - 5333 8a01 ViRGE/DX - 8a10 ViRGE/GX2 - 1092 8a10 Stealth 3D 4000 - 8a13 86c368 [Trio 3D/2X] - 5333 8a13 Trio3D/2X - 8a20 86c794 [Savage 3D] - 5333 8a20 86C391 Savage3D - 8a21 86c390 [Savage 3D/MV] - 5333 8a21 86C390 Savage3D/MV - 8a22 Savage 4 - 1033 8068 
Savage 4 - 1033 8069 Savage 4 - 1033 8110 Savage4 LT - 105d 0018 SR9 8Mb SDRAM - 105d 002a SR9 Pro 16Mb SDRAM - 105d 003a SR9 Pro 32Mb SDRAM - 105d 092f SR9 Pro+ 16Mb SGRAM - 1092 4207 Stealth III S540 - 1092 4800 Stealth III S540 - 1092 4807 SpeedStar A90 - 1092 4808 Stealth III S540 - 1092 4809 Stealth III S540 - 1092 480e Stealth III S540 - 1092 4904 Stealth III S520 - 1092 4905 SpeedStar A200 - 1092 4a09 Stealth III S540 - 1092 4a0b Stealth III S540 Xtreme - 1092 4a0f Stealth III S540 - 1092 4e01 Stealth III S540 - 1102 101d 3d Blaster Savage 4 - 1102 101e 3d Blaster Savage 4 - 5333 8100 86C394-397 Savage4 SDRAM 100 - 5333 8110 86C394-397 Savage4 SDRAM 110 - 5333 8125 86C394-397 Savage4 SDRAM 125 - 5333 8143 86C394-397 Savage4 SDRAM 143 - 5333 8a22 86C394-397 Savage4 - 5333 8a2e 86C394-397 Savage4 32bit - 5333 9125 86C394-397 Savage4 SGRAM 125 - 5333 9143 86C394-397 Savage4 SGRAM 143 - 8a23 Savage 4 - 8a25 ProSavage PM133 - 8a26 ProSavage KM133 - 8c00 ViRGE/M3 - 8c01 ViRGE/MX - 1179 0001 ViRGE/MX - 8c02 ViRGE/MX+ - 8c03 ViRGE/MX+MV - 8c10 86C270-294 Savage/MX-MV - 8c11 82C270-294 Savage/MX - 8c12 86C270-294 Savage/IX-MV - 1014 017f ThinkPad T20 - 1179 0001 86C584 SuperSavage/IXC Toshiba - 8c13 86C270-294 Savage/IX - 1179 0001 Magnia Z310 - 8c22 SuperSavage MX/128 - 8c24 SuperSavage MX/64 - 8c26 SuperSavage MX/64C - 8c2a SuperSavage IX/128 SDR - 8c2b SuperSavage IX/128 DDR - 8c2c SuperSavage IX/64 SDR - 8c2d SuperSavage IX/64 DDR - 8c2e SuperSavage IX/C SDR - 1014 01fc ThinkPad T23 (2647-4MG) - 8c2f SuperSavage IX/C DDR - 8d01 86C380 [ProSavageDDR K4M266] - 8d02 VT8636A [ProSavage KN133] AGP4X VGA Controller (TwisterK) - 8d03 VT8751 [ProSavageDDR P4M266] - 8d04 VT8375 [ProSavage8 KM266/KL266] - 9102 86C410 Savage 2000 - 1092 5932 Viper II Z200 - 1092 5934 Viper II Z200 - 1092 5952 Viper II Z200 - 1092 5954 Viper II Z200 - 1092 5a35 Viper II Z200 - 1092 5a37 Viper II Z200 - 1092 5a55 Viper II Z200 - 1092 5a57 Viper II Z200 - ca00 SonicVibes -544c Teralogic Inc - 
0350 TL880-based HDTV/ATSC tuner -5455 Technische University Berlin - 4458 S5933 -5519 Cnet Technologies, Inc. -5544 Dunord Technologies - 0001 I-30xx Scanner Interface -5555 Genroco, Inc - 0003 TURBOstor HFP-832 [HiPPI NIC] -5654 VoiceTronix Pty Ltd - 3132 OpenSwitch12 -5700 Netpower -5851 Exacq Technologies -6356 UltraStor -6374 c't Magazin für Computertechnik - 6773 GPPCI -6409 Logitec Corp. -6666 Decision Computer International Co. - 0001 PCCOM4 - 0002 PCCOM8 -7604 O.N. Electronic Co Ltd. -7bde MIDAC Corporation -7fed PowerTV -8008 Quancom Electronic GmbH - 0010 WDOG1 [PCI-Watchdog 1] - 0011 PWDOG2 [PCI-Watchdog 2] -# Wrong ID used in subsystem ID of AsusTek PCI-USB2 PCI card. -807d Asustek Computer, Inc. -8086 Intel Corporation - 0007 82379AB - 0008 Extended Express System Support Controller - 0008 1000 WorldMark 4300 INCA ASIC - 0039 21145 Fast Ethernet - 0122 82437FX - 0309 80303 I/O Processor PCI-to-PCI Bridge - 030d 80312 I/O Companion Chip PCI-to-PCI Bridge - 0326 6700/6702PXH I/OxAPIC Interrupt Controller A - 0327 6700PXH I/OxAPIC Interrupt Controller B - 0329 6700PXH PCI Express-to-PCI Bridge A - 032a 6700PXH PCI Express-to-PCI Bridge B - 032c 6702PXH PCI Express-to-PCI Bridge A -# A-segment bridge - 0330 80332 [Dobson] I/O processor -# A-segment IOAPIC - 0331 80332 [Dobson] I/O processor -# B-segment bridge - 0332 80332 [Dobson] I/O processor -# B-segment IOAPIC - 0333 80332 [Dobson] I/O processor -# Address Translation Unit (ATU) - 0334 80332 [Dobson] I/O processor -# PCI-X bridge - 0335 80331 [Lindsay] I/O processor -# Address Translation Unit (ATU) - 0336 80331 [Lindsay] I/O processor -# A-segment bridge - 0340 41210 [Lanai] Serial to Parallel PCI Bridge -# B-segment bridge - 0341 41210 [Lanai] Serial to Parallel PCI Bridge - 0482 82375EB/SB PCI to EISA Bridge - 0483 82424TX/ZX [Saturn] CPU to PCI bridge - 0484 82378ZB/IB, 82379AB (SIO, SIO.A) PCI to ISA Bridge - 0486 82425EX/ZX [Aries] PCIset with ISA bridge - 04a3 82434LX/NX [Mercury/Neptune] 
Processor to PCI bridge - 04d0 82437FX [Triton FX] - 0500 E8870 Processor bus control - 0501 E8870 Memory controller -# and registers common to both SPs - 0502 E8870 Scalability Port 0 -# and global performance monitoring - 0503 E8870 Scalability Port 1 - 0510 E8870IO Hub Interface Port 0 registers (8-bit compatibility port) - 0511 E8870IO Hub Interface Port 1 registers - 0512 E8870IO Hub Interface Port 2 registers - 0513 E8870IO Hub Interface Port 3 registers - 0514 E8870IO Hub Interface Port 4 registers - 0515 E8870IO General SIOH registers - 0516 E8870IO RAS registers - 0530 E8870SP Scalability Port 0 registers - 0531 E8870SP Scalability Port 1 registers - 0532 E8870SP Scalability Port 2 registers - 0533 E8870SP Scalability Port 3 registers - 0534 E8870SP Scalability Port 4 registers - 0535 E8870SP Scalability Port 5 registers -# (bi-interleave 0) and global registers that are neither per-port nor per-interleave - 0536 E8870SP Interleave registers 0 and 1 -# (bi-interleave 1) - 0537 E8870SP Interleave registers 2 and 3 - 0600 RAID Controller - 8086 01c1 ICP Vortex GDT8546RZ - 8086 01f7 SCRU32 -# uninitialized SRCU32 RAID Controller - 061f 80303 I/O Processor - 0960 80960RP [i960 RP Microprocessor/Bridge] - 0962 80960RM [i960RM Bridge] - 0964 80960RP [i960 RP Microprocessor/Bridge] - 1000 82542 Gigabit Ethernet Controller - 0e11 b0df NC1632 Gigabit Ethernet Adapter (1000-SX) - 0e11 b0e0 NC1633 Gigabit Ethernet Adapter (1000-LX) - 0e11 b123 NC1634 Gigabit Ethernet Adapter (1000-SX) - 1014 0119 Netfinity Gigabit Ethernet SX Adapter - 8086 1000 PRO/1000 Gigabit Server Adapter - 1001 82543GC Gigabit Ethernet Controller (Fiber) - 0e11 004a NC6136 Gigabit Server Adapter - 1014 01ea Netfinity Gigabit Ethernet SX Adapter - 8086 1002 PRO/1000 F Server Adapter - 8086 1003 PRO/1000 F Server Adapter - 1002 Pro 100 LAN+Modem 56 Cardbus II - 8086 200e Pro 100 LAN+Modem 56 Cardbus II - 8086 2013 Pro 100 SR Mobile Combo Adapter - 8086 2017 Pro 100 S Combo Mobile Adapter - 1004 
82543GC Gigabit Ethernet Controller (Copper) - 0e11 0049 NC7132 Gigabit Upgrade Module - 0e11 b1a4 NC7131 Gigabit Server Adapter - 1014 10f2 Gigabit Ethernet Server Adapter - 8086 1004 PRO/1000 T Server Adapter - 8086 2004 PRO/1000 T Server Adapter - 1008 82544EI Gigabit Ethernet Controller (Copper) - 1014 0269 iSeries 1000/100/10 Ethernet Adapter - 1028 011c PRO/1000 XT Network Connection - 8086 1107 PRO/1000 XT Server Adapter - 8086 2107 PRO/1000 XT Server Adapter - 8086 2110 PRO/1000 XT Server Adapter - 8086 3108 PRO/1000 XT Network Connection - 1009 82544EI Gigabit Ethernet Controller (Fiber) - 1014 0268 iSeries Gigabit Ethernet Adapter - 8086 1109 PRO/1000 XF Server Adapter - 8086 2109 PRO/1000 XF Server Adapter - 100c 82544GC Gigabit Ethernet Controller (Copper) - 8086 1112 PRO/1000 T Desktop Adapter - 8086 2112 PRO/1000 T Desktop Adapter - 100d 82544GC Gigabit Ethernet Controller (LOM) - 1028 0123 PRO/1000 XT Network Connection - 1079 891f 82544GC Based Network Connection - 4c53 1080 CT8 mainboard - 8086 110d 82544GC Based Network Connection - 100e 82540EM Gigabit Ethernet Controller - 1014 0265 PRO/1000 MT Network Connection - 1014 0267 PRO/1000 MT Network Connection - 1014 026a PRO/1000 MT Network Connection - 1028 002e Optiplex GX260 - 1028 0151 PRO/1000 MT Network Connection - 107b 8920 PRO/1000 MT Desktop Adapter - 8086 001e PRO/1000 MT Desktop Adapter - 8086 002e PRO/1000 MT Desktop Adapter - 100f 82545EM Gigabit Ethernet Controller (Copper) - 1014 0269 iSeries 1000/100/10 Ethernet Adapter - 1014 028e PRO/1000 MT Network Connection - 8086 1000 PRO/1000 MT Network Connection - 8086 1001 PRO/1000 MT Server Adapter - 1010 82546EB Gigabit Ethernet Controller (Copper) - 1014 027c PRO/1000 MT Dual Port Network Adapter - 18fb 7872 RESlink-X - 4c53 1080 CT8 mainboard - 4c53 10a0 CA3/CR3 mainboard - 8086 1011 PRO/1000 MT Dual Port Server Adapter - 8086 101a PRO/1000 MT Dual Port Network Adapter - 8086 3424 SE7501HG2 Mainboard - 1011 82545EM Gigabit Ethernet 
Controller (Fiber) - 1014 0268 iSeries Gigabit Ethernet Adapter - 8086 1002 PRO/1000 MF Server Adapter - 8086 1003 PRO/1000 MF Server Adapter (LX) - 1012 82546EB Gigabit Ethernet Controller (Fiber) - 8086 1012 PRO/1000 MF Dual Port Server Adapter - 1013 82541EI Gigabit Ethernet Controller (Copper) - 8086 0013 PRO/1000 MT Network Connection - 8086 1013 IBM ThinkCentre Network Card - 8086 1113 PRO/1000 MT Desktop Adapter - 1014 82541ER Gigabit Ethernet Controller - 1015 82540EM Gigabit Ethernet Controller (LOM) - 1016 82540EP Gigabit Ethernet Controller (LOM) - 1014 052c PRO/1000 MT Mobile Connection - 1179 0001 PRO/1000 MT Mobile Connection - 8086 1016 PRO/1000 MT Mobile Connection - 1017 82540EP Gigabit Ethernet Controller (LOM) - 8086 1017 PR0/1000 MT Desktop Connection -# Update controller name from 82541EP to 82541EI - 1018 82541EI Gigabit Ethernet Controller - 8086 1018 PRO/1000 MT Desktop Adapter - 1019 82547EI Gigabit Ethernet Controller (LOM) - 1458 1019 GA-8IPE1000 Pro2 motherboard (865PE) - 1458 e000 Intel Gigabit Ethernet (Kenai II) - 8086 1019 PRO/1000 CT Desktop Connection - 8086 301f D865PERL mainboard - 8086 3427 S875WP1-E mainboard - 101d 82546EB Gigabit Ethernet Controller - 8086 1000 PRO/1000 MT Quad Port Server Adapter - 101e 82540EP Gigabit Ethernet Controller (Mobile) - 1014 0549 PRO/1000 MT Mobile Connection - 1179 0001 PRO/1000 MT Mobile Connection - 8086 101e PRO/1000 MT Mobile Connection - 1026 82545GM Gigabit Ethernet Controller - 8086 1000 PRO/1000 MT Server Connection - 8086 1001 PRO/1000 MT Server Adapter - 8086 1002 PRO/1000 MT Server Adapter - 8086 1026 PRO/1000 MT Server Connection - 1027 82545GM Gigabit Ethernet Controller - 8086 1001 PRO/1000 MF Server Adapter(LX) - 8086 1002 PRO/1000 MF Server Adapter(LX) - 8086 1003 PRO/1000 MF Server Adapter(LX) - 8086 1027 PRO/1000 MF Server Adapter - 1028 82545GM Gigabit Ethernet Controller - 8086 1028 PRO/1000 MB Server Adapter - 1029 82559 Ethernet Controller - 1030 82559 InBusiness 10/100 - 
1031 82801CAM (ICH3) PRO/100 VE (LOM) Ethernet Controller - 1014 0209 ThinkPad A/T/X Series - 104d 80e7 Vaio PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 107b 5350 EtherExpress PRO/100 VE - 1179 0001 EtherExpress PRO/100 VE - 144d c000 EtherExpress PRO/100 VE - 144d c001 EtherExpress PRO/100 VE - 144d c003 EtherExpress PRO/100 VE - 144d c006 vpr Matrix 170B4 - 1032 82801CAM (ICH3) PRO/100 VE Ethernet Controller - 1033 82801CAM (ICH3) PRO/100 VM (LOM) Ethernet Controller - 1034 82801CAM (ICH3) PRO/100 VM Ethernet Controller - 1035 82801CAM (ICH3)/82562EH (LOM) Ethernet Controller - 1036 82801CAM (ICH3) 82562EH Ethernet Controller - 1037 82801CAM (ICH3) Chipset Ethernet Controller - 1038 82801CAM (ICH3) PRO/100 VM (KM) Ethernet Controller - 1039 82801DB PRO/100 VE (LOM) Ethernet Controller - 1014 0267 NetVista A30p - 103a 82801DB PRO/100 VE (CNR) Ethernet Controller - 103b 82801DB PRO/100 VM (LOM) Ethernet Controller - 103c 82801DB PRO/100 VM (CNR) Ethernet Controller - 103d 82801DB PRO/100 VE (MOB) Ethernet Controller - 103e 82801DB PRO/100 VM (MOB) Ethernet Controller - 1040 536EP Data Fax Modem - 16be 1040 V.9X DSP Data Fax Modem - 1043 PRO/Wireless LAN 2100 3B Mini PCI Adapter - 8086 2527 MIM2000/Centrino - 1048 PRO/10GbE LR Server Adapter - 8086 a01f PRO/10GbE LR Server Adapter - 8086 a11f PRO/10GbE LR Server Adapter - 1050 82562EZ 10/100 Ethernet Controller - 1462 728c 865PE Neo2 (MS-6728) - 1462 758c MS-6758 (875P Neo) - 8086 3020 D865PERL mainboard - 8086 3427 S875WP1-E mainboard - 1051 82801EB/ER (ICH5/ICH5R) integrated LAN Controller - 1059 82551QM Ethernet Controller -# ICH-6 Component - 1064 82562ET/EZ/GT/GZ - PRO/100 VE (LOM) Ethernet Controller -# ICH-6 Component - 1065 82562ET/EZ/GT/GZ - PRO/100 VE Ethernet Controller -# ICH-6 Component - 1066 82562 EM/EX/GX - PRO/100 VM (LOM) Ethernet Controller -# ICH-6 Component - 1067 82562 EM/EX/GX - PRO/100 VM Ethernet Controller -# ICH-6 Component - 1068 82562ET/EZ/GT/GZ - PRO/100 VE (LOM) Ethernet Controller 
Mobile -# ICH-6 Component - 1069 82562 EM/EX/GX - PRO/100 VM (LOM) Ethernet Controller Mobile -# ICH-6 Component - 106a 82562G \t- PRO/100 VE (LOM) Ethernet Controller -# ICH-6 Component - 106b 82562G \t- PRO/100 VE Ethernet Controller Mobile - 1075 82547GI Gigabit Ethernet Controller - 1028 0165 PowerEdge 750 - 8086 0075 PRO/1000 CT Network Connection - 8086 1075 PRO/1000 CT Network Connection - 1076 82541GI/PI Gigabit Ethernet Controller - 1028 0165 PowerEdge 750 - 8086 0076 PRO/1000 MT Network Connection - 8086 1076 PRO/1000 MT Network Connection - 8086 1176 PRO/1000 MT Desktop Adapter - 8086 1276 PRO/1000 MT Desktop Adapter - 1077 82541GI Gigabit Ethernet Controller - 1179 0001 PRO/1000 MT Mobile Connection - 8086 0077 PRO/1000 MT Mobile Connection - 8086 1077 PRO/1000 MT Mobile Connection - 1078 82541EI Gigabit Ethernet Controller - 8086 1078 PRO/1000 MT Network Connection - 1079 82546GB Gigabit Ethernet Controller - 103c 12a6 HP Dual Port 1000Base-T [A9900A] - 103c 12cf HP Core Dual Port 1000Base-T [AB352A] - 4c53 1090 Cx9 / Vx9 mainboard - 4c53 10b0 CL9 mainboard - 8086 0079 PRO/1000 MT Dual Port Network Connection - 8086 1079 PRO/1000 MT Dual Port Network Connection - 8086 1179 PRO/1000 MT Dual Port Network Connection - 8086 117a PRO/1000 MT Dual Port Server Adapter - 107a 82546GB Gigabit Ethernet Controller - 103c 12a8 HP Dual Port 1000base-SX [A9899A] - 8086 107a PRO/1000 MF Dual Port Server Adapter - 8086 127a PRO/1000 MF Dual Port Server Adapter - 107b 82546GB Gigabit Ethernet Controller - 8086 007b PRO/1000 MB Dual Port Server Connection - 8086 107b PRO/1000 MB Dual Port Server Connection - 1107 PRO/1000 MF Server Adapter (LX) - 1130 82815 815 Chipset Host Bridge and Memory Controller Hub - 1025 1016 Travelmate 612 TX - 1043 8027 TUSL2-C Mainboard - 104d 80df Vaio PCG-FX403 - 8086 4532 D815EEA2 mainboard - 8086 4557 D815EGEW Mainboard - 1131 82815 815 Chipset AGP Bridge - 1132 82815 CGC [Chipset Graphics Controller] - 1025 1016 Travelmate 612 TX - 104d 
80df Vaio PCG-FX403 - 8086 4532 D815EEA2 Mainboard - 8086 4557 D815EGEW Mainboard - 1161 82806AA PCI64 Hub Advanced Programmable Interrupt Controller - 8086 1161 82806AA PCI64 Hub APIC - 1162 Xscale 80200 Big Endian Companion Chip - 1200 Intel IXP1200 Network Processor - 172a 0000 AEP SSL Accelerator - 1209 8255xER/82551IT Fast Ethernet Controller - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - 4c53 1070 PC6 mainboard - 1221 82092AA PCI to PCMCIA Bridge - 1222 82092AA IDE Controller - 1223 SAA7116 - 1225 82452KX/GX [Orion] - 1226 82596 PRO/10 PCI - 1227 82865 EtherExpress PRO/100A - 1228 82556 EtherExpress PRO/100 Smart -# the revision field differentiates between them (1-3 is 82557, 4-5 is 82558, 6-8 is 82559, 9 is 82559ER) - 1229 82557/8/9 [Ethernet Pro 100] - 0e11 3001 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3002 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3003 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3004 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3005 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3006 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 3007 82559 Fast Ethernet LOM with Alert on LAN* - 0e11 b01e NC3120 Fast Ethernet NIC - 0e11 b01f NC3122 Fast Ethernet NIC (dual port) - 0e11 b02f NC1120 Ethernet NIC - 0e11 b04a Netelligent 10/100TX NIC with Wake on LAN - 0e11 b0c6 NC3161 Fast Ethernet NIC (embedded, WOL) - 0e11 b0c7 NC3160 Fast Ethernet NIC (embedded) - 0e11 b0d7 NC3121 Fast Ethernet NIC (WOL) - 0e11 b0dd NC3131 Fast Ethernet NIC (dual port) - 0e11 b0de NC3132 Fast Ethernet Module (dual port) - 0e11 b0e1 NC3133 Fast Ethernet Module (100-FX) - 0e11 b134 NC3163 Fast Ethernet NIC (embedded, WOL) - 0e11 b13c NC3162 Fast Ethernet NIC (embedded) - 0e11 b144 NC3123 Fast Ethernet NIC (WOL) - 0e11 b163 NC3134 Fast Ethernet NIC (dual port) - 0e11 b164 NC3135 Fast Ethernet Upgrade Module (dual port) - 0e11 b1a4 NC7131 Gigabit Server Adapter - 1014 005c 82558B Ethernet Pro 10/100 - 1014 01bc 82559 Fast Ethernet LAN 
On Motherboard - 1014 01f1 10/100 Ethernet Server Adapter - 1014 01f2 10/100 Ethernet Server Adapter - 1014 0207 Ethernet Pro/100 S - 1014 0232 10/100 Dual Port Server Adapter - 1014 023a ThinkPad R30 - 1014 105c Netfinity 10/100 - 1014 2205 ThinkPad A22p - 1014 305c 10/100 EtherJet Management Adapter - 1014 405c 10/100 EtherJet Adapter with Alert on LAN - 1014 505c 10/100 EtherJet Secure Management Adapter - 1014 605c 10/100 EtherJet Secure Management Adapter - 1014 705c 10/100 Netfinity 10/100 Ethernet Security Adapter - 1014 805c 10/100 Netfinity 10/100 Ethernet Security Adapter - 1028 009b PowerEdge 2500/2550 - 1028 00ce PowerEdge 1400 - 1033 8000 PC-9821X-B06 - 1033 8016 PK-UG-X006 - 1033 801f PK-UG-X006 - 1033 8026 PK-UG-X006 - 1033 8063 82559-based Fast Ethernet Adapter - 1033 8064 82559-based Fast Ethernet Adapter - 103c 10c0 NetServer 10/100TX - 103c 10c3 NetServer 10/100TX - 103c 10ca NetServer 10/100TX - 103c 10cb NetServer 10/100TX - 103c 10e3 NetServer 10/100TX - 103c 10e4 NetServer 10/100TX - 103c 1200 NetServer 10/100TX - 10c3 1100 SmartEther100 SC1100 - 10cf 1115 8255x-based Ethernet Adapter (10/100) - 10cf 1143 8255x-based Ethernet Adapter (10/100) - 1179 0001 8255x-based Ethernet Adapter (10/100) - 1179 0002 PCI FastEther LAN on Docker - 1179 0003 8255x-based Fast Ethernet - 1259 2560 AT-2560 100 - 1259 2561 AT-2560 100 FX Ethernet Adapter - 1266 0001 NE10/100 Adapter - 13e9 1000 6221L-4U - 144d 2501 SEM-2000 MiniPCI LAN Adapter - 144d 2502 SEM-2100IL MiniPCI LAN Adapter - 1668 1100 EtherExpress PRO/100B (TX) (MiniPCI Ethernet+Modem) - 4c53 1080 CT8 mainboard - 8086 0001 EtherExpress PRO/100B (TX) - 8086 0002 EtherExpress PRO/100B (T4) - 8086 0003 EtherExpress PRO/10+ - 8086 0004 EtherExpress PRO/100 WfM - 8086 0005 82557 10/100 - 8086 0006 82557 10/100 with Wake on LAN - 8086 0007 82558 10/100 Adapter - 8086 0008 82558 10/100 with Wake on LAN - 8086 0009 EtherExpress PRO/100+ - 8086 000a EtherExpress PRO/100+ Management Adapter - 8086 000b 
EtherExpress PRO/100+ - 8086 000c EtherExpress PRO/100+ Management Adapter - 8086 000d EtherExpress PRO/100+ Alert On LAN II* Adapter - 8086 000e EtherExpress PRO/100+ Management Adapter with Alert On LAN* - 8086 000f EtherExpress PRO/100 Desktop Adapter - 8086 0010 EtherExpress PRO/100 S Management Adapter - 8086 0011 EtherExpress PRO/100 S Management Adapter - 8086 0012 EtherExpress PRO/100 S Advanced Management Adapter (D) - 8086 0013 EtherExpress PRO/100 S Advanced Management Adapter (E) - 8086 0030 EtherExpress PRO/100 Management Adapter with Alert On LAN* GC - 8086 0031 EtherExpress PRO/100 Desktop Adapter - 8086 0040 EtherExpress PRO/100 S Desktop Adapter - 8086 0041 EtherExpress PRO/100 S Desktop Adapter - 8086 0042 EtherExpress PRO/100 Desktop Adapter - 8086 0050 EtherExpress PRO/100 S Desktop Adapter - 8086 1009 EtherExpress PRO/100+ Server Adapter - 8086 100c EtherExpress PRO/100+ Server Adapter (PILA8470B) - 8086 1012 EtherExpress PRO/100 S Server Adapter (D) - 8086 1013 EtherExpress PRO/100 S Server Adapter (E) - 8086 1015 EtherExpress PRO/100 S Dual Port Server Adapter - 8086 1017 EtherExpress PRO/100+ Dual Port Server Adapter - 8086 1030 EtherExpress PRO/100+ Management Adapter with Alert On LAN* G Server - 8086 1040 EtherExpress PRO/100 S Server Adapter - 8086 1041 EtherExpress PRO/100 S Server Adapter - 8086 1042 EtherExpress PRO/100 Server Adapter - 8086 1050 EtherExpress PRO/100 S Server Adapter - 8086 1051 EtherExpress PRO/100 Server Adapter - 8086 1052 EtherExpress PRO/100 Server Adapter - 8086 10f0 EtherExpress PRO/100+ Dual Port Adapter - 8086 2009 EtherExpress PRO/100 S Mobile Adapter - 8086 200d EtherExpress PRO/100 Cardbus - 8086 200e EtherExpress PRO/100 LAN+V90 Cardbus Modem - 8086 200f EtherExpress PRO/100 SR Mobile Adapter - 8086 2010 EtherExpress PRO/100 S Mobile Combo Adapter - 8086 2013 EtherExpress PRO/100 SR Mobile Combo Adapter - 8086 2016 EtherExpress PRO/100 S Mobile Adapter - 8086 2017 EtherExpress PRO/100 S Combo Mobile 
Adapter - 8086 2018 EtherExpress PRO/100 SR Mobile Adapter - 8086 2019 EtherExpress PRO/100 SR Combo Mobile Adapter - 8086 2101 EtherExpress PRO/100 P Mobile Adapter - 8086 2102 EtherExpress PRO/100 SP Mobile Adapter - 8086 2103 EtherExpress PRO/100 SP Mobile Adapter - 8086 2104 EtherExpress PRO/100 SP Mobile Adapter - 8086 2105 EtherExpress PRO/100 SP Mobile Adapter - 8086 2106 EtherExpress PRO/100 P Mobile Adapter - 8086 2107 EtherExpress PRO/100 Network Connection - 8086 2108 EtherExpress PRO/100 Network Connection - 8086 2200 EtherExpress PRO/100 P Mobile Combo Adapter - 8086 2201 EtherExpress PRO/100 P Mobile Combo Adapter - 8086 2202 EtherExpress PRO/100 SP Mobile Combo Adapter - 8086 2203 EtherExpress PRO/100+ MiniPCI - 8086 2204 EtherExpress PRO/100+ MiniPCI - 8086 2205 EtherExpress PRO/100 SP Mobile Combo Adapter - 8086 2206 EtherExpress PRO/100 SP Mobile Combo Adapter - 8086 2207 EtherExpress PRO/100 SP Mobile Combo Adapter - 8086 2208 EtherExpress PRO/100 P Mobile Combo Adapter - 8086 2402 EtherExpress PRO/100+ MiniPCI - 8086 2407 EtherExpress PRO/100+ MiniPCI - 8086 2408 EtherExpress PRO/100+ MiniPCI - 8086 2409 EtherExpress PRO/100+ MiniPCI - 8086 240f EtherExpress PRO/100+ MiniPCI - 8086 2410 EtherExpress PRO/100+ MiniPCI - 8086 2411 EtherExpress PRO/100+ MiniPCI - 8086 2412 EtherExpress PRO/100+ MiniPCI - 8086 2413 EtherExpress PRO/100+ MiniPCI - 8086 3000 82559 Fast Ethernet LAN on Motherboard - 8086 3001 82559 Fast Ethernet LOM with Basic Alert on LAN* - 8086 3002 82559 Fast Ethernet LOM with Alert on LAN II* - 8086 3006 EtherExpress PRO/100 S Network Connection - 8086 3007 EtherExpress PRO/100 S Network Connection - 8086 3008 EtherExpress PRO/100 Network Connection - 8086 3010 EtherExpress PRO/100 S Network Connection - 8086 3011 EtherExpress PRO/100 S Network Connection - 8086 3012 EtherExpress PRO/100 Network Connection - 8086 3411 SDS2 Mainboard - 122d 430FX - 82437FX TSC [Triton I] - 122e 82371FB PIIX ISA [Triton I] - 1230 82371FB PIIX IDE 
[Triton I] - 1231 DSVD Modem - 1234 430MX - 82371MX Mobile PCI I/O IDE Xcelerator (MPIIX) - 1235 430MX - 82437MX Mob. System Ctrlr (MTSC) & 82438MX Data Path (MTDP) - 1237 440FX - 82441FX PMC [Natoma] - 1239 82371FB PIIX IDE Interface - 123b 82380PB PCI to PCI Docking Bridge - 123c 82380AB (MISA) Mobile PCI-to-ISA Bridge - 123d 683053 Programmable Interrupt Device -# in" hidden" mode - 123e 82466GX (IHPC) Integrated Hot-Plug Controller - 123f 82466GX Integrated Hot-Plug Controller (IHPC) - 1240 82752 (752) AGP Graphics Accelerator - 124b 82380FB (MPCI2) Mobile Docking Controller - 1250 430HX - 82439HX TXC [Triton II] - 1360 82806AA PCI64 Hub PCI Bridge - 1361 82806AA PCI64 Hub Controller (HRes) - 8086 1361 82806AA PCI64 Hub Controller (HRes) - 8086 8000 82806AA PCI64 Hub Controller (HRes) - 1460 82870P2 P64H2 Hub PCI Bridge - 1461 82870P2 P64H2 I/OxAPIC - 15d9 3480 P4DP6 - 4c53 1090 Cx9 / Vx9 mainboard - 1462 82870P2 P64H2 Hot Plug Controller - 1960 80960RP [i960RP Microprocessor] - 101e 0431 MegaRAID 431 RAID Controller - 101e 0438 MegaRAID 438 Ultra2 LVD RAID Controller - 101e 0466 MegaRAID 466 Express Plus RAID Controller - 101e 0467 MegaRAID 467 Enterprise 1500 RAID Controller - 101e 0490 MegaRAID 490 Express 300 RAID Controller - 101e 0762 MegaRAID 762 Express RAID Controller - 101e 09a0 PowerEdge Expandable RAID Controller 2/SC - 1028 0467 PowerEdge Expandable RAID Controller 2/DC - 1028 1111 PowerEdge Expandable RAID Controller 2/SC - 103c 03a2 MegaRAID - 103c 10c6 MegaRAID 438, HP NetRAID-3Si - 103c 10c7 MegaRAID T5, Integrated HP NetRAID - 103c 10cc MegaRAID, Integrated HP NetRAID - 103c 10cd HP NetRAID-1Si - 105a 0000 SuperTrak - 105a 2168 SuperTrak Pro - 105a 5168 SuperTrak66/100 - 1111 1111 MegaRAID 466, PowerEdge Expandable RAID Controller 2/SC - 1111 1112 PowerEdge Expandable RAID Controller 2/SC - 113c 03a2 MegaRAID - e4bf 1010 CG1-RADIO - e4bf 1020 CU2-QUARTET - e4bf 1040 CU1-CHORUS - e4bf 3100 CX1-BAND - 1962 80960RM [i960RM Microprocessor] - 105a 
0000 SuperTrak SX6000 I2O CPU - 1a21 82840 840 (Carmel) Chipset Host Bridge (Hub A) - 1a23 82840 840 (Carmel) Chipset AGP Bridge - 1a24 82840 840 (Carmel) Chipset PCI Bridge (Hub B) - 1a30 82845 845 (Brookdale) Chipset Host Bridge - 1028 010e Optiplex GX240 - 1a31 82845 845 (Brookdale) Chipset AGP Bridge - 2410 82801AA ISA Bridge (LPC) - 2411 82801AA IDE - 2412 82801AA USB - 2413 82801AA SMBus - 2415 82801AA AC'97 Audio - 1028 0095 Precision Workstation 220 Integrated Digital Audio - 11d4 0040 SoundMAX Integrated Digital Audio - 11d4 0048 SoundMAX Integrated Digital Audio - 11d4 5340 SoundMAX Integrated Digital Audio - 2416 82801AA AC'97 Modem - 2418 82801AA PCI Bridge - 2420 82801AB ISA Bridge (LPC) - 2421 82801AB IDE - 2422 82801AB USB - 2423 82801AB SMBus - 2425 82801AB AC'97 Audio - 11d4 0040 SoundMAX Integrated Digital Audio - 11d4 0048 SoundMAX Integrated Digital Audio - 2426 82801AB AC'97 Modem - 2428 82801AB PCI Bridge - 2440 82801BA ISA Bridge (LPC) - 2442 82801BA/BAM USB (Hub #1) - 1014 01c6 Netvista A40/A40p - 1025 1016 Travelmate 612 TX - 1028 010e Optiplex GX240 - 1043 8027 TUSL2-C Mainboard - 104d 80df Vaio PCG-FX403 - 147b 0507 TH7II-RAID - 8086 4532 D815EEA2 mainboard - 8086 4557 D815EGEW Mainboard - 2443 82801BA/BAM SMBus - 1014 01c6 Netvista A40/A40p - 1025 1016 Travelmate 612 TX - 1028 010e Optiplex GX240 - 1043 8027 TUSL2-C Mainboard - 104d 80df Vaio PCG-FX403 - 147b 0507 TH7II-RAID - 8086 4532 D815EEA2 mainboard - 8086 4557 D815EGEW Mainboard - 2444 82801BA/BAM USB (Hub #2) - 1025 1016 Travelmate 612 TX - 1028 010e Optiplex GX240 - 1043 8027 TUSL2-C Mainboard - 104d 80df Vaio PCG-FX403 - 147b 0507 TH7II-RAID - 8086 4532 D815EEA2 mainboard - 2445 82801BA/BAM AC'97 Audio - 1014 01c6 Netvista A40/A40p - 1025 1016 Travelmate 612 TX - 104d 80df Vaio PCG-FX403 - 1462 3370 STAC9721 AC - 147b 0507 TH7II-RAID - 8086 4557 D815EGEW Mainboard - 2446 82801BA/BAM AC'97 Modem - 1025 1016 Travelmate 612 TX - 104d 80df Vaio PCG-FX403 - 2448 82801 Mobile PCI 
Bridge - 2449 82801BA/BAM/CA/CAM Ethernet Controller - 0e11 0012 EtherExpress PRO/100 VM - 0e11 0091 EtherExpress PRO/100 VE - 1014 01ce EtherExpress PRO/100 VE - 1014 01dc EtherExpress PRO/100 VE - 1014 01eb EtherExpress PRO/100 VE - 1014 01ec EtherExpress PRO/100 VE - 1014 0202 EtherExpress PRO/100 VE - 1014 0205 EtherExpress PRO/100 VE - 1014 0217 EtherExpress PRO/100 VE - 1014 0234 EtherExpress PRO/100 VE - 1014 023d EtherExpress PRO/100 VE - 1014 0244 EtherExpress PRO/100 VE - 1014 0245 EtherExpress PRO/100 VE - 1014 0265 PRO/100 VE Desktop Connection - 1014 0267 PRO/100 VE Desktop Connection - 1014 026a PRO/100 VE Desktop Connection - 109f 315d EtherExpress PRO/100 VE - 109f 3181 EtherExpress PRO/100 VE - 1179 ff01 PRO/100 VE Network Connection - 1186 7801 EtherExpress PRO/100 VE - 144d 2602 HomePNA 1M CNR - 8086 3010 EtherExpress PRO/100 VE - 8086 3011 EtherExpress PRO/100 VM - 8086 3012 82562EH based Phoneline - 8086 3013 EtherExpress PRO/100 VE - 8086 3014 EtherExpress PRO/100 VM - 8086 3015 82562EH based Phoneline - 8086 3016 EtherExpress PRO/100 P Mobile Combo - 8086 3017 EtherExpress PRO/100 P Mobile - 8086 3018 EtherExpress PRO/100 - 244a 82801BAM IDE U100 - 1025 1016 Travelmate 612TX - 104d 80df Vaio PCG-FX403 - 244b 82801BA IDE U100 - 1014 01c6 Netvista A40/A40p - 1028 010e Optiplex GX240 - 1043 8027 TUSL2-C Mainboard - 147b 0507 TH7II-RAID - 8086 4532 D815EEA2 mainboard - 8086 4557 D815EGEW Mainboard - 244c 82801BAM ISA Bridge (LPC) - 244e 82801 PCI Bridge - 1014 0267 NetVista A30p - 2450 82801E ISA Bridge (LPC) - 2452 82801E USB - 2453 82801E SMBus - 2459 82801E Ethernet Controller 0 - 245b 82801E IDE U100 - 245d 82801E Ethernet Controller 1 - 245e 82801E PCI Bridge - 2480 82801CA LPC Interface Controller - 2482 82801CA/CAM USB (Hub #1) - 1014 0220 ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 15d9 3480 P4DP6 - 8086 1958 vpr Matrix 170B4 - 8086 3424 SE7501HG2 Mainboard - 8086 4541 Latitude C640 - 2483 
82801CA/CAM SMBus Controller - 1014 0220 ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 15d9 3480 P4DP6 - 8086 1958 vpr Matrix 170B4 - 2484 82801CA/CAM USB (Hub #2) - 1014 0220 ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 15d9 3480 P4DP6 - 8086 1958 vpr Matrix 170B4 - 2485 82801CA/CAM AC'97 Audio Controller - 1013 5959 Crystal WMD Audio Codec - 1014 0222 ThinkPad T23 (2647-4MG) or A30/A30p (2652/2653) - 1014 0508 ThinkPad T30 - 1014 051c ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 144d c006 vpr Matrix 170B4 - 2486 82801CA/CAM AC'97 Modem Controller - 1014 0223 ThinkPad A/T/X Series - 1014 0503 ThinkPad R31 2656BBG - 1014 051a ThinkPad A/T/X Series - 101f 1025 Acer 620 Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 1179 0001 Toshiba Satellite 1110 Z15 internal Modem - 134d 4c21 Dell Inspiron 2100 internal modem - 144d 2115 vpr Matrix 170B4 internal modem - 14f1 5421 MD56ORD V.92 MDC Modem - 2487 82801CA/CAM USB (Hub #3) - 1014 0220 ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 15d9 3480 P4DP6 - 8086 1958 vpr Matrix 170B4 - 248a 82801CAM IDE U100 - 1014 0220 ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 8086 1958 vpr Matrix 170B4 - 8086 4541 Latitude C640 - 248b 82801CA Ultra ATA Storage Controller - 15d9 3480 P4DP6 - 248c 82801CAM ISA Bridge (LPC) - 24c0 82801DB/DBL (ICH4/ICH4-L) LPC Interface Bridge - 1014 0267 NetVista A30p - 1462 5800 845PE Max (MS-6580) - 24c1 82801DBL (ICH4-L) IDE Controller - 24c2 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #1 - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0126 Optiplex GX260 - 1028 0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1462 5800 845PE Max (MS-6580) - 1509 2990 Averatec 5110H laptop - 4c53 1090 Cx9 / Vx9 mainboard - 24c3 
82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) SMBus Controller - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0126 Optiplex GX260 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1458 24c2 GA-8PE667 Ultra - 1462 5800 845PE Max (MS-6580) - 4c53 1090 Cx9 / Vx9 mainboard - 24c4 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #2 - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0126 Optiplex GX260 - 1028 0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1462 5800 845PE Max (MS-6580) - 1509 2990 Averatec 5110H - 4c53 1090 Cx9 / Vx9 mainboard - 24c5 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) AC'97 Audio Controller - 0e11 00b8 Analog Devices Inc. codec [SoundMAX] - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1458 a002 GA-8PE667 Ultra - 1462 5800 845PE Max (MS-6580) - 24c6 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) AC'97 Modem Controller - 1025 005a TravelMate 290 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 24c7 82801DB/DBL/DBM (ICH4/ICH4-L/ICH4-M) USB UHCI Controller #3 - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0126 Optiplex GX260 - 1028 0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1462 5800 845PE Max (MS-6580) - 1509 2990 Averatec 5110H - 4c53 1090 Cx9 / Vx9 mainboard - 24ca 82801DBM (ICH4-M) IDE Controller - 1025 005a TravelMate 290 - 1028 0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 24cb 82801DB (ICH4) IDE Controller - 1014 0267 NetVista A30p - 1028 0126 Optiplex GX260 - 1458 24c2 GA-8PE667 Ultra - 1462 5800 845PE Max (MS-6580) - 4c53 1090 Cx9 / Vx9 mainboard - 24cc 82801DBM (ICH4-M) LPC Interface Bridge - 24cd 82801DB/DBM (ICH4/ICH4-M) USB2 EHCI Controller - 1014 0267 NetVista A30p - 1025 005a TravelMate 290 - 1028 0126 Optiplex GX260 - 1028 
0163 Latitude D505 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 1071 8160 MIM2000 - 1462 3981 845PE Max (MS-6580) - 1509 1968 Averatec 5110H - 4c53 1090 Cx9 / Vx9 mainboard - 24d0 82801EB/ER (ICH5/ICH5R) LPC Interface Bridge - 24d1 82801EB (ICH5) SATA Controller - 103c 12bc d530 CMT (DG746A) - 1458 24d1 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24d2 82801EB/ER (ICH5/ICH5R) USB UHCI Controller #1 - 1028 0183 PowerEdge 1800 - 103c 12bc d530 CMT (DG746A) - 1043 80a6 P4P800 Mainboard - 1458 24d2 GA-8IPE1000/8KNXP motherboard - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24d3 82801EB/ER (ICH5/ICH5R) SMBus Controller - 1043 80a6 P4P800 Mainboard - 1458 24d2 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24d4 82801EB/ER (ICH5/ICH5R) USB UHCI Controller #2 - 1028 0183 PowerEdge 1800 - 103c 12bc d530 CMT (DG746A) - 1043 80a6 P4P800 Mainboard - 1458 24d2 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24d5 82801EB/ER (ICH5/ICH5R) AC'97 Audio Controller - 103c 12bc Analog Devices codec [SoundMAX Integrated Digital Audio] - 1043 80f3 P4P800 Mainboard -# Again, I suppose they use the same in different subsystems - 1458 a002 GA-8IPE1000/8KNXP motherboard - 1462 7280 865PE Neo2 (MS-6728) - 8086 a000 D865PERL mainboard - 8086 e000 D865PERL mainboard - 24d6 82801EB/ER (ICH5/ICH5R) AC'97 Modem Controller - 24d7 82801EB/ER (ICH5/ICH5R) USB UHCI #3 - 1028 0183 PowerEdge 1800 - 103c 12bc d530 CMT (DG746A) - 1043 80a6 P4P800 Mainboard - 1458 24d2 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24db 82801EB/ER (ICH5/ICH5R) IDE Controller - 103c 12bc d530 CMT (DG746A) - 1043 80a6 
P4P800 Mainboard - 1458 24d2 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 1462 7580 MSI 875P - 8086 24db P4C800 Mainboard - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24dc 82801EB (ICH5) LPC Interface Bridge - 24dd 82801EB/ER (ICH5/ICH5R) USB2 EHCI Controller - 1028 0183 PowerEdge 1800 - 103c 12bc d530 CMT (DG746A) - 1043 80a6 P4P800 Mainboard - 1458 5006 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24de 82801EB/ER (ICH5/ICH5R) USB UHCI Controller #4 - 1043 80a6 P4P800 Mainboard - 1458 24d2 GA-8IPE1000 Pro2 motherboard (865PE) - 1462 7280 865PE Neo2 (MS-6728) - 8086 3427 S875WP1-E mainboard - 8086 524c D865PERL mainboard - 24df 82801ER (ICH5R) SATA Controller - 2500 82820 820 (Camino) Chipset Host Bridge (MCH) - 1028 0095 Precision Workstation 220 Chipset - 1043 801c P3C-2000 system chipset - 2501 82820 820 (Camino) Chipset Host Bridge (MCH) - 1043 801c P3C-2000 system chipset - 250b 82820 820 (Camino) Chipset Host Bridge - 250f 82820 820 (Camino) Chipset AGP Bridge - 2520 82805AA MTH Memory Translator Hub - 2521 82804AA MRH-S Memory Repeater Hub for SDRAM - 2530 82850 850 (Tehama) Chipset Host Bridge (MCH) - 147b 0507 TH7II-RAID - 2531 82860 860 (Wombat) Chipset Host Bridge (MCH) - 2532 82850 850 (Tehama) Chipset AGP Bridge - 2533 82860 860 (Wombat) Chipset AGP Bridge - 2534 82860 860 (Wombat) Chipset PCI Bridge - 2540 E7500 Memory Controller Hub - 15d9 3480 P4DP6 - 2541 E7500/E7501 Host RASUM Controller - 15d9 3480 P4DP6 - 4c53 1090 Cx9 / Vx9 mainboard - 8086 3424 SE7501HG2 Mainboard - 2543 E7500/E7501 Hub Interface B PCI-to-PCI Bridge - 2544 E7500/E7501 Hub Interface B RASUM Controller - 4c53 1090 Cx9 / Vx9 mainboard - 2545 E7500/E7501 Hub Interface C PCI-to-PCI Bridge - 2546 E7500/E7501 Hub Interface C RASUM Controller - 2547 E7500/E7501 Hub Interface D PCI-to-PCI Bridge - 2548 E7500/E7501 Hub Interface D RASUM Controller - 
254c E7501 Memory Controller Hub - 4c53 1090 Cx9 / Vx9 mainboard - 8086 3424 SE7501HG2 Mainboard - 2550 E7505 Memory Controller Hub - 2551 E7505/E7205 Series RAS Controller - 2552 E7505/E7205 PCI-to-AGP Bridge - 2553 E7505 Hub Interface B PCI-to-PCI Bridge - 2554 E7505 Hub Interface B PCI-to-PCI Bridge RAS Controller - 255d E7205 Memory Controller Hub - 2560 82845G/GL[Brookdale-G]/GE/PE DRAM Controller/Host-Hub Interface - 1028 0126 Optiplex GX260 - 1458 2560 GA-8PE667 Ultra - 1462 5800 845PE Max (MS-6580) - 2561 82845G/GL[Brookdale-G]/GE/PE Host-to-AGP Bridge - 2562 82845G/GL[Brookdale-G]/GE Chipset Integrated Graphics Device - 1014 0267 NetVista A30p - 2570 82865G/PE/P DRAM Controller/Host-Hub Interface - 1043 80f2 P4P800 Mainboard - 1458 2570 GA-8IPE1000 Pro2 motherboard (865PE) - 2571 82865G/PE/P PCI to AGP Controller - 2572 82865G Integrated Graphics Controller - 2573 82865G/PE/P PCI to CSA Bridge - 2576 82865G/PE/P Processor to I/O Memory Interface - 2578 82875P/E7210 Memory Controller Hub - 1458 2578 GA-8KNXP motherboard (875P) - 1462 7580 MS-6758 (875P Neo) -# Motherboard P4SCE - 15d9 4580 Super Micro Computer Inc. 
P4SCE - 2579 82875P Processor to AGP Controller - 257b 82875P/E7210 Processor to PCI to CSA Bridge - 257e 82875P/E7210 Processor to I/O Memory Interface - 2580 915G/P/GV/GL/PL/910GL Processor to I/O Controller - 2581 915G/P/GV/GL/PL/910GL PCI Express Root Port - 2582 82915G/GV/910GL Express Chipset Family Graphics Controller - 1028 1079 Optiplex GX280 - 2584 925X/XE Memory Controller Hub - 2585 925X/XE PCI Express Root Port - 2588 E7220/E7221 Memory Controller Hub - 2589 E7220/E7221 PCI Express Root Port - 258a E7221 Integrated Graphics Controller - 2590 Mobile 915GM/PM/GMS/910GML Express Processor to DRAM Controller - 2591 Mobile 915GM/PM Express PCI Express Root Port - 2592 Mobile 915GM/GMS/910GML Express Graphics Controller - 25a1 6300ESB LPC Interface Controller - 25a2 6300ESB PATA Storage Controller - 4c53 10b0 CL9 mainboard - 25a3 6300ESB SATA Storage Controller - 4c53 10b0 CL9 mainboard - 25a4 6300ESB SMBus Controller - 4c53 10b0 CL9 mainboard - 25a6 6300ESB AC'97 Audio Controller - 4c53 10b0 CL9 mainboard - 25a7 6300ESB AC'97 Modem Controller - 25a9 6300ESB USB Universal Host Controller - 4c53 10b0 CL9 mainboard - 25aa 6300ESB USB Universal Host Controller - 4c53 10b0 CL9 mainboard - 25ab 6300ESB Watchdog Timer - 4c53 10b0 CL9 mainboard - 25ac 6300ESB I/O Advanced Programmable Interrupt Controller - 4c53 10b0 CL9 mainboard - 25ad 6300ESB USB2 Enhanced Host Controller - 25ae 6300ESB 64-bit PCI-X Bridge - 25b0 6300ESB SATA RAID Controller - 2600 E8500 Hub Interface - 2601 E8500 PCI Express x4 Port D - 2602 E8500 PCI Express x4 Port C0 - 2603 E8500 PCI Express x4 Port C1 - 2604 E8500 PCI Express x4 Port B0 - 2605 E8500 PCI Express x4 Port B1 - 2606 E8500 PCI Express x4 Port A0 - 2607 E8500 PCI Express x4 Port A1 - 2608 E8500 PCI Express x8 Port C - 2609 E8500 PCI Express x8 Port B - 260a E8500 PCI Express x8 Port A - 260c E8500 IMI Registers - 2610 E8500 System Bus, Boot, and Interrupt Registers - 2611 E8500 Address Mapping Registers - 2612 E8500 RAS Registers 
- 2613 E8500 Reserved Registers - 2614 E8500 Reserved Registers - 2615 E8500 Miscellaneous Registers - 2617 E8500 Reserved Registers - 2618 E8500 Reserved Registers - 2619 E8500 Reserved Registers - 261a E8500 Reserved Registers - 261b E8500 Reserved Registers - 261c E8500 Reserved Registers - 261d E8500 Reserved Registers - 261e E8500 Reserved Registers - 2620 E8500 eXternal Memory Bridge - 2621 E8500 XMB Miscellaneous Registers - 2622 E8500 XMB Memory Interleaving Registers - 2623 E8500 XMB DDR Initialization and Calibration - 2624 E8500 XMB Reserved Registers - 2625 E8500 XMB Reserved Registers - 2626 E8500 XMB Reserved Registers - 2627 E8500 XMB Reserved Registers - 2640 82801FB/FR (ICH6/ICH6R) LPC Interface Bridge - 2641 82801FBM (ICH6M) LPC Interface Bridge - 2642 82801FW/FRW (ICH6W/ICH6RW) LPC Interface Bridge - 2651 82801FB/FW (ICH6/ICH6W) SATA Controller - 1028 0179 Optiplex GX280 - 2652 82801FR/FRW (ICH6R/ICH6RW) SATA Controller - 2653 82801FBM (ICH6M) SATA Controller - 2658 82801FB/FBM/FR/FW/FRW (ICH6 Family) USB UHCI #1 - 1028 0179 Optiplex GX280 - 2659 82801FB/FBM/FR/FW/FRW (ICH6 Family) USB UHCI #2 - 1028 0179 Optiplex GX280 - 265a 82801FB/FBM/FR/FW/FRW (ICH6 Family) USB UHCI #3 - 1028 0179 Optiplex GX280 - 265b 82801FB/FBM/FR/FW/FRW (ICH6 Family) USB UHCI #4 - 1028 0179 Optiplex GX280 - 265c 82801FB/FBM/FR/FW/FRW (ICH6 Family) USB2 EHCI Controller - 1028 0179 Optiplex GX280 - 2660 82801FB/FBM/FR/FW/FRW (ICH6 Family) PCI Express Port 1 - 2662 82801FB/FBM/FR/FW/FRW (ICH6 Family) PCI Express Port 2 - 2664 82801FB/FBM/FR/FW/FRW (ICH6 Family) PCI Express Port 3 - 2666 82801FB/FBM/FR/FW/FRW (ICH6 Family) PCI Express Port 4 - 2668 82801FB/FBM/FR/FW/FRW (ICH6 Family) High Definition Audio Controller - 266a 82801FB/FBM/FR/FW/FRW (ICH6 Family) SMBus Controller - 1028 0179 Optiplex GX280 - 266c 82801FB/FBM/FR/FW/FRW (ICH6 Family) LAN Controller - 266d 82801FB/FBM/FR/FW/FRW (ICH6 Family) AC'97 Modem Controller - 266e 82801FB/FBM/FR/FW/FRW (ICH6 Family) AC'97 
Audio Controller - 1028 0179 Optiplex GX280 - 266f 82801FB/FBM/FR/FW/FRW (ICH6 Family) IDE Controller - 2770 Memory Controller Hub - 2771 PCI Express Graphics Port - 2772 Integrated Graphics Controller - 2774 Workstation Memory Controller Hub - 2775 PCI Express Graphics Port - 2776 Integrated Graphics Controller - 2778 Server Memory Controller Hub - 2779 PCI Express Root Port - 2782 82915G Express Chipset Family Graphics Controller - 2792 Mobile 915GM/GMS/910GML Express Graphics Controller - 27b8 I/O Controller Hub LPC - 27b9 Mobile I/O Controller Hub LPC - 27c0 I/O Controller Hub SATA cc=IDE - 27c1 I/O Controller Hub SATA cc=AHCI - 27c3 I/O Controller Hub SATA cc=RAID - 27c4 Mobile I/O Controller Hub SATA cc=IDE - 27c5 Mobile I/O Controller Hub SATA cc=AHCI - 27c8 I/O Controller Hub UHCI USB #1 - 27c9 I/O Controller Hub UHCI USB #2 - 27ca I/O Controller Hub UHCI USB #3 - 27cb I/O Controller Hub UHCI USB #4 - 27cc I/O Controller Hub EHCI USB - 27d0 I/O Controller Hub PCI Express Port 1 - 27d2 I/O Controller Hub PCI Express Port 2 - 27d4 I/O Controller Hub PCI Express Port 3 - 27d6 I/O Controller Hub PCI Express Port 4 - 27d8 I/O Controller Hub High Definition Audio - 27da I/O Controller Hub SMBus - 27dc I/O Controller Hub LAN - 27dd I/O Controller Hub AC'97 Modem - 27de I/O Controller Hub AC'97 Audio - 27df I/O Controller Hub PATA - 27e0 I/O Controller Hub PCI Express Port 5 - 27e2 I/O Controller Hub PCI Express Port 6 - 3092 Integrated RAID - 3200 GD31244 PCI-X SATA HBA - 3340 82855PM Processor to I/O Controller - 1025 005a TravelMate 290 - 103c 088c nc8000 laptop - 103c 0890 nc6000 laptop - 3341 82855PM Processor to AGP Controller - 3575 82830 830 Chipset Host Bridge - 1014 021d ThinkPad A/T/X Series - 104d 80e7 VAIO PCG-GR214EP/GR214MP/GR215MP/GR314MP/GR315MP - 3576 82830 830 Chipset AGP Bridge - 3577 82830 CGC [Chipset Graphics Controller] - 1014 0513 ThinkPad A/T/X Series - 3578 82830 830 Chipset Host Bridge - 3580 82852/82855 GM/GME/PM/GMV Processor to I/O 
Controller - 1028 0163 Latitude D505 - 4c53 10b0 CL9 mainboard - 3581 82852/82855 GM/GME/PM/GMV Processor to AGP Controller - 3582 82852/855GM Integrated Graphics Device - 1028 0163 Latitude D505 - 4c53 10b0 CL9 mainboard - 3584 82852/82855 GM/GME/PM/GMV Processor to I/O Controller - 1028 0163 Latitude D505 - 4c53 10b0 CL9 mainboard - 3585 82852/82855 GM/GME/PM/GMV Processor to I/O Controller - 1028 0163 Latitude D505 - 4c53 10b0 CL9 mainboard - 3590 E7520 Memory Controller Hub - 3591 E7525/E7520 Error Reporting Registers - 3592 E7320 Memory Controller Hub - 3593 E7320 Error Reporting Registers - 3594 E7520 DMA Controller - 3595 E7525/E7520/E7320 PCI Express Port A - 3596 E7525/E7520/E7320 PCI Express Port A1 - 3597 E7525/E7520 PCI Express Port B - 3598 E7520 PCI Express Port B1 - 3599 E7520 PCI Express Port C - 359a E7520 PCI Express Port C1 - 359b E7525/E7520/E7320 Extended Configuration Registers - 359e E7525 Memory Controller Hub - 4220 PRO/Wireless 2200BG - 4223 PRO/Wireless 2915ABG MiniPCI Adapter - 5200 EtherExpress PRO/100 Intelligent Server - 5201 EtherExpress PRO/100 Intelligent Server - 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter - 530d 80310 IOP [IO Processor] - 7000 82371SB PIIX3 ISA [Natoma/Triton II] - 7010 82371SB PIIX3 IDE [Natoma/Triton II] - 7020 82371SB PIIX3 USB [Natoma/Triton II] - 7030 430VX - 82437VX TVX [Triton VX] - 7050 Intel Intercast Video Capture Card - 7100 430TX - 82439TX MTXC - 7110 82371AB/EB/MB PIIX4 ISA - 15ad 1976 virtualHW v3 - 7111 82371AB/EB/MB PIIX4 IDE - 15ad 1976 virtualHW v3 - 7112 82371AB/EB/MB PIIX4 USB - 15ad 1976 virtualHW v3 - 7113 82371AB/EB/MB PIIX4 ACPI - 15ad 1976 virtualHW v3 - 7120 82810 GMCH [Graphics Memory Controller Hub] - 4c53 1040 CL7 mainboard - 4c53 1060 PC7 mainboard - 7121 82810 CGC [Chipset Graphics Controller] - 4c53 1040 CL7 mainboard - 4c53 1060 PC7 mainboard - 8086 4341 Cayman (CA810) Mainboard - 7122 82810 DC-100 GMCH [Graphics Memory Controller Hub] - 7123 82810 DC-100 CGC [Chipset 
Graphics Controller] - 7124 82810E DC-133 GMCH [Graphics Memory Controller Hub] - 7125 82810E DC-133 CGC [Chipset Graphics Controller] - 7126 82810 DC-133 System and Graphics Controller - 7128 82810-M DC-100 System and Graphics Controller - 712a 82810-M DC-133 System and Graphics Controller - 7180 440LX/EX - 82443LX/EX Host bridge - 7181 440LX/EX - 82443LX/EX AGP bridge - 7190 440BX/ZX/DX - 82443BX/ZX/DX Host bridge - 0e11 0500 Armada 1750 Laptop System Chipset - 0e11 b110 Armada M700/E500 - 1179 0001 Toshiba Tecra 8100 Laptop System Chipset - 15ad 1976 virtualHW v3 - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - 7191 440BX/ZX/DX - 82443BX/ZX/DX AGP bridge - 7192 440BX/ZX/DX - 82443BX/ZX/DX Host bridge (AGP disabled) - 0e11 0460 Armada 1700 Laptop System Chipset - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 7194 82440MX Host Bridge - 1033 0000 Versa Note Vxi - 4c53 10a0 CA3/CR3 mainboard - 7195 82440MX AC'97 Audio Controller - 1033 80cc Versa Note VXi - 10cf 1099 QSound_SigmaTel Stac97 PCI Audio - 11d4 0040 SoundMAX Integrated Digital Audio - 11d4 0048 SoundMAX Integrated Digital Audio - 7196 82440MX AC'97 Modem Controller - 7198 82440MX ISA Bridge - 7199 82440MX EIDE Controller - 719a 82440MX USB Universal Host Controller - 719b 82440MX Power Management Controller - 71a0 440GX - 82443GX Host bridge - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - 71a1 440GX - 82443GX AGP bridge - 71a2 440GX - 82443GX Host bridge (AGP disabled) - 4c53 1000 CC7/CR7/CP7/VC7/VP7/VR7 mainboard - 7600 82372FB PIIX5 ISA - 7601 82372FB PIIX5 IDE - 7602 82372FB PIIX5 USB - 7603 82372FB PIIX5 SMBus - 7800 82740 (i740) AGP Graphics Accelerator - 003d 0008 Starfighter AGP - 003d 000b Starfighter AGP - 1092 0100 Stealth II G460 - 10b4 201a Lightspeed 740 - 10b4 202f Lightspeed 740 - 8086 0000 Terminator 2x/i - 8086 0100 Intel740 Graphics Accelerator - 84c4 450KX/GX [Orion] - 82454KX/GX PCI bridge - 84c5 450KX/GX [Orion] - 82453KX/GX Memory controller - 84ca 450NX - 82451NX Memory 
& I/O Controller - 84cb 450NX - 82454NX/84460GX PCI Expander Bridge - 84e0 460GX - 84460GX System Address Controller (SAC) - 84e1 460GX - 84460GX System Data Controller (SDC) - 84e2 460GX - 84460GX AGP Bridge (GXB function 2) - 84e3 460GX - 84460GX Memory Address Controller (MAC) - 84e4 460GX - 84460GX Memory Data Controller (MDC) - 84e6 460GX - 82466GX Wide and fast PCI eXpander Bridge (WXB) - 84ea 460GX - 84460GX AGP Bridge (GXB function 1) - 8500 IXP4XX - Intel Network Processor family. IXP420, IXP421, IXP422, IXP425 and IXC1100 - 1993 0dee mGuard-PCI AV#1 - 1993 0def mGuard-PCI AV#0 - 9000 IXP2000 Family Network Processor - 9001 IXP2400 Network Processor - 9004 IXP2800 Network Processor - 9621 Integrated RAID - 9622 Integrated RAID - 9641 Integrated RAID - 96a1 Integrated RAID -# retail verson - a01f PRO/10GbE LR Server Adapter -# OEM version - a11f PRO/10GbE LR Server Adapter - b152 21152 PCI-to-PCI Bridge -# observed, and documented in Intel revision note; new mask of 1011:0026 - b154 21154 PCI-to-PCI Bridge - b555 21555 Non transparent PCI-to-PCI Bridge - 12d9 000a PCI VoIP Gateway - 4c53 1050 CT7 mainboard - 4c53 1051 CE7 mainboard - e4bf 1000 CC8-1-BLUES - ffff 450NX/GX [Orion] - 82453KX/GX Memory controller [BUG] -8401 TRENDware International Inc. -8800 Trigem Computer Inc. - 2008 Video assistent component -8866 T-Square Design Inc. 
-8888 Silicon Magic -# 8c4a is not Winbond but there is a board misprogrammed -8c4a Winbond - 1980 W89C940 misprogrammed [ne2k] -8e0e Computone Corporation -8e2e KTI - 3000 ET32P2 -9004 Adaptec - 0078 AHA-2940U_CN - 1078 AIC-7810 - 1160 AIC-1160 [Family Fibre Channel Adapter] - 2178 AIC-7821 - 3860 AHA-2930CU - 3b78 AHA-4844W/4844UW - 5075 AIC-755x - 5078 AHA-7850 - 9004 7850 AHA-2904/Integrated AIC-7850 - 5175 AIC-755x - 5178 AIC-7851 - 5275 AIC-755x - 5278 AIC-7852 - 5375 AIC-755x - 5378 AIC-7850 - 5475 AIC-755x - 5478 AIC-7850 - 5575 AVA-2930 - 5578 AIC-7855 - 5647 ANA-7711 TCP Offload Engine - 9004 7710 ANA-7711F TCP Offload Engine - Optical - 9004 7711 ANA-7711LP TCP Offload Engine - Copper - 5675 AIC-755x - 5678 AIC-7856 - 5775 AIC-755x - 5778 AIC-7850 - 5800 AIC-5800 - 5900 ANA-5910/5930/5940 ATM155 & 25 LAN Adapter - 5905 ANA-5910A/5930A/5940A ATM Adapter - 6038 AIC-3860 - 6075 AIC-1480 / APA-1480 - 9004 7560 AIC-1480 / APA-1480 Cardbus - 6078 AIC-7860 - 6178 AIC-7861 - 9004 7861 AHA-2940AU Single - 6278 AIC-7860 - 6378 AIC-7860 - 6478 AIC-786x - 6578 AIC-786x - 6678 AIC-786x - 6778 AIC-786x - 6915 ANA620xx/ANA69011A - 9004 0008 ANA69011A/TX 10/100 - 9004 0009 ANA69011A/TX 10/100 - 9004 0010 ANA62022 2-port 10/100 - 9004 0018 ANA62044 4-port 10/100 - 9004 0019 ANA62044 4-port 10/100 - 9004 0020 ANA62022 2-port 10/100 - 9004 0028 ANA69011A/TX 10/100 - 9004 8008 ANA69011A/TX 64 bit 10/100 - 9004 8009 ANA69011A/TX 64 bit 10/100 - 9004 8010 ANA62022 2-port 64 bit 10/100 - 9004 8018 ANA62044 4-port 64 bit 10/100 - 9004 8019 ANA62044 4-port 64 bit 10/100 - 9004 8020 ANA62022 2-port 64 bit 10/100 - 9004 8028 ANA69011A/TX 64 bit 10/100 - 7078 AHA-294x / AIC-7870 - 7178 AHA-2940/2940W / AIC-7871 - 7278 AHA-3940/3940W / AIC-7872 - 7378 AHA-3985 / AIC-7873 - 7478 AHA-2944/2944W / AIC-7874 - 7578 AHA-3944/3944W / AIC-7875 - 7678 AHA-4944W/UW / AIC-7876 - 7710 ANA-7711F Network Accelerator Card (NAC) - Optical - 7711 ANA-7711C Network Accelerator Card (NAC) - Copper - 
7778 AIC-787x - 7810 AIC-7810 - 7815 AIC-7815 RAID+Memory Controller IC - 9004 7815 ARO-1130U2 RAID Controller - 9004 7840 AIC-7815 RAID+Memory Controller IC - 7850 AIC-7850 - 7855 AHA-2930 - 7860 AIC-7860 - 7870 AIC-7870 - 7871 AHA-2940 - 7872 AHA-3940 - 7873 AHA-3980 - 7874 AHA-2944 - 7880 AIC-7880P - 7890 AIC-7890 - 7891 AIC-789x - 7892 AIC-789x - 7893 AIC-789x - 7894 AIC-789x - 7895 AHA-2940U/UW / AHA-39xx / AIC-7895 - 9004 7890 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B - 9004 7891 AHA-2940U/2940UW Dual - 9004 7892 AHA-3940AU/AUW/AUWD/UWD - 9004 7894 AHA-3944AUWD - 9004 7895 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B - 9004 7896 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B - 9004 7897 AHA-2940U/2940UW Dual AHA-394xAU/AUW/AUWD AIC-7895B - 7896 AIC-789x - 7897 AIC-789x - 8078 AIC-7880U - 9004 7880 AIC-7880P Ultra/Ultra Wide SCSI Chipset - 8178 AHA-2940U/UW/D / AIC-7881U - 9004 7881 AHA-2940UW SCSI Host Adapter - 8278 AHA-3940U/UW/UWD / AIC-7882U - 8378 AHA-3940U/UW / AIC-7883U - 8478 AHA-2944UW / AIC-7884U - 8578 AHA-3944U/UWD / AIC-7885 - 8678 AHA-4944UW / AIC-7886 - 8778 AHA-2940UW Pro / AIC-788x - 9004 7887 2940UW Pro Ultra-Wide SCSI Controller - 8878 AHA-2930UW / AIC-7888 - 9004 7888 AHA-2930UW SCSI Controller - 8b78 ABA-1030 - ec78 AHA-4944W/UW -9005 Adaptec - 0010 AHA-2940U2/U2W - 9005 2180 AHA-2940U2 SCSI Controller - 9005 8100 AHA-2940U2B SCSI Controller - 9005 a100 AHA-2940U2B SCSI Controller - 9005 a180 AHA-2940U2W SCSI Controller - 9005 e100 AHA-2950U2B SCSI Controller - 0011 AHA-2930U2 - 0013 78902 - 9005 0003 AAA-131U2 Array1000 1 Channel RAID Controller - 9005 000f AIC7890_ARO - 001f AHA-2940U2/U2W / 7890/7891 - 9005 000f 2940U2W SCSI Controller - 9005 a180 2940U2W SCSI Controller - 0020 AIC-7890 - 002f AIC-7890 - 0030 AIC-7890 - 003f AIC-7890 - 0050 AHA-3940U2x/395U2x - 9005 f500 AHA-3950U2B - 9005 ffff AHA-3950U2B - 0051 AHA-3950U2D - 9005 b500 AHA-3950U2D - 0053 AIC-7896 SCSI Controller - 9005 ffff AIC-7896 SCSI 
Controller mainboard implementation - 005f AIC-7896U2/7897U2 - 0080 AIC-7892A U160/m - 0e11 e2a0 Compaq 64-Bit/66MHz Wide Ultra3 SCSI Adapter - 9005 6220 AHA-29160C - 9005 62a0 29160N Ultra160 SCSI Controller - 9005 e220 29160LP Low Profile Ultra160 SCSI Controller - 9005 e2a0 29160 Ultra160 SCSI Controller - 0081 AIC-7892B U160/m - 9005 62a1 19160 Ultra160 SCSI Controller - 0083 AIC-7892D U160/m - 008f AIC-7892P U160/m - 1179 0001 Magnia Z310 - 15d9 9005 Onboard SCSI Host Adapter - 00c0 AHA-3960D / AIC-7899A U160/m - 0e11 f620 Compaq 64-Bit/66MHz Dual Channel Wide Ultra3 SCSI Adapter - 9005 f620 AHA-3960D U160/m - 00c1 AIC-7899B U160/m - 00c3 AIC-7899D U160/m - 00c5 RAID subsystem HBA - 1028 00c5 PowerEdge 2400,2500,2550,4400 - 00cf AIC-7899P U160/m - 1028 00ce PowerEdge 1400 - 1028 00d1 PowerEdge 2550 - 1028 00d9 PowerEdge 2500 - 10f1 2462 Thunder K7 S2462 - 15d9 9005 Onboard SCSI Host Adapter - 8086 3411 SDS2 Mainboard - 0250 ServeRAID Controller - 1014 0279 ServeRAID-xx - 1014 028c ServeRAID-xx -# from kernel sources - 0279 ServeRAID 6M - 0283 AAC-RAID - 9005 0283 Catapult - 0284 AAC-RAID - 9005 0284 Tomcat - 0285 AAC-RAID - 0e11 0295 SATA 6Ch (Bearcat) - 1014 02f2 ServeRAID 8i - 1028 0287 PowerEdge Expandable RAID Controller 320/DC - 1028 0291 CERC SATA RAID 2 PCI SATA 6ch (DellCorsair) - 103c 3227 AAR-2610SA - 17aa 0286 Legend S220 (Legend Crusader) - 17aa 0287 Legend S230 (Legend Vulcan) - 9005 0285 2200S (Vulcan) - 9005 0286 2120S (Crusader) - 9005 0287 2200S (Vulcan-2m) - 9005 0288 3230S (Harrier) - 9005 0289 3240S (Tornado) - 9005 028a ASR-2020S PCI-X ZCR (Skyhawk) - 9005 028b ASR-2020S SO-DIMM PCI-X ZCR (Terminator) - 9005 0290 AAR-2410SA PCI SATA 4ch (Jaguar II) - 9005 0292 AAR-2810SA PCI SATA 8ch (Corsair-8) - 9005 0293 AAR-21610SA PCI SATA 16ch (Corsair-16) - 9005 0294 ESD SO-DIMM PCI-X SATA ZCR (Prowler) - 0286 AAC-RAID (Rocket) - 9005 028c ASR-2230S + ASR-2230SLP PCI-X (Lancer) - 0503 Scamp chipset SCSI controller - 1014 02BF Quad Channel PCI-X DDR 
U320 SCSI RAID Adapter (571E) - 8000 ASC-29320A U320 - 800f AIC-7901 U320 - 8010 ASC-39320 U320 - 8011 ASC-32320D U320 - 0e11 00ac ASC-39320D U320 - 9005 0041 ASC-39320D U320 - 8012 ASC-29320 U320 - 8013 ASC-29320B U320 - 8014 ASC-29320LP U320 - 8015 ASC-39320B U320 - 8016 ASC-39320A U320 - 8017 ASC-29320ALP U320 - 801c ASC-39320D U320 - 801d AIC-7902B U320 - 801e AIC-7901A U320 - 801f AIC-7902 U320 - 8080 ASC-29320A U320 w/HostRAID - 808f AIC-7901 U320 w/HostRAID - 8090 ASC-39320 U320 w/HostRAID - 8091 ASC-39320D U320 w/HostRAID - 8092 ASC-29320 U320 w/HostRAID - 8093 ASC-29320B U320 w/HostRAID - 8094 ASC-29320LP U320 w/HostRAID - 8095 ASC-39320(B) U320 w/HostRAID - 8096 ASC-39320A U320 w/HostRAID - 8097 ASC-29320ALP U320 w/HostRAID - 809c ASC-39320D(B) U320 w/HostRAID - 809d AIC-7902(B) U320 w/HostRAID - 809e AIC-7901A U320 w/HostRAID - 809f AIC-7902 U320 w/HostRAID -907f Atronics - 2015 IDE-2015PL -919a Gigapixel Corp -9412 Holtek - 6565 6565 -9699 Omni Media Technology Inc - 6565 6565 -9710 NetMos Technology - 7780 USB IRDA-port - 9705 PCI 9705 Parallel Port - 9715 PCI 9715 Dual Parallel Port - 9735 PCI 9735 Multi-I/O Controller - 1000 0002 0P2S (2 serial) - 1000 0012 1P2S (1 parallel + 2 serial) - 9745 PCI 9745 Multi-I/O Controller - 1000 0002 0P2S (2 serial) - 1000 0012 1P2S (1 parallel + 2 serial) - 9755 PCI 9755 Parallel Port and ISA Bridge - 9805 PCI 9805 Parallel Port - 9815 PCI 9815 Dual Parallel Port - 1000 0020 2P0S (2 port parallel adaptor) - 9835 PCI 9835 Multi-I/O Controller - 1000 0002 0P2S (16C550 UART) - 1000 0012 1P2S - 9845 PCI 9845 Multi-I/O Controller - 1000 0004 0P4S (4 port 16550A serial card) - 1000 0006 0P6S (6 port 16550A serial card) - 1000 0014 1P4S (4 port 16550A serial card + parallel) - 9855 PCI 9855 Multi-I/O Controller - 1000 0014 1P4S -9902 Stargen Inc. - 0001 SG2010 PCI over Starfabric Bridge - 0002 SG2010 PCI to Starfabric Gateway - 0003 SG1010 Starfabric Switch and PCI Bridge -a0a0 AOPEN Inc. 
-a0f1 UNISYS Corporation -a200 NEC Corporation -a259 Hewlett Packard -a25b Hewlett Packard GmbH PL24-MKT -a304 Sony -a727 3Com Corporation - 0013 3CRPAG175 Wireless PC Card -aa42 Scitex Digital Video -ac1e Digital Receiver Technology Inc -ac3d Actuality Systems -aecb Adrienne Electronics Corporation -b1b3 Shiva Europe Limited -# Pinnacle should be 11bd, but they got it wrong several times --mj -bd11 Pinnacle Systems, Inc. (Wrong ID) -c001 TSI Telsys -c0a9 Micron/Crucial Technology -c0de Motorola -c0fe Motion Engineering, Inc. -ca50 Varian Australia Pty Ltd -cafe Chrysalis-ITS -cccc Catapult Communications -cddd Tyzx, Inc. - 0101 DeepSea 1 High Speed Stereo Vision Frame Grabber - 0200 DeepSea 2 High Speed Stereo Vision Frame Grabber -d4d4 Dy4 Systems Inc - 0601 PCI Mezzanine Card -d531 I+ME ACTIA GmbH -d84d Exsys -dead Indigita Corporation -deaf Middle Digital Inc. - 9050 PC Weasel Virtual VGA - 9051 PC Weasel Serial Port - 9052 PC Weasel Watchdog Timer -e000 Winbond - e000 W89C940 -# see also : http://www.schoenfeld.de/inside/Inside_CWMK3.txt maybe a misuse of TJN id or it use the TJN 3XX chip for other applic -e159 Tiger Jet Network Inc. - 0001 Tiger3XX Modem/ISDN interface - 0059 0001 128k ISDN-S/T Adapter - 0059 0003 128k ISDN-U Adapter - 0002 Tiger100APC ISDN chipset -e4bf EKF Elektronik GmbH -# Innovative and scalable network IC vendor -e55e Essence Technology, Inc. -ea01 Eagle Technology -# The main chip of all these devices is by Xilinx -> It could also be a Xilinx ID. -ea60 RME - 9896 Digi32 - 9897 Digi32 Pro - 9898 Digi32/8 -eabb Aashima Technology B.V. 
-eace Endace Measurement Systems, Ltd - 3100 DAG 3.10 OC-3/OC-12 - 3200 DAG 3.2x OC-3/OC-12 - 320e DAG 3.2E Fast Ethernet - 340e DAG 3.4E Fast Ethernet - 341e DAG 3.41E Fast Ethernet - 3500 DAG 3.5 OC-3/OC-12 - 351c DAG 3.5ECM Fast Ethernet - 4100 DAG 4.10 OC-48 - 4110 DAG 4.11 OC-48 - 4220 DAG 4.2 OC-48 - 422e DAG 4.2E Dual Gigabit Ethernet -ec80 Belkin Corporation - ec00 F5D6000 -ecc0 Echo Digital Audio Corporation -edd8 ARK Logic Inc - a091 1000PV [Stingray] - a099 2000PV [Stingray] - a0a1 2000MT - a0a9 2000MI -f1d0 AJA Video -# All boards I have seen have this ID not efac, though all docs say efac... - cafe KONA SD SMPTE 259M I/O - efac KONA SD SMPTE 259M I/O - facd KONA HD SMPTE 292M I/O -fa57 Interagon AS - 0001 PMC [Pattern Matching Chip] -febd Ultraview Corp. -feda Broadcom Inc (nee Epigram) - a0fa BCM4210 iLine10 HomePNA 2.0 - a10e BCM4230 iLine10 HomePNA 2.0 -# IT & Telecom company, develops PCI Trunk cards -fede Fedetec Inc. - 0003 TABIC PCI v3 -fffe VMWare Inc - 0405 Virtual SVGA 4.0 - 0710 Virtual SVGA -ffff Illegal Vendor ID - - -# List of known device classes, subclasses and programming interfaces - -# Syntax: -# C class class_name -# subclass subclass_name <-- single tab -# prog-if prog-if_name <-- two tabs - -C 00 Unclassified device - 00 Non-VGA unclassified device - 01 VGA compatible unclassified device -C 01 Mass storage controller - 00 SCSI storage controller - 01 IDE interface - 02 Floppy disk controller - 03 IPI bus controller - 04 RAID bus controller - 80 Unknown mass storage controller -C 02 Network controller - 00 Ethernet controller - 01 Token ring network controller - 02 FDDI network controller - 03 ATM network controller - 04 ISDN controller - 80 Network controller -C 03 Display controller - 00 VGA compatible controller - 00 VGA - 01 8514 - 01 XGA compatible controller - 02 3D controller - 80 Display controller -C 04 Multimedia controller - 00 Multimedia video controller - 01 Multimedia audio controller - 02 Computer telephony device - 
80 Multimedia controller -C 05 Memory controller - 00 RAM memory - 01 FLASH memory - 80 Memory controller -C 06 Bridge - 00 Host bridge - 01 ISA bridge - 02 EISA bridge - 03 MicroChannel bridge - 04 PCI bridge - 00 Normal decode - 01 Subtractive decode - 05 PCMCIA bridge - 06 NuBus bridge - 07 CardBus bridge - 08 RACEway bridge - 00 Transparent mode - 01 Endpoint mode - 09 Semi-transparent PCI-to-PCI bridge - 40 Primary bus towards host CPU - 80 Secondary bus towards host CPU - 0a InfiniBand to PCI host bridge - 80 Bridge -C 07 Communication controller - 00 Serial controller - 00 8250 - 01 16450 - 02 16550 - 03 16650 - 04 16750 - 05 16850 - 06 16950 - 01 Parallel controller - 00 SPP - 01 BiDir - 02 ECP - 03 IEEE1284 - fe IEEE1284 Target - 02 Multiport serial controller - 03 Modem - 00 Generic - 01 Hayes/16450 - 02 Hayes/16550 - 03 Hayes/16650 - 04 Hayes/16750 - 80 Communication controller -C 08 Generic system peripheral - 00 PIC - 00 8259 - 01 ISA PIC - 02 EISA PIC - 10 IO-APIC - 20 IO(X)-APIC - 01 DMA controller - 00 8237 - 01 ISA DMA - 02 EISA DMA - 02 Timer - 00 8254 - 01 ISA Timer - 02 EISA Timers - 03 RTC - 00 Generic - 01 ISA RTC - 04 PCI Hot-plug controller - 80 System peripheral -C 09 Input device controller - 00 Keyboard controller - 01 Digitizer Pen - 02 Mouse controller - 03 Scanner controller - 04 Gameport controller - 00 Generic - 10 Extended - 80 Input device controller -C 0a Docking station - 00 Generic Docking Station - 80 Docking Station -C 0b Processor - 00 386 - 01 486 - 02 Pentium - 10 Alpha - 20 Power PC - 30 MIPS - 40 Co-processor -C 0c Serial bus controller - 00 FireWire (IEEE 1394) - 00 Generic - 10 OHCI - 01 ACCESS Bus - 02 SSA - 03 USB Controller - 00 UHCI - 10 OHCI - 20 EHCI - 80 Unspecified - fe USB Device - 04 Fibre Channel - 05 SMBus - 06 InfiniBand -C 0d Wireless controller - 00 IRDA controller - 01 Consumer IR controller - 10 RF controller - 80 Wireless controller -C 0e Intelligent controller - 00 I2O -C 0f Satellite communications 
controller - 00 Satellite TV controller - 01 Satellite audio communication controller - 03 Satellite voice communication controller - 04 Satellite data communication controller -C 10 Encryption controller - 00 Network and computing encryption device - 10 Entertainment encryption device - 80 Encryption controller -C 11 Signal processing controller - 00 DPIO module - 01 Performance counters - 10 Communication synchronizer - 80 Signal processing controller diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 93e8a878ea95..4be1b887ab62 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -757,8 +757,6 @@ pci_scan_device(struct pci_bus *bus, int devfn) dev->dev.release = pci_release_dev; pci_dev_get(dev); - pci_name_device(dev); - dev->dev.dma_mask = &dev->dma_mask; dev->dev.coherent_dma_mask = 0xffffffffull; diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 7988fc8df3fd..9613f666c110 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -474,7 +474,7 @@ static int show_dev_config(struct seq_file *m, void *v) struct pci_dev *first_dev; struct pci_driver *drv; u32 class_rev; - unsigned char latency, min_gnt, max_lat, *class; + unsigned char latency, min_gnt, max_lat; int reg; first_dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, NULL); @@ -490,16 +490,8 @@ static int show_dev_config(struct seq_file *m, void *v) pci_read_config_byte (dev, PCI_MAX_LAT, &max_lat); seq_printf(m, " Bus %2d, device %3d, function %2d:\n", dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn)); - class = pci_class_name(class_rev >> 16); - if (class) - seq_printf(m, " %s", class); - else - seq_printf(m, " Class %04x", class_rev >> 16); -#ifdef CONFIG_PCI_NAMES - seq_printf(m, ": %s", dev->pretty_name); -#else + seq_printf(m, " Class %04x", class_rev >> 16); seq_printf(m, ": PCI device %04x:%04x", dev->vendor, dev->device); -#endif seq_printf(m, " (rev %d).\n", class_rev & 0xff); if (dev->irq) diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c 
index fc056062c960..7b9e54c39678 100644 --- a/drivers/usb/core/hcd-pci.c +++ b/drivers/usb/core/hcd-pci.c @@ -121,10 +121,6 @@ int usb_hcd_pci_probe (struct pci_dev *dev, const struct pci_device_id *id) } } -#ifdef CONFIG_PCI_NAMES - hcd->product_desc = dev->pretty_name; -#endif - pci_set_master (dev); retval = usb_add_hcd (hcd, dev->irq, SA_SHIRQ); diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index 52b16850a54e..30f80c23f934 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -1473,10 +1473,6 @@ static int __devinit nvidiafb_probe(struct pci_dev *pd, par->Chipset = (pd->vendor << 16) | pd->device; printk(KERN_INFO PFX "nVidia device/chipset %X\n", par->Chipset); -#ifdef CONFIG_PCI_NAMES - printk(KERN_INFO PFX "%s\n", pd->pretty_name); -#endif - if (par->Architecture == 0) { printk(KERN_ERR PFX "unknown NV_ARCH\n"); goto err_out_free_base0; diff --git a/drivers/video/riva/fbdev.c b/drivers/video/riva/fbdev.c index ae297e222681..3e9f96e9237d 100644 --- a/drivers/video/riva/fbdev.c +++ b/drivers/video/riva/fbdev.c @@ -1936,10 +1936,6 @@ static int __devinit rivafb_probe(struct pci_dev *pd, default_par->Chipset = (pd->vendor << 16) | pd->device; printk(KERN_INFO PFX "nVidia device/chipset %X\n",default_par->Chipset); -#ifdef CONFIG_PCI_NAMES - printk(KERN_INFO PFX "%s\n", pd->pretty_name); -#endif - if(default_par->riva.Architecture == 0) { printk(KERN_ERR PFX "unknown NV_ARCH\n"); ret=-ENODEV; diff --git a/include/linux/pci.h b/include/linux/pci.h index 025bfc39771d..e7a228f2fdde 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -562,11 +562,6 @@ struct pci_dev { struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ int rom_attr_enabled; /* has display of the rom attribute been enabled? 
*/ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ -#ifdef CONFIG_PCI_NAMES -#define PCI_NAME_SIZE 255 -#define PCI_NAME_HALF __stringify(43) /* less than half to handle slop */ - char pretty_name[PCI_NAME_SIZE]; /* pretty name for users to see */ -#endif }; #define pci_dev_g(n) list_entry(n, struct pci_dev, global_list) @@ -749,8 +744,6 @@ int pci_scan_slot(struct pci_bus *bus, int devfn); struct pci_dev * pci_scan_single_device(struct pci_bus *bus, int devfn); unsigned int pci_scan_child_bus(struct pci_bus *bus); void pci_bus_add_device(struct pci_dev *dev); -void pci_name_device(struct pci_dev *dev); -char *pci_class_name(u32 class); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); @@ -1025,13 +1018,6 @@ static inline char *pci_name(struct pci_dev *pdev) return pdev->dev.bus_id; } -/* Some archs want to see the pretty pci name, so use this macro */ -#ifdef CONFIG_PCI_NAMES -#define pci_pretty_name(dev) ((dev)->pretty_name) -#else -#define pci_pretty_name(dev) "" -#endif - /* Some archs don't want to expose struct resource to userland as-is * in sysfs and /proc -- cgit v1.2.3 From 4352dfd5cd9172f1ee425924a463b43e6157b840 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 28 Jul 2005 11:37:33 -0700 Subject: [PATCH] PCI: clean up pci.h and split pci register info to separate header file. This cleans up some of the #ifdef CONFIG_PCI stuff up, and moves the pci register info out to a separate file, where it belongs. Eventually we can stop including this file from within pci.h, but lots of code needs to be audited first. 
Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 488 +++-------------------------------------------- include/linux/pci_regs.h | 447 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 477 insertions(+), 458 deletions(-) create mode 100644 include/linux/pci_regs.h (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e7a228f2fdde..830c1bae56cc 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -19,436 +19,10 @@ #include -/* - * Under PCI, each device has 256 bytes of configuration address space, - * of which the first 64 bytes are standardized as follows: - */ -#define PCI_VENDOR_ID 0x00 /* 16 bits */ -#define PCI_DEVICE_ID 0x02 /* 16 bits */ -#define PCI_COMMAND 0x04 /* 16 bits */ -#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ -#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ -#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ -#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ -#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ -#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ -#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ -#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ -#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ -#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ -#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ - -#define PCI_STATUS 0x06 /* 16 bits */ -#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ -#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ -#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ -#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ -#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ -#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ -#define PCI_STATUS_DEVSEL_FAST 0x000 -#define PCI_STATUS_DEVSEL_MEDIUM 0x200 
-#define PCI_STATUS_DEVSEL_SLOW 0x400 -#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ -#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ -#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ -#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ -#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ - -#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 - revision */ -#define PCI_REVISION_ID 0x08 /* Revision ID */ -#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ -#define PCI_CLASS_DEVICE 0x0a /* Device class */ - -#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ -#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ -#define PCI_HEADER_TYPE 0x0e /* 8 bits */ -#define PCI_HEADER_TYPE_NORMAL 0 -#define PCI_HEADER_TYPE_BRIDGE 1 -#define PCI_HEADER_TYPE_CARDBUS 2 - -#define PCI_BIST 0x0f /* 8 bits */ -#define PCI_BIST_CODE_MASK 0x0f /* Return result */ -#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ -#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ - -/* - * Base addresses specify locations in memory or I/O space. - * Decoded size can be determined by writing a value of - * 0xffffffff to the register, and reading it back. Only - * 1 bits are decoded. 
- */ -#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ -#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ -#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ -#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ -#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ -#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ -#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ -#define PCI_BASE_ADDRESS_SPACE_IO 0x01 -#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 -#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 -#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ -#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ -#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ -#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ -#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) -#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) -/* bit 1 is reserved if address_space = 1 */ - -/* Header type 0 (normal devices) */ -#define PCI_CARDBUS_CIS 0x28 -#define PCI_SUBSYSTEM_VENDOR_ID 0x2c -#define PCI_SUBSYSTEM_ID 0x2e -#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ -#define PCI_ROM_ADDRESS_ENABLE 0x01 -#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) - -#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ - -/* 0x35-0x3b are reserved */ -#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ -#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ -#define PCI_MIN_GNT 0x3e /* 8 bits */ -#define PCI_MAX_LAT 0x3f /* 8 bits */ - -/* Header type 1 (PCI-to-PCI bridges) */ -#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ -#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ -#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ -#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ -#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ -#define PCI_IO_LIMIT 0x1d -#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ -#define PCI_IO_RANGE_TYPE_16 0x00 -#define 
PCI_IO_RANGE_TYPE_32 0x01 -#define PCI_IO_RANGE_MASK (~0x0fUL) -#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ -#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ -#define PCI_MEMORY_LIMIT 0x22 -#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL -#define PCI_MEMORY_RANGE_MASK (~0x0fUL) -#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ -#define PCI_PREF_MEMORY_LIMIT 0x26 -#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL -#define PCI_PREF_RANGE_TYPE_32 0x00 -#define PCI_PREF_RANGE_TYPE_64 0x01 -#define PCI_PREF_RANGE_MASK (~0x0fUL) -#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ -#define PCI_PREF_LIMIT_UPPER32 0x2c -#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ -#define PCI_IO_LIMIT_UPPER16 0x32 -/* 0x34 same as for htype 0 */ -/* 0x35-0x3b is reserved */ -#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_BRIDGE_CONTROL 0x3e -#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ -#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ -#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ -#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ -#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ -#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ -#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ - -/* Header type 2 (CardBus bridges) */ -#define PCI_CB_CAPABILITY_LIST 0x14 -/* 0x15 reserved */ -#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ -#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ -#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ -#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ -#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ -#define PCI_CB_MEMORY_BASE_0 0x1c -#define PCI_CB_MEMORY_LIMIT_0 0x20 -#define 
PCI_CB_MEMORY_BASE_1 0x24 -#define PCI_CB_MEMORY_LIMIT_1 0x28 -#define PCI_CB_IO_BASE_0 0x2c -#define PCI_CB_IO_BASE_0_HI 0x2e -#define PCI_CB_IO_LIMIT_0 0x30 -#define PCI_CB_IO_LIMIT_0_HI 0x32 -#define PCI_CB_IO_BASE_1 0x34 -#define PCI_CB_IO_BASE_1_HI 0x36 -#define PCI_CB_IO_LIMIT_1 0x38 -#define PCI_CB_IO_LIMIT_1_HI 0x3a -#define PCI_CB_IO_RANGE_MASK (~0x03UL) -/* 0x3c-0x3d are same as for htype 0 */ -#define PCI_CB_BRIDGE_CONTROL 0x3e -#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ -#define PCI_CB_BRIDGE_CTL_SERR 0x02 -#define PCI_CB_BRIDGE_CTL_ISA 0x04 -#define PCI_CB_BRIDGE_CTL_VGA 0x08 -#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 -#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ -#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ -#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 -#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 -#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 -#define PCI_CB_SUBSYSTEM_ID 0x42 -#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ -/* 0x48-0x7f reserved */ - -/* Capability lists */ - -#define PCI_CAP_LIST_ID 0 /* Capability ID */ -#define PCI_CAP_ID_PM 0x01 /* Power Management */ -#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ -#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ -#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ -#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ -#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ -#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ -#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ -#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ -#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ -#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ -#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ -#define PCI_CAP_SIZEOF 4 - -/* Power 
Management Registers */ - -#define PCI_PM_PMC 2 /* PM Capabilities Register */ -#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ -#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ -#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ -#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ -#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */ -#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ -#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ -#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ -#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ -#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ -#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ -#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ -#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ -#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ -#define PCI_PM_CTRL 4 /* PM control and status register */ -#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ -#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ -#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ -#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ -#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ -#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ -#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ -#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ -#define PCI_PM_DATA_REGISTER 7 /* (??) 
*/ -#define PCI_PM_SIZEOF 8 - -/* AGP registers */ - -#define PCI_AGP_VERSION 2 /* BCD version number */ -#define PCI_AGP_RFU 3 /* Rest of capability flags */ -#define PCI_AGP_STATUS 4 /* Status register */ -#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ -#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ -#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ -#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ -#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ -#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ -#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ -#define PCI_AGP_COMMAND 8 /* Control register */ -#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ -#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ -#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ -#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ -#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ -#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ -#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ -#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ -#define PCI_AGP_SIZEOF 12 - -/* Vital Product Data */ - -#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) 
*/ -#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ -#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ -#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ - -/* Slot Identification */ - -#define PCI_SID_ESR 2 /* Expansion Slot Register */ -#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ -#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ -#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ - -/* Message Signalled Interrupts registers */ - -#define PCI_MSI_FLAGS 2 /* Various flags */ -#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ -#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ -#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ -#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ -#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ -#define PCI_MSI_RFU 3 /* Rest of capability flags */ -#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ -#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ -#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ -#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ -#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ - -/* CompactPCI Hotswap Register */ - -#define PCI_CHSWP_CSR 2 /* Control and Status Register */ -#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ -#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ -#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ -#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ -#define PCI_CHSWP_PI 0x30 /* Programming Interface */ -#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ -#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ - -/* PCI-X registers */ - -#define PCI_X_CMD 2 /* Modes & Features */ -#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ -#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ -#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read 
Byte Count */ -#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ -#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ -#define PCI_X_STATUS 4 /* PCI-X capabilities */ -#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ -#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ -#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ -#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ -#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ -#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ -#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ -#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ -#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ -#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ -#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ -#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ -#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ - -/* PCI Express capability registers */ - -#define PCI_EXP_FLAGS 2 /* Capabilities register */ -#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ -#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ -#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ -#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ -#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ -#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ -#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ -#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ -#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ -#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ -#define PCI_EXP_DEVCAP 4 /* Device capabilities */ -#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ -#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ -#define PCI_EXP_DEVCAP_EXT_TAG 
0x20 /* Extended tags */ -#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ -#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ -#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ -#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ -#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ -#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ -#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ -#define PCI_EXP_DEVCTL 8 /* Device Control */ -#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ -#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ -#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ -#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ -#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ -#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ -#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ -#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ -#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ -#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ -#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ -#define PCI_EXP_DEVSTA 10 /* Device Status */ -#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ -#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ -#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ -#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ -#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ -#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ -#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ -#define PCI_EXP_LNKCTL 16 /* Link Control */ -#define PCI_EXP_LNKSTA 18 /* Link Status */ -#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ -#define PCI_EXP_SLTCTL 24 /* Slot Control */ -#define 
PCI_EXP_SLTSTA 26 /* Slot Status */ -#define PCI_EXP_RTCTL 28 /* Root Control */ -#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ -#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ -#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ -#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ -#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ -#define PCI_EXP_RTCAP 30 /* Root Capabilities */ -#define PCI_EXP_RTSTA 32 /* Root Status */ - -/* Extended Capabilities (PCI-X 2.0 and Express) */ -#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) -#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) -#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) - -#define PCI_EXT_CAP_ID_ERR 1 -#define PCI_EXT_CAP_ID_VC 2 -#define PCI_EXT_CAP_ID_DSN 3 -#define PCI_EXT_CAP_ID_PWR 4 - -/* Advanced Error Reporting */ -#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ -#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ -#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ -#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ -#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ -#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ -#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ -#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ -#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ -#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ -#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ -#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ -#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ - /* Same bits as above */ -#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ - /* Same bits as above */ -#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ -#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ -#define PCI_ERR_COR_BAD_TLP 
0x00000040 /* Bad TLP Status */ -#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ -#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ -#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ -#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ - /* Same bits as above */ -#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ -#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ -#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ -#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ -#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ -#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ -#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ -#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ -#define PCI_ERR_ROOT_STATUS 48 -#define PCI_ERR_ROOT_COR_SRC 52 -#define PCI_ERR_ROOT_SRC 54 - -/* Virtual Channel */ -#define PCI_VC_PORT_REG1 4 -#define PCI_VC_PORT_REG2 8 -#define PCI_VC_PORT_CTRL 12 -#define PCI_VC_PORT_STATUS 14 -#define PCI_VC_RES_CAP 16 -#define PCI_VC_RES_CTRL 20 -#define PCI_VC_RES_STATUS 26 - -/* Power Budgeting */ -#define PCI_PWR_DSR 4 /* Data Select Register */ -#define PCI_PWR_DATA 8 /* Data Register */ -#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ -#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ -#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ -#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ -#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ -#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ -#define PCI_PWR_CAP 12 /* Capability */ -#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ +/* Include the pci register defines */ +#include <linux/pci_regs.h> /* Include the ID list */ - #include <linux/pci_ids.h> /* @@ -496,9 +70,9 @@ enum pci_mmap_state { typedef int __bitwise pci_power_t; -#define PCI_D0 ((pci_power_t __force) 0) -#define
PCI_D1 ((pci_power_t __force) 1) -#define PCI_D2 ((pci_power_t __force) 2) +#define PCI_D0 ((pci_power_t __force) 0) +#define PCI_D1 ((pci_power_t __force) 1) +#define PCI_D2 ((pci_power_t __force) 2) #define PCI_D3hot ((pci_power_t __force) 3) #define PCI_D3cold ((pci_power_t __force) 4) #define PCI_POWER_ERROR ((pci_power_t __force) -1) @@ -577,15 +151,15 @@ struct pci_dev { * 7-10 bridges: address space assigned to buses behind the bridge */ -#define PCI_ROM_RESOURCE 6 -#define PCI_BRIDGE_RESOURCES 7 -#define PCI_NUM_RESOURCES 11 +#define PCI_ROM_RESOURCE 6 +#define PCI_BRIDGE_RESOURCES 7 +#define PCI_NUM_RESOURCES 11 #ifndef PCI_BUS_NUM_RESOURCES -#define PCI_BUS_NUM_RESOURCES 8 +#define PCI_BUS_NUM_RESOURCES 8 #endif - -#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ + +#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */ struct pci_bus { struct list_head node; /* node in list of buses */ @@ -694,7 +268,7 @@ struct pci_driver { * @dev_class_mask: the class mask for this device * * This macro is used to create a struct pci_device_id that matches a - * specific PCI class. The vendor, device, subvendor, and subdevice + * specific PCI class. The vendor, device, subvendor, and subdevice * fields will be set to PCI_ANY_ID. */ #define PCI_DEVICE_CLASS(dev_class,dev_class_mask) \ @@ -702,7 +276,7 @@ struct pci_driver { .vendor = PCI_ANY_ID, .device = PCI_ANY_ID, \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID -/* +/* * pci_module_init is obsolete, this stays here till we fix up all usages of it * in the tree. */ @@ -905,18 +479,26 @@ extern void pci_disable_msix(struct pci_dev *dev); extern void msi_remove_pci_irq_vectors(struct pci_dev *dev); #endif -#endif /* CONFIG_PCI */ - -/* Include architecture-dependent settings and functions */ +/* + * PCI domain support. 
Sometimes called PCI segment (eg by ACPI), + * a PCI domain is defined to be a set of PCI busses which share + * configuration space. + */ +#ifndef CONFIG_PCI_DOMAINS +static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return 0; +} +#endif -#include <asm/pci.h> +#else /* CONFIG_PCI is not enabled */ /* * If the system does not have PCI, clearly these return errors. Define * these as simple inline functions to avoid hair in drivers. */ -#ifndef CONFIG_PCI #define _PCI_NOP(o,s,t) \ static inline int pci_##o##_config_##s (struct pci_dev *dev, int where, t val) \ { return PCIBIOS_FUNC_NOT_SUPPORTED; } @@ -967,21 +549,11 @@ static inline int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int en #define pci_dma_burst_advice(pdev, strat, strategy_parameter) do { } while (0) -#else +#endif /* CONFIG_PCI */ -/* - * PCI domain support. Sometimes called PCI segment (eg by ACPI), - * a PCI domain is defined to be a set of PCI busses which share - * configuration space.
- */ -#ifndef CONFIG_PCI_DOMAINS -static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return 0; -} -#endif -#endif /* !CONFIG_PCI */ +/* Include architecture-dependent settings and functions */ + +#include <asm/pci.h> /* these helpers provide future and backwards compatibility * for accessing popular PCI BAR info */ diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h new file mode 100644 index 000000000000..7dc391cad10d --- /dev/null +++ b/include/linux/pci_regs.h @@ -0,0 +1,447 @@ +/* + * pci_regs.h + * + * PCI standard defines + * Copyright 1994, Drew Eckhardt + * Copyright 1997--1999 Martin Mares + * + * For more information, please consult the following manuals (look at + * http://www.pcisig.com/ for how to get them): + * + * PCI BIOS Specification + * PCI Local Bus Specification + * PCI to PCI Bridge Specification + * PCI System Design Guide + */ + +#ifndef LINUX_PCI_REGS_H +#define LINUX_PCI_REGS_H + +/* + * Under PCI, each device has 256 bytes of configuration address space, + * of which the first 64 bytes are standardized as follows: + */ +#define PCI_VENDOR_ID 0x00 /* 16 bits */ +#define PCI_DEVICE_ID 0x02 /* 16 bits */ +#define PCI_COMMAND 0x04 /* 16 bits */ +#define PCI_COMMAND_IO 0x1 /* Enable response in I/O space */ +#define PCI_COMMAND_MEMORY 0x2 /* Enable response in Memory space */ +#define PCI_COMMAND_MASTER 0x4 /* Enable bus mastering */ +#define PCI_COMMAND_SPECIAL 0x8 /* Enable response to special cycles */ +#define PCI_COMMAND_INVALIDATE 0x10 /* Use memory write and invalidate */ +#define PCI_COMMAND_VGA_PALETTE 0x20 /* Enable palette snooping */ +#define PCI_COMMAND_PARITY 0x40 /* Enable parity checking */ +#define PCI_COMMAND_WAIT 0x80 /* Enable address/data stepping */ +#define PCI_COMMAND_SERR 0x100 /* Enable SERR */ +#define PCI_COMMAND_FAST_BACK 0x200 /* Enable back-to-back writes */ +#define PCI_COMMAND_INTX_DISABLE 0x400 /* INTx Emulation Disable */ + +#define
PCI_STATUS 0x06 /* 16 bits */ +#define PCI_STATUS_CAP_LIST 0x10 /* Support Capability List */ +#define PCI_STATUS_66MHZ 0x20 /* Support 66 Mhz PCI 2.1 bus */ +#define PCI_STATUS_UDF 0x40 /* Support User Definable Features [obsolete] */ +#define PCI_STATUS_FAST_BACK 0x80 /* Accept fast-back to back */ +#define PCI_STATUS_PARITY 0x100 /* Detected parity error */ +#define PCI_STATUS_DEVSEL_MASK 0x600 /* DEVSEL timing */ +#define PCI_STATUS_DEVSEL_FAST 0x000 +#define PCI_STATUS_DEVSEL_MEDIUM 0x200 +#define PCI_STATUS_DEVSEL_SLOW 0x400 +#define PCI_STATUS_SIG_TARGET_ABORT 0x800 /* Set on target abort */ +#define PCI_STATUS_REC_TARGET_ABORT 0x1000 /* Master ack of " */ +#define PCI_STATUS_REC_MASTER_ABORT 0x2000 /* Set on master abort */ +#define PCI_STATUS_SIG_SYSTEM_ERROR 0x4000 /* Set when we drive SERR */ +#define PCI_STATUS_DETECTED_PARITY 0x8000 /* Set on parity error */ + +#define PCI_CLASS_REVISION 0x08 /* High 24 bits are class, low 8 revision */ +#define PCI_REVISION_ID 0x08 /* Revision ID */ +#define PCI_CLASS_PROG 0x09 /* Reg. Level Programming Interface */ +#define PCI_CLASS_DEVICE 0x0a /* Device class */ + +#define PCI_CACHE_LINE_SIZE 0x0c /* 8 bits */ +#define PCI_LATENCY_TIMER 0x0d /* 8 bits */ +#define PCI_HEADER_TYPE 0x0e /* 8 bits */ +#define PCI_HEADER_TYPE_NORMAL 0 +#define PCI_HEADER_TYPE_BRIDGE 1 +#define PCI_HEADER_TYPE_CARDBUS 2 + +#define PCI_BIST 0x0f /* 8 bits */ +#define PCI_BIST_CODE_MASK 0x0f /* Return result */ +#define PCI_BIST_START 0x40 /* 1 to start BIST, 2 secs or less */ +#define PCI_BIST_CAPABLE 0x80 /* 1 if BIST capable */ + +/* + * Base addresses specify locations in memory or I/O space. + * Decoded size can be determined by writing a value of + * 0xffffffff to the register, and reading it back. Only + * 1 bits are decoded. 
+ */ +#define PCI_BASE_ADDRESS_0 0x10 /* 32 bits */ +#define PCI_BASE_ADDRESS_1 0x14 /* 32 bits [htype 0,1 only] */ +#define PCI_BASE_ADDRESS_2 0x18 /* 32 bits [htype 0 only] */ +#define PCI_BASE_ADDRESS_3 0x1c /* 32 bits */ +#define PCI_BASE_ADDRESS_4 0x20 /* 32 bits */ +#define PCI_BASE_ADDRESS_5 0x24 /* 32 bits */ +#define PCI_BASE_ADDRESS_SPACE 0x01 /* 0 = memory, 1 = I/O */ +#define PCI_BASE_ADDRESS_SPACE_IO 0x01 +#define PCI_BASE_ADDRESS_SPACE_MEMORY 0x00 +#define PCI_BASE_ADDRESS_MEM_TYPE_MASK 0x06 +#define PCI_BASE_ADDRESS_MEM_TYPE_32 0x00 /* 32 bit address */ +#define PCI_BASE_ADDRESS_MEM_TYPE_1M 0x02 /* Below 1M [obsolete] */ +#define PCI_BASE_ADDRESS_MEM_TYPE_64 0x04 /* 64 bit address */ +#define PCI_BASE_ADDRESS_MEM_PREFETCH 0x08 /* prefetchable? */ +#define PCI_BASE_ADDRESS_MEM_MASK (~0x0fUL) +#define PCI_BASE_ADDRESS_IO_MASK (~0x03UL) +/* bit 1 is reserved if address_space = 1 */ + +/* Header type 0 (normal devices) */ +#define PCI_CARDBUS_CIS 0x28 +#define PCI_SUBSYSTEM_VENDOR_ID 0x2c +#define PCI_SUBSYSTEM_ID 0x2e +#define PCI_ROM_ADDRESS 0x30 /* Bits 31..11 are address, 10..1 reserved */ +#define PCI_ROM_ADDRESS_ENABLE 0x01 +#define PCI_ROM_ADDRESS_MASK (~0x7ffUL) + +#define PCI_CAPABILITY_LIST 0x34 /* Offset of first capability list entry */ + +/* 0x35-0x3b are reserved */ +#define PCI_INTERRUPT_LINE 0x3c /* 8 bits */ +#define PCI_INTERRUPT_PIN 0x3d /* 8 bits */ +#define PCI_MIN_GNT 0x3e /* 8 bits */ +#define PCI_MAX_LAT 0x3f /* 8 bits */ + +/* Header type 1 (PCI-to-PCI bridges) */ +#define PCI_PRIMARY_BUS 0x18 /* Primary bus number */ +#define PCI_SECONDARY_BUS 0x19 /* Secondary bus number */ +#define PCI_SUBORDINATE_BUS 0x1a /* Highest bus number behind the bridge */ +#define PCI_SEC_LATENCY_TIMER 0x1b /* Latency timer for secondary interface */ +#define PCI_IO_BASE 0x1c /* I/O range behind the bridge */ +#define PCI_IO_LIMIT 0x1d +#define PCI_IO_RANGE_TYPE_MASK 0x0fUL /* I/O bridging type */ +#define PCI_IO_RANGE_TYPE_16 0x00 +#define 
PCI_IO_RANGE_TYPE_32 0x01 +#define PCI_IO_RANGE_MASK (~0x0fUL) +#define PCI_SEC_STATUS 0x1e /* Secondary status register, only bit 14 used */ +#define PCI_MEMORY_BASE 0x20 /* Memory range behind */ +#define PCI_MEMORY_LIMIT 0x22 +#define PCI_MEMORY_RANGE_TYPE_MASK 0x0fUL +#define PCI_MEMORY_RANGE_MASK (~0x0fUL) +#define PCI_PREF_MEMORY_BASE 0x24 /* Prefetchable memory range behind */ +#define PCI_PREF_MEMORY_LIMIT 0x26 +#define PCI_PREF_RANGE_TYPE_MASK 0x0fUL +#define PCI_PREF_RANGE_TYPE_32 0x00 +#define PCI_PREF_RANGE_TYPE_64 0x01 +#define PCI_PREF_RANGE_MASK (~0x0fUL) +#define PCI_PREF_BASE_UPPER32 0x28 /* Upper half of prefetchable memory range */ +#define PCI_PREF_LIMIT_UPPER32 0x2c +#define PCI_IO_BASE_UPPER16 0x30 /* Upper half of I/O addresses */ +#define PCI_IO_LIMIT_UPPER16 0x32 +/* 0x34 same as for htype 0 */ +/* 0x35-0x3b is reserved */ +#define PCI_ROM_ADDRESS1 0x38 /* Same as PCI_ROM_ADDRESS, but for htype 1 */ +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_BRIDGE_CONTROL 0x3e +#define PCI_BRIDGE_CTL_PARITY 0x01 /* Enable parity detection on secondary interface */ +#define PCI_BRIDGE_CTL_SERR 0x02 /* The same for SERR forwarding */ +#define PCI_BRIDGE_CTL_NO_ISA 0x04 /* Disable bridging of ISA ports */ +#define PCI_BRIDGE_CTL_VGA 0x08 /* Forward VGA addresses */ +#define PCI_BRIDGE_CTL_MASTER_ABORT 0x20 /* Report master aborts */ +#define PCI_BRIDGE_CTL_BUS_RESET 0x40 /* Secondary bus reset */ +#define PCI_BRIDGE_CTL_FAST_BACK 0x80 /* Fast Back2Back enabled on secondary interface */ + +/* Header type 2 (CardBus bridges) */ +#define PCI_CB_CAPABILITY_LIST 0x14 +/* 0x15 reserved */ +#define PCI_CB_SEC_STATUS 0x16 /* Secondary status */ +#define PCI_CB_PRIMARY_BUS 0x18 /* PCI bus number */ +#define PCI_CB_CARD_BUS 0x19 /* CardBus bus number */ +#define PCI_CB_SUBORDINATE_BUS 0x1a /* Subordinate bus number */ +#define PCI_CB_LATENCY_TIMER 0x1b /* CardBus latency timer */ +#define PCI_CB_MEMORY_BASE_0 0x1c +#define PCI_CB_MEMORY_LIMIT_0 0x20 +#define 
PCI_CB_MEMORY_BASE_1 0x24 +#define PCI_CB_MEMORY_LIMIT_1 0x28 +#define PCI_CB_IO_BASE_0 0x2c +#define PCI_CB_IO_BASE_0_HI 0x2e +#define PCI_CB_IO_LIMIT_0 0x30 +#define PCI_CB_IO_LIMIT_0_HI 0x32 +#define PCI_CB_IO_BASE_1 0x34 +#define PCI_CB_IO_BASE_1_HI 0x36 +#define PCI_CB_IO_LIMIT_1 0x38 +#define PCI_CB_IO_LIMIT_1_HI 0x3a +#define PCI_CB_IO_RANGE_MASK (~0x03UL) +/* 0x3c-0x3d are same as for htype 0 */ +#define PCI_CB_BRIDGE_CONTROL 0x3e +#define PCI_CB_BRIDGE_CTL_PARITY 0x01 /* Similar to standard bridge control register */ +#define PCI_CB_BRIDGE_CTL_SERR 0x02 +#define PCI_CB_BRIDGE_CTL_ISA 0x04 +#define PCI_CB_BRIDGE_CTL_VGA 0x08 +#define PCI_CB_BRIDGE_CTL_MASTER_ABORT 0x20 +#define PCI_CB_BRIDGE_CTL_CB_RESET 0x40 /* CardBus reset */ +#define PCI_CB_BRIDGE_CTL_16BIT_INT 0x80 /* Enable interrupt for 16-bit cards */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM0 0x100 /* Prefetch enable for both memory regions */ +#define PCI_CB_BRIDGE_CTL_PREFETCH_MEM1 0x200 +#define PCI_CB_BRIDGE_CTL_POST_WRITES 0x400 +#define PCI_CB_SUBSYSTEM_VENDOR_ID 0x40 +#define PCI_CB_SUBSYSTEM_ID 0x42 +#define PCI_CB_LEGACY_MODE_BASE 0x44 /* 16-bit PC Card legacy mode base address (ExCa) */ +/* 0x48-0x7f reserved */ + +/* Capability lists */ + +#define PCI_CAP_LIST_ID 0 /* Capability ID */ +#define PCI_CAP_ID_PM 0x01 /* Power Management */ +#define PCI_CAP_ID_AGP 0x02 /* Accelerated Graphics Port */ +#define PCI_CAP_ID_VPD 0x03 /* Vital Product Data */ +#define PCI_CAP_ID_SLOTID 0x04 /* Slot Identification */ +#define PCI_CAP_ID_MSI 0x05 /* Message Signalled Interrupts */ +#define PCI_CAP_ID_CHSWP 0x06 /* CompactPCI HotSwap */ +#define PCI_CAP_ID_PCIX 0x07 /* PCI-X */ +#define PCI_CAP_ID_SHPC 0x0C /* PCI Standard Hot-Plug Controller */ +#define PCI_CAP_ID_EXP 0x10 /* PCI Express */ +#define PCI_CAP_ID_MSIX 0x11 /* MSI-X */ +#define PCI_CAP_LIST_NEXT 1 /* Next capability in the list */ +#define PCI_CAP_FLAGS 2 /* Capability defined flags (16 bits) */ +#define PCI_CAP_SIZEOF 4 + +/* Power 
Management Registers */ + +#define PCI_PM_PMC 2 /* PM Capabilities Register */ +#define PCI_PM_CAP_VER_MASK 0x0007 /* Version */ +#define PCI_PM_CAP_PME_CLOCK 0x0008 /* PME clock required */ +#define PCI_PM_CAP_RESERVED 0x0010 /* Reserved field */ +#define PCI_PM_CAP_DSI 0x0020 /* Device specific initialization */ +#define PCI_PM_CAP_AUX_POWER 0x01C0 /* Auxilliary power support mask */ +#define PCI_PM_CAP_D1 0x0200 /* D1 power state support */ +#define PCI_PM_CAP_D2 0x0400 /* D2 power state support */ +#define PCI_PM_CAP_PME 0x0800 /* PME pin supported */ +#define PCI_PM_CAP_PME_MASK 0xF800 /* PME Mask of all supported states */ +#define PCI_PM_CAP_PME_D0 0x0800 /* PME# from D0 */ +#define PCI_PM_CAP_PME_D1 0x1000 /* PME# from D1 */ +#define PCI_PM_CAP_PME_D2 0x2000 /* PME# from D2 */ +#define PCI_PM_CAP_PME_D3 0x4000 /* PME# from D3 (hot) */ +#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ +#define PCI_PM_CTRL 4 /* PM control and status register */ +#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ +#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ +#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */ +#define PCI_PM_CTRL_PME_STATUS 0x8000 /* PME pin status */ +#define PCI_PM_PPB_EXTENSIONS 6 /* PPB support extensions (??) */ +#define PCI_PM_PPB_B2_B3 0x40 /* Stop clock when in D3hot (??) */ +#define PCI_PM_BPCC_ENABLE 0x80 /* Bus power/clock control enable (??) */ +#define PCI_PM_DATA_REGISTER 7 /* (??) 
*/ +#define PCI_PM_SIZEOF 8 + +/* AGP registers */ + +#define PCI_AGP_VERSION 2 /* BCD version number */ +#define PCI_AGP_RFU 3 /* Rest of capability flags */ +#define PCI_AGP_STATUS 4 /* Status register */ +#define PCI_AGP_STATUS_RQ_MASK 0xff000000 /* Maximum number of requests - 1 */ +#define PCI_AGP_STATUS_SBA 0x0200 /* Sideband addressing supported */ +#define PCI_AGP_STATUS_64BIT 0x0020 /* 64-bit addressing supported */ +#define PCI_AGP_STATUS_FW 0x0010 /* FW transfers supported */ +#define PCI_AGP_STATUS_RATE4 0x0004 /* 4x transfer rate supported */ +#define PCI_AGP_STATUS_RATE2 0x0002 /* 2x transfer rate supported */ +#define PCI_AGP_STATUS_RATE1 0x0001 /* 1x transfer rate supported */ +#define PCI_AGP_COMMAND 8 /* Control register */ +#define PCI_AGP_COMMAND_RQ_MASK 0xff000000 /* Master: Maximum number of requests */ +#define PCI_AGP_COMMAND_SBA 0x0200 /* Sideband addressing enabled */ +#define PCI_AGP_COMMAND_AGP 0x0100 /* Allow processing of AGP transactions */ +#define PCI_AGP_COMMAND_64BIT 0x0020 /* Allow processing of 64-bit addresses */ +#define PCI_AGP_COMMAND_FW 0x0010 /* Force FW transfers */ +#define PCI_AGP_COMMAND_RATE4 0x0004 /* Use 4x rate */ +#define PCI_AGP_COMMAND_RATE2 0x0002 /* Use 2x rate */ +#define PCI_AGP_COMMAND_RATE1 0x0001 /* Use 1x rate */ +#define PCI_AGP_SIZEOF 12 + +/* Vital Product Data */ + +#define PCI_VPD_ADDR 2 /* Address to access (15 bits!) 
*/ +#define PCI_VPD_ADDR_MASK 0x7fff /* Address mask */ +#define PCI_VPD_ADDR_F 0x8000 /* Write 0, 1 indicates completion */ +#define PCI_VPD_DATA 4 /* 32-bits of data returned here */ + +/* Slot Identification */ + +#define PCI_SID_ESR 2 /* Expansion Slot Register */ +#define PCI_SID_ESR_NSLOTS 0x1f /* Number of expansion slots available */ +#define PCI_SID_ESR_FIC 0x20 /* First In Chassis Flag */ +#define PCI_SID_CHASSIS_NR 3 /* Chassis Number */ + +/* Message Signalled Interrupts registers */ + +#define PCI_MSI_FLAGS 2 /* Various flags */ +#define PCI_MSI_FLAGS_64BIT 0x80 /* 64-bit addresses allowed */ +#define PCI_MSI_FLAGS_QSIZE 0x70 /* Message queue size configured */ +#define PCI_MSI_FLAGS_QMASK 0x0e /* Maximum queue size available */ +#define PCI_MSI_FLAGS_ENABLE 0x01 /* MSI feature enabled */ +#define PCI_MSI_FLAGS_MASKBIT 0x100 /* 64-bit mask bits allowed */ +#define PCI_MSI_RFU 3 /* Rest of capability flags */ +#define PCI_MSI_ADDRESS_LO 4 /* Lower 32 bits */ +#define PCI_MSI_ADDRESS_HI 8 /* Upper 32 bits (if PCI_MSI_FLAGS_64BIT set) */ +#define PCI_MSI_DATA_32 8 /* 16 bits of data for 32-bit devices */ +#define PCI_MSI_DATA_64 12 /* 16 bits of data for 64-bit devices */ +#define PCI_MSI_MASK_BIT 16 /* Mask bits register */ + +/* CompactPCI Hotswap Register */ + +#define PCI_CHSWP_CSR 2 /* Control and Status Register */ +#define PCI_CHSWP_DHA 0x01 /* Device Hiding Arm */ +#define PCI_CHSWP_EIM 0x02 /* ENUM# Signal Mask */ +#define PCI_CHSWP_PIE 0x04 /* Pending Insert or Extract */ +#define PCI_CHSWP_LOO 0x08 /* LED On / Off */ +#define PCI_CHSWP_PI 0x30 /* Programming Interface */ +#define PCI_CHSWP_EXT 0x40 /* ENUM# status - extraction */ +#define PCI_CHSWP_INS 0x80 /* ENUM# status - insertion */ + +/* PCI-X registers */ + +#define PCI_X_CMD 2 /* Modes & Features */ +#define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ +#define PCI_X_CMD_ERO 0x0002 /* Enable Relaxed Ordering */ +#define PCI_X_CMD_MAX_READ 0x000c /* Max Memory Read 
Byte Count */ +#define PCI_X_CMD_MAX_SPLIT 0x0070 /* Max Outstanding Split Transactions */ +#define PCI_X_CMD_VERSION(x) (((x) >> 12) & 3) /* Version */ +#define PCI_X_STATUS 4 /* PCI-X capabilities */ +#define PCI_X_STATUS_DEVFN 0x000000ff /* A copy of devfn */ +#define PCI_X_STATUS_BUS 0x0000ff00 /* A copy of bus nr */ +#define PCI_X_STATUS_64BIT 0x00010000 /* 64-bit device */ +#define PCI_X_STATUS_133MHZ 0x00020000 /* 133 MHz capable */ +#define PCI_X_STATUS_SPL_DISC 0x00040000 /* Split Completion Discarded */ +#define PCI_X_STATUS_UNX_SPL 0x00080000 /* Unexpected Split Completion */ +#define PCI_X_STATUS_COMPLEX 0x00100000 /* Device Complexity */ +#define PCI_X_STATUS_MAX_READ 0x00600000 /* Designed Max Memory Read Count */ +#define PCI_X_STATUS_MAX_SPLIT 0x03800000 /* Designed Max Outstanding Split Transactions */ +#define PCI_X_STATUS_MAX_CUM 0x1c000000 /* Designed Max Cumulative Read Size */ +#define PCI_X_STATUS_SPL_ERR 0x20000000 /* Rcvd Split Completion Error Msg */ +#define PCI_X_STATUS_266MHZ 0x40000000 /* 266 MHz capable */ +#define PCI_X_STATUS_533MHZ 0x80000000 /* 533 MHz capable */ + +/* PCI Express capability registers */ + +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_UPSTREAM 0x5 /* Upstream Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_TYPE_PCI_BRIDGE 0x7 /* PCI/PCI-X Bridge */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_FLAGS_IRQ 0x3e00 /* Interrupt message number */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVCAP_PAYLOAD 0x07 /* Max_Payload_Size */ +#define PCI_EXP_DEVCAP_PHANTOM 0x18 /* Phantom functions */ +#define PCI_EXP_DEVCAP_EXT_TAG 
0x20 /* Extended tags */ +#define PCI_EXP_DEVCAP_L0S 0x1c0 /* L0s Acceptable Latency */ +#define PCI_EXP_DEVCAP_L1 0xe00 /* L1 Acceptable Latency */ +#define PCI_EXP_DEVCAP_ATN_BUT 0x1000 /* Attention Button Present */ +#define PCI_EXP_DEVCAP_ATN_IND 0x2000 /* Attention Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_IND 0x4000 /* Power Indicator Present */ +#define PCI_EXP_DEVCAP_PWR_VAL 0x3fc0000 /* Slot Power Limit Value */ +#define PCI_EXP_DEVCAP_PWR_SCL 0xc000000 /* Slot Power Limit Scale */ +#define PCI_EXP_DEVCTL 8 /* Device Control */ +#define PCI_EXP_DEVCTL_CERE 0x0001 /* Correctable Error Reporting En. */ +#define PCI_EXP_DEVCTL_NFERE 0x0002 /* Non-Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_FERE 0x0004 /* Fatal Error Reporting Enable */ +#define PCI_EXP_DEVCTL_URRE 0x0008 /* Unsupported Request Reporting En. */ +#define PCI_EXP_DEVCTL_RELAX_EN 0x0010 /* Enable relaxed ordering */ +#define PCI_EXP_DEVCTL_PAYLOAD 0x00e0 /* Max_Payload_Size */ +#define PCI_EXP_DEVCTL_EXT_TAG 0x0100 /* Extended Tag Field Enable */ +#define PCI_EXP_DEVCTL_PHANTOM 0x0200 /* Phantom Functions Enable */ +#define PCI_EXP_DEVCTL_AUX_PME 0x0400 /* Auxiliary Power PM Enable */ +#define PCI_EXP_DEVCTL_NOSNOOP_EN 0x0800 /* Enable No Snoop */ +#define PCI_EXP_DEVCTL_READRQ 0x7000 /* Max_Read_Request_Size */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define PCI_EXP_DEVSTA_CED 0x01 /* Correctable Error Detected */ +#define PCI_EXP_DEVSTA_NFED 0x02 /* Non-Fatal Error Detected */ +#define PCI_EXP_DEVSTA_FED 0x04 /* Fatal Error Detected */ +#define PCI_EXP_DEVSTA_URD 0x08 /* Unsupported Request Detected */ +#define PCI_EXP_DEVSTA_AUXPD 0x10 /* AUX Power Detected */ +#define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define 
PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCTL_SECEE 0x01 /* System Error on Correctable Error */ +#define PCI_EXP_RTCTL_SENFEE 0x02 /* System Error on Non-Fatal Error */ +#define PCI_EXP_RTCTL_SEFEE 0x04 /* System Error on Fatal Error */ +#define PCI_EXP_RTCTL_PMEIE 0x08 /* PME Interrupt Enable */ +#define PCI_EXP_RTCTL_CRSSVE 0x10 /* CRS Software Visibility Enable */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTSTA 32 /* Root Status */ + +/* Extended Capabilities (PCI-X 2.0 and Express) */ +#define PCI_EXT_CAP_ID(header) (header & 0x0000ffff) +#define PCI_EXT_CAP_VER(header) ((header >> 16) & 0xf) +#define PCI_EXT_CAP_NEXT(header) ((header >> 20) & 0xffc) + +#define PCI_EXT_CAP_ID_ERR 1 +#define PCI_EXT_CAP_ID_VC 2 +#define PCI_EXT_CAP_ID_DSN 3 +#define PCI_EXT_CAP_ID_PWR 4 + +/* Advanced Error Reporting */ +#define PCI_ERR_UNCOR_STATUS 4 /* Uncorrectable Error Status */ +#define PCI_ERR_UNC_TRAIN 0x00000001 /* Training */ +#define PCI_ERR_UNC_DLP 0x00000010 /* Data Link Protocol */ +#define PCI_ERR_UNC_POISON_TLP 0x00001000 /* Poisoned TLP */ +#define PCI_ERR_UNC_FCP 0x00002000 /* Flow Control Protocol */ +#define PCI_ERR_UNC_COMP_TIME 0x00004000 /* Completion Timeout */ +#define PCI_ERR_UNC_COMP_ABORT 0x00008000 /* Completer Abort */ +#define PCI_ERR_UNC_UNX_COMP 0x00010000 /* Unexpected Completion */ +#define PCI_ERR_UNC_RX_OVER 0x00020000 /* Receiver Overflow */ +#define PCI_ERR_UNC_MALF_TLP 0x00040000 /* Malformed TLP */ +#define PCI_ERR_UNC_ECRC 0x00080000 /* ECRC Error Status */ +#define PCI_ERR_UNC_UNSUP 0x00100000 /* Unsupported Request */ +#define PCI_ERR_UNCOR_MASK 8 /* Uncorrectable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_UNCOR_SEVER 12 /* Uncorrectable Error Severity */ + /* Same bits as above */ +#define PCI_ERR_COR_STATUS 16 /* Correctable Error Status */ +#define PCI_ERR_COR_RCVR 0x00000001 /* Receiver Error Status */ +#define PCI_ERR_COR_BAD_TLP 
0x00000040 /* Bad TLP Status */ +#define PCI_ERR_COR_BAD_DLLP 0x00000080 /* Bad DLLP Status */ +#define PCI_ERR_COR_REP_ROLL 0x00000100 /* REPLAY_NUM Rollover */ +#define PCI_ERR_COR_REP_TIMER 0x00001000 /* Replay Timer Timeout */ +#define PCI_ERR_COR_MASK 20 /* Correctable Error Mask */ + /* Same bits as above */ +#define PCI_ERR_CAP 24 /* Advanced Error Capabilities */ +#define PCI_ERR_CAP_FEP(x) ((x) & 31) /* First Error Pointer */ +#define PCI_ERR_CAP_ECRC_GENC 0x00000020 /* ECRC Generation Capable */ +#define PCI_ERR_CAP_ECRC_GENE 0x00000040 /* ECRC Generation Enable */ +#define PCI_ERR_CAP_ECRC_CHKC 0x00000080 /* ECRC Check Capable */ +#define PCI_ERR_CAP_ECRC_CHKE 0x00000100 /* ECRC Check Enable */ +#define PCI_ERR_HEADER_LOG 28 /* Header Log Register (16 bytes) */ +#define PCI_ERR_ROOT_COMMAND 44 /* Root Error Command */ +#define PCI_ERR_ROOT_STATUS 48 +#define PCI_ERR_ROOT_COR_SRC 52 +#define PCI_ERR_ROOT_SRC 54 + +/* Virtual Channel */ +#define PCI_VC_PORT_REG1 4 +#define PCI_VC_PORT_REG2 8 +#define PCI_VC_PORT_CTRL 12 +#define PCI_VC_PORT_STATUS 14 +#define PCI_VC_RES_CAP 16 +#define PCI_VC_RES_CTRL 20 +#define PCI_VC_RES_STATUS 26 + +/* Power Budgeting */ +#define PCI_PWR_DSR 4 /* Data Select Register */ +#define PCI_PWR_DATA 8 /* Data Register */ +#define PCI_PWR_DATA_BASE(x) ((x) & 0xff) /* Base Power */ +#define PCI_PWR_DATA_SCALE(x) (((x) >> 8) & 3) /* Data Scale */ +#define PCI_PWR_DATA_PM_SUB(x) (((x) >> 10) & 7) /* PM Sub State */ +#define PCI_PWR_DATA_PM_STATE(x) (((x) >> 13) & 3) /* PM State */ +#define PCI_PWR_DATA_TYPE(x) (((x) >> 15) & 7) /* Type */ +#define PCI_PWR_DATA_RAIL(x) (((x) >> 18) & 7) /* Power Rail */ +#define PCI_PWR_CAP 12 /* Capability */ +#define PCI_PWR_CAP_BUDGET(x) ((x) & 1) /* Included in system budget */ + +#endif /* LINUX_PCI_REGS_H */ -- cgit v1.2.3 From 064b53dbcc977dbf2753a67c2b8fc1c061d74f21 Mon Sep 17 00:00:00 2001 From: "John W. 
Linville" Date: Wed, 27 Jul 2005 10:19:44 -0400 Subject: [PATCH] PCI: restore BAR values after D3hot->D0 for devices that need it Some PCI devices (e.g. 3c905B, 3c556B) lose all configuration (including BARs) when transitioning from D3hot->D0. This leaves such a device in an inaccessible state. The patch below causes the BARs to be restored when enabling such a device, so that its driver will be able to access it. The patch also adds pci_restore_bars as a new global symbol, and adds a corresponding EXPORT_SYMBOL_GPL for that. Some firmware (e.g. Thinkpad T21) leaves devices in D3hot after a (re)boot. Most drivers call pci_enable_device very early, so devices left in D3hot that lose configuration during the D3hot->D0 transition will be inaccessible to their drivers. Drivers could be modified to account for this, but it would be difficult to know which drivers need modification. This is especially true since often many devices are covered by the same driver. It likely would be necessary to replicate code across dozens of drivers. The patch below should trigger only when transitioning from D3hot->D0 (or at boot), and only for devices that have the "no soft reset" bit cleared in the PM control register. I believe it is safe to include this patch as part of the PCI infrastructure. The cleanest implementation of pci_restore_bars was to call pci_update_resource. Unfortunately, that does not currently exist for the sparc64 architecture. The patch below includes a null implementation of pci_update_resource for sparc64. Some have expressed interest in making general use of the pci_restore_bars function, so that has been exported to GPL licensed modules. Signed-off-by: John W.
Linville Signed-off-by: Greg Kroah-Hartman --- arch/sparc64/kernel/pci.c | 6 +++++ drivers/pci/pci.c | 59 +++++++++++++++++++++++++++++++++++++++++++---- drivers/pci/setup-res.c | 2 +- include/linux/pci.h | 2 ++ include/linux/pci_regs.h | 1 + 5 files changed, 65 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/sparc64/kernel/pci.c b/arch/sparc64/kernel/pci.c index ec8bf4012c0c..9c17591c2a79 100644 --- a/arch/sparc64/kernel/pci.c +++ b/arch/sparc64/kernel/pci.c @@ -413,6 +413,12 @@ static int pci_assign_bus_resource(const struct pci_bus *bus, return -EBUSY; } +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) +{ + /* Not implemented for sparc64... */ + BUG(); +} + int pci_assign_resource(struct pci_dev *pdev, int resource) { struct pcidev_cookie *pcp = pdev->sysdata; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index c62d2f043397..93ec158d06c1 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -221,6 +221,37 @@ pci_find_parent_resource(const struct pci_dev *dev, struct resource *res) return best; } +/** + * pci_restore_bars - restore a devices BAR values (e.g. after wake-up) + * @dev: PCI device to have its BARs restored + * + * Restore the BAR values for a given device, so as to make it + * accessible by its driver. + */ +void +pci_restore_bars(struct pci_dev *dev) +{ + int i, numres; + + switch (dev->hdr_type) { + case PCI_HEADER_TYPE_NORMAL: + numres = 6; + break; + case PCI_HEADER_TYPE_BRIDGE: + numres = 2; + break; + case PCI_HEADER_TYPE_CARDBUS: + numres = 1; + break; + default: + /* Should never get here, but just in case... 
*/ + return; + } + + for (i = 0; i < numres; i ++) + pci_update_resource(dev, &dev->resource[i], i); +} + /** * pci_set_power_state - Set the power state of a PCI device * @dev: PCI device to be suspended @@ -239,7 +270,7 @@ int (*platform_pci_set_power_state)(struct pci_dev *dev, pci_power_t t); int pci_set_power_state(struct pci_dev *dev, pci_power_t state) { - int pm; + int pm, need_restore = 0; u16 pmcsr, pmc; /* bound the state we're entering */ @@ -278,14 +309,17 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return -EIO; } + pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + /* If we're in D3, force entire word to 0. * This doesn't affect PME_Status, disables PME_En, and * sets PowerState to 0. */ - if (dev->current_state >= PCI_D3hot) + if (dev->current_state >= PCI_D3hot) { + if (!(pmcsr & PCI_PM_CTRL_NO_SOFT_RESET)) + need_restore = 1; pmcsr = 0; - else { - pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); + } else { pmcsr &= ~PCI_PM_CTRL_STATE_MASK; pmcsr |= state; } @@ -308,6 +342,22 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) platform_pci_set_power_state(dev, state); dev->current_state = state; + + /* According to section 5.4.1 of the "PCI BUS POWER MANAGEMENT + * INTERFACE SPECIFICATION, REV. 1.2", a device transitioning + * from D3hot to D0 _may_ perform an internal reset, thereby + * going to "D0 Uninitialized" rather than "D0 Initialized". + * For example, at least some versions of the 3c905B and the + * 3c556B exhibit this behaviour. + * + * At least some laptop BIOSen (e.g. the Thinkpad T21) leave + * devices in a D3hot state at boot. Consequently, we need to + * restore at least the BARs so that the device will be + * accessible to its driver. 
+ */ + if (need_restore) + pci_restore_bars(dev); + return 0; } @@ -809,6 +859,7 @@ struct pci_dev *isa_bridge; EXPORT_SYMBOL(isa_bridge); #endif +EXPORT_SYMBOL_GPL(pci_restore_bars); EXPORT_SYMBOL(pci_enable_device_bars); EXPORT_SYMBOL(pci_enable_device); EXPORT_SYMBOL(pci_disable_device); diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 5598b4714f77..362f93337a34 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -26,7 +26,7 @@ #include "pci.h" -static void +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno) { struct pci_bus_region region; diff --git a/include/linux/pci.h b/include/linux/pci.h index 830c1bae56cc..8878ccff9e3c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -384,7 +384,9 @@ int pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); +void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int pci_assign_resource(struct pci_dev *dev, int i); +void pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size); diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index 7dc391cad10d..e2a089b051ed 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -222,6 +222,7 @@ #define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */ #define PCI_PM_CTRL 4 /* PM control and status register */ #define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */ +#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */ #define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */ #define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */ #define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) 
*/ -- cgit v1.2.3 From cecf4864cf52a4a243a62b2856a6a155edbb55e8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 18 Aug 2005 14:33:01 +1000 Subject: [PATCH] PCI: Add pci_walk_bus function to PCI core (nonrecursive) The PCI error recovery infrastructure needs to be able to contact all the drivers affected by a PCI error event, which may mean traversing all the devices under a given PCI-PCI bridge. This patch adds a function to the PCI core that traverses all the PCI devices on a PCI bus and under any PCI-PCI bridges on that bus (and so on), calling a given function for each device. This provides a way for the error recovery code to iterate through all devices that are affected by an error event. This version is not implemented as a recursive function. Instead, when we reach a PCI-PCI bridge, we set the pointers to start doing the devices on the bus under the bridge, and when we reach the end of a bus's devices, we use the bus->self pointer to go back up to the next higher bus and continue doing its devices. Signed-off-by: Paul Mackerras Signed-off-by: Greg Kroah-Hartman --- drivers/pci/bus.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 +++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index a83ee0b85394..eed67d9e73bc 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -151,6 +151,54 @@ void pci_enable_bridges(struct pci_bus *bus) } } +/** pci_walk_bus - walk devices on/under bus, calling callback. + * @top bus whose devices should be walked + * @cb callback to be called for each device found + * @userdata arbitrary pointer to be passed to callback. + * + * Walk the given bus, including any bridged devices + * on buses under this bus. Call the provided callback + * on each device found. 
+ */ +void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), + void *userdata) +{ + struct pci_dev *dev; + struct pci_bus *bus; + struct list_head *next; + + bus = top; + spin_lock(&pci_bus_lock); + next = top->devices.next; + for (;;) { + if (next == &bus->devices) { + /* end of this bus, go up or finish */ + if (bus == top) + break; + next = bus->self->bus_list.next; + bus = bus->self->bus; + continue; + } + dev = list_entry(next, struct pci_dev, bus_list); + pci_dev_get(dev); + if (dev->subordinate) { + /* this is a pci-pci bridge, do its devices next */ + next = dev->subordinate->devices.next; + bus = dev->subordinate; + } else + next = dev->bus_list.next; + spin_unlock(&pci_bus_lock); + + /* Run device routines with the bus unlocked */ + cb(dev, userdata); + + spin_lock(&pci_bus_lock); + pci_dev_put(dev); + } + spin_unlock(&pci_bus_lock); +} +EXPORT_SYMBOL_GPL(pci_walk_bus); + EXPORT_SYMBOL(pci_bus_alloc_resource); EXPORT_SYMBOL_GPL(pci_bus_add_device); EXPORT_SYMBOL(pci_bus_add_devices); diff --git a/include/linux/pci.h b/include/linux/pci.h index 8878ccff9e3c..b0e244713281 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -434,6 +434,9 @@ const struct pci_device_id *pci_match_device(struct pci_driver *drv, struct pci_ const struct pci_device_id *pci_match_id(const struct pci_device_id *ids, struct pci_dev *dev); int pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max, int pass); +void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *), + void *userdata); + /* kmem_cache style wrapper around pci_alloc_consistent() */ #include -- cgit v1.2.3 From 3fe9d19f9e86a55679f5f2b38ec0a43a1a510cee Mon Sep 17 00:00:00 2001 From: Daniel Ritz Date: Wed, 17 Aug 2005 15:32:19 -0700 Subject: [PATCH] PCI: Support PCM PM CAP version 3 - support PCI PM CAP version 3 (as defined in PCI PM Interface Spec v1.2) - pci/probe.c sets the PM state initially to 4 which is D3cold. 
add a PCI_UNKNOWN - minor cleanups Signed-off-by: Daniel Ritz Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- drivers/pci/pci.c | 12 +++++------- drivers/pci/probe.c | 2 +- include/linux/pci.h | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 3dcb83d7eb25..e179af3186f5 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -294,7 +294,7 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) return -EIO; pci_read_config_word(dev,pm + PCI_PM_PMC,&pmc); - if ((pmc & PCI_PM_CAP_VER_MASK) > 2) { + if ((pmc & PCI_PM_CAP_VER_MASK) > 3) { printk(KERN_DEBUG "PCI: %s has unsupported PM cap regs version (%u)\n", pci_name(dev), pmc & PCI_PM_CAP_VER_MASK); @@ -302,12 +302,10 @@ pci_set_power_state(struct pci_dev *dev, pci_power_t state) } /* check if this device supports the desired state */ - if (state == PCI_D1 || state == PCI_D2) { - if (state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) - return -EIO; - else if (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2)) - return -EIO; - } + if (state == PCI_D1 && !(pmc & PCI_PM_CAP_D1)) + return -EIO; + else if (state == PCI_D2 && !(pmc & PCI_PM_CAP_D2)) + return -EIO; pci_read_config_word(dev, pm + PCI_PM_CTRL, &pmcsr); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 4be1b887ab62..b9c9b03919d4 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -584,7 +584,7 @@ static int pci_setup_device(struct pci_dev * dev) dev->vendor, dev->device, class, dev->hdr_type); /* "Unknown power state" */ - dev->current_state = 4; + dev->current_state = PCI_UNKNOWN; /* Early fixups, before probing the BARs */ pci_fixup_device(pci_fixup_early, dev); diff --git a/include/linux/pci.h b/include/linux/pci.h index b0e244713281..7004dde7fef0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -75,6 +75,7 @@ typedef int __bitwise pci_power_t; #define PCI_D2 ((pci_power_t __force) 2) #define PCI_D3hot ((pci_power_t __force) 
3) #define PCI_D3cold ((pci_power_t __force) 4) +#define PCI_UNKNOWN ((pci_power_t __force) 5) #define PCI_POWER_ERROR ((pci_power_t __force) -1) /* -- cgit v1.2.3 From a04ce0ffcaf561994ecf382cd3caad75556dc499 Mon Sep 17 00:00:00 2001 From: Brett M Russ Date: Mon, 15 Aug 2005 15:23:41 -0400 Subject: [PATCH] PCI/libata INTx cleanup Simple cleanup to eliminate X copies of the pci_enable_intx() function in libata. Moved ahci.c's pci_intx() to pci.c and use it throughout libata and msi.c. Signed-off-by: Brett Russ Signed-off-by: Greg Kroah-Hartman --- drivers/pci/msi.c | 10 ++-------- drivers/pci/pci.c | 26 ++++++++++++++++++++++++++ drivers/scsi/ahci.c | 16 ---------------- drivers/scsi/ata_piix.c | 14 +------------- drivers/scsi/sata_sis.c | 14 +------------- drivers/scsi/sata_uli.c | 14 +------------- include/linux/pci.h | 1 + 7 files changed, 32 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 532f73bb2224..ee8677bda950 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -439,10 +439,7 @@ static void enable_msi_mode(struct pci_dev *dev, int pos, int type) } if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { /* PCI Express Endpoint device detected */ - u16 cmd; - pci_read_config_word(dev, PCI_COMMAND, &cmd); - cmd |= PCI_COMMAND_INTX_DISABLE; - pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_intx(dev, 0); /* disable intx */ } } @@ -461,10 +458,7 @@ void disable_msi_mode(struct pci_dev *dev, int pos, int type) } if (pci_find_capability(dev, PCI_CAP_ID_EXP)) { /* PCI Express Endpoint device detected */ - u16 cmd; - pci_read_config_word(dev, PCI_COMMAND, &cmd); - cmd &= ~PCI_COMMAND_INTX_DISABLE; - pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_intx(dev, 1); /* enable intx */ } } diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e179af3186f5..ccff633a3948 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -798,6 +798,31 @@ pci_clear_mwi(struct pci_dev *dev) } } +/** + * 
pci_intx - enables/disables PCI INTx for device dev + * @dev: the PCI device to operate on + * @enable: boolean + * + * Enables/disables PCI INTx for device dev + */ +void +pci_intx(struct pci_dev *pdev, int enable) +{ + u16 pci_command, new; + + pci_read_config_word(pdev, PCI_COMMAND, &pci_command); + + if (enable) { + new = pci_command & ~PCI_COMMAND_INTX_DISABLE; + } else { + new = pci_command | PCI_COMMAND_INTX_DISABLE; + } + + if (new != pci_command) { + pci_write_config_word(pdev, PCI_COMMAND, pci_command); + } +} + #ifndef HAVE_ARCH_PCI_SET_DMA_MASK /* * These can be overridden by arch-specific implementations @@ -875,6 +900,7 @@ EXPORT_SYMBOL(pci_request_region); EXPORT_SYMBOL(pci_set_master); EXPORT_SYMBOL(pci_set_mwi); EXPORT_SYMBOL(pci_clear_mwi); +EXPORT_SYMBOL_GPL(pci_intx); EXPORT_SYMBOL(pci_set_dma_mask); EXPORT_SYMBOL(pci_set_consistent_dma_mask); EXPORT_SYMBOL(pci_assign_resource); diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 320df6cd3def..c2c8fa828e24 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -865,22 +865,6 @@ static int ahci_host_init(struct ata_probe_ent *probe_ent) return 0; } -/* move to PCI layer, integrate w/ MSI stuff */ -static void pci_intx(struct pci_dev *pdev, int enable) -{ - u16 pci_command, new; - - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); - - if (enable) - new = pci_command & ~PCI_COMMAND_INTX_DISABLE; - else - new = pci_command | PCI_COMMAND_INTX_DISABLE; - - if (new != pci_command) - pci_write_config_word(pdev, PCI_COMMAND, pci_command); -} - static void ahci_print_info(struct ata_probe_ent *probe_ent) { struct ahci_host_priv *hpriv = probe_ent->private_data; diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 5f8688529041..87e0c36f1554 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -568,18 +568,6 @@ static void piix_set_dmamode (struct ata_port *ap, struct ata_device *adev) } } -/* move to PCI layer, integrate w/ MSI stuff */ -static void 
pci_enable_intx(struct pci_dev *pdev) -{ - u16 pci_command; - - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); - if (pci_command & PCI_COMMAND_INTX_DISABLE) { - pci_command &= ~PCI_COMMAND_INTX_DISABLE; - pci_write_config_word(pdev, PCI_COMMAND, pci_command); - } -} - #define AHCI_PCI_BAR 5 #define AHCI_GLOBAL_CTL 0x04 #define AHCI_ENABLE (1 << 31) @@ -677,7 +665,7 @@ static int piix_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) * message-signalled interrupts currently). */ if (port_info[0]->host_flags & PIIX_FLAG_CHECKINTR) - pci_enable_intx(pdev); + pci_intx(pdev, 1); if (combined) { port_info[sata_chan] = &piix_port_info[ent->driver_data]; diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index 7d1aaa99aaae..2bd3f11ac010 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -233,18 +233,6 @@ static void sis_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) } } -/* move to PCI layer, integrate w/ MSI stuff */ -static void pci_enable_intx(struct pci_dev *pdev) -{ - u16 pci_command; - - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); - if (pci_command & PCI_COMMAND_INTX_DISABLE) { - pci_command &= ~PCI_COMMAND_INTX_DISABLE; - pci_write_config_word(pdev, PCI_COMMAND, pci_command); - } -} - static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct ata_probe_ent *probe_ent = NULL; @@ -319,7 +307,7 @@ static int sis_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - pci_enable_intx(pdev); + pci_intx(pdev, 1); /* FIXME: check ata_device_add return value */ ata_device_add(probe_ent); diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index 42e13ed8eb5b..4c9fb8b71be1 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -176,18 +176,6 @@ static void uli_scr_write (struct ata_port *ap, unsigned int sc_reg, u32 val) uli_scr_cfg_write(ap, sc_reg, val); } -/* move to PCI layer, integrate w/ MSI stuff */ 
-static void pci_enable_intx(struct pci_dev *pdev) -{ - u16 pci_command; - - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); - if (pci_command & PCI_COMMAND_INTX_DISABLE) { - pci_command &= ~PCI_COMMAND_INTX_DISABLE; - pci_write_config_word(pdev, PCI_COMMAND, pci_command); - } -} - static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { struct ata_probe_ent *probe_ent; @@ -260,7 +248,7 @@ static int uli_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) } pci_set_master(pdev); - pci_enable_intx(pdev); + pci_intx(pdev, 1); /* FIXME: check ata_device_add return value */ ata_device_add(probe_ent); diff --git a/include/linux/pci.h b/include/linux/pci.h index 7004dde7fef0..6caaba0af469 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -383,6 +383,7 @@ void pci_set_master(struct pci_dev *dev); #define HAVE_PCI_SET_MWI int pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); +void pci_intx(struct pci_dev *dev, int enable); int pci_set_dma_mask(struct pci_dev *dev, u64 mask); int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); -- cgit v1.2.3 From fbf82fd2e1f4e679c60516d772d1862c941ca845 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Sun, 31 Jul 2005 01:05:53 +0200 Subject: [PATCH] USB: real nodes instead of usbfs This patch introduces a /sys/class/usb_device/ class where every connected usb-device will show up: tree /sys/class/usb_device/ /sys/class/usb_device/ |-- usb1.1 | |-- dev | `-- device -> ../../../devices/pci0000:00/0000:00:1d.0/usb1 |-- usb2.1 | |-- dev | `-- device -> ../../../devices/pci0000:00/0000:00:1d.1/usb2 ... The presence of the "dev" file lets udev create real device nodes. kay@pim:~/src/linux-2.6> tree /dev/bus/usb/ /dev/bus/usb/ |-- 1 | `-- 1 |-- 2 | `-- 1 ... 
udev rule: SUBSYSTEM="usb_device", PROGRAM="/sbin/usb_device %k", NAME="%c" (echo $1 | /bin/sed 's/usb\([0-9]*\)\.\([0-9]*\)/bus\/usb\/\1\/\2/') This makes libusb pick up the real nodes instead of the mounted usbfs: export USB_DEVFS_PATH=/dev/bus/usb Background: All this makes it possible to manage usb devices with udev instead of the devfs solution. We are currently working on a pam_console/resmgr replacement driven by udev and a pam-helper. It applies ACLs to device nodes, which is required for modern desktop functionality like "Fast User Switching" or multiple local login support. New patch with its own major. I've successfully disabled usbfs and use real nodes only on my box. With: "export USB_DEVFS_PATH=/dev/bus/usb" libusb picks up the udev managed nodes instead of reading usbfs files. This makes udev provide symlinks for libusb to pick up: SUBSYSTEM="usb_device", PROGRAM="/sbin/usbdevice %k", SYMLINK="%c" /sbin/usbdevice: #!/bin/sh echo $1 | /bin/sed 's/usbdev\([0-9]*\)\.\([0-9]*\)/bus\/usb\/\1\/\2/' Signed-off-by: Kay Sievers Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/Makefile | 4 +-- drivers/usb/core/devio.c | 91 +++++++++++++++++++++++++++++++++++++++++++++-- drivers/usb/core/hub.c | 2 ++ drivers/usb/core/inode.c | 9 +---- drivers/usb/core/usb.c | 15 ++++++-- drivers/usb/core/usb.h | 5 +++ include/linux/usb.h | 2 ++ 7 files changed, 114 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/core/Makefile b/drivers/usb/core/Makefile index 9e8c377b8161..d5503cf0bf74 100644 --- a/drivers/usb/core/Makefile +++ b/drivers/usb/core/Makefile @@ -3,14 +3,14 @@ # usbcore-objs := usb.o hub.o hcd.o urb.o message.o \ - config.o file.o buffer.o sysfs.o + config.o file.o buffer.o sysfs.o devio.o ifeq ($(CONFIG_PCI),y) usbcore-objs += hcd-pci.o endif ifeq ($(CONFIG_USB_DEVICEFS),y) - usbcore-objs += devio.o inode.o devices.o + usbcore-objs += inode.o devices.o endif obj-$(CONFIG_USB) += usbcore.o diff --git
a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index f86bf1454e21..d12bc5e84a1a 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -50,6 +51,10 @@ #include "hcd.h" /* for usbcore internals */ #include "usb.h" +#define USB_MAXBUS 64 +#define USB_DEVICE_MAX USB_MAXBUS * 128 +static struct class *usb_device_class; + struct async { struct list_head asynclist; struct dev_state *ps; @@ -487,7 +492,7 @@ static int check_ctrlrecip(struct dev_state *ps, unsigned int requesttype, unsig */ static int usbdev_open(struct inode *inode, struct file *file) { - struct usb_device *dev; + struct usb_device *dev = NULL; struct dev_state *ps; int ret; @@ -501,11 +506,16 @@ static int usbdev_open(struct inode *inode, struct file *file) lock_kernel(); ret = -ENOENT; - dev = usb_get_dev(inode->u.generic_ip); + /* check if we are called from a real node or usbfs */ + if (imajor(inode) == USB_DEVICE_MAJOR) + dev = usbdev_lookup_minor(iminor(inode)); + if (!dev) + dev = inode->u.generic_ip; if (!dev) { kfree(ps); goto out; } + usb_get_dev(dev); ret = 0; ps->dev = dev; ps->file = file; @@ -1477,3 +1487,80 @@ struct file_operations usbfs_device_file_operations = { .open = usbdev_open, .release = usbdev_release, }; + +struct usb_device *usbdev_lookup_minor(int minor) +{ + struct class_device *class_dev; + struct usb_device *dev = NULL; + + down(&usb_device_class->sem); + list_for_each_entry(class_dev, &usb_device_class->children, node) { + if (class_dev->devt == MKDEV(USB_DEVICE_MAJOR, minor)) { + dev = class_dev->class_data; + break; + } + } + up(&usb_device_class->sem); + + return dev; +}; + +void usbdev_add(struct usb_device *dev) +{ + int minor = ((dev->bus->busnum-1) * 128) + (dev->devnum-1); + + dev->class_dev = class_device_create(usb_device_class, + MKDEV(USB_DEVICE_MAJOR, minor), &dev->dev, + "usbdev%d.%d", dev->bus->busnum, dev->devnum); + + dev->class_dev->class_data = dev; 
+} + +void usbdev_remove(struct usb_device *dev) +{ + class_device_unregister(dev->class_dev); +} + +static struct cdev usb_device_cdev = { + .kobj = {.name = "usb_device", }, + .owner = THIS_MODULE, +}; + +int __init usbdev_init(void) +{ + int retval; + + retval = register_chrdev_region(MKDEV(USB_DEVICE_MAJOR, 0), + USB_DEVICE_MAX, "usb_device"); + if (retval) { + err("unable to register minors for usb_device"); + goto out; + } + cdev_init(&usb_device_cdev, &usbfs_device_file_operations); + retval = cdev_add(&usb_device_cdev, + MKDEV(USB_DEVICE_MAJOR, 0), USB_DEVICE_MAX); + if (retval) { + err("unable to get usb_device major %d", USB_DEVICE_MAJOR); + unregister_chrdev_region(USB_DEVICE_MAJOR, USB_DEVICE_MAX); + goto out; + } + usb_device_class = class_create(THIS_MODULE, "usb_device"); + if (IS_ERR(usb_device_class)) { + err("unable to register usb_device class"); + retval = PTR_ERR(usb_device_class); + usb_device_class = NULL; + cdev_del(&usb_device_cdev); + unregister_chrdev_region(USB_DEVICE_MAJOR, USB_DEVICE_MAX); + } + +out: + return retval; +} + +void usbdev_cleanup(void) +{ + class_destroy(usb_device_class); + cdev_del(&usb_device_cdev); + unregister_chrdev_region(USB_DEVICE_MAJOR, USB_DEVICE_MAX); +} + diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index c9412daff682..a220a5e7f4a5 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1051,6 +1051,7 @@ void usb_disconnect(struct usb_device **pdev) dev_dbg (&udev->dev, "unregistering device\n"); release_address(udev); usbfs_remove_device(udev); + usbdev_remove(udev); usb_remove_sysfs_dev_files(udev); /* Avoid races with recursively_mark_NOTATTACHED() */ @@ -1290,6 +1291,7 @@ int usb_new_device(struct usb_device *udev) /* USB device state == configured ... 
usable */ /* add a /proc/bus/usb entry */ + usbdev_add(udev); usbfs_add_device(udev); return 0; diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index c3e3a95d3804..640f41e47029 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -728,15 +728,9 @@ int __init usbfs_init(void) { int retval; - retval = usb_register(&usbfs_driver); - if (retval) - return retval; - retval = register_filesystem(&usb_fs_type); - if (retval) { - usb_deregister(&usbfs_driver); + if (retval) return retval; - } /* create mount point for usbfs */ usbdir = proc_mkdir("usb", proc_bus); @@ -746,7 +740,6 @@ int __init usbfs_init(void) void usbfs_cleanup(void) { - usb_deregister(&usbfs_driver); unregister_filesystem(&usb_fs_type); if (usbdir) remove_proc_entry("usb", proc_bus); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 2cddd8a00437..bc966dbc6021 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -1478,13 +1478,18 @@ static int __init usb_init(void) retval = usb_major_init(); if (retval) goto major_init_failed; + retval = usb_register(&usbfs_driver); + if (retval) + goto driver_register_failed; + retval = usbdev_init(); + if (retval) + goto usbdevice_init_failed; retval = usbfs_init(); if (retval) goto fs_init_failed; retval = usb_hub_init(); if (retval) goto hub_init_failed; - retval = driver_register(&usb_generic_driver); if (!retval) goto out; @@ -1493,7 +1498,11 @@ static int __init usb_init(void) hub_init_failed: usbfs_cleanup(); fs_init_failed: - usb_major_cleanup(); + usbdev_cleanup(); +usbdevice_init_failed: + usb_deregister(&usbfs_driver); +driver_register_failed: + usb_major_cleanup(); major_init_failed: usb_host_cleanup(); host_init_failed: @@ -1514,6 +1523,8 @@ static void __exit usb_exit(void) driver_unregister(&usb_generic_driver); usb_major_cleanup(); usbfs_cleanup(); + usb_deregister(&usbfs_driver); + usbdev_cleanup(); usb_hub_cleanup(); usb_host_cleanup(); bus_unregister(&usb_bus_type); diff --git 
a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h index 2c690f6d4c18..83d48c8133af 100644 --- a/drivers/usb/core/usb.h +++ b/drivers/usb/core/usb.h @@ -37,6 +37,11 @@ extern struct file_operations usbfs_devices_fops; extern struct file_operations usbfs_device_file_operations; extern void usbfs_conn_disc_event(void); +extern int usbdev_init(void); +extern void usbdev_cleanup(void); +extern void usbdev_add(struct usb_device *dev); +extern void usbdev_remove(struct usb_device *dev); +extern struct usb_device *usbdev_lookup_minor(int minor); struct dev_state { struct list_head list; /* state list */ diff --git a/include/linux/usb.h b/include/linux/usb.h index 724637792996..434e35120c65 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -5,6 +5,7 @@ #include #define USB_MAJOR 180 +#define USB_DEVICE_MAJOR 189 #ifdef __KERNEL__ @@ -349,6 +350,7 @@ struct usb_device { char *manufacturer; char *serial; /* static strings from the device */ struct list_head filelist; + struct class_device *class_dev; struct dentry *usbfs_dentry; /* usbfs dentry entry for the device */ /* -- cgit v1.2.3 From dc5bed091a7a5fe378055c30a2da874f77228b71 Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 4 Aug 2005 16:46:28 +0300 Subject: [PATCH] USB: isp116x-hcd: use fixed power-on-to-power-good-time This patch removes the power-on-to-power-good-time configuration option for isp116x-hcd. 
Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 6 +----- include/linux/usb_isp116x.h | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 75128c371800..a7cb134cf125 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1580,16 +1580,12 @@ static int isp116x_start(struct usb_hcd *hcd) isp116x_write_reg16(isp116x, HCHWCFG, val); /* ----- Root hub conf */ - val = 0; + val = (25 << 24) & RH_A_POTPGT; /* AN10003_1.pdf recommends NPS to be always 1 */ if (board->no_power_switching) val |= RH_A_NPS; if (board->power_switching_mode) val |= RH_A_PSM; - if (board->potpg) - val |= (board->potpg << 24) & RH_A_POTPGT; - else - val |= (25 << 24) & RH_A_POTPGT; isp116x_write_reg32(isp116x, HCRHDESCA, val); isp116x->rhdesca = isp116x_read_reg32(isp116x, HCRHDESCA); diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h index 5f5a9d9bd6c2..9f4fb5626816 100644 --- a/include/linux/usb_isp116x.h +++ b/include/linux/usb_isp116x.h @@ -26,8 +26,6 @@ struct isp116x_platform_data { /* Ganged port power switching (0) or individual port power switching (1) */ unsigned power_switching_mode:1; - /* Given port_power, msec/2 after power on till power good */ - u8 potpg; /* Hardware reset set/clear. 
If implemented, this function must: if set == 0, deassert chip's HW reset pin otherwise, assert chip's HW reset pin */ -- cgit v1.2.3 From d4d62861b5cdb0ecfcae448e4281623284de5d05 Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 4 Aug 2005 16:48:19 +0300 Subject: [PATCH] USB: isp116x-hcd: remove unnecessary ClockNotStop configuration option Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 5 +---- include/linux/usb_isp116x.h | 12 +++++------- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index a7cb134cf125..96aaee509929 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1569,7 +1569,7 @@ static int isp116x_start(struct usb_hcd *hcd) if (board->sel15Kres) val |= HCHWCFG_15KRSEL; /* Remote wakeup won't work without working clock */ - if (board->clknotstop || board->remote_wakeup_enable) + if (board->remote_wakeup_enable) val |= HCHWCFG_CLKNOTSTOP; if (board->oc_enable) val |= HCHWCFG_ANALOG_OC; @@ -1615,9 +1615,6 @@ static int isp116x_start(struct usb_hcd *hcd) /* Go operational */ val = HCCONTROL_USB_OPER; - /* Remote wakeup connected - NOT SUPPORTED */ - /* if (board->remote_wakeup_connected) - val |= HCCONTROL_RWC; */ if (board->remote_wakeup_enable) val |= HCCONTROL_RWE; isp116x_write_reg32(isp116x, HCCONTROL, val); diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h index 9f4fb5626816..0d21407ccfc2 100644 --- a/include/linux/usb_isp116x.h +++ b/include/linux/usb_isp116x.h @@ -7,19 +7,17 @@ struct isp116x_platform_data { /* Enable internal resistors on downstream ports */ unsigned sel15Kres:1; - /* Chip's internal clock won't be stopped in suspended state. - Setting/unsetting this bit takes effect only if - 'remote_wakeup_enable' below is not set. 
*/ - unsigned clknotstop:1; /* On-chip overcurrent protection */ unsigned oc_enable:1; /* INT output polarity */ unsigned int_act_high:1; /* INT edge or level triggered */ unsigned int_edge_triggered:1; - /* WAKEUP pin connected - NOT SUPPORTED */ - /* unsigned remote_wakeup_connected:1; */ - /* Wakeup by devices on usb bus enabled */ + /* Enable wakeup by devices on usb bus (e.g. wakeup + by attachment/detachment or by device activity + such as moving a mouse). When chosen, this option + prevents stopping internal clock, increasing + thereby power consumption in suspended state. */ unsigned remote_wakeup_enable:1; /* Switch or not to switch (keep always powered) */ unsigned no_power_switching:1; -- cgit v1.2.3 From 165c0f39390212d7a517b80c3bb61cb8f1782fef Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 4 Aug 2005 16:52:31 +0300 Subject: [PATCH] USB: isp116x-hcd: support only per-port power switching The isp116x chip will now always be in per-port power switching mode. Remove conf options to set any other mode. Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 9 ++++----- include/linux/usb_isp116x.h | 5 ----- 2 files changed, 4 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index 96aaee509929..a3e881c60026 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1581,11 +1581,10 @@ static int isp116x_start(struct usb_hcd *hcd) /* ----- Root hub conf */ val = (25 << 24) & RH_A_POTPGT; - /* AN10003_1.pdf recommends NPS to be always 1 */ - if (board->no_power_switching) - val |= RH_A_NPS; - if (board->power_switching_mode) - val |= RH_A_PSM; + /* AN10003_1.pdf recommends RH_A_NPS (no power switching) to + be always set. Yet, instead, we request individual port + power switching. 
*/ + val |= RH_A_PSM; isp116x_write_reg32(isp116x, HCRHDESCA, val); isp116x->rhdesca = isp116x_read_reg32(isp116x, HCRHDESCA); diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h index 0d21407ccfc2..c028d724be94 100644 --- a/include/linux/usb_isp116x.h +++ b/include/linux/usb_isp116x.h @@ -19,11 +19,6 @@ struct isp116x_platform_data { prevents stopping internal clock, increasing thereby power consumption in suspended state. */ unsigned remote_wakeup_enable:1; - /* Switch or not to switch (keep always powered) */ - unsigned no_power_switching:1; - /* Ganged port power switching (0) or individual port - power switching (1) */ - unsigned power_switching_mode:1; /* Hardware reset set/clear. If implemented, this function must: if set == 0, deassert chip's HW reset pin otherwise, assert chip's HW reset pin */ -- cgit v1.2.3 From 9d233d9faedfd8a4ee22288c1fdc698a6f75db21 Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 4 Aug 2005 16:54:08 +0300 Subject: [PATCH] USB: isp116x-hcd: per-port overcurrent reporting This patch sets the isp116x to report overcurrent always per-port. Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 2 ++ include/linux/usb_isp116x.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index a3e881c60026..aeddef7a12af 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1585,6 +1585,8 @@ static int isp116x_start(struct usb_hcd *hcd) be always set. Yet, instead, we request individual port power switching. 
*/ val |= RH_A_PSM; + /* Report overcurrent per port */ + val |= RH_A_OCPM; isp116x_write_reg32(isp116x, HCRHDESCA, val); isp116x->rhdesca = isp116x_read_reg32(isp116x, HCRHDESCA); diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h index c028d724be94..8f0b3c226968 100644 --- a/include/linux/usb_isp116x.h +++ b/include/linux/usb_isp116x.h @@ -7,7 +7,7 @@ struct isp116x_platform_data { /* Enable internal resistors on downstream ports */ unsigned sel15Kres:1; - /* On-chip overcurrent protection */ + /* On-chip overcurrent detection */ unsigned oc_enable:1; /* INT output polarity */ unsigned int_act_high:1; -- cgit v1.2.3 From f8d23d309809ae69c763520dababb7e845938272 Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Thu, 4 Aug 2005 17:02:54 +0300 Subject: [PATCH] USB: isp116x-hcd: remove clock() and reset() This patch removes support for user-provided platform-specific hardware reset and clock starting/stopping functions. Hardware reset was needed earlier as getting the software reset working was tricky due to the lack of documentation. Recently, a number of people using isp116x have said the software reset is working for them. I haven't heard of anybody using the clock starting/stopping. Signed-off-by: Olav Kongas Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/isp116x-hcd.c | 31 +++---------------------------- include/linux/usb_isp116x.h | 9 --------- 2 files changed, 3 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c index aeddef7a12af..1ed2abac8d17 100644 --- a/drivers/usb/host/isp116x-hcd.c +++ b/drivers/usb/host/isp116x-hcd.c @@ -1463,10 +1463,6 @@ static int isp116x_sw_reset(struct isp116x *isp116x) return ret; } -/* - Reset. Tries to perform platform-specific hardware - reset first; falls back to software reset. 
-*/ static int isp116x_reset(struct usb_hcd *hcd) { struct isp116x *isp116x = hcd_to_isp116x(hcd); @@ -1474,17 +1470,7 @@ static int isp116x_reset(struct usb_hcd *hcd) u16 clkrdy = 0; int ret = 0, timeout = 15 /* ms */ ; - if (isp116x->board && isp116x->board->reset) { - /* Hardware reset */ - isp116x->board->reset(hcd->self.controller, 1); - msleep(10); - if (isp116x->board->clock) - isp116x->board->clock(hcd->self.controller, 1); - msleep(1); - isp116x->board->reset(hcd->self.controller, 0); - } else - ret = isp116x_sw_reset(isp116x); - + ret = isp116x_sw_reset(isp116x); if (ret) return ret; @@ -1501,10 +1487,7 @@ static int isp116x_reset(struct usb_hcd *hcd) ERR("Clock not ready after 20ms\n"); /* After sw_reset the clock won't report to be ready, if H_WAKEUP pin is high. */ - if (!isp116x->board || !isp116x->board->reset) - ERR("The driver does not support hardware wakeup.\n"); - ERR("Please make sure that the H_WAKEUP pin " - "is pulled low!\n"); + ERR("Please make sure that the H_WAKEUP pin is pulled low!\n"); ret = -ENODEV; } return ret; @@ -1527,15 +1510,7 @@ static void isp116x_stop(struct usb_hcd *hcd) isp116x_write_reg32(isp116x, HCRHSTATUS, RH_HS_LPS); spin_unlock_irqrestore(&isp116x->lock, flags); - /* Put the chip into reset state */ - if (isp116x->board && isp116x->board->reset) - isp116x->board->reset(hcd->self.controller, 0); - else - isp116x_sw_reset(isp116x); - - /* Stop the clock */ - if (isp116x->board && isp116x->board->clock) - isp116x->board->clock(hcd->self.controller, 0); + isp116x_sw_reset(isp116x); } /* diff --git a/include/linux/usb_isp116x.h b/include/linux/usb_isp116x.h index 8f0b3c226968..436dd8a2b64a 100644 --- a/include/linux/usb_isp116x.h +++ b/include/linux/usb_isp116x.h @@ -19,15 +19,6 @@ struct isp116x_platform_data { prevents stopping internal clock, increasing thereby power consumption in suspended state. */ unsigned remote_wakeup_enable:1; - /* Hardware reset set/clear. 
If implemented, this function must: - if set == 0, deassert chip's HW reset pin - otherwise, assert chip's HW reset pin */ - void (*reset) (struct device * dev, int set); - /* Hardware clock start/stop. If implemented, this function must: - if start == 0, stop the external clock - otherwise, start the external clock - */ - void (*clock) (struct device * dev, int start); /* Inter-io delay (ns). The chip is picky about access timings; it expects at least: 150ns delay between consecutive accesses to DATA_REG, -- cgit v1.2.3 From b375a0495fd622037560c73c05f23ae6f127bb0c Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Fri, 29 Jul 2005 16:11:07 -0400 Subject: [PATCH] USB: URB_ASYNC_UNLINK flag removed from the kernel 29 July 2005, Cambridge, MA: This afternoon Alan Stern submitted a patch to remove the URB_ASYNC_UNLINK flag from the Linux kernel. Mr. Stern explained, "This flag is a relic from an earlier, less-well-designed system. For over a year it hasn't been used for anything other than printing warning messages." An anonymous spokesman for the Linux kernel development community commented, "This is exactly the sort of thing we see happening all the time. As the kernel evolves, support for old techniques and old code can be jettisoned and replaced by newer, better approaches. Proprietary operating systems do not have the freedom or flexibility to change so quickly." Mr. Stern, a staff member at Harvard University's Rowland Institute who works on Linux only as a hobby, noted that the patch (labelled as548) did not update two files, keyspan.c and option.c, in the USB drivers' "serial" subdirectory. "Those files need more extensive changes," he remarked. "They examine the status field of several URBs at times when they're not supposed to. That will need to be fixed before the URB_ASYNC_UNLINK flag is removed." Greg Kroah-Hartman, the kernel maintainer responsible for overseeing all of Linux's USB drivers, did not respond to our inquiries or return our calls. 
His only comment was "Applied, thanks." Signed-off-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/block/ub.c | 8 ++++---- drivers/net/irda/irda-usb.c | 13 ++----------- drivers/usb/atm/cxacru.c | 2 -- drivers/usb/core/message.c | 4 +--- drivers/usb/core/urb.c | 26 ++++---------------------- drivers/usb/input/hid-core.c | 6 +++--- drivers/usb/misc/auerswald.c | 3 +-- drivers/usb/misc/sisusbvga/sisusb.c | 4 ++-- drivers/usb/misc/usbtest.c | 2 -- drivers/usb/net/catc.c | 2 -- drivers/usb/net/kaweth.c | 1 - drivers/usb/net/pegasus.c | 1 - drivers/usb/net/rtl8150.c | 1 - drivers/usb/net/usbnet.c | 2 -- drivers/usb/net/zd1201.c | 1 - drivers/usb/storage/transport.c | 7 +++---- include/linux/usb.h | 9 +-------- sound/usb/usbaudio.c | 10 ++++------ 18 files changed, 25 insertions(+), 77 deletions(-) (limited to 'include/linux') diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 57d3279a8815..aa0bf7ee008d 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -1010,7 +1010,7 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->send_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe, bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; + sc->work_urb.transfer_flags = 0; /* Fill what we shouldn't be filling, because usb-storage did so. 
*/ sc->work_urb.actual_length = 0; @@ -1395,7 +1395,7 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, page_address(sg->page) + sg->offset, sg->length, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; + sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1442,7 +1442,7 @@ static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->recv_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe, &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; + sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1563,7 +1563,7 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, (unsigned char*) cr, NULL, 0, ub_urb_complete, sc); - sc->work_urb.transfer_flags = URB_ASYNC_UNLINK; + sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c index 46e0022d3258..6c766fdc51a6 100644 --- a/drivers/net/irda/irda-usb.c +++ b/drivers/net/irda/irda-usb.c @@ -267,7 +267,7 @@ static void irda_usb_change_speed_xbofs(struct irda_usb_cb *self) frame, IRDA_USB_SPEED_MTU, speed_bulk_callback, self); urb->transfer_buffer_length = USB_IRDA_HEADER; - urb->transfer_flags = URB_ASYNC_UNLINK; + urb->transfer_flags = 0; /* Irq disabled -> GFP_ATOMIC */ if ((ret = usb_submit_urb(urb, GFP_ATOMIC))) { @@ -401,15 +401,12 @@ static int irda_usb_hard_xmit(struct sk_buff *skb, struct net_device *netdev) skb->data, IRDA_SKB_MAX_MTU, write_bulk_callback, skb); urb->transfer_buffer_length = skb->len; - /* Note : unlink *must* be Asynchronous because of the 
code in - * irda_usb_net_timeout() -> call in irq - Jean II */ - urb->transfer_flags = URB_ASYNC_UNLINK; /* This flag (URB_ZERO_PACKET) indicates that what we send is not * a continuous stream of data but separate packets. * In this case, the USB layer will insert an empty USB frame (TD) * after each of our packets that is exact multiple of the frame size. * This is how the dongle will detect the end of packet - Jean II */ - urb->transfer_flags |= URB_ZERO_PACKET; + urb->transfer_flags = URB_ZERO_PACKET; /* Generate min turn time. FIXME: can we do better than this? */ /* Trying to a turnaround time at this level is trying to measure @@ -630,8 +627,6 @@ static void irda_usb_net_timeout(struct net_device *netdev) * in completion handler, because urb->status will * be -ENOENT. We will fix that at the next watchdog, * leaving more time to USB to recover... - * Also, we are in interrupt, so we need to have - * URB_ASYNC_UNLINK to work properly... * Jean II */ done = 1; break; @@ -1008,9 +1003,7 @@ static int irda_usb_net_close(struct net_device *netdev) } } /* Cancel Tx and speed URB - need to be synchronous to avoid races */ - self->tx_urb->transfer_flags &= ~URB_ASYNC_UNLINK; usb_kill_urb(self->tx_urb); - self->speed_urb->transfer_flags &= ~URB_ASYNC_UNLINK; usb_kill_urb(self->speed_urb); /* Stop and remove instance of IrLAP */ @@ -1521,9 +1514,7 @@ static void irda_usb_disconnect(struct usb_interface *intf) usb_kill_urb(self->rx_urb[i]); /* Cancel Tx and speed URB. * Toggle flags to make sure it's synchronous. 
*/ - self->tx_urb->transfer_flags &= ~URB_ASYNC_UNLINK; usb_kill_urb(self->tx_urb); - self->speed_urb->transfer_flags &= ~URB_ASYNC_UNLINK; usb_kill_urb(self->speed_urb); } diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c index 8e184e2641cb..79861ee12a29 100644 --- a/drivers/usb/atm/cxacru.c +++ b/drivers/usb/atm/cxacru.c @@ -715,13 +715,11 @@ static int cxacru_bind(struct usbatm_data *usbatm_instance, usb_dev, usb_rcvintpipe(usb_dev, CXACRU_EP_CMD), instance->rcv_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->rcv_done, 1); - instance->rcv_urb->transfer_flags |= URB_ASYNC_UNLINK; usb_fill_int_urb(instance->snd_urb, usb_dev, usb_sndintpipe(usb_dev, CXACRU_EP_CMD), instance->snd_buf, PAGE_SIZE, cxacru_blocking_completion, &instance->snd_done, 4); - instance->snd_urb->transfer_flags |= URB_ASYNC_UNLINK; init_MUTEX(&instance->cm_serialize); diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 88d1b376f67c..74197249c245 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -48,7 +48,6 @@ static int usb_start_wait_urb(struct urb *urb, int timeout, int* actual_length) init_completion(&done); urb->context = &done; - urb->transfer_flags |= URB_ASYNC_UNLINK; urb->actual_length = 0; status = usb_submit_urb(urb, GFP_NOIO); @@ -357,8 +356,7 @@ int usb_sg_init ( if (!io->urbs) goto nomem; - urb_flags = URB_ASYNC_UNLINK | URB_NO_TRANSFER_DMA_MAP - | URB_NO_INTERRUPT; + urb_flags = URB_NO_TRANSFER_DMA_MAP | URB_NO_INTERRUPT; if (usb_pipein (pipe)) urb_flags |= URB_SHORT_NOT_OK; diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c index c0feee25ff0a..c846fefb7386 100644 --- a/drivers/usb/core/urb.c +++ b/drivers/usb/core/urb.c @@ -309,9 +309,8 @@ int usb_submit_urb(struct urb *urb, unsigned mem_flags) unsigned int allowed; /* enforce simple/standard policy */ - allowed = URB_ASYNC_UNLINK; // affects later unlinks - allowed |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP); - allowed |= URB_NO_INTERRUPT; + 
allowed = (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP | + URB_NO_INTERRUPT); switch (temp) { case PIPE_BULK: if (is_out) @@ -400,14 +399,8 @@ int usb_submit_urb(struct urb *urb, unsigned mem_flags) * canceled (rather than any other code) and will quickly be removed * from host controller data structures. * - * In the past, clearing the URB_ASYNC_UNLINK transfer flag for the - * URB indicated that the request was synchronous. This usage is now - * deprecated; if the flag is clear the call will be forwarded to - * usb_kill_urb() and the return value will be 0. In the future, drivers - * should call usb_kill_urb() directly for synchronous unlinking. - * - * When the URB_ASYNC_UNLINK transfer flag for the URB is set, this - * request is asynchronous. Success is indicated by returning -EINPROGRESS, + * This request is always asynchronous. + * Success is indicated by returning -EINPROGRESS, * at which time the URB will normally have been unlinked but not yet * given back to the device driver. When it is called, the completion * function will see urb->status == -ECONNRESET. Failure is indicated @@ -453,17 +446,6 @@ int usb_unlink_urb(struct urb *urb) { if (!urb) return -EINVAL; - if (!(urb->transfer_flags & URB_ASYNC_UNLINK)) { -#ifdef CONFIG_DEBUG_KERNEL - if (printk_ratelimit()) { - printk(KERN_NOTICE "usb_unlink_urb() is deprecated for " - "synchronous unlinks. 
Use usb_kill_urb() instead.\n"); - WARN_ON(1); - } -#endif - usb_kill_urb(urb); - return 0; - } if (!(urb->dev && urb->dev->bus && urb->dev->bus->op)) return -ENODEV; return urb->dev->bus->op->unlink_urb(urb, -ECONNRESET); diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c index 719c0316cc39..1ab95d24c5e2 100644 --- a/drivers/usb/input/hid-core.c +++ b/drivers/usb/input/hid-core.c @@ -1688,7 +1688,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf) usb_fill_int_urb(hid->urbin, dev, pipe, hid->inbuf, 0, hid_irq_in, hid, interval); hid->urbin->transfer_dma = hid->inbuf_dma; - hid->urbin->transfer_flags |=(URB_NO_TRANSFER_DMA_MAP | URB_ASYNC_UNLINK); + hid->urbin->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } else { if (hid->urbout) continue; @@ -1698,7 +1698,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf) usb_fill_int_urb(hid->urbout, dev, pipe, hid->outbuf, 0, hid_irq_out, hid, interval); hid->urbout->transfer_dma = hid->outbuf_dma; - hid->urbout->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_ASYNC_UNLINK); + hid->urbout->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; } } @@ -1750,7 +1750,7 @@ static struct hid_device *usb_hid_configure(struct usb_interface *intf) hid->ctrlbuf, 1, hid_ctrl, hid); hid->urbctrl->setup_dma = hid->cr_dma; hid->urbctrl->transfer_dma = hid->ctrlbuf_dma; - hid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP | URB_ASYNC_UNLINK); + hid->urbctrl->transfer_flags |= (URB_NO_TRANSFER_DMA_MAP | URB_NO_SETUP_DMA_MAP); return hid; diff --git a/drivers/usb/misc/auerswald.c b/drivers/usb/misc/auerswald.c index 6f7994f5a714..ae4681f9f0ea 100644 --- a/drivers/usb/misc/auerswald.c +++ b/drivers/usb/misc/auerswald.c @@ -426,7 +426,7 @@ static int auerchain_submit_urb (pauerchain_t acp, struct urb * urb) /* cancel an urb which is submitted to the chain the result is 0 if the urb is cancelled, or -EINPROGRESS if - URB_ASYNC_UNLINK is set and the function 
is successfully started. + the function is successfully started. */ static int auerchain_unlink_urb (pauerchain_t acp, struct urb * urb) { @@ -515,7 +515,6 @@ static void auerchain_unlink_all (pauerchain_t acp) acep = acp->active; if (acep) { urbp = acep->urbp; - urbp->transfer_flags &= ~URB_ASYNC_UNLINK; dbg ("unlink active urb"); usb_kill_urb (urbp); } diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c index 2fd12264fd53..d63ce6c030f3 100644 --- a/drivers/usb/misc/sisusbvga/sisusb.c +++ b/drivers/usb/misc/sisusbvga/sisusb.c @@ -229,7 +229,7 @@ sisusb_bulkout_msg(struct sisusb_usb_data *sisusb, int index, unsigned int pipe, usb_fill_bulk_urb(urb, sisusb->sisusb_dev, pipe, data, len, sisusb_bulk_completeout, &sisusb->urbout_context[index]); - urb->transfer_flags |= (tflags | URB_ASYNC_UNLINK); + urb->transfer_flags |= tflags; urb->actual_length = 0; if ((urb->transfer_dma = transfer_dma)) @@ -295,7 +295,7 @@ sisusb_bulkin_msg(struct sisusb_usb_data *sisusb, unsigned int pipe, void *data, usb_fill_bulk_urb(urb, sisusb->sisusb_dev, pipe, data, len, sisusb_bulk_completein, sisusb); - urb->transfer_flags |= (tflags | URB_ASYNC_UNLINK); + urb->transfer_flags |= tflags; urb->actual_length = 0; if ((urb->transfer_dma = transfer_dma)) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index fd7fb98e4b20..54799eb0bc60 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -986,7 +986,6 @@ test_ctrl_queue (struct usbtest_dev *dev, struct usbtest_param *param) u->context = &context; u->complete = ctrl_complete; - u->transfer_flags |= URB_ASYNC_UNLINK; } /* queue the urbs */ @@ -1052,7 +1051,6 @@ static int unlink1 (struct usbtest_dev *dev, int pipe, int size, int async) urb = simple_alloc_urb (testdev_to_usbdev (dev), pipe, size); if (!urb) return -ENOMEM; - urb->transfer_flags |= URB_ASYNC_UNLINK; urb->context = &completion; urb->complete = unlink1_callback; diff --git a/drivers/usb/net/catc.c 
b/drivers/usb/net/catc.c index c8be912f24e1..37ef365a2472 100644 --- a/drivers/usb/net/catc.c +++ b/drivers/usb/net/catc.c @@ -383,7 +383,6 @@ static void catc_tx_done(struct urb *urb, struct pt_regs *regs) if (urb->status == -ECONNRESET) { dbg("Tx Reset."); - urb->transfer_flags &= ~URB_ASYNC_UNLINK; urb->status = 0; catc->netdev->trans_start = jiffies; catc->stats.tx_errors++; @@ -445,7 +444,6 @@ static void catc_tx_timeout(struct net_device *netdev) struct catc *catc = netdev_priv(netdev); warn("Transmit timed out."); - catc->tx_urb->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(catc->tx_urb); } diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c index 7ffa99b9760f..e04b0ce3611a 100644 --- a/drivers/usb/net/kaweth.c +++ b/drivers/usb/net/kaweth.c @@ -787,7 +787,6 @@ static int kaweth_start_xmit(struct sk_buff *skb, struct net_device *net) kaweth_usb_transmit_complete, kaweth); kaweth->end = 0; - kaweth->tx_urb->transfer_flags |= URB_ASYNC_UNLINK; if((res = usb_submit_urb(kaweth->tx_urb, GFP_ATOMIC))) { diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c index fcd6d3ccef44..7484d34780fc 100644 --- a/drivers/usb/net/pegasus.c +++ b/drivers/usb/net/pegasus.c @@ -825,7 +825,6 @@ static void pegasus_tx_timeout(struct net_device *net) pegasus_t *pegasus = netdev_priv(net); if (netif_msg_timer(pegasus)) printk(KERN_WARNING "%s: tx timeout\n", net->name); - pegasus->tx_urb->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(pegasus->tx_urb); pegasus->stats.tx_errors++; } diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c index 59ab40ebb394..c3d4e3589e30 100644 --- a/drivers/usb/net/rtl8150.c +++ b/drivers/usb/net/rtl8150.c @@ -653,7 +653,6 @@ static void rtl8150_tx_timeout(struct net_device *netdev) { rtl8150_t *dev = netdev_priv(netdev); warn("%s: Tx timeout.", netdev->name); - dev->tx_urb->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(dev->tx_urb); dev->stats.tx_errors++; } diff --git a/drivers/usb/net/usbnet.c 
b/drivers/usb/net/usbnet.c index 4682696450db..3c6eef4168e5 100644 --- a/drivers/usb/net/usbnet.c +++ b/drivers/usb/net/usbnet.c @@ -2987,7 +2987,6 @@ static void rx_submit (struct usbnet *dev, struct urb *urb, unsigned flags) usb_fill_bulk_urb (urb, dev->udev, dev->in, skb->data, size, rx_complete, skb); - urb->transfer_flags |= URB_ASYNC_UNLINK; spin_lock_irqsave (&dev->rxq.lock, lockflags); @@ -3561,7 +3560,6 @@ static int usbnet_start_xmit (struct sk_buff *skb, struct net_device *net) usb_fill_bulk_urb (urb, dev->udev, dev->out, skb->data, skb->len, tx_complete, skb); - urb->transfer_flags |= URB_ASYNC_UNLINK; /* don't assume the hardware handles USB_ZERO_PACKET * NOTE: strictly conforming cdc-ether devices should expect diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c index fc013978837e..c4e479ee926a 100644 --- a/drivers/usb/net/zd1201.c +++ b/drivers/usb/net/zd1201.c @@ -847,7 +847,6 @@ static void zd1201_tx_timeout(struct net_device *dev) return; dev_warn(&zd->usb->dev, "%s: TX timeout, shooting down urb\n", dev->name); - zd->tx_urb->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(zd->tx_urb); zd->stats.tx_errors++; /* Restart the timeout to quiet the watchdog: */ diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index e42875152c34..c1ba5301ebfc 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -96,8 +96,8 @@ * or before the URB_ACTIVE bit was set. If so, it's essential to cancel * the URB if it hasn't been cancelled already (i.e., if the URB_ACTIVE bit * is still set). Either way, the function must then wait for the URB to - * finish. Note that because the URB_ASYNC_UNLINK flag is set, the URB can - * still be in progress even after a call to usb_unlink_urb() returns. + * finish. Note that the URB can still be in progress even after a call to + * usb_unlink_urb() returns. 
* * The idea is that (1) once the ABORTING or DISCONNECTING bit is set, * either the stop_transport() function or the submitting function @@ -158,8 +158,7 @@ static int usb_stor_msg_common(struct us_data *us, int timeout) * hasn't been mapped for DMA. Yes, this is clunky, but it's * easier than always having the caller tell us whether the * transfer buffer has already been mapped. */ - us->current_urb->transfer_flags = - URB_ASYNC_UNLINK | URB_NO_SETUP_DMA_MAP; + us->current_urb->transfer_flags = URB_NO_SETUP_DMA_MAP; if (us->current_urb->transfer_buffer == us->iobuf) us->current_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; us->current_urb->transfer_dma = us->iobuf_dma; diff --git a/include/linux/usb.h b/include/linux/usb.h index 434e35120c65..4dbe580f9335 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -616,7 +616,6 @@ extern int usb_disabled(void); #define URB_ISO_ASAP 0x0002 /* iso-only, urb->start_frame ignored */ #define URB_NO_TRANSFER_DMA_MAP 0x0004 /* urb->transfer_dma valid on submit */ #define URB_NO_SETUP_DMA_MAP 0x0008 /* urb->setup_dma valid on submit */ -#define URB_ASYNC_UNLINK 0x0010 /* usb_unlink_urb() returns asap */ #define URB_NO_FSBR 0x0020 /* UHCI-specific */ #define URB_ZERO_PACKET 0x0040 /* Finish bulk OUTs with short packet */ #define URB_NO_INTERRUPT 0x0080 /* HINT: no non-error interrupt needed */ @@ -724,13 +723,7 @@ typedef void (*usb_complete_t)(struct urb *, struct pt_regs *); * Initialization: * * All URBs submitted must initialize the dev, pipe, transfer_flags (may be - * zero), and complete fields. - * The URB_ASYNC_UNLINK transfer flag affects later invocations of - * the usb_unlink_urb() routine. Note: Failure to set URB_ASYNC_UNLINK - * with usb_unlink_urb() is deprecated. For synchronous unlinks use - * usb_kill_urb() instead. - * - * All URBs must also initialize + * zero), and complete fields. All URBs must also initialize * transfer_buffer and transfer_buffer_length. 
They may provide the * URB_SHORT_NOT_OK transfer flag, indicating that short reads are * to be treated as errors; that flag is invalid for write requests. diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c index 5aa5fe651a8a..bfbec5876659 100644 --- a/sound/usb/usbaudio.c +++ b/sound/usb/usbaudio.c @@ -735,10 +735,9 @@ static int deactivate_urbs(snd_usb_substream_t *subs, int force, int can_sleep) if (test_bit(i, &subs->active_mask)) { if (! test_and_set_bit(i, &subs->unlink_mask)) { struct urb *u = subs->dataurb[i].urb; - if (async) { - u->transfer_flags |= URB_ASYNC_UNLINK; + if (async) usb_unlink_urb(u); - } else + else usb_kill_urb(u); } } @@ -748,10 +747,9 @@ static int deactivate_urbs(snd_usb_substream_t *subs, int force, int can_sleep) if (test_bit(i+16, &subs->active_mask)) { if (! test_and_set_bit(i+16, &subs->unlink_mask)) { struct urb *u = subs->syncurb[i].urb; - if (async) { - u->transfer_flags |= URB_ASYNC_UNLINK; + if (async) usb_unlink_urb(u); - } else + else usb_kill_urb(u); } } -- cgit v1.2.3 From 507d37cf269ebbd1b32bcc435fe577e411f73151 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:30:07 -0300 Subject: [CCID] Only call the HC insert_options methods when requested Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 2 ++ net/dccp/ccids/ccid3.c | 5 ++++- net/dccp/options.c | 11 ++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 007c290f74d4..5e0af0d08a93 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -433,6 +433,8 @@ struct dccp_sock { struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; enum dccp_role dccps_role:2; + __u8 dccps_hc_rx_insert_options:1; + __u8 dccps_hc_tx_insert_options:1; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index ae0500c79d07..37fd9eb8daaf 
100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -358,10 +358,12 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, } /* Can we send? if so add options and add to packet history */ - if (rc == 0) + if (rc == 0) { + dp->dccps_hc_tx_insert_options = 1; new_packet->dccphtx_ccval = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count; + } out: return rc; } @@ -811,6 +813,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) hcrx->ccid3hcrx_pinv = ~0; else hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; + dp->dccps_hc_rx_insert_options = 1; dccp_send_ack(sk); } diff --git a/net/dccp/options.c b/net/dccp/options.c index 382c5894acb2..7ad2f4266ff9 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -505,13 +505,18 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)) dccp_insert_option_ack_vector(sk, skb); - if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } - ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); - ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + if (dp->dccps_hc_rx_insert_options) { + ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb); + dp->dccps_hc_rx_insert_options = 0; + } + if (dp->dccps_hc_tx_insert_options) { + ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb); + dp->dccps_hc_tx_insert_options = 0; + } /* XXX: insert other options when appropriate */ -- cgit v1.2.3 From b0e567806d16586629468c824dfb2e71155df7da Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 9 Sep 2005 02:38:35 -0300 Subject: [DCCP] Introduce dccp_timestamp To start the timestamps with 0.0ms, easing the integer maths in the CCIDs, this probably will be reworked to use the to be introduced struct timeval_offset infrastructure out of skb_get_timestamp, etc. 
Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/dccp.h | 1 + net/dccp/ccids/ccid3.c | 27 +++++++++++----------- net/dccp/ccids/ccid3.h | 2 +- net/dccp/ccids/lib/packet_history.h | 3 ++- net/dccp/dccp.h | 16 ++++--------- net/dccp/input.c | 4 ++-- net/dccp/ipv4.c | 1 + net/dccp/minisocks.c | 1 + net/dccp/options.c | 45 ++++++++++++++++++++++++++++--------- 9 files changed, 61 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 5e0af0d08a93..8bf4bacb5051 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -432,6 +432,7 @@ struct dccp_sock { struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; + struct timeval dccps_epoch; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 145aafafe4e2..348e6fb262c3 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -169,7 +169,7 @@ static void ccid3_hc_tx_update_x(struct sock *sk) } else { struct timeval now; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) { hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv, @@ -317,7 +317,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet); } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hctx->ccid3hctx_state) { case TFRC_SSTATE_NO_SENT: @@ -382,7 +382,7 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len) return; } - do_gettimeofday(&now); + dccp_timestamp(sk, &now); /* check if we have sent a data packet */ if (len > 0) { @@ -461,6 +461,7 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private; struct ccid3_options_received *opt_recv; struct 
dccp_tx_hist_entry *packet; + struct timeval now; unsigned long next_tmout; u32 t_elapsed; u32 pinv; @@ -508,7 +509,8 @@ static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) } /* Update RTT */ - r_sample = timeval_now_delta(&packet->dccphtx_tstamp); + dccp_timestamp(sk, &now); + r_sample = timeval_delta(&now, &packet->dccphtx_tstamp); if (unlikely(r_sample <= t_elapsed)) LIMIT_NETDEBUG(KERN_WARNING "%s: r_sample=%uus, t_elapsed=%uus\n", @@ -774,7 +776,7 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk) ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk); - do_gettimeofday(&now); + dccp_timestamp(sk, &now); switch (hcrx->ccid3hcrx_state) { case TFRC_RSTATE_NO_DATA: @@ -903,10 +905,9 @@ found: if (rtt == 0) rtt = 1; - delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback); - x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC; - if (likely(delta > 1)) - x_recv /= delta; + dccp_timestamp(sk, &tstamp); + delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback); + x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta); tmp1 = (u64)x_recv * (u64)rtt; do_div(tmp1,10000000); @@ -981,7 +982,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (opt_recv->dccpor_timestamp_echo == 0) break; p_prev = hcrx->ccid3hcrx_rtt; - do_gettimeofday(&now); + dccp_timestamp(sk, &now); timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10); r_sample = timeval_usecs(&now); t_elapsed = opt_recv->dccpor_elapsed_time * 10; @@ -1013,7 +1014,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) return; } - packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp, + packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp, skb, SLAB_ATOMIC); if (packet == NULL) { ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet " @@ -1045,7 +1046,7 @@ static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) if (ins != 0) break; - do_gettimeofday(&now); + 
dccp_timestamp(sk, &now); if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >= hcrx->ccid3hcrx_rtt) { hcrx->ccid3hcrx_tstamp_last_ack = now; @@ -1100,7 +1101,7 @@ static int ccid3_hc_rx_init(struct sock *sk) hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); - do_gettimeofday(&hcrx->ccid3hcrx_tstamp_last_ack); + dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack); hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack; hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */ return 0; diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index ee8cbace6630..58be6125b695 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -115,7 +115,7 @@ struct ccid3_hc_rx_sock { u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; - unsigned long ccid3hcrx_rtt; + u32 ccid3hcrx_rtt; u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h index fb90a91aa93d..b375ebdb7dcf 100644 --- a/net/dccp/ccids/lib/packet_history.h +++ b/net/dccp/ccids/lib/packet_history.h @@ -134,6 +134,7 @@ static inline struct dccp_tx_hist_entry * static inline struct dccp_rx_hist_entry * dccp_rx_hist_entry_new(struct dccp_rx_hist *hist, + const struct sock *sk, const u32 ndp, const struct sk_buff *skb, const unsigned int __nocast prio) @@ -148,7 +149,7 @@ static inline struct dccp_rx_hist_entry * entry->dccphrx_ccval = dh->dccph_ccval; entry->dccphrx_type = dh->dccph_type; entry->dccphrx_ndp = ndp; - do_gettimeofday(&(entry->dccphrx_tstamp)); + dccp_timestamp(sk, &entry->dccphrx_tstamp); } return entry; diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 33456c0d5937..95c4630b3b18 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -426,10 +426,13 @@ extern struct dccp_ackpkts * dccp_ackpkts_alloc(unsigned int len, const unsigned int 
__nocast priority); extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); -extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state); extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, u64 ackno); +extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); + static inline suseconds_t timeval_usecs(const struct timeval *tv) { return tv->tv_sec * USEC_PER_SEC + tv->tv_usec; @@ -468,17 +471,6 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } -/* - * Returns the difference in usecs between timeval - * passed in and current time - */ -static inline suseconds_t timeval_now_delta(const struct timeval *tv) -{ - struct timeval now; - do_gettimeofday(&now); - return timeval_delta(&now, tv); -} - #ifdef CONFIG_IP_DCCP_DEBUG extern void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len); diff --git a/net/dccp/input.c b/net/dccp/input.c index ef29cef1dafe..c60bc3433f5e 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -170,7 +170,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (dp->dccps_options.dccpo_send_ack_vector) { struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) { LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " @@ -498,7 +498,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * DCCP_ACKPKTS_STATE_ECN_MARKED */ if (dp->dccps_options.dccpo_send_ack_vector) { - if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, DCCP_SKB_CB(skb)->dccpd_seq, DCCP_ACKPKTS_STATE_RECEIVED)) goto discard; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 3fc75dbee4b8..fee9a8c3777b 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ 
-1243,6 +1243,7 @@ static int dccp_v4_init_sock(struct sock *sk) static int dccp_ctl_socket_init = 1; dccp_options_init(&dp->dccps_options); + do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { dp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index ce5dff4ac22e..18461bc04cbe 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -96,6 +96,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newdp->dccps_hc_rx_ackpkts = NULL; newdp->dccps_role = DCCP_ROLE_SERVER; newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { newdp->dccps_hc_rx_ackpkts = diff --git a/net/dccp/options.c b/net/dccp/options.c index 34b230a00875..d4c4242d8dd7 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -140,7 +140,7 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_timestamp = ntohl(*(u32 *)value); dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp; - do_gettimeofday(&dp->dccps_timestamp_time); + dccp_timestamp(sk, &dp->dccps_timestamp_time); dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n", debug_prefix, opt_recv->dccpor_timestamp, @@ -361,9 +361,13 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) #endif struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; int len = ap->dccpap_buf_vector_len + 2; - const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10; + struct timeval now; + u32 elapsed_time; unsigned char *to, *from; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; + if (elapsed_time != 0) dccp_insert_option_elapsed_time(sk, skb, elapsed_time); @@ -428,13 +432,29 @@ static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) (unsigned long long) ap->dccpap_ack_ackno); } +void dccp_timestamp(const struct sock *sk, struct timeval *tv) +{ + const struct dccp_sock *dp = dccp_sk(sk); + + 
do_gettimeofday(tv); + tv->tv_sec -= dp->dccps_epoch.tv_sec; + tv->tv_usec -= dp->dccps_epoch.tv_usec; + + while (tv->tv_usec < 0) { + tv->tv_sec--; + tv->tv_usec += USEC_PER_SEC; + } +} + +EXPORT_SYMBOL_GPL(dccp_timestamp); + void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb) { struct timeval tv; u32 now; - do_gettimeofday(&tv); - now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10; + dccp_timestamp(sk, &tv); + now = timeval_usecs(&tv) / 10; /* yes this will overflow but that is the point as we want a * 10 usec 32 bit timer which mean it wraps every 11.9 hours */ @@ -452,13 +472,17 @@ static void dccp_insert_option_timestamp_echo(struct sock *sk, const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " : "server TX opt: "; #endif + struct timeval now; u32 tstamp_echo; - const u32 elapsed_time = - timeval_now_delta(&dp->dccps_timestamp_time) / 10; - const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time); - const int len = 6 + elapsed_time_len; + u32 elapsed_time; + int len, elapsed_time_len; unsigned char *to; + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10; + elapsed_time_len = dccp_elapsed_time_len(elapsed_time); + len = 6 + elapsed_time_len; + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert " "timestamp echo!\n"); @@ -623,7 +647,8 @@ static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, /* * Implements the draft-ietf-dccp-spec-11.txt Appendix A */ -int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) +int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state) { /* * Check at the right places if the buffer is full, if it is, tell the @@ -704,7 +729,7 @@ int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state) } ap->dccpap_buf_ackno = ackno; - do_gettimeofday(&ap->dccpap_time); + dccp_timestamp(sk, 
&ap->dccpap_time); out: dccp_pr_debug(""); dccp_ackpkts_print(ap); -- cgit v1.2.3 From 144a50ea5e1487b8b8e722289b4177713354448a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 9 Aug 2005 00:20:10 -0400 Subject: [PATCH] must_check attributes for PCI layer. Self explanatory really. Some newer gcc's print a warning if a function is used and we don't check its result. We do this for a bunch of things in the kernel already, this extends that to the PCI layer. Based on a patch originally from Arjan van de Ven. Signed-off-by: Dave Jones Signed-off-by: Arjan van de Ven Signed-off-by: Greg Kroah-Hartman --- include/linux/pci.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 6caaba0af469..609499356e07 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -376,32 +376,32 @@ static inline int pci_write_config_dword(struct pci_dev *dev, int where, u32 val return pci_bus_write_config_dword (dev->bus, dev->devfn, where, val); } -int pci_enable_device(struct pci_dev *dev); -int pci_enable_device_bars(struct pci_dev *dev, int mask); +int __must_check pci_enable_device(struct pci_dev *dev); +int __must_check pci_enable_device_bars(struct pci_dev *dev, int mask); void pci_disable_device(struct pci_dev *dev); void pci_set_master(struct pci_dev *dev); #define HAVE_PCI_SET_MWI -int pci_set_mwi(struct pci_dev *dev); +int __must_check pci_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); -int pci_set_dma_mask(struct pci_dev *dev, u64 mask); -int pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); +int __must_check pci_set_dma_mask(struct pci_dev *dev, u64 mask); +int __must_check pci_set_consistent_dma_mask(struct pci_dev *dev, u64 mask); void pci_update_resource(struct pci_dev *dev, struct resource *res, int resno); int pci_assign_resource(struct pci_dev *dev, int i); void 
pci_restore_bars(struct pci_dev *dev); /* ROM control related routines */ -void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size); -void __iomem *pci_map_rom_copy(struct pci_dev *pdev, size_t *size); +void __iomem __must_check *pci_map_rom(struct pci_dev *pdev, size_t *size); +void __iomem __must_check *pci_map_rom_copy(struct pci_dev *pdev, size_t *size); void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom); void pci_remove_rom(struct pci_dev *pdev); /* Power management related routines */ int pci_save_state(struct pci_dev *dev); int pci_restore_state(struct pci_dev *dev); -int pci_set_power_state(struct pci_dev *dev, pci_power_t state); -pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state); -int pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); +int __must_check pci_set_power_state(struct pci_dev *dev, pci_power_t state); +pci_power_t __must_check pci_choose_state(struct pci_dev *dev, pm_message_t state); +int __must_check pci_enable_wake(struct pci_dev *dev, pci_power_t state, int enable); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ void pci_bus_assign_resources(struct pci_bus *bus); -- cgit v1.2.3 From 5e41ff9e0650f327a6c819841fa412da95d57319 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 9 Sep 2005 13:01:35 -0700 Subject: [PATCH] security: enable atomic inode security labeling The following patch set enables atomic security labeling of newly created inodes by altering the fs code to invoke a new LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state during the inode creation transaction. This parallels the existing processing for setting ACLs on newly created inodes. Otherwise, it is possible for new inodes to be accessed by another thread via the dcache prior to complete security setup (presently handled by the post_create/mkdir/... 
LSM hooks in the VFS) and a newly created inode may be left unlabeled on the disk in the event of a crash. SELinux presently works around the issue by ensuring that the incore inode security label is initialized to a special SID that is inaccessible to unprivileged processes (in accordance with policy), thereby preventing inappropriate access but potentially causing false denials on legitimate accesses. A simple test program demonstrates such false denials on SELinux, and the patch solves the problem. Similar such false denials have been encountered in real applications. This patch defines a new inode_init_security LSM hook to obtain the security attribute to apply to a newly created inode and to set up the incore inode security state for it, and adds a corresponding hook function implementation to SELinux. Signed-off-by: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/security.h | 41 +++++++++++++++++++++++++++ security/dummy.c | 7 +++++ security/selinux/hooks.c | 59 +++++++++++++++++++++++++++++++++++++++ security/selinux/include/objsec.h | 1 + 4 files changed, 108 insertions(+) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 7aab6ab7c57f..d4f3b7a94ea6 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -250,6 +250,25 @@ struct swap_info_struct; * @inode contains the inode structure. * Deallocate the inode security structure and set @inode->i_security to * NULL. + * @inode_init_security: + * Obtain the security attribute name suffix and value to set on a newly + * created inode and set up the incore security field for the new inode. + * This hook is called by the fs code as part of the inode creation + * transaction and provides for atomic labeling of the inode, unlike + * the post_create/mkdir/... hooks called by the VFS. 
The hook function + * is expected to allocate the name and value via kmalloc, with the caller + * being responsible for calling kfree after using them. + * If the security module does not use security attributes or does + * not wish to put a security attribute on this particular inode, + * then it should return -EOPNOTSUPP to skip this processing. + * @inode contains the inode structure of the newly created inode. + * @dir contains the inode structure of the parent directory. + * @name will be set to the allocated name suffix (e.g. selinux). + * @value will be set to the allocated attribute value. + * @len will be set to the length of the value. + * Returns 0 if @name and @value have been successfully set, + * -EOPNOTSUPP if no security attribute is needed, or + * -ENOMEM on memory allocation failure. * @inode_create: * Check permission to create a regular file. * @dir contains inode structure of the parent of the new file. @@ -1080,6 +1099,8 @@ struct security_operations { int (*inode_alloc_security) (struct inode *inode); void (*inode_free_security) (struct inode *inode); + int (*inode_init_security) (struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len); int (*inode_create) (struct inode *dir, struct dentry *dentry, int mode); void (*inode_post_create) (struct inode *dir, @@ -1442,6 +1463,17 @@ static inline void security_inode_free (struct inode *inode) return; security_ops->inode_free_security (inode); } + +static inline int security_inode_init_security (struct inode *inode, + struct inode *dir, + char **name, + void **value, + size_t *len) +{ + if (unlikely (IS_PRIVATE (inode))) + return -EOPNOTSUPP; + return security_ops->inode_init_security (inode, dir, name, value, len); +} static inline int security_inode_create (struct inode *dir, struct dentry *dentry, @@ -2171,6 +2203,15 @@ static inline int security_inode_alloc (struct inode *inode) static inline void security_inode_free (struct inode *inode) { } + +static inline int 
security_inode_init_security (struct inode *inode, + struct inode *dir, + char **name, + void **value, + size_t *len) +{ + return -EOPNOTSUPP; +} static inline int security_inode_create (struct inode *dir, struct dentry *dentry, diff --git a/security/dummy.c b/security/dummy.c index 6ff887586479..e8a00fa80469 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -258,6 +258,12 @@ static void dummy_inode_free_security (struct inode *inode) return; } +static int dummy_inode_init_security (struct inode *inode, struct inode *dir, + char **name, void **value, size_t *len) +{ + return -EOPNOTSUPP; +} + static int dummy_inode_create (struct inode *inode, struct dentry *dentry, int mask) { @@ -886,6 +892,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sb_post_pivotroot); set_to_dummy_if_null(ops, inode_alloc_security); set_to_dummy_if_null(ops, inode_free_security); + set_to_dummy_if_null(ops, inode_init_security); set_to_dummy_if_null(ops, inode_create); set_to_dummy_if_null(ops, inode_post_create); set_to_dummy_if_null(ops, inode_link); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 8641f8894b4c..63701fe0e1ad 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1274,6 +1274,7 @@ static int post_create(struct inode *dir, struct inode *inode; struct inode_security_struct *dsec; struct superblock_security_struct *sbsec; + struct inode_security_struct *isec; u32 newsid; char *context; unsigned int len; @@ -1293,6 +1294,11 @@ static int post_create(struct inode *dir, return 0; } + isec = inode->i_security; + + if (isec->security_attr_init) + return 0; + if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { newsid = tsec->create_sid; } else { @@ -2018,6 +2024,58 @@ static void selinux_inode_free_security(struct inode *inode) inode_free_security(inode); } +static int selinux_inode_init_security(struct inode *inode, struct inode *dir, + char **name, void **value, + size_t 
*len) +{ + struct task_security_struct *tsec; + struct inode_security_struct *dsec; + struct superblock_security_struct *sbsec; + struct inode_security_struct *isec; + u32 newsid; + int rc; + char *namep, *context; + + tsec = current->security; + dsec = dir->i_security; + sbsec = dir->i_sb->s_security; + isec = inode->i_security; + + if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { + newsid = tsec->create_sid; + } else { + rc = security_transition_sid(tsec->sid, dsec->sid, + inode_mode_to_security_class(inode->i_mode), + &newsid); + if (rc) { + printk(KERN_WARNING "%s: " + "security_transition_sid failed, rc=%d (dev=%s " + "ino=%ld)\n", + __FUNCTION__, + -rc, inode->i_sb->s_id, inode->i_ino); + return rc; + } + } + + inode_security_set_sid(inode, newsid); + + namep = kstrdup(XATTR_SELINUX_SUFFIX, GFP_KERNEL); + if (!namep) + return -ENOMEM; + *name = namep; + + rc = security_sid_to_context(newsid, &context, len); + if (rc) { + kfree(namep); + return rc; + } + *value = context; + + isec->security_attr_init = 1; + + return 0; +} + static int selinux_inode_create(struct inode *dir, struct dentry *dentry, int mask) { return may_create(dir, dentry, SECCLASS_FILE); @@ -4298,6 +4356,7 @@ static struct security_operations selinux_ops = { .inode_alloc_security = selinux_inode_alloc_security, .inode_free_security = selinux_inode_free_security, + .inode_init_security = selinux_inode_init_security, .inode_create = selinux_inode_create, .inode_post_create = selinux_inode_post_create, .inode_link = selinux_inode_link, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 887937c8134a..c515bc0b58a1 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -46,6 +46,7 @@ struct inode_security_struct { unsigned char initialized; /* initialization flag */ struct semaphore sem; unsigned char inherit; /* inherit SID from parent entry */ + unsigned char security_attr_init; /* security attributes 
init flag */ }; struct file_security_struct { -- cgit v1.2.3 From a74574aafea3a63add3251047601611111f44562 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 9 Sep 2005 13:01:44 -0700 Subject: [PATCH] Remove security_inode_post_create/mkdir/symlink/mknod hooks This patch removes the inode_post_create/mkdir/mknod/symlink LSM hooks as they are obsoleted by the new inode_init_security hook that enables atomic inode security labeling. If anyone sees any reason to retain these hooks, please speak now. Also, is anyone using the post_rename/link hooks; if not, those could also be removed. Signed-off-by: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 16 ++---- include/linux/security.h | 90 ------------------------------- security/dummy.c | 28 ---------- security/selinux/hooks.c | 111 -------------------------------------- security/selinux/include/objsec.h | 1 - 5 files changed, 4 insertions(+), 242 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 145e852c4bd0..993a65a7d570 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1316,10 +1316,8 @@ int vfs_create(struct inode *dir, struct dentry *dentry, int mode, return error; DQUOT_INIT(dir); error = dir->i_op->create(dir, dentry, mode, nd); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_create(dir, dentry, mode); - } return error; } @@ -1635,10 +1633,8 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) DQUOT_INIT(dir); error = dir->i_op->mknod(dir, dentry, mode, dev); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_mknod(dir, dentry, mode, dev); - } return error; } @@ -1708,10 +1704,8 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) DQUOT_INIT(dir); error = dir->i_op->mkdir(dir, dentry, mode); - if (!error) { + if (!error) fsnotify_mkdir(dir, dentry->d_name.name); - security_inode_post_mkdir(dir,dentry, 
mode); - } return error; } @@ -1947,10 +1941,8 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i DQUOT_INIT(dir); error = dir->i_op->symlink(dir, dentry, oldname); - if (!error) { + if (!error) fsnotify_create(dir, dentry->d_name.name); - security_inode_post_symlink(dir, dentry, oldname); - } return error; } diff --git a/include/linux/security.h b/include/linux/security.h index d4f3b7a94ea6..875225bf8986 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -275,12 +275,6 @@ struct swap_info_struct; * @dentry contains the dentry structure for the file to be created. * @mode contains the file mode of the file to be created. * Return 0 if permission is granted. - * @inode_post_create: - * Set the security attributes on a newly created regular file. This hook - * is called after a file has been successfully created. - * @dir contains the inode structure of the parent directory of the new file. - * @dentry contains the the dentry structure for the newly created file. - * @mode contains the file mode. * @inode_link: * Check permission before creating a new hard link to a file. * @old_dentry contains the dentry structure for an existing link to the file. @@ -303,13 +297,6 @@ struct swap_info_struct; * @dentry contains the dentry structure of the symbolic link. * @old_name contains the pathname of file. * Return 0 if permission is granted. - * @inode_post_symlink: - * @dir contains the inode structure of the parent directory of the new link. - * @dentry contains the dentry structure of new symbolic link. - * @old_name contains the pathname of file. - * Set security attributes for a newly created symbolic link. Note that - * @dentry->d_inode may be NULL, since the filesystem might not - * instantiate the dentry (e.g. NFS). * @inode_mkdir: * Check permissions to create a new directory in the existing directory * associated with inode strcture @dir. 
@@ -317,11 +304,6 @@ struct swap_info_struct; * @dentry contains the dentry structure of new directory. * @mode contains the mode of new directory. * Return 0 if permission is granted. - * @inode_post_mkdir: - * Set security attributes on a newly created directory. - * @dir contains the inode structure of parent of the directory to be created. - * @dentry contains the dentry structure of new directory. - * @mode contains the mode of new directory. * @inode_rmdir: * Check the permission to remove a directory. * @dir contains the inode structure of parent of the directory to be removed. @@ -337,13 +319,6 @@ struct swap_info_struct; * @mode contains the mode of the new file. * @dev contains the the device number. * Return 0 if permission is granted. - * @inode_post_mknod: - * Set security attributes on a newly created special file (or socket or - * fifo file created via the mknod system call). - * @dir contains the inode structure of parent of the new node. - * @dentry contains the dentry structure of the new node. - * @mode contains the mode of the new node. - * @dev contains the the device number. * @inode_rename: * Check for permission to rename a file or directory. * @old_dir contains the inode structure for parent of the old link. 
@@ -1103,8 +1078,6 @@ struct security_operations { char **name, void **value, size_t *len); int (*inode_create) (struct inode *dir, struct dentry *dentry, int mode); - void (*inode_post_create) (struct inode *dir, - struct dentry *dentry, int mode); int (*inode_link) (struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); void (*inode_post_link) (struct dentry *old_dentry, @@ -1112,17 +1085,10 @@ struct security_operations { int (*inode_unlink) (struct inode *dir, struct dentry *dentry); int (*inode_symlink) (struct inode *dir, struct dentry *dentry, const char *old_name); - void (*inode_post_symlink) (struct inode *dir, - struct dentry *dentry, - const char *old_name); int (*inode_mkdir) (struct inode *dir, struct dentry *dentry, int mode); - void (*inode_post_mkdir) (struct inode *dir, struct dentry *dentry, - int mode); int (*inode_rmdir) (struct inode *dir, struct dentry *dentry); int (*inode_mknod) (struct inode *dir, struct dentry *dentry, int mode, dev_t dev); - void (*inode_post_mknod) (struct inode *dir, struct dentry *dentry, - int mode, dev_t dev); int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); void (*inode_post_rename) (struct inode *old_dir, @@ -1484,15 +1450,6 @@ static inline int security_inode_create (struct inode *dir, return security_ops->inode_create (dir, dentry, mode); } -static inline void security_inode_post_create (struct inode *dir, - struct dentry *dentry, - int mode) -{ - if (dentry->d_inode && unlikely (IS_PRIVATE (dentry->d_inode))) - return; - security_ops->inode_post_create (dir, dentry, mode); -} - static inline int security_inode_link (struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -1528,15 +1485,6 @@ static inline int security_inode_symlink (struct inode *dir, return security_ops->inode_symlink (dir, dentry, old_name); } -static inline void security_inode_post_symlink (struct inode *dir, - struct dentry 
*dentry, - const char *old_name) -{ - if (dentry->d_inode && unlikely (IS_PRIVATE (dentry->d_inode))) - return; - security_ops->inode_post_symlink (dir, dentry, old_name); -} - static inline int security_inode_mkdir (struct inode *dir, struct dentry *dentry, int mode) @@ -1546,15 +1494,6 @@ static inline int security_inode_mkdir (struct inode *dir, return security_ops->inode_mkdir (dir, dentry, mode); } -static inline void security_inode_post_mkdir (struct inode *dir, - struct dentry *dentry, - int mode) -{ - if (dentry->d_inode && unlikely (IS_PRIVATE (dentry->d_inode))) - return; - security_ops->inode_post_mkdir (dir, dentry, mode); -} - static inline int security_inode_rmdir (struct inode *dir, struct dentry *dentry) { @@ -1572,15 +1511,6 @@ static inline int security_inode_mknod (struct inode *dir, return security_ops->inode_mknod (dir, dentry, mode, dev); } -static inline void security_inode_post_mknod (struct inode *dir, - struct dentry *dentry, - int mode, dev_t dev) -{ - if (dentry->d_inode && unlikely (IS_PRIVATE (dentry->d_inode))) - return; - security_ops->inode_post_mknod (dir, dentry, mode, dev); -} - static inline int security_inode_rename (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -2220,11 +2150,6 @@ static inline int security_inode_create (struct inode *dir, return 0; } -static inline void security_inode_post_create (struct inode *dir, - struct dentry *dentry, - int mode) -{ } - static inline int security_inode_link (struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) @@ -2250,11 +2175,6 @@ static inline int security_inode_symlink (struct inode *dir, return 0; } -static inline void security_inode_post_symlink (struct inode *dir, - struct dentry *dentry, - const char *old_name) -{ } - static inline int security_inode_mkdir (struct inode *dir, struct dentry *dentry, int mode) @@ -2262,11 +2182,6 @@ static inline int security_inode_mkdir (struct inode *dir, return 0; } -static inline void 
security_inode_post_mkdir (struct inode *dir, - struct dentry *dentry, - int mode) -{ } - static inline int security_inode_rmdir (struct inode *dir, struct dentry *dentry) { @@ -2280,11 +2195,6 @@ static inline int security_inode_mknod (struct inode *dir, return 0; } -static inline void security_inode_post_mknod (struct inode *dir, - struct dentry *dentry, - int mode, dev_t dev) -{ } - static inline int security_inode_rename (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, diff --git a/security/dummy.c b/security/dummy.c index e8a00fa80469..5083314e14b1 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -270,12 +270,6 @@ static int dummy_inode_create (struct inode *inode, struct dentry *dentry, return 0; } -static void dummy_inode_post_create (struct inode *inode, struct dentry *dentry, - int mask) -{ - return; -} - static int dummy_inode_link (struct dentry *old_dentry, struct inode *inode, struct dentry *new_dentry) { @@ -300,24 +294,12 @@ static int dummy_inode_symlink (struct inode *inode, struct dentry *dentry, return 0; } -static void dummy_inode_post_symlink (struct inode *inode, - struct dentry *dentry, const char *name) -{ - return; -} - static int dummy_inode_mkdir (struct inode *inode, struct dentry *dentry, int mask) { return 0; } -static void dummy_inode_post_mkdir (struct inode *inode, struct dentry *dentry, - int mask) -{ - return; -} - static int dummy_inode_rmdir (struct inode *inode, struct dentry *dentry) { return 0; @@ -329,12 +311,6 @@ static int dummy_inode_mknod (struct inode *inode, struct dentry *dentry, return 0; } -static void dummy_inode_post_mknod (struct inode *inode, struct dentry *dentry, - int mode, dev_t dev) -{ - return; -} - static int dummy_inode_rename (struct inode *old_inode, struct dentry *old_dentry, struct inode *new_inode, @@ -894,17 +870,13 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, inode_free_security); set_to_dummy_if_null(ops, 
inode_init_security); set_to_dummy_if_null(ops, inode_create); - set_to_dummy_if_null(ops, inode_post_create); set_to_dummy_if_null(ops, inode_link); set_to_dummy_if_null(ops, inode_post_link); set_to_dummy_if_null(ops, inode_unlink); set_to_dummy_if_null(ops, inode_symlink); - set_to_dummy_if_null(ops, inode_post_symlink); set_to_dummy_if_null(ops, inode_mkdir); - set_to_dummy_if_null(ops, inode_post_mkdir); set_to_dummy_if_null(ops, inode_rmdir); set_to_dummy_if_null(ops, inode_mknod); - set_to_dummy_if_null(ops, inode_post_mknod); set_to_dummy_if_null(ops, inode_rename); set_to_dummy_if_null(ops, inode_post_rename); set_to_dummy_if_null(ops, inode_readlink); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 265f33d3af9b..c9c20828be79 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1265,91 +1265,6 @@ static int inode_security_set_sid(struct inode *inode, u32 sid) return 0; } -/* Set the security attributes on a newly created file. */ -static int post_create(struct inode *dir, - struct dentry *dentry) -{ - - struct task_security_struct *tsec; - struct inode *inode; - struct inode_security_struct *dsec; - struct superblock_security_struct *sbsec; - struct inode_security_struct *isec; - u32 newsid; - char *context; - unsigned int len; - int rc; - - tsec = current->security; - dsec = dir->i_security; - sbsec = dir->i_sb->s_security; - - inode = dentry->d_inode; - if (!inode) { - /* Some file system types (e.g. NFS) may not instantiate - a dentry for all create operations (e.g. symlink), - so we have to check to see if the inode is non-NULL. 
*/ - printk(KERN_WARNING "post_create: no inode, dir (dev=%s, " - "ino=%ld)\n", dir->i_sb->s_id, dir->i_ino); - return 0; - } - - isec = inode->i_security; - - if (isec->security_attr_init) - return 0; - - if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) { - newsid = tsec->create_sid; - } else { - rc = security_transition_sid(tsec->sid, dsec->sid, - inode_mode_to_security_class(inode->i_mode), - &newsid); - if (rc) { - printk(KERN_WARNING "post_create: " - "security_transition_sid failed, rc=%d (dev=%s " - "ino=%ld)\n", - -rc, inode->i_sb->s_id, inode->i_ino); - return rc; - } - } - - rc = inode_security_set_sid(inode, newsid); - if (rc) { - printk(KERN_WARNING "post_create: inode_security_set_sid " - "failed, rc=%d (dev=%s ino=%ld)\n", - -rc, inode->i_sb->s_id, inode->i_ino); - return rc; - } - - if (sbsec->behavior == SECURITY_FS_USE_XATTR && - inode->i_op->setxattr) { - /* Use extended attributes. */ - rc = security_sid_to_context(newsid, &context, &len); - if (rc) { - printk(KERN_WARNING "post_create: sid_to_context " - "failed, rc=%d (dev=%s ino=%ld)\n", - -rc, inode->i_sb->s_id, inode->i_ino); - return rc; - } - down(&inode->i_sem); - rc = inode->i_op->setxattr(dentry, - XATTR_NAME_SELINUX, - context, len, 0); - up(&inode->i_sem); - kfree(context); - if (rc < 0) { - printk(KERN_WARNING "post_create: setxattr failed, " - "rc=%d (dev=%s ino=%ld)\n", - -rc, inode->i_sb->s_id, inode->i_ino); - return rc; - } - } - - return 0; -} - - /* Hook functions begin here. 
*/ static int selinux_ptrace(struct task_struct *parent, struct task_struct *child) @@ -2076,8 +1991,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, *len = clen; } - isec->security_attr_init = 1; - return 0; } @@ -2086,11 +1999,6 @@ static int selinux_inode_create(struct inode *dir, struct dentry *dentry, int ma return may_create(dir, dentry, SECCLASS_FILE); } -static void selinux_inode_post_create(struct inode *dir, struct dentry *dentry, int mask) -{ - post_create(dir, dentry); -} - static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry) { int rc; @@ -2121,21 +2029,11 @@ static int selinux_inode_symlink(struct inode *dir, struct dentry *dentry, const return may_create(dir, dentry, SECCLASS_LNK_FILE); } -static void selinux_inode_post_symlink(struct inode *dir, struct dentry *dentry, const char *name) -{ - post_create(dir, dentry); -} - static int selinux_inode_mkdir(struct inode *dir, struct dentry *dentry, int mask) { return may_create(dir, dentry, SECCLASS_DIR); } -static void selinux_inode_post_mkdir(struct inode *dir, struct dentry *dentry, int mask) -{ - post_create(dir, dentry); -} - static int selinux_inode_rmdir(struct inode *dir, struct dentry *dentry) { return may_link(dir, dentry, MAY_RMDIR); @@ -2152,11 +2050,6 @@ static int selinux_inode_mknod(struct inode *dir, struct dentry *dentry, int mod return may_create(dir, dentry, inode_mode_to_security_class(mode)); } -static void selinux_inode_post_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) -{ - post_create(dir, dentry); -} - static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dentry, struct inode *new_inode, struct dentry *new_dentry) { @@ -4363,17 +4256,13 @@ static struct security_operations selinux_ops = { .inode_free_security = selinux_inode_free_security, .inode_init_security = selinux_inode_init_security, .inode_create = selinux_inode_create, - .inode_post_create = 
selinux_inode_post_create, .inode_link = selinux_inode_link, .inode_post_link = selinux_inode_post_link, .inode_unlink = selinux_inode_unlink, .inode_symlink = selinux_inode_symlink, - .inode_post_symlink = selinux_inode_post_symlink, .inode_mkdir = selinux_inode_mkdir, - .inode_post_mkdir = selinux_inode_post_mkdir, .inode_rmdir = selinux_inode_rmdir, .inode_mknod = selinux_inode_mknod, - .inode_post_mknod = selinux_inode_post_mknod, .inode_rename = selinux_inode_rename, .inode_post_rename = selinux_inode_post_rename, .inode_readlink = selinux_inode_readlink, diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index c515bc0b58a1..887937c8134a 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -46,7 +46,6 @@ struct inode_security_struct { unsigned char initialized; /* initialization flag */ struct semaphore sem; unsigned char inherit; /* inherit SID from parent entry */ - unsigned char security_attr_init; /* security attributes init flag */ }; struct file_security_struct { -- cgit v1.2.3 From e31e14ec356f36b131576be5bc31d8fef7e95483 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 9 Sep 2005 13:01:45 -0700 Subject: [PATCH] remove the inode_post_link and inode_post_rename LSM hooks This patch removes the inode_post_link and inode_post_rename LSM hooks as they are unused (and likely useless). 
Signed-off-by: Stephen Smalley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/namei.c | 10 ++-------- include/linux/security.h | 49 ------------------------------------------------ security/dummy.c | 17 ----------------- security/selinux/hooks.c | 13 ------------- 4 files changed, 2 insertions(+), 87 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 993a65a7d570..21d85f1ac839 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2012,10 +2012,8 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de DQUOT_INIT(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); up(&old_dentry->d_inode->i_sem); - if (!error) { + if (!error) fsnotify_create(dir, new_dentry->d_name.name); - security_inode_post_link(old_dentry, dir, new_dentry); - } return error; } @@ -2134,11 +2132,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, d_rehash(new_dentry); dput(new_dentry); } - if (!error) { + if (!error) d_move(old_dentry,new_dentry); - security_inode_post_rename(old_dir, old_dentry, - new_dir, new_dentry); - } return error; } @@ -2164,7 +2159,6 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, /* The following d_move() should become unconditional */ if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) d_move(old_dentry, new_dentry); - security_inode_post_rename(old_dir, old_dentry, new_dir, new_dentry); } if (target) up(&target->i_sem); diff --git a/include/linux/security.h b/include/linux/security.h index 875225bf8986..55b02e1c73f4 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -281,11 +281,6 @@ struct swap_info_struct; * @dir contains the inode structure of the parent directory of the new link. * @new_dentry contains the dentry structure for the new link. * Return 0 if permission is granted. - * @inode_post_link: - * Set security attributes for a new hard link to a file. 
- * @old_dentry contains the dentry structure for the existing link. - * @dir contains the inode structure of the parent directory of the new file. - * @new_dentry contains the dentry structure for the new file link. * @inode_unlink: * Check the permission to remove a hard link to a file. * @dir contains the inode structure of parent directory of the file. @@ -326,12 +321,6 @@ struct swap_info_struct; * @new_dir contains the inode structure for parent of the new link. * @new_dentry contains the dentry structure of the new link. * Return 0 if permission is granted. - * @inode_post_rename: - * Set security attributes on a renamed file or directory. - * @old_dir contains the inode structure for parent of the old link. - * @old_dentry contains the dentry structure of the old link. - * @new_dir contains the inode structure for parent of the new link. - * @new_dentry contains the dentry structure of the new link. * @inode_readlink: * Check the permission to read the symbolic link. * @dentry contains the dentry structure for the file link. 
@@ -1080,8 +1069,6 @@ struct security_operations { struct dentry *dentry, int mode); int (*inode_link) (struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry); - void (*inode_post_link) (struct dentry *old_dentry, - struct inode *dir, struct dentry *new_dentry); int (*inode_unlink) (struct inode *dir, struct dentry *dentry); int (*inode_symlink) (struct inode *dir, struct dentry *dentry, const char *old_name); @@ -1091,10 +1078,6 @@ struct security_operations { int mode, dev_t dev); int (*inode_rename) (struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); - void (*inode_post_rename) (struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry); int (*inode_readlink) (struct dentry *dentry); int (*inode_follow_link) (struct dentry *dentry, struct nameidata *nd); int (*inode_permission) (struct inode *inode, int mask, struct nameidata *nd); @@ -1459,15 +1442,6 @@ static inline int security_inode_link (struct dentry *old_dentry, return security_ops->inode_link (old_dentry, dir, new_dentry); } -static inline void security_inode_post_link (struct dentry *old_dentry, - struct inode *dir, - struct dentry *new_dentry) -{ - if (new_dentry->d_inode && unlikely (IS_PRIVATE (new_dentry->d_inode))) - return; - security_ops->inode_post_link (old_dentry, dir, new_dentry); -} - static inline int security_inode_unlink (struct inode *dir, struct dentry *dentry) { @@ -1523,18 +1497,6 @@ static inline int security_inode_rename (struct inode *old_dir, new_dir, new_dentry); } -static inline void security_inode_post_rename (struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry) -{ - if (unlikely (IS_PRIVATE (old_dentry->d_inode) || - (new_dentry->d_inode && IS_PRIVATE (new_dentry->d_inode)))) - return; - security_ops->inode_post_rename (old_dir, old_dentry, - new_dir, new_dentry); -} - static inline int security_inode_readlink 
(struct dentry *dentry) { if (unlikely (IS_PRIVATE (dentry->d_inode))) @@ -2157,11 +2119,6 @@ static inline int security_inode_link (struct dentry *old_dentry, return 0; } -static inline void security_inode_post_link (struct dentry *old_dentry, - struct inode *dir, - struct dentry *new_dentry) -{ } - static inline int security_inode_unlink (struct inode *dir, struct dentry *dentry) { @@ -2203,12 +2160,6 @@ static inline int security_inode_rename (struct inode *old_dir, return 0; } -static inline void security_inode_post_rename (struct inode *old_dir, - struct dentry *old_dentry, - struct inode *new_dir, - struct dentry *new_dentry) -{ } - static inline int security_inode_readlink (struct dentry *dentry) { return 0; diff --git a/security/dummy.c b/security/dummy.c index 5083314e14b1..9623a61dfc76 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -276,13 +276,6 @@ static int dummy_inode_link (struct dentry *old_dentry, struct inode *inode, return 0; } -static void dummy_inode_post_link (struct dentry *old_dentry, - struct inode *inode, - struct dentry *new_dentry) -{ - return; -} - static int dummy_inode_unlink (struct inode *inode, struct dentry *dentry) { return 0; @@ -319,14 +312,6 @@ static int dummy_inode_rename (struct inode *old_inode, return 0; } -static void dummy_inode_post_rename (struct inode *old_inode, - struct dentry *old_dentry, - struct inode *new_inode, - struct dentry *new_dentry) -{ - return; -} - static int dummy_inode_readlink (struct dentry *dentry) { return 0; @@ -871,14 +856,12 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, inode_init_security); set_to_dummy_if_null(ops, inode_create); set_to_dummy_if_null(ops, inode_link); - set_to_dummy_if_null(ops, inode_post_link); set_to_dummy_if_null(ops, inode_unlink); set_to_dummy_if_null(ops, inode_symlink); set_to_dummy_if_null(ops, inode_mkdir); set_to_dummy_if_null(ops, inode_rmdir); set_to_dummy_if_null(ops, inode_mknod); set_to_dummy_if_null(ops, 
inode_rename); - set_to_dummy_if_null(ops, inode_post_rename); set_to_dummy_if_null(ops, inode_readlink); set_to_dummy_if_null(ops, inode_follow_link); set_to_dummy_if_null(ops, inode_permission); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index c9c20828be79..3f0b533be92c 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2009,11 +2009,6 @@ static int selinux_inode_link(struct dentry *old_dentry, struct inode *dir, stru return may_link(dir, old_dentry, MAY_LINK); } -static void selinux_inode_post_link(struct dentry *old_dentry, struct inode *inode, struct dentry *new_dentry) -{ - return; -} - static int selinux_inode_unlink(struct inode *dir, struct dentry *dentry) { int rc; @@ -2056,12 +2051,6 @@ static int selinux_inode_rename(struct inode *old_inode, struct dentry *old_dent return may_rename(old_inode, old_dentry, new_inode, new_dentry); } -static void selinux_inode_post_rename(struct inode *old_inode, struct dentry *old_dentry, - struct inode *new_inode, struct dentry *new_dentry) -{ - return; -} - static int selinux_inode_readlink(struct dentry *dentry) { return dentry_has_perm(current, NULL, dentry, FILE__READ); @@ -4257,14 +4246,12 @@ static struct security_operations selinux_ops = { .inode_init_security = selinux_inode_init_security, .inode_create = selinux_inode_create, .inode_link = selinux_inode_link, - .inode_post_link = selinux_inode_post_link, .inode_unlink = selinux_inode_unlink, .inode_symlink = selinux_inode_symlink, .inode_mkdir = selinux_inode_mkdir, .inode_rmdir = selinux_inode_rmdir, .inode_mknod = selinux_inode_mknod, .inode_rename = selinux_inode_rename, - .inode_post_rename = selinux_inode_post_rename, .inode_readlink = selinux_inode_readlink, .inode_follow_link = selinux_inode_follow_link, .inode_permission = selinux_inode_permission, -- cgit v1.2.3 From 383f2835eb9afb723af71850037b2f074ac9db60 Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Fri, 9 Sep 2005 13:02:02 -0700 Subject: [PATCH] 
Prefetch kernel stacks to speed up context switch For architectures like ia64, the switch stack structure is fairly large (currently 528 bytes). For context-switch-intensive applications, we found that a significant number of cache misses occur in the switch_to() function. The following patch adds a hook in the schedule() function to prefetch the switch stack structure as soon as the 'next' task is determined. This allows maximum overlap in prefetching cache lines for that structure. Signed-off-by: Ken Chen Cc: Ingo Molnar Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/entry.S | 23 +++++++++++++++++++++++ include/asm-ia64/system.h | 1 + include/linux/sched.h | 5 +++++ kernel/sched.c | 1 + 4 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 3c8821024509..915e12791836 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -470,6 +470,29 @@ ENTRY(load_switch_stack) br.cond.sptk.many b7 END(load_switch_stack) +GLOBAL_ENTRY(prefetch_stack) + add r14 = -IA64_SWITCH_STACK_SIZE, sp + add r15 = IA64_TASK_THREAD_KSP_OFFSET, in0 + ;; + ld8 r16 = [r15] // load next's stack pointer + lfetch.fault.excl [r14], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault.excl [r14], 128 + lfetch.fault [r16], 128 + ;; + lfetch.fault [r16], 128 + br.ret.sptk.many rp +END(prefetch_switch_stack) + GLOBAL_ENTRY(execve) mov r15=__NR_execve // put syscall number in place break __BREAK_SYSCALL diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h index 33256db4a7cf..635235fa1e32 100644 --- a/include/asm-ia64/system.h +++ b/include/asm-ia64/system.h @@ -275,6 +275,7 @@ extern void ia64_load_extra (struct task_struct *task); */ #define __ARCH_WANT_UNLOCKED_CTXSW +#define ARCH_HAS_PREFETCH_SWITCH_STACK
#define ia64_platform_is(x) (strcmp(x, platform_name) == 0) void cpu_idle_wait(void); diff --git a/include/linux/sched.h b/include/linux/sched.h index ea1b5f32ec5c..c551e6a1447e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -604,6 +604,11 @@ extern int groups_search(struct group_info *group_info, gid_t grp); #define GROUP_AT(gi, i) \ ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) +#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK +extern void prefetch_stack(struct task_struct*); +#else +static inline void prefetch_stack(struct task_struct *t) { } +#endif struct audit_context; /* See audit.c */ struct mempolicy; diff --git a/kernel/sched.c b/kernel/sched.c index 18b95520a2e2..2632b812cf24 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2888,6 +2888,7 @@ switch_tasks: if (next == rq->idle) schedstat_inc(rq, sched_goidle); prefetch(next); + prefetch_stack(next); clear_tsk_need_resched(prev); rcu_qsctr_inc(task_cpu(prev)); -- cgit v1.2.3 From fac92becdaecff64dd91daab0292c5131de92f0d Mon Sep 17 00:00:00 2001 From: Andrew Stribblehill Date: Fri, 9 Sep 2005 13:02:04 -0700 Subject: [PATCH] bfs: fix endianness, signedness; add trivial bugfix * Makes BFS code endianness-clean. * Fixes some signedness warnings. * Fixes a problem in fs/bfs/inode.c:164 where inodes not synced to disk don't get fully marked as clean. Here's how to reproduce it: # mount -o loop -t bfs /bfs.img /mnt # df -i /mnt Filesystem Inodes IUsed IFree IUse% Mounted on /bfs.img 48 1 47 3% /mnt # df -k /mnt Filesystem 1K-blocks Used Available Use% Mounted on /bfs.img 512 5 508 1% /mnt # cp 60k-archive.zip /mnt/mt.zip # df -k /mnt Filesystem 1K-blocks Used Available Use% Mounted on /bfs.img 512 65 447 13% /mnt # df -i /mnt Filesystem Inodes IUsed IFree IUse% Mounted on /bfs.img 48 2 46 5% /mnt # rm /mnt/mt.zip # echo $? 
0 [If the unlink happens before the buffers flush, the following happens:] # df -i /mnt Filesystem Inodes IUsed IFree IUse% Mounted on /bfs.img 48 2 46 5% /mnt # df -k /mnt Filesystem 1K-blocks Used Available Use% Mounted on /bfs.img 512 65 447 13% /mnt fs/bfs/bfs.h | 1 Signed-off-by: Andrew Stribblehill Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/bfs/bfs.h | 1 - fs/bfs/dir.c | 25 ++++++------ fs/bfs/file.c | 23 ++++++----- fs/bfs/inode.c | 102 ++++++++++++++++++++++++++----------------------- include/linux/bfs_fs.h | 23 +++++------ 5 files changed, 93 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 1020dbc88bec..1fbc53f14aba 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -20,7 +20,6 @@ struct bfs_sb_info { unsigned long si_lasti; unsigned long * si_imap; struct buffer_head * si_sbh; /* buffer header w/superblock */ - struct bfs_super_block * si_bfs_sb; /* superblock in si_sbh->b_data */ }; /* diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 5a1e5ce057ff..e240c335eb23 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -2,6 +2,7 @@ * fs/bfs/dir.c * BFS directory operations. * Copyright (C) 1999,2000 Tigran Aivazian + * Made endianness-clean by Andrew Stribblehill 2005 */ #include @@ -20,9 +21,9 @@ #define dprintf(x...) 
#endif -static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino); +static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino); static struct buffer_head * bfs_find_entry(struct inode * dir, - const char * name, int namelen, struct bfs_dirent ** res_dir); + const unsigned char * name, int namelen, struct bfs_dirent ** res_dir); static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) { @@ -53,7 +54,7 @@ static int bfs_readdir(struct file * f, void * dirent, filldir_t filldir) de = (struct bfs_dirent *)(bh->b_data + offset); if (de->ino) { int size = strnlen(de->name, BFS_NAMELEN); - if (filldir(dirent, de->name, size, f->f_pos, de->ino, DT_UNKNOWN) < 0) { + if (filldir(dirent, de->name, size, f->f_pos, le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { brelse(bh); unlock_kernel(); return 0; @@ -107,7 +108,7 @@ static int bfs_create(struct inode * dir, struct dentry * dentry, int mode, inode->i_mapping->a_ops = &bfs_aops; inode->i_mode = mode; inode->i_ino = ino; - BFS_I(inode)->i_dsk_ino = ino; + BFS_I(inode)->i_dsk_ino = cpu_to_le16(ino); BFS_I(inode)->i_sblock = 0; BFS_I(inode)->i_eblock = 0; insert_inode_hash(inode); @@ -139,7 +140,7 @@ static struct dentry * bfs_lookup(struct inode * dir, struct dentry * dentry, st lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); if (bh) { - unsigned long ino = le32_to_cpu(de->ino); + unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); inode = iget(dir->i_sb, ino); if (!inode) { @@ -183,7 +184,7 @@ static int bfs_unlink(struct inode * dir, struct dentry * dentry) inode = dentry->d_inode; lock_kernel(); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); - if (!bh || de->ino != inode->i_ino) + if (!bh || le16_to_cpu(de->ino) != inode->i_ino) goto out_brelse; if (!inode->i_nlink) { @@ -224,7 +225,7 @@ static int bfs_rename(struct inode * old_dir, struct dentry * old_dentry, 
old_dentry->d_name.name, old_dentry->d_name.len, &old_de); - if (!old_bh || old_de->ino != old_inode->i_ino) + if (!old_bh || le16_to_cpu(old_de->ino) != old_inode->i_ino) goto end_rename; error = -EPERM; @@ -270,7 +271,7 @@ struct inode_operations bfs_dir_inops = { .rename = bfs_rename, }; -static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int ino) +static int bfs_add_entry(struct inode * dir, const unsigned char * name, int namelen, int ino) { struct buffer_head * bh; struct bfs_dirent * de; @@ -304,7 +305,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int } dir->i_mtime = CURRENT_TIME_SEC; mark_inode_dirty(dir); - de->ino = ino; + de->ino = cpu_to_le16((u16)ino); for (i=0; i<BFS_NAMELEN; i++) de->name[i] = (i < namelen) ? name[i] : 0; mark_buffer_dirty(bh); @@ -317,7 +318,7 @@ static int bfs_add_entry(struct inode * dir, const char * name, int namelen, int return -ENOSPC; } -static inline int bfs_namecmp(int len, const char * name, const char * buffer) +static inline int bfs_namecmp(int len, const unsigned char * name, const char * buffer) { if (len < BFS_NAMELEN && buffer[len]) return 0; @@ -325,7 +326,7 @@ static inline int bfs_namecmp(int len, const char * name, const char * buffer) } static struct buffer_head * bfs_find_entry(struct inode * dir, - const char * name, int namelen, struct bfs_dirent ** res_dir) + const unsigned char * name, int namelen, struct bfs_dirent ** res_dir) { unsigned long block, offset; struct buffer_head * bh; @@ -346,7 +347,7 @@ static struct buffer_head * bfs_find_entry(struct inode * dir, } de = (struct bfs_dirent *)(bh->b_data + offset); offset += BFS_DIRENT_SIZE; - if (de->ino && bfs_namecmp(namelen, name, de->name)) { + if (le16_to_cpu(de->ino) && bfs_namecmp(namelen, name, de->name)) { *res_dir = de; return bh; } diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 747fd1ea55e0..807723b65daf 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -40,8 +40,8 @@ static int bfs_move_block(unsigned 
long from, unsigned long to, struct super_blo return 0; } -static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned long end, - unsigned long where) +static int bfs_move_blocks(struct super_block *sb, unsigned long start, + unsigned long end, unsigned long where) { unsigned long i; @@ -57,20 +57,21 @@ static int bfs_move_blocks(struct super_block *sb, unsigned long start, unsigned static int bfs_get_block(struct inode * inode, sector_t block, struct buffer_head * bh_result, int create) { - long phys; + unsigned long phys; int err; struct super_block *sb = inode->i_sb; struct bfs_sb_info *info = BFS_SB(sb); struct bfs_inode_info *bi = BFS_I(inode); struct buffer_head *sbh = info->si_sbh; - if (block < 0 || block > info->si_blocks) + if (block > info->si_blocks) return -EIO; phys = bi->i_sblock + block; if (!create) { if (phys <= bi->i_eblock) { - dprintf("c=%d, b=%08lx, phys=%08lx (granted)\n", create, block, phys); + dprintf("c=%d, b=%08lx, phys=%09lx (granted)\n", + create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); } return 0; @@ -80,7 +81,7 @@ static int bfs_get_block(struct inode * inode, sector_t block, of blocks allocated for this file, we can grant it */ if (inode->i_size && phys <= bi->i_eblock) { dprintf("c=%d, b=%08lx, phys=%08lx (interim block granted)\n", - create, block, phys); + create, (unsigned long)block, phys); map_bh(bh_result, sb, phys); return 0; } @@ -88,11 +89,12 @@ static int bfs_get_block(struct inode * inode, sector_t block, /* the rest has to be protected against itself */ lock_kernel(); - /* if the last data block for this file is the last allocated block, we can - extend the file trivially, without moving it anywhere */ + /* if the last data block for this file is the last allocated + block, we can extend the file trivially, without moving it + anywhere */ if (bi->i_eblock == info->si_lf_eblk) { dprintf("c=%d, b=%08lx, phys=%08lx (simple extension)\n", - create, block, phys); + create, (unsigned 
long)block, phys); map_bh(bh_result, sb, phys); info->si_freeb -= phys - bi->i_eblock; info->si_lf_eblk = bi->i_eblock = phys; @@ -114,7 +116,8 @@ static int bfs_get_block(struct inode * inode, sector_t block, } else err = 0; - dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", create, block, phys); + dprintf("c=%d, b=%08lx, phys=%08lx (moved)\n", + create, (unsigned long)block, phys); bi->i_sblock = phys; phys += block; info->si_lf_eblk = bi->i_eblock = phys; diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 628c2c1a7d7e..c7b39aa279d7 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -3,6 +3,8 @@ * BFS superblock and inode operations. * Copyright (C) 1999,2000 Tigran Aivazian * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. + * + * Made endianness-clean by Andrew Stribblehill , 2005. */ #include @@ -54,46 +56,50 @@ static void bfs_read_inode(struct inode * inode) off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - inode->i_mode = 0x0000FFFF & di->i_mode; - if (di->i_vtype == BFS_VDIR) { + inode->i_mode = 0x0000FFFF & le32_to_cpu(di->i_mode); + if (le32_to_cpu(di->i_vtype) == BFS_VDIR) { inode->i_mode |= S_IFDIR; inode->i_op = &bfs_dir_inops; inode->i_fop = &bfs_dir_operations; - } else if (di->i_vtype == BFS_VREG) { + } else if (le32_to_cpu(di->i_vtype) == BFS_VREG) { inode->i_mode |= S_IFREG; inode->i_op = &bfs_file_inops; inode->i_fop = &bfs_file_operations; inode->i_mapping->a_ops = &bfs_aops; } - inode->i_uid = di->i_uid; - inode->i_gid = di->i_gid; - inode->i_nlink = di->i_nlink; + BFS_I(inode)->i_sblock = le32_to_cpu(di->i_sblock); + BFS_I(inode)->i_eblock = le32_to_cpu(di->i_eblock); + inode->i_uid = le32_to_cpu(di->i_uid); + inode->i_gid = le32_to_cpu(di->i_gid); + inode->i_nlink = le32_to_cpu(di->i_nlink); inode->i_size = BFS_FILESIZE(di); inode->i_blocks = BFS_FILEBLOCKS(di); + if (inode->i_size || inode->i_blocks) dprintf("Registered inode with %lld size, %ld blocks\n", inode->i_size, inode->i_blocks); 
inode->i_blksize = PAGE_SIZE; - inode->i_atime.tv_sec = di->i_atime; - inode->i_mtime.tv_sec = di->i_mtime; - inode->i_ctime.tv_sec = di->i_ctime; + inode->i_atime.tv_sec = le32_to_cpu(di->i_atime); + inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime); + inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime); inode->i_atime.tv_nsec = 0; inode->i_mtime.tv_nsec = 0; inode->i_ctime.tv_nsec = 0; - BFS_I(inode)->i_dsk_ino = di->i_ino; /* can be 0 so we store a copy */ - BFS_I(inode)->i_sblock = di->i_sblock; - BFS_I(inode)->i_eblock = di->i_eblock; + BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino); /* can be 0 so we store a copy */ brelse(bh); } static int bfs_write_inode(struct inode * inode, int unused) { - unsigned long ino = inode->i_ino; + unsigned int ino = (u16)inode->i_ino; + unsigned long i_sblock; struct bfs_inode * di; struct buffer_head * bh; int block, off; + dprintf("ino=%08x\n", ino); + if (ino < BFS_ROOT_INO || ino > BFS_SB(inode->i_sb)->si_lasti) { - printf("Bad inode number %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Bad inode number %s:%08x\n", inode->i_sb->s_id, ino); return -EIO; } @@ -101,7 +107,7 @@ static int bfs_write_inode(struct inode * inode, int unused) block = (ino - BFS_ROOT_INO)/BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { - printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); + printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); unlock_kernel(); return -EIO; } @@ -109,24 +115,26 @@ static int bfs_write_inode(struct inode * inode, int unused) off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; di = (struct bfs_inode *)bh->b_data + off; - if (inode->i_ino == BFS_ROOT_INO) - di->i_vtype = BFS_VDIR; + if (ino == BFS_ROOT_INO) + di->i_vtype = cpu_to_le32(BFS_VDIR); else - di->i_vtype = BFS_VREG; - - di->i_ino = inode->i_ino; - di->i_mode = inode->i_mode; - di->i_uid = inode->i_uid; - di->i_gid = inode->i_gid; - di->i_nlink = inode->i_nlink; - di->i_atime = inode->i_atime.tv_sec; - di->i_mtime = 
inode->i_mtime.tv_sec; - di->i_ctime = inode->i_ctime.tv_sec; - di->i_sblock = BFS_I(inode)->i_sblock; - di->i_eblock = BFS_I(inode)->i_eblock; - di->i_eoffset = di->i_sblock * BFS_BSIZE + inode->i_size - 1; + di->i_vtype = cpu_to_le32(BFS_VREG); + + di->i_ino = cpu_to_le16(ino); + di->i_mode = cpu_to_le32(inode->i_mode); + di->i_uid = cpu_to_le32(inode->i_uid); + di->i_gid = cpu_to_le32(inode->i_gid); + di->i_nlink = cpu_to_le32(inode->i_nlink); + di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + i_sblock = BFS_I(inode)->i_sblock; + di->i_sblock = cpu_to_le32(i_sblock); + di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); + di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); mark_buffer_dirty(bh); + dprintf("Written ino=%d into %d:%d\n",le16_to_cpu(di->i_ino),block,off); brelse(bh); unlock_kernel(); return 0; @@ -140,13 +148,14 @@ static void bfs_delete_inode(struct inode * inode) int block, off; struct super_block * s = inode->i_sb; struct bfs_sb_info * info = BFS_SB(s); + struct bfs_inode_info * bi = BFS_I(inode); - dprintf("ino=%08lx\n", inode->i_ino); + dprintf("ino=%08lx\n", ino); truncate_inode_pages(&inode->i_data, 0); - if (inode->i_ino < BFS_ROOT_INO || inode->i_ino > info->si_lasti) { - printf("invalid ino=%08lx\n", inode->i_ino); + if (ino < BFS_ROOT_INO || ino > info->si_lasti) { + printf("invalid ino=%08lx\n", ino); return; } @@ -162,13 +171,13 @@ static void bfs_delete_inode(struct inode * inode) return; } off = (ino - BFS_ROOT_INO)%BFS_INODES_PER_BLOCK; - di = (struct bfs_inode *)bh->b_data + off; - if (di->i_ino) { - info->si_freeb += BFS_FILEBLOCKS(di); + di = (struct bfs_inode *) bh->b_data + off; + if (bi->i_dsk_ino) { + info->si_freeb += 1 + bi->i_eblock - bi->i_sblock; info->si_freei++; - clear_bit(di->i_ino, info->si_imap); + clear_bit(ino, info->si_imap); dump_imap("delete_inode", s); - } + } di->i_ino = 0; 
di->i_sblock = 0; mark_buffer_dirty(bh); @@ -274,14 +283,14 @@ static struct super_operations bfs_sops = { void dump_imap(const char *prefix, struct super_block * s) { -#if 0 +#ifdef DEBUG int i; char *tmpbuf = (char *)get_zeroed_page(GFP_KERNEL); if (!tmpbuf) return; for (i=BFS_SB(s)->si_lasti; i>=0; i--) { - if (i>PAGE_SIZE-100) break; + if (i > PAGE_SIZE-100) break; if (test_bit(i, BFS_SB(s)->si_imap)) strcat(tmpbuf, "1"); else @@ -297,7 +306,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head * bh; struct bfs_super_block * bfs_sb; struct inode * inode; - int i, imap_len; + unsigned i, imap_len; struct bfs_sb_info * info; info = kmalloc(sizeof(*info), GFP_KERNEL); @@ -312,19 +321,18 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) if(!bh) goto out; bfs_sb = (struct bfs_super_block *)bh->b_data; - if (bfs_sb->s_magic != BFS_MAGIC) { + if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) printf("No BFS filesystem on %s (magic=%08x)\n", - s->s_id, bfs_sb->s_magic); + s->s_id, le32_to_cpu(bfs_sb->s_magic)); goto out; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) printf("%s is unclean, continuing\n", s->s_id); s->s_magic = BFS_MAGIC; - info->si_bfs_sb = bfs_sb; info->si_sbh = bh; - info->si_lasti = (bfs_sb->s_start - BFS_BSIZE)/sizeof(struct bfs_inode) + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE)/sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; imap_len = info->si_lasti/8 + 1; @@ -348,8 +356,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) goto out; } - info->si_blocks = (bfs_sb->s_end + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ - info->si_freeb = (bfs_sb->s_end + 1 - bfs_sb->s_start)>>BFS_BSIZE_BITS; + info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1)>>BFS_BSIZE_BITS; /* for statfs(2) */ + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - cpu_to_le32(bfs_sb->s_start))>>BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; info->si_lf_sblk = 0; 
diff --git a/include/linux/bfs_fs.h b/include/linux/bfs_fs.h index f7f0913cd110..c1237aa92e38 100644 --- a/include/linux/bfs_fs.h +++ b/include/linux/bfs_fs.h @@ -14,8 +14,9 @@ #define BFS_INODES_PER_BLOCK 8 /* SVR4 vnode type values (bfs_inode->i_vtype) */ -#define BFS_VDIR 2 -#define BFS_VREG 1 +#define BFS_VDIR 2L +#define BFS_VREG 1L + /* BFS inode layout on disk */ struct bfs_inode { @@ -58,22 +59,22 @@ struct bfs_super_block { __u32 s_padding[118]; }; -#define BFS_NZFILESIZE(ip) \ - (((ip)->i_eoffset + 1) - (ip)->i_sblock * BFS_BSIZE) - -#define BFS_FILESIZE(ip) \ - ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) - -#define BFS_FILEBLOCKS(ip) \ - ((ip)->i_sblock == 0 ? 0 : ((ip)->i_eblock + 1) - (ip)->i_sblock) #define BFS_OFF2INO(offset) \ ((((offset) - BFS_BSIZE) / sizeof(struct bfs_inode)) + BFS_ROOT_INO) #define BFS_INO2OFF(ino) \ ((__u32)(((ino) - BFS_ROOT_INO) * sizeof(struct bfs_inode)) + BFS_BSIZE) +#define BFS_NZFILESIZE(ip) \ + ((cpu_to_le32((ip)->i_eoffset) + 1) - cpu_to_le32((ip)->i_sblock) * BFS_BSIZE) + +#define BFS_FILESIZE(ip) \ + ((ip)->i_sblock == 0 ? 0 : BFS_NZFILESIZE(ip)) +#define BFS_FILEBLOCKS(ip) \ + ((ip)->i_sblock == 0 ? 0 : (cpu_to_le32((ip)->i_eblock) + 1) - cpu_to_le32((ip)->i_sblock)) #define BFS_UNCLEAN(bfs_sb, sb) \ - ((bfs_sb->s_from != -1) && (bfs_sb->s_to != -1) && !(sb->s_flags & MS_RDONLY)) + ((cpu_to_le32(bfs_sb->s_from) != -1) && (cpu_to_le32(bfs_sb->s_to) != -1) && !(sb->s_flags & MS_RDONLY)) + #endif /* _LINUX_BFS_FS_H */ -- cgit v1.2.3 From b3743fa4442fc172e950ff0eaf6aa96e7d5ce9be Mon Sep 17 00:00:00 2001 From: Dominik Brodowski Date: Fri, 9 Sep 2005 13:03:23 -0700 Subject: [PATCH] yenta: share code with PCI core Share code between setup-bus.c and yenta_socket.c: use the write-out code of resources to the bridge also in yenta_socket.c, as it provides useful debug output. 
In addition, it fixes the bug that the CPU-centric resource view might need to be transferred to the PCI-centric view: setup-bus.c does that, while yenta-socket.c did not. Signed-off-by: Dominik Brodowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/pci/setup-bus.c | 4 ++-- drivers/pcmcia/yenta_socket.c | 41 ++++++++++++++++++----------------------- include/linux/pci.h | 1 + 3 files changed, 21 insertions(+), 25 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 6b0e6464eb39..657be948baf7 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -77,8 +77,7 @@ pbus_assign_resources_sorted(struct pci_bus *bus) } } -static void __devinit -pci_setup_cardbus(struct pci_bus *bus) +void pci_setup_cardbus(struct pci_bus *bus) { struct pci_dev *bridge = bus->self; struct pci_bus_region region; @@ -130,6 +129,7 @@ pci_setup_cardbus(struct pci_bus *bus) region.end); } } +EXPORT_SYMBOL(pci_setup_cardbus); /* Initialize bridges with base/limit values we have collected. PCI-to-PCI Bridge Architecture Specification rev. 1.1 (1998) diff --git a/drivers/pcmcia/yenta_socket.c b/drivers/pcmcia/yenta_socket.c index 0347a29f297b..271a52b7c2e6 100644 --- a/drivers/pcmcia/yenta_socket.c +++ b/drivers/pcmcia/yenta_socket.c @@ -667,7 +667,7 @@ static int yenta_search_res(struct yenta_socket *socket, struct resource *res, return 0; } -static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned type, int addr_start, int addr_end) +static int yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned type, int addr_start, int addr_end) { struct resource *root, *res; struct pci_bus_region region; @@ -676,7 +676,7 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ res = socket->dev->resource + PCI_BRIDGE_RESOURCES + nr; /* Already allocated? 
*/ if (res->parent) - return; + return 0; /* The granularity of the memory limit is 4kB, on IO it's 4 bytes */ mask = ~0xfff; @@ -692,7 +692,7 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ pcibios_bus_to_resource(socket->dev, res, &region); root = pci_find_parent_resource(socket->dev, res); if (root && (request_resource(root, res) == 0)) - return; + return 0; printk(KERN_INFO "yenta %s: Preassigned resource %d busy or not available, reconfiguring...\n", pci_name(socket->dev), nr); } @@ -700,35 +700,27 @@ static void yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ if (type & IORESOURCE_IO) { if ((yenta_search_res(socket, res, BRIDGE_IO_MAX)) || (yenta_search_res(socket, res, BRIDGE_IO_ACC)) || - (yenta_search_res(socket, res, BRIDGE_IO_MIN))) { - config_writel(socket, addr_start, res->start); - config_writel(socket, addr_end, res->end); - return; - } + (yenta_search_res(socket, res, BRIDGE_IO_MIN))) + return 1; } else { if (type & IORESOURCE_PREFETCH) { if ((yenta_search_res(socket, res, BRIDGE_MEM_MAX)) || (yenta_search_res(socket, res, BRIDGE_MEM_ACC)) || - (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) { - config_writel(socket, addr_start, res->start); - config_writel(socket, addr_end, res->end); - return; - } + (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) + return 1; /* Approximating prefetchable by non-prefetchable */ res->flags = IORESOURCE_MEM; } if ((yenta_search_res(socket, res, BRIDGE_MEM_MAX)) || (yenta_search_res(socket, res, BRIDGE_MEM_ACC)) || - (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) { - config_writel(socket, addr_start, res->start); - config_writel(socket, addr_end, res->end); - return; - } + (yenta_search_res(socket, res, BRIDGE_MEM_MIN))) + return 1; } printk(KERN_INFO "yenta %s: no resource of type %x available, trying to continue...\n", pci_name(socket->dev), type); res->start = res->end = res->flags = 0; + return 0; } /* @@ -736,14 +728,17 @@ static void 
yenta_allocate_res(struct yenta_socket *socket, int nr, unsigned typ */ static void yenta_allocate_resources(struct yenta_socket *socket) { - yenta_allocate_res(socket, 0, IORESOURCE_IO, + int program = 0; + program += yenta_allocate_res(socket, 0, IORESOURCE_IO, PCI_CB_IO_BASE_0, PCI_CB_IO_LIMIT_0); - yenta_allocate_res(socket, 1, IORESOURCE_IO, + program += yenta_allocate_res(socket, 1, IORESOURCE_IO, PCI_CB_IO_BASE_1, PCI_CB_IO_LIMIT_1); - yenta_allocate_res(socket, 2, IORESOURCE_MEM|IORESOURCE_PREFETCH, + program += yenta_allocate_res(socket, 2, IORESOURCE_MEM|IORESOURCE_PREFETCH, PCI_CB_MEMORY_BASE_0, PCI_CB_MEMORY_LIMIT_0); - yenta_allocate_res(socket, 3, IORESOURCE_MEM, + program += yenta_allocate_res(socket, 3, IORESOURCE_MEM, PCI_CB_MEMORY_BASE_1, PCI_CB_MEMORY_LIMIT_1); + if (program) + pci_setup_cardbus(socket->dev->subordinate); } @@ -758,7 +753,7 @@ static void yenta_free_resources(struct yenta_socket *socket) res = socket->dev->resource + PCI_BRIDGE_RESOURCES + i; if (res->start != 0 && res->end != 0) release_resource(res); - res->start = res->end = 0; + res->start = res->end = res->flags = 0; } } diff --git a/include/linux/pci.h b/include/linux/pci.h index 6caaba0af469..c62e89270237 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -326,6 +326,7 @@ extern struct pci_dev *pci_dev_get(struct pci_dev *dev); extern void pci_dev_put(struct pci_dev *dev); extern void pci_remove_bus(struct pci_bus *b); extern void pci_remove_bus_device(struct pci_dev *dev); +void pci_setup_cardbus(struct pci_bus *bus); /* Generic PCI functions exported to card drivers */ -- cgit v1.2.3 From 793cf9e6a54c698e109a599c8b8e303658fcaae6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 9 Sep 2005 13:03:37 -0700 Subject: [PATCH] v4l: common part Updates and tuner additions - Remove $Id CVS logs for V4L files - Included newer cards. - Added a new NEC protocol for ir based on pulse distance. - Enable ATSC support for DViCO FusionHDTV5 Gold. 
- Added tuner LG NTSC (TALN mini series). - Fixed tea5767 autodetection. - Resolve more tuner types. - Commented debug function removed from mainstream. - Remove comments from mainstream. Still on development tree. - linux/version dependencies removed. - BTSC Lang1 now is set to auto_stereo mode. - New tuner standby API. - i2c-core.c uses hexadecimal for the i2c address, so it should stay consistent. Signed-off-by: Uli Luckas Signed-off-by: Mac Michaels Signed-off-by: Michael Krufky Signed-off-by: Hermann Pitton Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/video4linux/CARDLIST.tuner | 1 + drivers/media/common/ir-common.c | 68 ++++++++++++++++++++++++++- drivers/media/video/btcx-risc.c | 1 - drivers/media/video/btcx-risc.h | 1 - drivers/media/video/ir-kbd-gpio.c | 1 - drivers/media/video/ir-kbd-i2c.c | 1 - drivers/media/video/msp3400.h | 1 - drivers/media/video/mt20xx.c | 2 +- drivers/media/video/tda8290.c | 19 +++++++- drivers/media/video/tda9887.c | 39 ++++++++++------ drivers/media/video/tea5767.c | 37 +++++++++------ drivers/media/video/tuner-core.c | 79 +++++++++++++++++++------------- drivers/media/video/tuner-simple.c | 5 +- drivers/media/video/tveeprom.c | 25 ++-------- drivers/media/video/tvmixer.c | 1 - drivers/media/video/v4l1-compat.c | 16 ------- drivers/media/video/v4l2-common.c | 18 -------- drivers/media/video/video-buf-dvb.c | 1 - drivers/media/video/video-buf.c | 1 - include/linux/videodev.h | 3 -- include/linux/videodev2.h | 4 -- include/media/audiochip.h | 1 - include/media/id.h | 1 - include/media/ir-common.h | 4 +- include/media/tuner.h | 25 +++++----- include/media/tveeprom.h | 1 - include/media/video-buf.h | 1 - 27 files changed, 203 insertions(+), 154 deletions(-) (limited to 'include/linux') diff --git a/Documentation/video4linux/CARDLIST.tuner b/Documentation/video4linux/CARDLIST.tuner index f3302e1b1b9c..f5876be658a6 100644 --- a/Documentation/video4linux/CARDLIST.tuner 
+++ b/Documentation/video4linux/CARDLIST.tuner @@ -64,3 +64,4 @@ tuner=62 - Philips TEA5767HN FM Radio tuner=63 - Philips FMD1216ME MK3 Hybrid Tuner tuner=64 - LG TDVS-H062F/TUA6034 tuner=65 - Ymec TVF66T5-B/DFF +tuner=66 - LG NTSC (TALN mini series) diff --git a/drivers/media/common/ir-common.c b/drivers/media/common/ir-common.c index ab7a1fba4427..a0e700d7a4a4 100644 --- a/drivers/media/common/ir-common.c +++ b/drivers/media/common/ir-common.c @@ -1,5 +1,4 @@ /* - * $Id: ir-common.c,v 1.11 2005/07/07 14:44:43 mchehab Exp $ * * some common structs and functions to handle infrared remotes via * input layer ... @@ -335,6 +334,72 @@ int ir_dump_samples(u32 *samples, int count) return 0; } +/* decode raw samples, pulse distance coding used by NEC remotes */ +int ir_decode_pulsedistance(u32 *samples, int count, int low, int high) +{ + int i,last,bit,len; + u32 curBit; + u32 value; + + /* find start burst */ + for (i = len = 0; i < count * 32; i++) { + bit = getbit(samples,i); + if (bit) { + len++; + } else { + if (len >= 29) + break; + len = 0; + } + } + + /* start burst to short */ + if (len < 29) + return 0xffffffff; + + /* find start silence */ + for (len = 0; i < count * 32; i++) { + bit = getbit(samples,i); + if (bit) { + break; + } else { + len++; + } + } + + /* silence to short */ + if (len < 7) + return 0xffffffff; + + /* go decoding */ + len = 0; + last = 1; + value = 0; curBit = 1; + for (; i < count * 32; i++) { + bit = getbit(samples,i); + if (last) { + if(bit) { + continue; + } else { + len = 1; + } + } else { + if (bit) { + if (len > (low + high) /2) + value |= curBit; + curBit <<= 1; + if (curBit == 1) + break; + } else { + len++; + } + } + last = bit; + } + + return value; +} + /* decode raw samples, biphase coding, used by rc5 for example */ int ir_decode_biphase(u32 *samples, int count, int low, int high) { @@ -383,6 +448,7 @@ EXPORT_SYMBOL_GPL(ir_input_keydown); EXPORT_SYMBOL_GPL(ir_extract_bits); EXPORT_SYMBOL_GPL(ir_dump_samples); 
EXPORT_SYMBOL_GPL(ir_decode_biphase); +EXPORT_SYMBOL_GPL(ir_decode_pulsedistance); /* * Local variables: diff --git a/drivers/media/video/btcx-risc.c b/drivers/media/video/btcx-risc.c index 7f2d515d2873..a48de3c0e3f0 100644 --- a/drivers/media/video/btcx-risc.c +++ b/drivers/media/video/btcx-risc.c @@ -1,5 +1,4 @@ /* - $Id: btcx-risc.c,v 1.6 2005/02/21 13:57:59 kraxel Exp $ btcx-risc.c diff --git a/drivers/media/video/btcx-risc.h b/drivers/media/video/btcx-risc.h index 41f60395a520..503e6c6d7b69 100644 --- a/drivers/media/video/btcx-risc.h +++ b/drivers/media/video/btcx-risc.h @@ -1,5 +1,4 @@ /* - * $Id: btcx-risc.h,v 1.2 2004/09/15 16:15:24 kraxel Exp $ */ struct btcx_riscmem { unsigned int size; diff --git a/drivers/media/video/ir-kbd-gpio.c b/drivers/media/video/ir-kbd-gpio.c index a565823330aa..eddadc76e11d 100644 --- a/drivers/media/video/ir-kbd-gpio.c +++ b/drivers/media/video/ir-kbd-gpio.c @@ -1,5 +1,4 @@ /* - * $Id: ir-kbd-gpio.c,v 1.13 2005/05/15 19:01:26 mchehab Exp $ * * Copyright (c) 2003 Gerd Knorr * Copyright (c) 2003 Pavel Machek diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c index 1e273ff3f956..67105b9804a2 100644 --- a/drivers/media/video/ir-kbd-i2c.c +++ b/drivers/media/video/ir-kbd-i2c.c @@ -1,5 +1,4 @@ /* - * $Id: ir-kbd-i2c.c,v 1.11 2005/07/07 16:42:11 mchehab Exp $ * * keyboard input driver for i2c IR remote controls * diff --git a/drivers/media/video/msp3400.h b/drivers/media/video/msp3400.h index 023f33056a4f..2d9ff40f0b09 100644 --- a/drivers/media/video/msp3400.h +++ b/drivers/media/video/msp3400.h @@ -1,5 +1,4 @@ /* - * $Id: msp3400.h,v 1.3 2005/06/12 04:19:19 mchehab Exp $ */ #ifndef MSP3400_H diff --git a/drivers/media/video/mt20xx.c b/drivers/media/video/mt20xx.c index 2fb7c2d1787a..972aa5e0aeef 100644 --- a/drivers/media/video/mt20xx.c +++ b/drivers/media/video/mt20xx.c @@ -1,5 +1,4 @@ /* - * $Id: mt20xx.c,v 1.5 2005/06/16 08:29:49 nsh Exp $ * * i2c tv tuner chip device driver * controls microtune 
tuners, mt2032 + mt2050 at the moment. @@ -494,6 +493,7 @@ int microtune_init(struct i2c_client *c) memset(buf,0,sizeof(buf)); t->tv_freq = NULL; t->radio_freq = NULL; + t->standby = NULL; name = "unknown"; i2c_master_send(c,buf,1); diff --git a/drivers/media/video/tda8290.c b/drivers/media/video/tda8290.c index a8b6a8df5109..c65f0c7680a2 100644 --- a/drivers/media/video/tda8290.c +++ b/drivers/media/video/tda8290.c @@ -1,5 +1,4 @@ /* - * $Id: tda8290.c,v 1.15 2005/07/08 20:21:33 mchehab Exp $ * * i2c tv tuner chip device driver * controls the philips tda8290+75 tuner chip combo. @@ -9,6 +8,9 @@ #include #include +#define I2C_ADDR_TDA8290 0x4b +#define I2C_ADDR_TDA8275 0x61 + /* ---------------------------------------------------------------------- */ struct freq_entry { @@ -75,10 +77,12 @@ static unsigned char i2c_init_tda8275[14] = { 0x00, 0x00, 0x00, 0x00, static unsigned char i2c_set_VS[2] = { 0x30, 0x6F }; static unsigned char i2c_set_GP01_CF[2] = { 0x20, 0x0B }; static unsigned char i2c_tda8290_reset[2] = { 0x00, 0x00 }; +static unsigned char i2c_tda8290_standby[2] = { 0x00, 0x02 }; static unsigned char i2c_gainset_off[2] = { 0x28, 0x14 }; static unsigned char i2c_gainset_on[2] = { 0x28, 0x54 }; static unsigned char i2c_agc3_00[2] = { 0x80, 0x00 }; static unsigned char i2c_agc2_BF[2] = { 0x60, 0xBF }; +static unsigned char i2c_cb1_D0[2] = { 0x30, 0xD0 }; static unsigned char i2c_cb1_D2[2] = { 0x30, 0xD2 }; static unsigned char i2c_cb1_56[2] = { 0x30, 0x56 }; static unsigned char i2c_cb1_52[2] = { 0x30, 0x52 }; @@ -117,6 +121,13 @@ static struct i2c_msg i2c_msg_epilog[] = { { I2C_ADDR_TDA8290, 0, ARRAY_SIZE(i2c_gainset_on), i2c_gainset_on }, }; +static struct i2c_msg i2c_msg_standby[] = { + { I2C_ADDR_TDA8290, 0, ARRAY_SIZE(i2c_enable_bridge), i2c_enable_bridge }, + { I2C_ADDR_TDA8275, 0, ARRAY_SIZE(i2c_cb1_D0), i2c_cb1_D0 }, + { I2C_ADDR_TDA8290, 0, ARRAY_SIZE(i2c_disable_bridge), i2c_disable_bridge }, + { I2C_ADDR_TDA8290, 0, ARRAY_SIZE(i2c_tda8290_standby), 
i2c_tda8290_standby }, +}; + static int tda8290_tune(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); @@ -205,6 +216,11 @@ static int has_signal(struct i2c_client *c) return (afc & 0x80)? 65535:0; } +static void standby(struct i2c_client *c) +{ + i2c_transfer(c->adapter, i2c_msg_standby, ARRAY_SIZE(i2c_msg_standby)); +} + int tda8290_init(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); @@ -214,6 +230,7 @@ int tda8290_init(struct i2c_client *c) t->tv_freq = set_tv_freq; t->radio_freq = set_radio_freq; t->has_signal = has_signal; + t->standby = standby; i2c_master_send(c, i2c_enable_bridge, ARRAY_SIZE(i2c_enable_bridge)); i2c_transfer(c->adapter, i2c_msg_init, ARRAY_SIZE(i2c_msg_init)); diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c index d60fc562aecd..79e0bd1aa70f 100644 --- a/drivers/media/video/tda9887.c +++ b/drivers/media/video/tda9887.c @@ -49,7 +49,7 @@ MODULE_LICENSE("GPL"); struct tda9887 { struct i2c_client client; v4l2_std_id std; - unsigned int radio; + enum tuner_mode mode; unsigned int config; unsigned int pinnacle_id; unsigned int using_v4l2; @@ -196,7 +196,7 @@ static struct tvnorm tvnorms[] = { .b = ( cNegativeFmTV | cQSS ), .c = ( cDeemphasisON | - cDeemphasis50 ), + cDeemphasis75 ), .e = ( cGating_36 | cAudioIF_4_5 | cVideoIF_45_75 ), @@ -364,7 +364,7 @@ static int tda9887_set_tvnorm(struct tda9887 *t, char *buf) struct tvnorm *norm = NULL; int i; - if (t->radio) { + if (t->mode == T_RADIO) { if (t->radio_mode == V4L2_TUNER_MODE_MONO) norm = &radio_mono; else @@ -378,7 +378,7 @@ static int tda9887_set_tvnorm(struct tda9887 *t, char *buf) } } if (NULL == norm) { - dprintk(PREFIX "Oops: no tvnorm entry found\n"); + dprintk(PREFIX "Unsupported tvnorm entry - audio muted\n"); return -1; } @@ -569,6 +569,10 @@ static int tda9887_configure(struct tda9887 *t) tda9887_set_config(t,buf); tda9887_set_insmod(t,buf); + if (t->mode == T_STANDBY) { + buf[1] |= cForcedMuteAudioON; + } + dprintk(PREFIX 
"writing: b=0x%02x c=0x%02x e=0x%02x\n", buf[1],buf[2],buf[3]); @@ -653,10 +657,17 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) /* --- configuration --- */ case AUDC_SET_RADIO: - t->radio = 1; + { + t->mode = T_RADIO; tda9887_configure(t); break; - + } + case TUNER_SET_STANDBY: + { + t->mode = T_STANDBY; + tda9887_configure(t); + break; + } case AUDC_CONFIG_PINNACLE: { int *i = arg; @@ -689,7 +700,7 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) struct video_channel *vc = arg; CHECK_V4L2; - t->radio = 0; + t->mode = T_ANALOG_TV; if (vc->norm < ARRAY_SIZE(map)) t->std = map[vc->norm]; tda9887_fixup_std(t); @@ -701,7 +712,7 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) v4l2_std_id *id = arg; SWITCH_V4L2; - t->radio = 0; + t->mode = T_ANALOG_TV; t->std = *id; tda9887_fixup_std(t); tda9887_configure(t); @@ -713,14 +724,14 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) SWITCH_V4L2; if (V4L2_TUNER_ANALOG_TV == f->type) { - if (t->radio == 0) + if (t->mode == T_ANALOG_TV) return 0; - t->radio = 0; + t->mode = T_ANALOG_TV; } if (V4L2_TUNER_RADIO == f->type) { - if (t->radio == 1) + if (t->mode == T_RADIO) return 0; - t->radio = 1; + t->mode = T_RADIO; } tda9887_configure(t); break; @@ -735,7 +746,7 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) }; struct v4l2_tuner* tuner = arg; - if (t->radio) { + if (t->mode == T_RADIO) { __u8 reg = 0; tuner->afc=0; if (1 == i2c_master_recv(&t->client,&reg,1)) @@ -747,7 +758,7 @@ tda9887_command(struct i2c_client *client, unsigned int cmd, void *arg) { struct v4l2_tuner* tuner = arg; - if (t->radio) { + if (t->mode == T_RADIO) { t->radio_mode = tuner->audmode; tda9887_configure (t); } diff --git a/drivers/media/video/tea5767.c b/drivers/media/video/tea5767.c index cebcc1fa68d1..38bf50943798 100644 --- a/drivers/media/video/tea5767.c +++ b/drivers/media/video/tea5767.c @@ -2,7 +2,6 @@ * For Philips 
TEA5767 FM Chip used on some TV Cards like Prolink Pixelview * I2C address is allways 0xC0. * - * $Id: tea5767.c,v 1.27 2005/07/31 12:10:56 mchehab Exp $ * * Copyright (c) 2005 Mauro Carvalho Chehab (mchehab@brturbo.com.br) * This code is placed under the terms of the GNU General Public License @@ -205,11 +204,6 @@ static void set_radio_freq(struct i2c_client *c, unsigned int frq) TEA5767_ST_NOISE_CTL | TEA5767_JAPAN_BAND; buffer[4] = 0; - if (t->mode == T_STANDBY) { - tuner_dbg("TEA5767 set to standby mode\n"); - buffer[3] |= TEA5767_STDBY; - } - if (t->audmode == V4L2_TUNER_MODE_MONO) { tuner_dbg("TEA5767 set to mono\n"); buffer[2] |= TEA5767_MONO; @@ -290,13 +284,31 @@ static int tea5767_stereo(struct i2c_client *c) return ((buffer[2] & TEA5767_STEREO_MASK) ? V4L2_TUNER_SUB_STEREO : 0); } +static void tea5767_standby(struct i2c_client *c) +{ + unsigned char buffer[5]; + struct tuner *t = i2c_get_clientdata(c); + unsigned div, rc; + + div = (87500 * 4 + 700 + 225 + 25) / 50; /* Set frequency to 87.5 MHz */ + buffer[0] = (div >> 8) & 0x3f; + buffer[1] = div & 0xff; + buffer[2] = TEA5767_PORT1_HIGH; + buffer[3] = TEA5767_PORT2_HIGH | TEA5767_HIGH_CUT_CTRL | + TEA5767_ST_NOISE_CTL | TEA5767_JAPAN_BAND | TEA5767_STDBY; + buffer[4] = 0; + + if (5 != (rc = i2c_master_send(c, buffer, 5))) + tuner_warn("i2c i/o error: rc == %d (should be 5)\n", rc); +} + int tea5767_autodetection(struct i2c_client *c) { unsigned char buffer[7] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; int rc; struct tuner *t = i2c_get_clientdata(c); - if (7 != (rc = i2c_master_recv(c, buffer, 7))) { + if ((rc = i2c_master_recv(c, buffer, 7))< 5) { tuner_warn("It is not a TEA5767. 
Received %i bytes.\n", rc); return EINVAL; } @@ -313,15 +325,10 @@ int tea5767_autodetection(struct i2c_client *c) * bit 0 : internally set to 0 * Byte 5: bit 7:0 : == 0 */ - if (!((buffer[3] & 0x0f) == 0x00) && (buffer[4] == 0x00)) { + if (((buffer[3] & 0x0f) != 0x00) || (buffer[4] != 0x00)) { tuner_warn("Chip ID is not zero. It is not a TEA5767\n"); return EINVAL; } - /* It seems that tea5767 returns 0xff after the 5th byte */ - if ((buffer[5] != 0xff) || (buffer[6] != 0xff)) { - tuner_warn("Returned more than 5 bytes. It is not a TEA5767\n"); - return EINVAL; - } /* It seems that tea5767 returns 0xff after the 5th byte */ if ((buffer[5] != 0xff) || (buffer[6] != 0xff)) { @@ -337,14 +344,14 @@ int tea5767_tuner_init(struct i2c_client *c) { struct tuner *t = i2c_get_clientdata(c); - tuner_info("type set to %d (%s)\n", t->type, - "Philips TEA5767HN FM Radio"); + tuner_info("type set to %d (%s)\n", t->type, "Philips TEA5767HN FM Radio"); strlcpy(c->name, "tea5767", sizeof(c->name)); t->tv_freq = set_tv_freq; t->radio_freq = set_radio_freq; t->has_signal = tea5767_signal; t->is_stereo = tea5767_stereo; + t->standby = tea5767_standby; return (0); } diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c index 3b1893c2ae3b..afc96bbb1c11 100644 --- a/drivers/media/video/tuner-core.c +++ b/drivers/media/video/tuner-core.c @@ -1,5 +1,4 @@ /* - * $Id: tuner-core.c,v 1.63 2005/07/28 18:19:55 mchehab Exp $ * * i2c tv tuner chip device driver * core core, i.e. kernel interfaces, registering and so on @@ -182,6 +181,14 @@ static void set_type(struct i2c_client *c, unsigned int type, i2c_master_send(c, buffer, 4); default_tuner_init(c); break; + case TUNER_LG_TDVS_H062F: + /* Set the Auxiliary Byte. 
*/ + buffer[2] &= ~0x20; + buffer[2] |= 0x18; + buffer[3] = 0x20; + i2c_master_send(c, buffer, 4); + default_tuner_init(c); + break; default: default_tuner_init(c); break; @@ -208,31 +215,31 @@ static void set_addr(struct i2c_client *c, struct tuner_setup *tun_setup) { struct tuner *t = i2c_get_clientdata(c); - if (tun_setup->addr == ADDR_UNSET) { - if (t->mode_mask & tun_setup->mode_mask) + if ((tun_setup->addr == ADDR_UNSET && + (t->mode_mask & tun_setup->mode_mask)) || + tun_setup->addr == c->addr) { set_type(c, tun_setup->type, tun_setup->mode_mask); - } else if (tun_setup->addr == c->addr) { - set_type(c, tun_setup->type, tun_setup->mode_mask); } } static inline int check_mode(struct tuner *t, char *cmd) { - if (1 << t->mode & t->mode_mask) { - switch (t->mode) { - case V4L2_TUNER_RADIO: - tuner_dbg("Cmd %s accepted for radio\n", cmd); - break; - case V4L2_TUNER_ANALOG_TV: - tuner_dbg("Cmd %s accepted for analog TV\n", cmd); - break; - case V4L2_TUNER_DIGITAL_TV: - tuner_dbg("Cmd %s accepted for digital TV\n", cmd); - break; - } - return 0; + if ((1 << t->mode & t->mode_mask) == 0) { + return EINVAL; + } + + switch (t->mode) { + case V4L2_TUNER_RADIO: + tuner_dbg("Cmd %s accepted for radio\n", cmd); + break; + case V4L2_TUNER_ANALOG_TV: + tuner_dbg("Cmd %s accepted for analog TV\n", cmd); + break; + case V4L2_TUNER_DIGITAL_TV: + tuner_dbg("Cmd %s accepted for digital TV\n", cmd); + break; } - return EINVAL; + return 0; } static char pal[] = "-"; @@ -406,20 +413,18 @@ static int tuner_detach(struct i2c_client *client) static inline int set_mode(struct i2c_client *client, struct tuner *t, int mode, char *cmd) { - if (mode != t->mode) { - - t->mode = mode; - if (check_mode(t, cmd) == EINVAL) { - t->mode = T_STANDBY; - if (V4L2_TUNER_RADIO == mode) { - set_tv_freq(client, 400 * 16); - } else { - set_radio_freq(client, 87.5 * 16000); - } - return EINVAL; - } - } - return 0; + if (mode == t->mode) + return 0; + + t->mode = mode; + + if (check_mode(t, cmd) == EINVAL) 
{ + t->mode = T_STANDBY; + if (t->standby) + t->standby (client); + return EINVAL; + } + return 0; } #define switch_v4l2() if (!t->using_v4l2) \ @@ -453,6 +458,14 @@ static int tuner_command(struct i2c_client *client, unsigned int cmd, void *arg) case AUDC_SET_RADIO: set_mode(client,t,V4L2_TUNER_RADIO, "AUDC_SET_RADIO"); break; + case TUNER_SET_STANDBY: + { + if (check_mode(t, "TUNER_SET_STANDBY") == EINVAL) + return 0; + if (t->standby) + t->standby (client); + break; + } case AUDC_CONFIG_PINNACLE: if (check_mode(t, "AUDC_CONFIG_PINNACLE") == EINVAL) return 0; diff --git a/drivers/media/video/tuner-simple.c b/drivers/media/video/tuner-simple.c index de0c93aeb75d..26034406b372 100644 --- a/drivers/media/video/tuner-simple.c +++ b/drivers/media/video/tuner-simple.c @@ -1,5 +1,4 @@ /* - * $Id: tuner-simple.c,v 1.43 2005/07/28 18:41:21 mchehab Exp $ * * i2c tv tuner chip device driver * controls all those simple 4-control-bytes style tuners. @@ -248,9 +247,10 @@ static struct tunertype tuners[] = { { "LG TDVS-H062F/TUA6034", LGINNOTEK, NTSC, 16*160.00,16*455.00,0x01,0x02,0x04,0x8e,732}, - { "Ymec TVF66T5-B/DFF", Philips, PAL, 16*160.25,16*464.25,0x01,0x02,0x08,0x8e,623}, + { "LG NTSC (TALN mini series)", LGINNOTEK, NTSC, + 16*150.00,16*425.00,0x01,0x02,0x08,0x8e,732 }, }; unsigned const int tuner_count = ARRAY_SIZE(tuners); @@ -497,6 +497,7 @@ int default_tuner_init(struct i2c_client *c) t->radio_freq = default_set_radio_freq; t->has_signal = tuner_signal; t->is_stereo = tuner_stereo; + t->standby = NULL; return 0; } diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c index 3c3356a01cc6..d0a00d3a6c4f 100644 --- a/drivers/media/video/tveeprom.c +++ b/drivers/media/video/tveeprom.c @@ -155,10 +155,10 @@ hauppauge_tuner[] = { TUNER_ABSENT, "Philips FQ1216ME MK3"}, { TUNER_ABSENT, "Philips FI1236 MK3"}, { TUNER_PHILIPS_FM1216ME_MK3, "Philips FM1216 ME MK3"}, - { TUNER_ABSENT, "Philips FM1236 MK3"}, + { TUNER_PHILIPS_FM1236_MK3, "Philips FM1236 
MK3"}, { TUNER_ABSENT, "Philips FM1216MP MK3"}, /* 60-69 */ - { TUNER_ABSENT, "LG S001D MK3"}, + { TUNER_PHILIPS_FM1216ME_MK3, "LG S001D MK3"}, { TUNER_ABSENT, "LG M001D MK3"}, { TUNER_ABSENT, "LG S701D MK3"}, { TUNER_ABSENT, "LG M701D MK3"}, @@ -183,8 +183,8 @@ hauppauge_tuner[] = { TUNER_ABSENT, "Philips FQ1216LME MK3"}, { TUNER_ABSENT, "LG TAPC G701D"}, { TUNER_LG_NTSC_NEW_TAPC, "LG TAPC H791F"}, - { TUNER_ABSENT, "TCL 2002MB 3"}, - { TUNER_ABSENT, "TCL 2002MI 3"}, + { TUNER_LG_PAL_NEW_TAPC, "TCL 2002MB 3"}, + { TUNER_LG_PAL_NEW_TAPC, "TCL 2002MI 3"}, { TUNER_TCL_2002N, "TCL 2002N 6A"}, { TUNER_ABSENT, "Philips FQ1236 MK3"}, { TUNER_ABSENT, "Samsung TCPN 2121P30A"}, @@ -445,23 +445,6 @@ int tveeprom_read(struct i2c_client *c, unsigned char *eedata, int len) } EXPORT_SYMBOL(tveeprom_read); -#if 0 -int tveeprom_dump(unsigned char *eedata, int len) -{ - int i; - - dprintk(1, "%s\n",__FUNCTION__); - for (i = 0; i < len; i++) { - if (0 == (i % 16)) - printk(KERN_INFO "tveeprom: %02x:",i); - printk(" %02x",eedata[i]); - if (15 == (i % 16)) - printk("\n"); - } - return 0; -} -EXPORT_SYMBOL(tveeprom_dump); -#endif /* 0 */ /* ----------------------------------------------------------------------- */ /* needed for ivtv.sf.net at the moment. 
Should go away in the long */ diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c index a43301a154af..d86e08ebddfc 100644 --- a/drivers/media/video/tvmixer.c +++ b/drivers/media/video/tvmixer.c @@ -1,5 +1,4 @@ /* - * $Id: tvmixer.c,v 1.8 2005/06/12 04:19:19 mchehab Exp $ */ #include diff --git a/drivers/media/video/v4l1-compat.c b/drivers/media/video/v4l1-compat.c index 70ecbdb80277..59bb71381a1b 100644 --- a/drivers/media/video/v4l1-compat.c +++ b/drivers/media/video/v4l1-compat.c @@ -1,5 +1,4 @@ /* - * $Id: v4l1-compat.c,v 1.9 2005/06/12 04:19:19 mchehab Exp $ * * Video for Linux Two * Backward Compatibility Layer @@ -604,9 +603,6 @@ v4l_compat_translate_ioctl(struct inode *inode, dprintk("VIDIOCGPICT / VIDIOC_G_FMT: %d\n",err); break; } -#if 0 /* FIXME */ - pict->depth = fmt2->fmt.pix.depth; -#endif pict->palette = pixelformat_to_palette( fmt2->fmt.pix.pixelformat); break; @@ -707,13 +703,7 @@ v4l_compat_translate_ioctl(struct inode *inode, } case VIDIOCSTUNER: /* select a tuner input */ { -#if 0 /* FIXME */ - err = drv(inode, file, VIDIOC_S_INPUT, &i); - if (err < 0) - dprintk("VIDIOCSTUNER / VIDIOC_S_INPUT: %d\n",err); -#else err = 0; -#endif break; } case VIDIOCGFREQ: /* get frequency */ @@ -852,12 +842,6 @@ v4l_compat_translate_ioctl(struct inode *inode, err = 0; break; } -#if 0 - case VIDIOCGMBUF: - /* v4l2 drivers must implement that themself. The - mmap() differences can't be translated fully - transparent, thus there is no point to try that */ -#endif case VIDIOCMCAPTURE: /* capture a frame */ { struct video_mmap *mm = arg; diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c index b5e0cf3448f4..597b8db35a13 100644 --- a/drivers/media/video/v4l2-common.c +++ b/drivers/media/video/v4l2-common.c @@ -84,20 +84,6 @@ MODULE_LICENSE("GPL"); * Video Standard Operations (contributed by Michael Schimek) */ -#if 0 /* seems to have no users */ -/* This is the recommended method to deal with the framerate fields. 
More - sophisticated drivers will access the fields directly. */ -unsigned int -v4l2_video_std_fps(struct v4l2_standard *vs) -{ - if (vs->frameperiod.numerator > 0) - return (((vs->frameperiod.denominator << 8) / - vs->frameperiod.numerator) + - (1 << 7)) / (1 << 8); - return 0; -} -EXPORT_SYMBOL(v4l2_video_std_fps); -#endif /* Fill in the fields of a v4l2_standard structure according to the 'id' and 'transmission' parameters. Returns negative on error. */ @@ -213,10 +199,6 @@ char *v4l2_ioctl_names[256] = { [_IOC_NR(VIDIOC_ENUM_FMT)] = "VIDIOC_ENUM_FMT", [_IOC_NR(VIDIOC_G_FMT)] = "VIDIOC_G_FMT", [_IOC_NR(VIDIOC_S_FMT)] = "VIDIOC_S_FMT", -#if 0 - [_IOC_NR(VIDIOC_G_COMP)] = "VIDIOC_G_COMP", - [_IOC_NR(VIDIOC_S_COMP)] = "VIDIOC_S_COMP", -#endif [_IOC_NR(VIDIOC_REQBUFS)] = "VIDIOC_REQBUFS", [_IOC_NR(VIDIOC_QUERYBUF)] = "VIDIOC_QUERYBUF", [_IOC_NR(VIDIOC_G_FBUF)] = "VIDIOC_G_FBUF", diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c index 15f5bb486963..55f129e964eb 100644 --- a/drivers/media/video/video-buf-dvb.c +++ b/drivers/media/video/video-buf-dvb.c @@ -1,5 +1,4 @@ /* - * $Id: video-buf-dvb.c,v 1.7 2004/12/09 12:51:35 kraxel Exp $ * * some helper function for simple DVB cards which simply DMA the * complete transport stream and let the computer sort everything else diff --git a/drivers/media/video/video-buf.c b/drivers/media/video/video-buf.c index 5afdc7852610..97354f253a80 100644 --- a/drivers/media/video/video-buf.c +++ b/drivers/media/video/video-buf.c @@ -1,5 +1,4 @@ /* - * $Id: video-buf.c,v 1.18 2005/02/24 13:32:30 kraxel Exp $ * * generic helper functions for video4linux capture buffers, to handle * memory management and PCI DMA. Right now bttv + saa7134 use it. 
diff --git a/include/linux/videodev.h b/include/linux/videodev.h index 9d6fbde3d29c..1cc8c31b7988 100644 --- a/include/linux/videodev.h +++ b/include/linux/videodev.h @@ -3,7 +3,6 @@ #include #include -#include #define HAVE_V4L2 1 #include @@ -29,7 +28,6 @@ struct video_device void (*release)(struct video_device *vfd); -#if 1 /* to be removed in 2.7.x */ /* obsolete -- fops->owner is used instead */ struct module *owner; /* dev->driver_data will be used instead some day. @@ -37,7 +35,6 @@ struct video_device * so the switch over will be transparent for you. * Or use {pci|usb}_{get|set}_drvdata() directly. */ void *priv; -#endif /* for videodev.c intenal usage -- please don't touch */ int users; /* video_exclusive_{open|close} ... */ diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index acbfc525576d..f623a33b9abe 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -270,7 +270,6 @@ struct v4l2_timecode /* The above is based on SMPTE timecodes */ -#if 1 /* * M P E G C O M P R E S S I O N P A R A M E T E R S * @@ -357,7 +356,6 @@ struct v4l2_mpeg_compression { /* I don't expect the above being perfect yet ;) */ __u32 reserved_5[8]; }; -#endif struct v4l2_jpegcompression { @@ -871,10 +869,8 @@ struct v4l2_streamparm #define VIDIOC_ENUM_FMT _IOWR ('V', 2, struct v4l2_fmtdesc) #define VIDIOC_G_FMT _IOWR ('V', 4, struct v4l2_format) #define VIDIOC_S_FMT _IOWR ('V', 5, struct v4l2_format) -#if 1 /* experimental */ #define VIDIOC_G_MPEGCOMP _IOR ('V', 6, struct v4l2_mpeg_compression) #define VIDIOC_S_MPEGCOMP _IOW ('V', 7, struct v4l2_mpeg_compression) -#endif #define VIDIOC_REQBUFS _IOWR ('V', 8, struct v4l2_requestbuffers) #define VIDIOC_QUERYBUF _IOWR ('V', 9, struct v4l2_buffer) #define VIDIOC_G_FBUF _IOR ('V', 10, struct v4l2_framebuffer) diff --git a/include/media/audiochip.h b/include/media/audiochip.h index cd831168fdc1..a7ceee9fc5e9 100644 --- a/include/media/audiochip.h +++ b/include/media/audiochip.h @@ -1,5 +1,4 @@ /* - * 
$Id: audiochip.h,v 1.5 2005/06/16 22:59:16 hhackmann Exp $ */ #ifndef AUDIOCHIP_H diff --git a/include/media/id.h b/include/media/id.h index 801ddef301aa..6d02c94cdc0d 100644 --- a/include/media/id.h +++ b/include/media/id.h @@ -1,5 +1,4 @@ /* - * $Id: id.h,v 1.4 2005/06/12 04:19:19 mchehab Exp $ */ /* FIXME: this temporarely, until these are included in linux/i2c-id.h */ diff --git a/include/media/ir-common.h b/include/media/ir-common.h index 698670547f16..01b56822df4d 100644 --- a/include/media/ir-common.h +++ b/include/media/ir-common.h @@ -1,5 +1,4 @@ /* - * $Id: ir-common.h,v 1.9 2005/05/15 19:01:26 mchehab Exp $ * * some common structs and functions to handle infrared remotes via * input layer ... @@ -21,11 +20,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include #define IR_TYPE_RC5 1 +#define IR_TYPE_PD 2 /* Pulse distance encoded IR */ #define IR_TYPE_OTHER 99 #define IR_KEYTAB_TYPE u32 @@ -60,6 +59,7 @@ void ir_input_keydown(struct input_dev *dev, struct ir_input_state *ir, u32 ir_extract_bits(u32 data, u32 mask); int ir_dump_samples(u32 *samples, int count); int ir_decode_biphase(u32 *samples, int count, int low, int high); +int ir_decode_pulsedistance(u32 *samples, int count, int low, int high); /* * Local variables: diff --git a/include/media/tuner.h b/include/media/tuner.h index eeaa15ddee85..252673bfa592 100644 --- a/include/media/tuner.h +++ b/include/media/tuner.h @@ -1,5 +1,3 @@ - -/* $Id: tuner.h,v 1.45 2005/07/28 18:41:21 mchehab Exp $ * tuner.h - definition for different tuners @@ -111,6 +109,8 @@ #define TUNER_LG_TDVS_H062F 64 /* DViCO FusionHDTV 5 */ #define TUNER_YMEC_TVF66T5_B_DFF 65 /* Acorp Y878F */ +#define TUNER_LG_NTSC_TALN_MINI 66 + #define NOTUNER 0 #define PAL 1 /* PAL_BG */ #define PAL_I 2 @@ -134,6 +134,7 @@ #define THOMSON 12 #define TUNER_SET_TYPE_ADDR _IOW('T',3,int) +#define TUNER_SET_STANDBY _IOW('T',4,int) #define TDA9887_SET_CONFIG _IOW('t',5,int) /* tv card specific */ @@ 
-153,9 +154,6 @@ #ifdef __KERNEL__ -#define I2C_ADDR_TDA8290 0x4b -#define I2C_ADDR_TDA8275 0x61 - enum tuner_mode { T_UNINITIALIZED = 0, T_RADIO = 1 << V4L2_TUNER_RADIO, @@ -198,6 +196,7 @@ struct tuner { void (*radio_freq)(struct i2c_client *c, unsigned int freq); int (*has_signal)(struct i2c_client *c); int (*is_stereo)(struct i2c_client *c); + void (*standby)(struct i2c_client *c); }; extern unsigned int tuner_debug; @@ -209,12 +208,16 @@ extern int tea5767_tuner_init(struct i2c_client *c); extern int default_tuner_init(struct i2c_client *c); extern int tea5767_autodetection(struct i2c_client *c); -#define tuner_warn(fmt, arg...) \ - dev_printk(KERN_WARNING , &t->i2c.dev , fmt , ## arg) -#define tuner_info(fmt, arg...) \ - dev_printk(KERN_INFO , &t->i2c.dev , fmt , ## arg) -#define tuner_dbg(fmt, arg...) \ - if (tuner_debug) dev_printk(KERN_DEBUG , &t->i2c.dev , fmt , ## arg) +#define tuner_warn(fmt, arg...) do {\ + printk(KERN_WARNING "%s %d-%04x: " fmt, t->i2c.driver->name, \ + t->i2c.adapter->nr, t->i2c.addr , ##arg); } while (0) +#define tuner_info(fmt, arg...) do {\ + printk(KERN_INFO "%s %d-%04x: " fmt, t->i2c.driver->name, \ + t->i2c.adapter->nr, t->i2c.addr , ##arg); } while (0) +#define tuner_dbg(fmt, arg...) 
do {\ + if (tuner_debug) \ + printk(KERN_DEBUG "%s %d-%04x: " fmt, t->i2c.driver->name, \ + t->i2c.adapter->nr, t->i2c.addr , ##arg); } while (0) #endif /* __KERNEL__ */ diff --git a/include/media/tveeprom.h b/include/media/tveeprom.h index 854a2c2f105b..e24e841c3211 100644 --- a/include/media/tveeprom.h +++ b/include/media/tveeprom.h @@ -1,5 +1,4 @@ /* - * $Id: tveeprom.h,v 1.2 2005/06/12 04:19:19 mchehab Exp $ */ struct tveeprom { diff --git a/include/media/video-buf.h b/include/media/video-buf.h index ae6da6de98de..ae8d7a000440 100644 --- a/include/media/video-buf.h +++ b/include/media/video-buf.h @@ -1,5 +1,4 @@ /* - * $Id: video-buf.h,v 1.9 2004/11/07 13:17:15 kraxel Exp $ * * generic helper functions for video4linux capture buffers, to handle * memory management and PCI DMA. Right now bttv + saa7134 use it. -- cgit v1.2.3 From 8b6490e5faafb3a16ea45654fb55f9ff086f1495 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:07 -0700 Subject: [PATCH] files: fix rcu initializers First of a number of files_lock scaability patches. Here are the x86 numbers - tiobench on a 4(8)-way (HT) P4 system on ramdisk : (lockfree) Test 2.6.10-vanilla Stdev 2.6.10-fd Stdev ------------------------------------------------------------- Seqread 1400.8 11.52 1465.4 34.27 Randread 1594 8.86 2397.2 29.21 Seqwrite 242.72 3.47 238.46 6.53 Randwrite 445.74 9.15 446.4 9.75 The performance improvement is very significant. We are getting killed by the cacheline bouncing of the files_struct lock here. Writes on ramdisk (ext2) seems to vary just too much to get any meaningful number. 
Also, with Tridge's thread_perf test on a 4(8)-way (HT) P4 xeon system : 2.6.12-rc5-vanilla : Running test 'readwrite' with 8 tasks Threads 0.34 +/- 0.01 seconds Processes 0.16 +/- 0.00 seconds 2.6.12-rc5-fd : Running test 'readwrite' with 8 tasks Threads 0.17 +/- 0.02 seconds Processes 0.17 +/- 0.02 seconds I repeated the measurements on ramfs (as opposed to ext2 on ramdisk in the earlier measurement) and I got more consistent results from tiobench : 4(8) way xeon P4 ----------------- (lock-free) Test 2.6.12-rc5 Stdev 2.6.12-rc5-fd Stdev ------------------------------------------------------------- Seqread 1282 18.59 1343.6 26.37 Randread 1517 7 2415 34.27 Seqwrite 702.2 5.27 709.46 5.9 Randwrite 846.86 15.15 919.68 21.4 4-way ppc64 ------------ (lock-free) Test 2.6.12-rc5 Stdev 2.6.12-rc5-fd Stdev ------------------------------------------------------------- Seqread 1549 91.16 1569.6 47.2 Randread 1473.6 25.11 1585.4 69.99 Seqwrite 1096.8 20.03 1136 29.61 Randwrite 1189.6 4.04 1275.2 32.96 Also running Tridge's thread_perf test on ppc64 : 2.6.12-rc5-vanilla -------------------- Running test 'readwrite' with 4 tasks Threads 0.20 +/- 0.02 seconds Processes 0.16 +/- 0.01 seconds 2.6.12-rc5-fd -------------------- Running test 'readwrite' with 4 tasks Threads 0.18 +/- 0.04 seconds Processes 0.16 +/- 0.01 seconds The benefits are huge (up to ~60%) in some cases on x86 primarily due to the atomic operations during acquisition of ->file_lock and cache line bouncing in fast path. ppc64 benefits are modest due to LL/SC based locking, but still statistically significant. This patch: RCU head initializer no longer needs the head variable name since we don't use list.h lists anymore. 
Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rcupdate.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fd276adf0fd5..4e65eb44adfd 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,8 +52,8 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; -#define RCU_HEAD_INIT(head) { .next = NULL, .func = NULL } -#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head) +#define RCU_HEAD_INIT { .next = NULL, .func = NULL } +#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT #define INIT_RCU_HEAD(ptr) do { \ (ptr)->next = NULL; (ptr)->func = NULL; \ } while (0) -- cgit v1.2.3 From c0dfb2905126e9e94edebbce8d3e05001301f52d Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:09 -0700 Subject: [PATCH] files: rcuref APIs Adds a set of primitives to do reference counting for objects that are looked up without locks using RCU. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/RCU/rcuref.txt | 74 +++++++++++++++ include/linux/rcuref.h | 220 +++++++++++++++++++++++++++++++++++++++++++ kernel/rcupdate.c | 14 +++ 3 files changed, 308 insertions(+) create mode 100644 Documentation/RCU/rcuref.txt create mode 100644 include/linux/rcuref.h (limited to 'include/linux') diff --git a/Documentation/RCU/rcuref.txt b/Documentation/RCU/rcuref.txt new file mode 100644 index 000000000000..a23fee66064d --- /dev/null +++ b/Documentation/RCU/rcuref.txt @@ -0,0 +1,74 @@ +Refcounter framework for elements of lists/arrays protected by +RCU. + +Refcounting on elements of lists which are protected by traditional +reader/writer spinlocks or semaphores are straight forward as in: + +1. 2. +add() search_and_reference() +{ { + alloc_object read_lock(&list_lock); + ... 
search_for_element + atomic_set(&el->rc, 1); atomic_inc(&el->rc); + write_lock(&list_lock); ... + add_element read_unlock(&list_lock); + ... ... + write_unlock(&list_lock); } +} + +3. 4. +release_referenced() delete() +{ { + ... write_lock(&list_lock); + atomic_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (atomic_dec_and_test(&el->rc)) + kfree(el); + ... + } + +If this list/array is made lock free using rcu as in changing the +write_lock in add() and delete() to spin_lock and changing read_lock +in search_and_reference to rcu_read_lock(), the atomic_inc in +search_and_reference could potentially hold reference to an element which +has already been deleted from the list/array. rcuref_inc_lf takes +care of this scenario. search_and_reference should look as: + +1. 2. +add() search_and_reference() +{ { + alloc_object rcu_read_lock(); + ... search_for_element + atomic_set(&el->rc, 1); if (!rcuref_inc_lf(&el->rc)) { + write_lock(&list_lock); rcu_read_unlock(); + return FAIL; + add_element } + ... ... + write_unlock(&list_lock); rcu_read_unlock(); +} } +3. 4. +release_referenced() delete() +{ { + ... write_lock(&list_lock); + rcuref_dec(&el->rc, relfunc) ... + ... delete_element +} write_unlock(&list_lock); + ... + if (rcuref_dec_and_test(&el->rc)) + call_rcu(&el->head, el_free); + ... + } + +Sometimes, reference to the element needs to be obtained in the +update (write) stream. In such cases, rcuref_inc_lf might be an overkill +since the spinlock serialising list updates is held. rcuref_inc +is to be used in such cases. +For arches which do not have cmpxchg rcuref_inc_lf +api uses a hashed spinlock implementation and the same hashed spinlock +is acquired in all rcuref_xxx primitives to preserve atomicity. +Note: Use rcuref_inc api only if you need to use rcuref_inc_lf on the +refcounter at least at one place. Mixing rcuref_inc and atomic_xxx api +might lead to races. 
rcuref_inc_lf() must be used in lockfree +RCU critical sections only. diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h new file mode 100644 index 000000000000..e1adbba14b67 --- /dev/null +++ b/include/linux/rcuref.h @@ -0,0 +1,220 @@ +/* + * rcuref.h + * + * Reference counting for elements of lists/arrays protected by + * RCU. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2005 + * + * Author: Dipankar Sarma + * Ravikiran Thirumalai + * + * See Documentation/RCU/rcuref.txt for detailed user guide. + * + */ + +#ifndef _RCUREF_H_ +#define _RCUREF_H_ + +#ifdef __KERNEL__ + +#include +#include +#include +#include + +/* + * These APIs work on traditional atomic_t counters used in the + * kernel for reference counting. Under special circumstances + * where a lock-free get() operation races with a put() operation + * these APIs can be used. See Documentation/RCU/rcuref.txt. + */ + +#ifdef __HAVE_ARCH_CMPXCHG + +/** + * rcuref_inc - increment refcount for object. + * @rcuref: reference counter in the object in question. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference + * in a lock-free reader-side critical section. 
+ */ +static inline void rcuref_inc(atomic_t *rcuref) +{ + atomic_inc(rcuref); +} + +/** + * rcuref_dec - decrement refcount for object. + * @rcuref: reference counter in the object in question. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference + * in a lock-free reader-side critical section. + */ +static inline void rcuref_dec(atomic_t *rcuref) +{ + atomic_dec(rcuref); +} + +/** + * rcuref_dec_and_test - decrement refcount for object and test + * @rcuref: reference counter in the object. + * @release: pointer to the function that will clean up the object + * when the last reference to the object is released. + * This pointer is required. + * + * Decrement the refcount, and if 0, return 1. Else return 0. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference + * in a lock-free reader-side critical section. + */ +static inline int rcuref_dec_and_test(atomic_t *rcuref) +{ + return atomic_dec_and_test(rcuref); +} + +/* + * cmpxchg is needed on UP too, if deletions to the list/array can happen + * in interrupt context. + */ + +/** + * rcuref_inc_lf - Take reference to an object in a read-side + * critical section protected by RCU. + * @rcuref: reference counter in the object in question. + * + * Try and increment the refcount by 1. The increment might fail if + * the reference counter has been through a 1 to 0 transition and + * is no longer part of the lock-free list. + * Returns non-zero on successful increment and zero otherwise. + */ +static inline int rcuref_inc_lf(atomic_t *rcuref) +{ + int c, old; + c = atomic_read(rcuref); + while (c && (old = cmpxchg(&rcuref->counter, c, c + 1)) != c) + c = old; + return c; +} + +#else /* !__HAVE_ARCH_CMPXCHG */ + +extern spinlock_t __rcuref_hash[]; + +/* + * Use a hash table of locks to protect the reference count + * since cmpxchg is not available in this arch. 
+ */ +#ifdef CONFIG_SMP +#define RCUREF_HASH_SIZE 4 +#define RCUREF_HASH(k) \ + (&__rcuref_hash[(((unsigned long)k)>>8) & (RCUREF_HASH_SIZE-1)]) +#else +#define RCUREF_HASH_SIZE 1 +#define RCUREF_HASH(k) &__rcuref_hash[0] +#endif /* CONFIG_SMP */ + +/** + * rcuref_inc - increment refcount for object. + * @rcuref: reference counter in the object in question. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference in a lock-free + * reader-side critical section. + */ +static inline void rcuref_inc(atomic_t *rcuref) +{ + unsigned long flags; + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); + rcuref->counter += 1; + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); +} + +/** + * rcuref_dec - decrement refcount for object. + * @rcuref: reference counter in the object in question. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference in a lock-free + * reader-side critical section. + */ +static inline void rcuref_dec(atomic_t *rcuref) +{ + unsigned long flags; + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); + rcuref->counter -= 1; + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); +} + +/** + * rcuref_dec_and_test - decrement refcount for object and test + * @rcuref: reference counter in the object. + * @release: pointer to the function that will clean up the object + * when the last reference to the object is released. + * This pointer is required. + * + * Decrement the refcount, and if 0, return 1. Else return 0. + * + * This should be used only for objects where we use RCU and + * use the rcuref_inc_lf() api to acquire a reference in a lock-free + * reader-side critical section. 
+ */ +static inline int rcuref_dec_and_test(atomic_t *rcuref) +{ + unsigned long flags; + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); + rcuref->counter--; + if (!rcuref->counter) { + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); + return 1; + } else { + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); + return 0; + } +} + +/** + * rcuref_inc_lf - Take reference to an object of a lock-free collection + * by traversing a lock-free list/array. + * @rcuref: reference counter in the object in question. + * + * Try and increment the refcount by 1. The increment might fail if + * the reference counter has been through a 1 to 0 transition and + * object is no longer part of the lock-free list. + * Returns non-zero on successful increment and zero otherwise. + */ +static inline int rcuref_inc_lf(atomic_t *rcuref) +{ + int ret; + unsigned long flags; + spin_lock_irqsave(RCUREF_HASH(rcuref), flags); + if (rcuref->counter) + ret = rcuref->counter++; + else + ret = 0; + spin_unlock_irqrestore(RCUREF_HASH(rcuref), flags); + return ret; +} + + +#endif /* !__HAVE_ARCH_CMPXCHG */ + +#endif /* __KERNEL__ */ +#endif /* _RCUREF_H_ */ diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index f436993bd590..bef3b6901b76 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -45,6 +45,7 @@ #include #include #include +#include #include /* Definition for rcupdate control block. */ @@ -72,6 +73,19 @@ DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; static DEFINE_PER_CPU(struct tasklet_struct, rcu_tasklet) = {NULL}; static int maxbatch = 10; +#ifndef __HAVE_ARCH_CMPXCHG +/* + * We use an array of spinlocks for the rcurefs -- similar to ones in sparc + * 32 bit atomic_t implementations, and a hash function similar to that + * for our refcounting needs. + * Can't help multiprocessors which donot have cmpxchg :( + */ + +spinlock_t __rcuref_hash[RCUREF_HASH_SIZE] = { + [0 ... 
(RCUREF_HASH_SIZE-1)] = SPIN_LOCK_UNLOCKED +}; +#endif + /** * call_rcu - Queue an RCU callback for invocation after a grace period. * @head: structure to be used for queueing the RCU updates. -- cgit v1.2.3 From badf16621c1f9d1ac753be056fce11b43d6e0be5 Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:10 -0700 Subject: [PATCH] files: break up files struct In order for the RCU to work, the file table array, sets and their sizes must be updated atomically. Instead of ensuring this through too many memory barriers, we put the arrays and their sizes in a separate structure. This patch takes the first step of putting the file table elements in a separate structure fdtable that is embedded withing files_struct. It also changes all the users to refer to the file table using files_fdtable() macro. Subsequent applciation of RCU becomes easier after this. Signed-off-by: Dipankar Sarma Signed-Off-By: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/osf_sys.c | 4 ++- arch/ia64/kernel/perfmon.c | 7 ++-- arch/sparc64/solaris/ioctl.c | 8 +++-- drivers/char/tty_io.c | 4 ++- fs/exec.c | 8 +++-- fs/fcntl.c | 47 +++++++++++++++---------- fs/file.c | 42 ++++++++++++++--------- fs/locks.c | 8 +++-- fs/open.c | 41 +++++++++++++--------- fs/proc/array.c | 5 ++- fs/proc/base.c | 4 ++- fs/select.c | 12 +++++-- include/linux/file.h | 23 ++++++++----- include/linux/init_task.h | 13 ++++--- kernel/exit.c | 21 +++++++----- kernel/fork.c | 82 +++++++++++++++++++++++++++----------------- security/selinux/hooks.c | 6 ++-- 17 files changed, 211 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 167fd89f8707..2b034182a0ca 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -974,6 +974,7 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, size_t size; long timeout; int ret = -EINVAL; + 
struct fdtable *fdt; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ -995,7 +996,8 @@ osf_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, } } - if (n < 0 || n > current->files->max_fdset) + fdt = files_fdtable(current->files); + if (n < 0 || n > fdt->max_fdset) goto out_nofds; /* diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f1201ac8a116..4ad97b3b39dc 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -2217,15 +2218,17 @@ static void pfm_free_fd(int fd, struct file *file) { struct files_struct *files = current->files; + struct fdtable *fdt = files_fdtable(files); /* * there ie no fd_uninstall(), so we do it here */ spin_lock(&files->file_lock); - files->fd[fd] = NULL; + rcu_assign_pointer(fdt->fd[fd], NULL); spin_unlock(&files->file_lock); - if (file) put_filp(file); + if (file) + put_filp(file); put_unused_fd(fd); } diff --git a/arch/sparc64/solaris/ioctl.c b/arch/sparc64/solaris/ioctl.c index cac0a1cf0050..374766455f5e 100644 --- a/arch/sparc64/solaris/ioctl.c +++ b/arch/sparc64/solaris/ioctl.c @@ -293,11 +293,13 @@ static struct module_info { static inline int solaris_sockmod(unsigned int fd, unsigned int cmd, u32 arg) { struct inode *ino; + struct fdtable *fdt; /* I wonder which of these tests are superfluous... --patrik */ spin_lock(&current->files->file_lock); - if (! current->files->fd[fd] || - ! current->files->fd[fd]->f_dentry || - ! (ino = current->files->fd[fd]->f_dentry->d_inode) || + fdt = files_fdtable(current->files); + if (! fdt->fd[fd] || + ! fdt->fd[fd]->f_dentry || + ! (ino = fdt->fd[fd]->f_dentry->d_inode) || ! 
S_ISSOCK(ino->i_mode)) { spin_unlock(&current->files->file_lock); return TBADF; diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 6a56ae4f7725..0bfc7af68917 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2454,6 +2454,7 @@ static void __do_SAK(void *arg) int i; struct file *filp; struct tty_ldisc *disc; + struct fdtable *fdt; if (!tty) return; @@ -2480,7 +2481,8 @@ static void __do_SAK(void *arg) task_lock(p); if (p->files) { spin_lock(&p->files->file_lock); - for (i=0; i < p->files->max_fds; i++) { + fdt = files_fdtable(p->files); + for (i=0; i < fdt->max_fds; i++) { filp = fcheck_files(p->files, i); if (!filp) continue; diff --git a/fs/exec.c b/fs/exec.c index 222ab1c572d8..14dd03907ccb 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -798,6 +798,7 @@ no_thread_group: static inline void flush_old_files(struct files_struct * files) { long j = -1; + struct fdtable *fdt; spin_lock(&files->file_lock); for (;;) { @@ -805,12 +806,13 @@ static inline void flush_old_files(struct files_struct * files) j++; i = j * __NFDBITS; - if (i >= files->max_fds || i >= files->max_fdset) + fdt = files_fdtable(files); + if (i >= fdt->max_fds || i >= fdt->max_fdset) break; - set = files->close_on_exec->fds_bits[j]; + set = fdt->close_on_exec->fds_bits[j]; if (!set) continue; - files->close_on_exec->fds_bits[j] = 0; + fdt->close_on_exec->fds_bits[j] = 0; spin_unlock(&files->file_lock); for ( ; set ; i++,set >>= 1) { if (set & 1) { diff --git a/fs/fcntl.c b/fs/fcntl.c index 6fbc9d8fcc36..bfecc6238083 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -24,20 +24,24 @@ void fastcall set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); + fdt = files_fdtable(files); if (flag) - FD_SET(fd, files->close_on_exec); + FD_SET(fd, fdt->close_on_exec); else - FD_CLR(fd, files->close_on_exec); + FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); } static inline int 
get_close_on_exec(unsigned int fd) { struct files_struct *files = current->files; + struct fdtable *fdt; int res; spin_lock(&files->file_lock); - res = FD_ISSET(fd, files->close_on_exec); + fdt = files_fdtable(files); + res = FD_ISSET(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); return res; } @@ -54,24 +58,26 @@ static int locate_fd(struct files_struct *files, unsigned int newfd; unsigned int start; int error; + struct fdtable *fdt; error = -EINVAL; if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; + fdt = files_fdtable(files); repeat: /* * Someone might have closed fd's in the range - * orig_start..files->next_fd + * orig_start..fdt->next_fd */ start = orig_start; - if (start < files->next_fd) - start = files->next_fd; + if (start < fdt->next_fd) + start = fdt->next_fd; newfd = start; - if (start < files->max_fdset) { - newfd = find_next_zero_bit(files->open_fds->fds_bits, - files->max_fdset, start); + if (start < fdt->max_fdset) { + newfd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fdset, start); } error = -EMFILE; @@ -89,8 +95,8 @@ repeat: if (error) goto repeat; - if (start <= files->next_fd) - files->next_fd = newfd + 1; + if (start <= fdt->next_fd) + fdt->next_fd = newfd + 1; error = newfd; @@ -101,13 +107,16 @@ out: static int dupfd(struct file *file, unsigned int start) { struct files_struct * files = current->files; + struct fdtable *fdt; int fd; spin_lock(&files->file_lock); fd = locate_fd(files, file, start); if (fd >= 0) { - FD_SET(fd, files->open_fds); - FD_CLR(fd, files->close_on_exec); + /* locate_fd() may have expanded fdtable, load the ptr */ + fdt = files_fdtable(files); + FD_SET(fd, fdt->open_fds); + FD_CLR(fd, fdt->close_on_exec); spin_unlock(&files->file_lock); fd_install(fd, file); } else { @@ -123,6 +132,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; + struct fdtable *fdt; 
spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) @@ -148,13 +158,14 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) /* Yes. It's a race. In user space. Nothing sane to do */ err = -EBUSY; - tofree = files->fd[newfd]; - if (!tofree && FD_ISSET(newfd, files->open_fds)) + fdt = files_fdtable(files); + tofree = fdt->fd[newfd]; + if (!tofree && FD_ISSET(newfd, fdt->open_fds)) goto out_fput; - files->fd[newfd] = file; - FD_SET(newfd, files->open_fds); - FD_CLR(newfd, files->close_on_exec); + fdt->fd[newfd] = file; + FD_SET(newfd, fdt->open_fds); + FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); if (tofree) diff --git a/fs/file.c b/fs/file.c index 92b5f25985d2..f5926ce73f37 100644 --- a/fs/file.c +++ b/fs/file.c @@ -59,13 +59,15 @@ static int expand_fd_array(struct files_struct *files, int nr) { struct file **new_fds; int error, nfds; + struct fdtable *fdt; error = -EMFILE; - if (files->max_fds >= NR_OPEN || nr >= NR_OPEN) + fdt = files_fdtable(files); + if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) goto out; - nfds = files->max_fds; + nfds = fdt->max_fds; spin_unlock(&files->file_lock); /* @@ -95,13 +97,14 @@ static int expand_fd_array(struct files_struct *files, int nr) goto out; /* Copy the existing array and install the new pointer */ + fdt = files_fdtable(files); - if (nfds > files->max_fds) { + if (nfds > fdt->max_fds) { struct file **old_fds; int i; - old_fds = xchg(&files->fd, new_fds); - i = xchg(&files->max_fds, nfds); + old_fds = xchg(&fdt->fd, new_fds); + i = xchg(&fdt->max_fds, nfds); /* Don't copy/clear the array if we are creating a new fd array for fork() */ @@ -164,12 +167,14 @@ static int expand_fdset(struct files_struct *files, int nr) { fd_set *new_openset = NULL, *new_execset = NULL; int error, nfds = 0; + struct fdtable *fdt; error = -EMFILE; - if (files->max_fdset >= NR_OPEN || nr >= NR_OPEN) + fdt = files_fdtable(files); + if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN) goto out; - nfds = 
files->max_fdset; + nfds = fdt->max_fdset; spin_unlock(&files->file_lock); /* Expand to the max in easy steps */ @@ -193,24 +198,25 @@ static int expand_fdset(struct files_struct *files, int nr) error = 0; /* Copy the existing tables and install the new pointers */ - if (nfds > files->max_fdset) { - int i = files->max_fdset / (sizeof(unsigned long) * 8); - int count = (nfds - files->max_fdset) / 8; + fdt = files_fdtable(files); + if (nfds > fdt->max_fdset) { + int i = fdt->max_fdset / (sizeof(unsigned long) * 8); + int count = (nfds - fdt->max_fdset) / 8; /* * Don't copy the entire array if the current fdset is * not yet initialised. */ if (i) { - memcpy (new_openset, files->open_fds, files->max_fdset/8); - memcpy (new_execset, files->close_on_exec, files->max_fdset/8); + memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8); + memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8); memset (&new_openset->fds_bits[i], 0, count); memset (&new_execset->fds_bits[i], 0, count); } - nfds = xchg(&files->max_fdset, nfds); - new_openset = xchg(&files->open_fds, new_openset); - new_execset = xchg(&files->close_on_exec, new_execset); + nfds = xchg(&fdt->max_fdset, nfds); + new_openset = xchg(&fdt->open_fds, new_openset); + new_execset = xchg(&fdt->close_on_exec, new_execset); spin_unlock(&files->file_lock); free_fdset (new_openset, nfds); free_fdset (new_execset, nfds); @@ -237,13 +243,15 @@ out: int expand_files(struct files_struct *files, int nr) { int err, expand = 0; + struct fdtable *fdt; - if (nr >= files->max_fdset) { + fdt = files_fdtable(files); + if (nr >= fdt->max_fdset) { expand = 1; if ((err = expand_fdset(files, nr))) goto out; } - if (nr >= files->max_fds) { + if (nr >= fdt->max_fds) { expand = 1; if ((err = expand_fd_array(files, nr))) goto out; diff --git a/fs/locks.c b/fs/locks.c index 11956b6179ff..c2c09b4798d6 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2198,21 +2198,23 @@ void steal_locks(fl_owner_t from) { struct files_struct *files = 
current->files; int i, j; + struct fdtable *fdt; if (from == files) return; lock_kernel(); j = 0; + fdt = files_fdtable(files); for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= files->max_fdset || i >= files->max_fds) + if (i >= fdt->max_fdset || i >= fdt->max_fds) break; - set = files->open_fds->fds_bits[j++]; + set = fdt->open_fds->fds_bits[j++]; while (set) { if (set & 1) { - struct file *file = files->fd[i]; + struct file *file = fdt->fd[i]; if (file) __steal_locks(file, from); } diff --git a/fs/open.c b/fs/open.c index 4ee2dcc31c28..b6542516a0ca 100644 --- a/fs/open.c +++ b/fs/open.c @@ -842,14 +842,16 @@ int get_unused_fd(void) { struct files_struct * files = current->files; int fd, error; + struct fdtable *fdt; error = -EMFILE; spin_lock(&files->file_lock); repeat: - fd = find_next_zero_bit(files->open_fds->fds_bits, - files->max_fdset, - files->next_fd); + fdt = files_fdtable(files); + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fdset, + fdt->next_fd); /* * N.B. 
For clone tasks sharing a files structure, this test @@ -872,14 +874,14 @@ repeat: goto repeat; } - FD_SET(fd, files->open_fds); - FD_CLR(fd, files->close_on_exec); - files->next_fd = fd + 1; + FD_SET(fd, fdt->open_fds); + FD_CLR(fd, fdt->close_on_exec); + fdt->next_fd = fd + 1; #if 1 /* Sanity check */ - if (files->fd[fd] != NULL) { + if (fdt->fd[fd] != NULL) { printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - files->fd[fd] = NULL; + fdt->fd[fd] = NULL; } #endif error = fd; @@ -893,9 +895,10 @@ EXPORT_SYMBOL(get_unused_fd); static inline void __put_unused_fd(struct files_struct *files, unsigned int fd) { - __FD_CLR(fd, files->open_fds); - if (fd < files->next_fd) - files->next_fd = fd; + struct fdtable *fdt = files_fdtable(files); + __FD_CLR(fd, fdt->open_fds); + if (fd < fdt->next_fd) + fdt->next_fd = fd; } void fastcall put_unused_fd(unsigned int fd) @@ -924,10 +927,12 @@ EXPORT_SYMBOL(put_unused_fd); void fastcall fd_install(unsigned int fd, struct file * file) { struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); - if (unlikely(files->fd[fd] != NULL)) + fdt = files_fdtable(files); + if (unlikely(fdt->fd[fd] != NULL)) BUG(); - files->fd[fd] = file; + fdt->fd[fd] = file; spin_unlock(&files->file_lock); } @@ -1010,15 +1015,17 @@ asmlinkage long sys_close(unsigned int fd) { struct file * filp; struct files_struct *files = current->files; + struct fdtable *fdt; spin_lock(&files->file_lock); - if (fd >= files->max_fds) + fdt = files_fdtable(files); + if (fd >= fdt->max_fds) goto out_unlock; - filp = files->fd[fd]; + filp = fdt->fd[fd]; if (!filp) goto out_unlock; - files->fd[fd] = NULL; - FD_CLR(fd, files->close_on_exec); + fdt->fd[fd] = NULL; + FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); return filp_close(filp, files); diff --git a/fs/proc/array.c b/fs/proc/array.c index 37668fe998ad..d88d518d30f6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 
+159,7 @@ static inline char * task_state(struct task_struct *p, char *buffer) { struct group_info *group_info; int g; + struct fdtable *fdt = NULL; read_lock(&tasklist_lock); buffer += sprintf(buffer, @@ -179,10 +180,12 @@ static inline char * task_state(struct task_struct *p, char *buffer) p->gid, p->egid, p->sgid, p->fsgid); read_unlock(&tasklist_lock); task_lock(p); + if (p->files) + fdt = files_fdtable(p->files); buffer += sprintf(buffer, "FDSize:\t%d\n" "Groups:\t", - p->files ? p->files->max_fds : 0); + fdt ? fdt->max_fds : 0); group_info = p->group_info; get_group_info(group_info); diff --git a/fs/proc/base.c b/fs/proc/base.c index 84751f3f52d5..d0087a0b024b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1039,6 +1039,7 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) int retval; char buf[NUMBUF]; struct files_struct * files; + struct fdtable *fdt; retval = -ENOENT; if (!pid_alive(p)) @@ -1062,8 +1063,9 @@ static int proc_readfd(struct file * filp, void * dirent, filldir_t filldir) if (!files) goto out; spin_lock(&files->file_lock); + fdt = files_fdtable(files); for (fd = filp->f_pos-2; - fd < files->max_fds; + fd < fdt->max_fds; fd++, filp->f_pos++) { unsigned int i,j; diff --git a/fs/select.c b/fs/select.c index b80e7eb0ac0d..2e56325c73c4 100644 --- a/fs/select.c +++ b/fs/select.c @@ -132,11 +132,13 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) unsigned long *open_fds; unsigned long set; int max; + struct fdtable *fdt; /* handle last in-complete long-word first */ set = ~(~0UL << (n & (__NFDBITS-1))); n /= __NFDBITS; - open_fds = current->files->open_fds->fds_bits+n; + fdt = files_fdtable(current->files); + open_fds = fdt->open_fds->fds_bits+n; max = 0; if (set) { set &= BITS(fds, n); @@ -299,6 +301,7 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s char *bits; long timeout; int ret, size, max_fdset; + struct fdtable *fdt; timeout = MAX_SCHEDULE_TIMEOUT; if (tvp) { @@ 
-326,7 +329,8 @@ sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, s goto out_nofds; /* max_fdset can increase, so grab it once to avoid race */ - max_fdset = current->files->max_fdset; + fdt = files_fdtable(current->files); + max_fdset = fdt->max_fdset; if (n > max_fdset) n = max_fdset; @@ -464,9 +468,11 @@ asmlinkage long sys_poll(struct pollfd __user * ufds, unsigned int nfds, long ti unsigned int i; struct poll_list *head; struct poll_list *walk; + struct fdtable *fdt; /* Do a sanity check on nfds ... */ - if (nfds > current->files->max_fdset && nfds > OPEN_MAX) + fdt = files_fdtable(current->files); + if (nfds > fdt->max_fdset && nfds > OPEN_MAX) return -EINVAL; if (timeout) { diff --git a/include/linux/file.h b/include/linux/file.h index 5206beb9a80e..db372230848e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -16,23 +16,29 @@ */ #define NR_OPEN_DEFAULT BITS_PER_LONG +struct fdtable { + unsigned int max_fds; + int max_fdset; + int next_fd; + struct file ** fd; /* current fd array */ + fd_set *close_on_exec; + fd_set *open_fds; +}; + /* * Open file table structure */ struct files_struct { atomic_t count; spinlock_t file_lock; /* Protects all the below members. 
Nests inside tsk->alloc_lock */ - int max_fds; - int max_fdset; - int next_fd; - struct file ** fd; /* current fd array */ - fd_set *close_on_exec; - fd_set *open_fds; + struct fdtable fdtab; fd_set close_on_exec_init; fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; }; +#define files_fdtable(files) (&(files)->fdtab) + extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); @@ -63,9 +69,10 @@ extern int expand_files(struct files_struct *, int nr); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { struct file * file = NULL; + struct fdtable *fdt = files_fdtable(files); - if (fd < files->max_fds) - file = files->fd[fd]; + if (fd < fdt->max_fds) + file = fdt->fd[fd]; return file; } diff --git a/include/linux/init_task.h b/include/linux/init_task.h index c727c195a91a..94aefa54a1b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -3,16 +3,21 @@ #include -#define INIT_FILES \ -{ \ - .count = ATOMIC_INIT(1), \ - .file_lock = SPIN_LOCK_UNLOCKED, \ +#define INIT_FDTABLE \ +{ \ .max_fds = NR_OPEN_DEFAULT, \ .max_fdset = __FD_SETSIZE, \ .next_fd = 0, \ .fd = &init_files.fd_array[0], \ .close_on_exec = &init_files.close_on_exec_init, \ .open_fds = &init_files.open_fds_init, \ +} + +#define INIT_FILES \ +{ \ + .count = ATOMIC_INIT(1), \ + .file_lock = SPIN_LOCK_UNLOCKED, \ + .fdtab = INIT_FDTABLE, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ .fd_array = { NULL, } \ diff --git a/kernel/exit.c b/kernel/exit.c index 5b0fb9f09f21..83beb1e93b18 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -368,17 +368,19 @@ EXPORT_SYMBOL(daemonize); static inline void close_files(struct files_struct * files) { int i, j; + struct fdtable *fdt; j = 0; + fdt = files_fdtable(files); for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= files->max_fdset || i >= files->max_fds) + if (i >= fdt->max_fdset || i >= fdt->max_fds) break; - set = 
files->open_fds->fds_bits[j++]; + set = fdt->open_fds->fds_bits[j++]; while (set) { if (set & 1) { - struct file * file = xchg(&files->fd[i], NULL); + struct file * file = xchg(&fdt->fd[i], NULL); if (file) filp_close(file, files); } @@ -403,16 +405,19 @@ struct files_struct *get_files_struct(struct task_struct *task) void fastcall put_files_struct(struct files_struct *files) { + struct fdtable *fdt; + if (atomic_dec_and_test(&files->count)) { close_files(files); /* * Free the fd and fdset arrays if we expanded them. */ - if (files->fd != &files->fd_array[0]) - free_fd_array(files->fd, files->max_fds); - if (files->max_fdset > __FD_SETSIZE) { - free_fdset(files->open_fds, files->max_fdset); - free_fdset(files->close_on_exec, files->max_fdset); + fdt = files_fdtable(files); + if (fdt->fd != &files->fd_array[0]) + free_fd_array(fdt->fd, fdt->max_fds); + if (fdt->max_fdset > __FD_SETSIZE) { + free_fdset(fdt->open_fds, fdt->max_fdset); + free_fdset(fdt->close_on_exec, fdt->max_fdset); } kmem_cache_free(files_cachep, files); } diff --git a/kernel/fork.c b/kernel/fork.c index b25802065031..ecc694debb50 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -568,21 +568,47 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) static int count_open_files(struct files_struct *files, int size) { int i; + struct fdtable *fdt; /* Find the last open fd */ + fdt = files_fdtable(files); for (i = size/(8*sizeof(long)); i > 0; ) { - if (files->open_fds->fds_bits[--i]) + if (fdt->open_fds->fds_bits[--i]) break; } i = (i+1) * 8 * sizeof(long); return i; } +static struct files_struct *alloc_files(void) +{ + struct files_struct *newf; + struct fdtable *fdt; + + newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); + if (!newf) + goto out; + + atomic_set(&newf->count, 1); + + spin_lock_init(&newf->file_lock); + fdt = files_fdtable(newf); + fdt->next_fd = 0; + fdt->max_fds = NR_OPEN_DEFAULT; + fdt->max_fdset = __FD_SETSIZE; + fdt->close_on_exec = 
&newf->close_on_exec_init; + fdt->open_fds = &newf->open_fds_init; + fdt->fd = &newf->fd_array[0]; +out: + return newf; +} + static int copy_files(unsigned long clone_flags, struct task_struct * tsk) { struct files_struct *oldf, *newf; struct file **old_fds, **new_fds; int open_files, size, i, error = 0, expand; + struct fdtable *old_fdt, *new_fdt; /* * A background process may not have any files ... @@ -603,35 +629,27 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) */ tsk->files = NULL; error = -ENOMEM; - newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL); - if (!newf) + newf = alloc_files(); + if (!newf) goto out; - atomic_set(&newf->count, 1); - - spin_lock_init(&newf->file_lock); - newf->next_fd = 0; - newf->max_fds = NR_OPEN_DEFAULT; - newf->max_fdset = __FD_SETSIZE; - newf->close_on_exec = &newf->close_on_exec_init; - newf->open_fds = &newf->open_fds_init; - newf->fd = &newf->fd_array[0]; - spin_lock(&oldf->file_lock); - - open_files = count_open_files(oldf, oldf->max_fdset); + old_fdt = files_fdtable(oldf); + new_fdt = files_fdtable(newf); + size = old_fdt->max_fdset; + open_files = count_open_files(oldf, old_fdt->max_fdset); expand = 0; /* * Check whether we need to allocate a larger fd array or fd set. * Note: we're not a clone task, so the open count won't change. 
*/ - if (open_files > newf->max_fdset) { - newf->max_fdset = 0; + if (open_files > new_fdt->max_fdset) { + new_fdt->max_fdset = 0; expand = 1; } - if (open_files > newf->max_fds) { - newf->max_fds = 0; + if (open_files > new_fdt->max_fds) { + new_fdt->max_fds = 0; expand = 1; } @@ -646,11 +664,11 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) spin_lock(&oldf->file_lock); } - old_fds = oldf->fd; - new_fds = newf->fd; + old_fds = old_fdt->fd; + new_fds = new_fdt->fd; - memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8); - memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8); + memcpy(new_fdt->open_fds->fds_bits, old_fdt->open_fds->fds_bits, open_files/8); + memcpy(new_fdt->close_on_exec->fds_bits, old_fdt->close_on_exec->fds_bits, open_files/8); for (i = open_files; i != 0; i--) { struct file *f = *old_fds++; @@ -663,24 +681,24 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) * is partway through open(). So make sure that this * fd is available to the new process. 
*/ - FD_CLR(open_files - i, newf->open_fds); + FD_CLR(open_files - i, new_fdt->open_fds); } *new_fds++ = f; } spin_unlock(&oldf->file_lock); /* compute the remainder to be cleared */ - size = (newf->max_fds - open_files) * sizeof(struct file *); + size = (new_fdt->max_fds - open_files) * sizeof(struct file *); /* This is long word aligned thus could use a optimized version */ memset(new_fds, 0, size); - if (newf->max_fdset > open_files) { - int left = (newf->max_fdset-open_files)/8; + if (new_fdt->max_fdset > open_files) { + int left = (new_fdt->max_fdset-open_files)/8; int start = open_files / (8 * sizeof(unsigned long)); - memset(&newf->open_fds->fds_bits[start], 0, left); - memset(&newf->close_on_exec->fds_bits[start], 0, left); + memset(&new_fdt->open_fds->fds_bits[start], 0, left); + memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } tsk->files = newf; @@ -689,9 +707,9 @@ out: return error; out_release: - free_fdset (newf->close_on_exec, newf->max_fdset); - free_fdset (newf->open_fds, newf->max_fdset); - free_fd_array(newf->fd, newf->max_fds); + free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); + free_fdset (new_fdt->open_fds, new_fdt->max_fdset); + free_fd_array(new_fdt->fd, new_fdt->max_fds); kmem_cache_free(files_cachep, newf); goto out; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 3f0b533be92c..acb5a495a902 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1594,6 +1594,7 @@ static inline void flush_unauthorized_files(struct files_struct * files) struct avc_audit_data ad; struct file *file, *devnull = NULL; struct tty_struct *tty = current->signal->tty; + struct fdtable *fdt; long j = -1; if (tty) { @@ -1627,9 +1628,10 @@ static inline void flush_unauthorized_files(struct files_struct * files) j++; i = j * __NFDBITS; - if (i >= files->max_fds || i >= files->max_fdset) + fdt = files_fdtable(files); + if (i >= fdt->max_fds || i >= fdt->max_fdset) break; - set = files->open_fds->fds_bits[j]; + 
set = fdt->open_fds->fds_bits[j]; if (!set) continue; spin_unlock(&files->file_lock); -- cgit v1.2.3 From ab2af1f5005069321c5d130f09cce577b03f43ef Mon Sep 17 00:00:00 2001 From: Dipankar Sarma Date: Fri, 9 Sep 2005 13:04:13 -0700 Subject: [PATCH] files: files struct with RCU Patch to eliminate struct files_struct.file_lock spinlock on the reader side and use rcu refcounting rcuref_xxx api for the f_count refcounter. The updates to the fdtable are done by allocating a new fdtable structure and setting files->fdt to point to the new structure. The fdtable structure is protected by RCU thereby allowing lock-free lookup. For fd arrays/sets that are vmalloced, we use keventd to free them since RCU callbacks can't sleep. A global list of fdtable to be freed is not scalable, so we use a per-cpu list. If keventd is already handling the current cpu's work, we use a timer to defer queueing of that work. Since the last publication, this patch has been re-written to avoid using explicit memory barriers and use rcu_assign_pointer(), rcu_dereference() primitives instead. This required that the fd information is kept in a separate structure (fdtable) and updated atomically. Signed-off-by: Dipankar Sarma Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/aio.c | 3 +- fs/fcntl.c | 13 +- fs/file.c | 389 ++++++++++++++++++++++++++++++---------------- fs/file_table.c | 40 +++-- fs/open.c | 8 +- include/linux/file.h | 11 +- include/linux/fs.h | 4 +- include/linux/init_task.h | 5 + kernel/exit.c | 15 +- kernel/fork.c | 23 ++- 10 files changed, 345 insertions(+), 166 deletions(-) (limited to 'include/linux') diff --git a/fs/aio.c b/fs/aio.c index 201c1847fa07..38f62680fd63 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -499,7 +500,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. 
* Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(rcuref_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/fcntl.c b/fs/fcntl.c index bfecc6238083..d2f3ed8acd93 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -64,8 +65,8 @@ static int locate_fd(struct files_struct *files, if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) goto out; - fdt = files_fdtable(files); repeat: + fdt = files_fdtable(files); /* * Someone might have closed fd's in the range * orig_start..fdt->next_fd @@ -95,9 +96,15 @@ repeat: if (error) goto repeat; + /* + * We reacquired files_lock, so we are safe as long as + * we reacquire the fdtable pointer and use it while holding + * the lock, no one can free it during that time. + */ + fdt = files_fdtable(files); if (start <= fdt->next_fd) fdt->next_fd = newfd + 1; - + error = newfd; out: @@ -163,7 +170,7 @@ asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) if (!tofree && FD_ISSET(newfd, fdt->open_fds)) goto out_fput; - fdt->fd[newfd] = file; + rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); FD_CLR(newfd, fdt->close_on_exec); spin_unlock(&files->file_lock); diff --git a/fs/file.c b/fs/file.c index f5926ce73f37..2127a7b9dc3a 100644 --- a/fs/file.c +++ b/fs/file.c @@ -13,6 +13,25 @@ #include #include #include +#include +#include +#include +#include + +struct fdtable_defer { + spinlock_t lock; + struct work_struct wq; + struct timer_list timer; + struct fdtable *next; +}; + +/* + * We use this list to defer free fdtables that have vmalloced + * sets/arrays. By keeping a per-cpu list, we avoid having to embed + * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in + * this per-task structure. 
+ */ +static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); /* @@ -48,85 +67,143 @@ void free_fd_array(struct file **array, int num) vfree(array); } -/* - * Expand the fd array in the files_struct. Called with the files - * spinlock held for write. - */ - -static int expand_fd_array(struct files_struct *files, int nr) - __releases(files->file_lock) - __acquires(files->file_lock) +static void __free_fdtable(struct fdtable *fdt) { - struct file **new_fds; - int error, nfds; - struct fdtable *fdt; + int fdset_size, fdarray_size; - - error = -EMFILE; - fdt = files_fdtable(files); - if (fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) - goto out; + fdset_size = fdt->max_fdset / 8; + fdarray_size = fdt->max_fds * sizeof(struct file *); + free_fdset(fdt->open_fds, fdset_size); + free_fdset(fdt->close_on_exec, fdset_size); + free_fd_array(fdt->fd, fdarray_size); + kfree(fdt); +} - nfds = fdt->max_fds; - spin_unlock(&files->file_lock); +static void fdtable_timer(unsigned long data) +{ + struct fdtable_defer *fddef = (struct fdtable_defer *)data; - /* - * Expand to the max in easy steps, and keep expanding it until - * we have enough for the requested fd array size. + spin_lock(&fddef->lock); + /* + * If someone already emptied the queue return. 
*/ + if (!fddef->next) + goto out; + if (!schedule_work(&fddef->wq)) + mod_timer(&fddef->timer, 5); +out: + spin_unlock(&fddef->lock); +} - do { -#if NR_OPEN_DEFAULT < 256 - if (nfds < 256) - nfds = 256; - else -#endif - if (nfds < (PAGE_SIZE / sizeof(struct file *))) - nfds = PAGE_SIZE / sizeof(struct file *); - else { - nfds = nfds * 2; - if (nfds > NR_OPEN) - nfds = NR_OPEN; - } - } while (nfds <= nr); +static void free_fdtable_work(struct fdtable_defer *f) +{ + struct fdtable *fdt; - error = -ENOMEM; - new_fds = alloc_fd_array(nfds); - spin_lock(&files->file_lock); - if (!new_fds) - goto out; + spin_lock_bh(&f->lock); + fdt = f->next; + f->next = NULL; + spin_unlock_bh(&f->lock); + while(fdt) { + struct fdtable *next = fdt->next; + __free_fdtable(fdt); + fdt = next; + } +} - /* Copy the existing array and install the new pointer */ - fdt = files_fdtable(files); +static void free_fdtable_rcu(struct rcu_head *rcu) +{ + struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); + int fdset_size, fdarray_size; + struct fdtable_defer *fddef; - if (nfds > fdt->max_fds) { - struct file **old_fds; - int i; - - old_fds = xchg(&fdt->fd, new_fds); - i = xchg(&fdt->max_fds, nfds); - - /* Don't copy/clear the array if we are creating a new - fd array for fork() */ - if (i) { - memcpy(new_fds, old_fds, i * sizeof(struct file *)); - /* clear the remainder of the array */ - memset(&new_fds[i], 0, - (nfds-i) * sizeof(struct file *)); - - spin_unlock(&files->file_lock); - free_fd_array(old_fds, i); - spin_lock(&files->file_lock); - } + BUG_ON(!fdt); + fdset_size = fdt->max_fdset / 8; + fdarray_size = fdt->max_fds * sizeof(struct file *); + + if (fdt->free_files) { + /* + * The this fdtable was embedded in the files structure + * and the files structure itself was getting destroyed. + * It is now safe to free the files structure. 
+ */ + kmem_cache_free(files_cachep, fdt->free_files); + return; + } + if (fdt->max_fdset <= __FD_SETSIZE && fdt->max_fds <= NR_OPEN_DEFAULT) { + /* + * The fdtable was embedded + */ + return; + } + if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { + kfree(fdt->open_fds); + kfree(fdt->close_on_exec); + kfree(fdt->fd); + kfree(fdt); } else { - /* Somebody expanded the array while we slept ... */ - spin_unlock(&files->file_lock); - free_fd_array(new_fds, nfds); - spin_lock(&files->file_lock); + fddef = &get_cpu_var(fdtable_defer_list); + spin_lock(&fddef->lock); + fdt->next = fddef->next; + fddef->next = fdt; + /* + * vmallocs are handled from the workqueue context. + * If the per-cpu workqueue is running, then we + * defer work scheduling through a timer. + */ + if (!schedule_work(&fddef->wq)) + mod_timer(&fddef->timer, 5); + spin_unlock(&fddef->lock); + put_cpu_var(fdtable_defer_list); } - error = 0; -out: - return error; +} + +void free_fdtable(struct fdtable *fdt) +{ + if (fdt->free_files || fdt->max_fdset > __FD_SETSIZE || + fdt->max_fds > NR_OPEN_DEFAULT) + call_rcu(&fdt->rcu, free_fdtable_rcu); +} + +/* + * Expand the fdset in the files_struct. Called with the files spinlock + * held for write. + */ +static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) +{ + int i; + int count; + + BUG_ON(nfdt->max_fdset < fdt->max_fdset); + BUG_ON(nfdt->max_fds < fdt->max_fds); + /* Copy the existing tables and install the new pointers */ + + i = fdt->max_fdset / (sizeof(unsigned long) * 8); + count = (nfdt->max_fdset - fdt->max_fdset) / 8; + + /* + * Don't copy the entire array if the current fdset is + * not yet initialised. 
+ */ + if (i) { + memcpy (nfdt->open_fds, fdt->open_fds, + fdt->max_fdset/8); + memcpy (nfdt->close_on_exec, fdt->close_on_exec, + fdt->max_fdset/8); + memset (&nfdt->open_fds->fds_bits[i], 0, count); + memset (&nfdt->close_on_exec->fds_bits[i], 0, count); + } + + /* Don't copy/clear the array if we are creating a new + fd array for fork() */ + if (fdt->max_fds) { + memcpy(nfdt->fd, fdt->fd, + fdt->max_fds * sizeof(struct file *)); + /* clear the remainder of the array */ + memset(&nfdt->fd[fdt->max_fds], 0, + (nfdt->max_fds - fdt->max_fds) * + sizeof(struct file *)); + } + nfdt->next_fd = fdt->next_fd; } /* @@ -157,28 +234,21 @@ void free_fdset(fd_set *array, int num) vfree(array); } -/* - * Expand the fdset in the files_struct. Called with the files spinlock - * held for write. - */ -static int expand_fdset(struct files_struct *files, int nr) - __releases(file->file_lock) - __acquires(file->file_lock) +static struct fdtable *alloc_fdtable(int nr) { - fd_set *new_openset = NULL, *new_execset = NULL; - int error, nfds = 0; - struct fdtable *fdt; - - error = -EMFILE; - fdt = files_fdtable(files); - if (fdt->max_fdset >= NR_OPEN || nr >= NR_OPEN) - goto out; + struct fdtable *fdt = NULL; + int nfds = 0; + fd_set *new_openset = NULL, *new_execset = NULL; + struct file **new_fds; - nfds = fdt->max_fdset; - spin_unlock(&files->file_lock); + fdt = kmalloc(sizeof(*fdt), GFP_KERNEL); + if (!fdt) + goto out; + memset(fdt, 0, sizeof(*fdt)); - /* Expand to the max in easy steps */ - do { + nfds = __FD_SETSIZE; + /* Expand to the max in easy steps */ + do { if (nfds < (PAGE_SIZE * 8)) nfds = PAGE_SIZE * 8; else { @@ -188,50 +258,88 @@ static int expand_fdset(struct files_struct *files, int nr) } } while (nfds <= nr); - error = -ENOMEM; - new_openset = alloc_fdset(nfds); - new_execset = alloc_fdset(nfds); - spin_lock(&files->file_lock); - if (!new_openset || !new_execset) + new_openset = alloc_fdset(nfds); + new_execset = alloc_fdset(nfds); + if (!new_openset || !new_execset) + 
goto out; + fdt->open_fds = new_openset; + fdt->close_on_exec = new_execset; + fdt->max_fdset = nfds; + + nfds = NR_OPEN_DEFAULT; + /* + * Expand to the max in easy steps, and keep expanding it until + * we have enough for the requested fd array size. + */ + do { +#if NR_OPEN_DEFAULT < 256 + if (nfds < 256) + nfds = 256; + else +#endif + if (nfds < (PAGE_SIZE / sizeof(struct file *))) + nfds = PAGE_SIZE / sizeof(struct file *); + else { + nfds = nfds * 2; + if (nfds > NR_OPEN) + nfds = NR_OPEN; + } + } while (nfds <= nr); + new_fds = alloc_fd_array(nfds); + if (!new_fds) goto out; + fdt->fd = new_fds; + fdt->max_fds = nfds; + fdt->free_files = NULL; + return fdt; +out: + if (new_openset) + free_fdset(new_openset, nfds); + if (new_execset) + free_fdset(new_execset, nfds); + kfree(fdt); + return NULL; +} - error = 0; - - /* Copy the existing tables and install the new pointers */ +/* + * Expands the file descriptor table - it will allocate a new fdtable and + * both fd array and fdset. It is expected to be called with the + * files_lock held. + */ +static int expand_fdtable(struct files_struct *files, int nr) + __releases(files->file_lock) + __acquires(files->file_lock) +{ + int error = 0; + struct fdtable *fdt; + struct fdtable *nfdt = NULL; + + spin_unlock(&files->file_lock); + nfdt = alloc_fdtable(nr); + if (!nfdt) { + error = -ENOMEM; + spin_lock(&files->file_lock); + goto out; + } + + spin_lock(&files->file_lock); fdt = files_fdtable(files); - if (nfds > fdt->max_fdset) { - int i = fdt->max_fdset / (sizeof(unsigned long) * 8); - int count = (nfds - fdt->max_fdset) / 8; - - /* - * Don't copy the entire array if the current fdset is - * not yet initialised. 
- */ - if (i) { - memcpy (new_openset, fdt->open_fds, fdt->max_fdset/8); - memcpy (new_execset, fdt->close_on_exec, fdt->max_fdset/8); - memset (&new_openset->fds_bits[i], 0, count); - memset (&new_execset->fds_bits[i], 0, count); - } - - nfds = xchg(&fdt->max_fdset, nfds); - new_openset = xchg(&fdt->open_fds, new_openset); - new_execset = xchg(&fdt->close_on_exec, new_execset); + /* + * Check again since another task may have expanded the + * fd table while we dropped the lock + */ + if (nr >= fdt->max_fds || nr >= fdt->max_fdset) { + copy_fdtable(nfdt, fdt); + } else { + /* Somebody expanded while we dropped file_lock */ spin_unlock(&files->file_lock); - free_fdset (new_openset, nfds); - free_fdset (new_execset, nfds); + __free_fdtable(nfdt); spin_lock(&files->file_lock); - return 0; - } - /* Somebody expanded the array while we slept ... */ - + goto out; + } + rcu_assign_pointer(files->fdt, nfdt); + free_fdtable(fdt); out: - spin_unlock(&files->file_lock); - if (new_openset) - free_fdset(new_openset, nfds); - if (new_execset) - free_fdset(new_execset, nfds); - spin_lock(&files->file_lock); return error; } @@ -246,17 +354,36 @@ int expand_files(struct files_struct *files, int nr) struct fdtable *fdt; fdt = files_fdtable(files); - if (nr >= fdt->max_fdset) { - expand = 1; - if ((err = expand_fdset(files, nr))) + if (nr >= fdt->max_fdset || nr >= fdt->max_fds) { + if (fdt->max_fdset >= NR_OPEN || + fdt->max_fds >= NR_OPEN || nr >= NR_OPEN) { + err = -EMFILE; goto out; - } - if (nr >= fdt->max_fds) { + } expand = 1; - if ((err = expand_fd_array(files, nr))) + if ((err = expand_fdtable(files, nr))) goto out; } err = expand; out: return err; } + +static void __devinit fdtable_defer_list_init(int cpu) +{ + struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); + spin_lock_init(&fddef->lock); + INIT_WORK(&fddef->wq, (void (*)(void *))free_fdtable_work, fddef); + init_timer(&fddef->timer); + fddef->timer.data = (unsigned long)fddef; + fddef->timer.function = 
fdtable_timer; + fddef->next = NULL; +} + +void __init files_defer_init(void) +{ + int i; + /* Really early - can't use for_each_cpu */ + for (i = 0; i < NR_CPUS; i++) + fdtable_defer_list_init(i); +} diff --git a/fs/file_table.c b/fs/file_table.c index 43e9e1737de2..86ec8ae985b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -53,11 +54,17 @@ void filp_dtor(void * objp, struct kmem_cache_s *cachep, unsigned long dflags) spin_unlock_irqrestore(&filp_count_lock, flags); } -static inline void file_free(struct file *f) +static inline void file_free_rcu(struct rcu_head *head) { + struct file *f = container_of(head, struct file, f_rcuhead); kmem_cache_free(filp_cachep, f); } +static inline void file_free(struct file *f) +{ + call_rcu(&f->f_rcuhead, file_free_rcu); +} + /* Find an unused file structure and return a pointer to it. * Returns NULL, if there are no more free file structures or * we run out of memory. 
@@ -110,7 +117,7 @@ EXPORT_SYMBOL(get_empty_filp); void fastcall fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (rcuref_dec_and_test(&file->f_count)) __fput(file); } @@ -156,11 +163,17 @@ struct file fastcall *fget(unsigned int fd) struct file *file; struct files_struct *files = current->files; - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); - if (file) - get_file(file); - spin_unlock(&files->file_lock); + if (file) { + if (!rcuref_inc_lf(&file->f_count)) { + /* File object ref couldn't be taken */ + rcu_read_unlock(); + return NULL; + } + } + rcu_read_unlock(); + return file; } @@ -182,21 +195,25 @@ struct file fastcall *fget_light(unsigned int fd, int *fput_needed) if (likely((atomic_read(&files->count) == 1))) { file = fcheck_files(files, fd); } else { - spin_lock(&files->file_lock); + rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - get_file(file); - *fput_needed = 1; + if (rcuref_inc_lf(&file->f_count)) + *fput_needed = 1; + else + /* Didn't get the reference, someone's freed */ + file = NULL; } - spin_unlock(&files->file_lock); + rcu_read_unlock(); } + return file; } void put_filp(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) { + if (rcuref_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); @@ -257,4 +274,5 @@ void __init files_init(unsigned long mempages) files_stat.max_files = n; if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; + files_defer_init(); } diff --git a/fs/open.c b/fs/open.c index b6542516a0ca..2fac58c51910 100644 --- a/fs/open.c +++ b/fs/open.c @@ -24,6 +24,7 @@ #include #include #include +#include #include @@ -930,9 +931,8 @@ void fastcall fd_install(unsigned int fd, struct file * file) struct fdtable *fdt; spin_lock(&files->file_lock); fdt = files_fdtable(files); - if (unlikely(fdt->fd[fd] != NULL)) - BUG(); - fdt->fd[fd] = file; + BUG_ON(fdt->fd[fd] != NULL); + 
rcu_assign_pointer(fdt->fd[fd], file); spin_unlock(&files->file_lock); } @@ -1024,7 +1024,7 @@ asmlinkage long sys_close(unsigned int fd) filp = fdt->fd[fd]; if (!filp) goto out_unlock; - fdt->fd[fd] = NULL; + rcu_assign_pointer(fdt->fd[fd], NULL); FD_CLR(fd, fdt->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); diff --git a/include/linux/file.h b/include/linux/file.h index db372230848e..f5bbd4c508b3 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -9,6 +9,7 @@ #include #include #include +#include /* * The default fd array needs to be at least BITS_PER_LONG, @@ -23,6 +24,9 @@ struct fdtable { struct file ** fd; /* current fd array */ fd_set *close_on_exec; fd_set *open_fds; + struct rcu_head rcu; + struct files_struct *free_files; + struct fdtable *next; }; /* @@ -31,13 +35,14 @@ struct fdtable { struct files_struct { atomic_t count; spinlock_t file_lock; /* Protects all the below members. Nests inside tsk->alloc_lock */ + struct fdtable *fdt; struct fdtable fdtab; fd_set close_on_exec_init; fd_set open_fds_init; struct file * fd_array[NR_OPEN_DEFAULT]; }; -#define files_fdtable(files) (&(files)->fdtab) +#define files_fdtable(files) (rcu_dereference((files)->fdt)) extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); @@ -65,6 +70,8 @@ extern fd_set *alloc_fdset(int); extern void free_fdset(fd_set *, int); extern int expand_files(struct files_struct *, int nr); +extern void free_fdtable(struct fdtable *fdt); +extern void __init files_defer_init(void); static inline struct file * fcheck_files(struct files_struct *files, unsigned int fd) { @@ -72,7 +79,7 @@ static inline struct file * fcheck_files(struct files_struct *files, unsigned in struct fdtable *fdt = files_fdtable(files); if (fd < fdt->max_fds) - file = fdt->fd[fd]; + file = rcu_dereference(fdt->fd[fd]); return file; } diff --git a/include/linux/fs.h b/include/linux/fs.h index fd93ab7da905..7f61227827d7 100644 --- a/include/linux/fs.h 
+++ b/include/linux/fs.h @@ -9,6 +9,7 @@ #include #include #include +#include /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -597,12 +598,13 @@ struct file { spinlock_t f_ep_lock; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; + struct rcu_head f_rcuhead; }; extern spinlock_t files_lock; #define file_list_lock() spin_lock(&files_lock); #define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) atomic_inc(&(x)->f_count) +#define get_file(x) rcuref_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) #define MAX_NON_LFS ((1UL<<31) - 1) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 94aefa54a1b5..68ab5f2ab9cd 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -2,6 +2,7 @@ #define _LINUX__INIT_TASK_H #include +#include #define INIT_FDTABLE \ { \ @@ -11,12 +12,16 @@ .fd = &init_files.fd_array[0], \ .close_on_exec = &init_files.close_on_exec_init, \ .open_fds = &init_files.open_fds_init, \ + .rcu = RCU_HEAD_INIT, \ + .free_files = NULL, \ + .next = NULL, \ } #define INIT_FILES \ { \ .count = ATOMIC_INIT(1), \ .file_lock = SPIN_LOCK_UNLOCKED, \ + .fdt = &init_files.fdtab, \ .fdtab = INIT_FDTABLE, \ .close_on_exec_init = { { 0, } }, \ .open_fds_init = { { 0, } }, \ diff --git a/kernel/exit.c b/kernel/exit.c index 83beb1e93b18..6d2089a1bce7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -411,15 +411,16 @@ void fastcall put_files_struct(struct files_struct *files) close_files(files); /* * Free the fd and fdset arrays if we expanded them. + * If the fdtable was embedded, pass files for freeing + * at the end of the RCU grace period. Otherwise, + * you can free files immediately. 
*/ fdt = files_fdtable(files); - if (fdt->fd != &files->fd_array[0]) - free_fd_array(fdt->fd, fdt->max_fds); - if (fdt->max_fdset > __FD_SETSIZE) { - free_fdset(fdt->open_fds, fdt->max_fdset); - free_fdset(fdt->close_on_exec, fdt->max_fdset); - } - kmem_cache_free(files_cachep, files); + if (fdt == &files->fdtab) + fdt->free_files = files; + else + kmem_cache_free(files_cachep, files); + free_fdtable(fdt); } } diff --git a/kernel/fork.c b/kernel/fork.c index ecc694debb50..8149f3602881 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -565,13 +566,12 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) return 0; } -static int count_open_files(struct files_struct *files, int size) +static int count_open_files(struct fdtable *fdt) { + int size = fdt->max_fdset; int i; - struct fdtable *fdt; /* Find the last open fd */ - fdt = files_fdtable(files); for (i = size/(8*sizeof(long)); i > 0; ) { if (fdt->open_fds->fds_bits[--i]) break; @@ -592,13 +592,17 @@ static struct files_struct *alloc_files(void) atomic_set(&newf->count, 1); spin_lock_init(&newf->file_lock); - fdt = files_fdtable(newf); + fdt = &newf->fdtab; fdt->next_fd = 0; fdt->max_fds = NR_OPEN_DEFAULT; fdt->max_fdset = __FD_SETSIZE; fdt->close_on_exec = &newf->close_on_exec_init; fdt->open_fds = &newf->open_fds_init; fdt->fd = &newf->fd_array[0]; + INIT_RCU_HEAD(&fdt->rcu); + fdt->free_files = NULL; + fdt->next = NULL; + rcu_assign_pointer(newf->fdt, fdt); out: return newf; } @@ -637,7 +641,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) old_fdt = files_fdtable(oldf); new_fdt = files_fdtable(newf); size = old_fdt->max_fdset; - open_files = count_open_files(oldf, old_fdt->max_fdset); + open_files = count_open_files(old_fdt); expand = 0; /* @@ -661,7 +665,14 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) spin_unlock(&newf->file_lock); if (error 
< 0) goto out_release; + new_fdt = files_fdtable(newf); + /* + * Reacquire the oldf lock and a pointer to its fd table + * who knows it may have a new bigger fd table. We need + * the latest pointer. + */ spin_lock(&oldf->file_lock); + old_fdt = files_fdtable(oldf); } old_fds = old_fdt->fd; @@ -683,7 +694,7 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) */ FD_CLR(open_files - i, new_fdt->open_fds); } - *new_fds++ = f; + rcu_assign_pointer(*new_fds++, f); } spin_unlock(&oldf->file_lock); -- cgit v1.2.3 From 7726e9e10fc6e026ed2dc00e48f4a3ffc1254ad2 Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Fri, 9 Sep 2005 13:04:29 -0700 Subject: [PATCH] fbdev: Add fbset -a support Add capability to fbdev to listen to the FB_ACTIVATE_ALL flag. If set, it notifies fbcon that all consoles must be set to the current var. Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/console/fbcon.c | 48 +++++++++++++++++++++++++++++++++++++++++++ drivers/video/fbmem.c | 6 ++++-- include/linux/fb.h | 3 +++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c index 35c88bd7ba5e..751890a5f5f3 100644 --- a/drivers/video/console/fbcon.c +++ b/drivers/video/console/fbcon.c @@ -2593,6 +2593,51 @@ static void fbcon_modechanged(struct fb_info *info) } } +static void fbcon_set_all_vcs(struct fb_info *info) +{ + struct fbcon_ops *ops = info->fbcon_par; + struct vc_data *vc; + struct display *p; + int i, rows, cols; + + if (!ops || ops->currcon < 0) + return; + + for (i = 0; i < MAX_NR_CONSOLES; i++) { + vc = vc_cons[i].d; + if (!vc || vc->vc_mode != KD_TEXT || + registered_fb[con2fb_map[i]] != info) + continue; + + p = &fb_display[vc->vc_num]; + + info->var.xoffset = info->var.yoffset = p->yscroll = 0; + var_to_display(p, &info->var, info); + cols = info->var.xres / vc->vc_font.width; + rows = info->var.yres 
/ vc->vc_font.height; + vc_resize(vc, cols, rows); + + if (CON_IS_VISIBLE(vc)) { + updatescrollmode(p, info, vc); + scrollback_max = 0; + scrollback_current = 0; + update_var(vc->vc_num, info); + fbcon_set_palette(vc, color_table); + update_screen(vc); + if (softback_buf) { + int l = fbcon_softback_size / vc->vc_size_row; + if (l > 5) + softback_end = softback_buf + l * vc->vc_size_row; + else { + /* Smaller scrollback makes no sense, and 0 + would screw the operation totally */ + softback_top = 0; + } + } + } + } +} + static int fbcon_mode_deleted(struct fb_info *info, struct fb_videomode *mode) { @@ -2708,6 +2753,9 @@ static int fbcon_event_notify(struct notifier_block *self, case FB_EVENT_MODE_CHANGE: fbcon_modechanged(info); break; + case FB_EVENT_MODE_CHANGE_ALL: + fbcon_set_all_vcs(info); + break; case FB_EVENT_MODE_DELETE: mode = event->data; ret = fbcon_mode_deleted(info, mode); diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c index 4ff853fbe0be..a8eee79e117d 100644 --- a/drivers/video/fbmem.c +++ b/drivers/video/fbmem.c @@ -684,11 +684,13 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var) if (!err && (flags & FBINFO_MISC_USEREVENT)) { struct fb_event event; + int evnt = (var->activate & FB_ACTIVATE_ALL) ? 
+ FB_EVENT_MODE_CHANGE_ALL : + FB_EVENT_MODE_CHANGE; info->flags &= ~FBINFO_MISC_USEREVENT; event.info = info; - notifier_call_chain(&fb_notifier_list, - FB_EVENT_MODE_CHANGE, + notifier_call_chain(&fb_notifier_list, evnt, &event); } } diff --git a/include/linux/fb.h b/include/linux/fb.h index bc24beeed971..70da819df0f5 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -495,6 +495,9 @@ struct fb_cursor_user { #define FB_EVENT_BLANK 0x08 /* Private modelist is to be replaced */ #define FB_EVENT_NEW_MODELIST 0x09 +/* The resolution of the passed in fb_info about to change and + all vc's should be changed */ +#define FB_EVENT_MODE_CHANGE_ALL 0x0A struct fb_event { struct fb_info *info; -- cgit v1.2.3 From d2d58384fc5d4c0fe2d8e34bc2d15a90a9bb372a Mon Sep 17 00:00:00 2001 From: "Antonino A. Daplas" Date: Fri, 9 Sep 2005 13:04:31 -0700 Subject: [PATCH] vesafb: Add blanking support Add rudimentary support by manipulating the VGA registers. However, not all vesa modes are VGA compatible, so VGA compatibility is checked first. Only 2 levels are supported, powerup and powerdown.
Signed-off-by: Antonino Daplas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/boot/video.S | 5 +++++ drivers/video/vesafb.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/tty.h | 3 ++- 3 files changed, 45 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/arch/i386/boot/video.S b/arch/i386/boot/video.S index 0587477c99f2..02bf625a681b 100644 --- a/arch/i386/boot/video.S +++ b/arch/i386/boot/video.S @@ -97,6 +97,7 @@ #define PARAM_VESAPM_OFF 0x30 #define PARAM_LFB_PAGES 0x32 #define PARAM_VESA_ATTRIB 0x34 +#define PARAM_CAPABILITIES 0x36 /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ #ifdef CONFIG_VIDEO_RETAIN @@ -233,6 +234,10 @@ mopar_gr: movw 18(%di), %ax movl %eax, %fs:(PARAM_LFB_SIZE) +# store mode capabilities + movl 10(%di), %eax + movl %eax, %fs:(PARAM_CAPABILITIES) + # switching the DAC to 8-bit is for <= 8 bpp only movw %fs:(PARAM_LFB_DEPTH), %ax cmpw $8, %ax diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c index a272592b0373..1ca80264c7b0 100644 --- a/drivers/video/vesafb.c +++ b/drivers/video/vesafb.c @@ -19,6 +19,7 @@ #include #include #include +#include