From c9fefeb26457b87f4a767faefcf77321bb90db52 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 2 Jul 2006 11:10:18 -0500 Subject: [SCSI] scsi_transport_sas: add unindexed ports Some SAS HBAs don't want to go to the trouble of tracking port numbers, so they'd simply like to say "add this port and give it a number". This is especially beneficial from the hotplug point of view, since tracking ports and the available number space can be a real pain. The current implementation uses an incrementing number per expander to add the port on. However, since there can never be more ports than there are phys, a later implementation will try to be more intelligent about this. Signed-off-by: James Bottomley --- include/scsi/scsi_transport_sas.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index e3c503cd175e..f5772ff61d0d 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -106,6 +106,7 @@ struct sas_end_device { struct sas_expander_device { int level; + int next_port_id; #define SAS_EXPANDER_VENDOR_ID_LEN 8 char vendor_id[SAS_EXPANDER_VENDOR_ID_LEN+1]; @@ -127,7 +128,7 @@ struct sas_expander_device { struct sas_port { struct device dev; - u8 port_identifier; + int port_identifier; int num_phys; /* the other end of the link */ @@ -168,6 +169,7 @@ extern void sas_rphy_delete(struct sas_rphy *); extern int scsi_is_sas_rphy(const struct device *); struct sas_port *sas_port_alloc(struct device *, int); +struct sas_port *sas_port_alloc_num(struct device *); int sas_port_add(struct sas_port *); void sas_port_free(struct sas_port *); void sas_port_delete(struct sas_port *); -- cgit v1.2.3 From 631c228cd09bd5b93090fa60bd9803ec14aa0586 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 8 Jul 2006 20:42:15 +0200 Subject: [SCSI] hide EH backup data outside the scsi_cmnd Currently struct scsi_cmnd has various fields that are used to backup original data after the corresponding fields have been overridden for EH commands. This means drivers can easily get at it and misuse it. Due to the old_ naming this doesn't happen for most of them, but two that have different names have been used wrong a lot (see previous patch). Another downside is that they unessecarily bloat the scsi_cmnd size. This patch moves them onstack in scsi_send_eh_cmnd to fix those two issues aswell as allowing future EH fixes like moving the EH command submissions to use SG lists like everything else. Signed-off-by: Christoph Hellwig Signed-off-by: James Bottomley --- drivers/scsi/aha152x.c | 14 ++-- drivers/scsi/scsi.c | 11 +-- drivers/scsi/scsi_error.c | 210 ++++++++++++++++++++-------------------------- drivers/scsi/scsi_lib.c | 88 ++----------------- drivers/scsi/scsi_priv.h | 1 - drivers/scsi/sd.c | 3 +- drivers/scsi/sr.c | 5 +- include/scsi/scsi_cmnd.h | 9 -- 8 files changed, 108 insertions(+), 233 deletions(-) (limited to 'include') diff --git a/drivers/scsi/aha152x.c b/drivers/scsi/aha152x.c index cff3d389b010..f974869ea323 100644 --- a/drivers/scsi/aha152x.c +++ b/drivers/scsi/aha152x.c @@ -1179,6 +1179,10 @@ static int aha152x_device_reset(Scsi_Cmnd * SCpnt) DECLARE_MUTEX_LOCKED(sem); struct timer_list timer; int ret, issued, disconnected; + unsigned char old_cmd_len = SCpnt->cmd_len; + unsigned short old_use_sg = SCpnt->use_sg; + void *old_buffer = SCpnt->request_buffer; + unsigned old_bufflen = SCpnt->request_bufflen; unsigned long flags; #if defined(AHA152X_DEBUG) @@ -1212,11 +1216,11 @@ static int aha152x_device_reset(Scsi_Cmnd * SCpnt) add_timer(&timer); down(&sem); del_timer(&timer); - - SCpnt->cmd_len = SCpnt->old_cmd_len; - SCpnt->use_sg = SCpnt->old_use_sg; - SCpnt->request_buffer = SCpnt->buffer; - SCpnt->request_bufflen = SCpnt->bufflen; + + SCpnt->cmd_len = old_cmd_len; + SCpnt->use_sg = old_use_sg; + SCpnt->request_buffer = old_buffer; + SCpnt->request_bufflen = old_bufflen; DO_LOCK(flags); diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 2ab7df0dcfe8..b332caddd5b3 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -346,7 +346,7 @@ void scsi_log_send(struct scsi_cmnd *cmd) if (level > 3) { printk(KERN_INFO "buffer = 0x%p, bufflen = %d," " done = 0x%p, queuecommand 0x%p\n", - cmd->buffer, cmd->bufflen, + cmd->request_buffer, cmd->request_bufflen, cmd->done, sdev->host->hostt->queuecommand); @@ -661,11 +661,6 @@ void __scsi_done(struct scsi_cmnd *cmd) */ int scsi_retry_command(struct scsi_cmnd *cmd) { - /* - * Restore the SCSI command state. - */ - scsi_setup_cmd_retry(cmd); - /* * Zero the sense information from the last time we tried * this command. @@ -711,10 +706,6 @@ void scsi_finish_command(struct scsi_cmnd *cmd) "Notifying upper driver of completion " "(result %x)\n", cmd->result)); - /* - * We can get here with use_sg=0, causing a panic in the upper level - */ - cmd->use_sg = cmd->old_use_sg; cmd->done(cmd); } EXPORT_SYMBOL(scsi_finish_command); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 6683d596234a..6a5b731bd5ba 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -460,19 +460,67 @@ static void scsi_eh_done(struct scsi_cmnd *scmd) * Return value: * SUCCESS or FAILED or NEEDS_RETRY **/ -static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) +static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout, int copy_sense) { struct scsi_device *sdev = scmd->device; struct Scsi_Host *shost = sdev->host; + int old_result = scmd->result; DECLARE_COMPLETION(done); unsigned long timeleft; unsigned long flags; + unsigned char old_cmnd[MAX_COMMAND_SIZE]; + enum dma_data_direction old_data_direction; + unsigned short old_use_sg; + unsigned char old_cmd_len; + unsigned old_bufflen; + void *old_buffer; int rtn; + /* + * We need saved copies of a number of fields - this is because + * error handling may need to overwrite these with different values + * to run different commands, and once error handling is complete, + * we will need to restore these values prior to running the actual + * command. + */ + old_buffer = scmd->request_buffer; + old_bufflen = scmd->request_bufflen; + memcpy(old_cmnd, scmd->cmnd, sizeof(scmd->cmnd)); + old_data_direction = scmd->sc_data_direction; + old_cmd_len = scmd->cmd_len; + old_use_sg = scmd->use_sg; + + if (copy_sense) { + int gfp_mask = GFP_ATOMIC; + + if (shost->hostt->unchecked_isa_dma) + gfp_mask |= __GFP_DMA; + + scmd->sc_data_direction = DMA_FROM_DEVICE; + scmd->request_bufflen = 252; + scmd->request_buffer = kzalloc(scmd->request_bufflen, gfp_mask); + if (!scmd->request_buffer) + return FAILED; + } else { + scmd->request_buffer = NULL; + scmd->request_bufflen = 0; + scmd->sc_data_direction = DMA_NONE; + } + + scmd->underflow = 0; + scmd->use_sg = 0; + scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); + if (sdev->scsi_level <= SCSI_2) scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) | (sdev->lun << 5 & 0xe0); + /* + * Zero the sense buffer. The scsi spec mandates that any + * untransferred sense data should be interpreted as being zero. + */ + memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); + shost->eh_action = &done; spin_lock_irqsave(shost->host_lock, flags); @@ -522,6 +570,29 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) rtn = FAILED; } + + /* + * Last chance to have valid sense data. + */ + if (copy_sense) { + if (!SCSI_SENSE_VALID(scmd)) { + memcpy(scmd->sense_buffer, scmd->request_buffer, + sizeof(scmd->sense_buffer)); + } + kfree(scmd->request_buffer); + } + + + /* + * Restore original data + */ + scmd->request_buffer = old_buffer; + scmd->request_bufflen = old_bufflen; + memcpy(scmd->cmnd, old_cmnd, sizeof(scmd->cmnd)); + scmd->sc_data_direction = old_data_direction; + scmd->cmd_len = old_cmd_len; + scmd->use_sg = old_use_sg; + scmd->result = old_result; return rtn; } @@ -537,56 +608,10 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout) static int scsi_request_sense(struct scsi_cmnd *scmd) { static unsigned char generic_sense[6] = - {REQUEST_SENSE, 0, 0, 0, 252, 0}; - unsigned char *scsi_result; - int saved_result; - int rtn; + {REQUEST_SENSE, 0, 0, 0, 252, 0}; memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense)); - - scsi_result = kmalloc(252, GFP_ATOMIC | ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0)); - - - if (unlikely(!scsi_result)) { - printk(KERN_ERR "%s: cannot allocate scsi_result.\n", - __FUNCTION__); - return FAILED; - } - - /* - * zero the sense buffer. some host adapters automatically always - * request sense, so it is not a good idea that - * scmd->request_buffer and scmd->sense_buffer point to the same - * address (db). 0 is not a valid sense code. - */ - memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); - memset(scsi_result, 0, 252); - - saved_result = scmd->result; - scmd->request_buffer = scsi_result; - scmd->request_bufflen = 252; - scmd->use_sg = 0; - scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); - scmd->sc_data_direction = DMA_FROM_DEVICE; - scmd->underflow = 0; - - rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); - - /* last chance to have valid sense data */ - if(!SCSI_SENSE_VALID(scmd)) { - memcpy(scmd->sense_buffer, scmd->request_buffer, - sizeof(scmd->sense_buffer)); - } - - kfree(scsi_result); - - /* - * when we eventually call scsi_finish, we really wish to complete - * the original request, so let's restore the original data. (db) - */ - scsi_setup_cmd_retry(scmd); - scmd->result = saved_result; - return rtn; + return scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT, 1); } /** @@ -605,12 +630,6 @@ void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q) { scmd->device->host->host_failed--; scmd->eh_eflags = 0; - - /* - * set this back so that the upper level can correctly free up - * things. - */ - scsi_setup_cmd_retry(scmd); list_move_tail(&scmd->eh_entry, done_q); } EXPORT_SYMBOL(scsi_eh_finish_cmd); @@ -715,47 +734,26 @@ static int scsi_eh_tur(struct scsi_cmnd *scmd) { static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0}; int retry_cnt = 1, rtn; - int saved_result; retry_tur: memcpy(scmd->cmnd, tur_command, sizeof(tur_command)); - /* - * zero the sense buffer. the scsi spec mandates that any - * untransferred sense data should be interpreted as being zero. - */ - memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); - - saved_result = scmd->result; - scmd->request_buffer = NULL; - scmd->request_bufflen = 0; - scmd->use_sg = 0; - scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); - scmd->underflow = 0; - scmd->sc_data_direction = DMA_NONE; - rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT); + rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT, 0); - /* - * when we eventually call scsi_finish, we really wish to complete - * the original request, so let's restore the original data. (db) - */ - scsi_setup_cmd_retry(scmd); - scmd->result = saved_result; - - /* - * hey, we are done. let's look to see what happened. - */ SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", __FUNCTION__, scmd, rtn)); - if (rtn == SUCCESS) - return 0; - else if (rtn == NEEDS_RETRY) { + + switch (rtn) { + case NEEDS_RETRY: if (retry_cnt--) goto retry_tur; + /*FALLTHRU*/ + case SUCCESS: return 0; + default: + return 1; } - return 1; } /** @@ -837,44 +835,16 @@ static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd) static int scsi_eh_try_stu(struct scsi_cmnd *scmd) { static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0}; - int rtn; - int saved_result; - if (!scmd->device->allow_restart) - return 1; - - memcpy(scmd->cmnd, stu_command, sizeof(stu_command)); - - /* - * zero the sense buffer. the scsi spec mandates that any - * untransferred sense data should be interpreted as being zero. - */ - memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer)); - - saved_result = scmd->result; - scmd->request_buffer = NULL; - scmd->request_bufflen = 0; - scmd->use_sg = 0; - scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]); - scmd->underflow = 0; - scmd->sc_data_direction = DMA_NONE; + if (scmd->device->allow_restart) { + int rtn; - rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT); - - /* - * when we eventually call scsi_finish, we really wish to complete - * the original request, so let's restore the original data. (db) - */ - scsi_setup_cmd_retry(scmd); - scmd->result = saved_result; + memcpy(scmd->cmnd, stu_command, sizeof(stu_command)); + rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT, 0); + if (rtn == SUCCESS) + return 0; + } - /* - * hey, we are done. let's look to see what happened. - */ - SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n", - __FUNCTION__, scmd, rtn)); - if (rtn == SUCCESS) - return 0; return 1; } @@ -1684,8 +1654,6 @@ scsi_reset_provider(struct scsi_device *dev, int flag) scmd->scsi_done = scsi_reset_provider_done_command; scmd->done = NULL; - scmd->buffer = NULL; - scmd->bufflen = 0; scmd->request_buffer = NULL; scmd->request_bufflen = 0; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 08af9aae7df3..077c1c691210 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -436,60 +436,16 @@ EXPORT_SYMBOL_GPL(scsi_execute_async); * * Arguments: cmd - command that is ready to be queued. * - * Returns: Nothing - * * Notes: This function has the job of initializing a number of * fields related to error handling. Typically this will * be called once for each command, as required. */ -static int scsi_init_cmd_errh(struct scsi_cmnd *cmd) +static void scsi_init_cmd_errh(struct scsi_cmnd *cmd) { cmd->serial_number = 0; - memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer); - if (cmd->cmd_len == 0) cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); - - /* - * We need saved copies of a number of fields - this is because - * error handling may need to overwrite these with different values - * to run different commands, and once error handling is complete, - * we will need to restore these values prior to running the actual - * command. - */ - cmd->old_use_sg = cmd->use_sg; - cmd->old_cmd_len = cmd->cmd_len; - cmd->sc_old_data_direction = cmd->sc_data_direction; - cmd->old_underflow = cmd->underflow; - memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd)); - cmd->buffer = cmd->request_buffer; - cmd->bufflen = cmd->request_bufflen; - - return 1; -} - -/* - * Function: scsi_setup_cmd_retry() - * - * Purpose: Restore the command state for a retry - * - * Arguments: cmd - command to be restored - * - * Returns: Nothing - * - * Notes: Immediately prior to retrying a command, we need - * to restore certain fields that we saved above. - */ -void scsi_setup_cmd_retry(struct scsi_cmnd *cmd) -{ - memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd)); - cmd->request_buffer = cmd->buffer; - cmd->request_bufflen = cmd->bufflen; - cmd->use_sg = cmd->old_use_sg; - cmd->cmd_len = cmd->old_cmd_len; - cmd->sc_data_direction = cmd->sc_old_data_direction; - cmd->underflow = cmd->old_underflow; } void scsi_device_unbusy(struct scsi_device *sdev) @@ -807,22 +763,13 @@ static void scsi_free_sgtable(struct scatterlist *sgl, int index) */ static void scsi_release_buffers(struct scsi_cmnd *cmd) { - struct request *req = cmd->request; - - /* - * Free up any indirection buffers we allocated for DMA purposes. - */ if (cmd->use_sg) scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); - else if (cmd->request_buffer != req->buffer) - kfree(cmd->request_buffer); /* * Zero these out. They now point to freed memory, and it is * dangerous to hang onto the pointers. */ - cmd->buffer = NULL; - cmd->bufflen = 0; cmd->request_buffer = NULL; cmd->request_bufflen = 0; } @@ -858,7 +805,7 @@ static void scsi_release_buffers(struct scsi_cmnd *cmd) void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - int this_count = cmd->bufflen; + int this_count = cmd->request_bufflen; request_queue_t *q = cmd->device->request_queue; struct request *req = cmd->request; int clear_errors = 1; @@ -866,28 +813,14 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) int sense_valid = 0; int sense_deferred = 0; - /* - * Free up any indirection buffers we allocated for DMA purposes. - * For the case of a READ, we need to copy the data out of the - * bounce buffer and into the real buffer. - */ - if (cmd->use_sg) - scsi_free_sgtable(cmd->buffer, cmd->sglist_len); - else if (cmd->buffer != req->buffer) { - if (rq_data_dir(req) == READ) { - unsigned long flags; - char *to = bio_kmap_irq(req->bio, &flags); - memcpy(to, cmd->buffer, cmd->bufflen); - bio_kunmap_irq(to, &flags); - } - kfree(cmd->buffer); - } + scsi_release_buffers(cmd); if (result) { sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_deferred = scsi_sense_is_deferred(&sshdr); } + if (blk_pc_request(req)) { /* SG_IO ioctl from block level */ req->errors = result; if (result) { @@ -907,15 +840,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) req->data_len = cmd->resid; } - /* - * Zero these out. They now point to freed memory, and it is - * dangerous to hang onto the pointers. - */ - cmd->buffer = NULL; - cmd->bufflen = 0; - cmd->request_buffer = NULL; - cmd->request_bufflen = 0; - /* * Next deal with any sectors which we were able to correctly * handle. @@ -1012,7 +936,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) if (!(req->flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, "Volume overflow, CDB: "); - __scsi_print_command(cmd->data_cmnd); + __scsi_print_command(cmd->cmnd); scsi_print_sense("", cmd); } /* See SSC3rXX or current. */ @@ -1143,7 +1067,7 @@ static void scsi_blk_pc_done(struct scsi_cmnd *cmd) * successfully. Since this is a REQ_BLOCK_PC command the * caller should check the request's errors value */ - scsi_io_completion(cmd, cmd->bufflen); + scsi_io_completion(cmd, cmd->request_bufflen); } static void scsi_setup_blk_pc_cmnd(struct scsi_cmnd *cmd) diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h index e2fbe9a9d5a9..ae24c85aaeea 100644 --- a/drivers/scsi/scsi_priv.h +++ b/drivers/scsi/scsi_priv.h @@ -57,7 +57,6 @@ extern int scsi_eh_scmd_add(struct scsi_cmnd *, int); /* scsi_lib.c */ extern int scsi_maybe_unblock_host(struct scsi_device *sdev); -extern void scsi_setup_cmd_retry(struct scsi_cmnd *cmd); extern void scsi_device_unbusy(struct scsi_device *sdev); extern int scsi_queue_insert(struct scsi_cmnd *cmd, int reason); extern void scsi_next_command(struct scsi_cmnd *cmd); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3225d31449e1..98bd3aab9739 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -502,8 +502,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) SCpnt->cmnd[4] = (unsigned char) this_count; SCpnt->cmnd[5] = 0; } - SCpnt->request_bufflen = SCpnt->bufflen = - this_count * sdp->sector_size; + SCpnt->request_bufflen = this_count * sdp->sector_size; /* * We shouldn't disconnect in the middle of a sector, so with a dumb diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index fd94408577e5..fae6e95a6298 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -360,7 +360,7 @@ static int sr_init_command(struct scsi_cmnd * SCpnt) "mismatch count %d, bytes %d\n", size, SCpnt->request_bufflen); if (SCpnt->request_bufflen > size) - SCpnt->request_bufflen = SCpnt->bufflen = size; + SCpnt->request_bufflen = size; } } @@ -387,8 +387,7 @@ static int sr_init_command(struct scsi_cmnd * SCpnt) if (this_count > 0xffff) { this_count = 0xffff; - SCpnt->request_bufflen = SCpnt->bufflen = - this_count * s_size; + SCpnt->request_bufflen = this_count * s_size; } SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 371f70d9aa92..58e6444eebee 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -58,9 +58,7 @@ struct scsi_cmnd { int timeout_per_command; unsigned char cmd_len; - unsigned char old_cmd_len; enum dma_data_direction sc_data_direction; - enum dma_data_direction sc_old_data_direction; /* These elements define the operation we are about to perform */ #define MAX_COMMAND_SIZE 16 @@ -71,18 +69,11 @@ struct scsi_cmnd { void *request_buffer; /* Actual requested buffer */ /* These elements define the operation we ultimately want to perform */ - unsigned char data_cmnd[MAX_COMMAND_SIZE]; - unsigned short old_use_sg; /* We save use_sg here when requesting - * sense info */ unsigned short use_sg; /* Number of pieces of scatter-gather */ unsigned short sglist_len; /* size of malloc'd scatter-gather list */ - unsigned bufflen; /* Size of data buffer */ - void *buffer; /* Data buffer */ unsigned underflow; /* Return error if less than this amount is transferred */ - unsigned old_underflow; /* save underflow here when reusing the - * command for error handling */ unsigned transfersize; /* How much we are guaranteed to transfer with each SCSI transfer -- cgit v1.2.3 From a0e1b6ef3b851fe6f1dcc259432e83de79ce5e7f Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sun, 9 Jul 2006 12:38:19 -0500 Subject: [SCSI] scsi_transport_sas: add expander backlink This patch adds the ability to add a backlink to a particular port. The idea is to represent properly ports on expanders that are used specifically for linking to the parent device in the topology. Signed-off-by: James Bottomley --- drivers/scsi/scsi_transport_sas.c | 20 ++++++++++++++++++++ include/scsi/scsi_transport_sas.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index 9bb4e37a1a61..7871bc25fafb 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -692,6 +692,13 @@ void sas_port_delete(struct sas_port *port) } mutex_unlock(&port->phy_list_mutex); + if (port->is_backlink) { + struct device *parent = port->dev.parent; + + sysfs_remove_link(&port->dev.kobj, parent->bus_id); + port->is_backlink = 0; + } + transport_remove_device(dev); device_del(dev); transport_destroy_device(dev); @@ -767,6 +774,19 @@ void sas_port_delete_phy(struct sas_port *port, struct sas_phy *phy) } EXPORT_SYMBOL(sas_port_delete_phy); +void sas_port_mark_backlink(struct sas_port *port) +{ + struct device *parent = port->dev.parent->parent->parent; + + if (port->is_backlink) + return; + port->is_backlink = 1; + sysfs_create_link(&port->dev.kobj, &parent->kobj, + parent->bus_id); + +} +EXPORT_SYMBOL(sas_port_mark_backlink); + /* * SAS remote PHY attributes. */ diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index f5772ff61d0d..6cc2314098cf 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -130,6 +130,8 @@ struct sas_port { int port_identifier; int num_phys; + /* port flags */ + unsigned int is_backlink:1; /* the other end of the link */ struct sas_rphy *rphy; @@ -175,6 +177,7 @@ void sas_port_free(struct sas_port *); void sas_port_delete(struct sas_port *); void sas_port_add_phy(struct sas_port *, struct sas_phy *); void sas_port_delete_phy(struct sas_port *, struct sas_phy *); +void sas_port_mark_backlink(struct sas_port *); int scsi_is_sas_port(const struct device *); extern struct scsi_transport_template * -- cgit v1.2.3 From 5a651c93d3a823af63b1b15bb94fdc951670fb2f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Jul 2006 16:09:18 +0200 Subject: [S390] Fix gcc warning about unused return values. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- include/asm-s390/system.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h index 36a3a85d611a..16040048cd1b 100644 --- a/include/asm-s390/system.h +++ b/include/asm-s390/system.h @@ -128,8 +128,13 @@ extern void account_system_vtime(struct task_struct *); #define nop() __asm__ __volatile__ ("nop") -#define xchg(ptr,x) \ - ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(void *)(ptr),sizeof(*(ptr)))) +#define xchg(ptr,x) \ +({ \ + __typeof__(*(ptr)) __ret; \ + __ret = (__typeof__(*(ptr))) \ + __xchg((unsigned long)(x), (void *)(ptr),sizeof(*(ptr))); \ + __ret; \ +}) static inline unsigned long __xchg(unsigned long x, void * ptr, int size) { -- cgit v1.2.3 From 53ba5e09fe37518683ff8b3f28410ce0b9ed2f74 Mon Sep 17 00:00:00 2001 From: Andreas Krebbel Date: Mon, 17 Jul 2006 16:09:42 +0200 Subject: [S390] get_clock inline assembly. Add missing volatile to the get_clock / get_cycles inline assemblies to avoid that consecutive calls get optimized away. Signed-off-by: Andreas Krebbel Signed-off-by: Martin Schwidefsky --- include/asm-s390/timex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-s390/timex.h b/include/asm-s390/timex.h index 4848057dafe4..5d0332a4c2bd 100644 --- a/include/asm-s390/timex.h +++ b/include/asm-s390/timex.h @@ -19,7 +19,7 @@ static inline cycles_t get_cycles(void) { cycles_t cycles; - __asm__("stck 0(%1)" : "=m" (cycles) : "a" (&cycles) : "cc"); + __asm__ __volatile__ ("stck 0(%1)" : "=m" (cycles) : "a" (&cycles) : "cc"); return cycles >> 2; } @@ -27,7 +27,7 @@ static inline unsigned long long get_clock (void) { unsigned long long clk; - __asm__("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc"); + __asm__ __volatile__ ("stck 0(%1)" : "=m" (clk) : "a" (&clk) : "cc"); return clk; } -- cgit v1.2.3 From 46ba6d7d8b0486e9d565729880ddfb2b84d3af31 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 16 Jul 2006 22:10:44 -0700 Subject: [SPARC64]: Fix more of_device layer IRQ bugs, and correct PROMREG_MAX. Sabre and Psycho PCI controllers can have partial interrupt-map properties, meaning that on-board devices don't match up to any entries. Instead, they are fully specified from the beginning and we should pass them directly to the IRQ translator as-is. Also, fill in the necessary translator slots for the "graphics" and "expansion UPA" interrupts on Sabre, Psycho, and SYSIO SBUS. Increase PROMREG_MAX to 24, as seen on SUNW,ffb devices. Finally, prevent accidentally writing past the end of the of_device struct resource[] and irqs[] arrays. Spit out a log message when we ignore some entries because there are too many of them. Signed-off-by: David S. Miller --- arch/sparc64/kernel/of_device.c | 34 ++++++++++++++++++++++++++++++++-- arch/sparc64/kernel/prom.c | 12 ++++++++++-- include/asm-sparc64/openprom.h | 2 +- 3 files changed, 43 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/sparc64/kernel/of_device.c b/arch/sparc64/kernel/of_device.c index 7064cee290ae..238bbf6de07d 100644 --- a/arch/sparc64/kernel/of_device.c +++ b/arch/sparc64/kernel/of_device.c @@ -542,9 +542,17 @@ static void __init build_device_resources(struct of_device *op, /* Convert to num-cells. */ num_reg /= 4; - /* Conver to num-entries. */ + /* Convert to num-entries. */ num_reg /= na + ns; + /* Prevent overruning the op->resources[] array. */ + if (num_reg > PROMREG_MAX) { + printk(KERN_WARNING "%s: Too many regs (%d), " + "limiting to %d.\n", + op->node->full_name, num_reg, PROMREG_MAX); + num_reg = PROMREG_MAX; + } + for (index = 0; index < num_reg; index++) { struct resource *r = &op->resource[index]; u32 addr[OF_MAX_ADDR_CELLS]; @@ -650,8 +658,22 @@ apply_interrupt_map(struct device_node *dp, struct device_node *pp, next: imap += (na + 3); } - if (i == imlen) + if (i == imlen) { + /* Psycho and Sabre PCI controllers can have 'interrupt-map' + * properties that do not include the on-board device + * interrupts. Instead, the device's 'interrupts' property + * is already a fully specified INO value. + * + * Handle this by deciding that, if we didn't get a + * match in the parent's 'interrupt-map', and the + * parent is an IRQ translater, then use the parent as + * our IRQ controller. + */ + if (pp->irq_trans) + return pp; + return NULL; + } *irq_p = irq; cp = of_find_node_by_phandle(handle); @@ -803,6 +825,14 @@ static struct of_device * __init scan_one_device(struct device_node *dp, op->num_irqs = 0; } + /* Prevent overruning the op->irqs[] array. */ + if (op->num_irqs > PROMINTR_MAX) { + printk(KERN_WARNING "%s: Too many irqs (%d), " + "limiting to %d.\n", + dp->full_name, op->num_irqs, PROMINTR_MAX); + op->num_irqs = PROMINTR_MAX; + } + build_device_resources(op, parent); for (i = 0; i < op->num_irqs; i++) op->irqs[i] = build_one_device_irq(op, parent, op->irqs[i]); diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c index c86007a2aa3f..5cc5ab63293f 100644 --- a/arch/sparc64/kernel/prom.c +++ b/arch/sparc64/kernel/prom.c @@ -344,10 +344,12 @@ static unsigned long __psycho_onboard_imap_off[] = { /*0x2f*/ PSYCHO_IMAP_CE, /*0x30*/ PSYCHO_IMAP_A_ERR, /*0x31*/ PSYCHO_IMAP_B_ERR, -/*0x32*/ PSYCHO_IMAP_PMGMT +/*0x32*/ PSYCHO_IMAP_PMGMT, +/*0x33*/ PSYCHO_IMAP_GFX, +/*0x34*/ PSYCHO_IMAP_EUPA, }; #define PSYCHO_ONBOARD_IRQ_BASE 0x20 -#define PSYCHO_ONBOARD_IRQ_LAST 0x32 +#define PSYCHO_ONBOARD_IRQ_LAST 0x34 #define psycho_onboard_imap_offset(__ino) \ __psycho_onboard_imap_off[(__ino) - PSYCHO_ONBOARD_IRQ_BASE] @@ -529,6 +531,10 @@ static unsigned long __sabre_onboard_imap_off[] = { /*0x2e*/ SABRE_IMAP_UE, /*0x2f*/ SABRE_IMAP_CE, /*0x30*/ SABRE_IMAP_PCIERR, +/*0x31*/ 0 /* reserved */, +/*0x32*/ 0 /* reserved */, +/*0x33*/ SABRE_IMAP_GFX, +/*0x34*/ SABRE_IMAP_EUPA, }; #define SABRE_ONBOARD_IRQ_BASE 0x20 #define SABRE_ONBOARD_IRQ_LAST 0x30 @@ -895,6 +901,8 @@ static unsigned long sysio_irq_offsets[] = { SYSIO_IMAP_CE, SYSIO_IMAP_SBERR, SYSIO_IMAP_PMGMT, + SYSIO_IMAP_GFX, + SYSIO_IMAP_EUPA, }; #undef bogon diff --git a/include/asm-sparc64/openprom.h b/include/asm-sparc64/openprom.h index b4959d2b0d99..e01b80559c93 100644 --- a/include/asm-sparc64/openprom.h +++ b/include/asm-sparc64/openprom.h @@ -175,7 +175,7 @@ struct linux_nodeops { }; /* More fun PROM structures for device probing. */ -#define PROMREG_MAX 16 +#define PROMREG_MAX 24 #define PROMVADDR_MAX 16 #define PROMINTR_MAX 15 -- cgit v1.2.3 From 06ffd7956e4790d824b4b5575b56def8448ec6d4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 16 Jul 2006 22:19:40 -0700 Subject: [SPARC]: Kill prom_getname, unused and not implemented properly. The m68k port's sun3 asm/oplib.h had a stray reference too, so I killed that off as well. Signed-off-by: David S. Miller --- arch/sparc/kernel/sparc_ksyms.c | 1 - arch/sparc/prom/tree.c | 18 -------- arch/sparc64/kernel/sparc64_ksyms.c | 1 - arch/sparc64/prom/tree.c | 85 ------------------------------------- include/asm-m68k/oplib.h | 5 --- include/asm-sparc/oplib.h | 5 --- include/asm-sparc64/oplib.h | 5 --- 7 files changed, 120 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/sparc_ksyms.c b/arch/sparc/kernel/sparc_ksyms.c index 5fb987fc3d63..4d441a554d35 100644 --- a/arch/sparc/kernel/sparc_ksyms.c +++ b/arch/sparc/kernel/sparc_ksyms.c @@ -237,7 +237,6 @@ EXPORT_SYMBOL(prom_node_has_property); EXPORT_SYMBOL(prom_setprop); EXPORT_SYMBOL(saved_command_line); EXPORT_SYMBOL(prom_apply_obio_ranges); -EXPORT_SYMBOL(prom_getname); EXPORT_SYMBOL(prom_feval); EXPORT_SYMBOL(prom_getbool); EXPORT_SYMBOL(prom_getstring); diff --git a/arch/sparc/prom/tree.c b/arch/sparc/prom/tree.c index 2bf03ee8cde5..5ec246573a98 100644 --- a/arch/sparc/prom/tree.c +++ b/arch/sparc/prom/tree.c @@ -205,24 +205,6 @@ int prom_searchsiblings(int node_start, char *nodename) return 0; } -/* Gets name in the form prom v2+ uses it (name@x,yyyyy or name (if no reg)) */ -int prom_getname (int node, char *buffer, int len) -{ - int i; - struct linux_prom_registers reg[PROMREG_MAX]; - - i = prom_getproperty (node, "name", buffer, len); - if (i <= 0) return -1; - buffer [i] = 0; - len -= i; - i = prom_getproperty (node, "reg", (char *)reg, sizeof (reg)); - if (i <= 0) return 0; - if (len < 11) return -1; - buffer = strchr (buffer, 0); - sprintf (buffer, "@%x,%x", reg[0].which_io, (uint)reg[0].phys_addr); - return 0; -} - /* Interal version of nextprop that does not alter return values. */ char * __prom_nextprop(int node, char * oprop) { diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c index 237524d87cab..beffc82a1e85 100644 --- a/arch/sparc64/kernel/sparc64_ksyms.c +++ b/arch/sparc64/kernel/sparc64_ksyms.c @@ -254,7 +254,6 @@ EXPORT_SYMBOL(prom_getproperty); EXPORT_SYMBOL(prom_node_has_property); EXPORT_SYMBOL(prom_setprop); EXPORT_SYMBOL(saved_command_line); -EXPORT_SYMBOL(prom_getname); EXPORT_SYMBOL(prom_finddevice); EXPORT_SYMBOL(prom_feval); EXPORT_SYMBOL(prom_getbool); diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c index 49075abd7cbc..500f05e2cfcb 100644 --- a/arch/sparc64/prom/tree.c +++ b/arch/sparc64/prom/tree.c @@ -193,91 +193,6 @@ prom_searchsiblings(int node_start, const char *nodename) return 0; } -/* Gets name in the {name@x,yyyyy|name (if no reg)} form */ -int -prom_getname (int node, char *buffer, int len) -{ - int i, sbus = 0; - int pci = 0, ebus = 0, ide = 0; - struct linux_prom_registers *reg; - struct linux_prom64_registers reg64[PROMREG_MAX]; - - for (sbus = prom_getparent (node); sbus; sbus = prom_getparent (sbus)) { - i = prom_getproperty (sbus, "name", buffer, len); - if (i > 0) { - buffer [i] = 0; - if (!strcmp (buffer, "sbus")) - goto getit; - } - } - if ((pci = prom_getparent (node))) { - i = prom_getproperty (pci, "name", buffer, len); - if (i > 0) { - buffer [i] = 0; - if (!strcmp (buffer, "pci")) - goto getit; - } - pci = 0; - } - if ((ebus = prom_getparent (node))) { - i = prom_getproperty (ebus, "name", buffer, len); - if (i > 0) { - buffer[i] = 0; - if (!strcmp (buffer, "ebus")) - goto getit; - } - ebus = 0; - } - if ((ide = prom_getparent (node))) { - i = prom_getproperty (ide, "name", buffer, len); - if (i > 0) { - buffer [i] = 0; - if (!strcmp (buffer, "ide")) - goto getit; - } - ide = 0; - } -getit: - i = prom_getproperty (node, "name", buffer, len); - if (i <= 0) { - buffer [0] = 0; - return -1; - } - buffer [i] = 0; - len -= i; - i = prom_getproperty (node, "reg", (char *)reg64, sizeof (reg64)); - if (i <= 0) return 0; - if (len < 16) return -1; - buffer = strchr (buffer, 0); - if (sbus) { - reg = (struct linux_prom_registers *)reg64; - sprintf (buffer, "@%x,%x", reg[0].which_io, (uint)reg[0].phys_addr); - } else if (pci) { - int dev, fn; - reg = (struct linux_prom_registers *)reg64; - fn = (reg[0].which_io >> 8) & 0x07; - dev = (reg[0].which_io >> 11) & 0x1f; - if (fn) - sprintf (buffer, "@%x,%x", dev, fn); - else - sprintf (buffer, "@%x", dev); - } else if (ebus) { - reg = (struct linux_prom_registers *)reg64; - sprintf (buffer, "@%x,%x", reg[0].which_io, reg[0].phys_addr); - } else if (ide) { - reg = (struct linux_prom_registers *)reg64; - sprintf (buffer, "@%x,%x", reg[0].which_io, reg[0].phys_addr); - } else if (i == 4) { /* Happens on 8042's children on Ultra/PCI. */ - reg = (struct linux_prom_registers *)reg64; - sprintf (buffer, "@%x", reg[0].which_io); - } else { - sprintf (buffer, "@%x,%x", - (unsigned int)(reg64[0].phys_addr >> 36), - (unsigned int)(reg64[0].phys_addr)); - } - return 0; -} - /* Return the first property type for node 'node'. * buffer should be at least 32B in length */ diff --git a/include/asm-m68k/oplib.h b/include/asm-m68k/oplib.h index c3594f473ef7..06caa2d08451 100644 --- a/include/asm-m68k/oplib.h +++ b/include/asm-m68k/oplib.h @@ -244,11 +244,6 @@ extern void prom_getstring(int node, char *prop, char *buf, int bufsize); /* Does the passed node have the given "name"? YES=1 NO=0 */ extern int prom_nodematch(int thisnode, char *name); -/* Puts in buffer a prom name in the form name@x,y or name (x for which_io - * and y for first regs phys address - */ -extern int prom_getname(int node, char *buf, int buflen); - /* Search all siblings starting at the passed node for "name" matching * the given string. Returns the node on success, zero on failure. */ diff --git a/include/asm-sparc/oplib.h b/include/asm-sparc/oplib.h index f283f8aaf6a9..91691e52c058 100644 --- a/include/asm-sparc/oplib.h +++ b/include/asm-sparc/oplib.h @@ -267,11 +267,6 @@ extern void prom_getstring(int node, char *prop, char *buf, int bufsize); /* Does the passed node have the given "name"? YES=1 NO=0 */ extern int prom_nodematch(int thisnode, char *name); -/* Puts in buffer a prom name in the form name@x,y or name (x for which_io - * and y for first regs phys address - */ -extern int prom_getname(int node, char *buf, int buflen); - /* Search all siblings starting at the passed node for "name" matching * the given string. Returns the node on success, zero on failure. */ diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h index a68b0bb05958..6a0da3b1695c 100644 --- a/include/asm-sparc64/oplib.h +++ b/include/asm-sparc64/oplib.h @@ -287,11 +287,6 @@ extern void prom_getstring(int node, const char *prop, char *buf, int bufsize); /* Does the passed node have the given "name"? YES=1 NO=0 */ extern int prom_nodematch(int thisnode, const char *name); -/* Puts in buffer a prom name in the form name@x,y or name (x for which_io - * and y for first regs phys address - */ -extern int prom_getname(int node, char *buf, int buflen); - /* Search all siblings starting at the passed node for "name" matching * the given string. Returns the node on success, zero on failure. */ -- cgit v1.2.3 From ad8fec1720e000ba2384de6408076a60fc92a981 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 21 Jul 2006 14:48:50 -0700 Subject: [SCTP]: Verify all the paths to a peer via heartbeat before using them. This patch implements Path Initialization procedure as described in Sec 2.36 of RFC4460. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 4 ++++ include/net/sctp/user.h | 9 +++++++++ net/sctp/associola.c | 27 +++++++++++++++++---------- net/sctp/outqueue.c | 9 ++++++--- net/sctp/sm_make_chunk.c | 4 ++-- net/sctp/sm_sideeffect.c | 12 ++++++++++-- net/sctp/sm_statefuns.c | 5 +++++ net/sctp/transport.c | 9 +++++++-- 8 files changed, 60 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 5f69158c1006..268f2e19ccbb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -445,6 +445,7 @@ typedef struct sctp_sender_hb_info { struct sctp_paramhdr param_hdr; union sctp_addr daddr; unsigned long sent_at; + __u64 hb_nonce; } __attribute__((packed)) sctp_sender_hb_info_t; /* @@ -984,6 +985,9 @@ struct sctp_transport { */ char cacc_saw_newack; } cacc; + + /* 64-bit random number sent with heartbeat. */ + __u64 hb_nonce; }; struct sctp_transport *sctp_transport_new(const union sctp_addr *, diff --git a/include/net/sctp/user.h b/include/net/sctp/user.h index 8a6bef6f91eb..1b7aae6cdd82 100644 --- a/include/net/sctp/user.h +++ b/include/net/sctp/user.h @@ -560,9 +560,18 @@ struct sctp_paddrinfo { } __attribute__((packed, aligned(4))); /* Peer addresses's state. */ +/* UNKNOWN: Peer address passed by the upper layer in sendmsg or connect[x] + * calls. + * UNCONFIRMED: Peer address received in INIT/INIT-ACK address parameters. + * Not yet confirmed by a heartbeat and not available for data + * transfers. + * ACTIVE : Peer address confirmed, active and available for data transfers. + * INACTIVE: Peer address inactive and not available for data transfers. + */ enum sctp_spinfo_state { SCTP_INACTIVE, SCTP_ACTIVE, + SCTP_UNCONFIRMED, SCTP_UNKNOWN = 0xffff /* Value used for transport state unknown */ }; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 9d05e13e92f6..27329ce9c311 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -441,7 +441,8 @@ void sctp_assoc_set_primary(struct sctp_association *asoc, /* If the primary path is changing, assume that the * user wants to use this new path. */ - if (transport->state != SCTP_INACTIVE) + if ((transport->state == SCTP_ACTIVE) || + (transport->state == SCTP_UNKNOWN)) asoc->peer.active_path = transport; /* @@ -532,11 +533,11 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, port = addr->v4.sin_port; SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_add_peer:association %p addr: ", - " port: %d state:%s\n", + " port: %d state:%d\n", asoc, addr, addr->v4.sin_port, - peer_state == SCTP_UNKNOWN?"UNKNOWN":"ACTIVE"); + peer_state); /* Set the port if it has not been set yet. */ if (0 == asoc->peer.port) @@ -545,9 +546,12 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, /* Check to see if this is a duplicate. */ peer = sctp_assoc_lookup_paddr(asoc, addr); if (peer) { - if (peer_state == SCTP_ACTIVE && - peer->state == SCTP_UNKNOWN) - peer->state = SCTP_ACTIVE; + if (peer->state == SCTP_UNKNOWN) { + if (peer_state == SCTP_ACTIVE) + peer->state = SCTP_ACTIVE; + if (peer_state == SCTP_UNCONFIRMED) + peer->state = SCTP_UNCONFIRMED; + } return peer; } @@ -739,7 +743,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, list_for_each(pos, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); - if (t->state == SCTP_INACTIVE) + if ((t->state == SCTP_INACTIVE) || + (t->state == SCTP_UNCONFIRMED)) continue; if (!first || t->last_time_heard > first->last_time_heard) { second = first; @@ -759,7 +764,8 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, * [If the primary is active but not most recent, bump the most * recently used transport.] */ - if (asoc->peer.primary_path->state != SCTP_INACTIVE && + if (((asoc->peer.primary_path->state == SCTP_ACTIVE) || + (asoc->peer.primary_path->state == SCTP_UNKNOWN)) && first != asoc->peer.primary_path) { second = first; first = asoc->peer.primary_path; @@ -1054,7 +1060,7 @@ void sctp_assoc_update(struct sctp_association *asoc, transports); if (!sctp_assoc_lookup_paddr(asoc, &trans->ipaddr)) sctp_assoc_add_peer(asoc, &trans->ipaddr, - GFP_ATOMIC, SCTP_ACTIVE); + GFP_ATOMIC, trans->state); } asoc->ctsn_ack_point = asoc->next_tsn - 1; @@ -1094,7 +1100,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) /* Try to find an active transport. */ - if (t->state != SCTP_INACTIVE) { + if ((t->state == SCTP_ACTIVE) || + (t->state == SCTP_UNKNOWN)) { break; } else { /* Keep track of the next transport in case diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e5faa351aaad..30b710c54e64 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -691,7 +691,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) if (!new_transport) { new_transport = asoc->peer.active_path; - } else if (new_transport->state == SCTP_INACTIVE) { + } else if ((new_transport->state == SCTP_INACTIVE) || + (new_transport->state == SCTP_UNCONFIRMED)) { /* If the chunk is Heartbeat or Heartbeat Ack, * send it to chunk->transport, even if it's * inactive. @@ -848,7 +849,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) */ new_transport = chunk->transport; if (!new_transport || - new_transport->state == SCTP_INACTIVE) + ((new_transport->state == SCTP_INACTIVE) || + (new_transport->state == SCTP_UNCONFIRMED))) new_transport = asoc->peer.active_path; /* Change packets if necessary. */ @@ -1464,7 +1466,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Mark the destination transport address as * active if it is not so marked. */ - if (transport->state == SCTP_INACTIVE) { + if ((transport->state == SCTP_INACTIVE) || + (transport->state == SCTP_UNCONFIRMED)) { sctp_assoc_control_transport( transport->asoc, transport, diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2a8773691695..8134e8b1cdca 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2017,7 +2017,7 @@ static int sctp_process_param(struct sctp_association *asoc, af->from_addr_param(&addr, param.addr, asoc->peer.port, 0); scope = sctp_scope(peer_addr); if (sctp_in_scope(&addr, scope)) - if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_ACTIVE)) + if (!sctp_assoc_add_peer(asoc, &addr, gfp, SCTP_UNCONFIRMED)) return 0; break; @@ -2418,7 +2418,7 @@ static __u16 sctp_process_asconf_param(struct sctp_association *asoc, * Due to Resource Shortage'. */ - peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_ACTIVE); + peer = sctp_assoc_add_peer(asoc, &addr, GFP_ATOMIC, SCTP_UNCONFIRMED); if (!peer) return SCTP_ERROR_RSRC_LOW; diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index c5beb2ad7ef7..9c10bdec1afe 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -430,7 +430,11 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, /* The check for association's overall error counter exceeding the * threshold is done in the state function. */ - asoc->overall_error_count++; + /* When probing UNCONFIRMED addresses, the association overall + * error count is NOT incremented + */ + if (transport->state != SCTP_UNCONFIRMED) + asoc->overall_error_count++; if (transport->state != SCTP_INACTIVE && (transport->error_count++ >= transport->pathmaxrxt)) { @@ -610,7 +614,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Mark the destination transport address as active if it is not so * marked. */ - if (t->state == SCTP_INACTIVE) + if ((t->state == SCTP_INACTIVE) || (t->state == SCTP_UNCONFIRMED)) sctp_assoc_control_transport(asoc, t, SCTP_TRANSPORT_UP, SCTP_HEARTBEAT_SUCCESS); @@ -620,6 +624,10 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, */ hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); + + /* Update the heartbeat timer. */ + if (!mod_timer(&t->hb_timer, sctp_transport_timeout(t))) + sctp_transport_hold(t); } /* Helper function to do a transport reset at the expiry of the hearbeat diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 9e58144f4851..e8498dce5335 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -846,6 +846,7 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t)); hbinfo.daddr = transport->ipaddr; hbinfo.sent_at = jiffies; + hbinfo.hb_nonce = transport->hb_nonce; /* Send a heartbeat to our peer. */ paylen = sizeof(sctp_sender_hb_info_t); @@ -1048,6 +1049,10 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DISCARD; } + /* Validate the 64-bit random nonce. */ + if (hbinfo->hb_nonce != link->hb_nonce) + return SCTP_DISPOSITION_DISCARD; + max_interval = link->hbinterval + link->rto; /* Check if the timestamp looks valid. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 160f62ad1cc5..2763aa93de1a 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -49,6 +49,7 @@ */ #include +#include #include #include @@ -85,7 +86,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->init_sent_count = 0; - peer->state = SCTP_ACTIVE; peer->param_flags = SPP_HB_DISABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -109,6 +109,9 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->hb_timer.function = sctp_generate_heartbeat_event; peer->hb_timer.data = (unsigned long)peer; + /* Initialize the 64-bit random nonce sent with heartbeat. */ + get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); + atomic_set(&peer->refcnt, 1); peer->dead = 0; @@ -517,7 +520,9 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, unsigned long sctp_transport_timeout(struct sctp_transport *t) { unsigned long timeout; - timeout = t->hbinterval + t->rto + sctp_jitter(t->rto); + timeout = t->rto + sctp_jitter(t->rto); + if (t->state != SCTP_UNCONFIRMED) + timeout += t->hbinterval; timeout += jiffies; return timeout; } -- cgit v1.2.3 From dc022a9874d026c7d1635ae66d1afafc5f053731 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Fri, 21 Jul 2006 14:49:25 -0700 Subject: [SCTP]: ADDIP: Don't use an address as source until it is ASCONF-ACKed This implements Rules D1 and D4 of Sec 4.3 in the ADDIP draft. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 7 ++--- net/sctp/bind_addr.c | 8 ++++-- net/sctp/ipv6.c | 3 +- net/sctp/protocol.c | 7 +++-- net/sctp/sm_make_chunk.c | 10 +++++-- net/sctp/socket.c | 72 +++++++++++++++++++++++++++++++++++++--------- 6 files changed, 80 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 268f2e19ccbb..e5aa7ff1f5b5 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -731,13 +731,10 @@ void sctp_init_addrs(struct sctp_chunk *, union sctp_addr *, const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); /* This is a structure for holding either an IPv6 or an IPv4 address. */ -/* sin_family -- AF_INET or AF_INET6 - * sin_port -- ordinary port number - * sin_addr -- cast to either (struct in_addr) or (struct in6_addr) - */ struct sctp_sockaddr_entry { struct list_head list; union sctp_addr a; + __u8 use_as_src; }; typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *); @@ -1142,7 +1139,7 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, sctp_scope_t scope, gfp_t gfp, int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, - gfp_t gfp); + __u8 use_as_src, gfp_t gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 2b962627f631..2b9c12a170e5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -146,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp) /* Add an address to the bind address list in the SCTP_bind_addr structure. */ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, - gfp_t gfp) + __u8 use_as_src, gfp_t gfp) { struct sctp_sockaddr_entry *addr; @@ -163,6 +163,8 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, if (!addr->a.v4.sin_port) addr->a.v4.sin_port = bp->port; + addr->use_as_src = use_as_src; + INIT_LIST_HEAD(&addr->list); list_add_tail(&addr->list, &bp->address_list); SCTP_DBG_OBJCNT_INC(addr); @@ -274,7 +276,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, } af->from_addr_param(&addr, rawaddr, port, 0); - retval = sctp_add_bind_addr(bp, &addr, gfp); + retval = sctp_add_bind_addr(bp, &addr, 1, gfp); if (retval) { /* Can't finish building the list, clean up. */ sctp_bind_addr_clean(bp); @@ -367,7 +369,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, (((AF_INET6 == addr->sa.sa_family) && (flags & SCTP_ADDR6_ALLOWED) && (flags & SCTP_ADDR6_PEERSUPP)))) - error = sctp_add_bind_addr(dest, addr, gfp); + error = sctp_add_bind_addr(dest, addr, 1, gfp); } return error; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 8ef08070c8b6..99c0cefc04e0 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -290,7 +290,8 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, sctp_read_lock(addr_lock); list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if ((laddr->a.sa.sa_family == AF_INET6) && + if ((laddr->use_as_src) && + (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { bmatchlen = sctp_v6_addr_match_len(daddr, &laddr->a); if (!baddr || (matchlen < bmatchlen)) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 816c033d7886..1ab03a27a76e 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -240,7 +240,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (((AF_INET6 == addr->a.sa.sa_family) && (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { - error = sctp_add_bind_addr(bp, &addr->a, + error = sctp_add_bind_addr(bp, &addr->a, 1, GFP_ATOMIC); if (error) goto end_copy; @@ -486,6 +486,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (!laddr->use_as_src) + continue; sctp_v4_dst_saddr(&dst_saddr, dst, bp->port); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; @@ -506,7 +508,8 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, list_for_each(pos, &bp->address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (AF_INET == laddr->a.sa.sa_family) { + if ((laddr->use_as_src) && + (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; if (!ip_route_output_key(&rt, &fl)) { dst = &rt->u.dst; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 8134e8b1cdca..4f11f5858209 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1493,7 +1493,7 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { - sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, + sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, GFP_ATOMIC); } @@ -2565,6 +2565,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, union sctp_addr_param *addr_param; struct list_head *pos; struct sctp_transport *transport; + struct sctp_sockaddr_entry *saddr; int retval = 0; addr_param = (union sctp_addr_param *) @@ -2578,7 +2579,11 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, case SCTP_PARAM_ADD_IP: sctp_local_bh_disable(); sctp_write_lock(&asoc->base.addr_lock); - retval = sctp_add_bind_addr(bp, &addr, GFP_ATOMIC); + list_for_each(pos, &bp->address_list) { + saddr = list_entry(pos, struct sctp_sockaddr_entry, list); + if (sctp_cmp_addr_exact(&saddr->a, &addr)) + saddr->use_as_src = 1; + } sctp_write_unlock(&asoc->base.addr_lock); sctp_local_bh_enable(); break; @@ -2591,6 +2596,7 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); + dst_release(transport->dst); sctp_transport_route(transport, NULL, sctp_sk(asoc->base.sk)); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 518c55ab610b..54722e622e6d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -369,7 +369,7 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) /* Use GFP_ATOMIC since BHs are disabled. */ addr->v4.sin_port = ntohs(addr->v4.sin_port); - ret = sctp_add_bind_addr(bp, addr, GFP_ATOMIC); + ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); addr->v4.sin_port = htons(addr->v4.sin_port); sctp_write_unlock(&ep->base.addr_lock); sctp_local_bh_enable(); @@ -491,6 +491,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sctp_chunk *chunk; struct sctp_sockaddr_entry *laddr; union sctp_addr *addr; + union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; struct list_head *pos; @@ -558,14 +559,26 @@ static int sctp_send_asconf_add_ip(struct sock *sk, } retval = sctp_send_asconf(asoc, chunk); + if (retval) + goto out; - /* FIXME: After sending the add address ASCONF chunk, we - * cannot append the address to the association's binding - * address list, because the new address may be used as the - * source of a message sent to the peer before the ASCONF - * chunk is received by the peer. So we should wait until - * ASCONF_ACK is received. + /* Add the new addresses to the bind address list with + * use_as_src set to 0. */ + sctp_local_bh_disable(); + sctp_write_lock(&asoc->base.addr_lock); + addr_buf = addrs; + for (i = 0; i < addrcnt; i++) { + addr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(addr->v4.sin_family); + memcpy(&saveaddr, addr, af->sockaddr_len); + saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); + retval = sctp_add_bind_addr(bp, &saveaddr, 0, + GFP_ATOMIC); + addr_buf += af->sockaddr_len; + } + sctp_write_unlock(&asoc->base.addr_lock); + sctp_local_bh_enable(); } out: @@ -676,12 +689,15 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; + struct sctp_transport *transport; struct sctp_bind_addr *bp; struct sctp_chunk *chunk; union sctp_addr *laddr; + union sctp_addr saveaddr; void *addr_buf; struct sctp_af *af; - struct list_head *pos; + struct list_head *pos, *pos1; + struct sctp_sockaddr_entry *saddr; int i; int retval = 0; @@ -748,14 +764,42 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto out; } - retval = sctp_send_asconf(asoc, chunk); + /* Reset use_as_src flag for the addresses in the bind address + * list that are to be deleted. + */ + sctp_local_bh_disable(); + sctp_write_lock(&asoc->base.addr_lock); + addr_buf = addrs; + for (i = 0; i < addrcnt; i++) { + laddr = (union sctp_addr *)addr_buf; + af = sctp_get_af_specific(laddr->v4.sin_family); + memcpy(&saveaddr, laddr, af->sockaddr_len); + saveaddr.v4.sin_port = ntohs(saveaddr.v4.sin_port); + list_for_each(pos1, &bp->address_list) { + saddr = list_entry(pos1, + struct sctp_sockaddr_entry, + list); + if (sctp_cmp_addr_exact(&saddr->a, &saveaddr)) + saddr->use_as_src = 0; + } + addr_buf += af->sockaddr_len; + } + sctp_write_unlock(&asoc->base.addr_lock); + sctp_local_bh_enable(); - /* FIXME: After sending the delete address ASCONF chunk, we - * cannot remove the addresses from the association's bind - * address list, because there maybe some packet send to - * the delete addresses, so we should wait until ASCONF_ACK - * packet is received. + /* Update the route and saddr entries for all the transports + * as some of the addresses in the bind address list are + * about to be deleted and cannot be used as source addresses. */ + list_for_each(pos1, &asoc->peer.transport_addr_list) { + transport = list_entry(pos1, struct sctp_transport, + transports); + dst_release(transport->dst); + sctp_transport_route(transport, NULL, + sctp_sk(asoc->base.sk)); + } + + retval = sctp_send_asconf(asoc, chunk); } out: return retval; -- cgit v1.2.3 From 64d2f0855e50a7185546ee1fbc03c2badc31330f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 21 Jul 2006 14:49:49 -0700 Subject: [I/OAT]: net/core/user_dma.c should #include Every file should #include the headers containing the prototypes for its global functions. Especially in cases like this one where gcc can tell us through a compile error that the prototype was wrong... Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/netdma.h | 2 +- net/core/user_dma.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netdma.h b/include/net/netdma.h index 19760eb131aa..ceae5ee85c04 100644 --- a/include/net/netdma.h +++ b/include/net/netdma.h @@ -37,7 +37,7 @@ static inline struct dma_chan *get_softnet_dma(void) } int dma_skb_copy_datagram_iovec(struct dma_chan* chan, - const struct sk_buff *skb, int offset, struct iovec *to, + struct sk_buff *skb, int offset, struct iovec *to, size_t len, struct dma_pinned_list *pinned_list); #endif /* CONFIG_NET_DMA */ diff --git a/net/core/user_dma.c b/net/core/user_dma.c index b7c98dbcdb81..248a6b666aff 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -29,6 +29,7 @@ #include #include /* for BUG_TRAP */ #include +#include #define NET_DMA_DEFAULT_COPYBREAK 4096 -- cgit v1.2.3 From 53c4b2cc7a05c034fd21d104d2ab43ea8cc0e075 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 21 Jul 2006 14:55:38 -0700 Subject: [NET]: Fix reversed error test in netif_tx_trylock A non-zero return value indicates success from spin_trylock, not error. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 76cc099c8580..75f02d8c6ed3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -924,10 +924,10 @@ static inline void netif_tx_lock_bh(struct net_device *dev) static inline int netif_tx_trylock(struct net_device *dev) { - int err = spin_trylock(&dev->_xmit_lock); - if (!err) + int ok = spin_trylock(&dev->_xmit_lock); + if (likely(ok)) dev->xmit_lock_owner = smp_processor_id(); - return err; + return ok; } static inline void netif_tx_unlock(struct net_device *dev) -- cgit v1.2.3 From aa95387774039096c11803c04011f1aa42d85758 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 23 Jul 2006 12:12:16 -0700 Subject: cpu hotplug: simplify and hopefully fix locking The CPU hotplug locking was quite messy, with a recursive lock to handle the fact that both the actual up/down sequence wanted to protect itself from being re-entered, but the callbacks that it called also tended to want to protect themselves from CPU events. This splits the lock into two (one to serialize the whole hotplug sequence, the other to protect against the CPU present bitmaps changing). The latter still allows recursive usage because some subsystems (ondemand policy for cpufreq at least) had already gotten too used to the lax locking, but the locking mistakes are hopefully now less fundamental, and we now warn about recursive lock usage when we see it, in the hope that it can be fixed. Signed-off-by: Linus Torvalds --- include/linux/cpu.h | 6 ----- kernel/cpu.c | 75 ++++++++++++++++++++++++----------------------------- 2 files changed, 34 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 44a11f1ccaf2..8fb344a9abd8 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -48,7 +48,6 @@ static inline void unregister_cpu_notifier(struct notifier_block *nb) { } #endif -extern int current_in_cpu_hotplug(void); int cpu_up(unsigned int cpu); @@ -61,10 +60,6 @@ static inline int register_cpu_notifier(struct notifier_block *nb) static inline void unregister_cpu_notifier(struct notifier_block *nb) { } -static inline int current_in_cpu_hotplug(void) -{ - return 0; -} #endif /* CONFIG_SMP */ extern struct sysdev_class cpu_sysdev_class; @@ -73,7 +68,6 @@ extern struct sysdev_class cpu_sysdev_class; /* Stop CPUs going up and down. */ extern void lock_cpu_hotplug(void); extern void unlock_cpu_hotplug(void); -extern int lock_cpu_hotplug_interruptible(void); #define hotcpu_notifier(fn, pri) { \ static struct notifier_block fn##_nb = \ { .notifier_call = fn, .priority = pri }; \ diff --git a/kernel/cpu.c b/kernel/cpu.c index 70fbf2e83766..f230f9ae01c2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -16,56 +16,48 @@ #include /* This protects CPUs going up and down... */ -static DEFINE_MUTEX(cpucontrol); +static DEFINE_MUTEX(cpu_add_remove_lock); +static DEFINE_MUTEX(cpu_bitmask_lock); static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain); #ifdef CONFIG_HOTPLUG_CPU -static struct task_struct *lock_cpu_hotplug_owner; -static int lock_cpu_hotplug_depth; -static int __lock_cpu_hotplug(int interruptible) -{ - int ret = 0; - - if (lock_cpu_hotplug_owner != current) { - if (interruptible) - ret = mutex_lock_interruptible(&cpucontrol); - else - mutex_lock(&cpucontrol); - } - - /* - * Set only if we succeed in locking - */ - if (!ret) { - lock_cpu_hotplug_depth++; - lock_cpu_hotplug_owner = current; - } - - return ret; -} +/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */ +static struct task_struct *recursive; +static int recursive_depth; void lock_cpu_hotplug(void) { - __lock_cpu_hotplug(0); + struct task_struct *tsk = current; + + if (tsk == recursive) { + static int warnings = 10; + if (warnings) { + printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n"); + WARN_ON(1); + warnings--; + } + recursive_depth++; + return; + } + mutex_lock(&cpu_bitmask_lock); + recursive = tsk; } EXPORT_SYMBOL_GPL(lock_cpu_hotplug); void unlock_cpu_hotplug(void) { - if (--lock_cpu_hotplug_depth == 0) { - lock_cpu_hotplug_owner = NULL; - mutex_unlock(&cpucontrol); + WARN_ON(recursive != current); + if (recursive_depth) { + recursive_depth--; + return; } + mutex_unlock(&cpu_bitmask_lock); + recursive = NULL; } EXPORT_SYMBOL_GPL(unlock_cpu_hotplug); -int lock_cpu_hotplug_interruptible(void) -{ - return __lock_cpu_hotplug(1); -} -EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible); #endif /* CONFIG_HOTPLUG_CPU */ /* Need to know about CPUs going up/down? */ @@ -122,9 +114,7 @@ int cpu_down(unsigned int cpu) struct task_struct *p; cpumask_t old_allowed, tmp; - if ((err = lock_cpu_hotplug_interruptible()) != 0) - return err; - + mutex_lock(&cpu_add_remove_lock); if (num_online_cpus() == 1) { err = -EBUSY; goto out; @@ -150,7 +140,10 @@ int cpu_down(unsigned int cpu) cpu_clear(cpu, tmp); set_cpus_allowed(current, tmp); + mutex_lock(&cpu_bitmask_lock); p = __stop_machine_run(take_cpu_down, NULL, cpu); + mutex_unlock(&cpu_bitmask_lock); + if (IS_ERR(p)) { /* CPU didn't die: tell everyone. Can't complain. */ if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED, @@ -187,7 +180,7 @@ out_thread: out_allowed: set_cpus_allowed(current, old_allowed); out: - unlock_cpu_hotplug(); + mutex_unlock(&cpu_add_remove_lock); return err; } #endif /*CONFIG_HOTPLUG_CPU*/ @@ -197,9 +190,7 @@ int __devinit cpu_up(unsigned int cpu) int ret; void *hcpu = (void *)(long)cpu; - if ((ret = lock_cpu_hotplug_interruptible()) != 0) - return ret; - + mutex_lock(&cpu_add_remove_lock); if (cpu_online(cpu) || !cpu_present(cpu)) { ret = -EINVAL; goto out; @@ -214,7 +205,9 @@ int __devinit cpu_up(unsigned int cpu) } /* Arch-specific enabling code. */ + mutex_lock(&cpu_bitmask_lock); ret = __cpu_up(cpu); + mutex_unlock(&cpu_bitmask_lock); if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); @@ -227,6 +220,6 @@ out_notify: blocking_notifier_call_chain(&cpu_chain, CPU_UP_CANCELED, hcpu); out: - unlock_cpu_hotplug(); + mutex_unlock(&cpu_add_remove_lock); return ret; } -- cgit v1.2.3