diff options
author | Dexuan Cui <decui@microsoft.com> | 2020-01-26 08:49:41 +0300 |
---|---|---|
committer | Sasha Levin <sashal@kernel.org> | 2020-01-27 06:10:10 +0300 |
commit | 9fc3c01a1fae669a2ef9f13ee1e1a26e057d79f8 (patch) | |
tree | abfa8d6dacc777830d4c10eafbfd5497460e4402 /tools/hv/hv_fcopy_daemon.c | |
parent | 3a6fb6c4255c3893ab61e2bd4e9ae01ca6bbcd94 (diff) | |
download | linux-9fc3c01a1fae669a2ef9f13ee1e1a26e057d79f8.tar.xz |
Tools: hv: Reopen the devices if read() or write() returns errors
The state machine in the hv_utils driver can run out of order in some
corner cases, e.g. if the kvp daemon doesn't call write() fast enough
due to some reason, kvp_timeout_func() can run first and move the state
to HVUTIL_READY; next, when kvp_on_msg() is called it returns -EINVAL
since kvp_transaction.state is smaller than HVUTIL_USERSPACE_REQ; later,
the daemon's write() gets an error -EINVAL, and the daemon will exit().
We can reproduce the issue by sending a SIGSTOP signal to the daemon, wait
for 1 minute, and send a SIGCONT signal to the daemon: the daemon will
exit() quickly.
We can fix the issue by forcing a reset of the device (which means the
daemon can close() and open() the device again) and doing extra necessary
clean-up.
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Diffstat (limited to 'tools/hv/hv_fcopy_daemon.c')
-rw-r--r-- | tools/hv/hv_fcopy_daemon.c | 37 |
1 files changed, 32 insertions, 5 deletions
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index aea2d91ab364..16d629b22c25 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -80,6 +80,8 @@ static int hv_start_fcopy(struct hv_start_fcopy *smsg) error = 0; done: + if (error) + target_fname[0] = '\0'; return error; } @@ -108,15 +110,29 @@ static int hv_copy_data(struct hv_do_fcopy *cpmsg) return ret; } +/* + * Reset target_fname to "" in the two below functions for hibernation: if + * the fcopy operation is aborted by hibernation, the daemon should remove the + * partially-copied file; to achieve this, the hv_utils driver always fakes a + * CANCEL_FCOPY message upon suspend, and later when the VM resumes back, + * the daemon calls hv_copy_cancel() to remove the file; if a file is copied + * successfully before suspend, hv_copy_finished() must reset target_fname to + * avoid that the file can be incorrectly removed upon resume, since the faked + * CANCEL_FCOPY message is spurious in this case. + */ static int hv_copy_finished(void) { close(target_fd); + target_fname[0] = '\0'; return 0; } static int hv_copy_cancel(void) { close(target_fd); - unlink(target_fname); + if (strlen(target_fname) > 0) { + unlink(target_fname); + target_fname[0] = '\0'; + } return 0; } @@ -131,7 +147,7 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fcopy_fd; + int fcopy_fd = -1; int error; int daemonize = 1, long_index = 0, opt; int version = FCOPY_CURRENT_VERSION; @@ -141,7 +157,7 @@ int main(int argc, char *argv[]) struct hv_do_fcopy copy; __u32 kernel_modver; } buffer = { }; - int in_handshake = 1; + int in_handshake; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -170,6 +186,12 @@ int main(int argc, char *argv[]) openlog("HV_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); +reopen_fcopy_fd: + if (fcopy_fd != -1) + close(fcopy_fd); + /* Remove any possible partially-copied file on error */ + hv_copy_cancel(); + in_handshake = 1; fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR); if (fcopy_fd < 0) { @@ -196,7 +218,7 @@ int main(int argc, char *argv[]) len = pread(fcopy_fd, &buffer, sizeof(buffer), 0); if (len < 0) { syslog(LOG_ERR, "pread failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + goto reopen_fcopy_fd; } if (in_handshake) { @@ -231,9 +253,14 @@ int main(int argc, char *argv[]) } + /* + * pwrite() may return an error due to the faked CANCEL_FCOPY + * message upon hibernation. Ignore the error by resetting the + * dev file, i.e. closing and re-opening it. + */ if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) { syslog(LOG_ERR, "pwrite failed: %s", strerror(errno)); - exit(EXIT_FAILURE); + goto reopen_fcopy_fd; } } } |