diff options
author | David S. Miller <davem@davemloft.net> | 2012-10-09 21:14:32 +0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-10-09 21:14:32 +0400 |
commit | 8dd9117cc7a021ced1c5cf177e2d44dd92b88617 (patch) | |
tree | cad990f58f9ec6d400226dda86718fc10781416e /tools | |
parent | 16e310ae6ed352c4963b1f2413fcd88fa693eeda (diff) | |
parent | 547b1e81afe3119f7daf702cc03b158495535a25 (diff) | |
download | linux-8dd9117cc7a021ced1c5cf177e2d44dd92b88617.tar.xz |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux
Pulled mainline in order to get the UAPI infrastructure already
merged before I pull in David Howells's UAPI trees for networking.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'tools')
-rw-r--r-- | tools/lguest/lguest.c | 1 | ||||
-rw-r--r-- | tools/perf/Makefile | 2 | ||||
-rw-r--r-- | tools/perf/util/include/linux/rbtree.h | 1 | ||||
-rw-r--r-- | tools/power/acpi/Makefile | 18 | ||||
-rw-r--r-- | tools/power/acpi/acpidump.8 | 59 | ||||
-rw-r--r-- | tools/power/acpi/acpidump.c | 560 | ||||
-rw-r--r-- | tools/power/cpupower/Makefile | 2 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 55 | ||||
-rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 214 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/Makefile | 13 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/README | 118 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/trace-agent-ctl.c | 137 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/trace-agent-rw.c | 192 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/trace-agent.c | 270 | ||||
-rw-r--r-- | tools/virtio/virtio-trace/trace-agent.h | 75 |
15 files changed, 1679 insertions, 38 deletions
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index f759f4f097c7..fd2f9221b241 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -1299,6 +1299,7 @@ static struct device *new_device(const char *name, u16 type) dev->feature_len = 0; dev->num_vq = 0; dev->running = false; + dev->next = NULL; /* * Append to device list. Prepending to a single-linked list is diff --git a/tools/perf/Makefile b/tools/perf/Makefile index e5e71e7d95a0..86258c2a2c23 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -72,7 +72,7 @@ ifeq ($(ARCH),x86_64) override ARCH := x86 IS_X86_64 := 0 ifeq (, $(findstring m32,$(EXTRA_CFLAGS))) - IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1) + IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1) endif ifeq (${IS_X86_64}, 1) RAW_ARCH := x86_64 diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index 2a030c5af3aa..9bcdc844b330 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1,2 +1,3 @@ #include <stdbool.h> +#include <stdbool.h> #include "../../../../include/linux/rbtree.h" diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile new file mode 100644 index 000000000000..6b9cf7a987c7 --- /dev/null +++ b/tools/power/acpi/Makefile @@ -0,0 +1,18 @@ +PROG= acpidump +SRCS= acpidump.c +KERNEL_INCLUDE := ../../../include +CFLAGS += -Wall -Wstrict-prototypes -Wdeclaration-after-statement -Os -s -D_LINUX -DDEFINE_ALTERNATE_TYPES -I$(KERNEL_INCLUDE) + +all: acpidump +$(PROG) : $(SRCS) + $(CC) $(CFLAGS) $(SRCS) -o $(PROG) + +CLEANFILES= $(PROG) + +clean : + rm -f $(CLEANFILES) $(patsubst %.c,%.o, $(SRCS)) *~ + +install : + install acpidump /usr/bin/acpidump + install acpidump.8 /usr/share/man/man8 + diff --git a/tools/power/acpi/acpidump.8 b/tools/power/acpi/acpidump.8 new file mode 100644 index 000000000000..adfa99166e5e --- /dev/null +++ b/tools/power/acpi/acpidump.8 @@ -0,0 +1,59 @@ +.TH ACPIDUMP 8 +.SH NAME +acpidump \- Dump system's ACPI tables to an ASCII file. +.SH SYNOPSIS +.ft B +.B acpidump > acpidump.out +.SH DESCRIPTION +\fBacpidump \fP dumps the systems ACPI tables to an ASCII file +appropriate for attaching to a bug report. + +Subsequently, they can be processed by utilities in the ACPICA package. +.SS Options +no options worth worrying about. +.PP +.SH EXAMPLE + +.nf +# acpidump > acpidump.out + +$ acpixtract -a acpidump.out + Acpi table [DSDT] - 15974 bytes written to DSDT.dat + Acpi table [FACS] - 64 bytes written to FACS.dat + Acpi table [FACP] - 116 bytes written to FACP.dat + Acpi table [APIC] - 120 bytes written to APIC.dat + Acpi table [MCFG] - 60 bytes written to MCFG.dat + Acpi table [SSDT] - 444 bytes written to SSDT1.dat + Acpi table [SSDT] - 439 bytes written to SSDT2.dat + Acpi table [SSDT] - 439 bytes written to SSDT3.dat + Acpi table [SSDT] - 439 bytes written to SSDT4.dat + Acpi table [SSDT] - 439 bytes written to SSDT5.dat + Acpi table [RSDT] - 76 bytes written to RSDT.dat + Acpi table [RSDP] - 20 bytes written to RSDP.dat + +$ iasl -d *.dat +... +.fi +creates *.dsl, a human readable form which can be edited +and compiled using iasl. + + +.SH NOTES + +.B "acpidump " +must be run as root. + +.SH REFERENCES +ACPICA: https://acpica.org/ + +.SH FILES +.ta +.nf +/dev/mem +/sys/firmware/acpi/tables/dynamic/* +.fi + +.PP +.SH AUTHOR +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/acpi/acpidump.c b/tools/power/acpi/acpidump.c new file mode 100644 index 000000000000..07779871421c --- /dev/null +++ b/tools/power/acpi/acpidump.c @@ -0,0 +1,560 @@ +/* + * (c) Alexey Starikovskiy, Intel, 2005-2006. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions, and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * substantially similar to the "NO WARRANTY" disclaimer below + * ("Disclaimer") and any redistribution must be conditioned upon + * including a substantially similar Disclaimer requirement for further + * binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGES. + */ + +#ifdef DEFINE_ALTERNATE_TYPES +/* hack to enable building old application with new headers -lenb */ +#define acpi_fadt_descriptor acpi_table_fadt +#define acpi_rsdp_descriptor acpi_table_rsdp +#define DSDT_SIG ACPI_SIG_DSDT +#define FACS_SIG ACPI_SIG_FACS +#define FADT_SIG ACPI_SIG_FADT +#define xfirmware_ctrl Xfacs +#define firmware_ctrl facs + +typedef int s32; +typedef unsigned char u8; +typedef unsigned short u16; +typedef unsigned int u32; +typedef unsigned long long u64; +typedef long long s64; +#endif + +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <getopt.h> + +#include <sys/types.h> +#include <dirent.h> + +#include <acpi/acconfig.h> +#include <acpi/platform/acenv.h> +#include <acpi/actypes.h> +#include <acpi/actbl.h> + +static inline u8 checksum(u8 * buffer, u32 length) +{ + u8 sum = 0, *i = buffer; + buffer += length; + for (; i < buffer; sum += *(i++)); + return sum; +} + +static unsigned long psz, addr, length; +static int print, connect, skip; +static u8 select_sig[4]; + +static unsigned long read_efi_systab( void ) +{ + char buffer[80]; + unsigned long addr; + FILE *f = fopen("/sys/firmware/efi/systab", "r"); + if (f) { + while (fgets(buffer, 80, f)) { + if (sscanf(buffer, "ACPI20=0x%lx", &addr) == 1) + return addr; + } + fclose(f); + } + return 0; +} + +static u8 *acpi_map_memory(unsigned long where, unsigned length) +{ + unsigned long offset; + u8 *there; + int fd = open("/dev/mem", O_RDONLY); + if (fd < 0) { + fprintf(stderr, "acpi_os_map_memory: cannot open /dev/mem\n"); + exit(1); + } + offset = where % psz; + there = mmap(NULL, length + offset, PROT_READ, MAP_PRIVATE, + fd, where - offset); + close(fd); + if (there == MAP_FAILED) return 0; + return (there + offset); +} + +static void acpi_unmap_memory(u8 * there, unsigned length) +{ + unsigned long offset = (unsigned long)there % psz; + munmap(there - offset, length + offset); +} + +static struct acpi_table_header *acpi_map_table(unsigned long where, char *sig) +{ + unsigned size; + struct acpi_table_header *tbl = (struct acpi_table_header *) + acpi_map_memory(where, sizeof(struct acpi_table_header)); + if (!tbl || (sig && memcmp(sig, tbl->signature, 4))) return 0; + size = tbl->length; + acpi_unmap_memory((u8 *) tbl, sizeof(struct acpi_table_header)); + return (struct acpi_table_header *)acpi_map_memory(where, size); +} + +static void acpi_unmap_table(struct acpi_table_header *tbl) +{ + acpi_unmap_memory((u8 *)tbl, tbl->length); +} + +static struct acpi_rsdp_descriptor *acpi_scan_for_rsdp(u8 *begin, u32 length) +{ + struct acpi_rsdp_descriptor *rsdp; + u8 *i, *end = begin + length; + /* Search from given start address for the requested length */ + for (i = begin; i < end; i += ACPI_RSDP_SCAN_STEP) { + /* The signature and checksum must both be correct */ + if (memcmp((char *)i, "RSD PTR ", 8)) continue; + rsdp = (struct acpi_rsdp_descriptor *)i; + /* Signature matches, check the appropriate checksum */ + if (!checksum((u8 *) rsdp, (rsdp->revision < 2) ? + ACPI_RSDP_CHECKSUM_LENGTH : + ACPI_RSDP_XCHECKSUM_LENGTH)) + /* Checksum valid, we have found a valid RSDP */ + return rsdp; + } + /* Searched entire block, no RSDP was found */ + return 0; +} + +/* + * Output data + */ +static void acpi_show_data(int fd, u8 * data, int size) +{ + char buffer[256]; + int len; + int i, remain = size; + while (remain > 0) { + len = snprintf(buffer, 256, " %04x:", size - remain); + for (i = 0; i < 16 && i < remain; i++) { + len += + snprintf(&buffer[len], 256 - len, " %02x", data[i]); + } + for (; i < 16; i++) { + len += snprintf(&buffer[len], 256 - len, " "); + } + len += snprintf(&buffer[len], 256 - len, " "); + for (i = 0; i < 16 && i < remain; i++) { + buffer[len++] = (isprint(data[i])) ? data[i] : '.'; + } + buffer[len++] = '\n'; + write(fd, buffer, len); + data += 16; + remain -= 16; + } +} + +/* + * Output ACPI table + */ +static void acpi_show_table(int fd, struct acpi_table_header *table, unsigned long addr) +{ + char buff[80]; + int len = snprintf(buff, 80, "%.4s @ %p\n", table->signature, (void *)addr); + write(fd, buff, len); + acpi_show_data(fd, (u8 *) table, table->length); + buff[0] = '\n'; + write(fd, buff, 1); +} + +static void write_table(int fd, struct acpi_table_header *tbl, unsigned long addr) +{ + static int select_done = 0; + if (!select_sig[0]) { + if (print) { + acpi_show_table(fd, tbl, addr); + } else { + write(fd, tbl, tbl->length); + } + } else if (!select_done && !memcmp(select_sig, tbl->signature, 4)) { + if (skip > 0) { + --skip; + return; + } + if (print) { + acpi_show_table(fd, tbl, addr); + } else { + write(fd, tbl, tbl->length); + } + select_done = 1; + } +} + +static void acpi_dump_FADT(int fd, struct acpi_table_header *tbl, unsigned long xaddr) { + struct acpi_fadt_descriptor x; + unsigned long addr; + size_t len = sizeof(struct acpi_fadt_descriptor); + if (len > tbl->length) len = tbl->length; + memcpy(&x, tbl, len); + x.header.length = len; + if (checksum((u8 *)tbl, len)) { + fprintf(stderr, "Wrong checksum for FADT!\n"); + } + if (x.header.length >= 148 && x.Xdsdt) { + addr = (unsigned long)x.Xdsdt; + if (connect) { + x.Xdsdt = lseek(fd, 0, SEEK_CUR); + } + } else if (x.header.length >= 44 && x.dsdt) { + addr = (unsigned long)x.dsdt; + if (connect) { + x.dsdt = lseek(fd, 0, SEEK_CUR); + } + } else { + fprintf(stderr, "No DSDT in FADT!\n"); + goto no_dsdt; + } + tbl = acpi_map_table(addr, DSDT_SIG); + if (!tbl) goto no_dsdt; + if (checksum((u8 *)tbl, tbl->length)) + fprintf(stderr, "Wrong checksum for DSDT!\n"); + write_table(fd, tbl, addr); + acpi_unmap_table(tbl); +no_dsdt: + if (x.header.length >= 140 && x.xfirmware_ctrl) { + addr = (unsigned long)x.xfirmware_ctrl; + if (connect) { + x.xfirmware_ctrl = lseek(fd, 0, SEEK_CUR); + } + } else if (x.header.length >= 40 && x.firmware_ctrl) { + addr = (unsigned long)x.firmware_ctrl; + if (connect) { + x.firmware_ctrl = lseek(fd, 0, SEEK_CUR); + } + } else { + fprintf(stderr, "No FACS in FADT!\n"); + goto no_facs; + } + tbl = acpi_map_table(addr, FACS_SIG); + if (!tbl) goto no_facs; + /* do not checksum FACS */ + write_table(fd, tbl, addr); + acpi_unmap_table(tbl); +no_facs: + write_table(fd, (struct acpi_table_header *)&x, xaddr); +} + +static int acpi_dump_SDT(int fd, struct acpi_rsdp_descriptor *rsdp) +{ + struct acpi_table_header *sdt, *tbl = 0; + int xsdt = 1, i, num; + char *offset; + unsigned long addr; + if (rsdp->revision > 1 && rsdp->xsdt_physical_address) { + tbl = acpi_map_table(rsdp->xsdt_physical_address, "XSDT"); + } + if (!tbl && rsdp->rsdt_physical_address) { + xsdt = 0; + tbl = acpi_map_table(rsdp->rsdt_physical_address, "RSDT"); + } + if (!tbl) return 0; + sdt = malloc(tbl->length); + memcpy(sdt, tbl, tbl->length); + acpi_unmap_table(tbl); + if (checksum((u8 *)sdt, sdt->length)) + fprintf(stderr, "Wrong checksum for %s!\n", (xsdt)?"XSDT":"RSDT"); + num = (sdt->length - sizeof(struct acpi_table_header))/((xsdt)?sizeof(u64):sizeof(u32)); + offset = (char *)sdt + sizeof(struct acpi_table_header); + for (i = 0; i < num; ++i, offset += ((xsdt) ? sizeof(u64) : sizeof(u32))) { + addr = (xsdt) ? (unsigned long)(*(u64 *)offset): + (unsigned long)(*(u32 *)offset); + if (!addr) continue; + tbl = acpi_map_table(addr, 0); + if (!tbl) continue; + if (!memcmp(tbl->signature, FADT_SIG, 4)) { + acpi_dump_FADT(fd, tbl, addr); + } else { + if (checksum((u8 *)tbl, tbl->length)) + fprintf(stderr, "Wrong checksum for generic table!\n"); + write_table(fd, tbl, addr); + } + acpi_unmap_table(tbl); + if (connect) { + if (xsdt) + (*(u64*)offset) = lseek(fd, 0, SEEK_CUR); + else + (*(u32*)offset) = lseek(fd, 0, SEEK_CUR); + } + } + if (xsdt) { + addr = (unsigned long)rsdp->xsdt_physical_address; + if (connect) { + rsdp->xsdt_physical_address = lseek(fd, 0, SEEK_CUR); + } + } else { + addr = (unsigned long)rsdp->rsdt_physical_address; + if (connect) { + rsdp->rsdt_physical_address = lseek(fd, 0, SEEK_CUR); + } + } + write_table(fd, sdt, addr); + free (sdt); + return 1; +} + +#define DYNAMIC_SSDT "/sys/firmware/acpi/tables/dynamic" + +static void acpi_dump_dynamic_SSDT(int fd) +{ + struct stat file_stat; + char filename[256], *ptr; + DIR *tabledir; + struct dirent *entry; + FILE *fp; + int count, readcount, length; + struct acpi_table_header table_header, *ptable; + + if (stat(DYNAMIC_SSDT, &file_stat) == -1) { + /* The directory doesn't exist */ + return; + } + tabledir = opendir(DYNAMIC_SSDT); + if(!tabledir){ + /*can't open the directory */ + return; + } + + while ((entry = readdir(tabledir)) != 0){ + /* skip the file of . /.. */ + if (entry->d_name[0] == '.') + continue; + + sprintf(filename, "%s/%s", DYNAMIC_SSDT, entry->d_name); + fp = fopen(filename, "r"); + if (fp == NULL) { + fprintf(stderr, "Can't open the file of %s\n", + filename); + continue; + } + /* Read the Table header to parse the table length */ + count = fread(&table_header, 1, sizeof(struct acpi_table_header), fp); + if (count < sizeof(table_header)) { + /* the length is lessn than ACPI table header. skip it */ + fclose(fp); + continue; + } + length = table_header.length; + ptr = malloc(table_header.length); + fseek(fp, 0, SEEK_SET); + readcount = 0; + while(!feof(fp) && readcount < length) { + count = fread(ptr + readcount, 1, 256, fp); + readcount += count; + } + fclose(fp); + ptable = (struct acpi_table_header *) ptr; + if (checksum((u8 *) ptable, ptable->length)) + fprintf(stderr, "Wrong checksum " + "for dynamic SSDT table!\n"); + write_table(fd, ptable, 0); + free(ptr); + } + closedir(tabledir); + return; +} + +static void usage(const char *progname) +{ + puts("Usage:"); + printf("%s [--addr 0x1234][--table DSDT][--output filename]" + "[--binary][--length 0x456][--help]\n", progname); + puts("\t--addr 0x1234 or -a 0x1234 -- look for tables at this physical address"); + puts("\t--table DSDT or -t DSDT -- only dump table with DSDT signature"); + puts("\t--output filename or -o filename -- redirect output from stdin to filename"); + puts("\t--binary or -b -- dump data in binary form rather than in hex-dump format"); + puts("\t--length 0x456 or -l 0x456 -- works only with --addr, dump physical memory" + "\n\t\tregion without trying to understand it's contents"); + puts("\t--skip 2 or -s 2 -- skip 2 tables of the given name and output only 3rd one"); + puts("\t--help or -h -- this help message"); + exit(0); +} + +static struct option long_options[] = { + {"addr", 1, 0, 0}, + {"table", 1, 0, 0}, + {"output", 1, 0, 0}, + {"binary", 0, 0, 0}, + {"length", 1, 0, 0}, + {"skip", 1, 0, 0}, + {"help", 0, 0, 0}, + {0, 0, 0, 0} +}; +int main(int argc, char **argv) +{ + int option_index, c, fd; + u8 *raw; + struct acpi_rsdp_descriptor rsdpx, *x = 0; + char *filename = 0; + char buff[80]; + memset(select_sig, 0, 4); + print = 1; + connect = 0; + addr = length = 0; + skip = 0; + while (1) { + option_index = 0; + c = getopt_long(argc, argv, "a:t:o:bl:s:h", + long_options, &option_index); + if (c == -1) + break; + + switch (c) { + case 0: + switch (option_index) { + case 0: + addr = strtoul(optarg, (char **)NULL, 16); + break; + case 1: + memcpy(select_sig, optarg, 4); + break; + case 2: + filename = optarg; + break; + case 3: + print = 0; + break; + case 4: + length = strtoul(optarg, (char **)NULL, 16); + break; + case 5: + skip = strtoul(optarg, (char **)NULL, 10); + break; + case 6: + usage(argv[0]); + exit(0); + } + break; + case 'a': + addr = strtoul(optarg, (char **)NULL, 16); + break; + case 't': + memcpy(select_sig, optarg, 4); + break; + case 'o': + filename = optarg; + break; + case 'b': + print = 0; + break; + case 'l': + length = strtoul(optarg, (char **)NULL, 16); + break; + case 's': + skip = strtoul(optarg, (char **)NULL, 10); + break; + case 'h': + usage(argv[0]); + exit(0); + default: + printf("Unknown option!\n"); + usage(argv[0]); + exit(0); + } + } + + fd = STDOUT_FILENO; + if (filename) { + fd = creat(filename, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH); + if (fd < 0) + return fd; + } + + if (!select_sig[0] && !print) { + connect = 1; + } + + psz = sysconf(_SC_PAGESIZE); + if (length && addr) { + /* We know length and address, it means we just want a memory dump */ + if (!(raw = acpi_map_memory(addr, length))) + goto not_found; + write(fd, raw, length); + acpi_unmap_memory(raw, length); + close(fd); + return 0; + } + + length = sizeof(struct acpi_rsdp_descriptor); + if (!addr) { + addr = read_efi_systab(); + if (!addr) { + addr = ACPI_HI_RSDP_WINDOW_BASE; + length = ACPI_HI_RSDP_WINDOW_SIZE; + } + } + + if (!(raw = acpi_map_memory(addr, length)) || + !(x = acpi_scan_for_rsdp(raw, length))) + goto not_found; + + /* Find RSDP and print all found tables */ + memcpy(&rsdpx, x, sizeof(struct acpi_rsdp_descriptor)); + acpi_unmap_memory(raw, length); + if (connect) { + lseek(fd, sizeof(struct acpi_rsdp_descriptor), SEEK_SET); + } + if (!acpi_dump_SDT(fd, &rsdpx)) + goto not_found; + if (connect) { + lseek(fd, 0, SEEK_SET); + write(fd, x, (rsdpx.revision < 2) ? + ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH); + } else if (!select_sig[0] || !memcmp("RSD PTR ", select_sig, 4)) { + addr += (long)x - (long)raw; + length = snprintf(buff, 80, "RSD PTR @ %p\n", (void *)addr); + write(fd, buff, length); + acpi_show_data(fd, (u8 *) & rsdpx, (rsdpx.revision < 2) ? + ACPI_RSDP_CHECKSUM_LENGTH : ACPI_RSDP_XCHECKSUM_LENGTH); + buff[0] = '\n'; + write(fd, buff, 1); + } + acpi_dump_dynamic_SSDT(fd); + close(fd); + return 0; +not_found: + close(fd); + fprintf(stderr, "ACPI tables were not found. If you know location " + "of RSD PTR table (from dmesg, etc), " + "supply it with either --addr or -a option\n"); + return 1; +} diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index a93e06cfcc2a..cf397bd26d0c 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -111,7 +111,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo; export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS # check if compiler option is supported -cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} +cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;} # use '-Os' optimization if available, else use -O2 OPTIMIZATION := $(call cc-supports,-Os,-O2) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 74e44507dfe9..e4d0690cccf9 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -4,15 +4,11 @@ turbostat \- Report processor frequency and idle statistics .SH SYNOPSIS .ft B .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB command .br .B turbostat -.RB [ "\-s" ] -.RB [ "\-v" ] -.RB [ "\-M MSR#" ] +.RB [ Options ] .RB [ "\-i interval_sec" ] .SH DESCRIPTION \fBturbostat \fP reports processor topology, frequency @@ -27,16 +23,23 @@ supports an "invariant" TSC, plus the APERF and MPERF MSRs. on processors that additionally support C-state residency counters. .SS Options -The \fB-s\fP option limits output to a 1-line system summary for each interval. +The \fB-p\fP option limits output to the 1st thread in 1st core of each package. .PP -The \fB-c\fP option limits output to the 1st thread in each core. +The \fB-P\fP option limits output to the 1st thread in each Package. .PP -The \fB-p\fP option limits output to the 1st thread in each package. +The \fB-S\fP option limits output to a 1-line System Summary for each interval. .PP The \fB-v\fP option increases verbosity. .PP -The \fB-M MSR#\fP option dumps the specified MSR, -in addition to the usual frequency and idle statistics. +The \fB-s\fP option prints the SMI counter, equivalent to "-c 0x34" +.PP +The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +.PP +The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +.PP +The \fB-m MSR#\fP option includes the the specified 32-bit MSR value. +.PP +The \fB-M MSR#\fP option includes the the specified 64-bit MSR value. .PP The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds. The default is 5 seconds. @@ -150,6 +153,29 @@ Note that turbostat reports average GHz of 3.63, while the arithmetic average of the GHz column above is lower. This is a weighted average, where the weight is %c0. ie. it is the total number of un-halted cycles elapsed per time divided by the number of CPUs. +.SH SMI COUNTING EXAMPLE +On Intel Nehalem and newer processors, MSR 0x34 is a System Management Mode Interrupt (SMI) counter. +Using the -m option, you can display how many SMIs have fired since reset, or if there +are SMIs during the measurement interval, you can display the delta using the -d option. +.nf +[root@x980 ~]# turbostat -m 0x34 +cor CPU %c0 GHz TSC MSR 0x034 %c1 %c3 %c6 %pc3 %pc6 + 1.41 1.82 3.38 0x00000000 8.92 37.82 51.85 17.37 0.55 + 0 0 3.73 2.03 3.38 0x00000055 1.72 48.25 46.31 17.38 0.55 + 0 6 0.14 1.63 3.38 0x00000056 5.30 + 1 2 2.51 1.80 3.38 0x00000056 15.65 29.33 52.52 + 1 8 0.10 1.65 3.38 0x00000056 18.05 + 2 4 1.16 1.68 3.38 0x00000056 5.87 24.47 68.50 + 2 10 0.10 1.63 3.38 0x00000056 6.93 + 8 1 3.84 1.91 3.38 0x00000056 1.36 50.65 44.16 + 8 7 0.08 1.64 3.38 0x00000056 5.12 + 9 3 1.82 1.73 3.38 0x00000056 7.59 24.21 66.38 + 9 9 0.09 1.68 3.38 0x00000056 9.32 + 10 5 1.66 1.65 3.38 0x00000056 15.10 50.00 33.23 + 10 11 1.72 1.65 3.38 0x00000056 15.05 +^C +[root@x980 ~]# +.fi .SH NOTES .B "turbostat " @@ -165,6 +191,13 @@ may work poorly on Linux-2.6.20 through 2.6.29, as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF in those kernels. +If the TSC column does not make sense, then +the other numbers will also make no sense. +Turbostat is lightweight, and its data collection is not atomic. +These issues are usually caused by an extremely short measurement +interval (much less than 1 second), or system activity that prevents +turbostat from being able to run on all CPUS to quickly collect data. + The APERF, MPERF MSRs are defined to count non-halted cycles. Although it is not guaranteed by the architecture, turbostat assumes that they count at TSC rate, which is true on all processors tested to date. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 861d77190206..2655ae9a3ad8 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -35,9 +35,9 @@ #include <ctype.h> #include <sched.h> -#define MSR_TSC 0x10 #define MSR_NEHALEM_PLATFORM_INFO 0xCE #define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1AD +#define MSR_IVT_TURBO_RATIO_LIMIT 0x1AE #define MSR_APERF 0xE8 #define MSR_MPERF 0xE7 #define MSR_PKG_C2_RESIDENCY 0x60D /* SNB only */ @@ -62,7 +62,11 @@ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nehalem_platform_info; unsigned int do_nehalem_turbo_ratio_limit; -unsigned int extra_msr_offset; +unsigned int do_ivt_turbo_ratio_limit; +unsigned int extra_msr_offset32; +unsigned int extra_msr_offset64; +unsigned int extra_delta_offset32; +unsigned int extra_delta_offset64; double bclk; unsigned int show_pkg; unsigned int show_core; @@ -83,7 +87,10 @@ struct thread_data { unsigned long long aperf; unsigned long long mperf; unsigned long long c1; /* derived */ - unsigned long long extra_msr; + unsigned long long extra_msr64; + unsigned long long extra_delta64; + unsigned long long extra_msr32; + unsigned long long extra_delta32; unsigned int cpu_id; unsigned int flags; #define CPU_IS_FIRST_THREAD_IN_CORE 0x2 @@ -222,6 +229,14 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " GHz"); outp += sprintf(outp, " TSC"); + if (extra_delta_offset32) + outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); + if (extra_delta_offset64) + outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64); + if (extra_msr_offset32) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); + if (extra_msr_offset64) + outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); if (do_nhm_cstates) outp += sprintf(outp, " %%c1"); if (do_nhm_cstates) @@ -238,8 +253,6 @@ void print_header(void) outp += sprintf(outp, " %%pc6"); if (do_snb_cstates) outp += sprintf(outp, " %%pc7"); - if (extra_msr_offset) - outp += sprintf(outp, " MSR 0x%x ", extra_msr_offset); outp += sprintf(outp, "\n"); } @@ -255,8 +268,14 @@ int dump_counters(struct thread_data *t, struct core_data *c, fprintf(stderr, "aperf: %016llX\n", t->aperf); fprintf(stderr, "mperf: %016llX\n", t->mperf); fprintf(stderr, "c1: %016llX\n", t->c1); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_delta_offset32, t->extra_delta32); fprintf(stderr, "msr0x%x: %016llX\n", - extra_msr_offset, t->extra_msr); + extra_delta_offset64, t->extra_delta64); + fprintf(stderr, "msr0x%x: %08llX\n", + extra_msr_offset32, t->extra_msr32); + fprintf(stderr, "msr0x%x: %016llX\n", + extra_msr_offset64, t->extra_msr64); } if (c) { @@ -360,6 +379,21 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC */ outp += sprintf(outp, "%5.2f", 1.0 * t->tsc/units/interval_float); + /* delta */ + if (extra_delta_offset32) + outp += sprintf(outp, " %11llu", t->extra_delta32); + + /* DELTA */ + if (extra_delta_offset64) + outp += sprintf(outp, " %11llu", t->extra_delta64); + /* msr */ + if (extra_msr_offset32) + outp += sprintf(outp, " 0x%08llx", t->extra_msr32); + + /* MSR */ + if (extra_msr_offset64) + outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, " %6.2f", 100.0 * t->c1/t->tsc); @@ -391,8 +425,6 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_snb_cstates) outp += sprintf(outp, " %6.2f", 100.0 * p->pc7/t->tsc); done: - if (extra_msr_offset) - outp += sprintf(outp, " 0x%016llx", t->extra_msr); outp += sprintf(outp, "\n"); return 0; @@ -502,10 +534,16 @@ delta_thread(struct thread_data *new, struct thread_data *old, old->mperf = 1; /* divide by 0 protection */ } + old->extra_delta32 = new->extra_delta32 - old->extra_delta32; + old->extra_delta32 &= 0xFFFFFFFF; + + old->extra_delta64 = new->extra_delta64 - old->extra_delta64; + /* - * for "extra msr", just copy the latest w/o subtracting + * Extra MSR is just a snapshot, simply copy latest w/o subtracting */ - old->extra_msr = new->extra_msr; + old->extra_msr32 = new->extra_msr32; + old->extra_msr64 = new->extra_msr64; } int delta_cpu(struct thread_data *t, struct core_data *c, @@ -533,6 +571,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data t->mperf = 0; t->c1 = 0; + t->extra_delta32 = 0; + t->extra_delta64 = 0; + /* tells format_counters to dump all fields from this set */ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; @@ -553,6 +594,9 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.threads.mperf += t->mperf; average.threads.c1 += t->c1; + average.threads.extra_delta32 += t->extra_delta32; + average.threads.extra_delta64 += t->extra_delta64; + /* sum per-core values only for 1st thread in core */ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; @@ -588,6 +632,11 @@ void compute_average(struct thread_data *t, struct core_data *c, average.threads.mperf /= topo.num_cpus; average.threads.c1 /= topo.num_cpus; + average.threads.extra_delta32 /= topo.num_cpus; + average.threads.extra_delta32 &= 0xFFFFFFFF; + + average.threads.extra_delta64 /= topo.num_cpus; + average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; @@ -629,8 +678,24 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -4; } - if (extra_msr_offset) - if (get_msr(cpu, extra_msr_offset, &t->extra_msr)) + if (extra_delta_offset32) { + if (get_msr(cpu, extra_delta_offset32, &t->extra_delta32)) + return -5; + t->extra_delta32 &= 0xFFFFFFFF; + } + + if (extra_delta_offset64) + if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) + return -5; + + if (extra_msr_offset32) { + if (get_msr(cpu, extra_msr_offset32, &t->extra_msr32)) + return -5; + t->extra_msr32 &= 0xFFFFFFFF; + } + + if (extra_msr_offset64) + if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) return -5; /* collect core counters only for 1st thread in core */ @@ -677,6 +742,9 @@ void print_verbose_header(void) get_msr(0, MSR_NEHALEM_PLATFORM_INFO, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + ratio = (msr >> 40) & 0xFF; fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", ratio, bclk, ratio * bclk); @@ -685,14 +753,84 @@ void print_verbose_header(void) fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", ratio, bclk, ratio * bclk); + if (!do_ivt_turbo_ratio_limit) + goto print_nhm_turbo_ratio_limits; + + get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) - fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr); + fprintf(stderr, "MSR_IVT_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 24) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 16) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", + ratio, bclk, ratio * bclk); + +print_nhm_turbo_ratio_limits: if (!do_nehalem_turbo_ratio_limit) return; get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT, &msr); + if (verbose > 1) + fprintf(stderr, "MSR_NEHALEM_TURBO_RATIO_LIMIT: 0x%llx\n", msr); + + ratio = (msr >> 56) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 48) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 40) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 32) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", + ratio, bclk, ratio * bclk); + ratio = (msr >> 24) & 0xFF; if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", @@ -712,7 +850,6 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); - } void free_all_buffers(void) @@ -1038,7 +1175,7 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) case 0x2A: /* SNB */ case 0x2D: /* SNB Xeon */ case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; case 0x2E: /* Nehalem-EX Xeon - Beckton */ case 0x2F: /* Westmere-EX Xeon - Eagleton */ @@ -1046,6 +1183,22 @@ int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) return 0; } } +int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3E: /* IVB Xeon */ + return 1; + default: + return 0; + } +} + int is_snb(unsigned int family, unsigned int model) { @@ -1056,7 +1209,7 @@ int is_snb(unsigned int family, unsigned int model) case 0x2A: case 0x2D: case 0x3A: /* IVB */ - case 0x3D: /* IVB Xeon */ + case 0x3E: /* IVB Xeon */ return 1; } return 0; @@ -1145,12 +1298,13 @@ void check_cpuid() bclk = discover_bclk(family, model); do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); + do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); } void usage() { - fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n", + fprintf(stderr, "%s: [-v][-p|-P|-S][-c MSR# | -s]][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", progname); exit(1); } @@ -1440,15 +1594,15 @@ void cmdline(int argc, char **argv) progname = argv[0]; - while ((opt = getopt(argc, argv, "+cpsvi:M:")) != -1) { + while ((opt = getopt(argc, argv, "+pPSvisc:sC:m:M:")) != -1) { switch (opt) { - case 'c': + case 'p': show_core_only++; break; - case 'p': + case 'P': show_pkg_only++; break; - case 's': + case 'S': summary_only++; break; case 'v': @@ -1457,10 +1611,20 @@ void cmdline(int argc, char **argv) case 'i': interval_sec = atoi(optarg); break; + case 'c': + sscanf(optarg, "%x", &extra_delta_offset32); + break; + case 's': + extra_delta_offset32 = 0x34; /* SMI counter */ + break; + case 'C': + sscanf(optarg, "%x", &extra_delta_offset64); + break; + case 'm': + sscanf(optarg, "%x", &extra_msr_offset32); + break; case 'M': - sscanf(optarg, "%x", &extra_msr_offset); - if (verbose > 1) - fprintf(stderr, "MSR 0x%X\n", extra_msr_offset); + sscanf(optarg, "%x", &extra_msr_offset64); break; default: usage(); @@ -1473,7 +1637,7 @@ int main(int argc, char **argv) cmdline(argc, argv); if (verbose > 1) - fprintf(stderr, "turbostat v2.0 May 16, 2012" + fprintf(stderr, "turbostat v2.1 October 6, 2012" " - Len Brown <lenb@kernel.org>\n"); turbostat_init(); diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile new file mode 100644 index 000000000000..0d2381633475 --- /dev/null +++ b/tools/virtio/virtio-trace/Makefile @@ -0,0 +1,13 @@ +CC = gcc +CFLAGS = -O2 -Wall -pthread + +all: trace-agent + +.c.o: + $(CC) $(CFLAGS) -c $^ -o $@ + +trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o + $(CC) $(CFLAGS) -o $@ $^ + +clean: + rm -f *.o trace-agent diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README new file mode 100644 index 000000000000..b64845b823ab --- /dev/null +++ b/tools/virtio/virtio-trace/README @@ -0,0 +1,118 @@ +Trace Agent for virtio-trace +============================ + +Trace agent is a user tool for sending trace data of a guest to a Host in low +overhead. Trace agent has the following functions: + - splice a page of ring-buffer to read_pipe without memory copying + - splice the page from write_pipe to virtio-console without memory copying + - write trace data to stdout by using -o option + - controlled by start/stop orders from a Host + +The trace agent operates as follows: + 1) Initialize all structures. + 2) Create a read/write thread per CPU. Each thread is bound to a CPU. + The read/write threads hold it. + 3) A controller thread does poll() for a start order of a host. + 4) After the controller of the trace agent receives a start order from a host, + the controller wake read/write threads. + 5) The read/write threads start to read trace data from ring-buffers and + write the data to virtio-serial. + 6) If the controller receives a stop order from a host, the read/write threads + stop to read trace data. + + +Files +===== + +README: this file +Makefile: Makefile of trace agent for virtio-trace +trace-agent.c: includes main function, sets up for operating trace agent +trace-agent.h: includes all structures and some macros +trace-agent-ctl.c: includes controller function for read/write threads +trace-agent-rw.c: includes read/write threads function + + +Setup +===== + +To use this trace agent for virtio-trace, we need to prepare some virtio-serial +I/Fs. + +1) Make FIFO in a host + virtio-trace uses virtio-serial pipe as trace data paths as to the number +of CPUs and a control path, so FIFO (named pipe) should be created as follows: + # mkdir /tmp/virtio-trace/ + # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out} + # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out} + +For example, if a guest use three CPUs, the names are + trace-path-cpu{0,1,2}.{in.out} +and + agent-ctl-path.{in,out}. + +2) Set up of virtio-serial pipe in a host + Add qemu option to use virtio-serial pipe. + + ##virtio-serial device## + -device virtio-serial-pci,id=virtio-serial0\ + ##control path## + -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\ + -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\ + id=channel0,name=agent-ctl-path\ + ##data path## + -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\ + -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel0,\ + id=channel1,name=trace-path-cpu0\ + ... + +If you manage guests with libvirt, add the following tags to domain XML files. +Then, libvirt passes the same command option to qemu. + + <channel type='pipe'> + <source path='/tmp/virtio-trace/agent-ctl-path'/> + <target type='virtio' name='agent-ctl-path'/> + <address type='virtio-serial' controller='0' bus='0' port='0'/> + </channel> + <channel type='pipe'> + <source path='/tmp/virtio-trace/trace-path-cpu0'/> + <target type='virtio' name='trace-path-cpu0'/> + <address type='virtio-serial' controller='0' bus='0' port='1'/> + </channel> + ... +Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For +example, if a guest use three CPUs, chardev names should be trace-path-cpu0, +trace-path-cpu1, trace-path-cpu2, and agent-ctl-path. + +3) Boot the guest + You can find some chardev in /dev/virtio-ports/ in the guest. + + +Run +=== + +0) Build trace agent in a guest + $ make + +1) Enable ftrace in the guest + <Example> + # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + +2) Run trace agent in the guest + This agent must be operated as root. + # ./trace-agent +read/write threads in the agent wait for start order from host. If you add -o +option, trace data are output via stdout in the guest. + +3) Open FIFO in a host + # cat /tmp/virtio-trace/trace-path-cpu0.out +If a host does not open these, trace data get stuck in buffers of virtio. Then, +the guest will stop by specification of chardev in QEMU. This blocking mode may +be solved in the future. + +4) Start to read trace data by ordering from a host + A host injects read start order to the guest via virtio-serial. + # echo 1 > /tmp/virtio-trace/agent-ctl-path.in + +5) Stop to read trace data by ordering from a host + A host injects read stop order to the guest via virtio-serial. + # echo 0 > /tmp/virtio-trace/agent-ctl-path.in diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c new file mode 100644 index 000000000000..a2d0403c4f94 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-ctl.c @@ -0,0 +1,137 @@ +/* + * Controller of read/write threads for virtio-trace + * + * Copyright (C) 2012 Hitachi, Ltd. + * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Licensed under GPL version 2 only. + * + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "trace-agent.h" + +#define HOST_MSG_SIZE 256 +#define EVENT_WAIT_MSEC 100 + +static volatile sig_atomic_t global_signal_val; +bool global_sig_receive; /* default false */ +bool global_run_operation; /* default false*/ + +/* Handle SIGTERM/SIGINT/SIGQUIT to exit */ +static void signal_handler(int sig) +{ + global_signal_val = sig; +} + +int rw_ctl_init(const char *ctl_path) +{ + int ctl_fd; + + ctl_fd = open(ctl_path, O_RDONLY); + if (ctl_fd == -1) { + pr_err("Cannot open ctl_fd\n"); + goto error; + } + + return ctl_fd; + +error: + exit(EXIT_FAILURE); +} + +static int wait_order(int ctl_fd) +{ + struct pollfd poll_fd; + int ret = 0; + + while (!global_sig_receive) { + poll_fd.fd = ctl_fd; + poll_fd.events = POLLIN; + + ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC); + + if (global_signal_val) { + global_sig_receive = true; + pr_info("Receive interrupt %d\n", global_signal_val); + + /* Wakes rw-threads when they are sleeping */ + if (!global_run_operation) + pthread_cond_broadcast(&cond_wakeup); + + ret = -1; + break; + } + + if (ret < 0) { + pr_err("Polling error\n"); + goto error; + } + + if (ret) + break; + }; + + return ret; + +error: + exit(EXIT_FAILURE); +} + +/* + * contol read/write threads by handling global_run_operation + */ +void *rw_ctl_loop(int ctl_fd) +{ + ssize_t rlen; + char buf[HOST_MSG_SIZE]; + int ret; + + /* Setup signal handlers */ + signal(SIGTERM, signal_handler); + signal(SIGINT, signal_handler); + signal(SIGQUIT, signal_handler); + + while (!global_sig_receive) { + + ret = wait_order(ctl_fd); + if (ret < 0) + break; + + rlen = read(ctl_fd, buf, sizeof(buf)); + if (rlen < 0) { + pr_err("read data error in ctl thread\n"); + goto error; + } + + if (rlen == 2 && buf[0] == '1') { + /* + * If host writes '1' to a control path, + * this controller wakes all read/write threads. + */ + global_run_operation = true; + pthread_cond_broadcast(&cond_wakeup); + pr_debug("Wake up all read/write threads\n"); + } else if (rlen == 2 && buf[0] == '0') { + /* + * If host writes '0' to a control path, read/write + * threads will wait for notification from Host. + */ + global_run_operation = false; + pr_debug("Stop all read/write threads\n"); + } else + pr_info("Invalid host notification: %s\n", buf); + } + + return NULL; + +error: + exit(EXIT_FAILURE); +} diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c new file mode 100644 index 000000000000..3aace5ea4842 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent-rw.c @@ -0,0 +1,192 @@ +/* + * Read/write thread of a guest agent for virtio-trace + * + * Copyright (C) 2012 Hitachi, Ltd. + * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Licensed under GPL version 2 only. + * + */ + +#define _GNU_SOURCE +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/syscall.h> +#include "trace-agent.h" + +#define READ_WAIT_USEC 100000 + +void *rw_thread_info_new(void) +{ + struct rw_thread_info *rw_ti; + + rw_ti = zalloc(sizeof(struct rw_thread_info)); + if (rw_ti == NULL) { + pr_err("rw_thread_info zalloc error\n"); + exit(EXIT_FAILURE); + } + + rw_ti->cpu_num = -1; + rw_ti->in_fd = -1; + rw_ti->out_fd = -1; + rw_ti->read_pipe = -1; + rw_ti->write_pipe = -1; + rw_ti->pipe_size = PIPE_INIT; + + return rw_ti; +} + +void *rw_thread_init(int cpu, const char *in_path, const char *out_path, + bool stdout_flag, unsigned long pipe_size, + struct rw_thread_info *rw_ti) +{ + int data_pipe[2]; + + rw_ti->cpu_num = cpu; + + /* set read(input) fd */ + rw_ti->in_fd = open(in_path, O_RDONLY); + if (rw_ti->in_fd == -1) { + pr_err("Could not open in_fd (CPU:%d)\n", cpu); + goto error; + } + + /* set write(output) fd */ + if (!stdout_flag) { + /* virtio-serial output mode */ + rw_ti->out_fd = open(out_path, O_WRONLY); + if (rw_ti->out_fd == -1) { + pr_err("Could not open out_fd (CPU:%d)\n", cpu); + goto error; + } + } else + /* stdout mode */ + rw_ti->out_fd = STDOUT_FILENO; + + if (pipe2(data_pipe, O_NONBLOCK) < 0) { + pr_err("Could not create pipe in rw-thread(%d)\n", cpu); + goto error; + } + + /* + * Size of pipe is 64kB in default based on fs/pipe.c. + * To read/write trace data speedy, pipe size is changed. + */ + if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) { + pr_err("Could not change pipe size in rw-thread(%d)\n", cpu); + goto error; + } + + rw_ti->read_pipe = data_pipe[1]; + rw_ti->write_pipe = data_pipe[0]; + rw_ti->pipe_size = pipe_size; + + return NULL; + +error: + exit(EXIT_FAILURE); +} + +/* Bind a thread to a cpu */ +static void bind_cpu(int cpu_num) +{ + cpu_set_t mask; + + CPU_ZERO(&mask); + CPU_SET(cpu_num, &mask); + + /* bind my thread to cpu_num by assigning zero to the first argument */ + if (sched_setaffinity(0, sizeof(mask), &mask) == -1) + pr_err("Could not set CPU#%d affinity\n", (int)cpu_num); +} + +static void *rw_thread_main(void *thread_info) +{ + ssize_t rlen, wlen; + ssize_t ret; + struct rw_thread_info *ts = (struct rw_thread_info *)thread_info; + + bind_cpu(ts->cpu_num); + + while (1) { + /* Wait for a read order of trace data by Host OS */ + if (!global_run_operation) { + pthread_mutex_lock(&mutex_notify); + pthread_cond_wait(&cond_wakeup, &mutex_notify); + pthread_mutex_unlock(&mutex_notify); + } + + if (global_sig_receive) + break; + + /* + * Each thread read trace_pipe_raw of each cpu bounding the + * thread, so contention of multi-threads does not occur. + */ + rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL, + ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE); + + if (rlen < 0) { + pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num); + goto error; + } else if (rlen == 0) { + /* + * If trace data do not exist or are unreadable not + * for exceeding the page size, splice_read returns + * NULL. Then, this waits for being filled the data in a + * ring-buffer. + */ + usleep(READ_WAIT_USEC); + pr_debug("Read retry(cpu:%d)\n", ts->cpu_num); + continue; + } + + wlen = 0; + + do { + ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL, + rlen - wlen, + SPLICE_F_MOVE | SPLICE_F_MORE); + + if (ret < 0) { + pr_err("Splice_write in rw-thread(%d)\n", + ts->cpu_num); + goto error; + } else if (ret == 0) + /* + * When host reader is not in time for reading + * trace data, guest will be stopped. This is + * because char dev in QEMU is not supported + * non-blocking mode. Then, writer might be + * sleep in that case. + * This sleep will be removed by supporting + * non-blocking mode. + */ + sleep(1); + wlen += ret; + } while (wlen < rlen); + } + + return NULL; + +error: + exit(EXIT_FAILURE); +} + + +pthread_t rw_thread_run(struct rw_thread_info *rw_ti) +{ + int ret; + pthread_t rw_thread_per_cpu; + + ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti); + if (ret != 0) { + pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num); + exit(EXIT_FAILURE); + } + + return rw_thread_per_cpu; +} diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c new file mode 100644 index 000000000000..0a0a7dd4eff7 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent.c @@ -0,0 +1,270 @@ +/* + * Guest agent for virtio-trace + * + * Copyright (C) 2012 Hitachi, Ltd. + * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com> + * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> + * + * Licensed under GPL version 2 only. + * + */ + +#define _GNU_SOURCE +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include "trace-agent.h" + +#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) +#define PIPE_DEF_BUFS 16 +#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS) +#define PIPE_MAX_SIZE (1024*1024) +#define READ_PATH_FMT \ + "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw" +#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d" +#define CTL_PATH "/dev/virtio-ports/agent-ctl-path" + +pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER; + +static int get_total_cpus(void) +{ + int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF); + + if (nr_cpus <= 0) { + pr_err("Could not read cpus\n"); + goto error; + } else if (nr_cpus > MAX_CPUS) { + pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS); + goto error; + } + + return nr_cpus; + +error: + exit(EXIT_FAILURE); +} + +static void *agent_info_new(void) +{ + struct agent_info *s; + int i; + + s = zalloc(sizeof(struct agent_info)); + if (s == NULL) { + pr_err("agent_info zalloc error\n"); + exit(EXIT_FAILURE); + } + + s->pipe_size = PIPE_INIT; + s->use_stdout = false; + s->cpus = get_total_cpus(); + s->ctl_fd = -1; + + /* read/write threads init */ + for (i = 0; i < s->cpus; i++) + s->rw_ti[i] = rw_thread_info_new(); + + return s; +} + +static unsigned long parse_size(const char *arg) +{ + unsigned long value, round; + char *ptr; + + value = strtoul(arg, &ptr, 10); + switch (*ptr) { + case 'K': case 'k': + value <<= 10; + break; + case 'M': case 'm': + value <<= 20; + break; + default: + break; + } + + if (value > PIPE_MAX_SIZE) { + pr_err("Pipe size must be less than 1MB\n"); + goto error; + } else if (value < PIPE_MIN_SIZE) { + pr_err("Pipe size must be over 64KB\n"); + goto error; + } + + /* Align buffer size with page unit */ + round = value & (PAGE_SIZE - 1); + value = value - round; + + return value; +error: + return 0; +} + +static void usage(char const *prg) +{ + pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg); +} + +static const char *make_path(int cpu_num, bool this_is_write_path) +{ + int ret; + char *buf; + + buf = zalloc(PATH_MAX); + if (buf == NULL) { + pr_err("Could not allocate buffer\n"); + goto error; + } + + if (this_is_write_path) + /* write(output) path */ + ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num); + else + /* read(input) path */ + ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num); + + if (ret <= 0) { + pr_err("Failed to generate %s path(CPU#%d):%d\n", + this_is_write_path ? "read" : "write", cpu_num, ret); + goto error; + } + + return buf; + +error: + free(buf); + return NULL; +} + +static const char *make_input_path(int cpu_num) +{ + return make_path(cpu_num, false); +} + +static const char *make_output_path(int cpu_num) +{ + return make_path(cpu_num, true); +} + +static void *agent_info_init(struct agent_info *s) +{ + int cpu; + const char *in_path = NULL; + const char *out_path = NULL; + + /* init read/write threads */ + for (cpu = 0; cpu < s->cpus; cpu++) { + /* set read(input) path per read/write thread */ + in_path = make_input_path(cpu); + if (in_path == NULL) + goto error; + + /* set write(output) path per read/write thread*/ + if (!s->use_stdout) { + out_path = make_output_path(cpu); + if (out_path == NULL) + goto error; + } else + /* stdout mode */ + pr_debug("stdout mode\n"); + + rw_thread_init(cpu, in_path, out_path, s->use_stdout, + s->pipe_size, s->rw_ti[cpu]); + } + + /* init controller of read/write threads */ + s->ctl_fd = rw_ctl_init((const char *)CTL_PATH); + + return NULL; + +error: + exit(EXIT_FAILURE); +} + +static void *parse_args(int argc, char *argv[], struct agent_info *s) +{ + int cmd; + unsigned long size; + + while ((cmd = getopt(argc, argv, "hos:")) != -1) { + switch (cmd) { + /* stdout mode */ + case 'o': + s->use_stdout = true; + break; + /* size of pipe */ + case 's': + size = parse_size(optarg); + if (size == 0) + goto error; + s->pipe_size = size; + break; + case 'h': + default: + usage(argv[0]); + goto error; + } + } + + agent_info_init(s); + + return NULL; + +error: + exit(EXIT_FAILURE); +} + +static void agent_main_loop(struct agent_info *s) +{ + int cpu; + pthread_t rw_thread_per_cpu[MAX_CPUS]; + + /* Start all read/write threads */ + for (cpu = 0; cpu < s->cpus; cpu++) + rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]); + + rw_ctl_loop(s->ctl_fd); + + /* Finish all read/write threads */ + for (cpu = 0; cpu < s->cpus; cpu++) { + int ret; + + ret = pthread_join(rw_thread_per_cpu[cpu], NULL); + if (ret != 0) { + pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu); + exit(EXIT_FAILURE); + } + } +} + +static void agent_info_free(struct agent_info *s) +{ + int i; + + close(s->ctl_fd); + for (i = 0; i < s->cpus; i++) { + close(s->rw_ti[i]->in_fd); + close(s->rw_ti[i]->out_fd); + close(s->rw_ti[i]->read_pipe); + close(s->rw_ti[i]->write_pipe); + free(s->rw_ti[i]); + } + free(s); +} + +int main(int argc, char *argv[]) +{ + struct agent_info *s = NULL; + + s = agent_info_new(); + parse_args(argc, argv, s); + + agent_main_loop(s); + + agent_info_free(s); + + return 0; +} diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h new file mode 100644 index 000000000000..8de79bfeaa73 --- /dev/null +++ b/tools/virtio/virtio-trace/trace-agent.h @@ -0,0 +1,75 @@ +#ifndef __TRACE_AGENT_H__ +#define __TRACE_AGENT_H__ +#include <pthread.h> +#include <stdbool.h> + +#define MAX_CPUS 256 +#define PIPE_INIT (1024*1024) + +/* + * agent_info - structure managing total information of guest agent + * @pipe_size: size of pipe (default 1MB) + * @use_stdout: set to true when o option is added (default false) + * @cpus: total number of CPUs + * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path + * @rw_ti: structure managing information of read/write threads + */ +struct agent_info { + unsigned long pipe_size; + bool use_stdout; + int cpus; + int ctl_fd; + struct rw_thread_info *rw_ti[MAX_CPUS]; +}; + +/* + * rw_thread_info - structure managing a read/write thread a cpu + * @cpu_num: cpu number operating this read/write thread + * @in_fd: fd of reading trace data path in cpu_num + * @out_fd: fd of writing trace data path in cpu_num + * @read_pipe: fd of read pipe + * @write_pipe: fd of write pipe + * @pipe_size: size of pipe (default 1MB) + */ +struct rw_thread_info { + int cpu_num; + int in_fd; + int out_fd; + int read_pipe; + int write_pipe; + unsigned long pipe_size; +}; + +/* use for stopping rw threads */ +extern bool global_sig_receive; + +/* use for notification */ +extern bool global_run_operation; +extern pthread_mutex_t mutex_notify; +extern pthread_cond_t cond_wakeup; + +/* for controller of read/write threads */ +extern int rw_ctl_init(const char *ctl_path); +extern void *rw_ctl_loop(int ctl_fd); + +/* for trace read/write thread */ +extern void *rw_thread_info_new(void); +extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path, + bool stdout_flag, unsigned long pipe_size, + struct rw_thread_info *rw_ti); +extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti); + +static inline void *zalloc(size_t size) +{ + return calloc(1, size); +} + +#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__) +#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__) +#ifdef DEBUG +#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__) +#else +#define pr_debug(format, ...) do {} while (0) +#endif + +#endif /*__TRACE_AGENT_H__*/ |