diff options
Diffstat (limited to 'tools/testing')
150 files changed, 13650 insertions, 1260 deletions
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl new file mode 100755 index 000000000000..b28feea7c363 --- /dev/null +++ b/tools/testing/ktest/config-bisect.pl @@ -0,0 +1,770 @@ +#!/usr/bin/perl -w +# +# Copyright 2015 - Steven Rostedt, Red Hat Inc. +# Copyright 2017 - Steven Rostedt, VMware, Inc. +# +# Licensed under the terms of the GNU GPL License version 2 +# + +# usage: +# config-bisect.pl [options] good-config bad-config [good|bad] +# + +# Compares a good config to a bad config, then takes half of the diffs +# and produces a config that is somewhere between the good config and +# the bad config. That is, the resulting config will start with the +# good config and will try to make half of the differences of between +# the good and bad configs match the bad config. It tries because of +# dependencies between the two configs it may not be able to change +# exactly half of the configs that are different between the two config +# files. + +# Here's a normal way to use it: +# +# $ cd /path/to/linux/kernel +# $ config-bisect.pl /path/to/good/config /path/to/bad/config + +# This will now pull in good config (blowing away .config in that directory +# so do not make that be one of the good or bad configs), and then +# build the config with "make oldconfig" to make sure it matches the +# current kernel. It will then store the configs in that result for +# the good config. It does the same for the bad config as well. +# The algorithm will run, merging half of the differences between +# the two configs and building them with "make oldconfig" to make sure +# the result changes (dependencies may reset changes the tool had made). +# It then copies the result of its good config to /path/to/good/config.tmp +# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the +# files passed in). And the ".config" that you should test will be in +# directory + +# After the first run, determine if the result is good or bad then +# run the same command appending the result + +# For good results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config good + +# For bad results: +# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad + +# Do not change the good-config or bad-config, config-bisect.pl will +# copy the good-config to a temp file with the same name as good-config +# but with a ".tmp" after it. It will do the same with the bad-config. + +# If "good" or "bad" is not stated at the end, it will copy the good and +# bad configs to the .tmp versions. If a .tmp version already exists, it will +# warn before writing over them (-r will not warn, and just write over them). +# If the last config is labeled "good", then it will copy it to the good .tmp +# version. If the last config is labeled "bad", it will copy it to the bad +# .tmp version. It will continue this until it can not merge the two any more +# without the result being equal to either the good or bad .tmp configs. + +my $start = 0; +my $val = ""; + +my $pwd = `pwd`; +chomp $pwd; +my $tree = $pwd; +my $build; + +my $output_config; +my $reset_bisect; + +sub usage { + print << "EOF" + +usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad] + -l [optional] define location of linux-tree (default is current directory) + -b [optional] define location to build (O=build-dir) (default is linux-tree) + good-config the config that is considered good + bad-config the config that does not work + "good" add this if the last run produced a good config + "bad" add this if the last run produced a bad config + If "good" or "bad" is not specified, then it is the start of a new bisect + + Note, each run will create copy of good and bad configs with ".tmp" appended. + +EOF +; + + exit(-1); +} + +sub doprint { + print @_; +} + +sub dodie { + doprint "CRITICAL FAILURE... ", @_, "\n"; + + die @_, "\n"; +} + +sub expand_path { + my ($file) = @_; + + if ($file =~ m,^/,) { + return $file; + } + return "$pwd/$file"; +} + +sub read_prompt { + my ($cancel, $prompt) = @_; + + my $ans; + + for (;;) { + if ($cancel) { + print "$prompt [y/n/C] "; + } else { + print "$prompt [y/N] "; + } + $ans = <STDIN>; + chomp $ans; + if ($ans =~ /^\s*$/) { + if ($cancel) { + $ans = "c"; + } else { + $ans = "n"; + } + } + last if ($ans =~ /^y$/i || $ans =~ /^n$/i); + if ($cancel) { + last if ($ans =~ /^c$/i); + print "Please answer either 'y', 'n' or 'c'.\n"; + } else { + print "Please answer either 'y' or 'n'.\n"; + } + } + if ($ans =~ /^c/i) { + exit; + } + if ($ans !~ /^y$/i) { + return 0; + } + return 1; +} + +sub read_yn { + my ($prompt) = @_; + + return read_prompt 0, $prompt; +} + +sub read_ync { + my ($prompt) = @_; + + return read_prompt 1, $prompt; +} + +sub run_command { + my ($command, $redirect) = @_; + my $start_time; + my $end_time; + my $dord = 0; + my $pid; + + $start_time = time; + + doprint("$command ... "); + + $pid = open(CMD, "$command 2>&1 |") or + dodie "unable to exec $command"; + + if (defined($redirect)) { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } + + while (<CMD>) { + print RD if ($dord); + } + + waitpid($pid, 0); + my $failed = $?; + + close(CMD); + close(RD) if ($dord); + + $end_time = time; + my $delta = $end_time - $start_time; + + if ($delta == 1) { + doprint "[1 second] "; + } else { + doprint "[$delta seconds] "; + } + + if ($failed) { + doprint "FAILED!\n"; + } else { + doprint "SUCCESS\n"; + } + + return !$failed; +} + +###### CONFIG BISECT ###### + +# config_ignore holds the configs that were set (or unset) for +# a good config and we will ignore these configs for the rest +# of a config bisect. These configs stay as they were. +my %config_ignore; + +# config_set holds what all configs were set as. +my %config_set; + +# config_off holds the set of configs that the bad config had disabled. +# We need to record them and set them in the .config when running +# olddefconfig, because olddefconfig keeps the defaults. +my %config_off; + +# config_off_tmp holds a set of configs to turn off for now +my @config_off_tmp; + +# config_list is the set of configs that are being tested +my %config_list; +my %null_config; + +my %dependency; + +my $make; + +sub make_oldconfig { + + if (!run_command "$make olddefconfig") { + # Perhaps olddefconfig doesn't exist in this version of the kernel + # try oldnoconfig + doprint "olddefconfig failed, trying make oldnoconfig\n"; + if (!run_command "$make oldnoconfig") { + doprint "oldnoconfig failed, trying yes '' | make oldconfig\n"; + # try a yes '' | oldconfig + run_command "yes '' | $make oldconfig" or + dodie "failed make config oldconfig"; + } + } +} + +sub assign_configs { + my ($hash, $config) = @_; + + doprint "Reading configs from $config\n"; + + open (IN, $config) + or dodie "Failed to read $config"; + + while (<IN>) { + chomp; + if (/^((CONFIG\S*)=.*)/) { + ${$hash}{$2} = $1; + } elsif (/^(# (CONFIG\S*) is not set)/) { + ${$hash}{$2} = $1; + } + } + + close(IN); +} + +sub process_config_ignore { + my ($config) = @_; + + assign_configs \%config_ignore, $config; +} + +sub get_dependencies { + my ($config) = @_; + + my $arr = $dependency{$config}; + if (!defined($arr)) { + return (); + } + + my @deps = @{$arr}; + + foreach my $dep (@{$arr}) { + print "ADD DEP $dep\n"; + @deps = (@deps, get_dependencies $dep); + } + + return @deps; +} + +sub save_config { + my ($pc, $file) = @_; + + my %configs = %{$pc}; + + doprint "Saving configs into $file\n"; + + open(OUT, ">$file") or dodie "Can not write to $file"; + + foreach my $config (keys %configs) { + print OUT "$configs{$config}\n"; + } + close(OUT); +} + +sub create_config { + my ($name, $pc) = @_; + + doprint "Creating old config from $name configs\n"; + + save_config $pc, $output_config; + + make_oldconfig; +} + +# compare two config hashes, and return configs with different vals. +# It returns B's config values, but you can use A to see what A was. +sub diff_config_vals { + my ($pa, $pb) = @_; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + my %ret; + + foreach my $item (keys %a) { + if (defined($b{$item}) && $b{$item} ne $a{$item}) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# compare two config hashes and return the configs in B but not A +sub diff_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + $ret{$item} = $b{$item}; + } + } + + return %ret; +} + +# return if two configs are equal or not +# 0 is equal +1 b has something a does not +# +1 if a and b have a different item. +# -1 if a has something b does not +sub compare_configs { + my ($pa, $pb) = @_; + + my %ret; + + # crappy Perl way to pass in hashes. + my %a = %{$pa}; + my %b = %{$pb}; + + foreach my $item (keys %b) { + if (!defined($a{$item})) { + return 1; + } + if ($a{$item} ne $b{$item}) { + return 1; + } + } + + foreach my $item (keys %a) { + if (!defined($b{$item})) { + return -1; + } + } + + return 0; +} + +sub process_failed { + my ($config) = @_; + + doprint "\n\n***************************************\n"; + doprint "Found bad config: $config\n"; + doprint "***************************************\n\n"; +} + +sub process_new_config { + my ($tc, $nc, $gc, $bc) = @_; + + my %tmp_config = %{$tc}; + my %good_configs = %{$gc}; + my %bad_configs = %{$bc}; + + my %new_configs; + + my $runtest = 1; + my $ret; + + create_config "tmp_configs", \%tmp_config; + assign_configs \%new_configs, $output_config; + + $ret = compare_configs \%new_configs, \%bad_configs; + if (!$ret) { + doprint "New config equals bad config, try next test\n"; + $runtest = 0; + } + + if ($runtest) { + $ret = compare_configs \%new_configs, \%good_configs; + if (!$ret) { + doprint "New config equals good config, try next test\n"; + $runtest = 0; + } + } + + %{$nc} = %new_configs; + + return $runtest; +} + +sub convert_config { + my ($config) = @_; + + if ($config =~ /^# (.*) is not set/) { + $config = "$1=n"; + } + + $config =~ s/^CONFIG_//; + return $config; +} + +sub print_config { + my ($sym, $config) = @_; + + $config = convert_config $config; + doprint "$sym$config\n"; +} + +sub print_config_compare { + my ($good_config, $bad_config) = @_; + + $good_config = convert_config $good_config; + $bad_config = convert_config $bad_config; + + my $good_value = $good_config; + my $bad_value = $bad_config; + $good_value =~ s/(.*)=//; + my $config = $1; + + $bad_value =~ s/.*=//; + + doprint " $config $good_value -> $bad_value\n"; +} + +# Pass in: +# $phalf: half of the configs names you want to add +# $oconfigs: The orginial configs to start with +# $sconfigs: The source to update $oconfigs with (from $phalf) +# $which: The name of which half that is updating (top / bottom) +# $type: The name of the source type (good / bad) +sub make_half { + my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_; + + my @half = @{$phalf}; + my %orig_configs = %{$oconfigs}; + my %source_configs = %{$sconfigs}; + + my %tmp_config = %orig_configs; + + doprint "Settings bisect with $which half of $type configs:\n"; + foreach my $item (@half) { + doprint "Updating $item to $source_configs{$item}\n"; + $tmp_config{$item} = $source_configs{$item}; + } + + return %tmp_config; +} + +sub run_config_bisect { + my ($pgood, $pbad) = @_; + + my %good_configs = %{$pgood}; + my %bad_configs = %{$pbad}; + + my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; + my %b_configs = diff_configs \%good_configs, \%bad_configs; + my %g_configs = diff_configs \%bad_configs, \%good_configs; + + # diff_arr is what is in both good and bad but are different (y->n) + my @diff_arr = keys %diff_configs; + my $len_diff = $#diff_arr + 1; + + # b_arr is what is in bad but not in good (has depends) + my @b_arr = keys %b_configs; + my $len_b = $#b_arr + 1; + + # g_arr is what is in good but not in bad + my @g_arr = keys %g_configs; + my $len_g = $#g_arr + 1; + + my $runtest = 0; + my %new_configs; + my $ret; + + # Look at the configs that are different between good and bad. + # This does not include those that depend on other configs + # (configs depending on other configs that are not set would + # not show up even as a "# CONFIG_FOO is not set" + + + doprint "# of configs to check: $len_diff\n"; + doprint "# of configs showing only in good: $len_g\n"; + doprint "# of configs showing only in bad: $len_b\n"; + + if ($len_diff > 0) { + # Now test for different values + + doprint "Configs left to check:\n"; + doprint " Good Config\t\t\tBad Config\n"; + doprint " -----------\t\t\t----------\n"; + foreach my $item (@diff_arr) { + doprint " $good_configs{$item}\t$bad_configs{$item}\n"; + } + + my $half = int($#diff_arr / 2); + my @tophalf = @diff_arr[0 .. $half]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@tophalf, \%good_configs, + \%bad_configs, "top", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@tophalf, \%bad_configs, + \%good_configs, "top", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if (!$runtest && $len_diff > 0) { + # do the same thing, but this time with bottom half + + my $half = int($#diff_arr / 2); + my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; + + doprint "Set tmp config to be good config with some bad config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%good_configs, + \%bad_configs, "bottom", "bad"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + + if (!$runtest) { + doprint "Set tmp config to be bad config with some good config values\n"; + + my %tmp_config = make_half \@bottomhalf, \%bad_configs, + \%good_configs, "bottom", "good"; + + $runtest = process_new_config \%tmp_config, \%new_configs, + \%good_configs, \%bad_configs; + } + } + + if ($runtest) { + make_oldconfig; + doprint "READY TO TEST .config IN $build\n"; + return 0; + } + + doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; + doprint "Hmm, can't make any more changes without making good == bad?\n"; + doprint "Difference between good (+) and bad (-)\n"; + + foreach my $item (keys %bad_configs) { + if (!defined($good_configs{$item})) { + print_config "-", $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + next if (!defined($bad_configs{$item})); + if ($good_configs{$item} ne $bad_configs{$item}) { + print_config_compare $good_configs{$item}, $bad_configs{$item}; + } + } + + foreach my $item (keys %good_configs) { + if (!defined($bad_configs{$item})) { + print_config "+", $good_configs{$item}; + } + } + return -1; +} + +sub config_bisect { + my ($good_config, $bad_config) = @_; + my $ret; + + my %good_configs; + my %bad_configs; + my %tmp_configs; + + doprint "Run good configs through make oldconfig\n"; + assign_configs \%tmp_configs, $good_config; + create_config "$good_config", \%tmp_configs; + assign_configs \%good_configs, $output_config; + + doprint "Run bad configs through make oldconfig\n"; + assign_configs \%tmp_configs, $bad_config; + create_config "$bad_config", \%tmp_configs; + assign_configs \%bad_configs, $output_config; + + save_config \%good_configs, $good_config; + save_config \%bad_configs, $bad_config; + + return run_config_bisect \%good_configs, \%bad_configs; +} + +while ($#ARGV >= 0) { + if ($ARGV[0] !~ m/^-/) { + last; + } + my $opt = shift @ARGV; + + if ($opt eq "-b") { + $val = shift @ARGV; + if (!defined($val)) { + die "-b requires value\n"; + } + $build = $val; + } + + elsif ($opt eq "-l") { + $val = shift @ARGV; + if (!defined($val)) { + die "-l requires value\n"; + } + $tree = $val; + } + + elsif ($opt eq "-r") { + $reset_bisect = 1; + } + + elsif ($opt eq "-h") { + usage; + } + + else { + die "Unknow option $opt\n"; + } +} + +$build = $tree if (!defined($build)); + +$tree = expand_path $tree; +$build = expand_path $build; + +if ( ! -d $tree ) { + die "$tree not a directory\n"; +} + +if ( ! -d $build ) { + die "$build not a directory\n"; +} + +usage if $#ARGV < 1; + +if ($#ARGV == 1) { + $start = 1; +} elsif ($#ARGV == 2) { + $val = $ARGV[2]; + if ($val ne "good" && $val ne "bad") { + die "Unknown command '$val', bust be either \"good\" or \"bad\"\n"; + } +} else { + usage; +} + +my $good_start = expand_path $ARGV[0]; +my $bad_start = expand_path $ARGV[1]; + +my $good = "$good_start.tmp"; +my $bad = "$bad_start.tmp"; + +$make = "make"; + +if ($build ne $tree) { + $make = "make O=$build" +} + +$output_config = "$build/.config"; + +if ($start) { + if ( ! -f $good_start ) { + die "$good_start not found\n"; + } + if ( ! -f $bad_start ) { + die "$bad_start not found\n"; + } + if ( -f $good || -f $bad ) { + my $p = ""; + + if ( -f $good ) { + $p = "$good exists\n"; + } + + if ( -f $bad ) { + $p = "$p$bad exists\n"; + } + + if (!defined($reset_bisect)) { + if (!read_yn "${p}Overwrite and start new bisect anyway?") { + exit (-1); + } + } + } + run_command "cp $good_start $good" or die "failed to copy to $good\n"; + run_command "cp $bad_start $bad" or die "faield to copy to $bad\n"; +} else { + if ( ! -f $good ) { + die "Can not find file $good\n"; + } + if ( ! -f $bad ) { + die "Can not find file $bad\n"; + } + if ($val eq "good") { + run_command "cp $output_config $good" or die "failed to copy $config to $good\n"; + } elsif ($val eq "bad") { + run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n"; + } +} + +chdir $tree || die "can't change directory to $tree"; + +my $ret = config_bisect $good, $bad; + +if (!$ret) { + exit(0); +} + +if ($ret > 0) { + doprint "Cleaning temp files\n"; + run_command "rm $good"; + run_command "rm $bad"; + exit(1); +} else { + doprint "See good and bad configs for details:\n"; + doprint "good: $good\n"; + doprint "bad: $bad\n"; + doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n"; +} +exit(2); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index 8809f244bb7c..87af8a68ab25 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -10,6 +10,7 @@ use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK); use File::Path qw(mkpath); use File::Copy qw(cp); use FileHandle; +use FindBin; my $VERSION = "0.2"; @@ -22,6 +23,11 @@ my %evals; #default opts my %default = ( + "MAILER" => "sendmail", # default mailer + "EMAIL_ON_ERROR" => 1, + "EMAIL_WHEN_FINISHED" => 1, + "EMAIL_WHEN_CANCELED" => 0, + "EMAIL_WHEN_STARTED" => 0, "NUM_TESTS" => 1, "TEST_TYPE" => "build", "BUILD_TYPE" => "randconfig", @@ -59,6 +65,7 @@ my %default = ( "GRUB_REBOOT" => "grub2-reboot", "SYSLINUX" => "extlinux", "SYSLINUX_PATH" => "/boot/extlinux", + "CONNECT_TIMEOUT" => 25, # required, and we will ask users if they don't have them but we keep the default # value something that is common. @@ -163,6 +170,8 @@ my $store_failures; my $store_successes; my $test_name; my $timeout; +my $connect_timeout; +my $config_bisect_exec; my $booted_timeout; my $detect_triplefault; my $console; @@ -204,6 +213,20 @@ my $install_time; my $reboot_time; my $test_time; +my $pwd; +my $dirname = $FindBin::Bin; + +my $mailto; +my $mailer; +my $mail_path; +my $mail_command; +my $email_on_error; +my $email_when_finished; +my $email_when_started; +my $email_when_canceled; + +my $script_start_time = localtime(); + # set when a test is something other that just building or install # which would require more options. my $buildonly = 1; @@ -229,6 +252,14 @@ my $no_reboot = 1; my $reboot_success = 0; my %option_map = ( + "MAILTO" => \$mailto, + "MAILER" => \$mailer, + "MAIL_PATH" => \$mail_path, + "MAIL_COMMAND" => \$mail_command, + "EMAIL_ON_ERROR" => \$email_on_error, + "EMAIL_WHEN_FINISHED" => \$email_when_finished, + "EMAIL_WHEN_STARTED" => \$email_when_started, + "EMAIL_WHEN_CANCELED" => \$email_when_canceled, "MACHINE" => \$machine, "SSH_USER" => \$ssh_user, "TMP_DIR" => \$tmpdir, @@ -296,6 +327,8 @@ my %option_map = ( "STORE_SUCCESSES" => \$store_successes, "TEST_NAME" => \$test_name, "TIMEOUT" => \$timeout, + "CONNECT_TIMEOUT" => \$connect_timeout, + "CONFIG_BISECT_EXEC" => \$config_bisect_exec, "BOOTED_TIMEOUT" => \$booted_timeout, "CONSOLE" => \$console, "CLOSE_CONSOLE_SIGNAL" => \$close_console_signal, @@ -337,6 +370,7 @@ my %used_options; # default variables that can be used chomp ($variable{"PWD"} = `pwd`); +$pwd = $variable{"PWD"}; $config_help{"MACHINE"} = << "EOF" The machine hostname that you will test. @@ -718,22 +752,14 @@ sub set_value { my $prvalue = process_variables($rvalue); - if ($buildonly && $lvalue =~ /^TEST_TYPE(\[.*\])?$/ && $prvalue ne "build") { + if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ && + $prvalue !~ /^(config_|)bisect$/ && + $prvalue !~ /^build$/ && + $buildonly) { + # Note if a test is something other than build, then we # will need other mandatory options. if ($prvalue ne "install") { - # for bisect, we need to check BISECT_TYPE - if ($prvalue ne "bisect") { - $buildonly = 0; - } - } else { - # install still limits some mandatory options. - $buildonly = 2; - } - } - - if ($buildonly && $lvalue =~ /^BISECT_TYPE(\[.*\])?$/ && $prvalue ne "build") { - if ($prvalue ne "install") { $buildonly = 0; } else { # install still limits some mandatory options. @@ -1140,7 +1166,8 @@ sub __read_config { sub get_test_case { print "What test case would you like to run?\n"; print " (build, install or boot)\n"; - print " Other tests are available but require editing the config file\n"; + print " Other tests are available but require editing ktest.conf\n"; + print " (see tools/testing/ktest/sample.conf)\n"; my $ans = <STDIN>; chomp $ans; $default{"TEST_TYPE"} = $ans; @@ -1328,8 +1355,8 @@ sub reboot { my ($time) = @_; my $powercycle = 0; - # test if the machine can be connected to within 5 seconds - my $stat = run_ssh("echo check machine status", 5); + # test if the machine can be connected to within a few seconds + my $stat = run_ssh("echo check machine status", $connect_timeout); if (!$stat) { doprint("power cycle\n"); $powercycle = 1; @@ -1404,10 +1431,18 @@ sub do_not_reboot { return $test_type eq "build" || $no_reboot || ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") || - ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build"); + ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") || + ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build"); } +my $in_die = 0; + sub dodie { + + # avoid recusion + return if ($in_die); + $in_die = 1; + doprint "CRITICAL FAILURE... ", @_, "\n"; my $i = $iteration; @@ -1426,6 +1461,11 @@ sub dodie { print " See $opt{LOG_FILE} for more info.\n"; } + if ($email_on_error) { + send_email("KTEST: critical failure for your [$test_type] test", + "Your test started at $script_start_time has failed with:\n@_\n"); + } + if ($monitor_cnt) { # restore terminal settings system("stty $stty_orig"); @@ -1477,7 +1517,7 @@ sub exec_console { close($pts); exec $console or - die "Can't open console $console"; + dodie "Can't open console $console"; } sub open_console { @@ -1515,6 +1555,9 @@ sub close_console { doprint "kill child process $pid\n"; kill $close_console_signal, $pid; + doprint "wait for child process $pid to exit\n"; + waitpid($pid, 0); + print "closing!\n"; close($fp); @@ -1625,7 +1668,7 @@ sub save_logs { if (!-d $dir) { mkpath($dir) or - die "can't create $dir"; + dodie "can't create $dir"; } my %files = ( @@ -1638,7 +1681,7 @@ sub save_logs { while (my ($name, $source) = each(%files)) { if (-f "$source") { cp "$source", "$dir/$name" or - die "failed to copy $source"; + dodie "failed to copy $source"; } } @@ -1692,6 +1735,7 @@ sub run_command { my $end_time; my $dolog = 0; my $dord = 0; + my $dostdout = 0; my $pid; $command =~ s/\$SSH_USER/$ssh_user/g; @@ -1710,9 +1754,15 @@ sub run_command { } if (defined($redirect)) { - open (RD, ">$redirect") or - dodie "failed to write to redirect $redirect"; - $dord = 1; + if ($redirect eq 1) { + $dostdout = 1; + # Have the output of the command on its own line + doprint "\n"; + } else { + open (RD, ">$redirect") or + dodie "failed to write to redirect $redirect"; + $dord = 1; + } } my $hit_timeout = 0; @@ -1734,6 +1784,7 @@ sub run_command { } print LOG $line if ($dolog); print RD $line if ($dord); + print $line if ($dostdout); } waitpid($pid, 0); @@ -1812,7 +1863,7 @@ sub get_grub2_index { $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g; open(IN, "$ssh_grub |") - or die "unable to get $grub_file"; + or dodie "unable to get $grub_file"; my $found = 0; @@ -1821,13 +1872,13 @@ sub get_grub2_index { $grub_number++; $found = 1; last; - } elsif (/^menuentry\s/) { + } elsif (/^menuentry\s|^submenu\s/) { $grub_number++; } } close(IN); - die "Could not find '$grub_menu' in $grub_file on $machine" + dodie "Could not find '$grub_menu' in $grub_file on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1855,7 +1906,7 @@ sub get_grub_index { $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g; open(IN, "$ssh_grub |") - or die "unable to get menu.lst"; + or dodie "unable to get menu.lst"; my $found = 0; @@ -1870,7 +1921,7 @@ sub get_grub_index { } close(IN); - die "Could not find '$grub_menu' in /boot/grub/menu on $machine" + dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine" if (!$found); doprint "$grub_number\n"; $last_grub_menu = $grub_menu; @@ -1983,7 +2034,7 @@ sub monitor { my $full_line = ""; open(DMESG, "> $dmesg") or - die "unable to write to $dmesg"; + dodie "unable to write to $dmesg"; reboot_to; @@ -2862,7 +2913,7 @@ sub run_bisect { sub update_bisect_replay { my $tmp_log = "$tmpdir/ktest_bisect_log"; run_command "git bisect log > $tmp_log" or - die "can't create bisect log"; + dodie "can't create bisect log"; return $tmp_log; } @@ -2871,9 +2922,9 @@ sub bisect { my $result; - die "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); - die "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); - die "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); + dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good)); + dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad)); + dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type)); my $good = $bisect_good; my $bad = $bisect_bad; @@ -2936,7 +2987,7 @@ sub bisect { if ($check ne "good") { doprint "TESTING BISECT BAD [$bad]\n"; run_command "git checkout $bad" or - die "Failed to checkout $bad"; + dodie "Failed to checkout $bad"; $result = run_bisect $type; @@ -2948,7 +2999,7 @@ sub bisect { if ($check ne "bad") { doprint "TESTING BISECT GOOD [$good]\n"; run_command "git checkout $good" or - die "Failed to checkout $good"; + dodie "Failed to checkout $good"; $result = run_bisect $type; @@ -2959,7 +3010,7 @@ sub bisect { # checkout where we started run_command "git checkout $head" or - die "Failed to checkout $head"; + dodie "Failed to checkout $head"; } run_command "git bisect start$start_files" or @@ -3092,76 +3143,6 @@ sub create_config { make_oldconfig; } -# compare two config hashes, and return configs with different vals. -# It returns B's config values, but you can use A to see what A was. -sub diff_config_vals { - my ($pa, $pb) = @_; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - my %ret; - - foreach my $item (keys %a) { - if (defined($b{$item}) && $b{$item} ne $a{$item}) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# compare two config hashes and return the configs in B but not A -sub diff_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - $ret{$item} = $b{$item}; - } - } - - return %ret; -} - -# return if two configs are equal or not -# 0 is equal +1 b has something a does not -# +1 if a and b have a different item. -# -1 if a has something b does not -sub compare_configs { - my ($pa, $pb) = @_; - - my %ret; - - # crappy Perl way to pass in hashes. - my %a = %{$pa}; - my %b = %{$pb}; - - foreach my $item (keys %b) { - if (!defined($a{$item})) { - return 1; - } - if ($a{$item} ne $b{$item}) { - return 1; - } - } - - foreach my $item (keys %a) { - if (!defined($b{$item})) { - return -1; - } - } - - return 0; -} - sub run_config_bisect_test { my ($type) = @_; @@ -3174,166 +3155,57 @@ sub run_config_bisect_test { return $ret; } -sub process_failed { - my ($config) = @_; +sub config_bisect_end { + my ($good, $bad) = @_; + my $diffexec = "diff -u"; + if (-f "$builddir/scripts/diffconfig") { + $diffexec = "$builddir/scripts/diffconfig"; + } doprint "\n\n***************************************\n"; - doprint "Found bad config: $config\n"; + doprint "No more config bisecting possible.\n"; + run_command "$diffexec $good $bad", 1; doprint "***************************************\n\n"; } -# used for config bisecting -my $good_config; -my $bad_config; - -sub process_new_config { - my ($tc, $nc, $gc, $bc) = @_; - - my %tmp_config = %{$tc}; - my %good_configs = %{$gc}; - my %bad_configs = %{$bc}; - - my %new_configs; - - my $runtest = 1; - my $ret; - - create_config "tmp_configs", \%tmp_config; - assign_configs \%new_configs, $output_config; - - $ret = compare_configs \%new_configs, \%bad_configs; - if (!$ret) { - doprint "New config equals bad config, try next test\n"; - $runtest = 0; - } - - if ($runtest) { - $ret = compare_configs \%new_configs, \%good_configs; - if (!$ret) { - doprint "New config equals good config, try next test\n"; - $runtest = 0; - } - } - - %{$nc} = %new_configs; - - return $runtest; -} - sub run_config_bisect { - my ($pgood, $pbad) = @_; - - my $type = $config_bisect_type; - - my %good_configs = %{$pgood}; - my %bad_configs = %{$pbad}; - - my %diff_configs = diff_config_vals \%good_configs, \%bad_configs; - my %b_configs = diff_configs \%good_configs, \%bad_configs; - my %g_configs = diff_configs \%bad_configs, \%good_configs; - - my @diff_arr = keys %diff_configs; - my $len_diff = $#diff_arr + 1; - - my @b_arr = keys %b_configs; - my $len_b = $#b_arr + 1; - - my @g_arr = keys %g_configs; - my $len_g = $#g_arr + 1; - - my $runtest = 1; - my %new_configs; + my ($good, $bad, $last_result) = @_; + my $reset = ""; + my $cmd; my $ret; - # First, lets get it down to a single subset. - # Is the problem with a difference in values? - # Is the problem with a missing config? - # Is the problem with a config that breaks things? - - # Enable all of one set and see if we get a new bad - # or good config. - - # first set the good config to the bad values. - - doprint "d=$len_diff g=$len_g b=$len_b\n"; - - # first lets enable things in bad config that are enabled in good config - - if ($len_diff > 0) { - if ($len_b > 0 || $len_g > 0) { - my %tmp_config = %bad_configs; - - doprint "Set tmp config to be bad config with good config values\n"; - foreach my $item (@diff_arr) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + if (!length($last_result)) { + $reset = "-r"; } + run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1; - if (!$runtest && $len_diff > 0) { - - if ($len_diff == 1) { - process_failed $diff_arr[0]; - return 1; - } - my %tmp_config = %bad_configs; - - my $half = int($#diff_arr / 2); - my @tophalf = @diff_arr[0 .. $half]; - - doprint "Settings bisect with top half:\n"; - doprint "Set tmp config to be bad config with some good config values\n"; - foreach my $item (@tophalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - - if (!$runtest) { - my %tmp_config = %bad_configs; - - doprint "Try bottom half\n"; - - my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr]; - - foreach my $item (@bottomhalf) { - $tmp_config{$item} = $good_configs{$item}; - } - - $runtest = process_new_config \%tmp_config, \%new_configs, - \%good_configs, \%bad_configs; - } + # config-bisect returns: + # 0 if there is more to bisect + # 1 for finding a good config + # 2 if it can not find any more configs + # -1 (255) on error + if ($run_command_status) { + return $run_command_status; } - if ($runtest) { - $ret = run_config_bisect_test $type; - if ($ret) { - doprint "NEW GOOD CONFIG\n"; - %good_configs = %new_configs; - run_command "mv $good_config ${good_config}.last"; - save_config \%good_configs, $good_config; - %{$pgood} = %good_configs; - } else { - doprint "NEW BAD CONFIG\n"; - %bad_configs = %new_configs; - run_command "mv $bad_config ${bad_config}.last"; - save_config \%bad_configs, $bad_config; - %{$pbad} = %bad_configs; - } - return 0; + $ret = run_config_bisect_test $config_bisect_type; + if ($ret) { + doprint "NEW GOOD CONFIG\n"; + # Return 3 for good config + return 3; + } else { + doprint "NEW BAD CONFIG\n"; + # Return 4 for bad config + return 4; } - - fail "Hmm, need to do a mix match?\n"; - return -1; } sub config_bisect { my ($i) = @_; + my $good_config; + my $bad_config; + my $type = $config_bisect_type; my $ret; @@ -3353,6 +3225,24 @@ sub config_bisect { $good_config = $output_config; } + if (!defined($config_bisect_exec)) { + # First check the location that ktest.pl ran + my @locations = ( "$pwd/config-bisect.pl", + "$dirname/config-bisect.pl", + "$builddir/tools/testing/ktest/config-bisect.pl", + undef ); + foreach my $loc (@locations) { + doprint "loc = $loc\n"; + $config_bisect_exec = $loc; + last if (defined($config_bisect_exec && -x $config_bisect_exec)); + } + if (!defined($config_bisect_exec)) { + fail "Could not find an executable config-bisect.pl\n", + " Set CONFIG_BISECT_EXEC to point to config-bisect.pl"; + return 1; + } + } + # we don't want min configs to cause issues here. doprint "Disabling 'MIN_CONFIG' for this test\n"; undef $minconfig; @@ -3361,21 +3251,31 @@ sub config_bisect { my %bad_configs; my %tmp_configs; + if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") { + if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") { + if (-f "$tmpdir/good_config.tmp") { + $good_config = "$tmpdir/good_config.tmp"; + } else { + $good_config = "$tmpdir/good_config"; + } + if (-f "$tmpdir/bad_config.tmp") { + $bad_config = "$tmpdir/bad_config.tmp"; + } else { + $bad_config = "$tmpdir/bad_config"; + } + } + } doprint "Run good configs through make oldconfig\n"; assign_configs \%tmp_configs, $good_config; create_config "$good_config", \%tmp_configs; - assign_configs \%good_configs, $output_config; + $good_config = "$tmpdir/good_config"; + system("cp $output_config $good_config") == 0 or dodie "cp good config"; doprint "Run bad configs through make oldconfig\n"; assign_configs \%tmp_configs, $bad_config; create_config "$bad_config", \%tmp_configs; - assign_configs \%bad_configs, $output_config; - - $good_config = "$tmpdir/good_config"; $bad_config = "$tmpdir/bad_config"; - - save_config \%good_configs, $good_config; - save_config \%bad_configs, $bad_config; + system("cp $output_config $bad_config") == 0 or dodie "cp bad config"; if (defined($config_bisect_check) && $config_bisect_check ne "0") { if ($config_bisect_check ne "good") { @@ -3398,10 +3298,21 @@ sub config_bisect { } } + my $last_run = ""; + do { - $ret = run_config_bisect \%good_configs, \%bad_configs; + $ret = run_config_bisect $good_config, $bad_config, $last_run; + if ($ret == 3) { + $last_run = "good"; + } elsif ($ret == 4) { + $last_run = "bad"; + } print_times; - } while (!$ret); + } while ($ret == 3 || $ret == 4); + + if ($ret == 2) { + config_bisect_end "$good_config.tmp", "$bad_config.tmp"; + } return $ret if ($ret < 0); @@ -3416,9 +3327,9 @@ sub patchcheck_reboot { sub patchcheck { my ($i) = @_; - die "PATCHCHECK_START[$i] not defined\n" + dodie "PATCHCHECK_START[$i] not defined\n" if (!defined($patchcheck_start)); - die "PATCHCHECK_TYPE[$i] not defined\n" + dodie "PATCHCHECK_TYPE[$i] not defined\n" if (!defined($patchcheck_type)); my $start = $patchcheck_start; @@ -3432,7 +3343,7 @@ sub patchcheck { if (defined($patchcheck_end)) { $end = $patchcheck_end; } elsif ($cherry) { - die "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; + dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n"; } # Get the true sha1's since we can use things like HEAD~3 @@ -3496,7 +3407,7 @@ sub patchcheck { doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or - die "Failed to checkout $sha1"; + dodie "Failed to checkout $sha1"; # only clean on the first and last patch if ($item eq $list[0] || @@ -3587,7 +3498,7 @@ sub read_kconfig { } open(KIN, "$kconfig") - or die "Can't open $kconfig"; + or dodie "Can't open $kconfig"; while (<KIN>) { chomp; @@ -3746,7 +3657,7 @@ sub get_depends { $dep =~ s/^[^$valid]*[$valid]+//; } else { - die "this should never happen"; + dodie "this should never happen"; } } @@ -4007,7 +3918,7 @@ sub make_min_config { # update new ignore configs if (defined($ignore_config)) { open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %save_configs) { print OUT "$save_configs{$config}\n"; } @@ -4035,7 +3946,7 @@ sub make_min_config { # Save off all the current mandatory configs open (OUT, ">$temp_config") - or die "Can't write to $temp_config"; + or dodie "Can't write to $temp_config"; foreach my $config (keys %keep_configs) { print OUT "$keep_configs{$config}\n"; } @@ -4222,6 +4133,74 @@ sub set_test_option { return eval_option($name, $option, $i); } +sub find_mailer { + my ($mailer) = @_; + + my @paths = split /:/, $ENV{PATH}; + + # sendmail is usually in /usr/sbin + $paths[$#paths + 1] = "/usr/sbin"; + + foreach my $path (@paths) { + if (-x "$path/$mailer") { + return $path; + } + } + + return undef; +} + +sub do_send_mail { + my ($subject, $message) = @_; + + if (!defined($mail_path)) { + # find the mailer + $mail_path = find_mailer $mailer; + if (!defined($mail_path)) { + die "\nCan not find $mailer in PATH\n"; + } + } + + if (!defined($mail_command)) { + if ($mailer eq "mail" || $mailer eq "mailx") { + $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'"; + } elsif ($mailer eq "sendmail" ) { + $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO"; + } else { + die "\nYour mailer: $mailer is not supported.\n"; + } + } + + $mail_command =~ s/\$MAILER/$mailer/g; + $mail_command =~ s/\$MAIL_PATH/$mail_path/g; + $mail_command =~ s/\$MAILTO/$mailto/g; + $mail_command =~ s/\$SUBJECT/$subject/g; + $mail_command =~ s/\$MESSAGE/$message/g; + + run_command $mail_command; +} + +sub send_email { + + if (defined($mailto)) { + if (!defined($mailer)) { + doprint "No email sent: email or mailer not specified in config.\n"; + return; + } + do_send_mail @_; + } +} + +sub cancel_test { + if ($email_when_canceled) { + send_email("KTEST: Your [$test_type] test was cancelled", + "Your test started at $script_start_time was cancelled: sig int"); + } + die "\nCaught Sig Int, test interrupted: $!\n" +} + +$SIG{INT} = qw(cancel_test); + # First we need to do is the builds for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { @@ -4245,11 +4224,11 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $outputdir = set_test_option("OUTPUT_DIR", $i); $builddir = set_test_option("BUILD_DIR", $i); - chdir $builddir || die "can't change directory to $builddir"; + chdir $builddir || dodie "can't change directory to $builddir"; if (!-d $outputdir) { mkpath($outputdir) or - die "can't create $outputdir"; + dodie "can't create $outputdir"; } $make = "$makecmd O=$outputdir"; @@ -4262,9 +4241,15 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { $start_minconfig_defined = 1; # The first test may override the PRE_KTEST option - if (defined($pre_ktest) && $i == 1) { - doprint "\n"; - run_command $pre_ktest; + if ($i == 1) { + if (defined($pre_ktest)) { + doprint "\n"; + run_command $pre_ktest; + } + if ($email_when_started) { + send_email("KTEST: Your [$test_type] test was started", + "Your test was started on $script_start_time"); + } } # Any test can override the POST_KTEST option @@ -4280,7 +4265,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (!-d $tmpdir) { mkpath($tmpdir) or - die "can't create $tmpdir"; + dodie "can't create $tmpdir"; } $ENV{"SSH_USER"} = $ssh_user; @@ -4353,7 +4338,7 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { if (defined($checkout)) { run_command "git checkout $checkout" or - die "failed to checkout $checkout"; + dodie "failed to checkout $checkout"; } $no_reboot = 0; @@ -4428,4 +4413,8 @@ if ($opt{"POWEROFF_ON_SUCCESS"}) { doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n"; +if ($email_when_finished) { + send_email("KTEST: Your [$test_type] test has finished!", + "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!"); +} exit 0; diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index 6c58cd8bbbae..6ca6ca0ce695 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1,6 +1,11 @@ # # Config file for ktest.pl # +# Place your customized version of this, in the working directory that +# ktest.pl is run from. By default, ktest.pl will look for a file +# called "ktest.conf", but you can name it anything you like and specify +# the name of your config file as the first argument of ktest.pl. +# # Note, all paths must be absolute # @@ -396,6 +401,44 @@ #### Optional Config Options (all have defaults) #### +# Email options for receiving notifications. Users must setup +# the specified mailer prior to using this feature. +# +# (default undefined) +#MAILTO = +# +# Supported mailers: sendmail, mail, mailx +# (default sendmail) +#MAILER = sendmail +# +# The executable to run +# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER}) +#MAIL_EXEC = /usr/sbin/sendmail +# +# The command used to send mail, which uses the above options +# can be modified. By default if the mailer is "sendmail" then +# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# For mail or mailx: +# MAIL_COMMAND = "$MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\' +# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time +# it sends the mail if "$FOO" format is used. If "${FOO}" format is used, +# then the substitutions will occur at the time the config file is read. +# But note, MAIL_PATH and MAILER require being set by the config file if +# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are. +#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO +# +# Errors are defined as those would terminate the script +# (default 1) +#EMAIL_ON_ERROR = 1 +# (default 1) +#EMAIL_WHEN_FINISHED = 1 +# (default 0) +#EMAIL_WHEN_STARTED = 1 +# +# Users can cancel the test by Ctrl^C +# (default 0) +#EMAIL_WHEN_CANCELED = 1 + # Start a test setup. If you leave this off, all options # will be default and the test will run once. # This is a label and not really an option (it takes no value). @@ -725,6 +768,13 @@ # (default 120) #TIMEOUT = 120 +# The timeout in seconds when to test if the box can be rebooted +# or not. Before issuing the reboot command, a ssh connection +# is attempted to see if the target machine is still active. +# If the target does not connect within this timeout, a power cycle +# is issued instead of a reboot. +# CONNECT_TIMEOUT = 25 + # In between tests, a reboot of the box may occur, and this # is the time to wait for the console after it stops producing # output. Some machines may not produce a large lag on reboot @@ -1167,6 +1217,16 @@ # Set it to "good" to test only the good config and set it # to "bad" to only test the bad config. # +# CONFIG_BISECT_EXEC (optional) +# The config bisect is a separate program that comes with ktest.pl. +# By befault, it will look for: +# `pwd`/config-bisect.pl # the location ktest.pl was executed from. +# If it does not find it there, it will look for: +# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl +# If it does not find it there, it will look for: +# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl +# Setting CONFIG_BISECT_EXEC will override where it looks. +# # Example: # TEST_START # TEST_TYPE = config_bisect diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 620fa78b3b1b..4ea385be528f 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -104,7 +104,8 @@ enum { NUM_HINTS = 8, NUM_BDW = NUM_DCR, NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, - NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + + 4 /* spa1 iset */ + 1 /* spa11 iset */, DIMM_SIZE = SZ_32M, LABEL_SIZE = SZ_128K, SPA_VCD_SIZE = SZ_4M, @@ -137,6 +138,7 @@ static u32 handle[] = { }; static unsigned long dimm_fail_cmd_flags[NUM_DCR]; +static int dimm_fail_cmd_code[NUM_DCR]; struct nfit_test_fw { enum intel_fw_update_state state; @@ -153,6 +155,7 @@ struct nfit_test { void *nfit_buf; dma_addr_t nfit_dma; size_t nfit_size; + size_t nfit_filled; int dcr_idx; int num_dcr; int num_pm; @@ -709,7 +712,9 @@ static void smart_notify(struct device *bus_dev, >= thresh->media_temperature) || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP) && smart->ctrl_temperature - >= thresh->ctrl_temperature)) { + >= thresh->ctrl_temperature) + || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH) + || (smart->shutdown_state != 0)) { device_lock(bus_dev); __acpi_nvdimm_notify(dimm_dev, 0x81); device_unlock(bus_dev); @@ -735,6 +740,32 @@ static int nfit_test_cmd_smart_set_threshold( return 0; } +static int nfit_test_cmd_smart_inject( + struct nd_intel_smart_inject *inj, + unsigned int buf_len, + struct nd_intel_smart_threshold *thresh, + struct nd_intel_smart *smart, + struct device *bus_dev, struct device *dimm_dev) +{ + if (buf_len != sizeof(*inj)) + return -EINVAL; + + if (inj->mtemp_enable) + smart->media_temperature = inj->media_temperature; + if (inj->spare_enable) + smart->spares = inj->spares; + if (inj->fatal_enable) + smart->health = ND_INTEL_SMART_FATAL_HEALTH; + if (inj->unsafe_shutdown_enable) { + smart->shutdown_state = 1; + smart->shutdown_count++; + } + inj->status = 0; + smart_notify(bus_dev, dimm_dev, smart, thresh); + + return 0; +} + static void uc_error_notify(struct work_struct *work) { struct nfit_test *t = container_of(work, typeof(*t), work); @@ -862,8 +893,11 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func) if (i >= ARRAY_SIZE(handle)) return -ENXIO; - if ((1 << func) & dimm_fail_cmd_flags[i]) + if ((1 << func) & dimm_fail_cmd_flags[i]) { + if (dimm_fail_cmd_code[i]) + return dimm_fail_cmd_code[i]; return -EIO; + } return i; } @@ -935,6 +969,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, t->dcr_idx], &t->smart[i - t->dcr_idx], &t->pdev.dev, t->dimm_dev[i]); + case ND_INTEL_SMART_INJECT: + return nfit_test_cmd_smart_inject(buf, + buf_len, + &t->smart_threshold[i - + t->dcr_idx], + &t->smart[i - t->dcr_idx], + &t->pdev.dev, t->dimm_dev[i]); default: return -ENOTTY; } @@ -1125,12 +1166,12 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state) static void put_dimms(void *data) { - struct device **dimm_dev = data; + struct nfit_test *t = data; int i; - for (i = 0; i < NUM_DCR; i++) - if (dimm_dev[i]) - device_unregister(dimm_dev[i]); + for (i = 0; i < t->num_dcr; i++) + if (t->dimm_dev[i]) + device_unregister(t->dimm_dev[i]); } static struct class *nfit_test_dimm; @@ -1139,13 +1180,11 @@ static int dimm_name_to_id(struct device *dev) { int dimm; - if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1 - || dimm >= NUM_DCR || dimm < 0) + if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1) return -ENXIO; return dimm; } - static ssize_t handle_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1154,7 +1193,7 @@ static ssize_t handle_show(struct device *dev, struct device_attribute *attr, if (dimm < 0) return dimm; - return sprintf(buf, "%#x", handle[dimm]); + return sprintf(buf, "%#x\n", handle[dimm]); } DEVICE_ATTR_RO(handle); @@ -1188,8 +1227,39 @@ static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RW(fail_cmd); +static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int dimm = dimm_name_to_id(dev); + + if (dimm < 0) + return dimm; + + return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]); +} + +static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + int dimm = dimm_name_to_id(dev); + unsigned long val; + ssize_t rc; + + if (dimm < 0) + return dimm; + + rc = kstrtol(buf, 0, &val); + if (rc) + return rc; + + dimm_fail_cmd_code[dimm] = val; + return size; +} +static DEVICE_ATTR_RW(fail_cmd_code); + static struct attribute *nfit_test_dimm_attributes[] = { &dev_attr_fail_cmd.attr, + &dev_attr_fail_cmd_code.attr, &dev_attr_handle.attr, NULL, }; @@ -1203,6 +1273,23 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = { NULL, }; +static int nfit_test_dimm_init(struct nfit_test *t) +{ + int i; + + if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t)) + return -ENOMEM; + for (i = 0; i < t->num_dcr; i++) { + t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, + &t->pdev.dev, 0, NULL, + nfit_test_dimm_attribute_groups, + "test_dimm%d", i + t->dcr_idx); + if (!t->dimm_dev[i]) + return -ENOMEM; + } + return 0; +} + static void smart_init(struct nfit_test *t) { int i; @@ -1222,7 +1309,7 @@ static void smart_init(struct nfit_test *t) | ND_INTEL_SMART_MTEMP_VALID, .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH, .media_temperature = 23 * 16, - .ctrl_temperature = 30 * 16, + .ctrl_temperature = 25 * 16, .pmic_temperature = 40 * 16, .spares = 75, .alarm_flags = ND_INTEL_SMART_SPARE_TRIP @@ -1298,17 +1385,8 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->_fit) return -ENOMEM; - if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev)) + if (nfit_test_dimm_init(t)) return -ENOMEM; - for (i = 0; i < NUM_DCR; i++) { - t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm, - &t->pdev.dev, 0, NULL, - nfit_test_dimm_attribute_groups, - "test_dimm%d", i); - if (!t->dimm_dev[i]) - return -ENOMEM; - } - smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1340,6 +1418,8 @@ static int nfit_test1_alloc(struct nfit_test *t) if (!t->spa_set[1]) return -ENOMEM; + if (nfit_test_dimm_init(t)) + return -ENOMEM; smart_init(t); return ars_state_init(&t->pdev.dev, &t->ars_state); } @@ -1366,7 +1446,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_data_region *bdw; struct acpi_nfit_flush_address *flush; struct acpi_nfit_capabilities *pcap; - unsigned int offset, i; + unsigned int offset = 0, i; /* * spa0 (interleave first half of dimm0 and dimm1, note storage @@ -1380,93 +1460,102 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* * spa1 (interleave last half of the 4 DIMMS, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 1+1; spa->address = t->spa_set_dma[1]; spa->length = SPA1_SIZE; + offset += spa->header.length; /* spa2 (dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 2+1; spa->address = t->dcr_dma[0]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa3 (dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 3; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 3+1; spa->address = t->dcr_dma[1]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa4 (dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 4; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 4+1; spa->address = t->dcr_dma[2]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa5 (dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 5; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); spa->range_index = 5+1; spa->address = t->dcr_dma[3]; spa->length = DCR_SIZE; + offset += spa->header.length; /* spa6 (bdw for dcr0) dimm0 */ - spa = nfit_buf + sizeof(*spa) * 6; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 6+1; spa->address = t->dimm_dma[0]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa7 (bdw for dcr1) dimm1 */ - spa = nfit_buf + sizeof(*spa) * 7; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 7+1; spa->address = t->dimm_dma[1]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa8 (bdw for dcr2) dimm2 */ - spa = nfit_buf + sizeof(*spa) * 8; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 8+1; spa->address = t->dimm_dma[2]; spa->length = DIMM_SIZE; + offset += spa->header.length; /* spa9 (bdw for dcr3) dimm3 */ - spa = nfit_buf + sizeof(*spa) * 9; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 9+1; spa->address = t->dimm_dma[3]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = sizeof(*spa) * 10; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1481,9 +1570,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 2; + offset += memdev->header.length; /* mem-region1 (spa0, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1497,9 +1587,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 2; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region2 (spa1, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1513,9 +1604,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region3 (spa1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1528,9 +1620,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region4 (spa1, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1544,9 +1637,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 4; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region5 (spa1, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1559,9 +1653,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = SPA0_SIZE/2; memdev->interleave_index = 0; memdev->interleave_ways = 4; + offset += memdev->header.length; /* mem-region6 (spa/dcr0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1574,9 +1669,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region7 (spa/dcr1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1589,9 +1685,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region8 (spa/dcr2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1604,9 +1701,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region9 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1619,9 +1717,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region10 (spa/bdw0, dimm0) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[0]; @@ -1634,9 +1733,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region11 (spa/bdw1, dimm1) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[1]; @@ -1649,9 +1749,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region12 (spa/bdw2, dimm2) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[2]; @@ -1664,9 +1765,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; /* mem-region13 (spa/dcr3, dimm3) */ - memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[3]; @@ -1680,12 +1782,12 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; /* dcr-descriptor0: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 0+1; dcr_common_init(dcr); dcr->serial_number = ~handle[0]; @@ -1696,11 +1798,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor1: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 1+1; dcr_common_init(dcr); dcr->serial_number = ~handle[1]; @@ -1711,11 +1814,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor2: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 2+1; dcr_common_init(dcr); dcr->serial_number = ~handle[2]; @@ -1726,11 +1830,12 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; /* dcr-descriptor3: blk */ - dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 3+1; dcr_common_init(dcr); dcr->serial_number = ~handle[3]; @@ -1741,8 +1846,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region) * 4; /* dcr-descriptor0: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1753,10 +1858,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[0]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor1: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size); + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1765,10 +1870,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[1]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor2: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 2; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1777,10 +1882,10 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[2]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; /* dcr-descriptor3: pmem */ - dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region, - window_size) * 3; + dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, window_size); @@ -1789,54 +1894,56 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[3]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size) * 4; /* bdw0 (spa/dcr0, dimm0) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 0+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw1 (spa/dcr1, dimm1) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 1+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw2 (spa/dcr2, dimm2) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 2+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; /* bdw3 (spa/dcr3, dimm3) */ - bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 3+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region) * 4; /* flush0 (dimm0) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -1845,48 +1952,52 @@ static void nfit_test0_setup(struct nfit_test *t) flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64); + offset += flush->header.length; /* flush1 (dimm1) */ - flush = nfit_buf + offset + flush_hint_size * 1; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[1]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64); + offset += flush->header.length; /* flush2 (dimm2) */ - flush = nfit_buf + offset + flush_hint_size * 2; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[2]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64); + offset += flush->header.length; /* flush3 (dimm3) */ - flush = nfit_buf + offset + flush_hint_size * 3; + flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; flush->header.length = flush_hint_size; flush->device_handle = handle[3]; flush->hint_count = NUM_HINTS; for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64); + offset += flush->header.length; /* platform capabilities */ - pcap = nfit_buf + offset + flush_hint_size * 4; + pcap = nfit_buf + offset; pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES; pcap->header.length = sizeof(*pcap); pcap->highest_capability = 1; pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH | ACPI_NFIT_CAPABILITY_MEM_FLUSH; + offset += pcap->header.length; if (t->setup_hotplug) { - offset = offset + flush_hint_size * 4 + sizeof(*pcap); /* dcr-descriptor4: blk */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; - dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->header.length = sizeof(*dcr); dcr->region_index = 8+1; dcr_common_init(dcr); dcr->serial_number = ~handle[4]; @@ -1897,8 +2008,8 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->command_size = 8; dcr->status_offset = 8; dcr->status_size = 4; + offset += dcr->header.length; - offset = offset + sizeof(struct acpi_nfit_control_region); /* dcr-descriptor4: pmem */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -1909,21 +2020,20 @@ static void nfit_test0_setup(struct nfit_test *t) dcr->serial_number = ~handle[4]; dcr->code = NFIT_FIC_BYTEN; dcr->windows = 0; + offset += dcr->header.length; - offset = offset + offsetof(struct acpi_nfit_control_region, - window_size); /* bdw4 (spa/dcr4, dimm4) */ bdw = nfit_buf + offset; bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; - bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->header.length = sizeof(*bdw); bdw->region_index = 8+1; bdw->windows = 1; bdw->offset = 0; bdw->size = BDW_SIZE; bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset += bdw->header.length; - offset = offset + sizeof(struct acpi_nfit_data_region); /* spa10 (dcr4) dimm4 */ spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; @@ -1932,30 +2042,32 @@ static void nfit_test0_setup(struct nfit_test *t) spa->range_index = 10+1; spa->address = t->dcr_dma[4]; spa->length = DCR_SIZE; + offset += spa->header.length; /* * spa11 (single-dimm interleave for hotplug, note storage * does not actually alias the related block-data-window * regions) */ - spa = nfit_buf + offset + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); spa->range_index = 11+1; spa->address = t->spa_set_dma[2]; spa->length = SPA0_SIZE; + offset += spa->header.length; /* spa12 (bdw for dcr4) dimm4 */ - spa = nfit_buf + offset + sizeof(*spa) * 2; + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); spa->range_index = 12+1; spa->address = t->dimm_dma[4]; spa->length = DIMM_SIZE; + offset += spa->header.length; - offset = offset + sizeof(*spa) * 3; /* mem-region14 (spa/dcr4, dimm4) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -1970,10 +2082,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - /* mem-region15 (spa0, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map); + /* mem-region15 (spa11, dimm4) */ + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -1987,10 +2099,10 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED; + offset += memdev->header.length; /* mem-region16 (spa/bdw4, dimm4) */ - memdev = nfit_buf + offset + - sizeof(struct acpi_nfit_memory_map) * 2; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); memdev->device_handle = handle[4]; @@ -2003,8 +2115,8 @@ static void nfit_test0_setup(struct nfit_test *t) memdev->address = 0; memdev->interleave_index = 0; memdev->interleave_ways = 1; + offset += memdev->header.length; - offset = offset + sizeof(struct acpi_nfit_memory_map) * 3; /* flush3 (dimm4) */ flush = nfit_buf + offset; flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; @@ -2014,8 +2126,14 @@ static void nfit_test0_setup(struct nfit_test *t) for (i = 0; i < NUM_HINTS; i++) flush->hint_address[i] = t->flush_dma[4] + i * sizeof(u64); + offset += flush->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); } + t->nfit_filled = offset; + post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA0_SIZE); @@ -2026,6 +2144,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en); set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en); set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); @@ -2061,17 +2180,18 @@ static void nfit_test1_setup(struct nfit_test *t) spa->range_index = 0+1; spa->address = t->spa_set_dma[0]; spa->length = SPA2_SIZE; + offset += spa->header.length; /* virtual cd region */ - spa = nfit_buf + sizeof(*spa); + spa = nfit_buf + offset; spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; spa->header.length = sizeof(*spa); memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16); spa->range_index = 0; spa->address = t->spa_set_dma[1]; spa->length = SPA_VCD_SIZE; + offset += spa->header.length; - offset += sizeof(*spa) * 2; /* mem-region0 (spa0, dimm0) */ memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; @@ -2089,8 +2209,8 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED | ACPI_NFIT_MEM_NOT_ARMED; + offset += memdev->header.length; - offset += sizeof(*memdev); /* dcr-descriptor0 */ dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; @@ -2101,8 +2221,8 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[5]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; - offset += dcr->header.length; + memdev = nfit_buf + offset; memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; memdev->header.length = sizeof(*memdev); @@ -2117,9 +2237,9 @@ static void nfit_test1_setup(struct nfit_test *t) memdev->interleave_index = 0; memdev->interleave_ways = 1; memdev->flags = ACPI_NFIT_MEM_MAP_FAILED; + offset += memdev->header.length; /* dcr-descriptor1 */ - offset += sizeof(*memdev); dcr = nfit_buf + offset; dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; dcr->header.length = offsetof(struct acpi_nfit_control_region, @@ -2129,6 +2249,12 @@ static void nfit_test1_setup(struct nfit_test *t) dcr->serial_number = ~handle[6]; dcr->code = NFIT_FIC_BYTE; dcr->windows = 0; + offset += dcr->header.length; + + /* sanity check to make sure we've filled the buffer */ + WARN_ON(offset != t->nfit_size); + + t->nfit_filled = offset; post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0], SPA2_SIZE); @@ -2139,6 +2265,9 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -2487,7 +2616,7 @@ static int nfit_test_probe(struct platform_device *pdev) nd_desc->ndctl = nfit_test_ctl; rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf, - nfit_test->nfit_size); + nfit_test->nfit_filled); if (rc) return rc; diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index 428344519cdf..33752e06ff8d 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_FW_FINISH_UPDATE 15 #define ND_INTEL_FW_FINISH_QUERY 16 #define ND_INTEL_SMART_SET_THRESHOLD 17 +#define ND_INTEL_SMART_INJECT 18 #define ND_INTEL_SMART_HEALTH_VALID (1 << 0) #define ND_INTEL_SMART_SPARES_VALID (1 << 1) @@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat { #define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0) #define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1) #define ND_INTEL_SMART_FATAL_HEALTH (1 << 2) +#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0) +#define ND_INTEL_SMART_INJECT_SPARE (1 << 1) +#define ND_INTEL_SMART_INJECT_FATAL (1 << 2) +#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3) struct nd_intel_smart { __u32 status; @@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold { __u32 status; } __packed; +struct nd_intel_smart_inject { + __u64 flags; + __u8 mtemp_enable; + __u16 media_temperature; + __u8 spare_enable; + __u8 spares; + __u8 fatal_enable; + __u8 unsafe_shutdown_enable; + __u32 status; +} __packed; + #define INTEL_FW_STORAGE_SIZE 0x100000 #define INTEL_FW_MAX_SEND_LEN 0xFFEC #define INTEL_FW_QUERY_INTERVAL 250000 diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 6c645eb77d42..ee820fcc29b0 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -252,6 +252,13 @@ void idr_checks(void) idr_remove(&idr, 3); idr_remove(&idr, 0); + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0); + idr_remove(&idr, 1); + for (i = 1; i < RADIX_TREE_MAP_SIZE; i++) + assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i); + idr_remove(&idr, 1 << 30); + idr_destroy(&idr); + for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) { struct item *item = item_create(i, 0); assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i); diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h index e3201ccf54c3..32159c08a52e 100644 --- a/tools/testing/radix-tree/linux/gfp.h +++ b/tools/testing/radix-tree/linux/gfp.h @@ -19,6 +19,7 @@ #define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM) +#define GFP_ZONEMASK 0x0fu #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include <linux/radix-tree.h> #include <linux/slab.h> #include <linux/errno.h> +#include <pthread.h> #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include <linux/rcupdate.h> struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -12,9 +13,11 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index dbda89c9d9b9..305130de910c 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -3,6 +3,7 @@ TARGETS = android TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities +TARGETS += cgroup TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += efivarfs @@ -15,6 +16,7 @@ TARGETS += gpio TARGETS += intel_pstate TARGETS += ipc TARGETS += kcmp +TARGETS += kvm TARGETS += lib TARGETS += membarrier TARGETS += memfd @@ -24,8 +26,10 @@ TARGETS += mqueue TARGETS += net TARGETS += nsfs TARGETS += powerpc +TARGETS += proc TARGETS += pstore TARGETS += ptrace +TARGETS += rtc TARGETS += seccomp TARGETS += sigaltstack TARGETS += size @@ -67,6 +71,12 @@ ifndef BUILD BUILD := $(shell pwd) endif +# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header +# printing from tests. Applicable to run_tests case where run_tests adds +# TAP header prior running tests and when a test program invokes another +# with system() call. Export it here to cover override RUN_TESTS defines. +export KSFT_TAP_LEVEL=`echo 1` + export BUILD all: @for TARGET in $(TARGETS); do \ @@ -126,11 +136,15 @@ ifdef INSTALL_PATH echo "else" >> $(ALL_SCRIPT) echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT) echo "fi" >> $(ALL_SCRIPT) + echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT) + echo "export skip=4" >> $(ALL_SCRIPT) for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \ + echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ echo "cd $$TARGET" >> $(ALL_SCRIPT); \ make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile index f6304d2be90c..72c25a3cb658 100644 --- a/tools/testing/selftests/android/Makefile +++ b/tools/testing/selftests/android/Makefile @@ -18,10 +18,6 @@ all: fi \ done -override define RUN_TESTS - @cd $(OUTPUT); ./run.sh -endef - override define INSTALL_RULE mkdir -p $(INSTALL_PATH) install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @@ -33,10 +29,6 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS - echo "./run.sh" -endef - override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore index 67e6f391b2a9..95e8f4561474 100644 --- a/tools/testing/selftests/android/ion/.gitignore +++ b/tools/testing/selftests/android/ion/.gitignore @@ -1,2 +1,3 @@ ionapp_export ionapp_import +ionmap_test diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile index 96e0c448b39d..e03695287f76 100644 --- a/tools/testing/selftests/android/ion/Makefile +++ b/tools/testing/selftests/android/ion/Makefile @@ -1,8 +1,8 @@ -INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ +INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/ CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g -TEST_GEN_FILES := ionapp_export ionapp_import +TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test all: $(TEST_GEN_FILES) @@ -14,3 +14,4 @@ include ../../lib.mk $(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c $(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c +$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c diff --git a/tools/testing/selftests/android/ion/config b/tools/testing/selftests/android/ion/config index 19db6ca9aa2b..b4ad748a9dd9 100644 --- a/tools/testing/selftests/android/ion/config +++ b/tools/testing/selftests/android/ion/config @@ -2,3 +2,4 @@ CONFIG_ANDROID=y CONFIG_STAGING=y CONFIG_ION=y CONFIG_ION_SYSTEM_HEAP=y +CONFIG_DRM_VGEM=y diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh index a1aff506f5e6..69e676cfc94e 100755 --- a/tools/testing/selftests/android/ion/ion_test.sh +++ b/tools/testing/selftests/android/ion/ion_test.sh @@ -4,6 +4,9 @@ heapsize=4096 TCID="ion_test.sh" errcode=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + run_test() { heaptype=$1 @@ -25,7 +28,7 @@ check_root() uid=$(id -u) if [ $uid -ne 0 ]; then echo $TCID: must be run as root >&2 - exit 0 + exit $ksft_skip fi } @@ -35,7 +38,7 @@ check_device() if [ ! -e $DEVICE ]; then echo $TCID: No $DEVICE device found >&2 echo $TCID: May be CONFIG_ION is not set >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c new file mode 100644 index 000000000000..dab36b06b37d --- /dev/null +++ b/tools/testing/selftests/android/ion/ionmap_test.c @@ -0,0 +1,136 @@ +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> + +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <linux/dma-buf.h> + +#include <drm/drm.h> + +#include "ion.h" +#include "ionutils.h" + +int check_vgem(int fd) +{ + drm_version_t version = { 0 }; + char name[5]; + int ret; + + version.name_len = 4; + version.name = name; + + ret = ioctl(fd, DRM_IOCTL_VERSION, &version); + if (ret) + return 1; + + return strcmp(name, "vgem"); +} + +int open_vgem(void) +{ + int i, fd; + const char *drmstr = "/dev/dri/card"; + + fd = -1; + for (i = 0; i < 16; i++) { + char name[80]; + + sprintf(name, "%s%u", drmstr, i); + + fd = open(name, O_RDWR); + if (fd < 0) + continue; + + if (check_vgem(fd)) { + close(fd); + continue; + } else { + break; + } + + } + return fd; +} + +int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle) +{ + struct drm_prime_handle import_handle = { 0 }; + int ret; + + import_handle.fd = dma_buf_fd; + import_handle.flags = 0; + import_handle.handle = 0; + + ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle); + if (ret == 0) + *handle = import_handle.handle; + return ret; +} + +void close_handle(int vgem_fd, uint32_t handle) +{ + struct drm_gem_close close = { 0 }; + + close.handle = handle; + ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +int main() +{ + int ret, vgem_fd; + struct ion_buffer_info info; + uint32_t handle = 0; + struct dma_buf_sync sync = { 0 }; + + info.heap_type = ION_HEAP_TYPE_SYSTEM; + info.heap_size = 4096; + info.flag_type = ION_FLAG_CACHED; + + ret = ion_export_buffer_fd(&info); + if (ret < 0) { + printf("ion buffer alloc failed\n"); + return -1; + } + + vgem_fd = open_vgem(); + if (vgem_fd < 0) { + ret = vgem_fd; + printf("Failed to open vgem\n"); + goto out_ion; + } + + ret = import_vgem_fd(vgem_fd, info.buffd, &handle); + + if (ret < 0) { + printf("Failed to import buffer\n"); + goto out_vgem; + } + + sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW; + ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); + if (ret) + printf("sync start failed %d\n", errno); + + memset(info.buffer, 0xff, 4096); + + sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; + ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync); + if (ret) + printf("sync end failed %d\n", errno); + + close_handle(vgem_fd, handle); + ret = 0; + +out_vgem: + close(vgem_fd); +out_ion: + ion_close_buffer_fd(&info); + printf("done.\n"); + return ret; +} diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c index ce69c14f51fa..7d1d37c4ef6a 100644 --- a/tools/testing/selftests/android/ion/ionutils.c +++ b/tools/testing/selftests/android/ion/ionutils.c @@ -80,11 +80,6 @@ int ion_export_buffer_fd(struct ion_buffer_info *ion_info) heap_id = MAX_HEAP_COUNT + 1; for (i = 0; i < query.cnt; i++) { if (heap_data[i].type == ion_info->heap_type) { - printf("--------------------------------------\n"); - printf("heap type: %d\n", heap_data[i].type); - printf(" heap id: %d\n", heap_data[i].heap_id); - printf("heap name: %s\n", heap_data[i].name); - printf("--------------------------------------\n"); heap_id = heap_data[i].heap_id; break; } @@ -204,7 +199,6 @@ void ion_close_buffer_fd(struct ion_buffer_info *ion_info) /* Finally, close the client fd */ if (ion_info->ionfd > 0) close(ion_info->ionfd); - printf("<%s>: buffer release successfully....\n", __func__); } } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9cf83f895d98..5e1ab2f0eb79 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -12,3 +12,6 @@ test_tcpbpf_user test_verifier_log feature test_libbpf_open +test_sock +test_sock_addr +urandom_read diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 983dd25d49f4..1eefe211a4a8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -5,3 +5,5 @@ CONFIG_BPF_EVENTS=y CONFIG_TEST_BPF=m CONFIG_CGROUP_BPF=y CONFIG_NETDEVSIM=m +CONFIG_NET_CLS_ACT=y +CONFIG_NET_SCH_INGRESS=y diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..4123d0ab90ba 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void) assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); - assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + assert(system("./urandom_read") == 0); /* disable stack trace collection */ key = 0; val = 1; @@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void) } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + "Didn't find expected build ID from the map\n"); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c index 73bb20cfb9b7..f4d99fabc56d 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/test_sock.c @@ -13,6 +13,7 @@ #include <bpf/bpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #ifndef ARRAY_SIZE # define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index d488f20926e8..2950f80ba7fb 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -15,6 +15,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "bpf_rlimit.h" #define CG_PATH "/foo" #define CONNECT4_PROG_PATH "./connect4_prog.o" diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh index c6e1dcf992c4..9832a875a828 100755 --- a/tools/testing/selftests/bpf/test_sock_addr.sh +++ b/tools/testing/selftests/bpf/test_sock_addr.sh @@ -4,7 +4,7 @@ set -eu ping_once() { - ping -q -c 1 -W 1 ${1%%/*} >/dev/null 2>&1 + ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1 } wait_for_ip() @@ -13,7 +13,7 @@ wait_for_ip() echo -n "Wait for testing IPv4/IPv6 to become available " for _i in $(seq ${MAX_PING_TRIES}); do echo -n "." - if ping_once ${TEST_IPv4} && ping_once ${TEST_IPv6}; then + if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then echo " OK" return fi diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 3e7718b1a9ae..fd7de7eb329e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -11713,6 +11713,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 3fece06e9f64..f82dcc1f8841 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -143,10 +143,14 @@ void suspend(void) int err; struct itimerspec spec = {}; + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); + power_state_fd = open("/sys/power/state", O_RDWR); if (power_state_fd < 0) ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed (is this test running as root?)\n"); + "open(\"/sys/power/state\") failed %s)\n", + strerror(errno)); timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile new file mode 100644 index 000000000000..f7a31392eb2f --- /dev/null +++ b/tools/testing/selftests/cgroup/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wall + +all: + +TEST_GEN_PROGS = test_memcontrol + +include ../lib.mk + +$(OUTPUT)/test_memcontrol: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c new file mode 100644 index 000000000000..b69bdeb4b9fe --- /dev/null +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -0,0 +1,331 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <linux/limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "cgroup_util.h" + +static ssize_t read_text(const char *path, char *buf, size_t max_len) +{ + ssize_t len; + int fd; + + fd = open(path, O_RDONLY); + if (fd < 0) + return fd; + + len = read(fd, buf, max_len - 1); + if (len < 0) + goto out; + + buf[len] = 0; +out: + close(fd); + return len; +} + +static ssize_t write_text(const char *path, char *buf, size_t len) +{ + int fd; + + fd = open(path, O_WRONLY | O_APPEND); + if (fd < 0) + return fd; + + len = write(fd, buf, len); + if (len < 0) { + close(fd); + return len; + } + + close(fd); + + return len; +} + +char *cg_name(const char *root, const char *name) +{ + size_t len = strlen(root) + strlen(name) + 2; + char *ret = malloc(len); + + snprintf(ret, len, "%s/%s", root, name); + + return ret; +} + +char *cg_name_indexed(const char *root, const char *name, int index) +{ + size_t len = strlen(root) + strlen(name) + 10; + char *ret = malloc(len); + + snprintf(ret, len, "%s/%s_%d", root, name, index); + + return ret; +} + +int cg_read(const char *cgroup, const char *control, char *buf, size_t len) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/%s", cgroup, control); + + if (read_text(path, buf, len) >= 0) + return 0; + + return -1; +} + +int cg_read_strcmp(const char *cgroup, const char *control, + const char *expected) +{ + size_t size = strlen(expected) + 1; + char *buf; + + buf = malloc(size); + if (!buf) + return -1; + + if (cg_read(cgroup, control, buf, size)) + return -1; + + return strcmp(expected, buf); +} + +int cg_read_strstr(const char *cgroup, const char *control, const char *needle) +{ + char buf[PAGE_SIZE]; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + return strstr(buf, needle) ? 0 : -1; +} + +long cg_read_long(const char *cgroup, const char *control) +{ + char buf[128]; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + return atol(buf); +} + +long cg_read_key_long(const char *cgroup, const char *control, const char *key) +{ + char buf[PAGE_SIZE]; + char *ptr; + + if (cg_read(cgroup, control, buf, sizeof(buf))) + return -1; + + ptr = strstr(buf, key); + if (!ptr) + return -1; + + return atol(ptr + strlen(key)); +} + +int cg_write(const char *cgroup, const char *control, char *buf) +{ + char path[PATH_MAX]; + size_t len = strlen(buf); + + snprintf(path, sizeof(path), "%s/%s", cgroup, control); + + if (write_text(path, buf, len) == len) + return 0; + + return -1; +} + +int cg_find_unified_root(char *root, size_t len) +{ + char buf[10 * PAGE_SIZE]; + char *fs, *mount, *type; + const char delim[] = "\n\t "; + + if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0) + return -1; + + /* + * Example: + * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0 + */ + for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) { + mount = strtok(NULL, delim); + type = strtok(NULL, delim); + strtok(NULL, delim); + strtok(NULL, delim); + strtok(NULL, delim); + + if (strcmp(fs, "cgroup") == 0 && + strcmp(type, "cgroup2") == 0) { + strncpy(root, mount, len); + return 0; + } + } + + return -1; +} + +int cg_create(const char *cgroup) +{ + return mkdir(cgroup, 0644); +} + +static int cg_killall(const char *cgroup) +{ + char buf[PAGE_SIZE]; + char *ptr = buf; + + if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf))) + return -1; + + while (ptr < buf + sizeof(buf)) { + int pid = strtol(ptr, &ptr, 10); + + if (pid == 0) + break; + if (*ptr) + ptr++; + else + break; + if (kill(pid, SIGKILL)) + return -1; + } + + return 0; +} + +int cg_destroy(const char *cgroup) +{ + int ret; + +retry: + ret = rmdir(cgroup); + if (ret && errno == EBUSY) { + ret = cg_killall(cgroup); + if (ret) + return ret; + usleep(100); + goto retry; + } + + if (ret && errno == ENOENT) + ret = 0; + + return ret; +} + +int cg_run(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int pid, retcode; + + pid = fork(); + if (pid < 0) { + return pid; + } else if (pid == 0) { + char buf[64]; + + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cgroup, "cgroup.procs", buf)) + exit(EXIT_FAILURE); + exit(fn(cgroup, arg)); + } else { + waitpid(pid, &retcode, 0); + if (WIFEXITED(retcode)) + return WEXITSTATUS(retcode); + else + return -1; + } +} + +int cg_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg) +{ + int pid; + + pid = fork(); + if (pid == 0) { + char buf[64]; + + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cgroup, "cgroup.procs", buf)) + exit(EXIT_FAILURE); + exit(fn(cgroup, arg)); + } + + return pid; +} + +int get_temp_fd(void) +{ + return open(".", O_TMPFILE | O_RDWR | O_EXCL); +} + +int alloc_pagecache(int fd, size_t size) +{ + char buf[PAGE_SIZE]; + struct stat st; + int i; + + if (fstat(fd, &st)) + goto cleanup; + + size += st.st_size; + + if (ftruncate(fd, size)) + goto cleanup; + + for (i = 0; i < size; i += sizeof(buf)) + read(fd, buf, sizeof(buf)); + + return 0; + +cleanup: + return -1; +} + +int alloc_anon(const char *cgroup, void *arg) +{ + size_t size = (unsigned long)arg; + char *buf, *ptr; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + free(buf); + return 0; +} + +int is_swap_enabled(void) +{ + char buf[PAGE_SIZE]; + const char delim[] = "\n"; + int cnt = 0; + char *line; + + if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0) + return -1; + + for (line = strtok(buf, delim); line; line = strtok(NULL, delim)) + cnt++; + + return cnt > 1; +} diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h new file mode 100644 index 000000000000..fe82a297d4e0 --- /dev/null +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <stdlib.h> + +#define PAGE_SIZE 4096 + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define MB(x) (x << 20) + +/* + * Checks if two given values differ by less than err% of their sum. + */ +static inline int values_close(long a, long b, int err) +{ + return abs(a - b) <= (a + b) / 100 * err; +} + +extern int cg_find_unified_root(char *root, size_t len); +extern char *cg_name(const char *root, const char *name); +extern char *cg_name_indexed(const char *root, const char *name, int index); +extern int cg_create(const char *cgroup); +extern int cg_destroy(const char *cgroup); +extern int cg_read(const char *cgroup, const char *control, + char *buf, size_t len); +extern int cg_read_strcmp(const char *cgroup, const char *control, + const char *expected); +extern int cg_read_strstr(const char *cgroup, const char *control, + const char *needle); +extern long cg_read_long(const char *cgroup, const char *control); +long cg_read_key_long(const char *cgroup, const char *control, const char *key); +extern int cg_write(const char *cgroup, const char *control, char *buf); +extern int cg_run(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg); +extern int cg_run_nowait(const char *cgroup, + int (*fn)(const char *cgroup, void *arg), + void *arg); +extern int get_temp_fd(void); +extern int alloc_pagecache(int fd, size_t size); +extern int alloc_anon(const char *cgroup, void *arg); +extern int is_swap_enabled(void); diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c new file mode 100644 index 000000000000..cf0bddc9d271 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -0,0 +1,1015 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/socket.h> +#include <sys/wait.h> +#include <arpa/inet.h> +#include <netinet/in.h> +#include <netdb.h> +#include <errno.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +/* + * This test creates two nested cgroups with and without enabling + * the memory controller. + */ +static int test_memcg_subtree_control(const char *root) +{ + char *parent, *child, *parent2, *child2; + int ret = KSFT_FAIL; + char buf[PAGE_SIZE]; + + /* Create two nested cgroups with the memory controller enabled */ + parent = cg_name(root, "memcg_test_0"); + child = cg_name(root, "memcg_test_0/memcg_test_1"); + if (!parent || !child) + goto cleanup; + + if (cg_create(parent)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(child)) + goto cleanup; + + if (cg_read_strstr(child, "cgroup.controllers", "memory")) + goto cleanup; + + /* Create two nested cgroups without enabling memory controller */ + parent2 = cg_name(root, "memcg_test_1"); + child2 = cg_name(root, "memcg_test_1/memcg_test_1"); + if (!parent2 || !child2) + goto cleanup; + + if (cg_create(parent2)) + goto cleanup; + + if (cg_create(child2)) + goto cleanup; + + if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf))) + goto cleanup; + + if (!cg_read_strstr(child2, "cgroup.controllers", "memory")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(child); + cg_destroy(parent); + free(parent); + free(child); + + cg_destroy(child2); + cg_destroy(parent2); + free(parent2); + free(child2); + + return ret; +} + +static int alloc_anon_50M_check(const char *cgroup, void *arg) +{ + size_t size = MB(50); + char *buf, *ptr; + long anon, current; + int ret = -1; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + current = cg_read_long(cgroup, "memory.current"); + if (current < size) + goto cleanup; + + if (!values_close(size, current, 3)) + goto cleanup; + + anon = cg_read_key_long(cgroup, "memory.stat", "anon "); + if (anon < 0) + goto cleanup; + + if (!values_close(anon, current, 3)) + goto cleanup; + + ret = 0; +cleanup: + free(buf); + return ret; +} + +static int alloc_pagecache_50M_check(const char *cgroup, void *arg) +{ + size_t size = MB(50); + int ret = -1; + long current, file; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + return -1; + + if (alloc_pagecache(fd, size)) + goto cleanup; + + current = cg_read_long(cgroup, "memory.current"); + if (current < size) + goto cleanup; + + file = cg_read_key_long(cgroup, "memory.stat", "file "); + if (file < 0) + goto cleanup; + + if (!values_close(file, current, 10)) + goto cleanup; + + ret = 0; + +cleanup: + close(fd); + return ret; +} + +/* + * This test create a memory cgroup, allocates + * some anonymous memory and some pagecache + * and check memory.current and some memory.stat values. + */ +static int test_memcg_current(const char *root) +{ + int ret = KSFT_FAIL; + long current; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + if (cg_run(memcg, alloc_anon_50M_check, NULL)) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_50M_check, NULL)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +static int alloc_pagecache_50M(const char *cgroup, void *arg) +{ + int fd = (long)arg; + + return alloc_pagecache(fd, MB(50)); +} + +static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) +{ + int fd = (long)arg; + int ppid = getppid(); + + if (alloc_pagecache(fd, MB(50))) + return -1; + + while (getppid() == ppid) + sleep(1); + + return 0; +} + +/* + * First, this test creates the following hierarchy: + * A memory.min = 50M, memory.max = 200M + * A/B memory.min = 50M, memory.current = 50M + * A/B/C memory.min = 75M, memory.current = 50M + * A/B/D memory.min = 25M, memory.current = 50M + * A/B/E memory.min = 500M, memory.current = 0 + * A/B/F memory.min = 0, memory.current = 50M + * + * Usages are pagecache, but the test keeps a running + * process in every leaf cgroup. + * Then it creates A/G and creates a significant + * memory pressure in it. + * + * A/B memory.current ~= 50M + * A/B/C memory.current ~= 33M + * A/B/D memory.current ~= 17M + * A/B/E memory.current ~= 0 + * + * After that it tries to allocate more than there is + * unprotected memory in A available, and checks + * checks that memory.min protects pagecache even + * in this case. + */ +static int test_memcg_min(const char *root) +{ + int ret = KSFT_FAIL; + char *parent[3] = {NULL}; + char *children[4] = {NULL}; + long c[4]; + int i, attempts; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + parent[0] = cg_name(root, "memcg_test_0"); + if (!parent[0]) + goto cleanup; + + parent[1] = cg_name(parent[0], "memcg_test_1"); + if (!parent[1]) + goto cleanup; + + parent[2] = cg_name(parent[0], "memcg_test_2"); + if (!parent[2]) + goto cleanup; + + if (cg_create(parent[0])) + goto cleanup; + + if (cg_read_long(parent[0], "memory.min")) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_write(parent[0], "memory.max", "200M")) + goto cleanup; + + if (cg_write(parent[0], "memory.swap.max", "0")) + goto cleanup; + + if (cg_create(parent[1])) + goto cleanup; + + if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(parent[2])) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i] = cg_name_indexed(parent[1], "child_memcg", i); + if (!children[i]) + goto cleanup; + + if (cg_create(children[i])) + goto cleanup; + + if (i == 2) + continue; + + cg_run_nowait(children[i], alloc_pagecache_50M_noexit, + (void *)(long)fd); + } + + if (cg_write(parent[0], "memory.min", "50M")) + goto cleanup; + if (cg_write(parent[1], "memory.min", "50M")) + goto cleanup; + if (cg_write(children[0], "memory.min", "75M")) + goto cleanup; + if (cg_write(children[1], "memory.min", "25M")) + goto cleanup; + if (cg_write(children[2], "memory.min", "500M")) + goto cleanup; + if (cg_write(children[3], "memory.min", "0")) + goto cleanup; + + attempts = 0; + while (!values_close(cg_read_long(parent[1], "memory.current"), + MB(150), 3)) { + if (attempts++ > 5) + break; + sleep(1); + } + + if (cg_run(parent[2], alloc_anon, (void *)MB(148))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) + c[i] = cg_read_long(children[i], "memory.current"); + + if (!values_close(c[0], MB(33), 10)) + goto cleanup; + + if (!values_close(c[1], MB(17), 10)) + goto cleanup; + + if (!values_close(c[2], 0, 1)) + goto cleanup; + + if (!cg_run(parent[2], alloc_anon, (void *)MB(170))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { + if (!children[i]) + continue; + + cg_destroy(children[i]); + free(children[i]); + } + + for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { + if (!parent[i]) + continue; + + cg_destroy(parent[i]); + free(parent[i]); + } + close(fd); + return ret; +} + +/* + * First, this test creates the following hierarchy: + * A memory.low = 50M, memory.max = 200M + * A/B memory.low = 50M, memory.current = 50M + * A/B/C memory.low = 75M, memory.current = 50M + * A/B/D memory.low = 25M, memory.current = 50M + * A/B/E memory.low = 500M, memory.current = 0 + * A/B/F memory.low = 0, memory.current = 50M + * + * Usages are pagecache. + * Then it creates A/G an creates a significant + * memory pressure in it. + * + * Then it checks actual memory usages and expects that: + * A/B memory.current ~= 50M + * A/B/ memory.current ~= 33M + * A/B/D memory.current ~= 17M + * A/B/E memory.current ~= 0 + * + * After that it tries to allocate more than there is + * unprotected memory in A available, + * and checks low and oom events in memory.events. + */ +static int test_memcg_low(const char *root) +{ + int ret = KSFT_FAIL; + char *parent[3] = {NULL}; + char *children[4] = {NULL}; + long low, oom; + long c[4]; + int i; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + parent[0] = cg_name(root, "memcg_test_0"); + if (!parent[0]) + goto cleanup; + + parent[1] = cg_name(parent[0], "memcg_test_1"); + if (!parent[1]) + goto cleanup; + + parent[2] = cg_name(parent[0], "memcg_test_2"); + if (!parent[2]) + goto cleanup; + + if (cg_create(parent[0])) + goto cleanup; + + if (cg_read_long(parent[0], "memory.low")) + goto cleanup; + + if (cg_write(parent[0], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_write(parent[0], "memory.max", "200M")) + goto cleanup; + + if (cg_write(parent[0], "memory.swap.max", "0")) + goto cleanup; + + if (cg_create(parent[1])) + goto cleanup; + + if (cg_write(parent[1], "cgroup.subtree_control", "+memory")) + goto cleanup; + + if (cg_create(parent[2])) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) { + children[i] = cg_name_indexed(parent[1], "child_memcg", i); + if (!children[i]) + goto cleanup; + + if (cg_create(children[i])) + goto cleanup; + + if (i == 2) + continue; + + if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd)) + goto cleanup; + } + + if (cg_write(parent[0], "memory.low", "50M")) + goto cleanup; + if (cg_write(parent[1], "memory.low", "50M")) + goto cleanup; + if (cg_write(children[0], "memory.low", "75M")) + goto cleanup; + if (cg_write(children[1], "memory.low", "25M")) + goto cleanup; + if (cg_write(children[2], "memory.low", "500M")) + goto cleanup; + if (cg_write(children[3], "memory.low", "0")) + goto cleanup; + + if (cg_run(parent[2], alloc_anon, (void *)MB(148))) + goto cleanup; + + if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(children); i++) + c[i] = cg_read_long(children[i], "memory.current"); + + if (!values_close(c[0], MB(33), 10)) + goto cleanup; + + if (!values_close(c[1], MB(17), 10)) + goto cleanup; + + if (!values_close(c[2], 0, 1)) + goto cleanup; + + if (cg_run(parent[2], alloc_anon, (void *)MB(166))) { + fprintf(stderr, + "memory.low prevents from allocating anon memory\n"); + goto cleanup; + } + + for (i = 0; i < ARRAY_SIZE(children); i++) { + oom = cg_read_key_long(children[i], "memory.events", "oom "); + low = cg_read_key_long(children[i], "memory.events", "low "); + + if (oom) + goto cleanup; + if (i < 2 && low <= 0) + goto cleanup; + if (i >= 2 && low) + goto cleanup; + } + + ret = KSFT_PASS; + +cleanup: + for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) { + if (!children[i]) + continue; + + cg_destroy(children[i]); + free(children[i]); + } + + for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) { + if (!parent[i]) + continue; + + cg_destroy(parent[i]); + free(parent[i]); + } + close(fd); + return ret; +} + +static int alloc_pagecache_max_30M(const char *cgroup, void *arg) +{ + size_t size = MB(50); + int ret = -1; + long current; + int fd; + + fd = get_temp_fd(); + if (fd < 0) + return -1; + + if (alloc_pagecache(fd, size)) + goto cleanup; + + current = cg_read_long(cgroup, "memory.current"); + if (current <= MB(29) || current > MB(30)) + goto cleanup; + + ret = 0; + +cleanup: + close(fd); + return ret; + +} + +/* + * This test checks that memory.high limits the amount of + * memory which can be consumed by either anonymous memory + * or pagecache. + */ +static int test_memcg_high(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long high; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.high", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.high", "30M")) + goto cleanup; + + if (cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (!cg_run(memcg, alloc_pagecache_50M_check, NULL)) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) + goto cleanup; + + high = cg_read_key_long(memcg, "memory.events", "high "); + if (high <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +/* + * This test checks that memory.max limits the amount of + * memory which can be consumed by either anonymous memory + * or pagecache. + */ +static int test_memcg_max(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long current, max; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.max", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + /* Should be killed by OOM killer */ + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_run(memcg, alloc_pagecache_max_30M, NULL)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current > MB(30) || !current) + goto cleanup; + + max = cg_read_key_long(memcg, "memory.events", "max "); + if (max <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) +{ + long mem_max = (long)arg; + size_t size = MB(50); + char *buf, *ptr; + long mem_current, swap_current; + int ret = -1; + + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; + + mem_current = cg_read_long(cgroup, "memory.current"); + if (!mem_current || !values_close(mem_current, mem_max, 3)) + goto cleanup; + + swap_current = cg_read_long(cgroup, "memory.swap.current"); + if (!swap_current || + !values_close(mem_current + swap_current, size, 3)) + goto cleanup; + + ret = 0; +cleanup: + free(buf); + return ret; +} + +/* + * This test checks that memory.swap.max limits the amount of + * anonymous memory which can be swapped out. + */ +static int test_memcg_swap_max(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + long max; + + if (!is_swap_enabled()) + return KSFT_SKIP; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_read_long(memcg, "memory.swap.current")) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (cg_read_strcmp(memcg, "memory.max", "max\n")) + goto cleanup; + + if (cg_read_strcmp(memcg, "memory.swap.max", "max\n")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "30M")) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + /* Should be killed by OOM killer */ + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) + goto cleanup; + + if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30))) + goto cleanup; + + max = cg_read_key_long(memcg, "memory.events", "max "); + if (max <= 0) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +/* + * This test disables swapping and tries to allocate anonymous memory + * up to OOM. Then it checks for oom and oom_kill events in + * memory.events. + */ +static int test_memcg_oom_events(const char *root) +{ + int ret = KSFT_FAIL; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + if (cg_write(memcg, "memory.max", "30M")) + goto cleanup; + + if (cg_write(memcg, "memory.swap.max", "0")) + goto cleanup; + + if (!cg_run(memcg, alloc_anon, (void *)MB(100))) + goto cleanup; + + if (cg_read_strcmp(memcg, "cgroup.procs", "")) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom ") != 1) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +struct tcp_server_args { + unsigned short port; + int ctl[2]; +}; + +static int tcp_server(const char *cgroup, void *arg) +{ + struct tcp_server_args *srv_args = arg; + struct sockaddr_in6 saddr = { 0 }; + socklen_t slen = sizeof(saddr); + int sk, client_sk, ctl_fd, yes = 1, ret = -1; + + close(srv_args->ctl[0]); + ctl_fd = srv_args->ctl[1]; + + saddr.sin6_family = AF_INET6; + saddr.sin6_addr = in6addr_any; + saddr.sin6_port = htons(srv_args->port); + + sk = socket(AF_INET6, SOCK_STREAM, 0); + if (sk < 0) + return ret; + + if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) + goto cleanup; + + if (bind(sk, (struct sockaddr *)&saddr, slen)) { + write(ctl_fd, &errno, sizeof(errno)); + goto cleanup; + } + + if (listen(sk, 1)) + goto cleanup; + + ret = 0; + if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) { + ret = -1; + goto cleanup; + } + + client_sk = accept(sk, NULL, NULL); + if (client_sk < 0) + goto cleanup; + + ret = -1; + for (;;) { + uint8_t buf[0x100000]; + + if (write(client_sk, buf, sizeof(buf)) <= 0) { + if (errno == ECONNRESET) + ret = 0; + break; + } + } + + close(client_sk); + +cleanup: + close(sk); + return ret; +} + +static int tcp_client(const char *cgroup, unsigned short port) +{ + const char server[] = "localhost"; + struct addrinfo *ai; + char servport[6]; + int retries = 0x10; /* nice round number */ + int sk, ret; + + snprintf(servport, sizeof(servport), "%hd", port); + ret = getaddrinfo(server, servport, NULL, &ai); + if (ret) + return ret; + + sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if (sk < 0) + goto free_ainfo; + + ret = connect(sk, ai->ai_addr, ai->ai_addrlen); + if (ret < 0) + goto close_sk; + + ret = KSFT_FAIL; + while (retries--) { + uint8_t buf[0x100000]; + long current, sock; + + if (read(sk, buf, sizeof(buf)) <= 0) + goto close_sk; + + current = cg_read_long(cgroup, "memory.current"); + sock = cg_read_key_long(cgroup, "memory.stat", "sock "); + + if (current < 0 || sock < 0) + goto close_sk; + + if (current < sock) + goto close_sk; + + if (values_close(current, sock, 10)) { + ret = KSFT_PASS; + break; + } + } + +close_sk: + close(sk); +free_ainfo: + freeaddrinfo(ai); + return ret; +} + +/* + * This test checks socket memory accounting. + * The test forks a TCP server listens on a random port between 1000 + * and 61000. Once it gets a client connection, it starts writing to + * its socket. + * The TCP client interleaves reads from the socket with check whether + * memory.current and memory.stat.sock are similar. + */ +static int test_memcg_sock(const char *root) +{ + int bind_retries = 5, ret = KSFT_FAIL, pid, err; + unsigned short port; + char *memcg; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + while (bind_retries--) { + struct tcp_server_args args; + + if (pipe(args.ctl)) + goto cleanup; + + port = args.port = 1000 + rand() % 60000; + + pid = cg_run_nowait(memcg, tcp_server, &args); + if (pid < 0) + goto cleanup; + + close(args.ctl[1]); + if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err)) + goto cleanup; + close(args.ctl[0]); + + if (!err) + break; + if (err != EADDRINUSE) + goto cleanup; + + waitpid(pid, NULL, 0); + } + + if (err == EADDRINUSE) { + ret = KSFT_SKIP; + goto cleanup; + } + + if (tcp_client(memcg, port) != KSFT_PASS) + goto cleanup; + + waitpid(pid, &err, 0); + if (WEXITSTATUS(err)) + goto cleanup; + + if (cg_read_long(memcg, "memory.current") < 0) + goto cleanup; + + if (cg_read_key_long(memcg, "memory.stat", "sock ")) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + cg_destroy(memcg); + free(memcg); + + return ret; +} + +#define T(x) { x, #x } +struct memcg_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_memcg_subtree_control), + T(test_memcg_current), + T(test_memcg_min), + T(test_memcg_low), + T(test_memcg_high), + T(test_memcg_max), + T(test_memcg_oom_events), + T(test_memcg_swap_max), + T(test_memcg_sock), +}; +#undef T + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index f3a8933c1275..bab13dd025a6 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 prerequisite() { @@ -9,7 +11,7 @@ prerequisite() if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi taskset -p 01 $$ @@ -18,12 +20,12 @@ prerequisite() if [ ! -d "$SYSFS" ]; then echo $msg sysfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then echo $msg cpu hotplug is not supported >&2 - exit 0 + exit $ksft_skip fi echo "CPU online/offline summary:" @@ -32,7 +34,7 @@ prerequisite() if [[ "$online_cpus" = "$online_max" ]]; then echo "$msg: since there is only one cpu: $online_cpus" - exit 0 + exit $ksft_skip fi echo -e "\t Cpus in online state: $online_cpus" @@ -237,12 +239,12 @@ prerequisite_extra() if [ ! -d "$DEBUGFS" ]; then echo $msg debugfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then echo $msg cpu-notifier-error-inject module is not available >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh index d83922de9d89..31f8c9a76c5f 100755 --- a/tools/testing/selftests/cpufreq/main.sh +++ b/tools/testing/selftests/cpufreq/main.sh @@ -13,6 +13,9 @@ SYSFS= CPUROOT= CPUFREQROOT= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + helpme() { printf "Usage: $0 [-h] [-todg args] @@ -38,7 +41,7 @@ prerequisite() if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 2 + exit $ksft_skip fi taskset -p 01 $$ diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh new file mode 100755 index 000000000000..1893d0f59ad7 --- /dev/null +++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; } + +while getopts "h:b:p:" arg; do + case "${arg}" in + h) + usage + ;; + b) + busid=${OPTARG} + ;; + p) + tools_path=${OPTARG} + ;; + *) + usage + ;; + esac +done +shift $((OPTIND-1)) + +if [ -z "${busid}" ]; then + usage +fi + +echo "Running USB over IP Testing on $busid"; + +test_end_msg="End of USB over IP Testing on $busid" + +if [ $UID != 0 ]; then + echo "Please run usbip_test as root [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Load usbip_host module" +if ! /sbin/modprobe -q -n usbip_host; then + echo "usbip_test: module usbip_host is not found [SKIP]" + echo $test_end_msg + exit $ksft_skip +fi + +if /sbin/modprobe -q usbip_host; then + /sbin/modprobe -q -r test_bitmap + echo "usbip_test: module usbip_host is loaded [OK]" +else + echo "usbip_test: module usbip_host failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi + +echo "Load vhci_hcd module" +if /sbin/modprobe -q vhci_hcd; then + /sbin/modprobe -q -r test_bitmap + echo "usbip_test: module vhci_hcd is loaded [OK]" +else + echo "usbip_test: module vhci_hcd failed to load [FAIL]" + echo $test_end_msg + exit 1 +fi +echo "==============================================================" + +cd $tools_path; + +if [ ! -f src/usbip ]; then + echo "Please build usbip tools" + echo $test_end_msg + exit $ksft_skip +fi + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Run lsusb to see all usb devices" +lsusb -t; +echo "==============================================================" + +src/usbipd -D; + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be under usbip_host control" +lsusb -t; +echo "==============================================================" + +echo "bind devices - expect already bound messages" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "unbind devices"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "unbind devices - expect no devices bound message"; +src/usbip unbind -b $busid; +echo "==============================================================" + +echo "Get exported devices from localhost - expect to see none"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should fail with no devices" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "bind devices"; +src/usbip bind -b $busid; +echo "==============================================================" + +echo "List imported devices - expect to see exported devices"; +src/usbip list -r localhost; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - should work" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "List imported devices - expect to see imported devices"; +src/usbip port; +echo "==============================================================" + +echo "Import devices from localhost - expect already imported messages" +src/usbip attach -r localhost -b $busid; +echo "==============================================================" + +echo "Un-import devices"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "List imported devices - expect to see none"; +src/usbip port; +echo "==============================================================" + +echo "Un-import devices - expect no devices to detach messages"; +src/usbip detach -p 00; +src/usbip detach -p 01; +echo "==============================================================" + +echo "Detach invalid port tests - expect invalid port error message"; +src/usbip detach -p 100; +echo "==============================================================" + +echo "Expect to see export-able devices"; +src/usbip list -l; +echo "==============================================================" + +echo "Remove usbip_host module"; +rmmod usbip_host; + +echo "Run lsusb - bound devices should be rebound to original drivers" +lsusb -t; +echo "==============================================================" + +echo "Run bind without usbip_host - expect fail" +src/usbip bind -b $busid; +echo "==============================================================" + +echo "Run lsusb - devices that failed to bind aren't bound to any driver" +lsusb -t; +echo "==============================================================" + +echo "modprobe usbip_host - does it work?" +/sbin/modprobe usbip_host +echo "Should see -busid- is not in match_busid table... skip! dmesg" +echo "==============================================================" +dmesg | grep "is not in match_busid table" +echo "==============================================================" + +echo $test_end_msg diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index c6d5790575ae..a47029a799d2 100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh @@ -4,18 +4,21 @@ efivarfs_mount=/sys/firmware/efi/efivars test_guid=210be57c-9849-4fc7-a635-e6382d1aec27 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + check_prereqs() { local msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then echo $msg efivarfs is not mounted on $efivarfs_mount >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c index 67cd4597db2b..47cbf54d0801 100644 --- a/tools/testing/selftests/exec/execveat.c +++ b/tools/testing/selftests/exec/execveat.c @@ -20,6 +20,8 @@ #include <string.h> #include <unistd.h> +#include "../kselftest.h" + static char longpath[2 * PATH_MAX] = ""; static char *envp[] = { "IN_TEST=yes", NULL, NULL }; static char *argv[] = { "execveat", "99", NULL }; @@ -249,8 +251,8 @@ static int run_tests(void) errno = 0; execveat_(-1, NULL, NULL, NULL, 0); if (errno == ENOSYS) { - printf("[FAIL] ENOSYS calling execveat - no kernel support?\n"); - return 1; + ksft_exit_skip( + "ENOSYS calling execveat - no kernel support?\n"); } /* Change file position to confirm it doesn't affect anything */ diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 4e6d09fb166f..129880fb42d3 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,8 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_PROGS := dnotify_test devpts_pts -all: $(TEST_PROGS) -include ../lib.mk +CFLAGS += -I../../../../usr/include/ +TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS_EXTENDED := dnotify_test -clean: - rm -fr $(TEST_PROGS) +include ../lib.mk diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c index b9055e974289..b1fc9b916ace 100644 --- a/tools/testing/selftests/filesystems/devpts_pts.c +++ b/tools/testing/selftests/filesystems/devpts_pts.c @@ -8,9 +8,10 @@ #include <stdlib.h> #include <string.h> #include <unistd.h> -#include <sys/ioctl.h> +#include <asm/ioctls.h> #include <sys/mount.h> #include <sys/wait.h> +#include "../kselftest.h" static bool terminal_dup2(int duplicate, int original) { @@ -125,10 +126,12 @@ static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents) if (errno == EINVAL) { fprintf(stderr, "TIOCGPTPEER is not supported. " "Skipping test.\n"); - fret = EXIT_SUCCESS; + fret = KSFT_SKIP; + } else { + fprintf(stderr, + "Failed to perform TIOCGPTPEER ioctl\n"); + fret = EXIT_FAILURE; } - - fprintf(stderr, "Failed to perform TIOCGPTPEER ioctl\n"); goto do_cleanup; } @@ -279,9 +282,9 @@ int main(int argc, char *argv[]) int ret; if (!isatty(STDIN_FILENO)) { - fprintf(stderr, "Standard input file desciptor is not attached " + fprintf(stderr, "Standard input file descriptor is not attached " "to a terminal. Skipping test\n"); - exit(EXIT_FAILURE); + exit(KSFT_SKIP); } ret = unshare(CLONE_NEWNS); diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index 826f38d5dd19..261c81f08606 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -4,6 +4,7 @@ all: TEST_PROGS := fw_run_tests.sh +TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh include ../lib.mk diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh index 8e2e34a2ca69..70d18be46af5 100755 --- a/tools/testing/selftests/firmware/fw_fallback.sh +++ b/tools/testing/selftests/firmware/fw_fallback.sh @@ -74,7 +74,7 @@ load_fw_custom() { if [ ! -e "$DIR"/trigger_custom_fallback ]; then echo "$0: custom fallback trigger not present, ignoring test" >&2 - return 1 + exit $ksft_skip fi local name="$1" @@ -107,7 +107,7 @@ load_fw_custom_cancel() { if [ ! -e "$DIR"/trigger_custom_fallback ]; then echo "$0: canceling custom fallback trigger not present, ignoring test" >&2 - return 1 + exit $ksft_skip fi local name="$1" diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 6452d2129cd9..a4320c4b44dc 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -30,6 +30,7 @@ fi if [ ! -e "$DIR"/trigger_async_request ]; then echo "$0: empty filename: async trigger not present, ignoring test" >&2 + exit $ksft_skip else if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then echo "$0: empty filename should not succeed (async)" >&2 @@ -69,6 +70,7 @@ fi # Try the asynchronous version too if [ ! -e "$DIR"/trigger_async_request ]; then echo "$0: firmware loading: async trigger not present, ignoring test" >&2 + exit $ksft_skip else if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then echo "$0: could not trigger async request" >&2 @@ -89,7 +91,7 @@ test_config_present() { if [ ! -f $DIR/reset ]; then echo "Configuration triggers not present, ignoring test" - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh index 9ea31b57d71a..6c5f1b2ffb74 100755 --- a/tools/testing/selftests/firmware/fw_lib.sh +++ b/tools/testing/selftests/firmware/fw_lib.sh @@ -9,11 +9,14 @@ DIR=/sys/devices/virtual/misc/test_firmware PROC_CONFIG="/proc/config.gz" TEST_DIR=$(dirname $0) +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + print_reqs_exit() { echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip } test_modprobe() @@ -88,7 +91,7 @@ verify_reqs() if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "usermode helper disabled so ignoring test" - exit 0 + exit $ksft_skip fi fi } @@ -154,11 +157,13 @@ test_finish() if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout fi - if [ "$OLD_FWPATH" = "" ]; then - OLD_FWPATH=" " - fi if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then - echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + if [ "$OLD_FWPATH" = "" ]; then + # A zero-length write won't work; write a null byte + printf '\000' >/sys/module/firmware_class/parameters/path + else + echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path + fi fi if [ -f $FW ]; then rm -f "$FW" diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh index 06d638e9dc62..cffdd4eb0a57 100755 --- a/tools/testing/selftests/firmware/fw_run_tests.sh +++ b/tools/testing/selftests/firmware/fw_run_tests.sh @@ -66,5 +66,5 @@ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then run_test_config_0003 else echo "Running basic kernel configuration, working with your config" - run_test + run_tests fi diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index df3dd7fe5f9b..2a4f16fc9819 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -59,6 +59,13 @@ disable_events() { echo 0 > events/enable } +clear_synthetic_events() { # reset all current synthetic events + grep -v ^# synthetic_events | + while read line; do + echo "!$line" >> synthetic_events + done +} + initialize_ftrace() { # Reset ftrace to initial-state # As the initial state, ftrace will be set to nop tracer, # no events, no triggers, no filters, no function filters, diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc new file mode 100644 index 000000000000..2aabab363cfb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc @@ -0,0 +1,39 @@ +#!/bin/sh +# description: event trigger - test extended error support + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset + +echo "Test extended error support" +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null +if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then + fail "Failed to generate extended error in histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc new file mode 100644 index 000000000000..7fd5b4a8f060 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test field variable support + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test field variable support" + +echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events +echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create inter-event histogram" +fi + +if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to create histogram with field variable" +fi + +echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger + +if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then + fail "Failed to remove histogram with field variable" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc new file mode 100644 index 000000000000..c93dbe38b5df --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# description: event trigger - test inter-event combined histogram trigger + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +reset_tracer +do_reset +clear_synthetic_events + +echo "Test create synthetic event" + +echo 'waking_latency u64 lat pid_t pid' > synthetic_events +if [ ! -d events/synthetic/waking_latency ]; then + fail "Failed to create waking_latency synthetic event" +fi + +echo "Test combined histogram" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger +echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger + +echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events +echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger + +echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger + +ping localhost -c 3 +if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then + fail "Failed to create combined histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc new file mode 100644 index 000000000000..c193dce611a2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# description: event trigger - test multiple actions on hist trigger + + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test multiple actions on hist trigger" +echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events +TRIGGER1=events/sched/sched_wakeup/trigger +TRIGGER2=events/sched/sched_switch/trigger + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1 +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2 +echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc new file mode 100644 index 000000000000..e84e7d048566 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then + fail "Failed to create onmatch action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc new file mode 100644 index 000000000000..7907d8aacde3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc @@ -0,0 +1,50 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmatch-onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test create histogram for synthetic event" +echo "Test histogram variables,simple expression support and onmatch-onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger +ping localhost -c 5 +if [ ! grep -q "ping" events/synthetic/wakeup_latency/hist -o ! grep -q "max:" events/sched/sched_switch/hist]; then + fail "Failed to create onmatch-onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc new file mode 100644 index 000000000000..38b7ed6242b2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc @@ -0,0 +1,48 @@ +#!/bin/sh +# description: event trigger - test inter-event histogram trigger onmax action + +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +echo "Test onmax action" + +echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger +echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger +ping localhost -c 3 +if ! grep -q "max:" events/sched/sched_switch/hist; then + fail "Failed to create onmax action inter-event histogram" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc new file mode 100644 index 000000000000..cef11377dcbd --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc @@ -0,0 +1,54 @@ +#!/bin/sh +# description: event trigger - test synthetic event create remove +do_reset() { + reset_trigger + echo > set_event + clear_trace +} + +fail() { #msg + do_reset + echo $1 + exit_fail +} + +if [ ! -f set_event ]; then + echo "event tracing is not supported" + exit_unsupported +fi + +if [ ! -f synthetic_events ]; then + echo "synthetic event is not supported" + exit_unsupported +fi + +clear_synthetic_events +reset_tracer +do_reset + +echo "Test create synthetic event" + +echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ ! -d events/synthetic/wakeup_latency ]; then + fail "Failed to create wakeup_latency synthetic event" +fi + +reset_trigger + +echo "Test create synthetic event with an error" +echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events > /dev/null +if [ -d events/synthetic/wakeup_latency ]; then + fail "Created wakeup_latency synthetic event with an invalid format" +fi + +reset_trigger + +echo "Test remove synthetic event" +echo '!wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events +if [ -d events/synthetic/wakeup_latency ]; then + fail "Failed to delete wakeup_latency synthetic event" +fi + +do_reset + +exit 0 diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile index a63e8453984d..12631f0076a1 100644 --- a/tools/testing/selftests/futex/Makefile +++ b/tools/testing/selftests/futex/Makefile @@ -17,10 +17,6 @@ all: fi \ done -override define RUN_TESTS - @cd $(OUTPUT); ./run.sh -endef - override define INSTALL_RULE mkdir -p $(INSTALL_PATH) install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) @@ -32,10 +28,6 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS - echo "./run.sh" -endef - override define CLEAN @for DIR in $(SUBDIRS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh index 183fb932edbd..7f35b9880485 100755 --- a/tools/testing/selftests/gpio/gpio-mockup.sh +++ b/tools/testing/selftests/gpio/gpio-mockup.sh @@ -2,10 +2,11 @@ # SPDX-License-Identifier: GPL-2.0 #exit status -#1: run as non-root user +#1: Internal error #2: sysfs/debugfs not mount #3: insert module fail when gpio-mockup is a module. -#4: other reason. +#4: Skip test including run as non-root user. +#5: other reason. SYSFS= GPIO_SYSFS= @@ -15,6 +16,9 @@ GPIO_DEBUGFS= dev_type= module= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + usage() { echo "Usage:" @@ -34,7 +38,7 @@ prerequisite() msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 1 + exit $ksft_skip fi SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` if [ ! -d "$SYSFS" ]; then @@ -73,7 +77,7 @@ remove_module() die() { remove_module - exit 4 + exit 5 } test_chips() diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile index 5a3f7d37e912..7340fd6a9a9f 100644 --- a/tools/testing/selftests/intel_pstate/Makefile +++ b/tools/testing/selftests/intel_pstate/Makefile @@ -2,7 +2,10 @@ CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE LDLIBS := $(LDLIBS) -lm -ifeq (,$(filter $(ARCH),x86)) +uname_M := $(shell uname -m 2>/dev/null || echo not) +ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) + +ifeq (x86,$(ARCH)) TEST_GEN_FILES := msr aperf endif diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c index d21edea9c560..f6cd03a87493 100644 --- a/tools/testing/selftests/intel_pstate/aperf.c +++ b/tools/testing/selftests/intel_pstate/aperf.c @@ -9,6 +9,8 @@ #include <sys/timeb.h> #include <sched.h> #include <errno.h> +#include <string.h> +#include "../kselftest.h" void usage(char *name) { printf ("Usage: %s cpunum\n", name); @@ -41,8 +43,8 @@ int main(int argc, char **argv) { fd = open(msr_file_name, O_RDONLY); if (fd == -1) { - perror("Failed to open"); - return 1; + printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno)); + return KSFT_SKIP; } CPU_ZERO(&cpuset); diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index c670359becc6..e7008f614ad7 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -30,9 +30,18 @@ EVALUATE_ONLY=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then echo "$0 # Skipped: Test can only run on x86 architectures." - exit 0 + exit $ksft_skip +fi + +msg="skip all tests:" +if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then + echo $msg please run this as root >&2 + exit $ksft_skip fi max_cpus=$(($(nproc)-1)) @@ -48,11 +57,12 @@ function run_test () { echo "sleeping for 5 seconds" sleep 5 - num_freqs=$(cat /proc/cpuinfo | grep MHz | sort -u | wc -l) - if [ $num_freqs -le 2 ]; then - cat /proc/cpuinfo | grep MHz | sort -u | tail -1 > /tmp/result.$1 + grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs + num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ') + if [ $num_freqs -ge 2 ]; then + tail -n 1 /tmp/result.freqs > /tmp/result.$1 else - cat /proc/cpuinfo | grep MHz | sort -u > /tmp/result.$1 + cp /tmp/result.freqs /tmp/result.$1 fi ./msr 0 >> /tmp/result.$1 @@ -82,32 +92,37 @@ _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') max_freq=$(($_max_freq / 1000)) -for freq in `seq $max_freq -100 $min_freq` +[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq` do echo "Setting maximum frequency to $freq" cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null - [ $EVALUATE_ONLY -eq 0 ] && run_test $freq + run_test $freq done -echo "==============================================================================" +[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null +echo "========================================================================" echo "The marketing frequency of the cpu is $mkt_freq MHz" echo "The maximum frequency of the cpu is $max_freq MHz" echo "The minimum frequency of the cpu is $min_freq MHz" -cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null - # make a pretty table -echo "Target Actual Difference MSR(0x199) max_perf_pct" +echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab for freq in `seq $max_freq -100 $min_freq` do result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ') msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ') max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' ) - if [ $result_freq -eq $freq ]; then - echo " $freq $result_freq 0 $msr $(($max_perf_pct*3300))" - else - echo " $freq $result_freq $(($result_freq-$freq)) $msr $(($max_perf_pct*$max_freq))" - fi + cat >> /tmp/result.tab << EOF +$freq +$result_freq +$((result_freq - freq)) +$msr +$((max_perf_pct * max_freq)) +EOF done + +# print the table +pr -aTt -5 < /tmp/result.tab + exit 0 diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index ee9382bdfadc..dac927e82336 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -196,10 +196,9 @@ int main(int argc, char **argv) int msg, pid, err; struct msgque_data msgque; - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - return ksft_exit_fail(); - } + if (getuid() != 0) + return ksft_exit_skip( + "Please run the test as root - Exiting.\n"); msgque.key = ftok(argv[0], 822155650); if (msgque.key == -1) { diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh index 7956ea3be667..0a76314b4414 100755 --- a/tools/testing/selftests/kmod/kmod.sh +++ b/tools/testing/selftests/kmod/kmod.sh @@ -62,13 +62,16 @@ ALL_TESTS="$ALL_TESTS 0007:5:1" ALL_TESTS="$ALL_TESTS 0008:150:1" ALL_TESTS="$ALL_TESTS 0009:150:1" +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + test_modprobe() { if [ ! -d $DIR ]; then echo "$0: $DIR not present" >&2 echo "You must have the following enabled in your kernel:" >&2 cat $TEST_DIR/config >&2 - exit 1 + exit $ksft_skip fi } @@ -105,12 +108,12 @@ test_reqs() { if ! which modprobe 2> /dev/null > /dev/null; then echo "$0: You need modprobe installed" >&2 - exit 1 + exit $ksft_skip fi if ! which kmod 2> /dev/null > /dev/null; then echo "$0: You need kmod installed" >&2 - exit 1 + exit $ksft_skip fi # kmod 19 has a bad bug where it returns 0 when modprobe @@ -124,13 +127,13 @@ test_reqs() echo "$0: You need at least kmod 20" >&2 echo "kmod <= 19 is buggy, for details see:" >&2 echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2 - exit 1 + exit $ksft_skip fi uid=$(id -u) if [ $uid -ne 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi } diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 1a52b03962a3..15e6b75fc3a5 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -20,7 +20,7 @@ #define KSFT_XFAIL 2 #define KSFT_XPASS 3 /* Treat skip as pass */ -#define KSFT_SKIP KSFT_PASS +#define KSFT_SKIP 4 /* counters */ struct ksft_count { @@ -57,7 +57,8 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { - printf("TAP version 13\n"); + if (!(getenv("KSFT_TAP_LEVEL"))) + printf("TAP version 13\n"); } static inline void ksft_print_cnts(void) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index e81bd28bdd89..6ae3730c4ee3 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -107,6 +107,27 @@ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) /** + * XFAIL(statement, fmt, ...) + * + * @statement: statement to run after reporting XFAIL + * @fmt: format string + * @...: optional arguments + * + * This forces a "pass" after reporting a failure with an XFAIL prefix, + * and runs "statement", which is usually "return" or "goto skip". + */ +#define XFAIL(statement, fmt, ...) do { \ + if (TH_LOG_ENABLED) { \ + fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \ + ##__VA_ARGS__); \ + } \ + /* TODO: find a way to pass xfail to test runner process. */ \ + _metadata->passed = 1; \ + _metadata->trigger = 0; \ + statement; \ +} while (0) + +/** * TEST(test_name) - Defines the test function and creates the registration * stub * @@ -198,7 +219,7 @@ /** * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture. - * *_metadata* is included so that ASSERT_* work as a convenience + * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name * @@ -221,6 +242,7 @@ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) /** * FIXTURE_TEARDOWN(fixture_name) + * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly. * * @fixture_name: fixture name * @@ -253,6 +275,8 @@ * Defines a test that depends on a fixture (e.g., is part of a test case). * Very similar to TEST() except that *self* is the setup instance of fixture's * datatype exposed for use by the implementation. + * + * Warning: use of ASSERT_* here will skip TEARDOWN. */ /* TODO(wad) register fixtures on dedicated test lists. */ #define TEST_F(fixture_name, test_name) \ diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore new file mode 100644 index 000000000000..63fc1ab9248f --- /dev/null +++ b/tools/testing/selftests/kvm/.gitignore @@ -0,0 +1,3 @@ +set_sregs_test +sync_regs_test +vmx_tsc_adjust_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile new file mode 100644 index 000000000000..d9d00319b07c --- /dev/null +++ b/tools/testing/selftests/kvm/Makefile @@ -0,0 +1,40 @@ +all: + +top_srcdir = ../../../../ +UNAME_M := $(shell uname -m) + +LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c +LIBKVM_x86_64 = lib/x86.c lib/vmx.c + +TEST_GEN_PROGS_x86_64 = set_sregs_test +TEST_GEN_PROGS_x86_64 += sync_regs_test +TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test + +TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M)) +LIBKVM += $(LIBKVM_$(UNAME_M)) + +INSTALL_HDR_PATH = $(top_srcdir)/usr +LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ +CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I.. + +# After inclusion, $(OUTPUT) is defined and +# $(TEST_GEN_PROGS) starts with $(OUTPUT)/ +include ../lib.mk + +STATIC_LIBS := $(OUTPUT)/libkvm.a +LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) +EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) +$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) + $(AR) crs $@ $^ + +$(LINUX_HDR_PATH): + make -C $(top_srcdir) headers_install + +all: $(STATIC_LIBS) $(LINUX_HDR_PATH) +$(TEST_GEN_PROGS): $(STATIC_LIBS) +$(TEST_GEN_PROGS) $(LIBKVM_OBJ): | $(LINUX_HDR_PATH) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h new file mode 100644 index 000000000000..637b7017b6ee --- /dev/null +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -0,0 +1,145 @@ +/* + * tools/testing/selftests/kvm/include/kvm_util.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ +#ifndef SELFTEST_KVM_UTIL_H +#define SELFTEST_KVM_UTIL_H 1 + +#include "test_util.h" + +#include "asm/kvm.h" +#include "linux/kvm.h" +#include <sys/ioctl.h> + +#include "sparsebit.h" + +/* + * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be + * created. Only applies to VMs using EPT. + */ +#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul + + +/* Callers of kvm_util only have an incomplete/opaque description of the + * structure kvm_util is using to maintain the state of a VM. + */ +struct kvm_vm; + +typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */ +typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */ + +/* Minimum allocated guest virtual and physical addresses */ +#define KVM_UTIL_MIN_VADDR 0x2000 + +#define DEFAULT_GUEST_PHY_PAGES 512 +#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000 +#define DEFAULT_STACK_PGS 5 + +enum vm_guest_mode { + VM_MODE_FLAT48PG, +}; + +enum vm_mem_backing_src_type { + VM_MEM_SRC_ANONYMOUS, + VM_MEM_SRC_ANONYMOUS_THP, + VM_MEM_SRC_ANONYMOUS_HUGETLB, +}; + +int kvm_check_cap(long cap); + +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); +void kvm_vm_free(struct kvm_vm *vmp); + +int kvm_memcmp_hva_gva(void *hva, + struct kvm_vm *vm, const vm_vaddr_t gva, size_t len); + +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, + uint32_t data_memslot, uint32_t pgd_memslot); + +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void vcpu_dump(FILE *stream, struct kvm_vm *vm, + uint32_t vcpuid, uint8_t indent); + +void vm_create_irqchip(struct kvm_vm *vm); + +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags); + +void vcpu_ioctl(struct kvm_vm *vm, + uint32_t vcpuid, unsigned long ioctl, void *arg); +void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg); +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags); +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid); +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + uint32_t data_memslot, uint32_t pgd_memslot); +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa); +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva); +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva); +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); + +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state); +void vcpu_regs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_regs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs); +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...); +void vcpu_sregs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +int _vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs); +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events); + +const char *exit_reason_str(unsigned int exit_reason); + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot); +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot); +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, + vm_paddr_t paddr_min, uint32_t memslot); + +struct kvm_cpuid2 *kvm_get_supported_cpuid(void); +void vcpu_set_cpuid( + struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid); + +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index); + +static inline struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_entry(uint32_t function) +{ + return kvm_get_supported_cpuid_index(function, 0); +} + +struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code); +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code); + +typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr, + vm_paddr_t vmxon_paddr, + vm_vaddr_t vmcs_vaddr, + vm_paddr_t vmcs_paddr); + +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end); + +struct kvm_dirty_log * +allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region); + +int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); + +#endif /* SELFTEST_KVM_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h new file mode 100644 index 000000000000..54cfeb6568d3 --- /dev/null +++ b/tools/testing/selftests/kvm/include/sparsebit.h @@ -0,0 +1,75 @@ +/* + * tools/testing/selftests/kvm/include/sparsebit.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * Header file that describes API to the sparsebit library. + * This library provides a memory efficient means of storing + * the settings of bits indexed via a uint64_t. Memory usage + * is reasonable, significantly less than (2^64 / 8) bytes, as + * long as bits that are mostly set or mostly cleared are close + * to each other. This library is efficient in memory usage + * even in the case where most bits are set. + */ + +#ifndef _TEST_SPARSEBIT_H_ +#define _TEST_SPARSEBIT_H_ + +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +struct sparsebit; +typedef uint64_t sparsebit_idx_t; +typedef uint64_t sparsebit_num_t; + +struct sparsebit *sparsebit_alloc(void); +void sparsebit_free(struct sparsebit **sbitp); +void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src); + +bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_set_num(struct sparsebit *sbit, + sparsebit_idx_t idx, sparsebit_num_t num); +bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx); +bool sparsebit_is_clear_num(struct sparsebit *sbit, + sparsebit_idx_t idx, sparsebit_num_t num); +sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit); +bool sparsebit_any_set(struct sparsebit *sbit); +bool sparsebit_any_clear(struct sparsebit *sbit); +bool sparsebit_all_set(struct sparsebit *sbit); +bool sparsebit_all_clear(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit); +sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev); +sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit, + sparsebit_idx_t start, sparsebit_num_t num); +sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit, + sparsebit_idx_t start, sparsebit_num_t num); + +void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx); +void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start, + sparsebit_num_t num); +void sparsebit_set_all(struct sparsebit *sbitp); + +void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx); +void sparsebit_clear_num(struct sparsebit *sbitp, + sparsebit_idx_t start, sparsebit_num_t num); +void sparsebit_clear_all(struct sparsebit *sbitp); + +void sparsebit_dump(FILE *stream, struct sparsebit *sbit, + unsigned int indent); +void sparsebit_validate_internal(struct sparsebit *sbit); + +#ifdef __cplusplus +} +#endif + +#endif /* _TEST_SPARSEBIT_H_ */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h new file mode 100644 index 000000000000..ac53730b30aa --- /dev/null +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -0,0 +1,46 @@ +/* + * tools/testing/selftests/kvm/include/test_util.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef TEST_UTIL_H +#define TEST_UTIL_H 1 + +#include <stdlib.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> +#include "kselftest.h" + +ssize_t test_write(int fd, const void *buf, size_t count); +ssize_t test_read(int fd, void *buf, size_t count); +int test_seq_read(const char *path, char **bufp, size_t *sizep); + +void test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, const char *fmt, ...); + +#define ARRAY_SIZE(array) (sizeof(array) / sizeof((array)[0])) + +#define TEST_ASSERT(e, fmt, ...) \ + test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) + +#define ASSERT_EQ(a, b) do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + TEST_ASSERT(__a == __b, \ + "ASSERT_EQ(%s, %s) failed.\n" \ + "\t%s is %#lx\n" \ + "\t%s is %#lx", \ + #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ +} while (0) + +#endif /* TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h new file mode 100644 index 000000000000..6ed8499807fd --- /dev/null +++ b/tools/testing/selftests/kvm/include/vmx.h @@ -0,0 +1,494 @@ +/* + * tools/testing/selftests/kvm/include/vmx.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_VMX_H +#define SELFTEST_KVM_VMX_H + +#include <stdint.h> +#include "x86.h" + +#define CPUID_VMX_BIT 5 + +#define CPUID_VMX (1 << 5) + +/* + * Definitions of Primary Processor-Based VM-Execution Controls. + */ +#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define CPU_BASED_HLT_EXITING 0x00000080 +#define CPU_BASED_INVLPG_EXITING 0x00000200 +#define CPU_BASED_MWAIT_EXITING 0x00000400 +#define CPU_BASED_RDPMC_EXITING 0x00000800 +#define CPU_BASED_RDTSC_EXITING 0x00001000 +#define CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define CPU_BASED_TPR_SHADOW 0x00200000 +#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define CPU_BASED_MOV_DR_EXITING 0x00800000 +#define CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define CPU_BASED_MONITOR_TRAP 0x08000000 +#define CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define CPU_BASED_MONITOR_EXITING 0x20000000 +#define CPU_BASED_PAUSE_EXITING 0x40000000 +#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172 + +/* + * Definitions of Secondary Processor-Based VM-Execution Controls. + */ +#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define SECONDARY_EXEC_DESC 0x00000004 +#define SECONDARY_EXEC_RDTSCP 0x00000008 +#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define SECONDARY_EPT_VE 0x00040000 +#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000 +#define SECONDARY_EXEC_TSC_SCALING 0x02000000 + +#define PIN_BASED_EXT_INTR_MASK 0x00000001 +#define PIN_BASED_NMI_EXITING 0x00000008 +#define PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define PIN_BASED_POSTED_INTR 0x00000080 + +#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016 + +#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 + +#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff + +#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VM_ENTRY_IA32E_MODE 0x00000200 +#define VM_ENTRY_SMM 0x00000400 +#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VM_ENTRY_LOAD_IA32_EFER 0x00008000 + +#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff + +#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f +#define VMX_MISC_SAVE_EFER_LMA 0x00000020 + +#define EXIT_REASON_FAILED_VMENTRY 0x80000000 +#define EXIT_REASON_EXCEPTION_NMI 0 +#define EXIT_REASON_EXTERNAL_INTERRUPT 1 +#define EXIT_REASON_TRIPLE_FAULT 2 +#define EXIT_REASON_PENDING_INTERRUPT 7 +#define EXIT_REASON_NMI_WINDOW 8 +#define EXIT_REASON_TASK_SWITCH 9 +#define EXIT_REASON_CPUID 10 +#define EXIT_REASON_HLT 12 +#define EXIT_REASON_INVD 13 +#define EXIT_REASON_INVLPG 14 +#define EXIT_REASON_RDPMC 15 +#define EXIT_REASON_RDTSC 16 +#define EXIT_REASON_VMCALL 18 +#define EXIT_REASON_VMCLEAR 19 +#define EXIT_REASON_VMLAUNCH 20 +#define EXIT_REASON_VMPTRLD 21 +#define EXIT_REASON_VMPTRST 22 +#define EXIT_REASON_VMREAD 23 +#define EXIT_REASON_VMRESUME 24 +#define EXIT_REASON_VMWRITE 25 +#define EXIT_REASON_VMOFF 26 +#define EXIT_REASON_VMON 27 +#define EXIT_REASON_CR_ACCESS 28 +#define EXIT_REASON_DR_ACCESS 29 +#define EXIT_REASON_IO_INSTRUCTION 30 +#define EXIT_REASON_MSR_READ 31 +#define EXIT_REASON_MSR_WRITE 32 +#define EXIT_REASON_INVALID_STATE 33 +#define EXIT_REASON_MWAIT_INSTRUCTION 36 +#define EXIT_REASON_MONITOR_INSTRUCTION 39 +#define EXIT_REASON_PAUSE_INSTRUCTION 40 +#define EXIT_REASON_MCE_DURING_VMENTRY 41 +#define EXIT_REASON_TPR_BELOW_THRESHOLD 43 +#define EXIT_REASON_APIC_ACCESS 44 +#define EXIT_REASON_EOI_INDUCED 45 +#define EXIT_REASON_EPT_VIOLATION 48 +#define EXIT_REASON_EPT_MISCONFIG 49 +#define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 +#define EXIT_REASON_PREEMPTION_TIMER 52 +#define EXIT_REASON_INVVPID 53 +#define EXIT_REASON_WBINVD 54 +#define EXIT_REASON_XSETBV 55 +#define EXIT_REASON_APIC_WRITE 56 +#define EXIT_REASON_INVPCID 58 +#define EXIT_REASON_PML_FULL 62 +#define EXIT_REASON_XSAVES 63 +#define EXIT_REASON_XRSTORS 64 +#define LAST_EXIT_REASON 64 + +enum vmcs_field { + VIRTUAL_PROCESSOR_ID = 0x00000000, + POSTED_INTR_NV = 0x00000002, + GUEST_ES_SELECTOR = 0x00000800, + GUEST_CS_SELECTOR = 0x00000802, + GUEST_SS_SELECTOR = 0x00000804, + GUEST_DS_SELECTOR = 0x00000806, + GUEST_FS_SELECTOR = 0x00000808, + GUEST_GS_SELECTOR = 0x0000080a, + GUEST_LDTR_SELECTOR = 0x0000080c, + GUEST_TR_SELECTOR = 0x0000080e, + GUEST_INTR_STATUS = 0x00000810, + GUEST_PML_INDEX = 0x00000812, + HOST_ES_SELECTOR = 0x00000c00, + HOST_CS_SELECTOR = 0x00000c02, + HOST_SS_SELECTOR = 0x00000c04, + HOST_DS_SELECTOR = 0x00000c06, + HOST_FS_SELECTOR = 0x00000c08, + HOST_GS_SELECTOR = 0x00000c0a, + HOST_TR_SELECTOR = 0x00000c0c, + IO_BITMAP_A = 0x00002000, + IO_BITMAP_A_HIGH = 0x00002001, + IO_BITMAP_B = 0x00002002, + IO_BITMAP_B_HIGH = 0x00002003, + MSR_BITMAP = 0x00002004, + MSR_BITMAP_HIGH = 0x00002005, + VM_EXIT_MSR_STORE_ADDR = 0x00002006, + VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007, + VM_EXIT_MSR_LOAD_ADDR = 0x00002008, + VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009, + VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a, + VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b, + PML_ADDRESS = 0x0000200e, + PML_ADDRESS_HIGH = 0x0000200f, + TSC_OFFSET = 0x00002010, + TSC_OFFSET_HIGH = 0x00002011, + VIRTUAL_APIC_PAGE_ADDR = 0x00002012, + VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013, + APIC_ACCESS_ADDR = 0x00002014, + APIC_ACCESS_ADDR_HIGH = 0x00002015, + POSTED_INTR_DESC_ADDR = 0x00002016, + POSTED_INTR_DESC_ADDR_HIGH = 0x00002017, + EPT_POINTER = 0x0000201a, + EPT_POINTER_HIGH = 0x0000201b, + EOI_EXIT_BITMAP0 = 0x0000201c, + EOI_EXIT_BITMAP0_HIGH = 0x0000201d, + EOI_EXIT_BITMAP1 = 0x0000201e, + EOI_EXIT_BITMAP1_HIGH = 0x0000201f, + EOI_EXIT_BITMAP2 = 0x00002020, + EOI_EXIT_BITMAP2_HIGH = 0x00002021, + EOI_EXIT_BITMAP3 = 0x00002022, + EOI_EXIT_BITMAP3_HIGH = 0x00002023, + VMREAD_BITMAP = 0x00002026, + VMREAD_BITMAP_HIGH = 0x00002027, + VMWRITE_BITMAP = 0x00002028, + VMWRITE_BITMAP_HIGH = 0x00002029, + XSS_EXIT_BITMAP = 0x0000202C, + XSS_EXIT_BITMAP_HIGH = 0x0000202D, + TSC_MULTIPLIER = 0x00002032, + TSC_MULTIPLIER_HIGH = 0x00002033, + GUEST_PHYSICAL_ADDRESS = 0x00002400, + GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401, + VMCS_LINK_POINTER = 0x00002800, + VMCS_LINK_POINTER_HIGH = 0x00002801, + GUEST_IA32_DEBUGCTL = 0x00002802, + GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, + GUEST_IA32_PAT = 0x00002804, + GUEST_IA32_PAT_HIGH = 0x00002805, + GUEST_IA32_EFER = 0x00002806, + GUEST_IA32_EFER_HIGH = 0x00002807, + GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808, + GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809, + GUEST_PDPTR0 = 0x0000280a, + GUEST_PDPTR0_HIGH = 0x0000280b, + GUEST_PDPTR1 = 0x0000280c, + GUEST_PDPTR1_HIGH = 0x0000280d, + GUEST_PDPTR2 = 0x0000280e, + GUEST_PDPTR2_HIGH = 0x0000280f, + GUEST_PDPTR3 = 0x00002810, + GUEST_PDPTR3_HIGH = 0x00002811, + GUEST_BNDCFGS = 0x00002812, + GUEST_BNDCFGS_HIGH = 0x00002813, + HOST_IA32_PAT = 0x00002c00, + HOST_IA32_PAT_HIGH = 0x00002c01, + HOST_IA32_EFER = 0x00002c02, + HOST_IA32_EFER_HIGH = 0x00002c03, + HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04, + HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05, + PIN_BASED_VM_EXEC_CONTROL = 0x00004000, + CPU_BASED_VM_EXEC_CONTROL = 0x00004002, + EXCEPTION_BITMAP = 0x00004004, + PAGE_FAULT_ERROR_CODE_MASK = 0x00004006, + PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008, + CR3_TARGET_COUNT = 0x0000400a, + VM_EXIT_CONTROLS = 0x0000400c, + VM_EXIT_MSR_STORE_COUNT = 0x0000400e, + VM_EXIT_MSR_LOAD_COUNT = 0x00004010, + VM_ENTRY_CONTROLS = 0x00004012, + VM_ENTRY_MSR_LOAD_COUNT = 0x00004014, + VM_ENTRY_INTR_INFO_FIELD = 0x00004016, + VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018, + VM_ENTRY_INSTRUCTION_LEN = 0x0000401a, + TPR_THRESHOLD = 0x0000401c, + SECONDARY_VM_EXEC_CONTROL = 0x0000401e, + PLE_GAP = 0x00004020, + PLE_WINDOW = 0x00004022, + VM_INSTRUCTION_ERROR = 0x00004400, + VM_EXIT_REASON = 0x00004402, + VM_EXIT_INTR_INFO = 0x00004404, + VM_EXIT_INTR_ERROR_CODE = 0x00004406, + IDT_VECTORING_INFO_FIELD = 0x00004408, + IDT_VECTORING_ERROR_CODE = 0x0000440a, + VM_EXIT_INSTRUCTION_LEN = 0x0000440c, + VMX_INSTRUCTION_INFO = 0x0000440e, + GUEST_ES_LIMIT = 0x00004800, + GUEST_CS_LIMIT = 0x00004802, + GUEST_SS_LIMIT = 0x00004804, + GUEST_DS_LIMIT = 0x00004806, + GUEST_FS_LIMIT = 0x00004808, + GUEST_GS_LIMIT = 0x0000480a, + GUEST_LDTR_LIMIT = 0x0000480c, + GUEST_TR_LIMIT = 0x0000480e, + GUEST_GDTR_LIMIT = 0x00004810, + GUEST_IDTR_LIMIT = 0x00004812, + GUEST_ES_AR_BYTES = 0x00004814, + GUEST_CS_AR_BYTES = 0x00004816, + GUEST_SS_AR_BYTES = 0x00004818, + GUEST_DS_AR_BYTES = 0x0000481a, + GUEST_FS_AR_BYTES = 0x0000481c, + GUEST_GS_AR_BYTES = 0x0000481e, + GUEST_LDTR_AR_BYTES = 0x00004820, + GUEST_TR_AR_BYTES = 0x00004822, + GUEST_INTERRUPTIBILITY_INFO = 0x00004824, + GUEST_ACTIVITY_STATE = 0X00004826, + GUEST_SYSENTER_CS = 0x0000482A, + VMX_PREEMPTION_TIMER_VALUE = 0x0000482E, + HOST_IA32_SYSENTER_CS = 0x00004c00, + CR0_GUEST_HOST_MASK = 0x00006000, + CR4_GUEST_HOST_MASK = 0x00006002, + CR0_READ_SHADOW = 0x00006004, + CR4_READ_SHADOW = 0x00006006, + CR3_TARGET_VALUE0 = 0x00006008, + CR3_TARGET_VALUE1 = 0x0000600a, + CR3_TARGET_VALUE2 = 0x0000600c, + CR3_TARGET_VALUE3 = 0x0000600e, + EXIT_QUALIFICATION = 0x00006400, + GUEST_LINEAR_ADDRESS = 0x0000640a, + GUEST_CR0 = 0x00006800, + GUEST_CR3 = 0x00006802, + GUEST_CR4 = 0x00006804, + GUEST_ES_BASE = 0x00006806, + GUEST_CS_BASE = 0x00006808, + GUEST_SS_BASE = 0x0000680a, + GUEST_DS_BASE = 0x0000680c, + GUEST_FS_BASE = 0x0000680e, + GUEST_GS_BASE = 0x00006810, + GUEST_LDTR_BASE = 0x00006812, + GUEST_TR_BASE = 0x00006814, + GUEST_GDTR_BASE = 0x00006816, + GUEST_IDTR_BASE = 0x00006818, + GUEST_DR7 = 0x0000681a, + GUEST_RSP = 0x0000681c, + GUEST_RIP = 0x0000681e, + GUEST_RFLAGS = 0x00006820, + GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822, + GUEST_SYSENTER_ESP = 0x00006824, + GUEST_SYSENTER_EIP = 0x00006826, + HOST_CR0 = 0x00006c00, + HOST_CR3 = 0x00006c02, + HOST_CR4 = 0x00006c04, + HOST_FS_BASE = 0x00006c06, + HOST_GS_BASE = 0x00006c08, + HOST_TR_BASE = 0x00006c0a, + HOST_GDTR_BASE = 0x00006c0c, + HOST_IDTR_BASE = 0x00006c0e, + HOST_IA32_SYSENTER_ESP = 0x00006c10, + HOST_IA32_SYSENTER_EIP = 0x00006c12, + HOST_RSP = 0x00006c14, + HOST_RIP = 0x00006c16, +}; + +struct vmx_msr_entry { + uint32_t index; + uint32_t reserved; + uint64_t value; +} __attribute__ ((aligned(16))); + +static inline int vmxon(uint64_t phys) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(phys) + : "cc", "memory"); + + return ret; +} + +static inline void vmxoff(void) +{ + __asm__ __volatile__("vmxoff"); +} + +static inline int vmclear(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +static inline int vmptrld(uint64_t vmcs_pa) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]" + : [ret]"=rm"(ret) + : [pa]"m"(vmcs_pa) + : "cc", "memory"); + + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmlaunch. + */ +static inline int vmlaunch(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmlaunch;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +/* + * No guest state (e.g. GPRs) is established by this vmresume. + */ +static inline int vmresume(void) +{ + int ret; + + __asm__ __volatile__("push %%rbp;" + "push %%rcx;" + "push %%rdx;" + "push %%rsi;" + "push %%rdi;" + "push $0;" + "vmwrite %%rsp, %[host_rsp];" + "lea 1f(%%rip), %%rax;" + "vmwrite %%rax, %[host_rip];" + "vmresume;" + "incq (%%rsp);" + "1: pop %%rax;" + "pop %%rdi;" + "pop %%rsi;" + "pop %%rdx;" + "pop %%rcx;" + "pop %%rbp;" + : [ret]"=&a"(ret) + : [host_rsp]"r"((uint64_t)HOST_RSP), + [host_rip]"r"((uint64_t)HOST_RIP) + : "memory", "cc", "rbx", "r8", "r9", "r10", + "r11", "r12", "r13", "r14", "r15"); + return ret; +} + +static inline int vmread(uint64_t encoding, uint64_t *value) +{ + uint64_t tmp; + uint8_t ret; + + __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]" + : [value]"=rm"(tmp), [ret]"=rm"(ret) + : [encoding]"r"(encoding) + : "cc", "memory"); + + *value = tmp; + return ret; +} + +/* + * A wrapper around vmread that ignores errors and returns zero if the + * vmread instruction fails. + */ +static inline uint64_t vmreadz(uint64_t encoding) +{ + uint64_t value = 0; + vmread(encoding, &value); + return value; +} + +static inline int vmwrite(uint64_t encoding, uint64_t value) +{ + uint8_t ret; + + __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]" + : [ret]"=rm"(ret) + : [value]"rm"(value), [encoding]"r"(encoding) + : "cc", "memory"); + + return ret; +} + +static inline uint32_t vmcs_revision(void) +{ + return rdmsr(MSR_IA32_VMX_BASIC); +} + +void prepare_for_vmx_operation(void); +void prepare_vmcs(void *guest_rip, void *guest_rsp); +struct kvm_vm *vm_create_default_vmx(uint32_t vcpuid, + vmx_guest_code_t guest_code); + +#endif /* !SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h new file mode 100644 index 000000000000..4a5b2c4c1a0f --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86.h @@ -0,0 +1,1043 @@ +/* + * tools/testing/selftests/kvm/include/x86.h + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + */ + +#ifndef SELFTEST_KVM_X86_H +#define SELFTEST_KVM_X86_H + +#include <assert.h> +#include <stdint.h> + +#define X86_EFLAGS_FIXED (1u << 1) + +#define X86_CR4_VME (1ul << 0) +#define X86_CR4_PVI (1ul << 1) +#define X86_CR4_TSD (1ul << 2) +#define X86_CR4_DE (1ul << 3) +#define X86_CR4_PSE (1ul << 4) +#define X86_CR4_PAE (1ul << 5) +#define X86_CR4_MCE (1ul << 6) +#define X86_CR4_PGE (1ul << 7) +#define X86_CR4_PCE (1ul << 8) +#define X86_CR4_OSFXSR (1ul << 9) +#define X86_CR4_OSXMMEXCPT (1ul << 10) +#define X86_CR4_UMIP (1ul << 11) +#define X86_CR4_VMXE (1ul << 13) +#define X86_CR4_SMXE (1ul << 14) +#define X86_CR4_FSGSBASE (1ul << 16) +#define X86_CR4_PCIDE (1ul << 17) +#define X86_CR4_OSXSAVE (1ul << 18) +#define X86_CR4_SMEP (1ul << 20) +#define X86_CR4_SMAP (1ul << 21) +#define X86_CR4_PKE (1ul << 22) + +/* The enum values match the intruction encoding of each register */ +enum x86_register { + RAX = 0, + RCX, + RDX, + RBX, + RSP, + RBP, + RSI, + RDI, + R8, + R9, + R10, + R11, + R12, + R13, + R14, + R15, +}; + +struct desc64 { + uint16_t limit0; + uint16_t base0; + unsigned base1:8, type:5, dpl:2, p:1; + unsigned limit1:4, zero0:3, g:1, base2:8; + uint32_t base3; + uint32_t zero1; +} __attribute__((packed)); + +struct desc_ptr { + uint16_t size; + uint64_t address; +} __attribute__((packed)); + +static inline uint64_t get_desc64_base(const struct desc64 *desc) +{ + return ((uint64_t)desc->base3 << 32) | + (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24)); +} + +static inline uint64_t rdtsc(void) +{ + uint32_t eax, edx; + + /* + * The lfence is to wait (on Intel CPUs) until all previous + * instructions have been executed. + */ + __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdtscp(uint32_t *aux) +{ + uint32_t eax, edx; + + __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux)); + return ((uint64_t)edx) << 32 | eax; +} + +static inline uint64_t rdmsr(uint32_t msr) +{ + uint32_t a, d; + + __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory"); + + return a | ((uint64_t) d << 32); +} + +static inline void wrmsr(uint32_t msr, uint64_t value) +{ + uint32_t a = value; + uint32_t d = value >> 32; + + __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory"); +} + + +static inline uint16_t inw(uint16_t port) +{ + uint16_t tmp; + + __asm__ __volatile__("in %%dx, %%ax" + : /* output */ "=a" (tmp) + : /* input */ "d" (port)); + + return tmp; +} + +static inline uint16_t get_es(void) +{ + uint16_t es; + + __asm__ __volatile__("mov %%es, %[es]" + : /* output */ [es]"=rm"(es)); + return es; +} + +static inline uint16_t get_cs(void) +{ + uint16_t cs; + + __asm__ __volatile__("mov %%cs, %[cs]" + : /* output */ [cs]"=rm"(cs)); + return cs; +} + +static inline uint16_t get_ss(void) +{ + uint16_t ss; + + __asm__ __volatile__("mov %%ss, %[ss]" + : /* output */ [ss]"=rm"(ss)); + return ss; +} + +static inline uint16_t get_ds(void) +{ + uint16_t ds; + + __asm__ __volatile__("mov %%ds, %[ds]" + : /* output */ [ds]"=rm"(ds)); + return ds; +} + +static inline uint16_t get_fs(void) +{ + uint16_t fs; + + __asm__ __volatile__("mov %%fs, %[fs]" + : /* output */ [fs]"=rm"(fs)); + return fs; +} + +static inline uint16_t get_gs(void) +{ + uint16_t gs; + + __asm__ __volatile__("mov %%gs, %[gs]" + : /* output */ [gs]"=rm"(gs)); + return gs; +} + +static inline uint16_t get_tr(void) +{ + uint16_t tr; + + __asm__ __volatile__("str %[tr]" + : /* output */ [tr]"=rm"(tr)); + return tr; +} + +static inline uint64_t get_cr0(void) +{ + uint64_t cr0; + + __asm__ __volatile__("mov %%cr0, %[cr0]" + : /* output */ [cr0]"=r"(cr0)); + return cr0; +} + +static inline uint64_t get_cr3(void) +{ + uint64_t cr3; + + __asm__ __volatile__("mov %%cr3, %[cr3]" + : /* output */ [cr3]"=r"(cr3)); + return cr3; +} + +static inline uint64_t get_cr4(void) +{ + uint64_t cr4; + + __asm__ __volatile__("mov %%cr4, %[cr4]" + : /* output */ [cr4]"=r"(cr4)); + return cr4; +} + +static inline void set_cr4(uint64_t val) +{ + __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory"); +} + +static inline uint64_t get_gdt_base(void) +{ + struct desc_ptr gdt; + __asm__ __volatile__("sgdt %[gdt]" + : /* output */ [gdt]"=m"(gdt)); + return gdt.address; +} + +static inline uint64_t get_idt_base(void) +{ + struct desc_ptr idt; + __asm__ __volatile__("sidt %[idt]" + : /* output */ [idt]"=m"(idt)); + return idt.address; +} + +#define SET_XMM(__var, __xmm) \ + asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) + +static inline void set_xmm(int n, unsigned long val) +{ + switch (n) { + case 0: + SET_XMM(val, xmm0); + break; + case 1: + SET_XMM(val, xmm1); + break; + case 2: + SET_XMM(val, xmm2); + break; + case 3: + SET_XMM(val, xmm3); + break; + case 4: + SET_XMM(val, xmm4); + break; + case 5: + SET_XMM(val, xmm5); + break; + case 6: + SET_XMM(val, xmm6); + break; + case 7: + SET_XMM(val, xmm7); + break; + } +} + +typedef unsigned long v1di __attribute__ ((vector_size (8))); +static inline unsigned long get_xmm(int n) +{ + assert(n >= 0 && n <= 7); + + register v1di xmm0 __asm__("%xmm0"); + register v1di xmm1 __asm__("%xmm1"); + register v1di xmm2 __asm__("%xmm2"); + register v1di xmm3 __asm__("%xmm3"); + register v1di xmm4 __asm__("%xmm4"); + register v1di xmm5 __asm__("%xmm5"); + register v1di xmm6 __asm__("%xmm6"); + register v1di xmm7 __asm__("%xmm7"); + switch (n) { + case 0: + return (unsigned long)xmm0; + case 1: + return (unsigned long)xmm1; + case 2: + return (unsigned long)xmm2; + case 3: + return (unsigned long)xmm3; + case 4: + return (unsigned long)xmm4; + case 5: + return (unsigned long)xmm5; + case 6: + return (unsigned long)xmm6; + case 7: + return (unsigned long)xmm7; + } + return 0; +} + +/* + * Basic CPU control in CR0 + */ +#define X86_CR0_PE (1UL<<0) /* Protection Enable */ +#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */ +#define X86_CR0_EM (1UL<<2) /* Emulation */ +#define X86_CR0_TS (1UL<<3) /* Task Switched */ +#define X86_CR0_ET (1UL<<4) /* Extension Type */ +#define X86_CR0_NE (1UL<<5) /* Numeric Error */ +#define X86_CR0_WP (1UL<<16) /* Write Protect */ +#define X86_CR0_AM (1UL<<18) /* Alignment Mask */ +#define X86_CR0_NW (1UL<<29) /* Not Write-through */ +#define X86_CR0_CD (1UL<<30) /* Cache Disable */ +#define X86_CR0_PG (1UL<<31) /* Paging */ + +/* + * CPU model specific register (MSR) numbers. + */ + +/* x86-64 specific MSRs */ +#define MSR_EFER 0xc0000080 /* extended feature register */ +#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ +#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ +#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ +#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ +#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ +#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ +#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ +#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */ + +/* EFER bits: */ +#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */ +#define EFER_LME (1<<8) /* Long mode enable */ +#define EFER_LMA (1<<10) /* Long mode active (read-only) */ +#define EFER_NX (1<<11) /* No execute enable */ +#define EFER_SVME (1<<12) /* Enable virtualization */ +#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */ +#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */ + +/* Intel MSRs. Some also available on other CPUs */ + +#define MSR_PPIN_CTL 0x0000004e +#define MSR_PPIN 0x0000004f + +#define MSR_IA32_PERFCTR0 0x000000c1 +#define MSR_IA32_PERFCTR1 0x000000c2 +#define MSR_FSB_FREQ 0x000000cd +#define MSR_PLATFORM_INFO 0x000000ce +#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31 +#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT) + +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 +#define NHM_C3_AUTO_DEMOTE (1UL << 25) +#define NHM_C1_AUTO_DEMOTE (1UL << 26) +#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + +#define MSR_MTRRcap 0x000000fe +#define MSR_IA32_BBL_CR_CTL 0x00000119 +#define MSR_IA32_BBL_CR_CTL3 0x0000011e + +#define MSR_IA32_SYSENTER_CS 0x00000174 +#define MSR_IA32_SYSENTER_ESP 0x00000175 +#define MSR_IA32_SYSENTER_EIP 0x00000176 + +#define MSR_IA32_MCG_CAP 0x00000179 +#define MSR_IA32_MCG_STATUS 0x0000017a +#define MSR_IA32_MCG_CTL 0x0000017b +#define MSR_IA32_MCG_EXT_CTL 0x000004d0 + +#define MSR_OFFCORE_RSP_0 0x000001a6 +#define MSR_OFFCORE_RSP_1 0x000001a7 +#define MSR_TURBO_RATIO_LIMIT 0x000001ad +#define MSR_TURBO_RATIO_LIMIT1 0x000001ae +#define MSR_TURBO_RATIO_LIMIT2 0x000001af + +#define MSR_LBR_SELECT 0x000001c8 +#define MSR_LBR_TOS 0x000001c9 +#define MSR_LBR_NHM_FROM 0x00000680 +#define MSR_LBR_NHM_TO 0x000006c0 +#define MSR_LBR_CORE_FROM 0x00000040 +#define MSR_LBR_CORE_TO 0x00000060 + +#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */ +#define LBR_INFO_MISPRED BIT_ULL(63) +#define LBR_INFO_IN_TX BIT_ULL(62) +#define LBR_INFO_ABORT BIT_ULL(61) +#define LBR_INFO_CYCLES 0xffff + +#define MSR_IA32_PEBS_ENABLE 0x000003f1 +#define MSR_IA32_DS_AREA 0x00000600 +#define MSR_IA32_PERF_CAPABILITIES 0x00000345 +#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6 + +#define MSR_IA32_RTIT_CTL 0x00000570 +#define MSR_IA32_RTIT_STATUS 0x00000571 +#define MSR_IA32_RTIT_ADDR0_A 0x00000580 +#define MSR_IA32_RTIT_ADDR0_B 0x00000581 +#define MSR_IA32_RTIT_ADDR1_A 0x00000582 +#define MSR_IA32_RTIT_ADDR1_B 0x00000583 +#define MSR_IA32_RTIT_ADDR2_A 0x00000584 +#define MSR_IA32_RTIT_ADDR2_B 0x00000585 +#define MSR_IA32_RTIT_ADDR3_A 0x00000586 +#define MSR_IA32_RTIT_ADDR3_B 0x00000587 +#define MSR_IA32_RTIT_CR3_MATCH 0x00000572 +#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560 +#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561 + +#define MSR_MTRRfix64K_00000 0x00000250 +#define MSR_MTRRfix16K_80000 0x00000258 +#define MSR_MTRRfix16K_A0000 0x00000259 +#define MSR_MTRRfix4K_C0000 0x00000268 +#define MSR_MTRRfix4K_C8000 0x00000269 +#define MSR_MTRRfix4K_D0000 0x0000026a +#define MSR_MTRRfix4K_D8000 0x0000026b +#define MSR_MTRRfix4K_E0000 0x0000026c +#define MSR_MTRRfix4K_E8000 0x0000026d +#define MSR_MTRRfix4K_F0000 0x0000026e +#define MSR_MTRRfix4K_F8000 0x0000026f +#define MSR_MTRRdefType 0x000002ff + +#define MSR_IA32_CR_PAT 0x00000277 + +#define MSR_IA32_DEBUGCTLMSR 0x000001d9 +#define MSR_IA32_LASTBRANCHFROMIP 0x000001db +#define MSR_IA32_LASTBRANCHTOIP 0x000001dc +#define MSR_IA32_LASTINTFROMIP 0x000001dd +#define MSR_IA32_LASTINTTOIP 0x000001de + +/* DEBUGCTLMSR bits (others vary by model): */ +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF_SHIFT 1 +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) +#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14 +#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT) + +#define MSR_PEBS_FRONTEND 0x000003f7 + +#define MSR_IA32_POWER_CTL 0x000001fc + +#define MSR_IA32_MC0_CTL 0x00000400 +#define MSR_IA32_MC0_STATUS 0x00000401 +#define MSR_IA32_MC0_ADDR 0x00000402 +#define MSR_IA32_MC0_MISC 0x00000403 + +/* C-state Residency Counters */ +#define MSR_PKG_C3_RESIDENCY 0x000003f8 +#define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa +#define MSR_PKG_C7_RESIDENCY 0x000003fa +#define MSR_CORE_C3_RESIDENCY 0x000003fc +#define MSR_CORE_C6_RESIDENCY 0x000003fd +#define MSR_CORE_C7_RESIDENCY 0x000003fe +#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff +#define MSR_PKG_C2_RESIDENCY 0x0000060d +#define MSR_PKG_C8_RESIDENCY 0x00000630 +#define MSR_PKG_C9_RESIDENCY 0x00000631 +#define MSR_PKG_C10_RESIDENCY 0x00000632 + +/* Interrupt Response Limit */ +#define MSR_PKGC3_IRTL 0x0000060a +#define MSR_PKGC6_IRTL 0x0000060b +#define MSR_PKGC7_IRTL 0x0000060c +#define MSR_PKGC8_IRTL 0x00000633 +#define MSR_PKGC9_IRTL 0x00000634 +#define MSR_PKGC10_IRTL 0x00000635 + +/* Run Time Average Power Limiting (RAPL) Interface */ + +#define MSR_RAPL_POWER_UNIT 0x00000606 + +#define MSR_PKG_POWER_LIMIT 0x00000610 +#define MSR_PKG_ENERGY_STATUS 0x00000611 +#define MSR_PKG_PERF_STATUS 0x00000613 +#define MSR_PKG_POWER_INFO 0x00000614 + +#define MSR_DRAM_POWER_LIMIT 0x00000618 +#define MSR_DRAM_ENERGY_STATUS 0x00000619 +#define MSR_DRAM_PERF_STATUS 0x0000061b +#define MSR_DRAM_POWER_INFO 0x0000061c + +#define MSR_PP0_POWER_LIMIT 0x00000638 +#define MSR_PP0_ENERGY_STATUS 0x00000639 +#define MSR_PP0_POLICY 0x0000063a +#define MSR_PP0_PERF_STATUS 0x0000063b + +#define MSR_PP1_POWER_LIMIT 0x00000640 +#define MSR_PP1_ENERGY_STATUS 0x00000641 +#define MSR_PP1_POLICY 0x00000642 + +/* Config TDP MSRs */ +#define MSR_CONFIG_TDP_NOMINAL 0x00000648 +#define MSR_CONFIG_TDP_LEVEL_1 0x00000649 +#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A +#define MSR_CONFIG_TDP_CONTROL 0x0000064B +#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C + +#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D + +#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 +#define MSR_PKG_ANY_CORE_C0_RES 0x00000659 +#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A +#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + +#define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 + +#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 +#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 + +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + +#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 +#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 +#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 + +/* Hardware P state interface */ +#define MSR_PPERF 0x0000064e +#define MSR_PERF_LIMIT_REASONS 0x0000064f +#define MSR_PM_ENABLE 0x00000770 +#define MSR_HWP_CAPABILITIES 0x00000771 +#define MSR_HWP_REQUEST_PKG 0x00000772 +#define MSR_HWP_INTERRUPT 0x00000773 +#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_STATUS 0x00000777 + +/* CPUID.6.EAX */ +#define HWP_BASE_BIT (1<<7) +#define HWP_NOTIFICATIONS_BIT (1<<8) +#define HWP_ACTIVITY_WINDOW_BIT (1<<9) +#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10) +#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11) + +/* IA32_HWP_CAPABILITIES */ +#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff) +#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff) +#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff) +#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) + +/* IA32_HWP_REQUEST */ +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_EPP_PERFORMANCE 0x00 +#define HWP_EPP_BALANCE_PERFORMANCE 0x80 +#define HWP_EPP_BALANCE_POWERSAVE 0xC0 +#define HWP_EPP_POWERSAVE 0xFF +#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) + +/* IA32_HWP_STATUS */ +#define HWP_GUARANTEED_CHANGE(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4) + +/* IA32_HWP_INTERRUPT */ +#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1) +#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2) + +#define MSR_AMD64_MC0_MASK 0xc0010044 + +#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x)) +#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x)) +#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x)) +#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x)) + +#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x)) + +/* These are consecutive and not in the normal 4er MCE bank block */ +#define MSR_IA32_MC0_CTL2 0x00000280 +#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x)) + +#define MSR_P6_PERFCTR0 0x000000c1 +#define MSR_P6_PERFCTR1 0x000000c2 +#define MSR_P6_EVNTSEL0 0x00000186 +#define MSR_P6_EVNTSEL1 0x00000187 + +#define MSR_KNC_PERFCTR0 0x00000020 +#define MSR_KNC_PERFCTR1 0x00000021 +#define MSR_KNC_EVNTSEL0 0x00000028 +#define MSR_KNC_EVNTSEL1 0x00000029 + +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + +/* AMD64 MSRs. Not complete. See the architecture manual for a more + complete list. */ + +#define MSR_AMD64_PATCH_LEVEL 0x0000008b +#define MSR_AMD64_TSC_RATIO 0xc0000104 +#define MSR_AMD64_NB_CFG 0xc001001f +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 +#define MSR_AMD64_OSVW_STATUS 0xc0010141 +#define MSR_AMD64_LS_CFG 0xc0011020 +#define MSR_AMD64_DC_CFG 0xc0011022 +#define MSR_AMD64_BU_CFG2 0xc001102a +#define MSR_AMD64_IBSFETCHCTL 0xc0011030 +#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 +#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) +#define MSR_AMD64_IBSOPCTL 0xc0011033 +#define MSR_AMD64_IBSOPRIP 0xc0011034 +#define MSR_AMD64_IBSOPDATA 0xc0011035 +#define MSR_AMD64_IBSOPDATA2 0xc0011036 +#define MSR_AMD64_IBSOPDATA3 0xc0011037 +#define MSR_AMD64_IBSDCLINAD 0xc0011038 +#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSOP_REG_COUNT 7 +#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) +#define MSR_AMD64_IBSCTL 0xc001103a +#define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBSOPDATA4 0xc001103d +#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV 0xc0010131 +#define MSR_AMD64_SEV_ENABLED_BIT 0 +#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) + +/* Fam 17h MSRs */ +#define MSR_F17H_IRPERF 0xc00000e9 + +/* Fam 16h MSRs */ +#define MSR_F16H_L2I_PERF_CTL 0xc0010230 +#define MSR_F16H_L2I_PERF_CTR 0xc0010231 +#define MSR_F16H_DR1_ADDR_MASK 0xc0011019 +#define MSR_F16H_DR2_ADDR_MASK 0xc001101a +#define MSR_F16H_DR3_ADDR_MASK 0xc001101b +#define MSR_F16H_DR0_ADDR_MASK 0xc0011027 + +/* Fam 15h MSRs */ +#define MSR_F15H_PERF_CTL 0xc0010200 +#define MSR_F15H_PERF_CTR 0xc0010201 +#define MSR_F15H_NB_PERF_CTL 0xc0010240 +#define MSR_F15H_NB_PERF_CTR 0xc0010241 +#define MSR_F15H_PTSC 0xc0010280 +#define MSR_F15H_IC_CFG 0xc0011021 + +/* Fam 10h MSRs */ +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#define FAM10H_MMIO_CONF_ENABLE (1<<0) +#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf +#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 +#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL +#define FAM10H_MMIO_CONF_BASE_SHIFT 20 +#define MSR_FAM10H_NODE_ID 0xc001100c +#define MSR_F10H_DECFG 0xc0011029 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1 +#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT) + +/* K8 MSRs */ +#define MSR_K8_TOP_MEM1 0xc001001a +#define MSR_K8_TOP_MEM2 0xc001001d +#define MSR_K8_SYSCFG 0xc0010010 +#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23 +#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT) +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +/* C1E active bits in int pending message */ +#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 +#define MSR_K8_TSEG_ADDR 0xc0010112 +#define MSR_K8_TSEG_MASK 0xc0010113 +#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ +#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ +#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ + +/* K7 MSRs */ +#define MSR_K7_EVNTSEL0 0xc0010000 +#define MSR_K7_PERFCTR0 0xc0010004 +#define MSR_K7_EVNTSEL1 0xc0010001 +#define MSR_K7_PERFCTR1 0xc0010005 +#define MSR_K7_EVNTSEL2 0xc0010002 +#define MSR_K7_PERFCTR2 0xc0010006 +#define MSR_K7_EVNTSEL3 0xc0010003 +#define MSR_K7_PERFCTR3 0xc0010007 +#define MSR_K7_CLK_CTL 0xc001001b +#define MSR_K7_HWCR 0xc0010015 +#define MSR_K7_HWCR_SMMLOCK_BIT 0 +#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT) +#define MSR_K7_FID_VID_CTL 0xc0010041 +#define MSR_K7_FID_VID_STATUS 0xc0010042 + +/* K6 MSRs */ +#define MSR_K6_WHCR 0xc0000082 +#define MSR_K6_UWCCR 0xc0000085 +#define MSR_K6_EPMR 0xc0000086 +#define MSR_K6_PSOR 0xc0000087 +#define MSR_K6_PFIR 0xc0000088 + +/* Centaur-Hauls/IDT defined MSRs. */ +#define MSR_IDT_FCR1 0x00000107 +#define MSR_IDT_FCR2 0x00000108 +#define MSR_IDT_FCR3 0x00000109 +#define MSR_IDT_FCR4 0x0000010a + +#define MSR_IDT_MCR0 0x00000110 +#define MSR_IDT_MCR1 0x00000111 +#define MSR_IDT_MCR2 0x00000112 +#define MSR_IDT_MCR3 0x00000113 +#define MSR_IDT_MCR4 0x00000114 +#define MSR_IDT_MCR5 0x00000115 +#define MSR_IDT_MCR6 0x00000116 +#define MSR_IDT_MCR7 0x00000117 +#define MSR_IDT_MCR_CTRL 0x00000120 + +/* VIA Cyrix defined MSRs*/ +#define MSR_VIA_FCR 0x00001107 +#define MSR_VIA_LONGHAUL 0x0000110a +#define MSR_VIA_RNG 0x0000110b +#define MSR_VIA_BCR2 0x00001147 + +/* Transmeta defined MSRs */ +#define MSR_TMTA_LONGRUN_CTRL 0x80868010 +#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 +#define MSR_TMTA_LRTI_READOUT 0x80868018 +#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a + +/* Intel defined MSRs. */ +#define MSR_IA32_P5_MC_ADDR 0x00000000 +#define MSR_IA32_P5_MC_TYPE 0x00000001 +#define MSR_IA32_TSC 0x00000010 +#define MSR_IA32_PLATFORM_ID 0x00000017 +#define MSR_IA32_EBL_CR_POWERON 0x0000002a +#define MSR_EBC_FREQUENCY_ID 0x0000002c +#define MSR_SMI_COUNT 0x00000034 +#define MSR_IA32_FEATURE_CONTROL 0x0000003a +#define MSR_IA32_TSC_ADJUST 0x0000003b +#define MSR_IA32_BNDCFGS 0x00000d90 + +#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc + +#define MSR_IA32_XSS 0x00000da0 + +#define FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) +#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_LMCE (1<<20) + +#define MSR_IA32_APICBASE 0x0000001b +#define MSR_IA32_APICBASE_BSP (1<<8) +#define MSR_IA32_APICBASE_ENABLE (1<<11) +#define MSR_IA32_APICBASE_BASE (0xfffff<<12) + +#define MSR_IA32_TSCDEADLINE 0x000006e0 + +#define MSR_IA32_UCODE_WRITE 0x00000079 +#define MSR_IA32_UCODE_REV 0x0000008b + +#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b +#define MSR_IA32_SMBASE 0x0000009e + +#define MSR_IA32_PERF_STATUS 0x00000198 +#define MSR_IA32_PERF_CTL 0x00000199 +#define INTEL_PERF_CTL_MASK 0xffff +#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD_PERF_STATUS 0xc0010063 +#define MSR_AMD_PERF_CTL 0xc0010062 + +#define MSR_IA32_MPERF 0x000000e7 +#define MSR_IA32_APERF 0x000000e8 + +#define MSR_IA32_THERM_CONTROL 0x0000019a +#define MSR_IA32_THERM_INTERRUPT 0x0000019b + +#define THERM_INT_HIGH_ENABLE (1 << 0) +#define THERM_INT_LOW_ENABLE (1 << 1) +#define THERM_INT_PLN_ENABLE (1 << 24) + +#define MSR_IA32_THERM_STATUS 0x0000019c + +#define THERM_STATUS_PROCHOT (1 << 0) +#define THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_THERM2_CTL 0x0000019d + +#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16) + +#define MSR_IA32_MISC_ENABLE 0x000001a0 + +#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 + +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 +#define MSR_MISC_PWR_MGMT 0x000001aa + +#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 +#define ENERGY_PERF_BIAS_PERFORMANCE 0 +#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4 +#define ENERGY_PERF_BIAS_NORMAL 6 +#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8 +#define ENERGY_PERF_BIAS_POWERSAVE 15 + +#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1 + +#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) +#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) + +#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 + +#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) +#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) +#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) + +/* Thermal Thresholds Support */ +#define THERM_INT_THRESHOLD0_ENABLE (1 << 15) +#define THERM_SHIFT_THRESHOLD0 8 +#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0) +#define THERM_INT_THRESHOLD1_ENABLE (1 << 23) +#define THERM_SHIFT_THRESHOLD1 16 +#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1) +#define THERM_STATUS_THRESHOLD0 (1 << 6) +#define THERM_LOG_THRESHOLD0 (1 << 7) +#define THERM_STATUS_THRESHOLD1 (1 << 8) +#define THERM_LOG_THRESHOLD1 (1 << 9) + +/* MISC_ENABLE bits: architectural */ +#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 +#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) +#define MSR_IA32_MISC_ENABLE_TCC_BIT 1 +#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) +#define MSR_IA32_MISC_ENABLE_EMON_BIT 7 +#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 +#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 +#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 +#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) +#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 +#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 +#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 +#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) + +/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ +#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 +#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) +#define MSR_IA32_MISC_ENABLE_TM1_BIT 3 +#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 +#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 +#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 +#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 +#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 +#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) +#define MSR_IA32_MISC_ENABLE_TM2_BIT 13 +#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 +#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 +#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 +#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 +#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 +#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 +#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) + +/* MISC_FEATURES_ENABLES non-architectural features */ +#define MSR_MISC_FEATURES_ENABLES 0x00000140 + +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0 +#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT) +#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1 + +#define MSR_IA32_TSC_DEADLINE 0x000006E0 + +/* P4/Xeon+ specific */ +#define MSR_IA32_MCG_EAX 0x00000180 +#define MSR_IA32_MCG_EBX 0x00000181 +#define MSR_IA32_MCG_ECX 0x00000182 +#define MSR_IA32_MCG_EDX 0x00000183 +#define MSR_IA32_MCG_ESI 0x00000184 +#define MSR_IA32_MCG_EDI 0x00000185 +#define MSR_IA32_MCG_EBP 0x00000186 +#define MSR_IA32_MCG_ESP 0x00000187 +#define MSR_IA32_MCG_EFLAGS 0x00000188 +#define MSR_IA32_MCG_EIP 0x00000189 +#define MSR_IA32_MCG_RESERVED 0x0000018a + +/* Pentium IV performance counter MSRs */ +#define MSR_P4_BPU_PERFCTR0 0x00000300 +#define MSR_P4_BPU_PERFCTR1 0x00000301 +#define MSR_P4_BPU_PERFCTR2 0x00000302 +#define MSR_P4_BPU_PERFCTR3 0x00000303 +#define MSR_P4_MS_PERFCTR0 0x00000304 +#define MSR_P4_MS_PERFCTR1 0x00000305 +#define MSR_P4_MS_PERFCTR2 0x00000306 +#define MSR_P4_MS_PERFCTR3 0x00000307 +#define MSR_P4_FLAME_PERFCTR0 0x00000308 +#define MSR_P4_FLAME_PERFCTR1 0x00000309 +#define MSR_P4_FLAME_PERFCTR2 0x0000030a +#define MSR_P4_FLAME_PERFCTR3 0x0000030b +#define MSR_P4_IQ_PERFCTR0 0x0000030c +#define MSR_P4_IQ_PERFCTR1 0x0000030d +#define MSR_P4_IQ_PERFCTR2 0x0000030e +#define MSR_P4_IQ_PERFCTR3 0x0000030f +#define MSR_P4_IQ_PERFCTR4 0x00000310 +#define MSR_P4_IQ_PERFCTR5 0x00000311 +#define MSR_P4_BPU_CCCR0 0x00000360 +#define MSR_P4_BPU_CCCR1 0x00000361 +#define MSR_P4_BPU_CCCR2 0x00000362 +#define MSR_P4_BPU_CCCR3 0x00000363 +#define MSR_P4_MS_CCCR0 0x00000364 +#define MSR_P4_MS_CCCR1 0x00000365 +#define MSR_P4_MS_CCCR2 0x00000366 +#define MSR_P4_MS_CCCR3 0x00000367 +#define MSR_P4_FLAME_CCCR0 0x00000368 +#define MSR_P4_FLAME_CCCR1 0x00000369 +#define MSR_P4_FLAME_CCCR2 0x0000036a +#define MSR_P4_FLAME_CCCR3 0x0000036b +#define MSR_P4_IQ_CCCR0 0x0000036c +#define MSR_P4_IQ_CCCR1 0x0000036d +#define MSR_P4_IQ_CCCR2 0x0000036e +#define MSR_P4_IQ_CCCR3 0x0000036f +#define MSR_P4_IQ_CCCR4 0x00000370 +#define MSR_P4_IQ_CCCR5 0x00000371 +#define MSR_P4_ALF_ESCR0 0x000003ca +#define MSR_P4_ALF_ESCR1 0x000003cb +#define MSR_P4_BPU_ESCR0 0x000003b2 +#define MSR_P4_BPU_ESCR1 0x000003b3 +#define MSR_P4_BSU_ESCR0 0x000003a0 +#define MSR_P4_BSU_ESCR1 0x000003a1 +#define MSR_P4_CRU_ESCR0 0x000003b8 +#define MSR_P4_CRU_ESCR1 0x000003b9 +#define MSR_P4_CRU_ESCR2 0x000003cc +#define MSR_P4_CRU_ESCR3 0x000003cd +#define MSR_P4_CRU_ESCR4 0x000003e0 +#define MSR_P4_CRU_ESCR5 0x000003e1 +#define MSR_P4_DAC_ESCR0 0x000003a8 +#define MSR_P4_DAC_ESCR1 0x000003a9 +#define MSR_P4_FIRM_ESCR0 0x000003a4 +#define MSR_P4_FIRM_ESCR1 0x000003a5 +#define MSR_P4_FLAME_ESCR0 0x000003a6 +#define MSR_P4_FLAME_ESCR1 0x000003a7 +#define MSR_P4_FSB_ESCR0 0x000003a2 +#define MSR_P4_FSB_ESCR1 0x000003a3 +#define MSR_P4_IQ_ESCR0 0x000003ba +#define MSR_P4_IQ_ESCR1 0x000003bb +#define MSR_P4_IS_ESCR0 0x000003b4 +#define MSR_P4_IS_ESCR1 0x000003b5 +#define MSR_P4_ITLB_ESCR0 0x000003b6 +#define MSR_P4_ITLB_ESCR1 0x000003b7 +#define MSR_P4_IX_ESCR0 0x000003c8 +#define MSR_P4_IX_ESCR1 0x000003c9 +#define MSR_P4_MOB_ESCR0 0x000003aa +#define MSR_P4_MOB_ESCR1 0x000003ab +#define MSR_P4_MS_ESCR0 0x000003c0 +#define MSR_P4_MS_ESCR1 0x000003c1 +#define MSR_P4_PMH_ESCR0 0x000003ac +#define MSR_P4_PMH_ESCR1 0x000003ad +#define MSR_P4_RAT_ESCR0 0x000003bc +#define MSR_P4_RAT_ESCR1 0x000003bd +#define MSR_P4_SAAT_ESCR0 0x000003ae +#define MSR_P4_SAAT_ESCR1 0x000003af +#define MSR_P4_SSU_ESCR0 0x000003be +#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ + +#define MSR_P4_TBPU_ESCR0 0x000003c2 +#define MSR_P4_TBPU_ESCR1 0x000003c3 +#define MSR_P4_TC_ESCR0 0x000003c4 +#define MSR_P4_TC_ESCR1 0x000003c5 +#define MSR_P4_U2L_ESCR0 0x000003b0 +#define MSR_P4_U2L_ESCR1 0x000003b1 + +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + +/* Intel Core-based CPU performance counters */ +#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 +#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a +#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d +#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e +#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f +#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 + +/* Geode defined MSRs */ +#define MSR_GEODE_BUSCONT_CONF0 0x00001900 + +/* Intel VT MSRs */ +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 + +/* VMX_BASIC bits and bitmasks */ +#define VMX_BASIC_VMCS_SIZE_SHIFT 32 +#define VMX_BASIC_TRUE_CTLS (1ULL << 55) +#define VMX_BASIC_64 0x0001000000000000LLU +#define VMX_BASIC_MEM_TYPE_SHIFT 50 +#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU +#define VMX_BASIC_MEM_TYPE_WB 6LLU +#define VMX_BASIC_INOUT 0x0040000000000000LLU + +/* MSR_IA32_VMX_MISC bits */ +#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) +#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F +/* AMD-V MSRs */ + +#define MSR_VM_CR 0xc0010114 +#define MSR_VM_IGNNE 0xc0010115 +#define MSR_VM_HSAVE_PA 0xc0010117 + +#endif /* !SELFTEST_KVM_X86_H */ diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c new file mode 100644 index 000000000000..cd01144d27c8 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/assert.c @@ -0,0 +1,92 @@ +/* + * tools/testing/selftests/kvm/lib/assert.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/ + +#include "test_util.h" + +#include <execinfo.h> +#include <sys/syscall.h> + +#include "../../kselftest.h" + +/* Dumps the current stack trace to stderr. */ +static void __attribute__((noinline)) test_dump_stack(void); +static void test_dump_stack(void) +{ + /* + * Build and run this command: + * + * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \ + * grep -v test_dump_stack | cat -n 1>&2 + * + * Note that the spacing is different and there's no newline. + */ + size_t i; + size_t n = 20; + void *stack[n]; + const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai"; + const char *pipeline = "|cat -n 1>&2"; + char cmd[strlen(addr2line) + strlen(pipeline) + + /* N bytes per addr * 2 digits per byte + 1 space per addr: */ + n * (((sizeof(void *)) * 2) + 1) + + /* Null terminator: */ + 1]; + char *c; + + n = backtrace(stack, n); + c = &cmd[0]; + c += sprintf(c, "%s", addr2line); + /* + * Skip the first 3 frames: backtrace, test_dump_stack, and + * test_assert. We hope that backtrace isn't inlined and the other two + * we've declared noinline. + */ + for (i = 2; i < n; i++) + c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1); + c += sprintf(c, "%s", pipeline); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-result" + system(cmd); +#pragma GCC diagnostic pop +} + +static pid_t gettid(void) +{ + return syscall(SYS_gettid); +} + +void __attribute__((noinline)) +test_assert(bool exp, const char *exp_str, + const char *file, unsigned int line, const char *fmt, ...) +{ + va_list ap; + + if (!(exp)) { + va_start(ap, fmt); + + fprintf(stderr, "==== Test Assertion Failure ====\n" + " %s:%u: %s\n" + " pid=%d tid=%d - %s\n", + file, line, exp_str, getpid(), gettid(), + strerror(errno)); + test_dump_stack(); + if (fmt) { + fputs(" ", stderr); + vfprintf(stderr, fmt, ap); + fputs("\n", stderr); + } + va_end(ap); + + if (errno == EACCES) + ksft_exit_skip("Access denied - Exiting.\n"); + exit(254); + } + + return; +} diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c new file mode 100644 index 000000000000..5eb857584aa3 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/elf.c @@ -0,0 +1,197 @@ +/* + * tools/testing/selftests/kvm/lib/elf.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" + +#include <bits/endian.h> +#include <linux/elf.h> + +#include "kvm_util.h" +#include "kvm_util_internal.h" + +static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp) +{ + off_t offset_rv; + + /* Open the ELF file. */ + int fd; + fd = open(filename, O_RDONLY); + TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" + " filename: %s\n" + " rv: %i errno: %i", filename, fd, errno); + + /* Read in and validate ELF Identification Record. + * The ELF Identification record is the first 16 (EI_NIDENT) bytes + * of the ELF header, which is at the beginning of the ELF file. + * For now it is only safe to read the first EI_NIDENT bytes. Once + * read and validated, the value of e_ehsize can be used to determine + * the real size of the ELF header. + */ + unsigned char ident[EI_NIDENT]; + test_read(fd, ident, sizeof(ident)); + TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1) + && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3), + "ELF MAGIC Mismatch,\n" + " filename: %s\n" + " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n" + " Expected: %02x %02x %02x %02x", + filename, + ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3], + ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3); + TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64, + "Current implementation only able to handle ELFCLASS64,\n" + " filename: %s\n" + " ident[EI_CLASS]: %02x\n" + " expected: %02x", + filename, + ident[EI_CLASS], ELFCLASS64); + TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN) + && (ident[EI_DATA] == ELFDATA2LSB)) + || ((BYTE_ORDER == BIG_ENDIAN) + && (ident[EI_DATA] == ELFDATA2MSB)), "Current " + "implementation only able to handle\n" + "cases where the host and ELF file endianness\n" + "is the same:\n" + " host BYTE_ORDER: %u\n" + " host LITTLE_ENDIAN: %u\n" + " host BIG_ENDIAN: %u\n" + " ident[EI_DATA]: %u\n" + " ELFDATA2LSB: %u\n" + " ELFDATA2MSB: %u", + BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN, + ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB); + TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT, + "Current implementation only able to handle current " + "ELF version,\n" + " filename: %s\n" + " ident[EI_VERSION]: %02x\n" + " expected: %02x", + filename, ident[EI_VERSION], EV_CURRENT); + + /* Read in the ELF header. + * With the ELF Identification portion of the ELF header + * validated, especially that the value at EI_VERSION is + * as expected, it is now safe to read the entire ELF header. + */ + offset_rv = lseek(fd, 0, SEEK_SET); + TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n" + " rv: %zi expected: %i", offset_rv, 0); + test_read(fd, hdrp, sizeof(*hdrp)); + TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr), + "Unexpected physical header size,\n" + " hdrp->e_phentsize: %x\n" + " expected: %zx", + hdrp->e_phentsize, sizeof(Elf64_Phdr)); + TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr), + "Unexpected section header size,\n" + " hdrp->e_shentsize: %x\n" + " expected: %zx", + hdrp->e_shentsize, sizeof(Elf64_Shdr)); +} + +/* VM ELF Load + * + * Input Args: + * filename - Path to ELF file + * + * Output Args: None + * + * Input/Output Args: + * vm - Pointer to opaque type that describes the VM. + * + * Return: None, TEST_ASSERT failures for all error conditions + * + * Loads the program image of the ELF file specified by filename, + * into the virtual address space of the VM pointed to by vm. On entry + * the VM needs to not be using any of the virtual address space used + * by the image and it needs to have sufficient available physical pages, to + * back the virtual pages used to load the image. + */ +void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename, + uint32_t data_memslot, uint32_t pgd_memslot) +{ + off_t offset, offset_rv; + Elf64_Ehdr hdr; + + /* Open the ELF file. */ + int fd; + fd = open(filename, O_RDONLY); + TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n" + " filename: %s\n" + " rv: %i errno: %i", filename, fd, errno); + + /* Read in the ELF header. */ + elfhdr_get(filename, &hdr); + + /* For each program header. + * The following ELF header members specify the location + * and size of the program headers: + * + * e_phoff - File offset to start of program headers + * e_phentsize - Size of each program header + * e_phnum - Number of program header entries + */ + for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) { + /* Seek to the beginning of the program header. */ + offset = hdr.e_phoff + (n1 * hdr.e_phentsize); + offset_rv = lseek(fd, offset, SEEK_SET); + TEST_ASSERT(offset_rv == offset, + "Failed to seek to begining of program header %u,\n" + " filename: %s\n" + " rv: %jd errno: %i", + n1, filename, (intmax_t) offset_rv, errno); + + /* Read in the program header. */ + Elf64_Phdr phdr; + test_read(fd, &phdr, sizeof(phdr)); + + /* Skip if this header doesn't describe a loadable segment. */ + if (phdr.p_type != PT_LOAD) + continue; + + /* Allocate memory for this segment within the VM. */ + TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment " + "memsize of 0,\n" + " phdr index: %u p_memsz: 0x%" PRIx64, + n1, (uint64_t) phdr.p_memsz); + vm_vaddr_t seg_vstart = phdr.p_vaddr; + seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1); + vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1; + seg_vend |= vm->page_size - 1; + size_t seg_size = seg_vend - seg_vstart + 1; + + vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart, + data_memslot, pgd_memslot); + TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate " + "virtual memory for segment at requested min addr,\n" + " segment idx: %u\n" + " seg_vstart: 0x%lx\n" + " vaddr: 0x%lx", + n1, seg_vstart, vaddr); + memset(addr_gva2hva(vm, vaddr), 0, seg_size); + /* TODO(lhuemill): Set permissions of each memory segment + * based on the least-significant 3 bits of phdr.p_flags. + */ + + /* Load portion of initial state that is contained within + * the ELF file. + */ + if (phdr.p_filesz) { + offset_rv = lseek(fd, phdr.p_offset, SEEK_SET); + TEST_ASSERT(offset_rv == phdr.p_offset, + "Seek to program segment offset failed,\n" + " program header idx: %u errno: %i\n" + " offset_rv: 0x%jx\n" + " expected: 0x%jx\n", + n1, errno, (intmax_t) offset_rv, + (intmax_t) phdr.p_offset); + test_read(fd, addr_gva2hva(vm, phdr.p_vaddr), + phdr.p_filesz); + } + } +} diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c new file mode 100644 index 000000000000..cff869ffe6ee --- /dev/null +++ b/tools/testing/selftests/kvm/lib/io.c @@ -0,0 +1,158 @@ +/* + * tools/testing/selftests/kvm/lib/io.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" + +/* Test Write + * + * A wrapper for write(2), that automatically handles the following + * special conditions: + * + * + Interrupted system call (EINTR) + * + Write of less than requested amount + * + Non-block return (EAGAIN) + * + * For each of the above, an additional write is performed to automatically + * continue writing the requested data. + * There are also many cases where write(2) can return an unexpected + * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. + * + * Note, for function signature compatibility with write(2), this function + * returns the number of bytes written, but that value will always be equal + * to the number of requested bytes. All other conditions in this and + * future enhancements to this function either automatically issue another + * write(2) or cause a TEST_ASSERT failure. + * + * Args: + * fd - Opened file descriptor to file to be written. + * count - Number of bytes to write. + * + * Output: + * buf - Starting address of data to be written. + * + * Return: + * On success, number of bytes written. + * On failure, a TEST_ASSERT failure is caused. + */ +ssize_t test_write(int fd, const void *buf, size_t count) +{ + ssize_t rc; + ssize_t num_written = 0; + size_t num_left = count; + const char *ptr = buf; + + /* Note: Count of zero is allowed (see "RETURN VALUE" portion of + * write(2) manpage for details. + */ + TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); + + do { + rc = write(fd, ptr, num_left); + + switch (rc) { + case -1: + TEST_ASSERT(errno == EAGAIN || errno == EINTR, + "Unexpected write failure,\n" + " rc: %zi errno: %i", rc, errno); + continue; + + case 0: + TEST_ASSERT(false, "Unexpected EOF,\n" + " rc: %zi num_written: %zi num_left: %zu", + rc, num_written, num_left); + break; + + default: + TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n" + " rc: %zi errno: %i", rc, errno); + num_written += rc; + num_left -= rc; + ptr += rc; + break; + } + } while (num_written < count); + + return num_written; +} + +/* Test Read + * + * A wrapper for read(2), that automatically handles the following + * special conditions: + * + * + Interrupted system call (EINTR) + * + Read of less than requested amount + * + Non-block return (EAGAIN) + * + * For each of the above, an additional read is performed to automatically + * continue reading the requested data. + * There are also many cases where read(2) can return an unexpected + * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note, + * it is expected that the file opened by fd at the current file position + * contains at least the number of requested bytes to be read. A TEST_ASSERT + * failure is produced if an End-Of-File condition occurs, before all the + * data is read. It is the callers responsibility to assure that sufficient + * data exists. + * + * Note, for function signature compatibility with read(2), this function + * returns the number of bytes read, but that value will always be equal + * to the number of requested bytes. All other conditions in this and + * future enhancements to this function either automatically issue another + * read(2) or cause a TEST_ASSERT failure. + * + * Args: + * fd - Opened file descriptor to file to be read. + * count - Number of bytes to read. + * + * Output: + * buf - Starting address of where to write the bytes read. + * + * Return: + * On success, number of bytes read. + * On failure, a TEST_ASSERT failure is caused. + */ +ssize_t test_read(int fd, void *buf, size_t count) +{ + ssize_t rc; + ssize_t num_read = 0; + size_t num_left = count; + char *ptr = buf; + + /* Note: Count of zero is allowed (see "If count is zero" portion of + * read(2) manpage for details. + */ + TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count); + + do { + rc = read(fd, ptr, num_left); + + switch (rc) { + case -1: + TEST_ASSERT(errno == EAGAIN || errno == EINTR, + "Unexpected read failure,\n" + " rc: %zi errno: %i", rc, errno); + break; + + case 0: + TEST_ASSERT(false, "Unexpected EOF,\n" + " rc: %zi num_read: %zi num_left: %zu", + rc, num_read, num_left); + break; + + default: + TEST_ASSERT(rc > 0, "Unexpected ret from read,\n" + " rc: %zi errno: %i", rc, errno); + num_read += rc; + num_left -= rc; + ptr += rc; + break; + } + } while (num_read < count); + + return num_read; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c new file mode 100644 index 000000000000..37e2a787d2fc --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -0,0 +1,1486 @@ +/* + * tools/testing/selftests/kvm/lib/kvm_util.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "kvm_util_internal.h" + +#include <assert.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> + +#define KVM_DEV_PATH "/dev/kvm" + +#define KVM_UTIL_PGS_PER_HUGEPG 512 +#define KVM_UTIL_MIN_PADDR 0x2000 + +/* Aligns x up to the next multiple of size. Size must be a power of 2. */ +static void *align(void *x, size_t size) +{ + size_t mask = size - 1; + TEST_ASSERT(size != 0 && !(size & (size - 1)), + "size not a power of 2: %lu", size); + return (void *) (((size_t) x + mask) & ~mask); +} + +/* Capability + * + * Input Args: + * cap - Capability + * + * Output Args: None + * + * Return: + * On success, the Value corresponding to the capability (KVM_CAP_*) + * specified by the value of cap. On failure a TEST_ASSERT failure + * is produced. + * + * Looks up and returns the value corresponding to the capability + * (KVM_CAP_*) given by cap. + */ +int kvm_check_cap(long cap) +{ + int ret; + int kvm_fd; + + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); + TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" + " rc: %i errno: %i", ret, errno); + + close(kvm_fd); + + return ret; +} + +/* VM Create + * + * Input Args: + * mode - VM Mode (e.g. VM_MODE_FLAT48PG) + * phy_pages - Physical memory pages + * perm - permission + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + * + * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG). + * When phy_pages is non-zero, a memory region of phy_pages physical pages + * is created and mapped starting at guest physical address 0. The file + * descriptor to control the created VM is created with the permissions + * given by perm (e.g. O_RDWR). + */ +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +{ + struct kvm_vm *vm; + int kvm_fd; + + /* Allocate memory. */ + vm = calloc(1, sizeof(*vm)); + TEST_ASSERT(vm != NULL, "Insufficent Memory"); + + vm->mode = mode; + kvm_fd = open(KVM_DEV_PATH, perm); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + /* Create VM. */ + vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); + TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, " + "rc: %i errno: %i", vm->fd, errno); + + close(kvm_fd); + + /* Setup mode specific traits. */ + switch (vm->mode) { + case VM_MODE_FLAT48PG: + vm->page_size = 0x1000; + vm->page_shift = 12; + + /* Limit to 48-bit canonical virtual addresses. */ + vm->vpages_valid = sparsebit_alloc(); + sparsebit_set_num(vm->vpages_valid, + 0, (1ULL << (48 - 1)) >> vm->page_shift); + sparsebit_set_num(vm->vpages_valid, + (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift, + (1ULL << (48 - 1)) >> vm->page_shift); + + /* Limit physical addresses to 52-bits. */ + vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1; + break; + + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode); + } + + /* Allocate and setup memory for guest. */ + vm->vpages_mapped = sparsebit_alloc(); + if (phy_pages != 0) + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + 0, 0, phy_pages, 0); + + return vm; +} + +/* Userspace Memory Region Find + * + * Input Args: + * vm - Virtual Machine + * start - Starting VM physical address + * end - Ending VM physical address, inclusive. + * + * Output Args: None + * + * Return: + * Pointer to overlapping region, NULL if no such region. + * + * Searches for a region with any physical memory that overlaps with + * any portion of the guest physical addresses from start to end + * inclusive. If multiple overlapping regions exist, a pointer to any + * of the regions is returned. Null is returned only when no overlapping + * region exists. + */ +static struct userspace_mem_region *userspace_mem_region_find( + struct kvm_vm *vm, uint64_t start, uint64_t end) +{ + struct userspace_mem_region *region; + + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + uint64_t existing_start = region->region.guest_phys_addr; + uint64_t existing_end = region->region.guest_phys_addr + + region->region.memory_size - 1; + if (start <= existing_end && end >= existing_start) + return region; + } + + return NULL; +} + +/* KVM Userspace Memory Region Find + * + * Input Args: + * vm - Virtual Machine + * start - Starting VM physical address + * end - Ending VM physical address, inclusive. + * + * Output Args: None + * + * Return: + * Pointer to overlapping region, NULL if no such region. + * + * Public interface to userspace_mem_region_find. Allows tests to look up + * the memslot datastructure for a given range of guest physical memory. + */ +struct kvm_userspace_memory_region * +kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start, + uint64_t end) +{ + struct userspace_mem_region *region; + + region = userspace_mem_region_find(vm, start, end); + if (!region) + return NULL; + + return ®ion->region; +} + +/* VCPU Find + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: + * Pointer to VCPU structure + * + * Locates a vcpu structure that describes the VCPU specified by vcpuid and + * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU + * for the specified vcpuid. + */ +struct vcpu *vcpu_find(struct kvm_vm *vm, + uint32_t vcpuid) +{ + struct vcpu *vcpup; + + for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) { + if (vcpup->id == vcpuid) + return vcpup; + } + + return NULL; +} + +/* VM VCPU Remove + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None, TEST_ASSERT failures for all error conditions + * + * Within the VM specified by vm, removes the VCPU given by vcpuid. + */ +static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + + int ret = close(vcpu->fd); + TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i " + "errno: %i", ret, errno); + + if (vcpu->next) + vcpu->next->prev = vcpu->prev; + if (vcpu->prev) + vcpu->prev->next = vcpu->next; + else + vm->vcpu_head = vcpu->next; + free(vcpu); +} + + +/* Destroys and frees the VM pointed to by vmp. + */ +void kvm_vm_free(struct kvm_vm *vmp) +{ + int ret; + + if (vmp == NULL) + return; + + /* Free userspace_mem_regions. */ + while (vmp->userspace_mem_region_head) { + struct userspace_mem_region *region + = vmp->userspace_mem_region_head; + + region->region.memory_size = 0; + ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, + ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, " + "rc: %i errno: %i", ret, errno); + + vmp->userspace_mem_region_head = region->next; + sparsebit_free(®ion->unused_phy_pages); + ret = munmap(region->mmap_start, region->mmap_size); + TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", + ret, errno); + + free(region); + } + + /* Free VCPUs. */ + while (vmp->vcpu_head) + vm_vcpu_rm(vmp, vmp->vcpu_head->id); + + /* Free sparsebit arrays. */ + sparsebit_free(&vmp->vpages_valid); + sparsebit_free(&vmp->vpages_mapped); + + /* Close file descriptor for the VM. */ + ret = close(vmp->fd); + TEST_ASSERT(ret == 0, "Close of vm fd failed,\n" + " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno); + + /* Free the structure describing the VM. */ + free(vmp); +} + +/* Memory Compare, host virtual to guest virtual + * + * Input Args: + * hva - Starting host virtual address + * vm - Virtual Machine + * gva - Starting guest virtual address + * len - number of bytes to compare + * + * Output Args: None + * + * Input/Output Args: None + * + * Return: + * Returns 0 if the bytes starting at hva for a length of len + * are equal the guest virtual bytes starting at gva. Returns + * a value < 0, if bytes at hva are less than those at gva. + * Otherwise a value > 0 is returned. + * + * Compares the bytes starting at the host virtual address hva, for + * a length of len, to the guest bytes starting at the guest virtual + * address given by gva. + */ +int kvm_memcmp_hva_gva(void *hva, + struct kvm_vm *vm, vm_vaddr_t gva, size_t len) +{ + size_t amt; + + /* Compare a batch of bytes until either a match is found + * or all the bytes have been compared. + */ + for (uintptr_t offset = 0; offset < len; offset += amt) { + uintptr_t ptr1 = (uintptr_t)hva + offset; + + /* Determine host address for guest virtual address + * at offset. + */ + uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); + + /* Determine amount to compare on this pass. + * Don't allow the comparsion to cross a page boundary. + */ + amt = len - offset; + if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) + amt = vm->page_size - (ptr1 % vm->page_size); + if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) + amt = vm->page_size - (ptr2 % vm->page_size); + + assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); + assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); + + /* Perform the comparison. If there is a difference + * return that result to the caller, otherwise need + * to continue on looking for a mismatch. + */ + int ret = memcmp((void *)ptr1, (void *)ptr2, amt); + if (ret != 0) + return ret; + } + + /* No mismatch found. Let the caller know the two memory + * areas are equal. + */ + return 0; +} + +/* Allocate an instance of struct kvm_cpuid2 + * + * Input Args: None + * + * Output Args: None + * + * Return: A pointer to the allocated struct. The caller is responsible + * for freeing this struct. + * + * Since kvm_cpuid2 uses a 0-length array to allow a the size of the + * array to be decided at allocation time, allocation is slightly + * complicated. This function uses a reasonable default length for + * the array and performs the appropriate allocation. + */ +static struct kvm_cpuid2 *allocate_kvm_cpuid2(void) +{ + struct kvm_cpuid2 *cpuid; + int nent = 100; + size_t size; + + size = sizeof(*cpuid); + size += nent * sizeof(struct kvm_cpuid_entry2); + cpuid = malloc(size); + if (!cpuid) { + perror("malloc"); + abort(); + } + + cpuid->nent = nent; + + return cpuid; +} + +/* KVM Supported CPUID Get + * + * Input Args: None + * + * Output Args: + * + * Return: The supported KVM CPUID + * + * Get the guest CPUID supported by KVM. + */ +struct kvm_cpuid2 *kvm_get_supported_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int ret; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(); + kvm_fd = open(KVM_DEV_PATH, O_RDONLY); + if (kvm_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", + ret, errno); + + close(kvm_fd); + return cpuid; +} + +/* Locate a cpuid entry. + * + * Input Args: + * cpuid: The cpuid. + * function: The function of the cpuid entry to find. + * + * Output Args: None + * + * Return: A pointer to the cpuid entry. Never returns NULL. + */ +struct kvm_cpuid_entry2 * +kvm_get_supported_cpuid_index(uint32_t function, uint32_t index) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry = NULL; + int i; + + cpuid = kvm_get_supported_cpuid(); + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + entry = &cpuid->entries[i]; + break; + } + } + + TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).", + function, index); + return entry; +} + +/* VM Userspace Memory Region Add + * + * Input Args: + * vm - Virtual Machine + * backing_src - Storage source for this region. + * NULL to use anonymous memory. + * guest_paddr - Starting guest physical address + * slot - KVM region slot + * npages - Number of physical pages + * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES) + * + * Output Args: None + * + * Return: None + * + * Allocates a memory area of the number of pages specified by npages + * and maps it to the VM specified by vm, at a starting physical address + * given by guest_paddr. The region is created with a KVM region slot + * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The + * region is created with the flags given by flags. + */ +void vm_userspace_mem_region_add(struct kvm_vm *vm, + enum vm_mem_backing_src_type src_type, + uint64_t guest_paddr, uint32_t slot, uint64_t npages, + uint32_t flags) +{ + int ret; + unsigned long pmem_size = 0; + struct userspace_mem_region *region; + size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size; + + TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical " + "address not on a page boundary.\n" + " guest_paddr: 0x%lx vm->page_size: 0x%x", + guest_paddr, vm->page_size); + TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1) + <= vm->max_gfn, "Physical range beyond maximum " + "supported physical address,\n" + " guest_paddr: 0x%lx npages: 0x%lx\n" + " vm->max_gfn: 0x%lx vm->page_size: 0x%x", + guest_paddr, npages, vm->max_gfn, vm->page_size); + + /* Confirm a mem region with an overlapping address doesn't + * already exist. + */ + region = (struct userspace_mem_region *) userspace_mem_region_find( + vm, guest_paddr, guest_paddr + npages * vm->page_size); + if (region != NULL) + TEST_ASSERT(false, "overlapping userspace_mem_region already " + "exists\n" + " requested guest_paddr: 0x%lx npages: 0x%lx " + "page_size: 0x%x\n" + " existing guest_paddr: 0x%lx size: 0x%lx", + guest_paddr, npages, vm->page_size, + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size); + + /* Confirm no region with the requested slot already exists. */ + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if (region->region.slot == slot) + break; + if ((guest_paddr <= (region->region.guest_phys_addr + + region->region.memory_size)) + && ((guest_paddr + npages * vm->page_size) + >= region->region.guest_phys_addr)) + break; + } + if (region != NULL) + TEST_ASSERT(false, "A mem region with the requested slot " + "or overlapping physical memory range already exists.\n" + " requested slot: %u paddr: 0x%lx npages: 0x%lx\n" + " existing slot: %u paddr: 0x%lx size: 0x%lx", + slot, guest_paddr, npages, + region->region.slot, + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size); + + /* Allocate and initialize new mem region structure. */ + region = calloc(1, sizeof(*region)); + TEST_ASSERT(region != NULL, "Insufficient Memory"); + region->mmap_size = npages * vm->page_size; + + /* Enough memory to align up to a huge page. */ + if (src_type == VM_MEM_SRC_ANONYMOUS_THP) + region->mmap_size += huge_page_size; + region->mmap_start = mmap(NULL, region->mmap_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS + | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0), + -1, 0); + TEST_ASSERT(region->mmap_start != MAP_FAILED, + "test_malloc failed, mmap_start: %p errno: %i", + region->mmap_start, errno); + + /* Align THP allocation up to start of a huge page. */ + region->host_mem = align(region->mmap_start, + src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1); + + /* As needed perform madvise */ + if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed,\n" + " addr: %p\n" + " length: 0x%lx\n" + " src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } + + region->unused_phy_pages = sparsebit_alloc(); + sparsebit_set_num(region->unused_phy_pages, + guest_paddr >> vm->page_shift, npages); + region->region.slot = slot; + region->region.flags = flags; + region->region.guest_phys_addr = guest_paddr; + region->region.memory_size = npages * vm->page_size; + region->region.userspace_addr = (uintptr_t) region->host_mem; + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i\n" + " slot: %u flags: 0x%x\n" + " guest_phys_addr: 0x%lx size: 0x%lx", + ret, errno, slot, flags, + guest_paddr, (uint64_t) region->region.memory_size); + + /* Add to linked-list of memory regions. */ + if (vm->userspace_mem_region_head) + vm->userspace_mem_region_head->prev = region; + region->next = vm->userspace_mem_region_head; + vm->userspace_mem_region_head = region; +} + +/* Memslot to region + * + * Input Args: + * vm - Virtual Machine + * memslot - KVM memory slot ID + * + * Output Args: None + * + * Return: + * Pointer to memory region structure that describe memory region + * using kvm memory slot ID given by memslot. TEST_ASSERT failure + * on error (e.g. currently no memory region using memslot as a KVM + * memory slot ID). + */ +static struct userspace_mem_region *memslot2region(struct kvm_vm *vm, + uint32_t memslot) +{ + struct userspace_mem_region *region; + + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if (region->region.slot == memslot) + break; + } + if (region == NULL) { + fprintf(stderr, "No mem region with the requested slot found,\n" + " requested slot: %u\n", memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + TEST_ASSERT(false, "Mem region not found"); + } + + return region; +} + +/* VM Memory Region Flags Set + * + * Input Args: + * vm - Virtual Machine + * flags - Starting guest physical address + * + * Output Args: None + * + * Return: None + * + * Sets the flags of the memory region specified by the value of slot, + * to the values given by flags. + */ +void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags) +{ + int ret; + struct userspace_mem_region *region; + + /* Locate memory region. */ + region = memslot2region(vm, slot); + + region->region.flags = flags; + + ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, ®ion->region); + + TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n" + " rc: %i errno: %i slot: %u flags: 0x%x", + ret, errno, slot, flags); +} + +/* VCPU mmap Size + * + * Input Args: None + * + * Output Args: None + * + * Return: + * Size of VCPU state + * + * Returns the size of the structure pointed to by the return value + * of vcpu_state(). + */ +static int vcpu_mmap_sz(void) +{ + int dev_fd, ret; + + dev_fd = open(KVM_DEV_PATH, O_RDONLY); + if (dev_fd < 0) + exit(KSFT_SKIP); + + ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + TEST_ASSERT(ret >= sizeof(struct kvm_run), + "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i", + __func__, ret, errno); + + close(dev_fd); + + return ret; +} + +/* VM VCPU Add + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None + * + * Creates and adds to the VM specified by vm and virtual CPU with + * the ID given by vcpuid. + */ +void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu; + + /* Confirm a vcpu with the specified id doesn't already exist. */ + vcpu = vcpu_find(vm, vcpuid); + if (vcpu != NULL) + TEST_ASSERT(false, "vcpu with the specified id " + "already exists,\n" + " requested vcpuid: %u\n" + " existing vcpuid: %u state: %p", + vcpuid, vcpu->id, vcpu->state); + + /* Allocate and initialize new vcpu structure. */ + vcpu = calloc(1, sizeof(*vcpu)); + TEST_ASSERT(vcpu != NULL, "Insufficient Memory"); + vcpu->id = vcpuid; + vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid); + TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i", + vcpu->fd, errno); + + TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " + "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + vcpu_mmap_sz(), sizeof(*vcpu->state)); + vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), + PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); + TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " + "vcpu id: %u errno: %i", vcpuid, errno); + + /* Add to linked-list of VCPUs. */ + if (vm->vcpu_head) + vm->vcpu_head->prev = vcpu; + vcpu->next = vm->vcpu_head; + vm->vcpu_head = vcpu; + + vcpu_setup(vm, vcpuid); +} + +/* VM Virtual Address Unused Gap + * + * Input Args: + * vm - Virtual Machine + * sz - Size (bytes) + * vaddr_min - Minimum Virtual Address + * + * Output Args: None + * + * Return: + * Lowest virtual address at or below vaddr_min, with at least + * sz unused bytes. TEST_ASSERT failure if no area of at least + * size sz is available. + * + * Within the VM specified by vm, locates the lowest starting virtual + * address >= vaddr_min, that has at least sz unallocated bytes. A + * TEST_ASSERT failure occurs for invalid input or no area of at least + * sz unallocated bytes >= vaddr_min is available. + */ +static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, + vm_vaddr_t vaddr_min) +{ + uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift; + + /* Determine lowest permitted virtual page index. */ + uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift; + if ((pgidx_start * vm->page_size) < vaddr_min) + goto no_va_found; + + /* Loop over section with enough valid virtual page indexes. */ + if (!sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages)) + pgidx_start = sparsebit_next_set_num(vm->vpages_valid, + pgidx_start, pages); + do { + /* + * Are there enough unused virtual pages available at + * the currently proposed starting virtual page index. + * If not, adjust proposed starting index to next + * possible. + */ + if (sparsebit_is_clear_num(vm->vpages_mapped, + pgidx_start, pages)) + goto va_found; + pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped, + pgidx_start, pages); + if (pgidx_start == 0) + goto no_va_found; + + /* + * If needed, adjust proposed starting virtual address, + * to next range of valid virtual addresses. + */ + if (!sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages)) { + pgidx_start = sparsebit_next_set_num( + vm->vpages_valid, pgidx_start, pages); + if (pgidx_start == 0) + goto no_va_found; + } + } while (pgidx_start != 0); + +no_va_found: + TEST_ASSERT(false, "No vaddr of specified pages available, " + "pages: 0x%lx", pages); + + /* NOT REACHED */ + return -1; + +va_found: + TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid, + pgidx_start, pages), + "Unexpected, invalid virtual page index range,\n" + " pgidx_start: 0x%lx\n" + " pages: 0x%lx", + pgidx_start, pages); + TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped, + pgidx_start, pages), + "Unexpected, pages already mapped,\n" + " pgidx_start: 0x%lx\n" + " pages: 0x%lx", + pgidx_start, pages); + + return pgidx_start * vm->page_size; +} + +/* VM Virtual Address Allocate + * + * Input Args: + * vm - Virtual Machine + * sz - Size in bytes + * vaddr_min - Minimum starting virtual address + * data_memslot - Memory region slot for data pages + * pgd_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: + * Starting guest virtual address + * + * Allocates at least sz bytes within the virtual address space of the vm + * given by vm. The allocated bytes are mapped to a virtual address >= + * the address given by vaddr_min. Note that each allocation uses a + * a unique set of pages, with the minimum real allocation being at least + * a page. + */ +vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min, + uint32_t data_memslot, uint32_t pgd_memslot) +{ + uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0); + + virt_pgd_alloc(vm, pgd_memslot); + + /* Find an unused range of virtual page addresses of at least + * pages in length. + */ + vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min); + + /* Map the virtual pages. */ + for (vm_vaddr_t vaddr = vaddr_start; pages > 0; + pages--, vaddr += vm->page_size) { + vm_paddr_t paddr; + + paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot); + + virt_pg_map(vm, vaddr, paddr, pgd_memslot); + + sparsebit_set(vm->vpages_mapped, + vaddr >> vm->page_shift); + } + + return vaddr_start; +} + +/* Address VM Physical to Host Virtual + * + * Input Args: + * vm - Virtual Machine + * gpa - VM physical address + * + * Output Args: None + * + * Return: + * Equivalent host virtual address + * + * Locates the memory region containing the VM physical address given + * by gpa, within the VM given by vm. When found, the host virtual + * address providing the memory to the vm physical address is returned. + * A TEST_ASSERT failure occurs if no region containing gpa exists. + */ +void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa) +{ + struct userspace_mem_region *region; + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if ((gpa >= region->region.guest_phys_addr) + && (gpa <= (region->region.guest_phys_addr + + region->region.memory_size - 1))) + return (void *) ((uintptr_t) region->host_mem + + (gpa - region->region.guest_phys_addr)); + } + + TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa); + return NULL; +} + +/* Address Host Virtual to VM Physical + * + * Input Args: + * vm - Virtual Machine + * hva - Host virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Locates the memory region containing the host virtual address given + * by hva, within the VM given by vm. When found, the equivalent + * VM physical address is returned. A TEST_ASSERT failure occurs if no + * region containing hva exists. + */ +vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva) +{ + struct userspace_mem_region *region; + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + if ((hva >= region->host_mem) + && (hva <= (region->host_mem + + region->region.memory_size - 1))) + return (vm_paddr_t) ((uintptr_t) + region->region.guest_phys_addr + + (hva - (uintptr_t) region->host_mem)); + } + + TEST_ASSERT(false, "No mapping to a guest physical address, " + "hva: %p", hva); + return -1; +} + +/* VM Create IRQ Chip + * + * Input Args: + * vm - Virtual Machine + * + * Output Args: None + * + * Return: None + * + * Creates an interrupt controller chip for the VM specified by vm. + */ +void vm_create_irqchip(struct kvm_vm *vm) +{ + int ret; + + ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0); + TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +/* VM VCPU State + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: + * Pointer to structure that describes the state of the VCPU. + * + * Locates and returns a pointer to a structure that describes the + * state of the VCPU with the given vcpuid. + */ +struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + return vcpu->state; +} + +/* VM VCPU Run + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: None + * + * Return: None + * + * Switch to executing the code for the VCPU given by vcpuid, within the VM + * given by vm. + */ +void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) +{ + int ret = _vcpu_run(vm, vcpuid); + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + do { + rc = ioctl(vcpu->fd, KVM_RUN, NULL); + } while (rc == -1 && errno == EINTR); + return rc; +} + +/* VM VCPU Set MP State + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * mp_state - mp_state to be set + * + * Output Args: None + * + * Return: None + * + * Sets the MP state of the VCPU given by vcpuid, to the state given + * by mp_state. + */ +void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_mp_state *mp_state) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state); + TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +/* VM VCPU Regs Get + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * regs - current state of VCPU regs + * + * Return: None + * + * Obtains the current register state for the VCPU specified by vcpuid + * and stores it at the location given by regs. + */ +void vcpu_regs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_REGS, regs); + TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU Regs Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * regs - Values to set VCPU regs to + * + * Output Args: None + * + * Return: None + * + * Sets the regs of the VCPU specified by vcpuid to the values + * given by regs. + */ +void vcpu_regs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_regs *regs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Set the regs. */ + ret = ioctl(vcpu->fd, KVM_SET_REGS, regs); + TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i", + ret, errno); +} + +void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events); + TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i", + ret, errno); +} + +void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid, + struct kvm_vcpu_events *events) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Set the regs. */ + ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events); + TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU Args Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * num - number of arguments + * ... - arguments, each of type uint64_t + * + * Output Args: None + * + * Return: None + * + * Sets the first num function input arguments to the values + * given as variable args. Each of the variable args is expected to + * be of type uint64_t. + */ +void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) +{ + va_list ap; + struct kvm_regs regs; + + TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n" + " num: %u\n", + num); + + va_start(ap, num); + vcpu_regs_get(vm, vcpuid, ®s); + + if (num >= 1) + regs.rdi = va_arg(ap, uint64_t); + + if (num >= 2) + regs.rsi = va_arg(ap, uint64_t); + + if (num >= 3) + regs.rdx = va_arg(ap, uint64_t); + + if (num >= 4) + regs.rcx = va_arg(ap, uint64_t); + + if (num >= 5) + regs.r8 = va_arg(ap, uint64_t); + + if (num >= 6) + regs.r9 = va_arg(ap, uint64_t); + + vcpu_regs_set(vm, vcpuid, ®s); + va_end(ap); +} + +/* VM VCPU System Regs Get + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * sregs - current state of VCPU system regs + * + * Return: None + * + * Obtains the current system register state for the VCPU specified by + * vcpuid and stores it at the location given by sregs. + */ +void vcpu_sregs_get(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + /* Get the regs. */ + ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs); + TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i", + ret, errno); +} + +/* VM VCPU System Regs Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * sregs - Values to set VCPU system regs to + * + * Output Args: None + * + * Return: None + * + * Sets the system regs of the VCPU specified by vcpuid to the values + * given by sregs. + */ +void vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + int ret = _vcpu_sregs_set(vm, vcpuid, sregs); + TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, " + "rc: %i errno: %i", ret, errno); +} + +int _vcpu_sregs_set(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_sregs *sregs) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + /* Get the regs. */ + return ioctl(vcpu->fd, KVM_SET_SREGS, sregs); +} + +/* VCPU Ioctl + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a VCPU fd. + */ +void vcpu_ioctl(struct kvm_vm *vm, + uint32_t vcpuid, unsigned long cmd, void *arg) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + ret = ioctl(vcpu->fd, cmd, arg); + TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +/* VM Ioctl + * + * Input Args: + * vm - Virtual Machine + * cmd - Ioctl number + * arg - Argument to pass to the ioctl + * + * Return: None + * + * Issues an arbitrary ioctl on a VM fd. + */ +void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg) +{ + int ret; + + ret = ioctl(vm->fd, cmd, arg); + TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)", + cmd, ret, errno, strerror(errno)); +} + +/* VM Dump + * + * Input Args: + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VM given by vm, to the FILE stream + * given by stream. + */ +void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct userspace_mem_region *region; + struct vcpu *vcpu; + + fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode); + fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd); + fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size); + fprintf(stream, "%*sMem Regions:\n", indent, ""); + for (region = vm->userspace_mem_region_head; region; + region = region->next) { + fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx " + "host_virt: %p\n", indent + 2, "", + (uint64_t) region->region.guest_phys_addr, + (uint64_t) region->region.memory_size, + region->host_mem); + fprintf(stream, "%*sunused_phy_pages: ", indent + 2, ""); + sparsebit_dump(stream, region->unused_phy_pages, 0); + } + fprintf(stream, "%*sMapped Virtual Pages:\n", indent, ""); + sparsebit_dump(stream, vm->vpages_mapped, indent + 2); + fprintf(stream, "%*spgd_created: %u\n", indent, "", + vm->pgd_created); + if (vm->pgd_created) { + fprintf(stream, "%*sVirtual Translation Tables:\n", + indent + 2, ""); + virt_dump(stream, vm, indent + 4); + } + fprintf(stream, "%*sVCPUs:\n", indent, ""); + for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next) + vcpu_dump(stream, vm, vcpu->id, indent + 2); +} + +/* VM VCPU Dump + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the current state of the VCPU specified by vcpuid, within the VM + * given by vm, to the FILE stream given by stream. + */ +void vcpu_dump(FILE *stream, struct kvm_vm *vm, + uint32_t vcpuid, uint8_t indent) +{ + struct kvm_regs regs; + struct kvm_sregs sregs; + + fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid); + + fprintf(stream, "%*sregs:\n", indent + 2, ""); + vcpu_regs_get(vm, vcpuid, ®s); + regs_dump(stream, ®s, indent + 4); + + fprintf(stream, "%*ssregs:\n", indent + 2, ""); + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs_dump(stream, &sregs, indent + 4); +} + +/* Known KVM exit reasons */ +static struct exit_reason { + unsigned int reason; + const char *name; +} exit_reasons_known[] = { + {KVM_EXIT_UNKNOWN, "UNKNOWN"}, + {KVM_EXIT_EXCEPTION, "EXCEPTION"}, + {KVM_EXIT_IO, "IO"}, + {KVM_EXIT_HYPERCALL, "HYPERCALL"}, + {KVM_EXIT_DEBUG, "DEBUG"}, + {KVM_EXIT_HLT, "HLT"}, + {KVM_EXIT_MMIO, "MMIO"}, + {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"}, + {KVM_EXIT_SHUTDOWN, "SHUTDOWN"}, + {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"}, + {KVM_EXIT_INTR, "INTR"}, + {KVM_EXIT_SET_TPR, "SET_TPR"}, + {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"}, + {KVM_EXIT_S390_SIEIC, "S390_SIEIC"}, + {KVM_EXIT_S390_RESET, "S390_RESET"}, + {KVM_EXIT_DCR, "DCR"}, + {KVM_EXIT_NMI, "NMI"}, + {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"}, + {KVM_EXIT_OSI, "OSI"}, + {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"}, +#ifdef KVM_EXIT_MEMORY_NOT_PRESENT + {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"}, +#endif +}; + +/* Exit Reason String + * + * Input Args: + * exit_reason - Exit reason + * + * Output Args: None + * + * Return: + * Constant string pointer describing the exit reason. + * + * Locates and returns a constant string that describes the KVM exit + * reason given by exit_reason. If no such string is found, a constant + * string of "Unknown" is returned. + */ +const char *exit_reason_str(unsigned int exit_reason) +{ + unsigned int n1; + + for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) { + if (exit_reason == exit_reasons_known[n1].reason) + return exit_reasons_known[n1].name; + } + + return "Unknown"; +} + +/* Physical Page Allocate + * + * Input Args: + * vm - Virtual Machine + * paddr_min - Physical address minimum + * memslot - Memory region to allocate page from + * + * Output Args: None + * + * Return: + * Starting physical address + * + * Within the VM specified by vm, locates an available physical page + * at or above paddr_min. If found, the page is marked as in use + * and its address is returned. A TEST_ASSERT failure occurs if no + * page is available at or above paddr_min. + */ +vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, + vm_paddr_t paddr_min, uint32_t memslot) +{ + struct userspace_mem_region *region; + sparsebit_idx_t pg; + + TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address " + "not divisible by page size.\n" + " paddr_min: 0x%lx page_size: 0x%x", + paddr_min, vm->page_size); + + /* Locate memory region. */ + region = memslot2region(vm, memslot); + + /* Locate next available physical page at or above paddr_min. */ + pg = paddr_min >> vm->page_shift; + + if (!sparsebit_is_set(region->unused_phy_pages, pg)) { + pg = sparsebit_next_set(region->unused_phy_pages, pg); + if (pg == 0) { + fprintf(stderr, "No guest physical page available, " + "paddr_min: 0x%lx page_size: 0x%x memslot: %u", + paddr_min, vm->page_size, memslot); + fputs("---- vm dump ----\n", stderr); + vm_dump(stderr, vm, 2); + abort(); + } + } + + /* Specify page as in use and return its address. */ + sparsebit_clear(region->unused_phy_pages, pg); + + return pg * vm->page_size; +} + +/* Address Guest Virtual to Host Virtual + * + * Input Args: + * vm - Virtual Machine + * gva - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent host virtual address + */ +void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva) +{ + return addr_gpa2hva(vm, addr_gva2gpa(vm, gva)); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h new file mode 100644 index 000000000000..a0bd1980c81c --- /dev/null +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -0,0 +1,67 @@ +/* + * tools/testing/selftests/kvm/lib/kvm_util.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#ifndef KVM_UTIL_INTERNAL_H +#define KVM_UTIL_INTERNAL_H 1 + +#include "sparsebit.h" + +#ifndef BITS_PER_BYTE +#define BITS_PER_BYTE 8 +#endif + +#ifndef BITS_PER_LONG +#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long)) +#endif + +#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG) + +/* Concrete definition of struct kvm_vm. */ +struct userspace_mem_region { + struct userspace_mem_region *next, *prev; + struct kvm_userspace_memory_region region; + struct sparsebit *unused_phy_pages; + int fd; + off_t offset; + void *host_mem; + void *mmap_start; + size_t mmap_size; +}; + +struct vcpu { + struct vcpu *next, *prev; + uint32_t id; + int fd; + struct kvm_run *state; +}; + +struct kvm_vm { + int mode; + int fd; + unsigned int page_size; + unsigned int page_shift; + uint64_t max_gfn; + struct vcpu *vcpu_head; + struct userspace_mem_region *userspace_mem_region_head; + struct sparsebit *vpages_valid; + struct sparsebit *vpages_mapped; + bool pgd_created; + vm_paddr_t pgd; +}; + +struct vcpu *vcpu_find(struct kvm_vm *vm, + uint32_t vcpuid); +void vcpu_setup(struct kvm_vm *vm, int vcpuid); +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent); +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent); +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent); + +#endif diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c new file mode 100644 index 000000000000..b132bc95d183 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -0,0 +1,2087 @@ +/* + * Sparse bit array + * + * Copyright (C) 2018, Google LLC. + * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver) + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * This library provides functions to support a memory efficient bit array, + * with an index size of 2^64. A sparsebit array is allocated through + * the use sparsebit_alloc() and free'd via sparsebit_free(), + * such as in the following: + * + * struct sparsebit *s; + * s = sparsebit_alloc(); + * sparsebit_free(&s); + * + * The struct sparsebit type resolves down to a struct sparsebit. + * Note that, sparsebit_free() takes a pointer to the sparsebit + * structure. This is so that sparsebit_free() is able to poison + * the pointer (e.g. set it to NULL) to the struct sparsebit before + * returning to the caller. + * + * Between the return of sparsebit_alloc() and the call of + * sparsebit_free(), there are multiple query and modifying operations + * that can be performed on the allocated sparsebit array. All of + * these operations take as a parameter the value returned from + * sparsebit_alloc() and most also take a bit index. Frequently + * used routines include: + * + * ---- Query Operations + * sparsebit_is_set(s, idx) + * sparsebit_is_clear(s, idx) + * sparsebit_any_set(s) + * sparsebit_first_set(s) + * sparsebit_next_set(s, prev_idx) + * + * ---- Modifying Operations + * sparsebit_set(s, idx) + * sparsebit_clear(s, idx) + * sparsebit_set_num(s, idx, num); + * sparsebit_clear_num(s, idx, num); + * + * A common operation, is to itterate over all the bits set in a test + * sparsebit array. This can be done via code with the following structure: + * + * sparsebit_idx_t idx; + * if (sparsebit_any_set(s)) { + * idx = sparsebit_first_set(s); + * do { + * ... + * idx = sparsebit_next_set(s, idx); + * } while (idx != 0); + * } + * + * The index of the first bit set needs to be obtained via + * sparsebit_first_set(), because sparsebit_next_set(), needs + * the index of the previously set. The sparsebit_idx_t type is + * unsigned, so there is no previous index before 0 that is available. + * Also, the call to sparsebit_first_set() is not made unless there + * is at least 1 bit in the array set. This is because sparsebit_first_set() + * aborts if sparsebit_first_set() is called with no bits set. + * It is the callers responsibility to assure that the + * sparsebit array has at least a single bit set before calling + * sparsebit_first_set(). + * + * ==== Implementation Overview ==== + * For the most part the internal implementation of sparsebit is + * opaque to the caller. One important implementation detail that the + * caller may need to be aware of is the spatial complexity of the + * implementation. This implementation of a sparsebit array is not + * only sparse, in that it uses memory proportional to the number of bits + * set. It is also efficient in memory usage when most of the bits are + * set. + * + * At a high-level the state of the bit settings are maintained through + * the use of a binary-search tree, where each node contains at least + * the following members: + * + * typedef uint64_t sparsebit_idx_t; + * typedef uint64_t sparsebit_num_t; + * + * sparsebit_idx_t idx; + * uint32_t mask; + * sparsebit_num_t num_after; + * + * The idx member contains the bit index of the first bit described by this + * node, while the mask member stores the setting of the first 32-bits. + * The setting of the bit at idx + n, where 0 <= n < 32, is located in the + * mask member at 1 << n. + * + * Nodes are sorted by idx and the bits described by two nodes will never + * overlap. The idx member is always aligned to the mask size, i.e. a + * multiple of 32. + * + * Beyond a typical implementation, the nodes in this implementation also + * contains a member named num_after. The num_after member holds the + * number of bits immediately after the mask bits that are contiguously set. + * The use of the num_after member allows this implementation to efficiently + * represent cases where most bits are set. For example, the case of all + * but the last two bits set, is represented by the following two nodes: + * + * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0 + * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0 + * + * ==== Invariants ==== + * This implementation usses the following invariants: + * + * + Node are only used to represent bits that are set. + * Nodes with a mask of 0 and num_after of 0 are not allowed. + * + * + Sum of bits set in all the nodes is equal to the value of + * the struct sparsebit_pvt num_set member. + * + * + The setting of at least one bit is always described in a nodes + * mask (mask >= 1). + * + * + A node with all mask bits set only occurs when the last bit + * described by the previous node is not equal to this nodes + * starting index - 1. All such occurences of this condition are + * avoided by moving the setting of the nodes mask bits into + * the previous nodes num_after setting. + * + * + Node starting index is evenly divisible by the number of bits + * within a nodes mask member. + * + * + Nodes never represent a range of bits that wrap around the + * highest supported index. + * + * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1) + * + * As a consequence of the above, the num_after member of a node + * will always be <=: + * + * maximum_index - nodes_starting_index - number_of_mask_bits + * + * + Nodes within the binary search tree are sorted based on each + * nodes starting index. + * + * + The range of bits described by any two nodes do not overlap. The + * range of bits described by a single node is: + * + * start: node->idx + * end (inclusive): node->idx + MASK_BITS + node->num_after - 1; + * + * Note, at times these invariants are temporarily violated for a + * specific portion of the code. For example, when setting a mask + * bit, there is a small delay between when the mask bit is set and the + * value in the struct sparsebit_pvt num_set member is updated. Other + * temporary violations occur when node_split() is called with a specified + * index and assures that a node where its mask represents the bit + * at the specified index exists. At times to do this node_split() + * must split an existing node into two nodes or create a node that + * has no bits set. Such temporary violations must be corrected before + * returning to the caller. These corrections are typically performed + * by the local function node_reduce(). + */ + +#include "test_util.h" +#include "sparsebit.h" +#include <limits.h> +#include <assert.h> + +#define DUMP_LINE_MAX 100 /* Does not include indent amount */ + +typedef uint32_t mask_t; +#define MASK_BITS (sizeof(mask_t) * CHAR_BIT) + +struct node { + struct node *parent; + struct node *left; + struct node *right; + sparsebit_idx_t idx; /* index of least-significant bit in mask */ + sparsebit_num_t num_after; /* num contiguously set after mask */ + mask_t mask; +}; + +struct sparsebit { + /* + * Points to root node of the binary search + * tree. Equal to NULL when no bits are set in + * the entire sparsebit array. + */ + struct node *root; + + /* + * A redundant count of the total number of bits set. Used for + * diagnostic purposes and to change the time complexity of + * sparsebit_num_set() from O(n) to O(1). + * Note: Due to overflow, a value of 0 means none or all set. + */ + sparsebit_num_t num_set; +}; + +/* Returns the number of set bits described by the settings + * of the node pointed to by nodep. + */ +static sparsebit_num_t node_num_set(struct node *nodep) +{ + return nodep->num_after + __builtin_popcount(nodep->mask); +} + +/* Returns a pointer to the node that describes the + * lowest bit index. + */ +static struct node *node_first(struct sparsebit *s) +{ + struct node *nodep; + + for (nodep = s->root; nodep && nodep->left; nodep = nodep->left) + ; + + return nodep; +} + +/* Returns a pointer to the node that describes the + * lowest bit index > the index of the node pointed to by np. + * Returns NULL if no node with a higher index exists. + */ +static struct node *node_next(struct sparsebit *s, struct node *np) +{ + struct node *nodep = np; + + /* + * If current node has a right child, next node is the left-most + * of the right child. + */ + if (nodep->right) { + for (nodep = nodep->right; nodep->left; nodep = nodep->left) + ; + return nodep; + } + + /* + * No right child. Go up until node is left child of a parent. + * That parent is then the next node. + */ + while (nodep->parent && nodep == nodep->parent->right) + nodep = nodep->parent; + + return nodep->parent; +} + +/* Searches for and returns a pointer to the node that describes the + * highest index < the index of the node pointed to by np. + * Returns NULL if no node with a lower index exists. + */ +static struct node *node_prev(struct sparsebit *s, struct node *np) +{ + struct node *nodep = np; + + /* + * If current node has a left child, next node is the right-most + * of the left child. + */ + if (nodep->left) { + for (nodep = nodep->left; nodep->right; nodep = nodep->right) + ; + return (struct node *) nodep; + } + + /* + * No left child. Go up until node is right child of a parent. + * That parent is then the next node. + */ + while (nodep->parent && nodep == nodep->parent->left) + nodep = nodep->parent; + + return (struct node *) nodep->parent; +} + + +/* Allocates space to hold a copy of the node sub-tree pointed to by + * subtree and duplicates the bit settings to the newly allocated nodes. + * Returns the newly allocated copy of subtree. + */ +static struct node *node_copy_subtree(struct node *subtree) +{ + struct node *root; + + /* Duplicate the node at the root of the subtree */ + root = calloc(1, sizeof(*root)); + if (!root) { + perror("calloc"); + abort(); + } + + root->idx = subtree->idx; + root->mask = subtree->mask; + root->num_after = subtree->num_after; + + /* As needed, recursively duplicate the left and right subtrees */ + if (subtree->left) { + root->left = node_copy_subtree(subtree->left); + root->left->parent = root; + } + + if (subtree->right) { + root->right = node_copy_subtree(subtree->right); + root->right->parent = root; + } + + return root; +} + +/* Searches for and returns a pointer to the node that describes the setting + * of the bit given by idx. A node describes the setting of a bit if its + * index is within the bits described by the mask bits or the number of + * contiguous bits set after the mask. Returns NULL if there is no such node. + */ +static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Find the node that describes the setting of the bit at idx */ + for (nodep = s->root; nodep; + nodep = nodep->idx > idx ? nodep->left : nodep->right) { + if (idx >= nodep->idx && + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) + break; + } + + return nodep; +} + +/* Entry Requirements: + * + A node that describes the setting of idx is not already present. + * + * Adds a new node to describe the setting of the bit at the index given + * by idx. Returns a pointer to the newly added node. + * + * TODO(lhuemill): Degenerate cases causes the tree to get unbalanced. + */ +static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep, *parentp, *prev; + + /* Allocate and initialize the new node. */ + nodep = calloc(1, sizeof(*nodep)); + if (!nodep) { + perror("calloc"); + abort(); + } + + nodep->idx = idx & -MASK_BITS; + + /* If no nodes, set it up as the root node. */ + if (!s->root) { + s->root = nodep; + return nodep; + } + + /* + * Find the parent where the new node should be attached + * and add the node there. + */ + parentp = s->root; + while (true) { + if (idx < parentp->idx) { + if (!parentp->left) { + parentp->left = nodep; + nodep->parent = parentp; + break; + } + parentp = parentp->left; + } else { + assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1); + if (!parentp->right) { + parentp->right = nodep; + nodep->parent = parentp; + break; + } + parentp = parentp->right; + } + } + + /* + * Does num_after bits of previous node overlap with the mask + * of the new node? If so set the bits in the new nodes mask + * and reduce the previous nodes num_after. + */ + prev = node_prev(s, nodep); + while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) { + unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1) + - nodep->idx; + assert(prev->num_after > 0); + assert(n1 < MASK_BITS); + assert(!(nodep->mask & (1 << n1))); + nodep->mask |= (1 << n1); + prev->num_after--; + } + + return nodep; +} + +/* Returns whether all the bits in the sparsebit array are set. */ +bool sparsebit_all_set(struct sparsebit *s) +{ + /* + * If any nodes there must be at least one bit set. Only case + * where a bit is set and total num set is 0, is when all bits + * are set. + */ + return s->root && s->num_set == 0; +} + +/* Clears all bits described by the node pointed to by nodep, then + * removes the node. + */ +static void node_rm(struct sparsebit *s, struct node *nodep) +{ + struct node *tmp; + sparsebit_num_t num_set; + + num_set = node_num_set(nodep); + assert(s->num_set >= num_set || sparsebit_all_set(s)); + s->num_set -= node_num_set(nodep); + + /* Have both left and right child */ + if (nodep->left && nodep->right) { + /* + * Move left children to the leftmost leaf node + * of the right child. + */ + for (tmp = nodep->right; tmp->left; tmp = tmp->left) + ; + tmp->left = nodep->left; + nodep->left = NULL; + tmp->left->parent = tmp; + } + + /* Left only child */ + if (nodep->left) { + if (!nodep->parent) { + s->root = nodep->left; + nodep->left->parent = NULL; + } else { + nodep->left->parent = nodep->parent; + if (nodep == nodep->parent->left) + nodep->parent->left = nodep->left; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = nodep->left; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; + } + + + /* Right only child */ + if (nodep->right) { + if (!nodep->parent) { + s->root = nodep->right; + nodep->right->parent = NULL; + } else { + nodep->right->parent = nodep->parent; + if (nodep == nodep->parent->left) + nodep->parent->left = nodep->right; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = nodep->right; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; + } + + /* Leaf Node */ + if (!nodep->parent) { + s->root = NULL; + } else { + if (nodep->parent->left == nodep) + nodep->parent->left = NULL; + else { + assert(nodep == nodep->parent->right); + nodep->parent->right = NULL; + } + } + + nodep->parent = nodep->left = nodep->right = NULL; + free(nodep); + + return; +} + +/* Splits the node containing the bit at idx so that there is a node + * that starts at the specified index. If no such node exists, a new + * node at the specified index is created. Returns the new node. + * + * idx must start of a mask boundary. + */ +static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep1, *nodep2; + sparsebit_idx_t offset; + sparsebit_num_t orig_num_after; + + assert(!(idx % MASK_BITS)); + + /* + * Is there a node that describes the setting of idx? + * If not, add it. + */ + nodep1 = node_find(s, idx); + if (!nodep1) + return node_add(s, idx); + + /* + * All done if the starting index of the node is where the + * split should occur. + */ + if (nodep1->idx == idx) + return nodep1; + + /* + * Split point not at start of mask, so it must be part of + * bits described by num_after. + */ + + /* + * Calculate offset within num_after for where the split is + * to occur. + */ + offset = idx - (nodep1->idx + MASK_BITS); + orig_num_after = nodep1->num_after; + + /* + * Add a new node to describe the bits starting at + * the split point. + */ + nodep1->num_after = offset; + nodep2 = node_add(s, idx); + + /* Move bits after the split point into the new node */ + nodep2->num_after = orig_num_after - offset; + if (nodep2->num_after >= MASK_BITS) { + nodep2->mask = ~(mask_t) 0; + nodep2->num_after -= MASK_BITS; + } else { + nodep2->mask = (1 << nodep2->num_after) - 1; + nodep2->num_after = 0; + } + + return nodep2; +} + +/* Iteratively reduces the node pointed to by nodep and its adjacent + * nodes into a more compact form. For example, a node with a mask with + * all bits set adjacent to a previous node, will get combined into a + * single node with an increased num_after setting. + * + * After each reduction, a further check is made to see if additional + * reductions are possible with the new previous and next nodes. Note, + * a search for a reduction is only done across the nodes nearest nodep + * and those that became part of a reduction. Reductions beyond nodep + * and the adjacent nodes that are reduced are not discovered. It is the + * responsibility of the caller to pass a nodep that is within one node + * of each possible reduction. + * + * This function does not fix the temporary violation of all invariants. + * For example it does not fix the case where the bit settings described + * by two or more nodes overlap. Such a violation introduces the potential + * complication of a bit setting for a specific index having different settings + * in different nodes. This would then introduce the further complication + * of which node has the correct setting of the bit and thus such conditions + * are not allowed. + * + * This function is designed to fix invariant violations that are introduced + * by node_split() and by changes to the nodes mask or num_after members. + * For example, when setting a bit within a nodes mask, the function that + * sets the bit doesn't have to worry about whether the setting of that + * bit caused the mask to have leading only or trailing only bits set. + * Instead, the function can call node_reduce(), with nodep equal to the + * node address that it set a mask bit in, and node_reduce() will notice + * the cases of leading or trailing only bits and that there is an + * adjacent node that the bit settings could be merged into. + * + * This implementation specifically detects and corrects violation of the + * following invariants: + * + * + Node are only used to represent bits that are set. + * Nodes with a mask of 0 and num_after of 0 are not allowed. + * + * + The setting of at least one bit is always described in a nodes + * mask (mask >= 1). + * + * + A node with all mask bits set only occurs when the last bit + * described by the previous node is not equal to this nodes + * starting index - 1. All such occurences of this condition are + * avoided by moving the setting of the nodes mask bits into + * the previous nodes num_after setting. + */ +static void node_reduce(struct sparsebit *s, struct node *nodep) +{ + bool reduction_performed; + + do { + reduction_performed = false; + struct node *prev, *next, *tmp; + + /* 1) Potential reductions within the current node. */ + + /* Nodes with all bits cleared may be removed. */ + if (nodep->mask == 0 && nodep->num_after == 0) { + /* + * About to remove the node pointed to by + * nodep, which normally would cause a problem + * for the next pass through the reduction loop, + * because the node at the starting point no longer + * exists. This potential problem is handled + * by first remembering the location of the next + * or previous nodes. Doesn't matter which, because + * once the node at nodep is removed, there will be + * no other nodes between prev and next. + * + * Note, the checks performed on nodep against both + * both prev and next both check for an adjacent + * node that can be reduced into a single node. As + * such, after removing the node at nodep, doesn't + * matter whether the nodep for the next pass + * through the loop is equal to the previous pass + * prev or next node. Either way, on the next pass + * the one not selected will become either the + * prev or next node. + */ + tmp = node_next(s, nodep); + if (!tmp) + tmp = node_prev(s, nodep); + + node_rm(s, nodep); + nodep = NULL; + + nodep = tmp; + reduction_performed = true; + continue; + } + + /* + * When the mask is 0, can reduce the amount of num_after + * bits by moving the initial num_after bits into the mask. + */ + if (nodep->mask == 0) { + assert(nodep->num_after != 0); + assert(nodep->idx + MASK_BITS > nodep->idx); + + nodep->idx += MASK_BITS; + + if (nodep->num_after >= MASK_BITS) { + nodep->mask = ~0; + nodep->num_after -= MASK_BITS; + } else { + nodep->mask = (1u << nodep->num_after) - 1; + nodep->num_after = 0; + } + + reduction_performed = true; + continue; + } + + /* + * 2) Potential reductions between the current and + * previous nodes. + */ + prev = node_prev(s, nodep); + if (prev) { + sparsebit_idx_t prev_highest_bit; + + /* Nodes with no bits set can be removed. */ + if (prev->mask == 0 && prev->num_after == 0) { + node_rm(s, prev); + + reduction_performed = true; + continue; + } + + /* + * All mask bits set and previous node has + * adjacent index. + */ + if (nodep->mask + 1 == 0 && + prev->idx + MASK_BITS == nodep->idx) { + prev->num_after += MASK_BITS + nodep->num_after; + nodep->mask = 0; + nodep->num_after = 0; + + reduction_performed = true; + continue; + } + + /* + * Is node adjacent to previous node and the node + * contains a single contiguous range of bits + * starting from the beginning of the mask? + */ + prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after; + if (prev_highest_bit + 1 == nodep->idx && + (nodep->mask | (nodep->mask >> 1)) == nodep->mask) { + /* + * How many contiguous bits are there? + * Is equal to the total number of set + * bits, due to an earlier check that + * there is a single contiguous range of + * set bits. + */ + unsigned int num_contiguous + = __builtin_popcount(nodep->mask); + assert((num_contiguous > 0) && + ((1ULL << num_contiguous) - 1) == nodep->mask); + + prev->num_after += num_contiguous; + nodep->mask = 0; + + /* + * For predictable performance, handle special + * case where all mask bits are set and there + * is a non-zero num_after setting. This code + * is functionally correct without the following + * conditionalized statements, but without them + * the value of num_after is only reduced by + * the number of mask bits per pass. There are + * cases where num_after can be close to 2^64. + * Without this code it could take nearly + * (2^64) / 32 passes to perform the full + * reduction. + */ + if (num_contiguous == MASK_BITS) { + prev->num_after += nodep->num_after; + nodep->num_after = 0; + } + + reduction_performed = true; + continue; + } + } + + /* + * 3) Potential reductions between the current and + * next nodes. + */ + next = node_next(s, nodep); + if (next) { + /* Nodes with no bits set can be removed. */ + if (next->mask == 0 && next->num_after == 0) { + node_rm(s, next); + reduction_performed = true; + continue; + } + + /* + * Is next node index adjacent to current node + * and has a mask with all bits set? + */ + if (next->idx == nodep->idx + MASK_BITS + nodep->num_after && + next->mask == ~(mask_t) 0) { + nodep->num_after += MASK_BITS; + next->mask = 0; + nodep->num_after += next->num_after; + next->num_after = 0; + + node_rm(s, next); + next = NULL; + + reduction_performed = true; + continue; + } + } + } while (nodep && reduction_performed); +} + +/* Returns whether the bit at the index given by idx, within the + * sparsebit array is set or not. + */ +bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Find the node that describes the setting of the bit at idx */ + for (nodep = s->root; nodep; + nodep = nodep->idx > idx ? nodep->left : nodep->right) + if (idx >= nodep->idx && + idx <= nodep->idx + MASK_BITS + nodep->num_after - 1) + goto have_node; + + return false; + +have_node: + /* Bit is set if it is any of the bits described by num_after */ + if (nodep->num_after && idx >= nodep->idx + MASK_BITS) + return true; + + /* Is the corresponding mask bit set */ + assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS); + return !!(nodep->mask & (1 << (idx - nodep->idx))); +} + +/* Within the sparsebit array pointed to by s, sets the bit + * at the index given by idx. + */ +static void bit_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Skip bits that are already set */ + if (sparsebit_is_set(s, idx)) + return; + + /* + * Get a node where the bit at idx is described by the mask. + * The node_split will also create a node, if there isn't + * already a node that describes the setting of bit. + */ + nodep = node_split(s, idx & -MASK_BITS); + + /* Set the bit within the nodes mask */ + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); + assert(!(nodep->mask & (1 << (idx - nodep->idx)))); + nodep->mask |= 1 << (idx - nodep->idx); + s->num_set++; + + node_reduce(s, nodep); +} + +/* Within the sparsebit array pointed to by s, clears the bit + * at the index given by idx. + */ +static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx) +{ + struct node *nodep; + + /* Skip bits that are already cleared */ + if (!sparsebit_is_set(s, idx)) + return; + + /* Is there a node that describes the setting of this bit? */ + nodep = node_find(s, idx); + if (!nodep) + return; + + /* + * If a num_after bit, split the node, so that the bit is + * part of a node mask. + */ + if (idx >= nodep->idx + MASK_BITS) + nodep = node_split(s, idx & -MASK_BITS); + + /* + * After node_split above, bit at idx should be within the mask. + * Clear that bit. + */ + assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1); + assert(nodep->mask & (1 << (idx - nodep->idx))); + nodep->mask &= ~(1 << (idx - nodep->idx)); + assert(s->num_set > 0 || sparsebit_all_set(s)); + s->num_set--; + + node_reduce(s, nodep); +} + +/* Recursively dumps to the FILE stream given by stream the contents + * of the sub-tree of nodes pointed to by nodep. Each line of output + * is prefixed by the number of spaces given by indent. On each + * recursion, the indent amount is increased by 2. This causes nodes + * at each level deeper into the binary search tree to be displayed + * with a greater indent. + */ +static void dump_nodes(FILE *stream, struct node *nodep, + unsigned int indent) +{ + char *node_type; + + /* Dump contents of node */ + if (!nodep->parent) + node_type = "root"; + else if (nodep == nodep->parent->left) + node_type = "left"; + else { + assert(nodep == nodep->parent->right); + node_type = "right"; + } + fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep); + fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "", + nodep->parent, nodep->left, nodep->right); + fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n", + indent, "", nodep->idx, nodep->mask, nodep->num_after); + + /* If present, dump contents of left child nodes */ + if (nodep->left) + dump_nodes(stream, nodep->left, indent + 2); + + /* If present, dump contents of right child nodes */ + if (nodep->right) + dump_nodes(stream, nodep->right, indent + 2); +} + +static inline sparsebit_idx_t node_first_set(struct node *nodep, int start) +{ + mask_t leading = (mask_t)1 << start; + int n1 = __builtin_ctz(nodep->mask & -leading); + + return nodep->idx + n1; +} + +static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start) +{ + mask_t leading = (mask_t)1 << start; + int n1 = __builtin_ctz(~nodep->mask & -leading); + + return nodep->idx + n1; +} + +/* Dumps to the FILE stream specified by stream, the implementation dependent + * internal state of s. Each line of output is prefixed with the number + * of spaces given by indent. The output is completely implementation + * dependent and subject to change. Output from this function should only + * be used for diagnostic purposes. For example, this function can be + * used by test cases after they detect an unexpected condition, as a means + * to capture diagnostic information. + */ +static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s, + unsigned int indent) +{ + /* Dump the contents of s */ + fprintf(stream, "%*sroot: %p\n", indent, "", s->root); + fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set); + + if (s->root) + dump_nodes(stream, s->root, indent); +} + +/* Allocates and returns a new sparsebit array. The initial state + * of the newly allocated sparsebit array has all bits cleared. + */ +struct sparsebit *sparsebit_alloc(void) +{ + struct sparsebit *s; + + /* Allocate top level structure. */ + s = calloc(1, sizeof(*s)); + if (!s) { + perror("calloc"); + abort(); + } + + return s; +} + +/* Frees the implementation dependent data for the sparsebit array + * pointed to by s and poisons the pointer to that data. + */ +void sparsebit_free(struct sparsebit **sbitp) +{ + struct sparsebit *s = *sbitp; + + if (!s) + return; + + sparsebit_clear_all(s); + free(s); + *sbitp = NULL; +} + +/* Makes a copy of the sparsebit array given by s, to the sparsebit + * array given by d. Note, d must have already been allocated via + * sparsebit_alloc(). It can though already have bits set, which + * if different from src will be cleared. + */ +void sparsebit_copy(struct sparsebit *d, struct sparsebit *s) +{ + /* First clear any bits already set in the destination */ + sparsebit_clear_all(d); + + if (s->root) { + d->root = node_copy_subtree(s->root); + d->num_set = s->num_set; + } +} + +/* Returns whether num consecutive bits starting at idx are all set. */ +bool sparsebit_is_set_num(struct sparsebit *s, + sparsebit_idx_t idx, sparsebit_num_t num) +{ + sparsebit_idx_t next_cleared; + + assert(num > 0); + assert(idx + num - 1 >= idx); + + /* With num > 0, the first bit must be set. */ + if (!sparsebit_is_set(s, idx)) + return false; + + /* Find the next cleared bit */ + next_cleared = sparsebit_next_clear(s, idx); + + /* + * If no cleared bits beyond idx, then there are at least num + * set bits. idx + num doesn't wrap. Otherwise check if + * there are enough set bits between idx and the next cleared bit. + */ + return next_cleared == 0 || next_cleared - idx >= num; +} + +/* Returns whether the bit at the index given by idx. */ +bool sparsebit_is_clear(struct sparsebit *s, + sparsebit_idx_t idx) +{ + return !sparsebit_is_set(s, idx); +} + +/* Returns whether num consecutive bits starting at idx are all cleared. */ +bool sparsebit_is_clear_num(struct sparsebit *s, + sparsebit_idx_t idx, sparsebit_num_t num) +{ + sparsebit_idx_t next_set; + + assert(num > 0); + assert(idx + num - 1 >= idx); + + /* With num > 0, the first bit must be cleared. */ + if (!sparsebit_is_clear(s, idx)) + return false; + + /* Find the next set bit */ + next_set = sparsebit_next_set(s, idx); + + /* + * If no set bits beyond idx, then there are at least num + * cleared bits. idx + num doesn't wrap. Otherwise check if + * there are enough cleared bits between idx and the next set bit. + */ + return next_set == 0 || next_set - idx >= num; +} + +/* Returns the total number of bits set. Note: 0 is also returned for + * the case of all bits set. This is because with all bits set, there + * is 1 additional bit set beyond what can be represented in the return + * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0, + * to determine if the sparsebit array has any bits set. + */ +sparsebit_num_t sparsebit_num_set(struct sparsebit *s) +{ + return s->num_set; +} + +/* Returns whether any bit is set in the sparsebit array. */ +bool sparsebit_any_set(struct sparsebit *s) +{ + /* + * Nodes only describe set bits. If any nodes then there + * is at least 1 bit set. + */ + if (!s->root) + return false; + + /* + * Every node should have a non-zero mask. For now will + * just assure that the root node has a non-zero mask, + * which is a quick check that at least 1 bit is set. + */ + assert(s->root->mask != 0); + assert(s->num_set > 0 || + (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS && + s->root->mask == ~(mask_t) 0)); + + return true; +} + +/* Returns whether all the bits in the sparsebit array are cleared. */ +bool sparsebit_all_clear(struct sparsebit *s) +{ + return !sparsebit_any_set(s); +} + +/* Returns whether all the bits in the sparsebit array are set. */ +bool sparsebit_any_clear(struct sparsebit *s) +{ + return !sparsebit_all_set(s); +} + +/* Returns the index of the first set bit. Abort if no bits are set. + */ +sparsebit_idx_t sparsebit_first_set(struct sparsebit *s) +{ + struct node *nodep; + + /* Validate at least 1 bit is set */ + assert(sparsebit_any_set(s)); + + nodep = node_first(s); + return node_first_set(nodep, 0); +} + +/* Returns the index of the first cleared bit. Abort if + * no bits are cleared. + */ +sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s) +{ + struct node *nodep1, *nodep2; + + /* Validate at least 1 bit is cleared. */ + assert(sparsebit_any_clear(s)); + + /* If no nodes or first node index > 0 then lowest cleared is 0 */ + nodep1 = node_first(s); + if (!nodep1 || nodep1->idx > 0) + return 0; + + /* Does the mask in the first node contain any cleared bits. */ + if (nodep1->mask != ~(mask_t) 0) + return node_first_clear(nodep1, 0); + + /* + * All mask bits set in first node. If there isn't a second node + * then the first cleared bit is the first bit after the bits + * described by the first node. + */ + nodep2 = node_next(s, nodep1); + if (!nodep2) { + /* + * No second node. First cleared bit is first bit beyond + * bits described by first node. + */ + assert(nodep1->mask == ~(mask_t) 0); + assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0); + return nodep1->idx + MASK_BITS + nodep1->num_after; + } + + /* + * There is a second node. + * If it is not adjacent to the first node, then there is a gap + * of cleared bits between the nodes, and the first cleared bit + * is the first bit within the gap. + */ + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * Second node is adjacent to the first node. + * Because it is adjacent, its mask should be non-zero. If all + * its mask bits are set, then with it being adjacent, it should + * have had the mask bits moved into the num_after setting of the + * previous node. + */ + return node_first_clear(nodep2, 0); +} + +/* Returns index of next bit set within s after the index given by prev. + * Returns 0 if there are no bits after prev that are set. + */ +sparsebit_idx_t sparsebit_next_set(struct sparsebit *s, + sparsebit_idx_t prev) +{ + sparsebit_idx_t lowest_possible = prev + 1; + sparsebit_idx_t start; + struct node *nodep; + + /* A bit after the highest index can't be set. */ + if (lowest_possible == 0) + return 0; + + /* + * Find the leftmost 'candidate' overlapping or to the right + * of lowest_possible. + */ + struct node *candidate = NULL; + + /* True iff lowest_possible is within candidate */ + bool contains = false; + + /* + * Find node that describes setting of bit at lowest_possible. + * If such a node doesn't exist, find the node with the lowest + * starting index that is > lowest_possible. + */ + for (nodep = s->root; nodep;) { + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) + >= lowest_possible) { + candidate = nodep; + if (candidate->idx <= lowest_possible) { + contains = true; + break; + } + nodep = nodep->left; + } else { + nodep = nodep->right; + } + } + if (!candidate) + return 0; + + assert(candidate->mask != 0); + + /* Does the candidate node describe the setting of lowest_possible? */ + if (!contains) { + /* + * Candidate doesn't describe setting of bit at lowest_possible. + * Candidate points to the first node with a starting index + * > lowest_possible. + */ + assert(candidate->idx > lowest_possible); + + return node_first_set(candidate, 0); + } + + /* + * Candidate describes setting of bit at lowest_possible. + * Note: although the node describes the setting of the bit + * at lowest_possible, its possible that its setting and the + * setting of all latter bits described by this node are 0. + * For now, just handle the cases where this node describes + * a bit at or after an index of lowest_possible that is set. + */ + start = lowest_possible - candidate->idx; + + if (start < MASK_BITS && candidate->mask >= (1 << start)) + return node_first_set(candidate, start); + + if (candidate->num_after) { + sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS; + + return lowest_possible < first_num_after_idx + ? first_num_after_idx : lowest_possible; + } + + /* + * Although candidate node describes setting of bit at + * the index of lowest_possible, all bits at that index and + * latter that are described by candidate are cleared. With + * this, the next bit is the first bit in the next node, if + * such a node exists. If a next node doesn't exist, then + * there is no next set bit. + */ + candidate = node_next(s, candidate); + if (!candidate) + return 0; + + return node_first_set(candidate, 0); +} + +/* Returns index of next bit cleared within s after the index given by prev. + * Returns 0 if there are no bits after prev that are cleared. + */ +sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s, + sparsebit_idx_t prev) +{ + sparsebit_idx_t lowest_possible = prev + 1; + sparsebit_idx_t idx; + struct node *nodep1, *nodep2; + + /* A bit after the highest index can't be set. */ + if (lowest_possible == 0) + return 0; + + /* + * Does a node describing the setting of lowest_possible exist? + * If not, the bit at lowest_possible is cleared. + */ + nodep1 = node_find(s, lowest_possible); + if (!nodep1) + return lowest_possible; + + /* Does a mask bit in node 1 describe the next cleared bit. */ + for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++) + if (!(nodep1->mask & (1 << idx))) + return nodep1->idx + idx; + + /* + * Next cleared bit is not described by node 1. If there + * isn't a next node, then next cleared bit is described + * by bit after the bits described by the first node. + */ + nodep2 = node_next(s, nodep1); + if (!nodep2) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * There is a second node. + * If it is not adjacent to the first node, then there is a gap + * of cleared bits between the nodes, and the next cleared bit + * is the first bit within the gap. + */ + if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx) + return nodep1->idx + MASK_BITS + nodep1->num_after; + + /* + * Second node is adjacent to the first node. + * Because it is adjacent, its mask should be non-zero. If all + * its mask bits are set, then with it being adjacent, it should + * have had the mask bits moved into the num_after setting of the + * previous node. + */ + return node_first_clear(nodep2, 0); +} + +/* Starting with the index 1 greater than the index given by start, finds + * and returns the index of the first sequence of num consecutively set + * bits. Returns a value of 0 of no such sequence exists. + */ +sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + sparsebit_idx_t idx; + + assert(num >= 1); + + for (idx = sparsebit_next_set(s, start); + idx != 0 && idx + num - 1 >= idx; + idx = sparsebit_next_set(s, idx)) { + assert(sparsebit_is_set(s, idx)); + + /* + * Does the sequence of bits starting at idx consist of + * num set bits? + */ + if (sparsebit_is_set_num(s, idx, num)) + return idx; + + /* + * Sequence of set bits at idx isn't large enough. + * Skip this entire sequence of set bits. + */ + idx = sparsebit_next_clear(s, idx); + if (idx == 0) + return 0; + } + + return 0; +} + +/* Starting with the index 1 greater than the index given by start, finds + * and returns the index of the first sequence of num consecutively cleared + * bits. Returns a value of 0 of no such sequence exists. + */ +sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + sparsebit_idx_t idx; + + assert(num >= 1); + + for (idx = sparsebit_next_clear(s, start); + idx != 0 && idx + num - 1 >= idx; + idx = sparsebit_next_clear(s, idx)) { + assert(sparsebit_is_clear(s, idx)); + + /* + * Does the sequence of bits starting at idx consist of + * num cleared bits? + */ + if (sparsebit_is_clear_num(s, idx, num)) + return idx; + + /* + * Sequence of cleared bits at idx isn't large enough. + * Skip this entire sequence of cleared bits. + */ + idx = sparsebit_next_set(s, idx); + if (idx == 0) + return 0; + } + + return 0; +} + +/* Sets the bits * in the inclusive range idx through idx + num - 1. */ +void sparsebit_set_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + struct node *nodep, *next; + unsigned int n1; + sparsebit_idx_t idx; + sparsebit_num_t n; + sparsebit_idx_t middle_start, middle_end; + + assert(num > 0); + assert(start + num - 1 >= start); + + /* + * Leading - bits before first mask boundary. + * + * TODO(lhuemill): With some effort it may be possible to + * replace the following loop with a sequential sequence + * of statements. High level sequence would be: + * + * 1. Use node_split() to force node that describes setting + * of idx to be within the mask portion of a node. + * 2. Form mask of bits to be set. + * 3. Determine number of mask bits already set in the node + * and store in a local variable named num_already_set. + * 4. Set the appropriate mask bits within the node. + * 5. Increment struct sparsebit_pvt num_set member + * by the number of bits that were actually set. + * Exclude from the counts bits that were already set. + * 6. Before returning to the caller, use node_reduce() to + * handle the multiple corner cases that this method + * introduces. + */ + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) + bit_set(s, idx); + + /* Middle - bits spanning one or more entire mask */ + middle_start = idx; + middle_end = middle_start + (n & -MASK_BITS) - 1; + if (n >= MASK_BITS) { + nodep = node_split(s, middle_start); + + /* + * As needed, split just after end of middle bits. + * No split needed if end of middle bits is at highest + * supported bit index. + */ + if (middle_end + 1 > middle_end) + (void) node_split(s, middle_end + 1); + + /* Delete nodes that only describe bits within the middle. */ + for (next = node_next(s, nodep); + next && (next->idx < middle_end); + next = node_next(s, nodep)) { + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); + node_rm(s, next); + next = NULL; + } + + /* As needed set each of the mask bits */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (!(nodep->mask & (1 << n1))) { + nodep->mask |= 1 << n1; + s->num_set++; + } + } + + s->num_set -= nodep->num_after; + nodep->num_after = middle_end - middle_start + 1 - MASK_BITS; + s->num_set += nodep->num_after; + + node_reduce(s, nodep); + } + idx = middle_end + 1; + n -= middle_end - middle_start + 1; + + /* Trailing - bits at and beyond last mask boundary */ + assert(n < MASK_BITS); + for (; n > 0; idx++, n--) + bit_set(s, idx); +} + +/* Clears the bits * in the inclusive range idx through idx + num - 1. */ +void sparsebit_clear_num(struct sparsebit *s, + sparsebit_idx_t start, sparsebit_num_t num) +{ + struct node *nodep, *next; + unsigned int n1; + sparsebit_idx_t idx; + sparsebit_num_t n; + sparsebit_idx_t middle_start, middle_end; + + assert(num > 0); + assert(start + num - 1 >= start); + + /* Leading - bits before first mask boundary */ + for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--) + bit_clear(s, idx); + + /* Middle - bits spanning one or more entire mask */ + middle_start = idx; + middle_end = middle_start + (n & -MASK_BITS) - 1; + if (n >= MASK_BITS) { + nodep = node_split(s, middle_start); + + /* + * As needed, split just after end of middle bits. + * No split needed if end of middle bits is at highest + * supported bit index. + */ + if (middle_end + 1 > middle_end) + (void) node_split(s, middle_end + 1); + + /* Delete nodes that only describe bits within the middle. */ + for (next = node_next(s, nodep); + next && (next->idx < middle_end); + next = node_next(s, nodep)) { + assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end); + node_rm(s, next); + next = NULL; + } + + /* As needed clear each of the mask bits */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) { + nodep->mask &= ~(1 << n1); + s->num_set--; + } + } + + /* Clear any bits described by num_after */ + s->num_set -= nodep->num_after; + nodep->num_after = 0; + + /* + * Delete the node that describes the beginning of + * the middle bits and perform any allowed reductions + * with the nodes prev or next of nodep. + */ + node_reduce(s, nodep); + nodep = NULL; + } + idx = middle_end + 1; + n -= middle_end - middle_start + 1; + + /* Trailing - bits at and beyond last mask boundary */ + assert(n < MASK_BITS); + for (; n > 0; idx++, n--) + bit_clear(s, idx); +} + +/* Sets the bit at the index given by idx. */ +void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx) +{ + sparsebit_set_num(s, idx, 1); +} + +/* Clears the bit at the index given by idx. */ +void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx) +{ + sparsebit_clear_num(s, idx, 1); +} + +/* Sets the bits in the entire addressable range of the sparsebit array. */ +void sparsebit_set_all(struct sparsebit *s) +{ + sparsebit_set(s, 0); + sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0); + assert(sparsebit_all_set(s)); +} + +/* Clears the bits in the entire addressable range of the sparsebit array. */ +void sparsebit_clear_all(struct sparsebit *s) +{ + sparsebit_clear(s, 0); + sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0); + assert(!sparsebit_any_set(s)); +} + +static size_t display_range(FILE *stream, sparsebit_idx_t low, + sparsebit_idx_t high, bool prepend_comma_space) +{ + char *fmt_str; + size_t sz; + + /* Determine the printf format string */ + if (low == high) + fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx"; + else + fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx"; + + /* + * When stream is NULL, just determine the size of what would + * have been printed, else print the range. + */ + if (!stream) + sz = snprintf(NULL, 0, fmt_str, low, high); + else + sz = fprintf(stream, fmt_str, low, high); + + return sz; +} + + +/* Dumps to the FILE stream given by stream, the bit settings + * of s. Each line of output is prefixed with the number of + * spaces given by indent. The length of each line is implementation + * dependent and does not depend on the indent amount. The following + * is an example output of a sparsebit array that has bits: + * + * 0x5, 0x8, 0xa:0xe, 0x12 + * + * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18 + * are set. Note that a ':', instead of a '-' is used to specify a range of + * contiguous bits. This is done because '-' is used to specify command-line + * options, and sometimes ranges are specified as command-line arguments. + */ +void sparsebit_dump(FILE *stream, struct sparsebit *s, + unsigned int indent) +{ + size_t current_line_len = 0; + size_t sz; + struct node *nodep; + + if (!sparsebit_any_set(s)) + return; + + /* Display initial indent */ + fprintf(stream, "%*s", indent, ""); + + /* For each node */ + for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) { + unsigned int n1; + sparsebit_idx_t low, high; + + /* For each group of bits in the mask */ + for (n1 = 0; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) { + low = high = nodep->idx + n1; + + for (; n1 < MASK_BITS; n1++) { + if (nodep->mask & (1 << n1)) + high = nodep->idx + n1; + else + break; + } + + if ((n1 == MASK_BITS) && nodep->num_after) + high += nodep->num_after; + + /* + * How much room will it take to display + * this range. + */ + sz = display_range(NULL, low, high, + current_line_len != 0); + + /* + * If there is not enough room, display + * a newline plus the indent of the next + * line. + */ + if (current_line_len + sz > DUMP_LINE_MAX) { + fputs("\n", stream); + fprintf(stream, "%*s", indent, ""); + current_line_len = 0; + } + + /* Display the range */ + sz = display_range(stream, low, high, + current_line_len != 0); + current_line_len += sz; + } + } + + /* + * If num_after and most significant-bit of mask is not + * set, then still need to display a range for the bits + * described by num_after. + */ + if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) { + low = nodep->idx + MASK_BITS; + high = nodep->idx + MASK_BITS + nodep->num_after - 1; + + /* + * How much room will it take to display + * this range. + */ + sz = display_range(NULL, low, high, + current_line_len != 0); + + /* + * If there is not enough room, display + * a newline plus the indent of the next + * line. + */ + if (current_line_len + sz > DUMP_LINE_MAX) { + fputs("\n", stream); + fprintf(stream, "%*s", indent, ""); + current_line_len = 0; + } + + /* Display the range */ + sz = display_range(stream, low, high, + current_line_len != 0); + current_line_len += sz; + } + } + fputs("\n", stream); +} + +/* Validates the internal state of the sparsebit array given by + * s. On error, diagnostic information is printed to stderr and + * abort is called. + */ +void sparsebit_validate_internal(struct sparsebit *s) +{ + bool error_detected = false; + struct node *nodep, *prev = NULL; + sparsebit_num_t total_bits_set = 0; + unsigned int n1; + + /* For each node */ + for (nodep = node_first(s); nodep; + prev = nodep, nodep = node_next(s, nodep)) { + + /* + * Increase total bits set by the number of bits set + * in this node. + */ + for (n1 = 0; n1 < MASK_BITS; n1++) + if (nodep->mask & (1 << n1)) + total_bits_set++; + + total_bits_set += nodep->num_after; + + /* + * Arbitrary choice as to whether a mask of 0 is allowed + * or not. For diagnostic purposes it is beneficial to + * have only one valid means to represent a set of bits. + * To support this an arbitrary choice has been made + * to not allow a mask of zero. + */ + if (nodep->mask == 0) { + fprintf(stderr, "Node mask of zero, " + "nodep: %p nodep->mask: 0x%x", + nodep, nodep->mask); + error_detected = true; + break; + } + + /* + * Validate num_after is not greater than the max index + * - the number of mask bits. The num_after member + * uses 0-based indexing and thus has no value that + * represents all bits set. This limitation is handled + * by requiring a non-zero mask. With a non-zero mask, + * MASK_BITS worth of bits are described by the mask, + * which makes the largest needed num_after equal to: + * + * (~(sparsebit_num_t) 0) - MASK_BITS + 1 + */ + if (nodep->num_after + > (~(sparsebit_num_t) 0) - MASK_BITS + 1) { + fprintf(stderr, "num_after too large, " + "nodep: %p nodep->num_after: 0x%lx", + nodep, nodep->num_after); + error_detected = true; + break; + } + + /* Validate node index is divisible by the mask size */ + if (nodep->idx % MASK_BITS) { + fprintf(stderr, "Node index not divisible by " + "mask size,\n" + " nodep: %p nodep->idx: 0x%lx " + "MASK_BITS: %lu\n", + nodep, nodep->idx, MASK_BITS); + error_detected = true; + break; + } + + /* + * Validate bits described by node don't wrap beyond the + * highest supported index. + */ + if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) { + fprintf(stderr, "Bits described by node wrap " + "beyond highest supported index,\n" + " nodep: %p nodep->idx: 0x%lx\n" + " MASK_BITS: %lu nodep->num_after: 0x%lx", + nodep, nodep->idx, MASK_BITS, nodep->num_after); + error_detected = true; + break; + } + + /* Check parent pointers. */ + if (nodep->left) { + if (nodep->left->parent != nodep) { + fprintf(stderr, "Left child parent pointer " + "doesn't point to this node,\n" + " nodep: %p nodep->left: %p " + "nodep->left->parent: %p", + nodep, nodep->left, + nodep->left->parent); + error_detected = true; + break; + } + } + + if (nodep->right) { + if (nodep->right->parent != nodep) { + fprintf(stderr, "Right child parent pointer " + "doesn't point to this node,\n" + " nodep: %p nodep->right: %p " + "nodep->right->parent: %p", + nodep, nodep->right, + nodep->right->parent); + error_detected = true; + break; + } + } + + if (!nodep->parent) { + if (s->root != nodep) { + fprintf(stderr, "Unexpected root node, " + "s->root: %p nodep: %p", + s->root, nodep); + error_detected = true; + break; + } + } + + if (prev) { + /* + * Is index of previous node before index of + * current node? + */ + if (prev->idx >= nodep->idx) { + fprintf(stderr, "Previous node index " + ">= current node index,\n" + " prev: %p prev->idx: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx", + prev, prev->idx, nodep, nodep->idx); + error_detected = true; + break; + } + + /* + * Nodes occur in asscending order, based on each + * nodes starting index. + */ + if ((prev->idx + MASK_BITS + prev->num_after - 1) + >= nodep->idx) { + fprintf(stderr, "Previous node bit range " + "overlap with current node bit range,\n" + " prev: %p prev->idx: 0x%lx " + "prev->num_after: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx " + "nodep->num_after: 0x%lx\n" + " MASK_BITS: %lu", + prev, prev->idx, prev->num_after, + nodep, nodep->idx, nodep->num_after, + MASK_BITS); + error_detected = true; + break; + } + + /* + * When the node has all mask bits set, it shouldn't + * be adjacent to the last bit described by the + * previous node. + */ + if (nodep->mask == ~(mask_t) 0 && + prev->idx + MASK_BITS + prev->num_after == nodep->idx) { + fprintf(stderr, "Current node has mask with " + "all bits set and is adjacent to the " + "previous node,\n" + " prev: %p prev->idx: 0x%lx " + "prev->num_after: 0x%lx\n" + " nodep: %p nodep->idx: 0x%lx " + "nodep->num_after: 0x%lx\n" + " MASK_BITS: %lu", + prev, prev->idx, prev->num_after, + nodep, nodep->idx, nodep->num_after, + MASK_BITS); + + error_detected = true; + break; + } + } + } + + if (!error_detected) { + /* + * Is sum of bits set in each node equal to the count + * of total bits set. + */ + if (s->num_set != total_bits_set) { + fprintf(stderr, "Number of bits set missmatch,\n" + " s->num_set: 0x%lx total_bits_set: 0x%lx", + s->num_set, total_bits_set); + + error_detected = true; + } + } + + if (error_detected) { + fputs(" dump_internal:\n", stderr); + sparsebit_dump_internal(stderr, s, 4); + abort(); + } +} + + +#ifdef FUZZ +/* A simple but effective fuzzing driver. Look for bugs with the help + * of some invariants and of a trivial representation of sparsebit. + * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let + * afl-fuzz do the magic. :) + */ + +#include <stdlib.h> +#include <assert.h> + +struct range { + sparsebit_idx_t first, last; + bool set; +}; + +struct sparsebit *s; +struct range ranges[1000]; +int num_ranges; + +static bool get_value(sparsebit_idx_t idx) +{ + int i; + + for (i = num_ranges; --i >= 0; ) + if (ranges[i].first <= idx && idx <= ranges[i].last) + return ranges[i].set; + + return false; +} + +static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last) +{ + sparsebit_num_t num; + sparsebit_idx_t next; + + if (first < last) { + num = last - first + 1; + } else { + num = first - last + 1; + first = last; + last = first + num - 1; + } + + switch (code) { + case 0: + sparsebit_set(s, first); + assert(sparsebit_is_set(s, first)); + assert(!sparsebit_is_clear(s, first)); + assert(sparsebit_any_set(s)); + assert(!sparsebit_all_clear(s)); + if (get_value(first)) + return; + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = first, .set = true }; + break; + case 1: + sparsebit_clear(s, first); + assert(!sparsebit_is_set(s, first)); + assert(sparsebit_is_clear(s, first)); + assert(sparsebit_any_clear(s)); + assert(!sparsebit_all_set(s)); + if (!get_value(first)) + return; + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = first, .set = false }; + break; + case 2: + assert(sparsebit_is_set(s, first) == get_value(first)); + assert(sparsebit_is_clear(s, first) == !get_value(first)); + break; + case 3: + if (sparsebit_any_set(s)) + assert(get_value(sparsebit_first_set(s))); + if (sparsebit_any_clear(s)) + assert(!get_value(sparsebit_first_clear(s))); + sparsebit_set_all(s); + assert(!sparsebit_any_clear(s)); + assert(sparsebit_all_set(s)); + num_ranges = 0; + ranges[num_ranges++] = (struct range) + { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true }; + break; + case 4: + if (sparsebit_any_set(s)) + assert(get_value(sparsebit_first_set(s))); + if (sparsebit_any_clear(s)) + assert(!get_value(sparsebit_first_clear(s))); + sparsebit_clear_all(s); + assert(!sparsebit_any_set(s)); + assert(sparsebit_all_clear(s)); + num_ranges = 0; + break; + case 5: + next = sparsebit_next_set(s, first); + assert(next == 0 || next > first); + assert(next == 0 || get_value(next)); + break; + case 6: + next = sparsebit_next_clear(s, first); + assert(next == 0 || next > first); + assert(next == 0 || !get_value(next)); + break; + case 7: + next = sparsebit_next_clear(s, first); + if (sparsebit_is_set_num(s, first, num)) { + assert(next == 0 || next > last); + if (first) + next = sparsebit_next_set(s, first - 1); + else if (sparsebit_any_set(s)) + next = sparsebit_first_set(s); + else + return; + assert(next == first); + } else { + assert(sparsebit_is_clear(s, first) || next <= last); + } + break; + case 8: + next = sparsebit_next_set(s, first); + if (sparsebit_is_clear_num(s, first, num)) { + assert(next == 0 || next > last); + if (first) + next = sparsebit_next_clear(s, first - 1); + else if (sparsebit_any_clear(s)) + next = sparsebit_first_clear(s); + else + return; + assert(next == first); + } else { + assert(sparsebit_is_set(s, first) || next <= last); + } + break; + case 9: + sparsebit_set_num(s, first, num); + assert(sparsebit_is_set_num(s, first, num)); + assert(!sparsebit_is_clear_num(s, first, num)); + assert(sparsebit_any_set(s)); + assert(!sparsebit_all_clear(s)); + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = last, .set = true }; + break; + case 10: + sparsebit_clear_num(s, first, num); + assert(!sparsebit_is_set_num(s, first, num)); + assert(sparsebit_is_clear_num(s, first, num)); + assert(sparsebit_any_clear(s)); + assert(!sparsebit_all_set(s)); + if (num_ranges == 1000) + exit(0); + ranges[num_ranges++] = (struct range) + { .first = first, .last = last, .set = false }; + break; + case 11: + sparsebit_validate_internal(s); + break; + default: + break; + } +} + +unsigned char get8(void) +{ + int ch; + + ch = getchar(); + if (ch == EOF) + exit(0); + return ch; +} + +uint64_t get64(void) +{ + uint64_t x; + + x = get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + x = (x << 8) | get8(); + return (x << 8) | get8(); +} + +int main(void) +{ + s = sparsebit_alloc(); + for (;;) { + uint8_t op = get8() & 0xf; + uint64_t first = get64(); + uint64_t last = get64(); + + operate(op, first, last); + } +} +#endif diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c new file mode 100644 index 000000000000..0231bc0aae7b --- /dev/null +++ b/tools/testing/selftests/kvm/lib/vmx.c @@ -0,0 +1,243 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +/* Create a default VM for VMX tests. + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm * +vm_create_default_vmx(uint32_t vcpuid, vmx_guest_code_t guest_code) +{ + struct kvm_cpuid2 *cpuid; + struct kvm_vm *vm; + vm_vaddr_t vmxon_vaddr; + vm_paddr_t vmxon_paddr; + vm_vaddr_t vmcs_vaddr; + vm_paddr_t vmcs_paddr; + + vm = vm_create_default(vcpuid, (void *) guest_code); + + /* Enable nesting in CPUID */ + vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid()); + + /* Setup of a region of guest memory for the vmxon region. */ + vmxon_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmxon_paddr = addr_gva2gpa(vm, vmxon_vaddr); + + /* Setup of a region of guest memory for a vmcs. */ + vmcs_vaddr = vm_vaddr_alloc(vm, getpagesize(), 0, 0, 0); + vmcs_paddr = addr_gva2gpa(vm, vmcs_vaddr); + + vcpu_args_set(vm, vcpuid, 4, vmxon_vaddr, vmxon_paddr, vmcs_vaddr, + vmcs_paddr); + + return vm; +} + +void prepare_for_vmx_operation(void) +{ + uint64_t feature_control; + uint64_t required; + unsigned long cr0; + unsigned long cr4; + + /* + * Ensure bits in CR0 and CR4 are valid in VMX operation: + * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx. + * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx. + */ + __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory"); + cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1); + cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0); + __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory"); + + __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory"); + cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1); + cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0); + /* Enable VMX operation */ + cr4 |= X86_CR4_VMXE; + __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory"); + + /* + * Configure IA32_FEATURE_CONTROL MSR to allow VMXON: + * Bit 0: Lock bit. If clear, VMXON causes a #GP. + * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON + * outside of SMX causes a #GP. + */ + required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; + required |= FEATURE_CONTROL_LOCKED; + feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL); + if ((feature_control & required) != required) + wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required); +} + +/* + * Initialize the control fields to the most basic settings possible. + */ +static inline void init_vmcs_control_fields(void) +{ + vmwrite(VIRTUAL_PROCESSOR_ID, 0); + vmwrite(POSTED_INTR_NV, 0); + + vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PINBASED_CTLS)); + vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_PROCBASED_CTLS)); + vmwrite(EXCEPTION_BITMAP, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0); + vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */ + vmwrite(CR3_TARGET_COUNT, 0); + vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) | + VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */ + vmwrite(VM_EXIT_MSR_STORE_COUNT, 0); + vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) | + VM_ENTRY_IA32E_MODE); /* 64-bit guest */ + vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0); + vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0); + vmwrite(TPR_THRESHOLD, 0); + vmwrite(SECONDARY_VM_EXEC_CONTROL, 0); + + vmwrite(CR0_GUEST_HOST_MASK, 0); + vmwrite(CR4_GUEST_HOST_MASK, 0); + vmwrite(CR0_READ_SHADOW, get_cr0()); + vmwrite(CR4_READ_SHADOW, get_cr4()); +} + +/* + * Initialize the host state fields based on the current host state, with + * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch + * or vmresume. + */ +static inline void init_vmcs_host_state(void) +{ + uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS); + + vmwrite(HOST_ES_SELECTOR, get_es()); + vmwrite(HOST_CS_SELECTOR, get_cs()); + vmwrite(HOST_SS_SELECTOR, get_ss()); + vmwrite(HOST_DS_SELECTOR, get_ds()); + vmwrite(HOST_FS_SELECTOR, get_fs()); + vmwrite(HOST_GS_SELECTOR, get_gs()); + vmwrite(HOST_TR_SELECTOR, get_tr()); + + if (exit_controls & VM_EXIT_LOAD_IA32_PAT) + vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT)); + if (exit_controls & VM_EXIT_LOAD_IA32_EFER) + vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER)); + if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + vmwrite(HOST_IA32_PERF_GLOBAL_CTRL, + rdmsr(MSR_CORE_PERF_GLOBAL_CTRL)); + + vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS)); + + vmwrite(HOST_CR0, get_cr0()); + vmwrite(HOST_CR3, get_cr3()); + vmwrite(HOST_CR4, get_cr4()); + vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE)); + vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE)); + vmwrite(HOST_TR_BASE, + get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr()))); + vmwrite(HOST_GDTR_BASE, get_gdt_base()); + vmwrite(HOST_IDTR_BASE, get_idt_base()); + vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP)); + vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP)); +} + +/* + * Initialize the guest state fields essentially as a clone of + * the host state fields. Some host state fields have fixed + * values, and we set the corresponding guest state fields accordingly. + */ +static inline void init_vmcs_guest_state(void *rip, void *rsp) +{ + vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR)); + vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR)); + vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR)); + vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR)); + vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR)); + vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR)); + vmwrite(GUEST_LDTR_SELECTOR, 0); + vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR)); + vmwrite(GUEST_INTR_STATUS, 0); + vmwrite(GUEST_PML_INDEX, 0); + + vmwrite(VMCS_LINK_POINTER, -1ll); + vmwrite(GUEST_IA32_DEBUGCTL, 0); + vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT)); + vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER)); + vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL, + vmreadz(HOST_IA32_PERF_GLOBAL_CTRL)); + + vmwrite(GUEST_ES_LIMIT, -1); + vmwrite(GUEST_CS_LIMIT, -1); + vmwrite(GUEST_SS_LIMIT, -1); + vmwrite(GUEST_DS_LIMIT, -1); + vmwrite(GUEST_FS_LIMIT, -1); + vmwrite(GUEST_GS_LIMIT, -1); + vmwrite(GUEST_LDTR_LIMIT, -1); + vmwrite(GUEST_TR_LIMIT, 0x67); + vmwrite(GUEST_GDTR_LIMIT, 0xffff); + vmwrite(GUEST_IDTR_LIMIT, 0xffff); + vmwrite(GUEST_ES_AR_BYTES, + vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_CS_AR_BYTES, 0xa09b); + vmwrite(GUEST_SS_AR_BYTES, 0xc093); + vmwrite(GUEST_DS_AR_BYTES, + vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_FS_AR_BYTES, + vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_GS_AR_BYTES, + vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093); + vmwrite(GUEST_LDTR_AR_BYTES, 0x10000); + vmwrite(GUEST_TR_AR_BYTES, 0x8b); + vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0); + vmwrite(GUEST_ACTIVITY_STATE, 0); + vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS)); + vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0); + + vmwrite(GUEST_CR0, vmreadz(HOST_CR0)); + vmwrite(GUEST_CR3, vmreadz(HOST_CR3)); + vmwrite(GUEST_CR4, vmreadz(HOST_CR4)); + vmwrite(GUEST_ES_BASE, 0); + vmwrite(GUEST_CS_BASE, 0); + vmwrite(GUEST_SS_BASE, 0); + vmwrite(GUEST_DS_BASE, 0); + vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE)); + vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE)); + vmwrite(GUEST_LDTR_BASE, 0); + vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE)); + vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE)); + vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE)); + vmwrite(GUEST_DR7, 0x400); + vmwrite(GUEST_RSP, (uint64_t)rsp); + vmwrite(GUEST_RIP, (uint64_t)rip); + vmwrite(GUEST_RFLAGS, 2); + vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP)); + vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP)); +} + +void prepare_vmcs(void *guest_rip, void *guest_rsp) +{ + init_vmcs_control_fields(); + init_vmcs_host_state(); + init_vmcs_guest_state(guest_rip, guest_rsp); +} diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c new file mode 100644 index 000000000000..2f17675f4275 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86.c @@ -0,0 +1,700 @@ +/* + * tools/testing/selftests/kvm/lib/x86.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include "test_util.h" +#include "kvm_util.h" +#include "kvm_util_internal.h" +#include "x86.h" + +/* Minimum physical address used for virtual translation tables. */ +#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 + +/* Virtual translation table structure declarations */ +struct pageMapL4Entry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryPointerEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageDirectoryEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t ignored_06:1; + uint64_t page_size:1; + uint64_t ignored_11_08:4; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +struct pageTableEntry { + uint64_t present:1; + uint64_t writable:1; + uint64_t user:1; + uint64_t write_through:1; + uint64_t cache_disable:1; + uint64_t accessed:1; + uint64_t dirty:1; + uint64_t reserved_07:1; + uint64_t global:1; + uint64_t ignored_11_09:3; + uint64_t address:40; + uint64_t ignored_62_52:11; + uint64_t execute_disable:1; +}; + +/* Register Dump + * + * Input Args: + * indent - Left margin indent amount + * regs - register + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the registers given by regs, to the FILE stream + * given by steam. + */ +void regs_dump(FILE *stream, struct kvm_regs *regs, + uint8_t indent) +{ + fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx " + "rcx: 0x%.16llx rdx: 0x%.16llx\n", + indent, "", + regs->rax, regs->rbx, regs->rcx, regs->rdx); + fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx " + "rsp: 0x%.16llx rbp: 0x%.16llx\n", + indent, "", + regs->rsi, regs->rdi, regs->rsp, regs->rbp); + fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx " + "r10: 0x%.16llx r11: 0x%.16llx\n", + indent, "", + regs->r8, regs->r9, regs->r10, regs->r11); + fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx " + "r14: 0x%.16llx r15: 0x%.16llx\n", + indent, "", + regs->r12, regs->r13, regs->r14, regs->r15); + fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n", + indent, "", + regs->rip, regs->rflags); +} + +/* Segment Dump + * + * Input Args: + * indent - Left margin indent amount + * segment - KVM segment + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM segment given by segment, to the FILE stream + * given by steam. + */ +static void segment_dump(FILE *stream, struct kvm_segment *segment, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x " + "selector: 0x%.4x type: 0x%.2x\n", + indent, "", segment->base, segment->limit, + segment->selector, segment->type); + fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x " + "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n", + indent, "", segment->present, segment->dpl, + segment->db, segment->s, segment->l); + fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x " + "unusable: 0x%.2x padding: 0x%.2x\n", + indent, "", segment->g, segment->avl, + segment->unusable, segment->padding); +} + +/* dtable Dump + * + * Input Args: + * indent - Left margin indent amount + * dtable - KVM dtable + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the KVM dtable given by dtable, to the FILE stream + * given by steam. + */ +static void dtable_dump(FILE *stream, struct kvm_dtable *dtable, + uint8_t indent) +{ + fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x " + "padding: 0x%.4x 0x%.4x 0x%.4x\n", + indent, "", dtable->base, dtable->limit, + dtable->padding[0], dtable->padding[1], dtable->padding[2]); +} + +/* System Register Dump + * + * Input Args: + * indent - Left margin indent amount + * sregs - System registers + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps the state of the system registers given by sregs, to the FILE stream + * given by steam. + */ +void sregs_dump(FILE *stream, struct kvm_sregs *sregs, + uint8_t indent) +{ + unsigned int i; + + fprintf(stream, "%*scs:\n", indent, ""); + segment_dump(stream, &sregs->cs, indent + 2); + fprintf(stream, "%*sds:\n", indent, ""); + segment_dump(stream, &sregs->ds, indent + 2); + fprintf(stream, "%*ses:\n", indent, ""); + segment_dump(stream, &sregs->es, indent + 2); + fprintf(stream, "%*sfs:\n", indent, ""); + segment_dump(stream, &sregs->fs, indent + 2); + fprintf(stream, "%*sgs:\n", indent, ""); + segment_dump(stream, &sregs->gs, indent + 2); + fprintf(stream, "%*sss:\n", indent, ""); + segment_dump(stream, &sregs->ss, indent + 2); + fprintf(stream, "%*str:\n", indent, ""); + segment_dump(stream, &sregs->tr, indent + 2); + fprintf(stream, "%*sldt:\n", indent, ""); + segment_dump(stream, &sregs->ldt, indent + 2); + + fprintf(stream, "%*sgdt:\n", indent, ""); + dtable_dump(stream, &sregs->gdt, indent + 2); + fprintf(stream, "%*sidt:\n", indent, ""); + dtable_dump(stream, &sregs->idt, indent + 2); + + fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx " + "cr3: 0x%.16llx cr4: 0x%.16llx\n", + indent, "", + sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4); + fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx " + "apic_base: 0x%.16llx\n", + indent, "", + sregs->cr8, sregs->efer, sregs->apic_base); + + fprintf(stream, "%*sinterrupt_bitmap:\n", indent, ""); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) { + fprintf(stream, "%*s%.16llx\n", indent + 2, "", + sregs->interrupt_bitmap[i]); + } +} + +void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot) +{ + int rc; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + /* If needed, create page map l4 table. */ + if (!vm->pgd_created) { + vm_paddr_t paddr = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot); + vm->pgd = paddr; + + /* Set pointer to pgd tables in all the VCPUs that + * have already been created. Future VCPUs will have + * the value set as each one is created. + */ + for (struct vcpu *vcpu = vm->vcpu_head; vcpu; + vcpu = vcpu->next) { + struct kvm_sregs sregs; + + /* Obtain the current system register settings */ + vcpu_sregs_get(vm, vcpu->id, &sregs); + + /* Set and store the pointer to the start of the + * pgd tables. + */ + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpu->id, &sregs); + } + + vm->pgd_created = true; + } +} + +/* VM Virtual Page Map + * + * Input Args: + * vm - Virtual Machine + * vaddr - VM Virtual Address + * paddr - VM Physical Address + * pgd_memslot - Memory region slot for new virtual translation tables + * + * Output Args: None + * + * Return: None + * + * Within the VM given by vm, creates a virtual translation for the page + * starting at vaddr to the page starting at paddr. + */ +void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, + uint32_t pgd_memslot) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + TEST_ASSERT((vaddr % vm->page_size) == 0, + "Virtual address not on page boundary,\n" + " vaddr: 0x%lx vm->page_size: 0x%x", + vaddr, vm->page_size); + TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, + (vaddr >> vm->page_shift)), + "Invalid virtual address, vaddr: 0x%lx", + vaddr); + TEST_ASSERT((paddr % vm->page_size) == 0, + "Physical address not on page boundary,\n" + " paddr: 0x%lx vm->page_size: 0x%x", + paddr, vm->page_size); + TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn, + "Physical address beyond beyond maximum supported,\n" + " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", + paddr, vm->max_gfn, vm->page_size); + + index[0] = (vaddr >> 12) & 0x1ffu; + index[1] = (vaddr >> 21) & 0x1ffu; + index[2] = (vaddr >> 30) & 0x1ffu; + index[3] = (vaddr >> 39) & 0x1ffu; + + /* Allocate page directory pointer table if not present. */ + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) { + pml4e[index[3]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pml4e[index[3]].writable = true; + pml4e[index[3]].present = true; + } + + /* Allocate page directory table if not present. */ + struct pageDirectoryPointerEntry *pdpe; + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) { + pdpe[index[2]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pdpe[index[2]].writable = true; + pdpe[index[2]].present = true; + } + + /* Allocate page table if not present. */ + struct pageDirectoryEntry *pde; + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) { + pde[index[1]].address = vm_phy_page_alloc(vm, + KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot) + >> vm->page_shift; + pde[index[1]].writable = true; + pde[index[1]].present = true; + } + + /* Fill in page table entry. */ + struct pageTableEntry *pte; + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + pte[index[0]].address = paddr >> vm->page_shift; + pte[index[0]].writable = true; + pte[index[0]].present = 1; +} + +/* Virtual Translation Tables Dump + * + * Input Args: + * vm - Virtual Machine + * indent - Left margin indent amount + * + * Output Args: + * stream - Output FILE stream + * + * Return: None + * + * Dumps to the FILE stream given by stream, the contents of all the + * virtual translation tables for the VM given by vm. + */ +void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent) +{ + struct pageMapL4Entry *pml4e, *pml4e_start; + struct pageDirectoryPointerEntry *pdpe, *pdpe_start; + struct pageDirectoryEntry *pde, *pde_start; + struct pageTableEntry *pte, *pte_start; + + if (!vm->pgd_created) + return; + + fprintf(stream, "%*s " + " no\n", indent, ""); + fprintf(stream, "%*s index hvaddr gpaddr " + "addr w exec dirty\n", + indent, ""); + pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm, + vm->pgd); + for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) { + pml4e = &pml4e_start[n1]; + if (!pml4e->present) + continue; + fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u " + " %u\n", + indent, "", + pml4e - pml4e_start, pml4e, + addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address, + pml4e->writable, pml4e->execute_disable); + + pdpe_start = addr_gpa2hva(vm, pml4e->address + * vm->page_size); + for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) { + pdpe = &pdpe_start[n2]; + if (!pdpe->present) + continue; + fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx " + "%u %u\n", + indent, "", + pdpe - pdpe_start, pdpe, + addr_hva2gpa(vm, pdpe), + (uint64_t) pdpe->address, pdpe->writable, + pdpe->execute_disable); + + pde_start = addr_gpa2hva(vm, + pdpe->address * vm->page_size); + for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) { + pde = &pde_start[n3]; + if (!pde->present) + continue; + fprintf(stream, "%*spde 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u\n", + indent, "", pde - pde_start, pde, + addr_hva2gpa(vm, pde), + (uint64_t) pde->address, pde->writable, + pde->execute_disable); + + pte_start = addr_gpa2hva(vm, + pde->address * vm->page_size); + for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) { + pte = &pte_start[n4]; + if (!pte->present) + continue; + fprintf(stream, "%*spte 0x%-3zx %p " + "0x%-12lx 0x%-10lx %u %u " + " %u 0x%-10lx\n", + indent, "", + pte - pte_start, pte, + addr_hva2gpa(vm, pte), + (uint64_t) pte->address, + pte->writable, + pte->execute_disable, + pte->dirty, + ((uint64_t) n1 << 27) + | ((uint64_t) n2 << 18) + | ((uint64_t) n3 << 9) + | ((uint64_t) n4)); + } + } + } + } +} + +/* Set Unusable Segment + * + * Input Args: None + * + * Output Args: + * segp - Pointer to segment register + * + * Return: None + * + * Sets the segment register pointed to by segp to an unusable state. + */ +static void kvm_seg_set_unusable(struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->unusable = true; +} + +/* Set Long Mode Flat Kernel Code Segment + * + * Input Args: + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a code segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_code_64bit(uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed + * | kFlagCodeReadable + */ + segp->g = true; + segp->l = true; + segp->present = 1; +} + +/* Set Long Mode Flat Kernel Data Segment + * + * Input Args: + * selector - selector value + * + * Output Args: + * segp - Pointer to KVM segment + * + * Return: None + * + * Sets up the KVM segment pointed to by segp, to be a data segment + * with the selector value given by selector. + */ +static void kvm_seg_set_kernel_data_64bit(uint16_t selector, + struct kvm_segment *segp) +{ + memset(segp, 0, sizeof(*segp)); + segp->selector = selector; + segp->limit = 0xFFFFFFFFu; + segp->s = 0x1; /* kTypeCodeData */ + segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed + * | kFlagDataWritable + */ + segp->g = true; + segp->present = true; +} + +/* Address Guest Virtual to Guest Physical + * + * Input Args: + * vm - Virtual Machine + * gpa - VM virtual address + * + * Output Args: None + * + * Return: + * Equivalent VM physical address + * + * Translates the VM virtual address given by gva to a VM physical + * address and then locates the memory region containing the VM + * physical address, within the VM given by vm. When found, the host + * virtual address providing the memory to the vm physical address is returned. + * A TEST_ASSERT failure occurs if no region containing translated + * VM virtual address exists. + */ +vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) +{ + uint16_t index[4]; + struct pageMapL4Entry *pml4e; + struct pageDirectoryPointerEntry *pdpe; + struct pageDirectoryEntry *pde; + struct pageTableEntry *pte; + void *hva; + + TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use " + "unknown or unsupported guest mode, mode: 0x%x", vm->mode); + + index[0] = (gva >> 12) & 0x1ffu; + index[1] = (gva >> 21) & 0x1ffu; + index[2] = (gva >> 30) & 0x1ffu; + index[3] = (gva >> 39) & 0x1ffu; + + if (!vm->pgd_created) + goto unmapped_gva; + pml4e = addr_gpa2hva(vm, vm->pgd); + if (!pml4e[index[3]].present) + goto unmapped_gva; + + pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size); + if (!pdpe[index[2]].present) + goto unmapped_gva; + + pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size); + if (!pde[index[1]].present) + goto unmapped_gva; + + pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size); + if (!pte[index[0]].present) + goto unmapped_gva; + + return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu); + +unmapped_gva: + TEST_ASSERT(false, "No mapping for vm virtual address, " + "gva: 0x%lx", gva); +} + +void vcpu_setup(struct kvm_vm *vm, int vcpuid) +{ + struct kvm_sregs sregs; + + /* Set mode specific system register values. */ + vcpu_sregs_get(vm, vcpuid, &sregs); + + switch (vm->mode) { + case VM_MODE_FLAT48PG: + sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; + sregs.cr4 |= X86_CR4_PAE; + sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); + + kvm_seg_set_unusable(&sregs.ldt); + kvm_seg_set_kernel_code_64bit(0x8, &sregs.cs); + kvm_seg_set_kernel_data_64bit(0x10, &sregs.ds); + kvm_seg_set_kernel_data_64bit(0x10, &sregs.es); + break; + + default: + TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode); + } + vcpu_sregs_set(vm, vcpuid, &sregs); + + /* If virtual translation table have been setup, set system register + * to point to the tables. It's okay if they haven't been setup yet, + * in that the code that sets up the virtual translation tables, will + * go back through any VCPUs that have already been created and set + * their values. + */ + if (vm->pgd_created) { + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + + sregs.cr3 = vm->pgd; + vcpu_sregs_set(vm, vcpuid, &sregs); + } +} +/* Adds a vCPU with reasonable defaults (i.e., a stack) + * + * Input Args: + * vcpuid - The id of the VCPU to add to the VM. + * guest_code - The vCPU's entry point + */ +void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code) +{ + struct kvm_mp_state mp_state; + struct kvm_regs regs; + vm_vaddr_t stack_vaddr; + stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(), + DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0); + + /* Create VCPU */ + vm_vcpu_add(vm, vcpuid); + + /* Setup guest general purpose registers */ + vcpu_regs_get(vm, vcpuid, ®s); + regs.rflags = regs.rflags | 0x2; + regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()); + regs.rip = (unsigned long) guest_code; + vcpu_regs_set(vm, vcpuid, ®s); + + /* Setup the MP state */ + mp_state.mp_state = 0; + vcpu_set_mp_state(vm, vcpuid, &mp_state); +} + +/* VM VCPU CPUID Set + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU id + * cpuid - The CPUID values to set. + * + * Output Args: None + * + * Return: void + * + * Set the VCPU's CPUID. + */ +void vcpu_set_cpuid(struct kvm_vm *vm, + uint32_t vcpuid, struct kvm_cpuid2 *cpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int rc; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid); + TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i", + rc, errno); + +} +/* Create a VM with reasonable defaults + * + * Input Args: + * vcpuid - The id of the single VCPU to add to the VM. + * guest_code - The vCPU's entry point + * + * Output Args: None + * + * Return: + * Pointer to opaque structure that describes the created VM. + */ +struct kvm_vm *vm_create_default(uint32_t vcpuid, void *guest_code) +{ + struct kvm_vm *vm; + + /* Create VM */ + vm = vm_create(VM_MODE_FLAT48PG, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + + /* Setup guest code */ + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + + /* Setup IRQ Chip */ + vm_create_irqchip(vm); + + /* Add the first vCPU. */ + vm_vcpu_add_default(vm, vcpuid, guest_code); + + return vm; +} diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c new file mode 100644 index 000000000000..090fd3f19352 --- /dev/null +++ b/tools/testing/selftests/kvm/set_sregs_test.c @@ -0,0 +1,54 @@ +/* + * KVM_SET_SREGS tests + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * This is a regression test for the bug fixed by the following commit: + * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values") + * + * That bug allowed a user-mode program that called the KVM_SET_SREGS + * ioctl to put a VCPU's local APIC into an invalid state. + * + */ +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" + +#include "kvm_util.h" +#include "x86.h" + +#define VCPU_ID 5 + +int main(int argc, char *argv[]) +{ + struct kvm_sregs sregs; + struct kvm_vm *vm; + int rc; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + /* Create VM */ + vm = vm_create_default(VCPU_ID, NULL); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.apic_base = 1 << 10; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)", + sregs.apic_base); + sregs.apic_base = 1 << 11; + rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs); + TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)", + sregs.apic_base); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c new file mode 100644 index 000000000000..eae1ece3c31b --- /dev/null +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -0,0 +1,254 @@ +/* + * Test for x86 KVM_CAP_SYNC_REGS + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality, + * including requesting an invalid register set, updates to/from values + * in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled. + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" + +#define VCPU_ID 5 +#define PORT_HOST_SYNC 0x1000 + +static void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg0), "S"(arg1) + : "rax"); +} + +#define exit_to_l0(_port, _arg0, _arg1) \ + __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1)) + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition, 0);\ +} while (0) + +void guest_code(void) +{ + for (;;) { + exit_to_l0(PORT_HOST_SYNC, "hello", 0); + asm volatile ("inc %r11"); + } +} + +static void compare_regs(struct kvm_regs *left, struct kvm_regs *right) +{ +#define REG_COMPARE(reg) \ + TEST_ASSERT(left->reg == right->reg, \ + "Register " #reg \ + " values did not match: 0x%llx, 0x%llx\n", \ + left->reg, right->reg) + REG_COMPARE(rax); + REG_COMPARE(rbx); + REG_COMPARE(rcx); + REG_COMPARE(rdx); + REG_COMPARE(rsi); + REG_COMPARE(rdi); + REG_COMPARE(rsp); + REG_COMPARE(rbp); + REG_COMPARE(r8); + REG_COMPARE(r9); + REG_COMPARE(r10); + REG_COMPARE(r11); + REG_COMPARE(r12); + REG_COMPARE(r13); + REG_COMPARE(r14); + REG_COMPARE(r15); + REG_COMPARE(rip); + REG_COMPARE(rflags); +#undef REG_COMPARE +} + +static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right) +{ +} + +static void compare_vcpu_events(struct kvm_vcpu_events *left, + struct kvm_vcpu_events *right) +{ +} + +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + struct kvm_run *run; + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_vcpu_events events; + int rv, cap; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + cap = kvm_check_cap(KVM_CAP_SYNC_REGS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } + + /* Create VM */ + vm = vm_create_default(VCPU_ID, guest_code); + + run = vcpu_state(vm, VCPU_ID); + + /* Request reading invalid register set from VCPU. */ + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + /* Request setting invalid register set into VCPU. */ + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + /* Request and verify all valid register sets. */ + /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Set and verify various register values. */ + run->s.regs.regs.r11 = 0xBAD1DEA; + run->s.regs.sregs.apic_base = 1 << 11; + /* TODO run->s.regs.events.XYZ = ABC; */ + + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11, + "apic_base sync regs value incorrect 0x%llx.", + run->s.regs.sregs.apic_base); + + vcpu_regs_get(vm, VCPU_ID, ®s); + compare_regs(®s, &run->s.regs.regs); + + vcpu_sregs_get(vm, VCPU_ID, &sregs); + compare_sregs(&sregs, &run->s.regs.sregs); + + vcpu_events_get(vm, VCPU_ID, &events); + compare_vcpu_events(&events, &run->s.regs.events); + + /* Clear kvm_dirty_regs bits, verify new s.regs values are + * overwritten with existing guest values. + */ + run->kvm_valid_regs = TEST_SYNC_FIELDS; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xDEADBEEF; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + + /* Clear kvm_valid_regs bits and kvm_dirty_bits. + * Verify s.regs values are not overwritten with existing guest values + * and that guest values are not overwritten with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = 0; + run->s.regs.regs.r11 = 0xAAAA; + regs.r11 = 0xBAC0; + vcpu_regs_set(vm, VCPU_ID, ®s); + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBAC0 + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten + * with existing guest values but that guest values are overwritten + * with kvm_sync_regs values. + */ + run->kvm_valid_regs = 0; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; + run->s.regs.regs.r11 = 0xBBBB; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB, + "r11 sync regs value incorrect 0x%llx.", + run->s.regs.regs.r11); + vcpu_regs_get(vm, VCPU_ID, ®s); + TEST_ASSERT(regs.r11 == 0xBBBB + 1, + "r11 guest value incorrect 0x%llx.", + regs.r11); + + kvm_vm_free(vm); + + return 0; +} diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c new file mode 100644 index 000000000000..d7cb7944a42e --- /dev/null +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -0,0 +1,233 @@ +/* + * gtests/tests/vmx_tsc_adjust_test.c + * + * Copyright (C) 2018, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * + * IA32_TSC_ADJUST test + * + * According to the SDM, "if an execution of WRMSR to the + * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC, + * the logical processor also adds (or subtracts) value X from the + * IA32_TSC_ADJUST MSR. + * + * Note that when L1 doesn't intercept writes to IA32_TSC, a + * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC + * value. + * + * This test verifies that this unusual case is handled correctly. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "x86.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#include "../kselftest.h" + +#ifndef MSR_IA32_TSC_ADJUST +#define MSR_IA32_TSC_ADJUST 0x3b +#endif + +#define PAGE_SIZE 4096 +#define VCPU_ID 5 + +#define TSC_ADJUST_VALUE (1ll << 32) +#define TSC_OFFSET_VALUE -(1ll << 48) + +enum { + PORT_ABORT = 0x1000, + PORT_REPORT, + PORT_DONE, +}; + +struct vmx_page { + vm_vaddr_t virt; + vm_paddr_t phys; +}; + +enum { + VMXON_PAGE = 0, + VMCS_PAGE, + MSR_BITMAP_PAGE, + + NUM_VMX_PAGES, +}; + +struct kvm_single_msr { + struct kvm_msrs header; + struct kvm_msr_entry entry; +} __attribute__((packed)); + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +/* Array of vmx_page descriptors that is shared with the guest. */ +struct vmx_page *vmx_pages; + +#define exit_to_l0(_port, _arg) do_exit_to_l0(_port, (unsigned long) (_arg)) +static void do_exit_to_l0(uint16_t port, unsigned long arg) +{ + __asm__ __volatile__("in %[port], %%al" + : + : [port]"d"(port), "D"(arg) + : "rax"); +} + + +#define GUEST_ASSERT(_condition) do { \ + if (!(_condition)) \ + exit_to_l0(PORT_ABORT, "Failed guest assert: " #_condition); \ +} while (0) + +static void check_ia32_tsc_adjust(int64_t max) +{ + int64_t adjust; + + adjust = rdmsr(MSR_IA32_TSC_ADJUST); + exit_to_l0(PORT_REPORT, adjust); + GUEST_ASSERT(adjust <= max); +} + +static void l2_guest_code(void) +{ + uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE; + + wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_page *vmx_pages) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + uintptr_t save_cr3; + + GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE); + wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + + prepare_for_vmx_operation(); + + /* Enter VMX root operation. */ + *(uint32_t *)vmx_pages[VMXON_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmxon(vmx_pages[VMXON_PAGE].phys)); + + /* Load a VMCS. */ + *(uint32_t *)vmx_pages[VMCS_PAGE].virt = vmcs_revision(); + GUEST_ASSERT(!vmclear(vmx_pages[VMCS_PAGE].phys)); + GUEST_ASSERT(!vmptrld(vmx_pages[VMCS_PAGE].phys)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + vmwrite(MSR_BITMAP, vmx_pages[MSR_BITMAP_PAGE].phys); + vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE); + + /* Jump into L2. First, test failure to load guest CR3. */ + save_cr3 = vmreadz(GUEST_CR3); + vmwrite(GUEST_CR3, -1ull); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == + (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE)); + check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE); + vmwrite(GUEST_CR3, save_cr3); + + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE); + + exit_to_l0(PORT_DONE, 0); +} + +static void allocate_vmx_page(struct vmx_page *page) +{ + vm_vaddr_t virt; + + virt = vm_vaddr_alloc(vm, PAGE_SIZE, 0, 0, 0); + memset(addr_gva2hva(vm, virt), 0, PAGE_SIZE); + + page->virt = virt; + page->phys = addr_gva2gpa(vm, virt); +} + +static vm_vaddr_t allocate_vmx_pages(void) +{ + vm_vaddr_t vmx_pages_vaddr; + int i; + + vmx_pages_vaddr = vm_vaddr_alloc( + vm, sizeof(struct vmx_page) * NUM_VMX_PAGES, 0, 0, 0); + + vmx_pages = (void *) addr_gva2hva(vm, vmx_pages_vaddr); + + for (i = 0; i < NUM_VMX_PAGES; i++) + allocate_vmx_page(&vmx_pages[i]); + + return vmx_pages_vaddr; +} + +void report(int64_t val) +{ + printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n", + val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t vmx_pages_vaddr; + struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + + if (!(entry->ecx & CPUID_VMX)) { + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); + } + + vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); + + /* Allocate VMX pages and shared descriptors (vmx_pages). */ + vmx_pages_vaddr = allocate_vmx_pages(); + vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_vaddr); + + for (;;) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct kvm_regs regs; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + vcpu_regs_get(vm, VCPU_ID, ®s); + + switch (run->io.port) { + case PORT_ABORT: + TEST_ASSERT(false, "%s", (const char *) regs.rdi); + /* NOT REACHED */ + case PORT_REPORT: + report(regs.rdi); + break; + case PORT_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port); + } + } + + kvm_vm_free(vm); +done: + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7de482a0519d..6466294366dc 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -19,24 +19,43 @@ TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) .ONESHELL: +define RUN_TEST_PRINT_RESULT + TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \ + echo $$TEST_HDR_MSG; \ + echo "========================================"; \ + if [ ! -x $$TEST ]; then \ + echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + else \ + cd `dirname $$TEST` > /dev/null; \ + if [ "X$(summary)" != "X" ]; then \ + (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \ + echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \ + (if [ $$? -eq $$skip ]; then \ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \ + else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + fi;) \ + else \ + (./$$BASENAME_TEST && \ + echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \ + (if [ $$? -eq $$skip ]; then \ + echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \ + else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \ + fi;) \ + fi; \ + cd - > /dev/null; \ + fi; +endef + define RUN_TESTS - @test_num=`echo 0`; - @echo "TAP version 13"; - @for TEST in $(1); do \ + @export KSFT_TAP_LEVEL=`echo 1`; \ + test_num=`echo 0`; \ + skip=`echo 4`; \ + echo "TAP version 13"; \ + for TEST in $(1); do \ BASENAME_TEST=`basename $$TEST`; \ test_num=`echo $$test_num+1 | bc`; \ - echo "selftests: $$BASENAME_TEST"; \ - echo "========================================"; \ - if [ ! -x $$TEST ]; then \ - echo "selftests: Warning: file $$BASENAME_TEST is not executable, correct this.";\ - echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; \ - else \ - if [ "X$(summary)" != "X" ]; then \ - cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ - else \ - cd `dirname $$TEST` > /dev/null; (./$$BASENAME_TEST && echo "ok 1..$$test_num selftests: $$BASENAME_TEST [PASS]") || echo "not ok 1..$$test_num selftests: $$BASENAME_TEST [FAIL]"; cd - > /dev/null;\ - fi; \ - fi; \ + $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \ done; endef @@ -75,9 +94,18 @@ else endif define EMIT_TESTS - @for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ + @test_num=`echo 0`; \ + for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"selftests: $$BASENAME_TEST [PASS]\") || echo \"selftests: $$BASENAME_TEST [FAIL]\""; \ + test_num=`echo $$test_num+1 | bc`; \ + TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \ + echo "echo $$TEST_HDR_MSG"; \ + if [ ! -x $$TEST ]; then \ + echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \ + echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \ + else + echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \ + fi; \ done; endef diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile index 08360060ab14..70d5711e3ac8 100644 --- a/tools/testing/selftests/lib/Makefile +++ b/tools/testing/selftests/lib/Makefile @@ -3,6 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -TEST_PROGS := printf.sh bitmap.sh +TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh include ../lib.mk diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh index 4dee4d2a8bbe..5a90006d1aea 100755 --- a/tools/testing/selftests/lib/bitmap.sh +++ b/tools/testing/selftests/lib/bitmap.sh @@ -1,9 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # Runs bitmap infrastructure tests using test_bitmap kernel module if ! /sbin/modprobe -q -n test_bitmap; then - echo "bitmap: [SKIP]" - exit 77 + echo "bitmap: module test_bitmap is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q test_bitmap; then diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh index b363994e5e11..78e7483c8d60 100755 --- a/tools/testing/selftests/lib/prime_numbers.sh +++ b/tools/testing/selftests/lib/prime_numbers.sh @@ -2,9 +2,12 @@ # SPDX-License-Identifier: GPL-2.0 # Checks fast/slow prime_number generation for inconsistencies -if ! /sbin/modprobe -q -r prime_numbers; then - echo "prime_numbers: [SKIP]" - exit 77 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +if ! /sbin/modprobe -q -n prime_numbers; then + echo "prime_numbers: module prime_numbers is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q prime_numbers selftest=65536; then diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh index 0c37377fd7d4..45a23e2d64ad 100755 --- a/tools/testing/selftests/lib/printf.sh +++ b/tools/testing/selftests/lib/printf.sh @@ -1,9 +1,13 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # Runs printf infrastructure using test_printf kernel module + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + if ! /sbin/modprobe -q -n test_printf; then - echo "printf: [SKIP]" - exit 77 + echo "printf: module test_printf is not found [SKIP]" + exit $ksft_skip fi if /sbin/modprobe -q test_printf; then diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile new file mode 100644 index 000000000000..6e7761ab3536 --- /dev/null +++ b/tools/testing/selftests/locking/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for locking/ww_mutx selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := ww_mutex.sh + +include ../lib.mk diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh index 2c3d6b1878c2..91e4ac7566af 100644..100755 --- a/tools/testing/selftests/locking/ww_mutex.sh +++ b/tools/testing/selftests/locking/ww_mutex.sh @@ -1,6 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # Runs API tests for struct ww_mutex (Wait/Wound mutexes) +if ! /sbin/modprobe -q -n test-ww_mutex; then + echo "ww_mutex: module test-ww_mutex is not found [SKIP]" + exit $ksft_skip +fi if /sbin/modprobe -q test-ww_mutex; then /sbin/modprobe -q -r test-ww_mutex diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile index c82cec2497de..60826d7d37d4 100644 --- a/tools/testing/selftests/media_tests/Makefile +++ b/tools/testing/selftests/media_tests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +# +CFLAGS += -I../ -I../../../../usr/include/ TEST_GEN_PROGS := media_device_test media_device_open video_device_test -all: $(TEST_GEN_PROGS) include ../lib.mk diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c index a5ce5434bafd..93183a37b133 100644 --- a/tools/testing/selftests/media_tests/media_device_open.c +++ b/tools/testing/selftests/media_tests/media_device_open.c @@ -34,6 +34,8 @@ #include <sys/stat.h> #include <linux/media.h> +#include "../kselftest.h" + int main(int argc, char **argv) { int opt; @@ -61,10 +63,8 @@ int main(int argc, char **argv) } } - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - exit(-1); - } + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); /* Open Media device and keep it open */ fd = open(media_device, O_RDWR); diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c index 421a367e4bb3..4b9953359e40 100644 --- a/tools/testing/selftests/media_tests/media_device_test.c +++ b/tools/testing/selftests/media_tests/media_device_test.c @@ -39,6 +39,8 @@ #include <time.h> #include <linux/media.h> +#include "../kselftest.h" + int main(int argc, char **argv) { int opt; @@ -66,10 +68,8 @@ int main(int argc, char **argv) } } - if (getuid() != 0) { - printf("Please run the test as root - Exiting.\n"); - exit(-1); - } + if (getuid() != 0) + ksft_exit_skip("Please run the test as root - Exiting.\n"); /* Generate random number of interations */ srand((unsigned int) time(NULL)); @@ -88,7 +88,7 @@ int main(int argc, char **argv) "other Oops in the dmesg. Enable KaSan kernel\n" "config option for use-after-free error detection.\n\n"); - printf("Running test for %d iternations\n", count); + printf("Running test for %d iterations\n", count); while (count > 0) { ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi); diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c index 22bffd55a523..6793f8ecc8e7 100644 --- a/tools/testing/selftests/membarrier/membarrier_test.c +++ b/tools/testing/selftests/membarrier/membarrier_test.c @@ -293,10 +293,9 @@ static int test_membarrier_query(void) } ksft_exit_fail_msg("sys_membarrier() failed\n"); } - if (!(ret & MEMBARRIER_CMD_GLOBAL)) { - ksft_test_result_fail("sys_membarrier() CMD_GLOBAL query failed\n"); - ksft_exit_fail_msg("sys_membarrier is not supported.\n"); - } + if (!(ret & MEMBARRIER_CMD_GLOBAL)) + ksft_exit_skip( + "sys_membarrier unsupported: CMD_GLOBAL not found.\n"); ksft_test_result_pass("sys_membarrier available\n"); return 0; diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index 0862e6f47a38..53a848109f7b 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -4,9 +4,9 @@ CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ CFLAGS += -I../../../../usr/include/ -TEST_PROGS := run_tests.sh -TEST_FILES := run_fuse_test.sh -TEST_GEN_FILES := memfd_test fuse_mnt fuse_test +TEST_GEN_PROGS := memfd_test +TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh +TEST_GEN_FILES := fuse_mnt fuse_test fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags) diff --git a/tools/testing/selftests/memfd/run_tests.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh index c2d41ed81b24..fb633eeb0290 100755 --- a/tools/testing/selftests/memfd/run_tests.sh +++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh @@ -1,11 +1,8 @@ #!/bin/bash # please run as root -# -# Normal tests requiring no special resources -# -./run_fuse_test.sh -./memfd_test +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 # # To test memfd_create with hugetlbfs, there needs to be hpages_test @@ -29,12 +26,13 @@ if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then nr_hugepgs=`cat /proc/sys/vm/nr_hugepages` hpages_needed=`expr $hpages_test - $freepgs` + if [ $UID != 0 ]; then + echo "Please run memfd with hugetlbfs test as root" + exit $ksft_skip + fi + echo 3 > /proc/sys/vm/drop_caches echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages - if [ $? -ne 0 ]; then - echo "Please run this test as root" - exit 1 - fi while read name size unit; do if [ "$name" = "HugePages_Free:" ]; then freepgs=$size @@ -53,7 +51,7 @@ if [ $freepgs -lt $hpages_test ]; then fi printf "Not enough huge pages available (%d < %d)\n" \ $freepgs $needpgs - exit 1 + exit $ksft_skip fi # diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index 686da510f989..e0a625e34f40 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -4,11 +4,8 @@ all: include ../lib.mk TEST_PROGS := mem-on-off-test.sh -override RUN_TESTS := @./mem-on-off-test.sh -r 2 && echo "selftests: memory-hotplug [PASS]" || echo "selftests: memory-hotplug [FAIL]" - -override EMIT_TESTS := echo "$(subst @,,$(RUN_TESTS))" run_full_test: - @/bin/bash ./mem-on-off-test.sh && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index ae2c790d0880..b37585e6aa38 100755 --- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh @@ -3,30 +3,33 @@ SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + prerequisite() { msg="skip all tests:" if [ $UID != 0 ]; then echo $msg must be run as root >&2 - exit 0 + exit $ksft_skip fi SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` if [ ! -d "$SYSFS" ]; then echo $msg sysfs is not mounted >&2 - exit 0 + exit $ksft_skip fi if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then echo $msg memory hotplug is not supported >&2 - exit 0 + exit $ksft_skip fi if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then echo $msg no hot-pluggable memory >&2 - exit 0 + exit $ksft_skip fi } @@ -133,7 +136,8 @@ offline_memory_expect_fail() error=-12 priority=0 -ratio=10 +# Run with default of ratio=2 for Kselftest run +ratio=2 retval=0 while getopts e:hp:r: opt; do diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index e094f71c6dbc..026890744215 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -3,15 +3,7 @@ CFLAGS = -Wall \ -O2 -TEST_GEN_PROGS := unprivileged-remount-test +TEST_PROGS := run_tests.sh +TEST_GEN_FILES := unprivileged-remount-test include ../lib.mk - -override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ - then \ - ./unprivileged-remount-test ; \ - else \ - echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ - fi -override EMIT_TESTS := echo "$(RUN_TESTS)" - diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh new file mode 100755 index 000000000000..4ab8f507dcba --- /dev/null +++ b/tools/testing/selftests/mount/run_tests.sh @@ -0,0 +1,12 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Run mount selftests +if [ -f /proc/self/uid_map ] ; then + ./unprivileged-remount-test ; +else + echo "WARN: No /proc/self/uid_map exist, test skipped." ; + exit $ksft_skip +fi diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 743d3f9e5918..8a58055fc1f5 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,17 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += -O2 LDLIBS = -lrt -lpthread -lpopt + TEST_GEN_PROGS := mq_open_tests mq_perf_tests include ../lib.mk - -override define RUN_TESTS - @$(OUTPUT)/mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" - @$(OUTPUT)/mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" -endef - -override define EMIT_TESTS - echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" - echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" -endef - diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c index e0a74bd207a5..9403ac01ba11 100644 --- a/tools/testing/selftests/mqueue/mq_open_tests.c +++ b/tools/testing/selftests/mqueue/mq_open_tests.c @@ -33,6 +33,8 @@ #include <mqueue.h> #include <error.h> +#include "../kselftest.h" + static char *usage = "Usage:\n" " %s path\n" @@ -53,6 +55,7 @@ int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize; int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize; FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize; char *queue_path; +char *default_queue_path = "/test1"; mqd_t queue = -1; static inline void __set(FILE *stream, int value, char *err_msg); @@ -238,35 +241,33 @@ int main(int argc, char *argv[]) struct mq_attr attr, result; if (argc != 2) { - fprintf(stderr, "Must pass a valid queue name\n\n"); - fprintf(stderr, usage, argv[0]); - exit(1); - } + printf("Using Default queue path - %s\n", default_queue_path); + queue_path = default_queue_path; + } else { /* * Although we can create a msg queue with a non-absolute path name, * unlink will fail. So, if the name doesn't start with a /, add one * when we save it. */ - if (*argv[1] == '/') - queue_path = strdup(argv[1]); - else { - queue_path = malloc(strlen(argv[1]) + 2); - if (!queue_path) { - perror("malloc()"); - exit(1); + if (*argv[1] == '/') + queue_path = strdup(argv[1]); + else { + queue_path = malloc(strlen(argv[1]) + 2); + if (!queue_path) { + perror("malloc()"); + exit(1); + } + queue_path[0] = '/'; + queue_path[1] = 0; + strcat(queue_path, argv[1]); } - queue_path[0] = '/'; - queue_path[1] = 0; - strcat(queue_path, argv[1]); } - if (getuid() != 0) { - fprintf(stderr, "Not running as root, but almost all tests " + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " "require root in order to modify\nsystem settings. " "Exiting.\n"); - exit(1); - } /* Find out what files there are for us to make tweaks in */ def_msgs = fopen(DEF_MSGS, "r+"); diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c index 8188f72de93c..b019e0b8221c 100644 --- a/tools/testing/selftests/mqueue/mq_perf_tests.c +++ b/tools/testing/selftests/mqueue/mq_perf_tests.c @@ -39,6 +39,8 @@ #include <popt.h> #include <error.h> +#include "../kselftest.h" + static char *usage = "Usage:\n" " %s [-c #[,#..] -f] path\n" @@ -626,12 +628,10 @@ int main(int argc, char *argv[]) cpus_to_pin[0] = cpus_online - 1; } - if (getuid() != 0) { - fprintf(stderr, "Not running as root, but almost all tests " + if (getuid() != 0) + ksft_exit_skip("Not running as root, but almost all tests " "require root in order to modify\nsystem settings. " "Exiting.\n"); - exit(1); - } max_msgs = fopen(MAX_MSGS, "r+"); max_msgsize = fopen(MAX_MSGSIZE, "r+"); diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 785fc18a16b4..3ff81a478dbe 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -6,6 +6,7 @@ CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 6a75a3ea44ad..7ba089b33e8b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -7,3 +7,8 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_VETH=y +CONFIG_INET_XFRM_MODE_TUNNEL=y +CONFIG_NET_IPVTI=y +CONFIG_INET6_XFRM_MODE_TUNNEL=y +CONFIG_IPV6_VTI=y +CONFIG_DUMMY=y diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 9164e60d4b66..5baac82b9287 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -5,6 +5,8 @@ # different events. ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 VERBOSE=${VERBOSE:=0} PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} @@ -579,18 +581,18 @@ fib_test() if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip; fi if [ ! -x "$(command -v ip)" ]; then echo "SKIP: Could not run test without ip tool" - exit 0 + exit $ksft_skip fi ip route help 2>&1 | grep -q fibmatch if [ $? -ne 0 ]; then echo "SKIP: iproute2 too old, missing fibmatch" - exit 0 + exit $ksft_skip fi # start clean diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index 903679e0ff31..e3afcb424710 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -8,6 +8,9 @@ # if not they probably have failed earlier in the boot process and their logged error will be catched by another test # +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # this function will try to up the interface # if already up, nothing done # arg1: network interface name @@ -18,7 +21,7 @@ kci_net_start() ip link show "$netdev" |grep -q UP if [ $? -eq 0 ];then echo "SKIP: $netdev: interface already up" - return 0 + return $ksft_skip fi ip link set "$netdev" up @@ -61,12 +64,12 @@ kci_net_setup() ip address show "$netdev" |grep '^[[:space:]]*inet' if [ $? -eq 0 ];then echo "SKIP: $netdev: already have an IP" - return 0 + return $ksft_skip fi # TODO what ipaddr to set ? DHCP ? echo "SKIP: $netdev: set IP address" - return 0 + return $ksft_skip } # test an ethtool command @@ -84,6 +87,7 @@ kci_netdev_ethtool_test() if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then echo "SKIP: $netdev: ethtool $2 not supported" + return $ksft_skip else echo "FAIL: $netdev: ethtool $2" return 1 @@ -104,7 +108,7 @@ kci_netdev_ethtool() ethtool --version 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: ethtool not present" - return 1 + return $ksft_skip fi TMP_ETHTOOL_FEATURES="$(mktemp)" @@ -176,13 +180,13 @@ kci_test_netdev() #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi ip link show 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: Could not run test without the ip tool" - exit 0 + exit $ksft_skip fi TMP_LIST_NETDEV="$(mktemp)" diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 1e428781a625..7514f93e1624 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -43,6 +43,9 @@ # that MTU is properly calculated instead when MTU is not configured from # userspace +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + tests=" pmtu_vti6_exception vti6: PMTU exceptions pmtu_vti4_exception vti4: PMTU exceptions @@ -162,7 +165,7 @@ setup_xfrm6() { } setup() { - [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return 1 + [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip cleanup_done=0 for arg do diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 7f6cd9fdacf3..7ec4fa4d55dc 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -60,6 +60,8 @@ #include "psock_lib.h" +#include "../kselftest.h" + #ifndef bug_on # define bug_on(cond) assert(!(cond)) #endif @@ -825,7 +827,7 @@ static int test_tpacket(int version, int type) fprintf(stderr, "test: skip %s %s since user and kernel " "space have different bit width\n", tpacket_str[version], type_str[type]); - return 0; + return KSFT_SKIP; } sock = pfsocket(version); diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index 365c32e84189..c9f478b40996 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,6 +23,8 @@ #include <unistd.h> #include <numa.h> +#include "../kselftest.h" + static const int PORT = 8888; static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto) @@ -229,7 +231,7 @@ int main(void) int *rcv_fd, nodes; if (numa_available() < 0) - error(1, errno, "no numa api support"); + ksft_exit_skip("no numa api support\n"); nodes = numa_max_node() + 1; diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index e6f485235435..fb3767844e42 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -7,6 +7,9 @@ devdummy="test-dummy0" ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + # set global exit status, but never reset nonzero one. check_err() { @@ -333,7 +336,7 @@ kci_test_vrf() ip link show type vrf 2>/dev/null if [ $? -ne 0 ]; then echo "SKIP: vrf: iproute2 too old" - return 0 + return $ksft_skip fi ip link add "$vrfname" type vrf table 10 @@ -409,7 +412,7 @@ kci_test_encap_fou() ip fou help 2>&1 |grep -q 'Usage: ip fou' if [ $? -ne 0 ];then echo "SKIP: fou: iproute2 too old" - return 1 + return $ksft_skip fi ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null @@ -444,7 +447,7 @@ kci_test_encap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP encap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip netns exec "$testns" ip link set lo up @@ -469,7 +472,7 @@ kci_test_macsec() ip macsec help 2>&1 | grep -q "^Usage: ip macsec" if [ $? -ne 0 ]; then echo "SKIP: macsec: iproute2 too old" - return 0 + return $ksft_skip fi ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on @@ -511,14 +514,14 @@ kci_test_gretap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP gretap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip link help gretap 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: gretap: iproute2 too old" ip netns del "$testns" - return 1 + return $ksft_skip fi # test native tunnel @@ -561,14 +564,14 @@ kci_test_ip6gretap() ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP ip6gretap tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi ip link help ip6gretap 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: ip6gretap: iproute2 too old" ip netns del "$testns" - return 1 + return $ksft_skip fi # test native tunnel @@ -611,13 +614,13 @@ kci_test_erspan() ip link help erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: erspan: iproute2 too old" - return 1 + return $ksft_skip fi ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP erspan tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi # test native tunnel erspan v1 @@ -676,13 +679,13 @@ kci_test_ip6erspan() ip link help ip6erspan 2>&1 | grep -q "^Usage:" if [ $? -ne 0 ];then echo "SKIP: ip6erspan: iproute2 too old" - return 1 + return $ksft_skip fi ip netns add "$testns" if [ $? -ne 0 ]; then echo "SKIP ip6erspan tests: cannot add net namespace $testns" - return 1 + return $ksft_skip fi # test native tunnel ip6erspan v1 @@ -762,14 +765,14 @@ kci_test_rtnl() #check for needed privileges if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" - exit 0 + exit $ksft_skip fi for x in ip tc;do $x -Version 2>/dev/null >/dev/null if [ $? -ne 0 ];then echo "SKIP: Could not run test without the $x tool" - exit 0 + exit $ksft_skip fi done diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore index 04dc1e6ef2ce..9161679b1e1a 100644 --- a/tools/testing/selftests/powerpc/benchmarks/.gitignore +++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore @@ -1,5 +1,7 @@ gettimeofday context_switch +fork +exec_target mmap_bench futex_bench null_syscall diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile index a35058e3766c..b4d7432a0ecd 100644 --- a/tools/testing/selftests/powerpc/benchmarks/Makefile +++ b/tools/testing/selftests/powerpc/benchmarks/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := gettimeofday context_switch mmap_bench futex_bench null_syscall +TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall +TEST_GEN_FILES := exec_target CFLAGS += -O2 @@ -10,3 +11,7 @@ $(TEST_GEN_PROGS): ../harness.c $(OUTPUT)/context_switch: ../utils.c $(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec $(OUTPUT)/context_switch: LDLIBS += -lpthread + +$(OUTPUT)/fork: LDLIBS += -lpthread + +$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c new file mode 100644 index 000000000000..3c9c144192be --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Part of fork context switch microbenchmark. + * + * Copyright 2018, Anton Blanchard, IBM Corp. + */ + +void _exit(int); +void _start(void) +{ + _exit(0); +} diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c new file mode 100644 index 000000000000..d312e638cb37 --- /dev/null +++ b/tools/testing/selftests/powerpc/benchmarks/fork.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Context switch microbenchmark. + * + * Copyright 2018, Anton Blanchard, IBM Corp. + */ + +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <linux/futex.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/shm.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +static unsigned int timeout = 30; + +static void set_cpu(int cpu) +{ + cpu_set_t cpuset; + + if (cpu == -1) + return; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) { + perror("sched_setaffinity"); + exit(1); + } +} + +static void start_process_on(void *(*fn)(void *), void *arg, int cpu) +{ + int pid; + + pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + + if (pid) + return; + + set_cpu(cpu); + + fn(arg); + + exit(0); +} + +static int cpu; +static int do_fork = 0; +static int do_vfork = 0; +static int do_exec = 0; +static char *exec_file; +static int exec_target = 0; +static unsigned long iterations; +static unsigned long iterations_prev; + +static void run_exec(void) +{ + char *const argv[] = { "./exec_target", NULL }; + + if (execve("./exec_target", argv, NULL) == -1) { + perror("execve"); + exit(1); + } +} + +static void bench_fork(void) +{ + while (1) { + pid_t pid = fork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + if (pid == 0) { + if (do_exec) + run_exec(); + _exit(0); + } + pid = waitpid(pid, NULL, 0); + if (pid == -1) { + perror("waitpid"); + exit(1); + } + iterations++; + } +} + +static void bench_vfork(void) +{ + while (1) { + pid_t pid = vfork(); + if (pid == -1) { + perror("fork"); + exit(1); + } + if (pid == 0) { + if (do_exec) + run_exec(); + _exit(0); + } + pid = waitpid(pid, NULL, 0); + if (pid == -1) { + perror("waitpid"); + exit(1); + } + iterations++; + } +} + +static void *null_fn(void *arg) +{ + pthread_exit(NULL); +} + +static void bench_thread(void) +{ + pthread_t tid; + cpu_set_t cpuset; + pthread_attr_t attr; + int rc; + + rc = pthread_attr_init(&attr); + if (rc) { + errno = rc; + perror("pthread_attr_init"); + exit(1); + } + + if (cpu != -1) { + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + + rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset); + if (rc) { + errno = rc; + perror("pthread_attr_setaffinity_np"); + exit(1); + } + } + + while (1) { + rc = pthread_create(&tid, &attr, null_fn, NULL); + if (rc) { + errno = rc; + perror("pthread_create"); + exit(1); + } + rc = pthread_join(tid, NULL); + if (rc) { + errno = rc; + perror("pthread_join"); + exit(1); + } + iterations++; + } +} + +static void sigalrm_handler(int junk) +{ + unsigned long i = iterations; + + printf("%ld\n", i - iterations_prev); + iterations_prev = i; + + if (--timeout == 0) + kill(0, SIGUSR1); + + alarm(1); +} + +static void sigusr1_handler(int junk) +{ + exit(0); +} + +static void *bench_proc(void *arg) +{ + signal(SIGALRM, sigalrm_handler); + alarm(1); + + if (do_fork) + bench_fork(); + else if (do_vfork) + bench_vfork(); + else + bench_thread(); + + return NULL; +} + +static struct option options[] = { + { "fork", no_argument, &do_fork, 1 }, + { "vfork", no_argument, &do_vfork, 1 }, + { "exec", no_argument, &do_exec, 1 }, + { "timeout", required_argument, 0, 's' }, + { "exec-target", no_argument, &exec_target, 1 }, + { NULL }, +}; + +static void usage(void) +{ + fprintf(stderr, "Usage: fork <options> CPU\n\n"); + fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n"); + fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n"); + fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n"); + fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n"); + fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n"); +} + +int main(int argc, char *argv[]) +{ + signed char c; + + while (1) { + int option_index = 0; + + c = getopt_long(argc, argv, "", options, &option_index); + + if (c == -1) + break; + + switch (c) { + case 0: + if (options[option_index].flag != 0) + break; + + usage(); + exit(1); + break; + + case 's': + timeout = atoi(optarg); + break; + + default: + usage(); + exit(1); + } + } + + if (do_fork && do_vfork) { + usage(); + exit(1); + } + if (do_exec && !do_fork && !do_vfork) { + usage(); + exit(1); + } + + if (do_exec) { + char *dirname = strdup(argv[0]); + int i; + i = strlen(dirname) - 1; + while (i) { + if (dirname[i] == '/') { + dirname[i] = '\0'; + if (chdir(dirname) == -1) { + perror("chdir"); + exit(1); + } + break; + } + i--; + } + } + + if (exec_target) { + exit(0); + } + + if (((argc - optind) != 1)) { + cpu = -1; + } else { + cpu = atoi(argv[optind++]); + } + + if (do_exec) + exec_file = argv[0]; + + set_cpu(cpu); + + printf("Using "); + if (do_fork) + printf("fork"); + else if (do_vfork) + printf("vfork"); + else + printf("clone"); + + if (do_exec) + printf(" + exec"); + + printf(" on cpu %d\n", cpu); + + /* Create a new process group so we can signal everyone for exit */ + setpgid(getpid(), getpid()); + + signal(SIGUSR1, sigusr1_handler); + + start_process_on(bench_proc, NULL, cpu); + + while (1) + sleep(3600); + + return 0; +} diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index ac4a52e19e59..eedce3366f64 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -5,8 +5,8 @@ CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST CFLAGS += -maltivec -# Use our CFLAGS for the implicit .S rule -ASFLAGS = $(CFLAGS) +# Use our CFLAGS for the implicit .S rule & set the asm machine type +ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 5c72ff978f27..c0e45d2dde25 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -4,7 +4,7 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \ - $(SIGNAL_CONTEXT_CHK_TESTS) + $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c new file mode 100644 index 000000000000..85d63449243b --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2015, Laurent Dufour, IBM Corp. + * + * Test the kernel's signal returning code to check reclaim is done if the + * sigreturn() is called while in a transaction (suspended since active is + * already dropped trough the system call path). + * + * The kernel must discard the transaction when entering sigreturn, since + * restoring the potential TM SPRS from the signal frame is requiring to not be + * in a transaction. + */ + +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "tm.h" +#include "utils.h" + + +void handler(int sig) +{ + uint64_t ret; + + asm __volatile__( + "li 3,1 ;" + "tbegin. ;" + "beq 1f ;" + "li 3,0 ;" + "tsuspend. ;" + "1: ;" + "std%X[ret] 3, %[ret] ;" + : [ret] "=m"(ret) + : + : "memory", "3", "cr0"); + + if (ret) + exit(1); + + /* + * We return from the signal handle while in a suspended transaction + */ +} + + +int tm_sigreturn(void) +{ + struct sigaction sa; + uint64_t ret = 0; + + SKIP_IF(!have_htm()); + + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = handler; + sigemptyset(&sa.sa_mask); + + if (sigaction(SIGSEGV, &sa, NULL)) + exit(1); + + asm __volatile__( + "tbegin. ;" + "beq 1f ;" + "li 3,0 ;" + "std 3,0(3) ;" /* trigger SEGV */ + "li 3,1 ;" + "std%X[ret] 3,%[ret] ;" + "tend. ;" + "b 2f ;" + "1: ;" + "li 3,2 ;" + "std%X[ret] 3,%[ret] ;" + "2: ;" + : [ret] "=m"(ret) + : + : "memory", "3", "cr0"); + + if (ret != 2) + exit(1); + + exit(0); +} + +int main(void) +{ + return test_harness(tm_sigreturn, "tm_sigreturn"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index e6a0fad2bfd0..156c8e750259 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -80,7 +80,7 @@ bool is_failure(uint64_t condition_reg) return ((condition_reg >> 28) & 0xa) == 0xa; } -void *ping(void *input) +void *tm_una_ping(void *input) { /* @@ -280,7 +280,7 @@ void *ping(void *input) } /* Thread to force context switch */ -void *pong(void *not_used) +void *tm_una_pong(void *not_used) { /* Wait thread get its name "pong". */ if (DEBUG) @@ -311,11 +311,11 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) do { int rc; - /* Bind 'ping' to CPU 0, as specified in 'attr'. */ - rc = pthread_create(&t0, attr, ping, (void *) &flags); + /* Bind to CPU 0, as specified in 'attr'. */ + rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags); if (rc) pr_err(rc, "pthread_create()"); - rc = pthread_setname_np(t0, "ping"); + rc = pthread_setname_np(t0, "tm_una_ping"); if (rc) pr_warn(rc, "pthread_setname_np"); rc = pthread_join(t0, &ret_value); @@ -333,13 +333,15 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) } } -int main(int argc, char **argv) +int tm_unavailable_test(void) { int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; + SKIP_IF(!have_htm()); + /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ CPU_ZERO(&cpuset); CPU_SET(0, &cpuset); @@ -354,12 +356,12 @@ int main(int argc, char **argv) if (rc) pr_err(rc, "pthread_attr_setaffinity_np()"); - rc = pthread_create(&t1, &attr /* Bind 'pong' to CPU 0 */, pong, NULL); + rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL); if (rc) pr_err(rc, "pthread_create()"); /* Name it for systemtap convenience */ - rc = pthread_setname_np(t1, "pong"); + rc = pthread_setname_np(t1, "tm_una_pong"); if (rc) pr_warn(rc, "pthread_create()"); @@ -394,3 +396,9 @@ int main(int argc, char **argv) exit(0); } } + +int main(int argc, char **argv) +{ + test_harness_set_timeout(220); + return test_harness(tm_unavailable_test, "tm_unavailable_test"); +} diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore new file mode 100644 index 000000000000..6c16f77c722c --- /dev/null +++ b/tools/testing/selftests/proc/.gitignore @@ -0,0 +1,8 @@ +/proc-loadavg-001 +/proc-self-map-files-001 +/proc-self-map-files-002 +/proc-self-syscall +/proc-self-wchan +/proc-uptime-001 +/proc-uptime-002 +/read diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile new file mode 100644 index 000000000000..dbb87e56264c --- /dev/null +++ b/tools/testing/selftests/proc/Makefile @@ -0,0 +1,13 @@ +CFLAGS += -Wall -O2 + +TEST_GEN_PROGS := +TEST_GEN_PROGS += proc-loadavg-001 +TEST_GEN_PROGS += proc-self-map-files-001 +TEST_GEN_PROGS += proc-self-map-files-002 +TEST_GEN_PROGS += proc-self-syscall +TEST_GEN_PROGS += proc-self-wchan +TEST_GEN_PROGS += proc-uptime-001 +TEST_GEN_PROGS += proc-uptime-002 +TEST_GEN_PROGS += read + +include ../lib.mk diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config new file mode 100644 index 000000000000..68fbd2b35884 --- /dev/null +++ b/tools/testing/selftests/proc/config @@ -0,0 +1 @@ +CONFIG_PROC_FS=y diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c new file mode 100644 index 000000000000..fcff7047000d --- /dev/null +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -0,0 +1,83 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test that /proc/loadavg correctly reports last pid in pid namespace. */ +#define _GNU_SOURCE +#include <errno.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/wait.h> + +int main(void) +{ + pid_t pid; + int wstatus; + + if (unshare(CLONE_NEWPID) == -1) { + if (errno == ENOSYS || errno == EPERM) + return 2; + return 1; + } + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) { + char buf[128], *p; + int fd; + ssize_t rv; + + fd = open("/proc/loadavg" , O_RDONLY); + if (fd == -1) + return 1; + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 1 */ + if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n')) + return 1; + + pid = fork(); + if (pid == -1) + return 1; + if (pid == 0) + return 0; + if (waitpid(pid, NULL, 0) == -1) + return 1; + + lseek(fd, 0, SEEK_SET); + rv = read(fd, buf, sizeof(buf)); + if (rv < 3) + return 1; + p = buf + rv; + + /* pid 2 */ + if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n')) + return 1; + + return 0; + } + + if (waitpid(pid, &wstatus, 0) == -1) + return 1; + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) + return 0; + return 1; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c new file mode 100644 index 000000000000..4209c64283d6 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-001.c @@ -0,0 +1,82 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0); + if (p == MAP_FAILED) + return 1; + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c new file mode 100644 index 000000000000..6f1f4a6e1ecb --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-map-files-002.c @@ -0,0 +1,85 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* Test readlink /proc/self/map_files/... with address 0. */ +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <sys/mman.h> +#include <stdlib.h> + +static void pass(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1) + exit(1); +} + +static void fail(const char *fmt, unsigned long a, unsigned long b) +{ + char name[64]; + char buf[64]; + + snprintf(name, sizeof(name), fmt, a, b); + if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT) + return; + exit(1); +} + +int main(void) +{ + const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE); + void *p; + int fd; + unsigned long a, b; + + fd = open("/dev/zero", O_RDONLY); + if (fd == -1) + return 1; + + p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0); + if (p == MAP_FAILED) { + if (errno == EPERM) + return 2; + return 1; + } + + a = (unsigned long)p; + b = (unsigned long)p + PAGE_SIZE; + + pass("/proc/self/map_files/%lx-%lx", a, b); + fail("/proc/self/map_files/ %lx-%lx", a, b); + fail("/proc/self/map_files/%lx -%lx", a, b); + fail("/proc/self/map_files/%lx- %lx", a, b); + fail("/proc/self/map_files/%lx-%lx ", a, b); + fail("/proc/self/map_files/0%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-0%lx", a, b); + if (sizeof(long) == 4) { + fail("/proc/self/map_files/100000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-100000000%lx", a, b); + } else if (sizeof(long) == 8) { + fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b); + fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b); + } else + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c new file mode 100644 index 000000000000..5ab5f4810e43 --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -0,0 +1,60 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +static inline ssize_t sys_read(int fd, void *buf, size_t len) +{ + return syscall(SYS_read, fd, buf, len); +} + +int main(void) +{ + char buf1[64]; + char buf2[64]; + int fd; + ssize_t rv; + + fd = open("/proc/self/syscall", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + /* Do direct system call as libc can wrap anything. */ + snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx", + (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2)); + + memset(buf2, 0, sizeof(buf2)); + rv = sys_read(fd, buf2, sizeof(buf2)); + if (rv < 0) + return 1; + if (rv < strlen(buf1)) + return 1; + if (strncmp(buf1, buf2, strlen(buf1)) != 0) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c new file mode 100644 index 000000000000..a38b2fbaa7ad --- /dev/null +++ b/tools/testing/selftests/proc/proc-self-wchan.c @@ -0,0 +1,40 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> + +int main(void) +{ + char buf[64]; + int fd; + + fd = open("/proc/self/wchan", O_RDONLY); + if (fd == -1) { + if (errno == ENOENT) + return 2; + return 1; + } + + buf[0] = '\0'; + if (read(fd, buf, sizeof(buf)) != 1) + return 1; + if (buf[0] != '0') + return 1; + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c new file mode 100644 index 000000000000..781f7a50fc3f --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-001.c @@ -0,0 +1,45 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically. +#undef NDEBUG +#include <assert.h> +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +int main(void) +{ + uint64_t start, u0, u1, i0, i1; + int fd; + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + start = u0; + do { + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } while (u1 - start < 100); + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c new file mode 100644 index 000000000000..30e2b7849089 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -0,0 +1,79 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test that values in /proc/uptime increment monotonically +// while shifting across CPUs. +#define _GNU_SOURCE +#undef NDEBUG +#include <assert.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <stdlib.h> +#include <string.h> + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include "proc-uptime.h" + +static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_getaffinity, pid, len, m); +} + +static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m) +{ + return syscall(SYS_sched_setaffinity, pid, len, m); +} + +int main(void) +{ + unsigned int len; + unsigned long *m; + unsigned int cpu; + uint64_t u0, u1, i0, i1; + int fd; + + /* find out "nr_cpu_ids" */ + m = NULL; + len = 0; + do { + len += sizeof(unsigned long); + free(m); + m = malloc(len); + } while (sys_sched_getaffinity(0, len, m) == -EINVAL); + + fd = open("/proc/uptime", O_RDONLY); + assert(fd >= 0); + + proc_uptime(fd, &u0, &i0); + for (cpu = 0; cpu < len * 8; cpu++) { + memset(m, 0, len); + m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long))); + + /* CPU might not exist, ignore error */ + sys_sched_setaffinity(0, len, m); + + proc_uptime(fd, &u1, &i1); + assert(u1 >= u0); + assert(i1 >= i0); + u0 = u1; + i0 = i1; + } + + return 0; +} diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h new file mode 100644 index 000000000000..0e464b50e9d9 --- /dev/null +++ b/tools/testing/selftests/proc/proc-uptime.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +static unsigned long long xstrtoull(const char *p, char **end) +{ + if (*p == '0') { + *end = (char *)p + 1; + return 0; + } else if ('1' <= *p && *p <= '9') { + unsigned long long val; + + errno = 0; + val = strtoull(p, end, 10); + assert(errno == 0); + return val; + } else + assert(0); +} + +static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle) +{ + uint64_t val1, val2; + char buf[64], *p; + ssize_t rv; + + /* save "p < end" checks */ + memset(buf, 0, sizeof(buf)); + rv = pread(fd, buf, sizeof(buf), 0); + assert(0 <= rv && rv <= sizeof(buf)); + buf[sizeof(buf) - 1] = '\0'; + + p = buf; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == ' '); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *uptime = val1 * 100 + val2; + + p += 4; + + val1 = xstrtoull(p, &p); + assert(p[0] == '.'); + assert('0' <= p[1] && p[1] <= '9'); + assert('0' <= p[2] && p[2] <= '9'); + assert(p[3] == '\n'); + + val2 = (p[1] - '0') * 10 + p[2] - '0'; + *idle = val1 * 100 + val2; + + assert(p + 4 == buf + rv); +} diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c new file mode 100644 index 000000000000..1e73c2232097 --- /dev/null +++ b/tools/testing/selftests/proc/read.c @@ -0,0 +1,147 @@ +/* + * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +// Test +// 1) read of every file in /proc +// 2) readlink of every symlink in /proc +// 3) recursively (1) + (2) for every directory in /proc +// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs +// 5) write to /proc/sysrq-trigger +#undef NDEBUG +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <dirent.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +static inline bool streq(const char *s1, const char *s2) +{ + return strcmp(s1, s2) == 0; +} + +static struct dirent *xreaddir(DIR *d) +{ + struct dirent *de; + + errno = 0; + de = readdir(d); + if (!de && errno != 0) { + exit(1); + } + return de; +} + +static void f_reg(DIR *d, const char *filename) +{ + char buf[4096]; + int fd; + ssize_t rv; + + /* read from /proc/kmsg can block */ + fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK); + if (fd == -1) + return; + rv = read(fd, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); + close(fd); +} + +static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len) +{ + int fd; + ssize_t rv; + + fd = openat(dirfd(d), filename, O_WRONLY); + if (fd == -1) + return; + rv = write(fd, buf, len); + assert((0 <= rv && rv <= len) || rv == -1); + close(fd); +} + +static void f_lnk(DIR *d, const char *filename) +{ + char buf[4096]; + ssize_t rv; + + rv = readlinkat(dirfd(d), filename, buf, sizeof(buf)); + assert((0 <= rv && rv <= sizeof(buf)) || rv == -1); +} + +static void f(DIR *d, unsigned int level) +{ + struct dirent *de; + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, ".")); + + de = xreaddir(d); + assert(de->d_type == DT_DIR); + assert(streq(de->d_name, "..")); + + while ((de = xreaddir(d))) { + assert(!streq(de->d_name, ".")); + assert(!streq(de->d_name, "..")); + + switch (de->d_type) { + DIR *dd; + int fd; + + case DT_REG: + if (level == 0 && streq(de->d_name, "sysrq-trigger")) { + f_reg_write(d, de->d_name, "h", 1); + } else if (level == 1 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else if (level == 3 && streq(de->d_name, "clear_refs")) { + f_reg_write(d, de->d_name, "1", 1); + } else { + f_reg(d, de->d_name); + } + break; + case DT_DIR: + fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY); + if (fd == -1) + continue; + dd = fdopendir(fd); + if (!dd) + continue; + f(dd, level + 1); + closedir(dd); + break; + case DT_LNK: + f_lnk(d, de->d_name); + break; + default: + assert(0); + } + } +} + +int main(void) +{ + DIR *d; + + d = opendir("/proc"); + if (!d) + return 2; + f(d, 0); + return 0; +} diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh new file mode 100755 index 000000000000..98f650c9bf54 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh @@ -0,0 +1,56 @@ +#!/bin/sh +# +# Invoke a text editor on all console.log files for all runs with diagnostics, +# that is, on all such files having a console.log.diags counterpart. +# Note that both console.log.diags and console.log are passed to the +# editor (currently defaulting to "vi"), allowing the user to get an +# idea of what to search for in the console.log file. +# +# Usage: kvm-find-errors.sh directory +# +# The "directory" above should end with the date/time directory, for example, +# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27". + +rundir="${1}" +if test -z "$rundir" -o ! -d "$rundir" +then + echo Usage: $0 directory +fi +editor=${EDITOR-vi} + +# Find builds with errors +files= +for i in ${rundir}/*/Make.out +do + if egrep -q "error:|warning:" < $i + then + egrep "error:|warning:" < $i > $i.diags + files="$files $i.diags $i" + fi +done +if test -n "$files" +then + $editor $files +else + echo No build errors. +fi +if grep -q -e "--buildonly" < ${rundir}/log +then + echo Build-only run, no console logs to check. +fi + +# Find console logs with errors +files= +for i in ${rundir}/*/console.log +do + if test -r $i.diags + then + files="$files $i.diags $i" + fi +done +if test -n "$files" +then + $editor $files +else + echo No errors in console logs. +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh index c2e1bb6d0cba..477ecb1293ab 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh @@ -34,11 +34,15 @@ fi configfile=`echo $i | sed -e 's/^.*\///'` ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'` +stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null | + tail -1 | sed -e 's/^\[[ 0-9.]*] //' | + awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' | + tr -d '\012\015'`" if test -z "$ngps" then - echo "$configfile -------" + echo "$configfile ------- " $stopstate else - title="$configfile ------- $ngps grace periods" + title="$configfile ------- $ngps GPs" dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null` if test -z "$dur" then @@ -46,9 +50,9 @@ else else ngpsps=`awk -v ngps=$ngps -v dur=$dur ' BEGIN { print ngps / dur }' < /dev/null` - title="$title ($ngpsps per second)" + title="$title ($ngpsps/s)" fi - echo $title + echo $title $stopstate nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` if test -z "$nclosecalls" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index f7e988f369dd..c27e97824163 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -48,10 +48,6 @@ do cat $i/Make.oldconfig.err fi parse-build.sh $i/Make.out $configfile - if test "$TORTURE_SUITE" != rcuperf - then - parse-torture.sh $i/console.log $configfile - fi parse-console.sh $i/console.log $configfile if test -r $i/Warnings then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index 5f8fbb0d7c17..c5b0f94341d9 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -267,5 +267,4 @@ then echo Unknown PID, cannot kill qemu command fi -parse-torture.sh $resdir/console.log $title parse-console.sh $resdir/console.log $title diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 08aa7d50ae0e..17293436f551 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -24,57 +24,146 @@ # # Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> +T=${TMPDIR-/tmp}/parse-console.sh.$$ file="$1" title="$2" +trap 'rm -f $T.seq $T.diags' 0 + . functions.sh +# Check for presence and readability of console output file +if test -f "$file" -a -r "$file" +then + : +else + echo $title unreadable console output file: $file + exit 1 +fi if grep -Pq '\x00' < $file then print_warning Console output contains nul bytes, old qemu still running? fi -egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags -if test -s $1.diags +cat /dev/null > $file.diags + +# Check for proper termination, except that rcuperf runs don't indicate this. +if test "$TORTURE_SUITE" != rcuperf then - print_warning Assertion failure in $file $title - # cat $1.diags + # check for abject failure + + if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file + then + nerrs=`grep --binary-files=text '!!!' $file | + tail -1 | + awk ' + { + for (i=NF-8;i<=NF;i++) + sum+=$i; + } + END { print sum }'` + print_bug $title FAILURE, $nerrs instances + exit + fi + + grep --binary-files=text 'torture:.*ver:' $file | + egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | + sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | + awk ' + BEGIN { + ver = 0; + badseq = 0; + } + + { + if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { + badseqno1 = ver; + badseqno2 = $5; + badseqnr = NR; + badseq = 1; + } + ver = $5 + } + + END { + if (badseq) { + if (badseqno1 == badseqno2 && badseqno2 == ver) + print "GP HANG at " ver " torture stat " badseqnr; + else + print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; + } + }' > $T.seq + + if grep -q SUCCESS $file + then + if test -s $T.seq + then + print_warning $title `cat $T.seq` + echo " " $file + exit 2 + fi + else + if grep -q "_HOTPLUG:" $file + then + print_warning HOTPLUG FAILURES $title `cat $T.seq` + echo " " $file + exit 3 + fi + echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages + if test -s $T.seq + then + print_warning $title `cat $T.seq` + fi + exit 2 + fi +fi | tee -a $file.diags + +egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | +grep -v 'ODEBUG: ' | +grep -v 'Warning: unable to open an initial console' > $T.diags +if test -s $T.diags +then + print_warning "Assertion failure in $file $title" + # cat $T.diags summary="" - n_badness=`grep -c Badness $1` + n_badness=`grep -c Badness $file` if test "$n_badness" -ne 0 then summary="$summary Badness: $n_badness" fi - n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'` + n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'` if test "$n_warn" -ne 0 then summary="$summary Warnings: $n_warn" fi - n_bugs=`egrep -c 'BUG|Oops:' $1` + n_bugs=`egrep -c 'BUG|Oops:' $file` if test "$n_bugs" -ne 0 then summary="$summary Bugs: $n_bugs" fi - n_calltrace=`grep -c 'Call Trace:' $1` + n_calltrace=`grep -c 'Call Trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" fi - n_lockdep=`grep -c =========== $1` + n_lockdep=`grep -c =========== $file` if test "$n_badness" -ne 0 then summary="$summary lockdep: $n_badness" fi - n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1` + n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file` if test "$n_stalls" -ne 0 then summary="$summary Stalls: $n_stalls" fi - n_starves=`grep -c 'rcu_.*kthread starved for' $1` + n_starves=`grep -c 'rcu_.*kthread starved for' $file` if test "$n_starves" -ne 0 then summary="$summary Starves: $n_starves" fi print_warning Summary: $summary -else - rm $1.diags + cat $T.diags >> $file.diags +fi +if ! test -s $file.diags +then + rm -f $file.diags fi diff --git a/tools/testing/selftests/rcutorture/bin/parse-torture.sh b/tools/testing/selftests/rcutorture/bin/parse-torture.sh deleted file mode 100755 index 5987e50cfeb4..000000000000 --- a/tools/testing/selftests/rcutorture/bin/parse-torture.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash -# -# Check the console output from a torture run for goodness. -# The "file" is a pathname on the local system, and "title" is -# a text string for error-message purposes. -# -# The file must contain torture output, but can be interspersed -# with other dmesg text, as in console-log output. -# -# Usage: parse-torture.sh file title -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, you can access it online at -# http://www.gnu.org/licenses/gpl-2.0.html. -# -# Copyright (C) IBM Corporation, 2011 -# -# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> - -T=${TMPDIR-/tmp}/parse-torture.sh.$$ -file="$1" -title="$2" - -trap 'rm -f $T.seq' 0 - -. functions.sh - -# check for presence of torture output file. - -if test -f "$file" -a -r "$file" -then - : -else - echo $title unreadable torture output file: $file - exit 1 -fi - -# check for abject failure - -if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file -then - nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'` - print_bug $title FAILURE, $nerrs instances - echo " " $url - exit -fi - -grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | -awk ' -BEGIN { - ver = 0; - badseq = 0; - } - - { - if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { - badseqno1 = ver; - badseqno2 = $5; - badseqnr = NR; - badseq = 1; - } - ver = $5 - } - -END { - if (badseq) { - if (badseqno1 == badseqno2 && badseqno2 == ver) - print "GP HANG at " ver " torture stat " badseqnr; - else - print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr; - } - }' > $T.seq - -if grep -q SUCCESS $file -then - if test -s $T.seq - then - print_warning $title $title `cat $T.seq` - echo " " $file - exit 2 - fi -else - if grep -q "_HOTPLUG:" $file - then - print_warning HOTPLUG FAILURES $title `cat $T.seq` - echo " " $file - exit 3 - fi - echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages - if test -s $T.seq - then - print_warning $title `cat $T.seq` - fi - exit 2 -fi diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore new file mode 100644 index 000000000000..d0ad44f6294a --- /dev/null +++ b/tools/testing/selftests/rtc/.gitignore @@ -0,0 +1,2 @@ +rtctest +setdate diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile new file mode 100644 index 000000000000..de9c8566672a --- /dev/null +++ b/tools/testing/selftests/rtc/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -O3 -Wl,-no-as-needed -Wall +LDFLAGS += -lrt -lpthread -lm + +TEST_GEN_PROGS = rtctest + +TEST_GEN_PROGS_EXTENDED = setdate + +include ../lib.mk diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c new file mode 100644 index 000000000000..e20b017e7073 --- /dev/null +++ b/tools/testing/selftests/rtc/rtctest.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Real Time Clock Driver Test Program + * + * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com> + */ + +#include <errno.h> +#include <fcntl.h> +#include <linux/rtc.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#define NUM_UIE 3 +#define ALARM_DELTA 3 + +static char *rtc_file = "/dev/rtc0"; + +FIXTURE(rtc) { + int fd; +}; + +FIXTURE_SETUP(rtc) { + self->fd = open(rtc_file, O_RDONLY); + ASSERT_NE(-1, self->fd); +} + +FIXTURE_TEARDOWN(rtc) { + close(self->fd); +} + +TEST_F(rtc, date_read) { + int rc; + struct rtc_time rtc_tm; + + /* Read the RTC time/date */ + rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm); + ASSERT_NE(-1, rc); + + TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); +} + +TEST_F(rtc, uie_read) { + int i, rc, irq = 0; + unsigned long data; + + /* Turn on update interrupts */ + rc = ioctl(self->fd, RTC_UIE_ON, 0); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip update IRQs not supported."); + return; + } + + for (i = 0; i < NUM_UIE; i++) { + /* This read will block */ + rc = read(self->fd, &data, sizeof(data)); + ASSERT_NE(-1, rc); + irq++; + } + + EXPECT_EQ(NUM_UIE, irq); + + rc = ioctl(self->fd, RTC_UIE_OFF, 0); + ASSERT_NE(-1, rc); +} + +TEST_F(rtc, uie_select) { + int i, rc, irq = 0; + unsigned long data; + + /* Turn on update interrupts */ + rc = ioctl(self->fd, RTC_UIE_ON, 0); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip update IRQs not supported."); + return; + } + + for (i = 0; i < NUM_UIE; i++) { + struct timeval tv = { .tv_sec = 2 }; + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + ASSERT_NE(0, rc); + + /* This read won't block */ + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + irq++; + } + + EXPECT_EQ(NUM_UIE, irq); + + rc = ioctl(self->fd, RTC_UIE_OFF, 0); + ASSERT_NE(-1, rc); +} + +TEST_F(rtc, alarm_alm_set) { + struct timeval tv = { .tv_sec = ALARM_DELTA + 2 }; + unsigned long data; + struct rtc_time tm; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&tm) + ALARM_DELTA; + gmtime_r(&secs, (struct tm *)&tm); + + rc = ioctl(self->fd, RTC_ALM_SET, &tm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_ALM_READ, &tm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d:%02d:%02d.", + tm.tm_hour, tm.tm_min, tm.tm_sec); + + /* Enable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_ON, 0); + ASSERT_NE(-1, rc); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + EXPECT_NE(0, rc); + + /* Disable alarm interrupts */ + rc = ioctl(self->fd, RTC_AIE_OFF, 0); + ASSERT_NE(-1, rc); + + if (rc == 0) + return; + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + TH_LOG("data: %lx", data); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +TEST_F(rtc, alarm_wkalm_set) { + struct timeval tv = { .tv_sec = ALARM_DELTA + 2 }; + struct rtc_wkalrm alarm = { 0 }; + struct rtc_time tm; + unsigned long data; + fd_set readfds; + time_t secs, new; + int rc; + + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); + ASSERT_NE(-1, rc); + + secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA; + gmtime_r(&secs, (struct tm *)&alarm.time); + + alarm.enabled = 1; + + rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); + if (rc == -1) { + ASSERT_EQ(EINVAL, errno); + TH_LOG("skip alarms are not supported."); + return; + } + + rc = ioctl(self->fd, RTC_WKALM_RD, &alarm); + ASSERT_NE(-1, rc); + + TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.", + alarm.time.tm_mday, alarm.time.tm_mon + 1, + alarm.time.tm_year + 1900, alarm.time.tm_hour, + alarm.time.tm_min, alarm.time.tm_sec); + + FD_ZERO(&readfds); + FD_SET(self->fd, &readfds); + + rc = select(self->fd + 1, &readfds, NULL, NULL, &tv); + ASSERT_NE(-1, rc); + EXPECT_NE(0, rc); + + rc = read(self->fd, &data, sizeof(unsigned long)); + ASSERT_NE(-1, rc); + + rc = ioctl(self->fd, RTC_RD_TIME, &tm); + ASSERT_NE(-1, rc); + + new = timegm((struct tm *)&tm); + ASSERT_EQ(new, secs); +} + +static void __attribute__((constructor)) +__constructor_order_last(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; +} + +int main(int argc, char **argv) +{ + switch (argc) { + case 2: + rtc_file = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]); + return 1; + } + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/timers/rtctest_setdate.c b/tools/testing/selftests/rtc/setdate.c index 2cb78489eca4..2cb78489eca4 100644 --- a/tools/testing/selftests/timers/rtctest_setdate.c +++ b/tools/testing/selftests/rtc/setdate.c diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 5df609950a66..e1473234968d 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -134,11 +134,15 @@ struct seccomp_data { #endif #ifndef SECCOMP_FILTER_FLAG_TSYNC -#define SECCOMP_FILTER_FLAG_TSYNC 1 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) #endif #ifndef SECCOMP_FILTER_FLAG_LOG -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#endif + +#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) #endif #ifndef PTRACE_SECCOMP_GET_METADATA @@ -2072,14 +2076,26 @@ TEST(seccomp_syscall_mode_lock) TEST(detect_seccomp_filter_flags) { unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC, - SECCOMP_FILTER_FLAG_LOG }; + SECCOMP_FILTER_FLAG_LOG, + SECCOMP_FILTER_FLAG_SPEC_ALLOW }; unsigned int flag, all_flags; int i; long ret; /* Test detection of known-good filter flags */ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) { + int bits = 0; + flag = flags[i]; + /* Make sure the flag is a single bit! */ + while (flag) { + if (flag & 0x1) + bits ++; + flag >>= 1; + } + ASSERT_EQ(1, bits); + flag = flags[i]; + ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL); ASSERT_NE(ENOSYS, errno) { TH_LOG("Kernel does not support seccomp syscall!"); @@ -2860,6 +2876,7 @@ TEST(get_metadata) int pipefd[2]; char buf; struct seccomp_metadata md; + long ret; ASSERT_EQ(0, pipe(pipefd)); @@ -2893,16 +2910,26 @@ TEST(get_metadata) ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid)); ASSERT_EQ(pid, waitpid(pid, NULL, 0)); + /* Past here must not use ASSERT or child process is never killed. */ + md.filter_off = 0; - ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + errno = 0; + ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); + EXPECT_EQ(sizeof(md), ret) { + if (errno == EINVAL) + XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); + } + EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); EXPECT_EQ(md.filter_off, 0); md.filter_off = 1; - ASSERT_EQ(sizeof(md), ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md)); + ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); + EXPECT_EQ(sizeof(md), ret); EXPECT_EQ(md.flags, 0); EXPECT_EQ(md.filter_off, 1); +skip: ASSERT_EQ(0, kill(pid, SIGKILL)); } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5b012f4981d4..6f289a49e5ec 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -66,7 +66,7 @@ "cmdUnderTest": "$TC action add action bpf object-file _b.o index 667", "expExitCode": "0", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c default-action pipe.*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9]* tag 3b185187f1855c4c( jited)? default-action pipe.*index 667 ref", "matchCount": "1", "teardown": [ "$TC action flush action bpf", @@ -92,10 +92,15 @@ "cmdUnderTest": "$TC action add action bpf object-file _c.o index 667", "expExitCode": "255", "verifyCmd": "$TC action get action bpf index 667", - "matchPattern": "action order [0-9]*: bpf _b.o:\\[action\\] id [0-9].*index 667 ref", + "matchPattern": "action order [0-9]*: bpf _c.o:\\[action\\] id [0-9].*index 667 ref", "matchCount": "0", "teardown": [ - "$TC action flush action bpf", + [ + "$TC action flush action bpf", + 0, + 1, + 255 + ], "rm -f _c.o" ] }, diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore index 2c8ac8416299..32a9eadb2d4e 100644 --- a/tools/testing/selftests/timers/.gitignore +++ b/tools/testing/selftests/timers/.gitignore @@ -9,7 +9,7 @@ nanosleep nsleep-lat posix_timers raw_skew -rtctest +rtcpie set-2038 set-tai set-timer-lat @@ -19,4 +19,3 @@ valid-adjtimex adjtick set-tz freq-step -rtctest_setdate diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 3496680981f2..c02683cfb6c9 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -5,13 +5,13 @@ LDFLAGS += -lrt -lpthread -lm # these are all "safe" tests that don't modify # system time or require escalated privileges TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ - inconsistency-check raw_skew threadtest rtctest + inconsistency-check raw_skew threadtest rtcpie DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ skew_consistency clocksource-switch freq-step leap-a-day \ leapcrash set-tai set-2038 set-tz -TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) rtctest_setdate +TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) include ../lib.mk diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c new file mode 100644 index 000000000000..47b5bad1b393 --- /dev/null +++ b/tools/testing/selftests/timers/rtcpie.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Real Time Clock Periodic Interrupt test program + * + * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for + * events"), PIE are completely handled using hrtimers, without actually using + * any underlying hardware RTC. + * + */ + +#include <stdio.h> +#include <linux/rtc.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. + */ +static const char default_rtc[] = "/dev/rtc0"; + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data, old_pie_rate; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev] [d]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == EINVAL) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == EINVAL) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + ioctl(fd, RTC_IRQP_SET, old_pie_rate); + + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c deleted file mode 100644 index 411eff625e66..000000000000 --- a/tools/testing/selftests/timers/rtctest.c +++ /dev/null @@ -1,403 +0,0 @@ -/* - * Real Time Clock Driver Test/Example Program - * - * Compile with: - * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest - * - * Copyright (C) 1996, Paul Gortmaker. - * - * Released under the GNU General Public License, version 2, - * included herein by reference. - * - */ - -#include <stdio.h> -#include <linux/rtc.h> -#include <sys/ioctl.h> -#include <sys/time.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#include <stdlib.h> -#include <errno.h> - -#ifndef ARRAY_SIZE -# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#endif - -/* - * This expects the new RTC class driver framework, working with - * clocks that will often not be clones of what the PC-AT had. - * Use the command line to specify another RTC if you need one. - */ -static const char default_rtc[] = "/dev/rtc0"; - -static struct rtc_time cutoff_dates[] = { - { - .tm_year = 70, /* 1970 -1900 */ - .tm_mday = 1, - }, - /* signed time_t 19/01/2038 3:14:08 */ - { - .tm_year = 138, - .tm_mday = 19, - }, - { - .tm_year = 138, - .tm_mday = 20, - }, - { - .tm_year = 199, /* 2099 -1900 */ - .tm_mday = 1, - }, - { - .tm_year = 200, /* 2100 -1900 */ - .tm_mday = 1, - }, - /* unsigned time_t 07/02/2106 7:28:15*/ - { - .tm_year = 205, - .tm_mon = 1, - .tm_mday = 7, - }, - { - .tm_year = 206, - .tm_mon = 1, - .tm_mday = 8, - }, - /* signed time on 64bit in nanoseconds 12/04/2262 01:47:16*/ - { - .tm_year = 362, - .tm_mon = 3, - .tm_mday = 12, - }, - { - .tm_year = 362, /* 2262 -1900 */ - .tm_mon = 3, - .tm_mday = 13, - }, -}; - -static int compare_dates(struct rtc_time *a, struct rtc_time *b) -{ - if (a->tm_year != b->tm_year || - a->tm_mon != b->tm_mon || - a->tm_mday != b->tm_mday || - a->tm_hour != b->tm_hour || - a->tm_min != b->tm_min || - ((b->tm_sec - a->tm_sec) > 1)) - return 1; - - return 0; -} - -int main(int argc, char **argv) -{ - int i, fd, retval, irqcount = 0, dangerous = 0; - unsigned long tmp, data; - struct rtc_time rtc_tm; - const char *rtc = default_rtc; - struct timeval start, end, diff; - - switch (argc) { - case 3: - if (*argv[2] == 'd') - dangerous = 1; - case 2: - rtc = argv[1]; - /* FALLTHROUGH */ - case 1: - break; - default: - fprintf(stderr, "usage: rtctest [rtcdev] [d]\n"); - return 1; - } - - fd = open(rtc, O_RDONLY); - - if (fd == -1) { - perror(rtc); - exit(errno); - } - - fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); - - /* Turn on update interrupts (one per second) */ - retval = ioctl(fd, RTC_UIE_ON, 0); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...Update IRQs not supported.\n"); - goto test_READ; - } - perror("RTC_UIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", - rtc); - fflush(stderr); - for (i=1; i<6; i++) { - /* This read will block */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); - fflush(stderr); - for (i=1; i<6; i++) { - struct timeval tv = {5, 0}; /* 5 second timeout on select */ - fd_set readfds; - - FD_ZERO(&readfds); - FD_SET(fd, &readfds); - /* The select will wait until an RTC interrupt happens. */ - retval = select(fd+1, &readfds, NULL, NULL, &tv); - if (retval == -1) { - perror("select"); - exit(errno); - } - /* This read won't block unlike the select-less case above. */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Turn off update interrupts */ - retval = ioctl(fd, RTC_UIE_OFF, 0); - if (retval == -1) { - perror("RTC_UIE_OFF ioctl"); - exit(errno); - } - -test_READ: - /* Read the RTC time/date */ - retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", - rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Set the alarm to 5 sec in the future, and check for rollover */ - rtc_tm.tm_sec += 5; - if (rtc_tm.tm_sec >= 60) { - rtc_tm.tm_sec %= 60; - rtc_tm.tm_min++; - } - if (rtc_tm.tm_min == 60) { - rtc_tm.tm_min = 0; - rtc_tm.tm_hour++; - } - if (rtc_tm.tm_hour == 24) - rtc_tm.tm_hour = 0; - - retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...Alarm IRQs not supported.\n"); - goto test_PIE; - } - - perror("RTC_ALM_SET ioctl"); - exit(errno); - } - - /* Read the current alarm settings */ - retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); - if (retval == -1) { - if (errno == EINVAL) { - fprintf(stderr, - "\n...EINVAL reading current alarm setting.\n"); - goto test_PIE; - } - perror("RTC_ALM_READ ioctl"); - exit(errno); - } - - fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", - rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); - - /* Enable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_ON, 0); - if (retval == -1) { - if (errno == EINVAL || errno == EIO) { - fprintf(stderr, - "\n...Alarm IRQs not supported.\n"); - goto test_PIE; - } - - perror("RTC_AIE_ON ioctl"); - exit(errno); - } - - fprintf(stderr, "Waiting 5 seconds for alarm..."); - fflush(stderr); - /* This blocks until the alarm ring causes an interrupt */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - irqcount++; - fprintf(stderr, " okay. Alarm rang.\n"); - - /* Disable alarm interrupts */ - retval = ioctl(fd, RTC_AIE_OFF, 0); - if (retval == -1) { - perror("RTC_AIE_OFF ioctl"); - exit(errno); - } - -test_PIE: - /* Read periodic IRQ rate */ - retval = ioctl(fd, RTC_IRQP_READ, &tmp); - if (retval == -1) { - /* not all RTCs support periodic IRQs */ - if (errno == EINVAL) { - fprintf(stderr, "\nNo periodic IRQ support\n"); - goto test_DATE; - } - perror("RTC_IRQP_READ ioctl"); - exit(errno); - } - fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); - - fprintf(stderr, "Counting 20 interrupts at:"); - fflush(stderr); - - /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ - for (tmp=2; tmp<=64; tmp*=2) { - - retval = ioctl(fd, RTC_IRQP_SET, tmp); - if (retval == -1) { - /* not all RTCs can change their periodic IRQ rate */ - if (errno == EINVAL) { - fprintf(stderr, - "\n...Periodic IRQ rate is fixed\n"); - goto test_DATE; - } - perror("RTC_IRQP_SET ioctl"); - exit(errno); - } - - fprintf(stderr, "\n%ldHz:\t", tmp); - fflush(stderr); - - /* Enable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_ON, 0); - if (retval == -1) { - perror("RTC_PIE_ON ioctl"); - exit(errno); - } - - for (i=1; i<21; i++) { - gettimeofday(&start, NULL); - /* This blocks */ - retval = read(fd, &data, sizeof(unsigned long)); - if (retval == -1) { - perror("read"); - exit(errno); - } - gettimeofday(&end, NULL); - timersub(&end, &start, &diff); - if (diff.tv_sec > 0 || - diff.tv_usec > ((1000000L / tmp) * 1.10)) { - fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", - diff.tv_sec, diff.tv_usec, - (1000000L / tmp)); - fflush(stdout); - exit(-1); - } - - fprintf(stderr, " %d",i); - fflush(stderr); - irqcount++; - } - - /* Disable periodic interrupts */ - retval = ioctl(fd, RTC_PIE_OFF, 0); - if (retval == -1) { - perror("RTC_PIE_OFF ioctl"); - exit(errno); - } - } - -test_DATE: - if (!dangerous) - goto done; - - fprintf(stderr, "\nTesting problematic dates\n"); - - for (i = 0; i < ARRAY_SIZE(cutoff_dates); i++) { - struct rtc_time current; - - /* Write the new date in RTC */ - retval = ioctl(fd, RTC_SET_TIME, &cutoff_dates[i]); - if (retval == -1) { - perror("RTC_SET_TIME ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_RD_TIME, ¤t); - if (retval == -1) { - perror("RTC_RD_TIME ioctl"); - exit(errno); - } - - if(compare_dates(&cutoff_dates[i], ¤t)) { - fprintf(stderr,"Setting date %d failed\n", - cutoff_dates[i].tm_year + 1900); - goto done; - } - - cutoff_dates[i].tm_sec += 5; - - /* Write the new alarm in RTC */ - retval = ioctl(fd, RTC_ALM_SET, &cutoff_dates[i]); - if (retval == -1) { - perror("RTC_ALM_SET ioctl"); - close(fd); - exit(errno); - } - - /* Read back */ - retval = ioctl(fd, RTC_ALM_READ, ¤t); - if (retval == -1) { - perror("RTC_ALM_READ ioctl"); - exit(errno); - } - - if(compare_dates(&cutoff_dates[i], ¤t)) { - fprintf(stderr,"Setting alarm %d failed\n", - cutoff_dates[i].tm_year + 1900); - goto done; - } - - fprintf(stderr, "Setting year %d is OK \n", - cutoff_dates[i].tm_year + 1900); - } -done: - fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); - - close(fd); - - return 0; -} diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..186520198de7 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -8,10 +8,11 @@ include ../lib.mk UNAME_M := $(shell uname -m) CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) +CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + protection_keys test_vdso test_vsyscall mov_ss_trap TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer @@ -31,7 +32,12 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64) BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32)) BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64)) -CFLAGS := -O2 -g -std=gnu99 -pthread -Wall -no-pie +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +# call32_from_64 in thunks.S uses absolute addresses. +ifeq ($(CAN_BUILD_WITH_NOPIE),1) +CFLAGS += -no-pie +endif define gen-target-rule-32 $(1) $(1)_32: $(OUTPUT)/$(1)_32 diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS + * + * This does MOV SS from a watchpointed address followed by various + * types of kernel entries. A MOV SS that hits a watchpoint will queue + * up a #DB trap but will not actually deliver that trap. The trap + * will be delivered after the next instruction instead. The CPU's logic + * seems to be: + * + * - Any fault: drop the pending #DB trap. + * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then + * deliver #DB. + * - ICEBP: enter the kernel but do not deliver the watchpoint trap + * - breakpoint: only one #DB is delivered (phew!) + * + * There are plenty of ways for a kernel to handle this incorrectly. This + * test tries to exercise all the cases. + * + * This should mostly cover CVE-2018-1087 and CVE-2018-8897. + */ +#define _GNU_SOURCE + +#include <stdlib.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/user.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <err.h> +#include <string.h> +#include <setjmp.h> +#include <sys/prctl.h> + +#define X86_EFLAGS_RF (1UL << 16) + +#if __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +unsigned short ss; +extern unsigned char breakpoint_insn[]; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; + +static void enable_watchpoint(void) +{ + pid_t parent = getpid(); + int status; + + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + + if (child) { + if (waitpid(child, &status, 0) != child) + err(1, "waitpid for child"); + } else { + unsigned long dr0, dr1, dr7; + + dr0 = (unsigned long)&ss; + dr1 = (unsigned long)breakpoint_insn; + dr7 = ((1UL << 1) | /* G0 */ + (3UL << 16) | /* RW0 = read or write */ + (1UL << 18) | /* LEN0 = 2 bytes */ + (1UL << 3)); /* G1, RW1 = insn */ + + if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_ATTACH"); + + if (waitpid(parent, &status, 0) != parent) + err(1, "waitpid for child"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) + err(1, "PTRACE_POKEUSER DR0"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) + err(1, "PTRACE_POKEUSER DR1"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) + err(1, "PTRACE_POKEUSER DR7"); + + printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); + + if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_DETACH"); + + exit(0); + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); +} + +static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); +} + +static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + + siglongjmp(jmpbuf, 1); +} + +int main() +{ + unsigned long nr; + + asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); + printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); + + if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) + printf("\tPR_SET_PTRACER_ANY succeeded\n"); + + printf("\tSet up a watchpoint\n"); + sethandler(SIGTRAP, sigtrap, 0); + enable_watchpoint(); + + printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); + asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT3\n"); + asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CS CS INT3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CSx14 INT3\n"); + asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 4\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); + +#ifdef __i386__ + printf("[RUN]\tMOV SS; INTO\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + nr = -1; + asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" + : [tmp] "+r" (nr) : [ss] "m" (ss)); +#endif + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; ICEBP\n"); + + /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + + asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; CLI\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; #PF\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" + : [tmp] "=r" (nr) : [ss] "m" (ss)); + } + + /* + * INT $1: if #DB has DPL=3 and there isn't special handling, + * then the kernel will die. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT 1\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); + } + +#ifdef __x86_64__ + /* + * In principle, we should test 32-bit SYSCALL as well, but + * the calling convention is so unpredictable that it's + * not obviously worth the effort. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSCALL\n"); + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + nr = SYS_getpid; + /* + * Toggle the high bit of RSP to make it noncanonical to + * strengthen this test on non-SMAP systems. + */ + asm volatile ("btc $63, %%rsp\n\t" + "mov %[ss], %%ss; syscall\n\t" + "btc $63, %%rsp" + : "+a" (nr) : [ss] "m" (ss) + : "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + } +#endif + + printf("[RUN]\tMOV SS; breakpointed NOP\n"); + asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); + + /* + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); + nr = SYS_getpid; + asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + : [ss] "m" (ss) : "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT $0x80\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + nr = 20; /* compat getpid */ + asm volatile ("mov %[ss], %%ss; int $0x80" + : "+a" (nr) : [ss] "m" (ss) + : "flags" +#ifdef __x86_64__ + , "r8", "r9", "r10", "r11" +#endif + ); + } + + printf("[OK]\tI aten't dead\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + /* * The kernel is supposed to provide some information about the bounds * exception in the siginfo. It should match what we have in the bounds @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define SEGV_BNDERR 3 /* failed address bound checks */ - dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); dprintf2("si_signo: %d\n", si->si_signo); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) { va_list ap; - va_start(ap, format); if (!dprint_in_signal) { + va_start(ap, format); vprintf(format, ap); + va_end(ap); } else { int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); + ret = write(1, format, strlen(format)); if (ret < 0) - abort(); + exit(1); } - va_end(ap); } #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -72,10 +72,9 @@ extern void abort_hooks(void); test_nr, iteration_nr); \ dprintf0("errno at assert: %d", errno); \ abort_hooks(); \ - assert(condition); \ + exit(__LINE__); \ } \ } while (0) -#define raw_assert(cond) assert(cond) void cat_into_file(char *str, char *file) { @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) * these need to be raw because they are called under * pkey_assert() */ - raw_assert(fd >= 0); + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + ret = write(fd, str, strlen(str)); if (ret != strlen(str)) { perror("write to file failed"); fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); + exit(__LINE__); } close(fd); } @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) #ifdef __i386__ #ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 +# define SYS_mprotect_key 380 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 #endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 #else #ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 +# define SYS_mprotect_key 329 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 #endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 #endif @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif static char *si_code_str(int si_code) { @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dump_mem(pkru_ptr - 128, 256); pkey_assert(*pkru_ptr); - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_BNDERR)) { @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } dprint_in_signal = 0; } @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) return forkret; } -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif + +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif -u32 pkey_get(int pkey, unsigned long flags) +static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 pkru = __rdpkru(); @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) return masked_pkru; } -int pkey_set(int pkey, unsigned long rights, unsigned long flags) +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 old_pkru = __rdpkru(); @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - pkey_rights = pkey_get(pkey, syscall_flags); + pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, syscall_flags); + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkru and flags have the same format */ shadow_pkru |= flags << (pkey * 2); @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + int pkey_rights = hw_pkey_get(pkey, syscall_flags); u32 orig_pkru = rdpkru(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, 0); + ret = hw_pkey_set(pkey, pkey_rights, 0); /* pkru and flags have the same format */ shadow_pkru &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, struct pkey_malloc_record { void *ptr; long size; + int prot; }; struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) +void record_pkey_malloc(void *ptr, long size, int prot) { long i; struct pkey_malloc_record *rec = NULL; @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) (int)(rec - pkey_malloc_records), rec, ptr, size); rec->ptr = ptr; rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; nr_pkey_malloc_records++; } @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); rdpkru(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) size = ALIGN_UP(size, HPAGE_SIZE * 2); ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); mprotect_pkey(ptr, size, prot, pkey); dprintf1("unaligned ptr: %p\n", ptr); @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); return ptr; @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); close(fd); @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) } int last_pkru_faults; +#define UNKNOWN_PKEY -2 void expected_pk_fault(int pkey) { dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", __func__, last_pkru_faults, pkru_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + /* * The signal handler shold have cleared out PKRU to let the * test program continue. We now have to restore it. @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) last_si_pkey = -1; } -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) int test_fds[10] = { -1 }; int nr_test_fds; @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= NR_PKEYS-3); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +void *get_pointer_to_instructions(void) { void *p1; - int scratch; - int ptr_contents; - int ret; p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) /* Point 'p1' at the *second* page of the function: */ p1 += PAGE_SIZE; + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); lots_o_noops_around_write(&scratch); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); expected_pk_fault(pkey); } +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_kernel_gup_of_access_disabled_region, test_kernel_gup_write_to_write_disabled_region, test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, test_ptrace_of_child, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c index 40370354d4c1..c9c3281077bc 100644 --- a/tools/testing/selftests/x86/test_syscall_vdso.c +++ b/tools/testing/selftests/x86/test_syscall_vdso.c @@ -100,12 +100,19 @@ asm ( " shl $32, %r8\n" " orq $0x7f7f7f7f, %r8\n" " movq %r8, %r9\n" - " movq %r8, %r10\n" - " movq %r8, %r11\n" - " movq %r8, %r12\n" - " movq %r8, %r13\n" - " movq %r8, %r14\n" - " movq %r8, %r15\n" + " incq %r9\n" + " movq %r9, %r10\n" + " incq %r10\n" + " movq %r10, %r11\n" + " incq %r11\n" + " movq %r11, %r12\n" + " incq %r12\n" + " movq %r12, %r13\n" + " incq %r13\n" + " movq %r13, %r14\n" + " incq %r14\n" + " movq %r14, %r15\n" + " incq %r15\n" " ret\n" " .code32\n" " .popsection\n" @@ -128,12 +135,13 @@ int check_regs64(void) int err = 0; int num = 8; uint64_t *r64 = ®s64.r8; + uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; if (!kernel_is_64bit) return 0; do { - if (*r64 == 0x7f7f7f7f7f7f7f7fULL) + if (*r64 == expected++) continue; /* register did not change */ if (syscall_addr != (long)&int80) { /* @@ -147,18 +155,17 @@ int check_regs64(void) continue; } } else { - /* INT80 syscall entrypoint can be used by + /* + * INT80 syscall entrypoint can be used by * 64-bit programs too, unlike SYSCALL/SYSENTER. * Therefore it must preserve R12+ * (they are callee-saved registers in 64-bit C ABI). * - * This was probably historically not intended, - * but R8..11 are clobbered (cleared to 0). - * IOW: they are the only registers which aren't - * preserved across INT80 syscall. + * Starting in Linux 4.17 (and any kernel that + * backports the change), R8..11 are preserved. + * Historically (and probably unintentionally), they + * were clobbered or zeroed. */ - if (*r64 == 0 && num <= 11) - continue; } printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); err++; diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c new file mode 100644 index 000000000000..46a447163b93 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_program.c @@ -0,0 +1,10 @@ +/* Trivial program to check that compilation with certain flags is working. */ + +#include <stdio.h> + +int +main(void) +{ + puts(""); + return 0; +} |