Diffstat (limited to 'drivers/clocksource/hyperv_timer.c')
-rw-r--r--	drivers/clocksource/hyperv_timer.c	251
1 files changed, 172 insertions, 79 deletions
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 269a691bd2c4..977fd05ac35f 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -18,6 +18,9 @@
 #include <linux/sched_clock.h>
 #include <linux/mm.h>
 #include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
@@ -43,14 +46,13 @@ static u64 hv_sched_clock_offset __ro_after_init;
  */
 static bool direct_mode_enabled;
 
-static int stimer0_irq;
-static int stimer0_vector;
+static int stimer0_irq = -1;
 static int stimer0_message_sint;
+static DEFINE_PER_CPU(long, stimer0_evt);
 
 /*
- * ISR for when stimer0 is operating in Direct Mode.  Direct Mode
- * does not use VMbus or any VMbus messages, so process here and not
- * in the VMbus driver code.
+ * Common code for stimer0 interrupts coming via Direct Mode or
+ * as a VMbus message.
  */
 void hv_stimer0_isr(void)
 {
@@ -61,6 +63,16 @@ void hv_stimer0_isr(void)
 }
 EXPORT_SYMBOL_GPL(hv_stimer0_isr);
 
+/*
+ * stimer0 interrupt handler for architectures that support
+ * per-cpu interrupts, which also implies Direct Mode.
+ */
+static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
+{
+	hv_stimer0_isr();
+	return IRQ_HANDLED;
+}
+
 static int hv_ce_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
 {
@@ -68,16 +80,16 @@ static int hv_ce_set_next_event(unsigned long delta,
 
 	current_tick = hv_read_reference_counter();
 	current_tick += delta;
-	hv_init_timer(0, current_tick);
+	hv_set_register(HV_REGISTER_STIMER0_COUNT, current_tick);
 	return 0;
 }
 
 static int hv_ce_shutdown(struct clock_event_device *evt)
 {
-	hv_init_timer(0, 0);
-	hv_init_timer_config(0, 0);
-	if (direct_mode_enabled)
-		hv_disable_stimer0_percpu_irq(stimer0_irq);
+	hv_set_register(HV_REGISTER_STIMER0_COUNT, 0);
+	hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0);
+	if (direct_mode_enabled && stimer0_irq >= 0)
+		disable_percpu_irq(stimer0_irq);
 
 	return 0;
 }
@@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
 		 * on the specified hardware vector/IRQ.
 		 */
 		timer_cfg.direct_mode = 1;
-		timer_cfg.apic_vector = stimer0_vector;
-		hv_enable_stimer0_percpu_irq(stimer0_irq);
+		timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR;
+		if (stimer0_irq >= 0)
+			enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE);
 	} else {
 		/*
 		 * When it expires, the timer will generate a VMbus message,
@@ -105,7 +118,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
 		timer_cfg.direct_mode = 0;
 		timer_cfg.sintx = stimer0_message_sint;
 	}
-	hv_init_timer_config(0, timer_cfg.as_uint64);
+	hv_set_register(HV_REGISTER_STIMER0_CONFIG, timer_cfg.as_uint64);
 	return 0;
 }
 
@@ -169,10 +182,58 @@ int hv_stimer_cleanup(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
 
+/*
+ * These placeholders are overridden by arch specific code on
+ * architectures that need special setup of the stimer0 IRQ because
+ * they don't support per-cpu IRQs (such as x86/x64).
+ */
+void __weak hv_setup_stimer0_handler(void (*handler)(void))
+{
+};
+
+void __weak hv_remove_stimer0_handler(void)
+{
+};
+
+/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
+static int hv_setup_stimer0_irq(void)
+{
+	int ret;
+
+	ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR,
+			ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH);
+	if (ret < 0) {
+		pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret);
+		return ret;
+	}
+	stimer0_irq = ret;
+
+	ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr,
+		"Hyper-V stimer0", &stimer0_evt);
+	if (ret) {
+		pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d",
+			stimer0_irq, ret);
+		acpi_unregister_gsi(stimer0_irq);
+		stimer0_irq = -1;
+	}
+	return ret;
+}
+
+static void hv_remove_stimer0_irq(void)
+{
+	if (stimer0_irq == -1) {
+		hv_remove_stimer0_handler();
+	} else {
+		free_percpu_irq(stimer0_irq, &stimer0_evt);
+		acpi_unregister_gsi(stimer0_irq);
+		stimer0_irq = -1;
+	}
+}
+
 /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
-int hv_stimer_alloc(void)
+int hv_stimer_alloc(bool have_percpu_irqs)
 {
-	int ret = 0;
+	int ret;
 
 	/*
 	 * Synthetic timers are always available except on old versions of
@@ -188,29 +249,37 @@ int hv_stimer_alloc(void)
 
 	direct_mode_enabled = ms_hyperv.misc_features &
 			HV_STIMER_DIRECT_MODE_AVAILABLE;
-	if (direct_mode_enabled) {
-		ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
-				hv_stimer0_isr);
+
+	/*
+	 * If Direct Mode isn't enabled, the remainder of the initialization
+	 * is done later by hv_stimer_legacy_init()
+	 */
+	if (!direct_mode_enabled)
+		return 0;
+
+	if (have_percpu_irqs) {
+		ret = hv_setup_stimer0_irq();
 		if (ret)
-			goto free_percpu;
+			goto free_clock_event;
+	} else {
+		hv_setup_stimer0_handler(hv_stimer0_isr);
+	}
 
-		/*
-		 * Since we are in Direct Mode, stimer initialization
-		 * can be done now with a CPUHP value in the same range
-		 * as other clockevent devices.
-		 */
-		ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
-				"clockevents/hyperv/stimer:starting",
-				hv_stimer_init, hv_stimer_cleanup);
-		if (ret < 0)
-			goto free_stimer0_irq;
+	/*
+	 * Since we are in Direct Mode, stimer initialization
+	 * can be done now with a CPUHP value in the same range
+	 * as other clockevent devices.
+	 */
+	ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
+			"clockevents/hyperv/stimer:starting",
+			hv_stimer_init, hv_stimer_cleanup);
+	if (ret < 0) {
+		hv_remove_stimer0_irq();
+		goto free_clock_event;
 	}
 	return ret;
 
-free_stimer0_irq:
-	hv_remove_stimer0_irq(stimer0_irq);
-	stimer0_irq = 0;
-free_percpu:
+free_clock_event:
 	free_percpu(hv_clock_event);
 	hv_clock_event = NULL;
 	return ret;
@@ -254,23 +323,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
 
-
-/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
-void hv_stimer_free(void)
-{
-	if (!hv_clock_event)
-		return;
-
-	if (direct_mode_enabled) {
-		cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
-		hv_remove_stimer0_irq(stimer0_irq);
-		stimer0_irq = 0;
-	}
-	free_percpu(hv_clock_event);
-	hv_clock_event = NULL;
-}
-EXPORT_SYMBOL_GPL(hv_stimer_free);
-
 /*
  * Do a global cleanup of clockevents for the cases of kexec and
  * vmbus exit
@@ -287,12 +339,17 @@ void hv_stimer_global_cleanup(void)
 		hv_stimer_legacy_cleanup(cpu);
 	}
 
-	/*
-	 * If Direct Mode is enabled, the cpuhp teardown callback
-	 * (hv_stimer_cleanup) will be run on all CPUs to stop the
-	 * stimers.
-	 */
-	hv_stimer_free();
+	if (!hv_clock_event)
+		return;
+
+	if (direct_mode_enabled) {
+		cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
+		hv_remove_stimer0_irq();
+		stimer0_irq = -1;
+	}
+	free_percpu(hv_clock_event);
+	hv_clock_event = NULL;
+
 }
 EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
 
@@ -302,14 +359,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
  * the other that uses the TSC reference page feature as defined in the
  * TLFS.  The MSR version is for compatibility with old versions of
  * Hyper-V and 32-bit x86.  The TSC reference page version is preferred.
- *
- * The Hyper-V clocksource ratings of 250 are chosen to be below the
- * TSC clocksource rating of 300.  In configurations where Hyper-V offers
- * an InvariantTSC, the TSC is not marked "unstable", so the TSC clocksource
- * is available and preferred.  With the higher rating, it will be the
- * default.  On older hardware and Hyper-V versions, the TSC is marked
- * "unstable", so no TSC clocksource is created and the selected Hyper-V
- * clocksource will be the default.
  */
 
 u64 (*hv_read_reference_counter)(void);
@@ -331,7 +380,7 @@ static u64 notrace read_hv_clock_tsc(void)
 	u64 current_tick = hv_read_tsc_page(hv_get_tsc_page());
 
 	if (current_tick == U64_MAX)
-		hv_get_time_ref_count(current_tick);
+		current_tick = hv_get_register(HV_REGISTER_TIME_REF_COUNT);
 
 	return current_tick;
 }
@@ -352,9 +401,9 @@ static void suspend_hv_clock_tsc(struct clocksource *arg)
 	u64 tsc_msr;
 
 	/* Disable the TSC page */
-	hv_get_reference_tsc(tsc_msr);
+	tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
 	tsc_msr &= ~BIT_ULL(0);
-	hv_set_reference_tsc(tsc_msr);
+	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
 }
 
 
@@ -364,39 +413,44 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
 	u64 tsc_msr;
 
 	/* Re-enable the TSC page */
-	hv_get_reference_tsc(tsc_msr);
+	tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
 	tsc_msr &= GENMASK_ULL(11, 0);
 	tsc_msr |= BIT_ULL(0) | (u64)phys_addr;
-	hv_set_reference_tsc(tsc_msr);
+	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
 }
 
+#ifdef VDSO_CLOCKMODE_HVCLOCK
 static int hv_cs_enable(struct clocksource *cs)
 {
-	hv_enable_vdso_clocksource();
+	vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
 	return 0;
 }
+#endif
 
 static struct clocksource hyperv_cs_tsc = {
 	.name	= "hyperv_clocksource_tsc_page",
-	.rating	= 250,
+	.rating	= 500,
 	.read	= read_hv_clock_tsc_cs,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 	.suspend= suspend_hv_clock_tsc,
 	.resume	= resume_hv_clock_tsc,
+#ifdef VDSO_CLOCKMODE_HVCLOCK
 	.enable = hv_cs_enable,
+	.vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
+#else
+	.vdso_clock_mode = VDSO_CLOCKMODE_NONE,
+#endif
 };
 
 static u64 notrace read_hv_clock_msr(void)
 {
-	u64 current_tick;
 	/*
 	 * Read the partition counter to get the current tick count. This count
 	 * is set to 0 when the partition is created and is incremented in
 	 * 100 nanosecond units.
 	 */
-	hv_get_time_ref_count(current_tick);
-	return current_tick;
+	return hv_get_register(HV_REGISTER_TIME_REF_COUNT);
 }
 
 static u64 notrace read_hv_clock_msr_cs(struct clocksource *arg)
@@ -412,12 +466,36 @@ static u64 notrace read_hv_sched_clock_msr(void)
 
 static struct clocksource hyperv_cs_msr = {
 	.name	= "hyperv_clocksource_msr",
-	.rating	= 250,
+	.rating	= 500,
 	.read	= read_hv_clock_msr_cs,
 	.mask	= CLOCKSOURCE_MASK(64),
 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};
 
+/*
+ * Reference to pv_ops must be inline so objtool
+ * detection of noinstr violations can work correctly.
+ */
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+static __always_inline void hv_setup_sched_clock(void *sched_clock)
+{
+	/*
+	 * We're on an architecture with generic sched clock (not x86/x64).
+	 * The Hyper-V sched clock read function returns nanoseconds, not
+	 * the normal 100ns units of the Hyper-V synthetic clock.
+	 */
+	sched_clock_register(sched_clock, 64, NSEC_PER_SEC);
+}
+#elif defined CONFIG_PARAVIRT
+static __always_inline void hv_setup_sched_clock(void *sched_clock)
+{
+	/* We're on x86/x64 *and* using PV ops */
+	paravirt_set_sched_clock(sched_clock);
+}
+#else /* !CONFIG_GENERIC_SCHED_CLOCK && !CONFIG_PARAVIRT */
+static __always_inline void hv_setup_sched_clock(void *sched_clock) {}
+#endif /* CONFIG_GENERIC_SCHED_CLOCK */
+
 static bool __init hv_init_tsc_clocksource(void)
 {
 	u64		tsc_msr;
@@ -429,6 +507,22 @@ static bool __init hv_init_tsc_clocksource(void)
 	if (hv_root_partition)
 		return false;
 
+	/*
+	 * If Hyper-V offers TSC_INVARIANT, then the virtualized TSC correctly
+	 * handles frequency and offset changes due to live migration,
+	 * pause/resume, and other VM management operations.  So lower the
+	 * Hyper-V Reference TSC rating, causing the generic TSC to be used.
+	 * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference
+	 * TSC will be preferred over the virtualized ARM64 arch counter.
+	 * While the Hyper-V MSR clocksource won't be used since the
+	 * Reference TSC clocksource is present, change its rating as
+	 * well for consistency.
+	 */
+	if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
+		hyperv_cs_tsc.rating = 250;
+		hyperv_cs_msr.rating = 250;
+	}
+
 	hv_read_reference_counter = read_hv_clock_tsc;
 	phys_addr = virt_to_phys(hv_get_tsc_page());
 
@@ -439,12 +533,11 @@ static bool __init hv_init_tsc_clocksource(void)
 	 * (which already has at least the low 12 bits set to zero since
 	 * it is page aligned). Also set the "enable" bit, which is bit 0.
 	 */
-	hv_get_reference_tsc(tsc_msr);
+	tsc_msr = hv_get_register(HV_REGISTER_REFERENCE_TSC);
 	tsc_msr &= GENMASK_ULL(11, 0);
 	tsc_msr = tsc_msr | 0x1 | (u64)phys_addr;
-	hv_set_reference_tsc(tsc_msr);
+	hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
 
-	hv_set_clocksource_vdso(hyperv_cs_tsc);
 	clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100);
 
 	hv_sched_clock_offset = hv_read_reference_counter();
@@ -457,7 +550,7 @@ void __init hv_init_clocksource(void)
 {
 	/*
 	 * Try to set up the TSC page clocksource. If it succeeds, we're
-	 * done. Otherwise, set up the MSR clocksoruce.  At least one of
+	 * done. Otherwise, set up the MSR clocksource.  At least one of
 	 * these will always be available except on very old versions of
 	 * Hyper-V on x86.  In that case we won't have a Hyper-V
 	 * clocksource, but Linux will still run with a clocksource based
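The new hv_setup_stimer0_irq() path follows the kernel's standard per-cpu IRQ pattern: the ACPI GSI is mapped to a Linux IRQ once, the IRQ is requested once globally with a per-cpu dev_id cookie, and each CPU then enables the line for itself. A minimal sketch of that pattern, independent of the Hyper-V specifics (the "demo" names are illustrative, not from the patch):

#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/percpu.h>

static DEFINE_PER_CPU(long, demo_evt);	/* per-cpu cookie, like stimer0_evt */

static irqreturn_t demo_isr(int irq, void *dev_id)
{
	/* dev_id points at this CPU's instance of demo_evt */
	return IRQ_HANDLED;
}

static int demo_setup(int irq)
{
	int ret;

	/* One global request; the per-cpu variable serves as dev_id */
	ret = request_percpu_irq(irq, demo_isr, "demo", &demo_evt);
	if (ret)
		return ret;

	/*
	 * Each CPU must still enable the IRQ locally, e.g. from a CPUHP
	 * "starting" callback that runs on the target CPU.
	 */
	enable_percpu_irq(irq, IRQ_TYPE_NONE);
	return 0;
}

This is why hv_ce_set_oneshot() and hv_ce_shutdown() call enable_percpu_irq()/disable_percpu_irq() themselves: those run per CPU, while the request/free happens exactly once.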
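The empty hv_setup_stimer0_handler()/hv_remove_stimer0_handler() bodies rely on the linker's weak-symbol override rather than any registration call. A tiny illustrative sketch of the idiom (hypothetical names):

#include <linux/compiler.h>

/* common code: the __weak definition is the fallback */
void __weak arch_hook(void)
{
	/* nothing to do by default */
}

/* arch code, in a separate object file: a strong definition with the
 * same signature silently replaces the weak one at link time */
void arch_hook(void)
{
	/* architecture-specific setup */
}

Here, x86 supplies strong definitions that wire stimer0 into its Hyper-V vector handling, while architectures with true per-cpu IRQs never override the stubs and take the hv_setup_stimer0_irq() path instead.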
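Two details of the clocksource changes are worth spelling out. The rating field is what clocksource selection compares: 500 now beats the x86 TSC's rating unless HV_ACCESS_TSC_INVARIANT drops the Hyper-V sources back to 250. And the frequency passed at registration, NSEC_PER_SEC/100 (10 MHz), encodes the reference counter's 100 ns tick. A hedged, minimal registration sketch ("demo" names are hypothetical, not the driver's):

#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/time64.h>

static u64 demo_read(struct clocksource *cs)
{
	return 0;	/* would read the 100ns-granularity hardware counter */
}

static struct clocksource demo_cs = {
	.name	= "demo",
	.rating	= 250,		/* lower than the TSC's, so the TSC wins */
	.read	= demo_read,
	.mask	= CLOCKSOURCE_MASK(64),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
};

static int __init demo_cs_init(void)
{
	/* NSEC_PER_SEC / 100 == 10,000,000 Hz == one tick per 100 ns */
	return clocksource_register_hz(&demo_cs, NSEC_PER_SEC / 100);
}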
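The units comment inside the CONFIG_GENERIC_SCHED_CLOCK branch matters because sched_clock_register() is told the clock runs at NSEC_PER_SEC: the read function itself must already return nanoseconds, not raw 100 ns ticks. Roughly, under that assumption (read_100ns_counter() is a hypothetical stand-in for hv_read_reference_counter(), offset handling omitted):

#include <linux/types.h>

/* hypothetical stand-in for the 10 MHz Hyper-V reference counter */
static u64 read_100ns_counter(void)
{
	return 0;
}

/* what a generic sched_clock read function must return: nanoseconds */
static u64 demo_sched_clock_read(void)
{
	return read_100ns_counter() * 100;	/* 100 ns ticks -> ns */
}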
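Finally, the suspend/resume and init paths all manipulate the same reference-TSC register layout: bits 63:12 carry the physical address of the page-aligned TSC reference page, bit 0 is the enable flag, and the remaining low bits are read and preserved. The masking in the patch reduces to the following sketch (build_ref_tsc_msr() is illustrative, not a driver function):

#include <linux/bits.h>
#include <linux/types.h>

static u64 build_ref_tsc_msr(u64 old_msr, u64 page_phys_addr)
{
	u64 msr = old_msr & GENMASK_ULL(11, 0);	/* preserve low 12 bits */

	/* page_phys_addr is page aligned, so its low 12 bits are zero */
	return msr | BIT_ULL(0) | page_phys_addr;	/* enable + address */
}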
