xref: /linux-6.15/kernel/time/clocksource.c (revision 8fa7292f)
// SPDX-License-Identifier: GPL-2.0+
/*
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
#include <linux/prandom.h>
#include <linux/cpu.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

static void clocksource_enqueue(struct clocksource *cs);

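/*
 * Convert a cycle delta to nanoseconds without risking a silent 64-bit
 * overflow: clocksource_cyc2ns() evaluates (delta * mult) >> shift in
 * plain 64-bit arithmetic, which is only safe while delta stays below
 * cs->max_cycles. For larger deltas, mul_u64_u32_shr() performs the
 * multiplication with a wider intermediate result.
 */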
static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
{
	u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta);

	if (likely(delta < cs->max_cycles))
		return clocksource_cyc2ns(delta, cs->mult, cs->shift);

	return mul_u64_u32_shr(delta, cs->mult, cs->shift);
}

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * events @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);

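/*
 * Worked example (hypothetical values, for illustration only): a driver
 * with a 10 MHz counter that wants nanosecond conversions valid for at
 * least 600 seconds could do:
 *
 *	u32 mult, shift;
 *
 *	clocks_calc_mult_shift(&mult, &shift, 10000000, NSEC_PER_SEC, 600);
 *
 * This yields mult = 1677721600 and shift = 24, so a single counter
 * cycle converts to (1 * mult) >> shift = 100 ns, as expected for a
 * 10 MHz clock.
 */
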
/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * suspend_clocksource:
 *	used to calculate the suspend time.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static struct clocksource *suspend_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;
static u64 suspend_start;

/*
 * Interval: 0.5sec.
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))

/*
 * Threshold: 0.0312s, when doubled: 0.0625s.
 */
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)

/*
 * Maximum permissible delay between two readouts of the watchdog
 * clocksource surrounding a read of the clocksource being validated.
 * This delay could be due to SMIs, NMIs, or to VCPU preemptions.  Used as
 * a lower bound for cs->uncertainty_margin values when registering clocks.
 *
 * The default of 500 parts per million is based on NTP's limits.
 * If a clocksource is good enough for NTP, it is good enough for us!
 *
 * In other words, by default, even if a clocksource is extremely
 * precise (for example, with a sub-nanosecond period), the maximum
 * permissible skew between the clocksource watchdog and the clocksource
 * under test is not permitted to go below the 500ppm minimum defined
 * by MAX_SKEW_USEC.  This 500ppm minimum may be overridden using the
 * CLOCKSOURCE_WATCHDOG_MAX_SKEW_US Kconfig option.
 */
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#define MAX_SKEW_USEC	CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#else
#define MAX_SKEW_USEC	(125 * WATCHDOG_INTERVAL / HZ)
#endif

/*
 * Default for maximum permissible skew when cs->uncertainty_margin is
 * not specified, and the lower bound even when cs->uncertainty_margin
 * is specified.  This is also the default that is used when registering
 * clocks with unspecified cs->uncertainty_margin, so this macro is used
 * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels.
 */
#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)

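/*
 * Worked example of the macro arithmetic above (illustrative only):
 * with HZ = 1000, WATCHDOG_INTERVAL is 500 jiffies (0.5 s) and
 * WATCHDOG_INTERVAL_MAX_NS is (2 * 500) * (1000000000 / 1000) = 1 s.
 * Without the Kconfig override, MAX_SKEW_USEC is 125 * 500 / 1000 = 62,
 * so WATCHDOG_MAX_SKEW works out to 62 * 1000 = 62000 ns.
 */
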
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
static int64_t watchdog_max_interval;

static inline void clocksource_watchdog_lock(unsigned long *flags)
{
	spin_lock_irqsave(&watchdog_lock, *flags);
}

static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
	spin_unlock_irqrestore(&watchdog_lock, *flags);
}

static int clocksource_watchdog_kthread(void *data);

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * We cannot directly run clocksource_watchdog_kthread() here, because
	 * clocksource_select() calls timekeeping_notify() which uses
	 * stop_machine(). One cannot use stop_machine() from a workqueue() due
	 * to lock inversions wrt CPU hotplug.
	 *
	 * Also, we only ever run this work once or twice during the lifetime
	 * of the kernel, so there is no point in creating a more permanent
	 * kthread for this.
	 *
	 * If kthread_run fails the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	/*
	 * If the clocksource is registered clocksource_watchdog_kthread() will
	 * re-rate and re-select.
	 */
	if (list_empty(&cs->list)) {
		cs->rating = 0;
		return;
	}

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	/* kick clocksource_watchdog_kthread() */
	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:		clocksource to be marked unstable
 *
 * This function is called by the x86 TSC code to mark clocksources as unstable;
 * it defers demotion and re-selection to a kthread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

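/*
 * Typical caller sketch (hypothetical, based on the kernel-doc above):
 * architecture clock code that detects instability out of band, such as
 * the x86 TSC code, would simply do
 *
 *	clocksource_mark_unstable(&clocksource_tsc);
 *
 * and let the watchdog kthread handle the demotion and reselection.
 */
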
static int verify_n_cpus = 8;
module_param(verify_n_cpus, int, 0644);

enum wd_read_status {
	WD_READ_SUCCESS,
	WD_READ_UNSTABLE,
	WD_READ_SKIP
};

static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
	int64_t md = 2 * watchdog->uncertainty_margin;
	unsigned int nretries, max_retries;
	int64_t wd_delay, wd_seq_delay;
	u64 wd_end, wd_end2;

	max_retries = clocksource_get_max_watchdog_retry();
	for (nretries = 0; nretries <= max_retries; nretries++) {
		local_irq_disable();
		*wdnow = watchdog->read(watchdog);
		*csnow = cs->read(cs);
		wd_end = watchdog->read(watchdog);
		wd_end2 = watchdog->read(watchdog);
		local_irq_enable();

		wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
		if (wd_delay <= md + cs->uncertainty_margin) {
			if (nretries > 1 && nretries >= max_retries) {
				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
					smp_processor_id(), watchdog->name, nretries);
			}
			return WD_READ_SUCCESS;
		}

		/*
		 * Now compute the delay between the two consecutive watchdog
		 * reads to see if there is too much external interference
		 * causing significant delay in reading both the clocksource
		 * and the watchdog.
		 *
		 * If the consecutive WD read-back delay > md, report the
		 * system as busy, reinit the watchdog and skip the current
		 * watchdog test.
		 */
		wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
		if (wd_seq_delay > md)
			goto skip_test;
	}

	pr_warn("timekeeping watchdog on CPU%d: wd-%s-wd excessive read-back delay of %lldns vs. limit of %ldns, wd-wd read-back delay only %lldns, attempt %d, marking %s unstable\n",
		smp_processor_id(), cs->name, wd_delay, WATCHDOG_MAX_SKEW, wd_seq_delay, nretries, cs->name);
	return WD_READ_UNSTABLE;

skip_test:
	pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
		smp_processor_id(), watchdog->name, wd_seq_delay);
	pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
		cs->name, wd_delay);
	return WD_READ_SKIP;
}

static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;
static cpumask_t cpus_chosen;

static void clocksource_verify_choose_cpus(void)
{
	int cpu, i, n = verify_n_cpus;

	if (n < 0) {
		/* Check all of the CPUs. */
		cpumask_copy(&cpus_chosen, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
		return;
	}

	/* If no checking desired, or no other CPU to check, leave. */
	cpumask_clear(&cpus_chosen);
	if (n == 0 || num_online_cpus() <= 1)
		return;

	/* Make sure to select at least one CPU other than the current CPU. */
	cpu = cpumask_first(cpu_online_mask);
	if (cpu == smp_processor_id())
		cpu = cpumask_next(cpu, cpu_online_mask);
	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
		return;
	cpumask_set_cpu(cpu, &cpus_chosen);

	/* Force a sane value for the boot parameter. */
	if (n > nr_cpu_ids)
		n = nr_cpu_ids;

	/*
	 * Randomly select the specified number of CPUs.  If the same
	 * CPU is selected multiple times, that CPU is checked only once,
	 * and no replacement CPU is selected.  This gracefully handles
	 * situations where verify_n_cpus is greater than the number of
	 * CPUs that are currently online.
	 */
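	/*
	 * Note on the draw below: get_random_u32_below(nr_cpu_ids) picks a
	 * candidate CPU number, and cpumask_next(cpu - 1, cpu_online_mask)
	 * rounds it up to the next online CPU at or above that number; if
	 * that runs off the end of the mask, the selection wraps back to
	 * the first online CPU.
	 */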
	for (i = 1; i < n; i++) {
		cpu = get_random_u32_below(nr_cpu_ids);
		cpu = cpumask_next(cpu - 1, cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
			cpumask_set_cpu(cpu, &cpus_chosen);
	}

	/* Don't verify ourselves. */
	cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
}

static void clocksource_verify_one_cpu(void *csin)
{
	struct clocksource *cs = (struct clocksource *)csin;

	csnow_mid = cs->read(cs);
}

void clocksource_verify_percpu(struct clocksource *cs)
{
	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
	u64 csnow_begin, csnow_end;
	int cpu, testcpu;
	s64 delta;

	if (verify_n_cpus == 0)
		return;
	cpumask_clear(&cpus_ahead);
	cpumask_clear(&cpus_behind);
	cpus_read_lock();
	migrate_disable();
	clocksource_verify_choose_cpus();
	if (cpumask_empty(&cpus_chosen)) {
		migrate_enable();
		cpus_read_unlock();
		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
		return;
	}
	testcpu = smp_processor_id();
	pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
		cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
	preempt_disable();
	for_each_cpu(cpu, &cpus_chosen) {
		if (cpu == testcpu)
			continue;
		csnow_begin = cs->read(cs);
		smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
		csnow_end = cs->read(cs);
		delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
		if (delta < 0)
			cpumask_set_cpu(cpu, &cpus_behind);
		delta = (csnow_end - csnow_mid) & cs->mask;
		if (delta < 0)
			cpumask_set_cpu(cpu, &cpus_ahead);
		cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end);
		if (cs_nsec > cs_nsec_max)
			cs_nsec_max = cs_nsec;
		if (cs_nsec < cs_nsec_min)
			cs_nsec_min = cs_nsec;
	}
	preempt_enable();
	migrate_enable();
	cpus_read_unlock();
	if (!cpumask_empty(&cpus_ahead))
		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
	if (!cpumask_empty(&cpus_behind))
		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
			cpumask_pr_args(&cpus_behind), testcpu, cs->name);
	if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
}
EXPORT_SYMBOL_GPL(clocksource_verify_percpu);

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_watchdog(struct timer_list *unused)
{
	int64_t wd_nsec, cs_nsec, interval;
	u64 csnow, wdnow, cslast, wdlast;
	int next_cpu, reset_pending;
	struct clocksource *cs;
	enum wd_read_status read_ret;
	unsigned long extra_wait = 0;
	u32 md;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		read_ret = cs_watchdog_read(cs, &csnow, &wdnow);

		if (read_ret == WD_READ_UNSTABLE) {
			/* Clock readout unreliable, so give it up. */
			__clocksource_unstable(cs);
			continue;
		}

		/*
		 * When WD_READ_SKIP is returned, it means the system is likely
		 * under very heavy load, where the latency of reading both the
		 * watchdog and the clocksource is very large and affects the
		 * accuracy of the watchdog check. So give the system some
		 * space and suspend the watchdog check for 5 minutes.
		 */
		if (read_ret == WD_READ_SKIP) {
			/*
			 * As the watchdog timer will be suspended, and
			 * cs->last could remain unchanged for 5 minutes, reset
			 * the counters.
			 */
			clocksource_reset_watchdog();
			extra_wait = HZ * 300;
			break;
		}

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow);
		cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/*
		 * The processing of timer softirqs can get delayed (usually
		 * on account of ksoftirqd not getting to run in a timely
		 * manner), which causes the watchdog interval to stretch.
		 * Skew detection may fail for longer watchdog intervals
		 * on account of fixed margins being used.
		 * Some clocksources, e.g. acpi_pm, cannot tolerate
		 * watchdog intervals longer than a few seconds.
		 */
		interval = max(cs_nsec, wd_nsec);
		if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
			if (system_state > SYSTEM_SCHEDULING &&
			    interval > 2 * watchdog_max_interval) {
				watchdog_max_interval = interval;
				pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
					cs_nsec, wd_nsec);
			}
			watchdog_timer.expires = jiffies;
			continue;
		}

		/* Check the deviation from the watchdog clocksource. */
		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
		if (abs(cs_nsec - wd_nsec) > md) {
			s64 cs_wd_msec;
			s64 wd_msec;
			u32 wd_rem;

			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, cs_nsec, csnow, cslast, cs->mask);
			cs_wd_msec = div_s64_rem(cs_nsec - wd_nsec, 1000 * 1000, &wd_rem);
			wd_msec = div_s64_rem(wd_nsec, 1000 * 1000, &wd_rem);
			pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
				cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
			if (curr_clocksource == cs)
				pr_warn("                      '%s' is current clocksource.\n", cs->name);
			else if (curr_clocksource)
				pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
			else
				pr_warn("                      No current clocksource.\n");
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear watchdog_reset_pending when we have done a
	 * full cycle through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);

	/*
	 * Arm the timer if not already pending: could race with a concurrent
	 * clocksource_stop_watchdog()/clocksource_start_watchdog() pair.
	 */
	if (!timer_pending(&watchdog_timer)) {
		watchdog_timer.expires += WATCHDOG_INTERVAL + extra_wait;
		add_timer_on(&watchdog_timer, next_cpu);
	}
out:
	spin_unlock(&watchdog_lock);
}

static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	timer_delete(&watchdog_timer);
	watchdog_running = 0;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	INIT_LIST_HEAD(&cs->wd_list);

	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
}

static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	int select = 0;

	/* Do any required per-CPU skew verification. */
	if (curr_clocksource &&
	    curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
	    curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
		clocksource_verify_percpu(curr_clocksource);

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			clocksource_change_rating(cs, 0);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

static inline void clocksource_watchdog_lock(unsigned long *flags) { }
static inline void clocksource_watchdog_unlock(unsigned long *flags) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

static bool clocksource_is_suspend(struct clocksource *cs)
{
	return cs == suspend_clocksource;
}

static void __clocksource_suspend_select(struct clocksource *cs)
{
	/*
	 * Skip the clocksource which will be stopped in suspend state.
	 */
	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
		return;

	/*
	 * A nonstop clocksource can be selected as the suspend clocksource to
	 * calculate the suspend time, so it should not supply suspend/resume
	 * interfaces that would stop it when the system suspends.
	 */
	if (cs->suspend || cs->resume) {
		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
			cs->name);
	}

	/* Pick the best rating. */
	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
		suspend_clocksource = cs;
}

/**
 * clocksource_suspend_select - Select the best clocksource for suspend timing
 * @fallback:	whether to select a fallback clocksource
 */
static void clocksource_suspend_select(bool fallback)
{
	struct clocksource *cs, *old_suspend;

	old_suspend = suspend_clocksource;
	if (fallback)
		suspend_clocksource = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_suspend)
			continue;

		__clocksource_suspend_select(cs);
	}
}

/**
 * clocksource_start_suspend_timing - Start measuring the suspend timing
 * @cs:			current clocksource from timekeeping
 * @start_cycles:	current cycles from timekeeping
 *
 * This function will save the start cycle values of suspend timer to calculate
 * the suspend time when resuming system.
 *
 * This function is called late in the suspend process from timekeeping_suspend(),
 * which means processes are frozen, non-boot cpus and interrupts are disabled
 * now. It is therefore possible to start the suspend timer without taking the
 * clocksource mutex.
 */
void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
{
	if (!suspend_clocksource)
		return;

	/*
	 * If the current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value as suspend_start to avoid a second
	 * reading of the suspend timer.
	 */
	if (clocksource_is_suspend(cs)) {
		suspend_start = start_cycles;
		return;
	}

	if (suspend_clocksource->enable &&
	    suspend_clocksource->enable(suspend_clocksource)) {
		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
		return;
	}

	suspend_start = suspend_clocksource->read(suspend_clocksource);
}

/**
 * clocksource_stop_suspend_timing - Stop measuring the suspend timing
 * @cs:		current clocksource from timekeeping
 * @cycle_now:	current cycles from timekeeping
 *
 * This function will calculate the suspend time from suspend timer.
 *
 * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
 *
 * This function is called early in the resume process from timekeeping_resume(),
 * that means there is only one cpu, no processes are running and the interrupts
 * are disabled. It is therefore possible to stop the suspend timer without
 * taking the clocksource mutex.
 */
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
	u64 now, nsec = 0;

	if (!suspend_clocksource)
		return 0;

	/*
	 * If the current clocksource is the suspend timer, we should use the
	 * tkr_mono.cycle_last value from timekeeping as the current cycle to
	 * avoid a second reading of the suspend timer.
	 */
	if (clocksource_is_suspend(cs))
		now = cycle_now;
	else
		now = suspend_clocksource->read(suspend_clocksource);

	if (now > suspend_start)
		nsec = cycles_to_nsec_safe(suspend_clocksource, suspend_start, now);

	/*
	 * Disable the suspend timer to save power if current clocksource is
	 * not the suspend timer.
	 */
	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
		suspend_clocksource->disable(suspend_clocksource);

	return nsec;
}

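/*
 * Call-sequence sketch (illustrative, following the kernel-doc above):
 * the timekeeping core brackets a system suspend roughly like
 *
 *	// in timekeeping_suspend(), late in the suspend path:
 *	clocksource_start_suspend_timing(curr_cs, cycle_last);
 *	// ...system is suspended...
 *	// in timekeeping_resume(), early in the resume path:
 *	sleep_ns = clocksource_stop_suspend_timing(curr_cs, cycle_now);
 *
 * and then injects the returned sleep_ns into the timekeeper. The names
 * curr_cs, cycle_last, cycle_now and sleep_ns are placeholders here.
 */
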
/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment- Returns max adjustment amount
 * @cs:         Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;
	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}

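/*
 * Worked example (illustrative only): for the hypothetical 10 MHz
 * clocksource above with mult = 1677721600, the maximum adjustment is
 * 1677721600 * 11 / 100 = 184549376, i.e. 11% of mult.
 */
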
944d65670a7SJohn Stultz /**
94587d8b9ebSStephen Boyd  * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
94687d8b9ebSStephen Boyd  * @mult:	cycle to nanosecond multiplier
94787d8b9ebSStephen Boyd  * @shift:	cycle to nanosecond divisor (power of two)
94887d8b9ebSStephen Boyd  * @maxadj:	maximum adjustment value to mult (~11%)
94987d8b9ebSStephen Boyd  * @mask:	bitmask for two's complement subtraction of non 64 bit counters
950fb82fe2fSJohn Stultz  * @max_cyc:	maximum cycle value before potential overflow (does not include
951fb82fe2fSJohn Stultz  *		any safety margin)
952362fde04SJohn Stultz  *
9538e56f33fSJohn Stultz  * NOTE: This function includes a safety margin of 50%, in other words, we
9548e56f33fSJohn Stultz  * return half the number of nanoseconds the hardware counter can technically
9558e56f33fSJohn Stultz  * cover. This is done so that we can potentially detect problems caused by
9568e56f33fSJohn Stultz  * delayed timers or bad hardware, which might result in time intervals that
957571af55aSZhen Lei  * are larger than what the math used can handle without overflows.
95898962465SJon Hunter  */
clocks_calc_max_nsecs(u32 mult,u32 shift,u32 maxadj,u64 mask,u64 * max_cyc)959fb82fe2fSJohn Stultz u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
96098962465SJon Hunter {
96198962465SJon Hunter 	u64 max_nsecs, max_cycles;
96298962465SJon Hunter 
96398962465SJon Hunter 	/*
96498962465SJon Hunter 	 * Calculate the maximum number of cycles that we can pass to the
9656086e346SJohn Stultz 	 * cyc2ns() function without overflowing a 64-bit result.
96698962465SJon Hunter 	 */
9676086e346SJohn Stultz 	max_cycles = ULLONG_MAX;
9686086e346SJohn Stultz 	do_div(max_cycles, mult+maxadj);
96998962465SJon Hunter 
97098962465SJon Hunter 	/*
97198962465SJon Hunter 	 * The actual maximum number of cycles by which we can defer the
97287d8b9ebSStephen Boyd 	 * clocksource is determined by the minimum of max_cycles and mask.
973d65670a7SJohn Stultz 	 * Note: Here we subtract the maxadj to make sure we don't sleep for
974d65670a7SJohn Stultz 	 * too long if there's a large negative adjustment.
97598962465SJon Hunter 	 */
97687d8b9ebSStephen Boyd 	max_cycles = min(max_cycles, mask);
97787d8b9ebSStephen Boyd 	max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
97898962465SJon Hunter 
979fb82fe2fSJohn Stultz 	/* return the max_cycles value as well if requested */
980fb82fe2fSJohn Stultz 	if (max_cyc)
981fb82fe2fSJohn Stultz 		*max_cyc = max_cycles;
982fb82fe2fSJohn Stultz 
983362fde04SJohn Stultz 	/* Return 50% of the actual maximum, so we can detect bad values */
984362fde04SJohn Stultz 	max_nsecs >>= 1;
985362fde04SJohn Stultz 
98687d8b9ebSStephen Boyd 	return max_nsecs;
98787d8b9ebSStephen Boyd }
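
/*
 * Worked example (hypothetical numbers): a 32-bit counter running at
 * ~1GHz with mult = 2^22 and shift = 22 (1 ns per cycle) wraps after
 * mask = 2^32 - 1 cycles, i.e. ~4.29 seconds. After subtracting the
 * ~11% maxadj from mult and applying the 50% safety margin, this
 * function reports roughly 1.9 seconds as the maximum deferment.
 */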
98887d8b9ebSStephen Boyd 
98987d8b9ebSStephen Boyd /**
990fb82fe2fSJohn Stultz  * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
991fb82fe2fSJohn Stultz  * @cs:         Pointer to clocksource to be updated
99287d8b9ebSStephen Boyd  *
99387d8b9ebSStephen Boyd  */
994fb82fe2fSJohn Stultz static inline void clocksource_update_max_deferment(struct clocksource *cs)
99587d8b9ebSStephen Boyd {
996fb82fe2fSJohn Stultz 	cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
997fb82fe2fSJohn Stultz 						cs->maxadj, cs->mask,
998fb82fe2fSJohn Stultz 						&cs->max_cycles);
99976031d95SThomas Gleixner 
100076031d95SThomas Gleixner 	/*
100176031d95SThomas Gleixner 	 * Threshold for detecting negative motion in clocksource_delta().
100276031d95SThomas Gleixner 	 *
100376031d95SThomas Gleixner 	 * Allow for 0.875 of the counter width so that overly long idle
100476031d95SThomas Gleixner 	 * sleeps, which go slightly over mask/2, do not trigger the
100576031d95SThomas Gleixner 	 * negative motion detection.
100676031d95SThomas Gleixner 	 */
100776031d95SThomas Gleixner 	cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3);
100898962465SJon Hunter }
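
/*
 * Worked example (hypothetical numbers): for a 32-bit counter,
 * mask = 0xffffffff, so max_raw_delta = 0x7fffffff + 0x3fffffff +
 * 0x1fffffff = 0xdffffffd. Deltas above ~87.5% of the counter width
 * are treated as negative motion rather than as a long sleep.
 */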
100998962465SJon Hunter 
1010f5a2e343SThomas Gleixner static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
10115d33b883SThomas Gleixner {
10125d33b883SThomas Gleixner 	struct clocksource *cs;
10135d33b883SThomas Gleixner 
10145d33b883SThomas Gleixner 	if (!finished_booting || list_empty(&clocksource_list))
10155d33b883SThomas Gleixner 		return NULL;
10165d33b883SThomas Gleixner 
10175d33b883SThomas Gleixner 	/*
10185d33b883SThomas Gleixner 	 * We pick the clocksource with the highest rating. If oneshot
10195d33b883SThomas Gleixner 	 * mode is active, we pick the highres valid clocksource with
10205d33b883SThomas Gleixner 	 * the best rating.
10215d33b883SThomas Gleixner 	 */
10225d33b883SThomas Gleixner 	list_for_each_entry(cs, &clocksource_list, list) {
1023f5a2e343SThomas Gleixner 		if (skipcur && cs == curr_clocksource)
1024f5a2e343SThomas Gleixner 			continue;
10255d33b883SThomas Gleixner 		if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
10265d33b883SThomas Gleixner 			continue;
10275d33b883SThomas Gleixner 		return cs;
10285d33b883SThomas Gleixner 	}
10295d33b883SThomas Gleixner 	return NULL;
10305d33b883SThomas Gleixner }
10315d33b883SThomas Gleixner 
1032f5a2e343SThomas Gleixner static void __clocksource_select(bool skipcur)
1033734efb46Sjohn stultz {
10345d33b883SThomas Gleixner 	bool oneshot = tick_oneshot_mode_active();
1035f1b82746SMartin Schwidefsky 	struct clocksource *best, *cs;
10365d8b34fdSThomas Gleixner 
10375d33b883SThomas Gleixner 	/* Find the best suitable clocksource */
1038f5a2e343SThomas Gleixner 	best = clocksource_find_best(oneshot, skipcur);
10395d33b883SThomas Gleixner 	if (!best)
1040f1b82746SMartin Schwidefsky 		return;
10415d33b883SThomas Gleixner 
10427f852afeSBaolin Wang 	if (!strlen(override_name))
10437f852afeSBaolin Wang 		goto found;
10447f852afeSBaolin Wang 
1045f1b82746SMartin Schwidefsky 	/* Check for the override clocksource. */
1046f1b82746SMartin Schwidefsky 	list_for_each_entry(cs, &clocksource_list, list) {
1047f5a2e343SThomas Gleixner 		if (skipcur && cs == curr_clocksource)
1048f5a2e343SThomas Gleixner 			continue;
1049f1b82746SMartin Schwidefsky 		if (strcmp(cs->name, override_name) != 0)
1050f1b82746SMartin Schwidefsky 			continue;
1051f1b82746SMartin Schwidefsky 		/*
1052f1b82746SMartin Schwidefsky 		 * Check to make sure we don't switch to a non-highres
1053f1b82746SMartin Schwidefsky 		 * capable clocksource if the tick code is in oneshot
1054f1b82746SMartin Schwidefsky 		 * mode (highres or nohz)
1055f1b82746SMartin Schwidefsky 		 */
10565d33b883SThomas Gleixner 		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
1057f1b82746SMartin Schwidefsky 			/* Override clocksource cannot be used. */
105836374583SKyle Walker 			if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
105936374583SKyle Walker 				pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
106045bbfe64SJoe Perches 					cs->name);
1061f1b82746SMartin Schwidefsky 				override_name[0] = 0;
106236374583SKyle Walker 			} else {
106336374583SKyle Walker 				/*
106436374583SKyle Walker 				 * The override cannot be currently verified.
106536374583SKyle Walker 				 * Deferring to let the watchdog check.
106636374583SKyle Walker 				 */
106736374583SKyle Walker 				pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
106836374583SKyle Walker 					cs->name);
106936374583SKyle Walker 			}
1070f1b82746SMartin Schwidefsky 		} else
1071f1b82746SMartin Schwidefsky 			/* Override clocksource can be used. */
1072f1b82746SMartin Schwidefsky 			best = cs;
1073f1b82746SMartin Schwidefsky 		break;
1074734efb46Sjohn stultz 	}
1075ba919d1cSThomas Gleixner 
10767f852afeSBaolin Wang found:
1077ba919d1cSThomas Gleixner 	if (curr_clocksource != best && !timekeeping_notify(best)) {
1078ba919d1cSThomas Gleixner 		pr_info("Switched to clocksource %s\n", best->name);
107975c5158fSMartin Schwidefsky 		curr_clocksource = best;
1080f1b82746SMartin Schwidefsky 	}
108175c5158fSMartin Schwidefsky }
108275c5158fSMartin Schwidefsky 
1083f5a2e343SThomas Gleixner /**
1084f5a2e343SThomas Gleixner  * clocksource_select - Select the best clocksource available
1085f5a2e343SThomas Gleixner  *
1086f5a2e343SThomas Gleixner  * Private function. Must hold clocksource_mutex when called.
1087f5a2e343SThomas Gleixner  *
1088f5a2e343SThomas Gleixner  * Select the clocksource with the best rating, or the clocksource,
1089f5a2e343SThomas Gleixner  * which is selected by userspace override.
1090f5a2e343SThomas Gleixner  */
1091f5a2e343SThomas Gleixner static void clocksource_select(void)
1092f5a2e343SThomas Gleixner {
1093cfed432dSGuillaume Gomez 	__clocksource_select(false);
1094f5a2e343SThomas Gleixner }
1095f5a2e343SThomas Gleixner 
10967eaeb343SThomas Gleixner static void clocksource_select_fallback(void)
10977eaeb343SThomas Gleixner {
1098cfed432dSGuillaume Gomez 	__clocksource_select(true);
10997eaeb343SThomas Gleixner }
11007eaeb343SThomas Gleixner 
110175c5158fSMartin Schwidefsky /*
110275c5158fSMartin Schwidefsky  * clocksource_done_booting - Called near the end of core bootup
110375c5158fSMartin Schwidefsky  *
110475c5158fSMartin Schwidefsky  * Hack to avoid lots of clocksource churn at boot time.
110575c5158fSMartin Schwidefsky  * We use fs_initcall because we want this to start before
110675c5158fSMartin Schwidefsky  * device_initcall but after subsys_initcall.
110775c5158fSMartin Schwidefsky  */
110875c5158fSMartin Schwidefsky static int __init clocksource_done_booting(void)
110975c5158fSMartin Schwidefsky {
1110ad6759fbSjohn stultz 	mutex_lock(&clocksource_mutex);
1111ad6759fbSjohn stultz 	curr_clocksource = clocksource_default_clock();
111275c5158fSMartin Schwidefsky 	finished_booting = 1;
111354a6bc0bSThomas Gleixner 	/*
111454a6bc0bSThomas Gleixner 	 * Run the watchdog first to eliminate unstable clock sources
111554a6bc0bSThomas Gleixner 	 */
1116e2c631baSPeter Zijlstra 	__clocksource_watchdog_kthread();
111775c5158fSMartin Schwidefsky 	clocksource_select();
1118e6c73305SThomas Gleixner 	mutex_unlock(&clocksource_mutex);
111975c5158fSMartin Schwidefsky 	return 0;
112075c5158fSMartin Schwidefsky }
112175c5158fSMartin Schwidefsky fs_initcall(clocksource_done_booting);
1122f1b82746SMartin Schwidefsky 
112392c7e002SThomas Gleixner /*
112492c7e002SThomas Gleixner  * Enqueue the clocksource sorted by rating
1125734efb46Sjohn stultz  */
1126f1b82746SMartin Schwidefsky static void clocksource_enqueue(struct clocksource *cs)
1127734efb46Sjohn stultz {
1128f1b82746SMartin Schwidefsky 	struct list_head *entry = &clocksource_list;
1129f1b82746SMartin Schwidefsky 	struct clocksource *tmp;
1130734efb46Sjohn stultz 
11310fb71d34SMinfei Huang 	list_for_each_entry(tmp, &clocksource_list, list) {
113292c7e002SThomas Gleixner 		/* Keep track of the place where to insert */
11330fb71d34SMinfei Huang 		if (tmp->rating < cs->rating)
11340fb71d34SMinfei Huang 			break;
1135f1b82746SMartin Schwidefsky 		entry = &tmp->list;
11360fb71d34SMinfei Huang 	}
1137f1b82746SMartin Schwidefsky 	list_add(&cs->list, entry);
1138734efb46Sjohn stultz }
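
/*
 * Example ordering (typical x86 ratings): after registering jiffies (1),
 * acpi_pm (200), hpet (250) and tsc (300), the list reads
 * tsc -> hpet -> acpi_pm -> jiffies, so clocksource_find_best() can
 * simply return the first acceptable entry.
 */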
1139734efb46Sjohn stultz 
1140d7e81c26SJohn Stultz /**
1141fba9e072SJohn Stultz  * __clocksource_update_freq_scale - Used to update the clocksource with a new freq
1142b1b73d09SKusanagi Kouichi  * @cs:		clocksource to be registered
1143852db46dSJohn Stultz  * @scale:	Scale factor multiplied against freq to get clocksource hz
1144852db46dSJohn Stultz  * @freq:	clocksource frequency (cycles per second) divided by scale
1145852db46dSJohn Stultz  *
1146852db46dSJohn Stultz  * This should only be called from the clocksource->enable() method.
1147852db46dSJohn Stultz  *
1148852db46dSJohn Stultz  * This *SHOULD NOT* be called directly! Please use the
1149fba9e072SJohn Stultz  * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
1150fba9e072SJohn Stultz  * functions.
1151852db46dSJohn Stultz  */
1152fba9e072SJohn Stultz void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
1153852db46dSJohn Stultz {
1154c0e299b1SThomas Gleixner 	u64 sec;
1155f8935983SJohn Stultz 
1156f8935983SJohn Stultz 	/*
1157f8935983SJohn Stultz 	 * Default clocksources are *special* and self-define their mult/shift.
1158f8935983SJohn Stultz 	 * But, you're not special, so you should specify a freq value.
1159f8935983SJohn Stultz 	 */
1160f8935983SJohn Stultz 	if (freq) {
1161852db46dSJohn Stultz 		/*
1162724ed53eSThomas Gleixner 		 * Calc the maximum number of seconds which we can run before
1163f8935983SJohn Stultz 		 * wrapping around. For clocksources which have a mask > 32-bit
1164724ed53eSThomas Gleixner 		 * we need to limit the max sleep time to have a good
1165724ed53eSThomas Gleixner 		 * conversion precision. 10 minutes is still a reasonable
1166724ed53eSThomas Gleixner 		 * amount. That results in a shift value of 24 for a
1167f8935983SJohn Stultz 		 * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
1168362fde04SJohn Stultz 		 * ~ 0.06ppm granularity for NTP.
1169852db46dSJohn Stultz 		 */
1170362fde04SJohn Stultz 		sec = cs->mask;
1171724ed53eSThomas Gleixner 		do_div(sec, freq);
1172724ed53eSThomas Gleixner 		do_div(sec, scale);
1173724ed53eSThomas Gleixner 		if (!sec)
1174724ed53eSThomas Gleixner 			sec = 1;
1175724ed53eSThomas Gleixner 		else if (sec > 600 && cs->mask > UINT_MAX)
1176724ed53eSThomas Gleixner 			sec = 600;
1177724ed53eSThomas Gleixner 
1178852db46dSJohn Stultz 		clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
1179724ed53eSThomas Gleixner 				       NSEC_PER_SEC / scale, sec * scale);
1180f8935983SJohn Stultz 	}
11812e27e793SPaul E. McKenney 
11822e27e793SPaul E. McKenney 	/*
118317915131SBorislav Petkov 	 * If the uncertainty margin is not specified, calculate it.  If
118417915131SBorislav Petkov 	 * both scale and freq are non-zero, calculate the clock period, but
118517915131SBorislav Petkov 	 * bound below at 2*WATCHDOG_MAX_SKEW, that is, 500ppm by default.
118617915131SBorislav Petkov 	 * However, if either of scale or freq is zero, be very conservative
118717915131SBorislav Petkov 	 * and take the tens-of-milliseconds WATCHDOG_THRESHOLD value
118817915131SBorislav Petkov 	 * for the uncertainty margin.  Allow stupidly small uncertainty
118917915131SBorislav Petkov 	 * margins to be specified by the caller for testing purposes,
119017915131SBorislav Petkov 	 * but warn to discourage production use of this capability.
119117915131SBorislav Petkov 	 *
119217915131SBorislav Petkov 	 * Bottom line:  The sum of the uncertainty margins of the
119317915131SBorislav Petkov 	 * watchdog clocksource and the clocksource under test will be at
119417915131SBorislav Petkov 	 * least 500ppm by default.  For more information, please see the
119517915131SBorislav Petkov 	 * comment preceding CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US above.
11962e27e793SPaul E. McKenney 	 */
11972e27e793SPaul E. McKenney 	if (scale && freq && !cs->uncertainty_margin) {
11982e27e793SPaul E. McKenney 		cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
11992e27e793SPaul E. McKenney 		if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
12002e27e793SPaul E. McKenney 			cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
12012e27e793SPaul E. McKenney 	} else if (!cs->uncertainty_margin) {
12022e27e793SPaul E. McKenney 		cs->uncertainty_margin = WATCHDOG_THRESHOLD;
12032e27e793SPaul E. McKenney 	}
12042e27e793SPaul E. McKenney 	WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
12052e27e793SPaul E. McKenney 
1206d65670a7SJohn Stultz 	/*
1207362fde04SJohn Stultz 	 * Ensure clocksources that have large 'mult' values don't overflow
1208362fde04SJohn Stultz 	 * when adjusted.
1209d65670a7SJohn Stultz 	 */
1210d65670a7SJohn Stultz 	cs->maxadj = clocksource_max_adjustment(cs);
1211f8935983SJohn Stultz 	while (freq && ((cs->mult + cs->maxadj < cs->mult)
1212f8935983SJohn Stultz 		|| (cs->mult - cs->maxadj > cs->mult))) {
1213d65670a7SJohn Stultz 		cs->mult >>= 1;
1214d65670a7SJohn Stultz 		cs->shift--;
1215d65670a7SJohn Stultz 		cs->maxadj = clocksource_max_adjustment(cs);
1216d65670a7SJohn Stultz 	}
1217d65670a7SJohn Stultz 
1218f8935983SJohn Stultz 	/*
1219f8935983SJohn Stultz 	 * Only warn for *special* clocksources that self-define
1220f8935983SJohn Stultz 	 * their mult/shift values and don't specify a freq.
1221f8935983SJohn Stultz 	 */
1222f8935983SJohn Stultz 	WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
1223f8935983SJohn Stultz 		"timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
1224f8935983SJohn Stultz 		cs->name);
1225f8935983SJohn Stultz 
1226fb82fe2fSJohn Stultz 	clocksource_update_max_deferment(cs);
12278cc8c525SJohn Stultz 
122845bbfe64SJoe Perches 	pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
12298cc8c525SJohn Stultz 		cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
1230852db46dSJohn Stultz }
1231fba9e072SJohn Stultz EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
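
/*
 * Worked example (hypothetical numbers): for a 1 MHz counter
 * (scale = 1, freq = 1000000), clocks_calc_mult_shift() may come back
 * with shift = 20 and mult = 1000 << 20, since each cycle is 1000 ns:
 *
 *	clocksource_cyc2ns(123, 1000 << 20, 20) == 123000
 *
 * The exact factors depend on the conversion range ('sec') computed
 * above.
 */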
1232852db46dSJohn Stultz 
1233852db46dSJohn Stultz /**
1234d7e81c26SJohn Stultz  * __clocksource_register_scale - Used to install new clocksources
1235b1b73d09SKusanagi Kouichi  * @cs:		clocksource to be registered
1236d7e81c26SJohn Stultz  * @scale:	Scale factor multiplied against freq to get clocksource hz
1237d7e81c26SJohn Stultz  * @freq:	clocksource frequency (cycles per second) divided by scale
1238d7e81c26SJohn Stultz  *
1239d7e81c26SJohn Stultz  * Returns -EBUSY if registration fails, zero otherwise.
1240d7e81c26SJohn Stultz  *
1241d7e81c26SJohn Stultz  * This *SHOULD NOT* be called directly! Please use the
1242d7e81c26SJohn Stultz  * clocksource_register_hz() or clocksource_register_khz() helper functions.
1243d7e81c26SJohn Stultz  */
1244d7e81c26SJohn Stultz int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
1245d7e81c26SJohn Stultz {
12462aae7bcfSPeter Zijlstra 	unsigned long flags;
1247d7e81c26SJohn Stultz 
1248d67f34c1SThomas Gleixner 	clocksource_arch_init(cs);
1249d67f34c1SThomas Gleixner 
1250b2c67cbeSThomas Gleixner 	if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
1251b2c67cbeSThomas Gleixner 		cs->id = CSID_GENERIC;
12525d51bee7SThomas Gleixner 	if (cs->vdso_clock_mode < 0 ||
12535d51bee7SThomas Gleixner 	    cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
12545d51bee7SThomas Gleixner 		pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
12555d51bee7SThomas Gleixner 			cs->name, cs->vdso_clock_mode);
12565d51bee7SThomas Gleixner 		cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
12575d51bee7SThomas Gleixner 	}
12585d51bee7SThomas Gleixner 
1259b595076aSUwe Kleine-König 	/* Initialize mult/shift and max_idle_ns */
1260fba9e072SJohn Stultz 	__clocksource_update_freq_scale(cs, scale, freq);
1261d7e81c26SJohn Stultz 
1262be278e98SJames Hartley 	/* Add clocksource to the clocksource list */
1263d7e81c26SJohn Stultz 	mutex_lock(&clocksource_mutex);
12642aae7bcfSPeter Zijlstra 
12652aae7bcfSPeter Zijlstra 	clocksource_watchdog_lock(&flags);
1266d7e81c26SJohn Stultz 	clocksource_enqueue(cs);
1267d7e81c26SJohn Stultz 	clocksource_enqueue_watchdog(cs);
12682aae7bcfSPeter Zijlstra 	clocksource_watchdog_unlock(&flags);
12692aae7bcfSPeter Zijlstra 
1270e05b2efbSjohn stultz 	clocksource_select();
1271bbf66d89SVitaly Kuznetsov 	clocksource_select_watchdog(false);
127239232ed5SBaolin Wang 	__clocksource_suspend_select(cs);
1273d7e81c26SJohn Stultz 	mutex_unlock(&clocksource_mutex);
1274d7e81c26SJohn Stultz 	return 0;
1275d7e81c26SJohn Stultz }
1276d7e81c26SJohn Stultz EXPORT_SYMBOL_GPL(__clocksource_register_scale);
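
/*
 * Minimal registration sketch (hypothetical driver, not part of this
 * file): a driver typically fills in a struct clocksource and calls one
 * of the clocksource_register_hz()/clocksource_register_khz() wrappers
 * around this function:
 *
 *	static u64 foo_read(struct clocksource *cs)
 *	{
 *		return readl(foo_counter_base) & cs->mask;
 *	}
 *
 *	static struct clocksource foo_cs = {
 *		.name	= "foo",
 *		.rating	= 200,
 *		.read	= foo_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	err = clocksource_register_hz(&foo_cs, 19200000);
 */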
1277d7e81c26SJohn Stultz 
12787eaeb343SThomas Gleixner /*
12797eaeb343SThomas Gleixner  * Unbind clocksource @cs. Called with clocksource_mutex held
12807eaeb343SThomas Gleixner  */
12817eaeb343SThomas Gleixner static int clocksource_unbind(struct clocksource *cs)
12827eaeb343SThomas Gleixner {
12832aae7bcfSPeter Zijlstra 	unsigned long flags;
12842aae7bcfSPeter Zijlstra 
1285bbf66d89SVitaly Kuznetsov 	if (clocksource_is_watchdog(cs)) {
1286bbf66d89SVitaly Kuznetsov 		/* Select and try to install a replacement watchdog. */
1287bbf66d89SVitaly Kuznetsov 		clocksource_select_watchdog(true);
12887eaeb343SThomas Gleixner 		if (clocksource_is_watchdog(cs))
12897eaeb343SThomas Gleixner 			return -EBUSY;
1290bbf66d89SVitaly Kuznetsov 	}
12917eaeb343SThomas Gleixner 
12927eaeb343SThomas Gleixner 	if (cs == curr_clocksource) {
12937eaeb343SThomas Gleixner 		/* Select and try to install a replacement clock source */
12947eaeb343SThomas Gleixner 		clocksource_select_fallback();
12957eaeb343SThomas Gleixner 		if (curr_clocksource == cs)
12967eaeb343SThomas Gleixner 			return -EBUSY;
12977eaeb343SThomas Gleixner 	}
12982aae7bcfSPeter Zijlstra 
129939232ed5SBaolin Wang 	if (clocksource_is_suspend(cs)) {
130039232ed5SBaolin Wang 		/*
130139232ed5SBaolin Wang 		 * Select and try to install a replacement suspend clocksource.
130239232ed5SBaolin Wang 		 * If no replacement suspend clocksource, we will just let the
130339232ed5SBaolin Wang 		 * If there is no replacement, just let the clocksource go and
130439232ed5SBaolin Wang 		 * run without a suspend clocksource.
130539232ed5SBaolin Wang 		clocksource_suspend_select(true);
130639232ed5SBaolin Wang 	}
130739232ed5SBaolin Wang 
13082aae7bcfSPeter Zijlstra 	clocksource_watchdog_lock(&flags);
13097eaeb343SThomas Gleixner 	clocksource_dequeue_watchdog(cs);
13107eaeb343SThomas Gleixner 	list_del_init(&cs->list);
13112aae7bcfSPeter Zijlstra 	clocksource_watchdog_unlock(&flags);
13122aae7bcfSPeter Zijlstra 
13137eaeb343SThomas Gleixner 	return 0;
13147eaeb343SThomas Gleixner }
13157eaeb343SThomas Gleixner 
13164713e22cSThomas Gleixner /**
13174713e22cSThomas Gleixner  * clocksource_unregister - remove a registered clocksource
1318b1b73d09SKusanagi Kouichi  * @cs:	clocksource to be unregistered
13194713e22cSThomas Gleixner  */
1320a89c7edbSThomas Gleixner int clocksource_unregister(struct clocksource *cs)
13214713e22cSThomas Gleixner {
1322a89c7edbSThomas Gleixner 	int ret = 0;
1323a89c7edbSThomas Gleixner 
132475c5158fSMartin Schwidefsky 	mutex_lock(&clocksource_mutex);
1325a89c7edbSThomas Gleixner 	if (!list_empty(&cs->list))
1326a89c7edbSThomas Gleixner 		ret = clocksource_unbind(cs);
132775c5158fSMartin Schwidefsky 	mutex_unlock(&clocksource_mutex);
1328a89c7edbSThomas Gleixner 	return ret;
13294713e22cSThomas Gleixner }
1330fb63a0ebSMartin Schwidefsky EXPORT_SYMBOL(clocksource_unregister);
13314713e22cSThomas Gleixner 
13322b013700SDaniel Walker #ifdef CONFIG_SYSFS
1333734efb46Sjohn stultz /**
1334e87821d1SBaolin Wang  * current_clocksource_show - sysfs interface for current clocksource
1335734efb46Sjohn stultz  * @dev:	unused
1336b1b73d09SKusanagi Kouichi  * @attr:	unused
1337734efb46Sjohn stultz  * @buf:	char buffer to be filled with clocksource list
1338734efb46Sjohn stultz  *
1339734efb46Sjohn stultz  * Provides the sysfs interface for showing the current clocksource.
1340734efb46Sjohn stultz  */
1341e87821d1SBaolin Wang static ssize_t current_clocksource_show(struct device *dev,
1342e87821d1SBaolin Wang 					struct device_attribute *attr,
1343e87821d1SBaolin Wang 					char *buf)
1344734efb46Sjohn stultz {
13455e2cb101SMiao Xie 	ssize_t count = 0;
1346734efb46Sjohn stultz 
134775c5158fSMartin Schwidefsky 	mutex_lock(&clocksource_mutex);
13488f0acb7fSLi Zhijian 	count = sysfs_emit(buf, "%s\n", curr_clocksource->name);
134975c5158fSMartin Schwidefsky 	mutex_unlock(&clocksource_mutex);
1350734efb46Sjohn stultz 
13515e2cb101SMiao Xie 	return count;
1352734efb46Sjohn stultz }
1353734efb46Sjohn stultz 
1354891292a7SPatrick Palka ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
135529b54078SThomas Gleixner {
135629b54078SThomas Gleixner 	size_t ret = cnt;
135729b54078SThomas Gleixner 
135829b54078SThomas Gleixner 	/* strings from sysfs write are not 0 terminated! */
135929b54078SThomas Gleixner 	if (!cnt || cnt >= CS_NAME_LEN)
136029b54078SThomas Gleixner 		return -EINVAL;
136129b54078SThomas Gleixner 
136229b54078SThomas Gleixner 	/* strip off the trailing \n: */
136329b54078SThomas Gleixner 	if (buf[cnt-1] == '\n')
136429b54078SThomas Gleixner 		cnt--;
136529b54078SThomas Gleixner 	if (cnt > 0)
136629b54078SThomas Gleixner 		memcpy(dst, buf, cnt);
136729b54078SThomas Gleixner 	dst[cnt] = 0;
136829b54078SThomas Gleixner 	return ret;
136929b54078SThomas Gleixner }
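
/*
 * Example: a sysfs write of "tsc\n" arrives as cnt = 4 with no NUL
 * termination; the trailing newline is stripped, dst becomes "tsc" and
 * the original input length 4 is returned.
 */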
137029b54078SThomas Gleixner 
1371734efb46Sjohn stultz /**
1372e87821d1SBaolin Wang  * current_clocksource_store - interface for manually overriding clocksource
1373734efb46Sjohn stultz  * @dev:	unused
1374b1b73d09SKusanagi Kouichi  * @attr:	unused
1375734efb46Sjohn stultz  * @buf:	name of override clocksource
1376734efb46Sjohn stultz  * @count:	length of buffer
1377734efb46Sjohn stultz  *
1378734efb46Sjohn stultz  * Takes input from sysfs interface for manually overriding the default
1379b71a8eb0SUwe Kleine-König  * clocksource selection.
1380734efb46Sjohn stultz  */
1381e87821d1SBaolin Wang static ssize_t current_clocksource_store(struct device *dev,
1382d369a5d8SKay Sievers 					 struct device_attribute *attr,
1383734efb46Sjohn stultz 					 const char *buf, size_t count)
1384734efb46Sjohn stultz {
1385233bcb41SElad Wexler 	ssize_t ret;
1386734efb46Sjohn stultz 
138775c5158fSMartin Schwidefsky 	mutex_lock(&clocksource_mutex);
1388734efb46Sjohn stultz 
138903e13cf5SThomas Gleixner 	ret = sysfs_get_uname(buf, override_name, count);
139029b54078SThomas Gleixner 	if (ret >= 0)
1391f1b82746SMartin Schwidefsky 		clocksource_select();
1392734efb46Sjohn stultz 
139375c5158fSMartin Schwidefsky 	mutex_unlock(&clocksource_mutex);
1394734efb46Sjohn stultz 
1395734efb46Sjohn stultz 	return ret;
1396734efb46Sjohn stultz }
1397e87821d1SBaolin Wang static DEVICE_ATTR_RW(current_clocksource);
1398734efb46Sjohn stultz 
1399734efb46Sjohn stultz /**
1400e87821d1SBaolin Wang  * unbind_clocksource_store - interface for manually unbinding clocksource
14017eaeb343SThomas Gleixner  * @dev:	unused
14027eaeb343SThomas Gleixner  * @attr:	unused
14037eaeb343SThomas Gleixner  * @buf:	name of the clocksource to unbind
14047eaeb343SThomas Gleixner  * @count:	length of buffer
14057eaeb343SThomas Gleixner  *
14067eaeb343SThomas Gleixner  * Takes input from sysfs interface for manually unbinding a clocksource.
14077eaeb343SThomas Gleixner  */
1408e87821d1SBaolin Wang static ssize_t unbind_clocksource_store(struct device *dev,
14097eaeb343SThomas Gleixner 					struct device_attribute *attr,
14107eaeb343SThomas Gleixner 					const char *buf, size_t count)
14117eaeb343SThomas Gleixner {
14127eaeb343SThomas Gleixner 	struct clocksource *cs;
14137eaeb343SThomas Gleixner 	char name[CS_NAME_LEN];
1414233bcb41SElad Wexler 	ssize_t ret;
14157eaeb343SThomas Gleixner 
141603e13cf5SThomas Gleixner 	ret = sysfs_get_uname(buf, name, count);
14177eaeb343SThomas Gleixner 	if (ret < 0)
14187eaeb343SThomas Gleixner 		return ret;
14197eaeb343SThomas Gleixner 
14207eaeb343SThomas Gleixner 	ret = -ENODEV;
14217eaeb343SThomas Gleixner 	mutex_lock(&clocksource_mutex);
14227eaeb343SThomas Gleixner 	list_for_each_entry(cs, &clocksource_list, list) {
14237eaeb343SThomas Gleixner 		if (strcmp(cs->name, name))
14247eaeb343SThomas Gleixner 			continue;
14257eaeb343SThomas Gleixner 		ret = clocksource_unbind(cs);
14267eaeb343SThomas Gleixner 		break;
14277eaeb343SThomas Gleixner 	}
14287eaeb343SThomas Gleixner 	mutex_unlock(&clocksource_mutex);
14297eaeb343SThomas Gleixner 
14307eaeb343SThomas Gleixner 	return ret ? ret : count;
14317eaeb343SThomas Gleixner }
1432e87821d1SBaolin Wang static DEVICE_ATTR_WO(unbind_clocksource);
14337eaeb343SThomas Gleixner 
14347eaeb343SThomas Gleixner /**
1435e87821d1SBaolin Wang  * available_clocksource_show - sysfs interface for listing clocksource
1436734efb46Sjohn stultz  * @dev:	unused
1437b1b73d09SKusanagi Kouichi  * @attr:	unused
1438734efb46Sjohn stultz  * @buf:	char buffer to be filled with clocksource list
1439734efb46Sjohn stultz  *
1440734efb46Sjohn stultz  * Provides sysfs interface for listing registered clocksources
1441734efb46Sjohn stultz  */
1442e87821d1SBaolin Wang static ssize_t available_clocksource_show(struct device *dev,
1443d369a5d8SKay Sievers 					  struct device_attribute *attr,
14444a0b2b4dSAndi Kleen 					  char *buf)
1445734efb46Sjohn stultz {
14462e197586SMatthias Kaehlcke 	struct clocksource *src;
14475e2cb101SMiao Xie 	ssize_t count = 0;
1448734efb46Sjohn stultz 
144975c5158fSMartin Schwidefsky 	mutex_lock(&clocksource_mutex);
14502e197586SMatthias Kaehlcke 	list_for_each_entry(src, &clocksource_list, list) {
1451cd6d95d8SThomas Gleixner 		/*
1452cd6d95d8SThomas Gleixner 		 * Don't show non-HRES clocksource if the tick code is
1453cd6d95d8SThomas Gleixner 		 * in one shot mode (highres=on or nohz=on)
1454cd6d95d8SThomas Gleixner 		 */
1455cd6d95d8SThomas Gleixner 		if (!tick_oneshot_mode_active() ||
14563f68535aSjohn stultz 		    (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
14575e2cb101SMiao Xie 			count += snprintf(buf + count,
14585e2cb101SMiao Xie 				  max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
14595e2cb101SMiao Xie 				  "%s ", src->name);
1460734efb46Sjohn stultz 	}
146175c5158fSMartin Schwidefsky 	mutex_unlock(&clocksource_mutex);
1462734efb46Sjohn stultz 
14635e2cb101SMiao Xie 	count += snprintf(buf + count,
14645e2cb101SMiao Xie 			  max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
1465734efb46Sjohn stultz 
14665e2cb101SMiao Xie 	return count;
1467734efb46Sjohn stultz }
1468e87821d1SBaolin Wang static DEVICE_ATTR_RO(available_clocksource);
1469734efb46Sjohn stultz 
147027263e8dSBaolin Wang static struct attribute *clocksource_attrs[] = {
147127263e8dSBaolin Wang 	&dev_attr_current_clocksource.attr,
147227263e8dSBaolin Wang 	&dev_attr_unbind_clocksource.attr,
147327263e8dSBaolin Wang 	&dev_attr_available_clocksource.attr,
147427263e8dSBaolin Wang 	NULL
147527263e8dSBaolin Wang };
147627263e8dSBaolin Wang ATTRIBUTE_GROUPS(clocksource);
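
/*
 * Example interaction from userspace (the device is registered below as
 * clocksource0; the available list is machine dependent):
 *
 *	$ cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *	tsc hpet acpi_pm
 *	$ echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 *	$ echo acpi_pm > /sys/devices/system/clocksource/clocksource0/unbind_clocksource
 */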
147727263e8dSBaolin Wang 
14782bc7fc24SRicardo B. Marliere static const struct bus_type clocksource_subsys = {
1479af5ca3f4SKay Sievers 	.name = "clocksource",
1480d369a5d8SKay Sievers 	.dev_name = "clocksource",
1481734efb46Sjohn stultz };
1482734efb46Sjohn stultz 
1483d369a5d8SKay Sievers static struct device device_clocksource = {
1484734efb46Sjohn stultz 	.id	= 0,
1485d369a5d8SKay Sievers 	.bus	= &clocksource_subsys,
148627263e8dSBaolin Wang 	.groups	= clocksource_groups,
1487734efb46Sjohn stultz };
1488734efb46Sjohn stultz 
1489ad596171Sjohn stultz static int __init init_clocksource_sysfs(void)
1490734efb46Sjohn stultz {
1491d369a5d8SKay Sievers 	int error = subsys_system_register(&clocksource_subsys, NULL);
1492734efb46Sjohn stultz 
1493734efb46Sjohn stultz 	if (!error)
1494d369a5d8SKay Sievers 		error = device_register(&device_clocksource);
149527263e8dSBaolin Wang 
1496734efb46Sjohn stultz 	return error;
1497734efb46Sjohn stultz }
1498734efb46Sjohn stultz 
1499734efb46Sjohn stultz device_initcall(init_clocksource_sysfs);
15002b013700SDaniel Walker #endif /* CONFIG_SYSFS */
1501734efb46Sjohn stultz 
1502734efb46Sjohn stultz /**
1503734efb46Sjohn stultz  * boot_override_clocksource - boot clock override
1504734efb46Sjohn stultz  * @str:	override name
1505734efb46Sjohn stultz  *
1506734efb46Sjohn stultz  * Takes a clocksource= boot argument and uses it
1507734efb46Sjohn stultz  * as the clocksource override name.
1508734efb46Sjohn stultz  */
1509734efb46Sjohn stultz static int __init boot_override_clocksource(char *str)
1510734efb46Sjohn stultz {
151175c5158fSMartin Schwidefsky 	mutex_lock(&clocksource_mutex);
1512734efb46Sjohn stultz 	if (str)
1513fc661d0aSThorsten Blum 		strscpy(override_name, str);
151475c5158fSMartin Schwidefsky 	mutex_unlock(&clocksource_mutex);
1515734efb46Sjohn stultz 	return 1;
1516734efb46Sjohn stultz }
1517734efb46Sjohn stultz 
1518734efb46Sjohn stultz __setup("clocksource=", boot_override_clocksource);
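
/*
 * Example: booting with "clocksource=hpet" on the kernel command line
 * stores "hpet" as the override name; the actual switch happens once a
 * clocksource with that name is registered and passes the checks in
 * __clocksource_select().
 */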
1519734efb46Sjohn stultz 
1520734efb46Sjohn stultz /**
1521734efb46Sjohn stultz  * boot_override_clock - Compatibility layer for deprecated boot option
1522734efb46Sjohn stultz  * @str:	override name
1523734efb46Sjohn stultz  *
1524734efb46Sjohn stultz  * DEPRECATED! Takes a clock= boot argument and uses it
1525734efb46Sjohn stultz  * as the clocksource override name
1526734efb46Sjohn stultz  */
1527734efb46Sjohn stultz static int __init boot_override_clock(char *str)
1528734efb46Sjohn stultz {
15295d0cf410Sjohn stultz 	if (!strcmp(str, "pmtmr")) {
153045bbfe64SJoe Perches 		pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
15315d0cf410Sjohn stultz 		return boot_override_clocksource("acpi_pm");
15325d0cf410Sjohn stultz 	}
153345bbfe64SJoe Perches 	pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
1534734efb46Sjohn stultz 	return boot_override_clocksource(str);
1535734efb46Sjohn stultz }
1536734efb46Sjohn stultz 
1537734efb46Sjohn stultz __setup("clock=", boot_override_clock);
1538