// SPDX-License-Identifier: GPL-2.0+
/*
 * This file contains the functions which manage clocksource drivers.
 *
 * Copyright (C) 2004, 2005 IBM, John Stultz ([email protected])
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/clocksource.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/sched.h> /* for spin_unlock_irq() using preempt_count() m68k */
#include <linux/tick.h>
#include <linux/kthread.h>
#include <linux/prandom.h>
#include <linux/cpu.h>

#include "tick-internal.h"
#include "timekeeping_internal.h"

static void clocksource_enqueue(struct clocksource *cs);

static noinline u64 cycles_to_nsec_safe(struct clocksource *cs, u64 start, u64 end)
{
	u64 delta = clocksource_delta(end, start, cs->mask, cs->max_raw_delta);

	if (likely(delta < cs->max_cycles))
		return clocksource_cyc2ns(delta, cs->mult, cs->shift);

	return mul_u64_u32_shr(delta, cs->mult, cs->shift);
}
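
/*
 * Worked example (illustrative, not part of the original file): for a
 * clocksource with mult = 4194304000 and shift = 22 (a 1 MHz counter,
 * see clocks_calc_mult_shift() below), a delta of 1000 cycles converts
 * to (1000 * 4194304000) >> 22 = 1000000 ns. The fast path does this
 * multiply in 64 bits, which could overflow once delta reaches
 * cs->max_cycles; the mul_u64_u32_shr() fallback performs the same
 * computation with a wider intermediate, trading speed for overflow
 * safety.
 */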

/**
 * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks
 * @mult:	pointer to mult variable
 * @shift:	pointer to shift variable
 * @from:	frequency to convert from
 * @to:		frequency to convert to
 * @maxsec:	guaranteed runtime conversion range in seconds
 *
 * The function evaluates the shift/mult pair for the scaled math
 * operations of clocksources and clockevents.
 *
 * @to and @from are frequency values in HZ. For clock sources @to is
 * NSEC_PER_SEC == 1GHz and @from is the counter frequency. For clock
 * event @to is the counter frequency and @from is NSEC_PER_SEC.
 *
 * The @maxsec conversion range argument controls the time frame in
 * seconds which must be covered by the runtime conversion with the
 * calculated mult and shift factors. This guarantees that no 64bit
 * overflow happens when the input value of the conversion is
 * multiplied with the calculated mult factor. Larger ranges may
 * reduce the conversion accuracy by choosing smaller mult and shift
 * factors.
 */
void
clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
{
	u64 tmp;
	u32 sft, sftacc = 32;

	/*
	 * Calculate the shift factor which is limiting the conversion
	 * range:
	 */
	tmp = ((u64)maxsec * from) >> 32;
	while (tmp) {
		tmp >>= 1;
		sftacc--;
	}

	/*
	 * Find the conversion shift/mult pair which has the best
	 * accuracy and fits the maxsec conversion range:
	 */
	for (sft = 32; sft > 0; sft--) {
		tmp = (u64) to << sft;
		tmp += from / 2;
		do_div(tmp, from);
		if ((tmp >> sftacc) == 0)
			break;
	}
	*mult = tmp;
	*shift = sft;
}
EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
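
/*
 * Worked example (illustrative, not part of the original file):
 * converting a 1 MHz counter to nanoseconds via
 * clocks_calc_mult_shift(&mult, &shift, 1000000, NSEC_PER_SEC, 600)
 * yields mult = 4194304000 and shift = 22, because 1000 << 22 still
 * fits in 32 bits while 1000 << 23 does not. One cycle (1 us) then
 * converts as (1 * 4194304000) >> 22 = 1000 ns. The rounding term
 * from / 2 is negligible here.
 */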

/*[Clocksource internal variables]---------
 * curr_clocksource:
 *	currently selected clocksource.
 * suspend_clocksource:
 *	used to calculate the suspend time.
 * clocksource_list:
 *	linked list with the registered clocksources
 * clocksource_mutex:
 *	protects manipulations to curr_clocksource and the clocksource_list
 * override_name:
 *	Name of the user-specified clocksource.
 */
static struct clocksource *curr_clocksource;
static struct clocksource *suspend_clocksource;
static LIST_HEAD(clocksource_list);
static DEFINE_MUTEX(clocksource_mutex);
static char override_name[CS_NAME_LEN];
static int finished_booting;
static u64 suspend_start;

/*
 * Interval: 0.5sec.
 */
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_INTERVAL_MAX_NS ((2 * WATCHDOG_INTERVAL) * (NSEC_PER_SEC / HZ))

/*
 * Threshold: 0.0312s, when doubled: 0.0625s.
 */
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)

/*
 * Maximum permissible delay between two readouts of the watchdog
 * clocksource surrounding a read of the clocksource being validated.
 * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
 * a lower bound for cs->uncertainty_margin values when registering clocks.
 *
 * The default of 500 parts per million is based on NTP's limits.
 * If a clocksource is good enough for NTP, it is good enough for us!
 *
 * In other words, by default, even if a clocksource is extremely
 * precise (for example, with a sub-nanosecond period), the maximum
 * permissible skew between the clocksource watchdog and the clocksource
 * under test is not permitted to go below the 500ppm minimum defined
 * by MAX_SKEW_USEC. This 500ppm minimum may be overridden using the
 * CLOCKSOURCE_WATCHDOG_MAX_SKEW_US Kconfig option.
 */
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#define MAX_SKEW_USEC	CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US
#else
#define MAX_SKEW_USEC	(125 * WATCHDOG_INTERVAL / HZ)
#endif

/*
 * Default for maximum permissible skew when cs->uncertainty_margin is
 * not specified, and the lower bound even when cs->uncertainty_margin
 * is specified. This is also the default that is used when registering
 * clocks with unspecified cs->uncertainty_margin, so this macro is used
 * even in CONFIG_CLOCKSOURCE_WATCHDOG=n kernels.
 */
#define WATCHDOG_MAX_SKEW (MAX_SKEW_USEC * NSEC_PER_USEC)

#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);

static LIST_HEAD(watchdog_list);
static struct clocksource *watchdog;
static struct timer_list watchdog_timer;
static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
static int64_t watchdog_max_interval;

static inline void clocksource_watchdog_lock(unsigned long *flags)
{
	spin_lock_irqsave(&watchdog_lock, *flags);
}

static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
	spin_unlock_irqrestore(&watchdog_lock, *flags);
}

static int clocksource_watchdog_kthread(void *data);

static void clocksource_watchdog_work(struct work_struct *work)
{
	/*
	 * We cannot directly run clocksource_watchdog_kthread() here, because
	 * clocksource_select() calls timekeeping_notify() which uses
	 * stop_machine(). One cannot use stop_machine() from a workqueue() due
	 * to lock inversions wrt CPU hotplug.
	 *
	 * Also, we only ever run this work once or twice during the lifetime
	 * of the kernel, so there is no point in creating a more permanent
	 * kthread for this.
	 *
	 * If kthread_run() fails, the next watchdog scan over the
	 * watchdog_list will find the unstable clock again.
	 */
	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
}

static void clocksource_change_rating(struct clocksource *cs, int rating)
{
	list_del(&cs->list);
	cs->rating = rating;
	clocksource_enqueue(cs);
}

static void __clocksource_unstable(struct clocksource *cs)
{
	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
	cs->flags |= CLOCK_SOURCE_UNSTABLE;

	/*
	 * If the clocksource is registered clocksource_watchdog_kthread() will
	 * re-rate and re-select.
	 */
	if (list_empty(&cs->list)) {
		cs->rating = 0;
		return;
	}

	if (cs->mark_unstable)
		cs->mark_unstable(cs);

	/* kick clocksource_watchdog_kthread() */
	if (finished_booting)
		schedule_work(&watchdog_work);
}

/**
 * clocksource_mark_unstable - mark clocksource unstable via watchdog
 * @cs:	clocksource to be marked unstable
 *
 * This function is called by the x86 TSC code to mark clocksources as unstable;
 * it defers demotion and re-selection to a kthread.
 */
void clocksource_mark_unstable(struct clocksource *cs)
{
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
		if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
			list_add(&cs->wd_list, &watchdog_list);
		__clocksource_unstable(cs);
	}
	spin_unlock_irqrestore(&watchdog_lock, flags);
}
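
/*
 * Caller sketch (illustrative): the x86 TSC code invokes this from its
 * unstable-detection path, roughly:
 *
 *	clocksource_mark_unstable(&clocksource_tsc);
 *
 * The actual demotion (rating -> 0) and re-selection then happen later
 * in clocksource_watchdog_kthread() context, not in the caller.
 */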

static int verify_n_cpus = 8;
module_param(verify_n_cpus, int, 0644);
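
/*
 * Note (summarizing clocksource_verify_choose_cpus() below): since this
 * file is built in, the parameter is set on the kernel command line as
 * clocksource.verify_n_cpus=<n>. A negative value checks all online
 * CPUs, 0 disables the per-CPU verification, and a positive value
 * randomly selects that many CPUs to check.
 */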

enum wd_read_status {
	WD_READ_SUCCESS,
	WD_READ_UNSTABLE,
	WD_READ_SKIP
};

static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
	int64_t md = 2 * watchdog->uncertainty_margin;
	unsigned int nretries, max_retries;
	int64_t wd_delay, wd_seq_delay;
	u64 wd_end, wd_end2;

	max_retries = clocksource_get_max_watchdog_retry();
	for (nretries = 0; nretries <= max_retries; nretries++) {
		local_irq_disable();
		*wdnow = watchdog->read(watchdog);
		*csnow = cs->read(cs);
		wd_end = watchdog->read(watchdog);
		wd_end2 = watchdog->read(watchdog);
		local_irq_enable();

		wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end);
		if (wd_delay <= md + cs->uncertainty_margin) {
			if (nretries > 1 && nretries >= max_retries) {
				pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
					smp_processor_id(), watchdog->name, nretries);
			}
			return WD_READ_SUCCESS;
		}

		/*
		 * Now compute the delay between consecutive watchdog reads
		 * to see if there is too much external interference causing
		 * significant delay in reading both the clocksource and the
		 * watchdog.
		 *
		 * If the consecutive WD read-back delay > md, report the
		 * system as busy, reinit the watchdog and skip the current
		 * watchdog test.
		 */
		wd_seq_delay = cycles_to_nsec_safe(watchdog, wd_end, wd_end2);
		if (wd_seq_delay > md)
			goto skip_test;
	}

	pr_warn("timekeeping watchdog on CPU%d: wd-%s-wd excessive read-back delay of %lldns vs. limit of %ldns, wd-wd read-back delay only %lldns, attempt %d, marking %s unstable\n",
		smp_processor_id(), cs->name, wd_delay, WATCHDOG_MAX_SKEW, wd_seq_delay, nretries, cs->name);
	return WD_READ_UNSTABLE;

skip_test:
	pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
		smp_processor_id(), watchdog->name, wd_seq_delay);
	pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
		cs->name, wd_delay);
	return WD_READ_SKIP;
}

static u64 csnow_mid;
static cpumask_t cpus_ahead;
static cpumask_t cpus_behind;
static cpumask_t cpus_chosen;

static void clocksource_verify_choose_cpus(void)
{
	int cpu, i, n = verify_n_cpus;

	if (n < 0) {
		/* Check all of the CPUs. */
		cpumask_copy(&cpus_chosen, cpu_online_mask);
		cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
		return;
	}

	/* If no checking desired, or no other CPU to check, leave. */
	cpumask_clear(&cpus_chosen);
	if (n == 0 || num_online_cpus() <= 1)
		return;

	/* Make sure to select at least one CPU other than the current CPU. */
	cpu = cpumask_first(cpu_online_mask);
	if (cpu == smp_processor_id())
		cpu = cpumask_next(cpu, cpu_online_mask);
	if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
		return;
	cpumask_set_cpu(cpu, &cpus_chosen);

	/* Force a sane value for the boot parameter. */
	if (n > nr_cpu_ids)
		n = nr_cpu_ids;

	/*
	 * Randomly select the specified number of CPUs. If the same
	 * CPU is selected multiple times, that CPU is checked only once,
	 * and no replacement CPU is selected. This gracefully handles
	 * situations where verify_n_cpus is greater than the number of
	 * CPUs that are currently online.
	 */
	for (i = 1; i < n; i++) {
		cpu = get_random_u32_below(nr_cpu_ids);
		cpu = cpumask_next(cpu - 1, cpu_online_mask);
		if (cpu >= nr_cpu_ids)
			cpu = cpumask_first(cpu_online_mask);
		if (!WARN_ON_ONCE(cpu >= nr_cpu_ids))
			cpumask_set_cpu(cpu, &cpus_chosen);
	}

	/* Don't verify ourselves. */
	cpumask_clear_cpu(smp_processor_id(), &cpus_chosen);
}

static void clocksource_verify_one_cpu(void *csin)
{
	struct clocksource *cs = (struct clocksource *)csin;

	csnow_mid = cs->read(cs);
}

void clocksource_verify_percpu(struct clocksource *cs)
{
	int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
	u64 csnow_begin, csnow_end;
	int cpu, testcpu;
	s64 delta;

	if (verify_n_cpus == 0)
		return;
	cpumask_clear(&cpus_ahead);
	cpumask_clear(&cpus_behind);
	cpus_read_lock();
	migrate_disable();
	clocksource_verify_choose_cpus();
	if (cpumask_empty(&cpus_chosen)) {
		migrate_enable();
		cpus_read_unlock();
		pr_warn("Not enough CPUs to check clocksource '%s'.\n", cs->name);
		return;
	}
	testcpu = smp_processor_id();
	pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n",
		cs->name, testcpu, cpumask_pr_args(&cpus_chosen));
	preempt_disable();
	for_each_cpu(cpu, &cpus_chosen) {
		if (cpu == testcpu)
			continue;
		csnow_begin = cs->read(cs);
		smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
		csnow_end = cs->read(cs);
		delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
		if (delta < 0)
			cpumask_set_cpu(cpu, &cpus_behind);
		delta = (csnow_end - csnow_mid) & cs->mask;
		if (delta < 0)
			cpumask_set_cpu(cpu, &cpus_ahead);
		cs_nsec = cycles_to_nsec_safe(cs, csnow_begin, csnow_end);
		if (cs_nsec > cs_nsec_max)
			cs_nsec_max = cs_nsec;
		if (cs_nsec < cs_nsec_min)
			cs_nsec_min = cs_nsec;
	}
	preempt_enable();
	migrate_enable();
	cpus_read_unlock();
	if (!cpumask_empty(&cpus_ahead))
		pr_warn("        CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
			cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
	if (!cpumask_empty(&cpus_behind))
		pr_warn("        CPUs %*pbl behind CPU %d for clocksource %s.\n",
			cpumask_pr_args(&cpus_behind), testcpu, cs->name);
	if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
		pr_warn("        CPU %d check durations %lldns - %lldns for clocksource %s.\n",
			testcpu, cs_nsec_min, cs_nsec_max, cs->name);
}
EXPORT_SYMBOL_GPL(clocksource_verify_percpu);
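
/*
 * Worked example of the delta checks above (illustrative numbers): with
 * a 64-bit clocksource such as the TSC (cs->mask == ~0ULL), if the
 * remote CPU's mid read returns 100 while this CPU's begin read
 * returned 150, then (csnow_mid - csnow_begin) & cs->mask is
 * 0xffffffffffffffce, which reinterpreted as s64 is -50, so the remote
 * CPU is recorded in cpus_behind. The end-minus-mid check fills
 * cpus_ahead the same way.
 */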

static inline void clocksource_reset_watchdog(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &watchdog_list, wd_list)
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
}

static void clocksource_watchdog(struct timer_list *unused)
{
	int64_t wd_nsec, cs_nsec, interval;
	u64 csnow, wdnow, cslast, wdlast;
	int next_cpu, reset_pending;
	struct clocksource *cs;
	enum wd_read_status read_ret;
	unsigned long extra_wait = 0;
	u32 md;

	spin_lock(&watchdog_lock);
	if (!watchdog_running)
		goto out;

	reset_pending = atomic_read(&watchdog_reset_pending);

	list_for_each_entry(cs, &watchdog_list, wd_list) {

		/* Clocksource already marked unstable? */
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			if (finished_booting)
				schedule_work(&watchdog_work);
			continue;
		}

		read_ret = cs_watchdog_read(cs, &csnow, &wdnow);

		if (read_ret == WD_READ_UNSTABLE) {
			/* Clock readout unreliable, so give it up. */
			__clocksource_unstable(cs);
			continue;
		}

		/*
		 * When WD_READ_SKIP is returned, it means the system is likely
		 * under very heavy load, where the latency of reading the
		 * watchdog/clocksource is very big and affects the accuracy of
		 * the watchdog check. So give the system some space and suspend
		 * the watchdog check for 5 minutes.
		 */
		if (read_ret == WD_READ_SKIP) {
			/*
			 * As the watchdog timer will be suspended, and
			 * cs->wd_last could remain unchanged for 5 minutes,
			 * reset the counters.
			 */
			clocksource_reset_watchdog();
			extra_wait = HZ * 300;
			break;
		}

		/* Clocksource initialized ? */
		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
		    atomic_read(&watchdog_reset_pending)) {
			cs->flags |= CLOCK_SOURCE_WATCHDOG;
			cs->wd_last = wdnow;
			cs->cs_last = csnow;
			continue;
		}

		wd_nsec = cycles_to_nsec_safe(watchdog, cs->wd_last, wdnow);
		cs_nsec = cycles_to_nsec_safe(cs, cs->cs_last, csnow);
		wdlast = cs->wd_last; /* save these in case we print them */
		cslast = cs->cs_last;
		cs->cs_last = csnow;
		cs->wd_last = wdnow;

		if (atomic_read(&watchdog_reset_pending))
			continue;

		/*
		 * The processing of timer softirqs can get delayed (usually
		 * on account of ksoftirqd not getting to run in a timely
		 * manner), which causes the watchdog interval to stretch.
		 * Skew detection may fail for longer watchdog intervals
		 * on account of fixed margins being used.
		 * Some clocksources, e.g. acpi_pm, cannot tolerate
		 * watchdog intervals longer than a few seconds.
		 */
		interval = max(cs_nsec, wd_nsec);
		if (unlikely(interval > WATCHDOG_INTERVAL_MAX_NS)) {
			if (system_state > SYSTEM_SCHEDULING &&
			    interval > 2 * watchdog_max_interval) {
				watchdog_max_interval = interval;
				pr_warn("Long readout interval, skipping watchdog check: cs_nsec: %lld wd_nsec: %lld\n",
					cs_nsec, wd_nsec);
			}
			watchdog_timer.expires = jiffies;
			continue;
		}

		/* Check the deviation from the watchdog clocksource. */
		md = cs->uncertainty_margin + watchdog->uncertainty_margin;
		if (abs(cs_nsec - wd_nsec) > md) {
			s64 cs_wd_msec;
			s64 wd_msec;
			u32 wd_rem;

			pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
				smp_processor_id(), cs->name);
			pr_warn("                      '%s' wd_nsec: %lld wd_now: %llx wd_last: %llx mask: %llx\n",
				watchdog->name, wd_nsec, wdnow, wdlast, watchdog->mask);
			pr_warn("                      '%s' cs_nsec: %lld cs_now: %llx cs_last: %llx mask: %llx\n",
				cs->name, cs_nsec, csnow, cslast, cs->mask);
			cs_wd_msec = div_s64_rem(cs_nsec - wd_nsec, 1000 * 1000, &wd_rem);
			wd_msec = div_s64_rem(wd_nsec, 1000 * 1000, &wd_rem);
			pr_warn("                      Clocksource '%s' skewed %lld ns (%lld ms) over watchdog '%s' interval of %lld ns (%lld ms)\n",
				cs->name, cs_nsec - wd_nsec, cs_wd_msec, watchdog->name, wd_nsec, wd_msec);
			if (curr_clocksource == cs)
				pr_warn("                      '%s' is current clocksource.\n", cs->name);
			else if (curr_clocksource)
				pr_warn("                      '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
			else
				pr_warn("                      No current clocksource.\n");
			__clocksource_unstable(cs);
			continue;
		}

		if (cs == curr_clocksource && cs->tick_stable)
			cs->tick_stable(cs);

		if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) &&
		    (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS) &&
		    (watchdog->flags & CLOCK_SOURCE_IS_CONTINUOUS)) {
			/* Mark it valid for high-res. */
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;

			/*
			 * clocksource_done_booting() will sort it if
			 * finished_booting is not set yet.
			 */
			if (!finished_booting)
				continue;

			/*
			 * If this is not the current clocksource let
			 * the watchdog thread reselect it. Due to the
			 * change to high res this clocksource might
			 * be preferred now. If it is the current
			 * clocksource let the tick code know about
			 * that change.
			 */
			if (cs != curr_clocksource) {
				cs->flags |= CLOCK_SOURCE_RESELECT;
				schedule_work(&watchdog_work);
			} else {
				tick_clock_notify();
			}
		}
	}

	/*
	 * We only clear watchdog_reset_pending when we did a full cycle
	 * through all clocksources.
	 */
	if (reset_pending)
		atomic_dec(&watchdog_reset_pending);

	/*
	 * Cycle through CPUs to check if the CPUs stay synchronized
	 * to each other.
	 */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);

	/*
	 * Arm timer if not already pending: could race with concurrent
	 * pair clocksource_stop_watchdog() clocksource_start_watchdog().
	 */
	if (!timer_pending(&watchdog_timer)) {
		watchdog_timer.expires += WATCHDOG_INTERVAL + extra_wait;
		add_timer_on(&watchdog_timer, next_cpu);
	}
out:
	spin_unlock(&watchdog_lock);
}
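
/*
 * Worked example of the skew check above (illustrative numbers): with
 * cs->uncertainty_margin and watchdog->uncertainty_margin both 100 us,
 * md is 200000 ns. If the clocksource advanced cs_nsec = 500400000 ns
 * while the watchdog advanced wd_nsec = 500000000 ns over the same
 * interval, abs(cs_nsec - wd_nsec) = 400000 ns exceeds md and the
 * clocksource is marked unstable; a 100000 ns deviation would pass.
 */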

static inline void clocksource_start_watchdog(void)
{
	if (watchdog_running || !watchdog || list_empty(&watchdog_list))
		return;
	timer_setup(&watchdog_timer, clocksource_watchdog, 0);
	watchdog_timer.expires = jiffies + WATCHDOG_INTERVAL;
	add_timer_on(&watchdog_timer, cpumask_first(cpu_online_mask));
	watchdog_running = 1;
}

static inline void clocksource_stop_watchdog(void)
{
	if (!watchdog_running || (watchdog && !list_empty(&watchdog_list)))
		return;
	timer_delete(&watchdog_timer);
	watchdog_running = 0;
}

static void clocksource_resume_watchdog(void)
{
	atomic_inc(&watchdog_reset_pending);
}

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	INIT_LIST_HEAD(&cs->wd_list);

	if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
		/* cs is a clocksource to be watched. */
		list_add(&cs->wd_list, &watchdog_list);
		cs->flags &= ~CLOCK_SOURCE_WATCHDOG;
	} else {
		/* cs is a watchdog. */
		if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
			cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
	}
}

static void clocksource_select_watchdog(bool fallback)
{
	struct clocksource *cs, *old_wd;
	unsigned long flags;

	spin_lock_irqsave(&watchdog_lock, flags);
	/* save current watchdog */
	old_wd = watchdog;
	if (fallback)
		watchdog = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* cs is a clocksource to be watched. */
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY)
			continue;

		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_wd)
			continue;

		/* Pick the best watchdog. */
		if (!watchdog || cs->rating > watchdog->rating)
			watchdog = cs;
	}
	/* If we failed to find a fallback restore the old one. */
	if (!watchdog)
		watchdog = old_wd;

	/* If we changed the watchdog we need to reset cycles. */
	if (watchdog != old_wd)
		clocksource_reset_watchdog();

	/* Check if the watchdog timer needs to be started. */
	clocksource_start_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);
}

static void clocksource_dequeue_watchdog(struct clocksource *cs)
{
	if (cs != watchdog) {
		if (cs->flags & CLOCK_SOURCE_MUST_VERIFY) {
			/* cs is a watched clocksource. */
			list_del_init(&cs->wd_list);
			/* Check if the watchdog timer needs to be stopped. */
			clocksource_stop_watchdog();
		}
	}
}

static int __clocksource_watchdog_kthread(void)
{
	struct clocksource *cs, *tmp;
	unsigned long flags;
	int select = 0;

	/* Do any required per-CPU skew verification. */
	if (curr_clocksource &&
	    curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
	    curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
		clocksource_verify_percpu(curr_clocksource);

	spin_lock_irqsave(&watchdog_lock, flags);
	list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
			list_del_init(&cs->wd_list);
			clocksource_change_rating(cs, 0);
			select = 1;
		}
		if (cs->flags & CLOCK_SOURCE_RESELECT) {
			cs->flags &= ~CLOCK_SOURCE_RESELECT;
			select = 1;
		}
	}
	/* Check if the watchdog timer needs to be stopped. */
	clocksource_stop_watchdog();
	spin_unlock_irqrestore(&watchdog_lock, flags);

	return select;
}

static int clocksource_watchdog_kthread(void *data)
{
	mutex_lock(&clocksource_mutex);
	if (__clocksource_watchdog_kthread())
		clocksource_select();
	mutex_unlock(&clocksource_mutex);
	return 0;
}

static bool clocksource_is_watchdog(struct clocksource *cs)
{
	return cs == watchdog;
}

#else /* CONFIG_CLOCKSOURCE_WATCHDOG */

static void clocksource_enqueue_watchdog(struct clocksource *cs)
{
	if (cs->flags & CLOCK_SOURCE_IS_CONTINUOUS)
		cs->flags |= CLOCK_SOURCE_VALID_FOR_HRES;
}

static void clocksource_select_watchdog(bool fallback) { }
static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
static inline void clocksource_resume_watchdog(void) { }
static inline int __clocksource_watchdog_kthread(void) { return 0; }
static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
void clocksource_mark_unstable(struct clocksource *cs) { }

static inline void clocksource_watchdog_lock(unsigned long *flags) { }
static inline void clocksource_watchdog_unlock(unsigned long *flags) { }

#endif /* CONFIG_CLOCKSOURCE_WATCHDOG */

static bool clocksource_is_suspend(struct clocksource *cs)
{
	return cs == suspend_clocksource;
}

static void __clocksource_suspend_select(struct clocksource *cs)
{
	/*
	 * Skip the clocksource which will be stopped in suspend state.
	 */
	if (!(cs->flags & CLOCK_SOURCE_SUSPEND_NONSTOP))
		return;

	/*
	 * A nonstop clocksource can be selected as the suspend clocksource to
	 * calculate the suspend time, so it must keep running across suspend
	 * and should not supply suspend/resume interfaces.
	 */
	if (cs->suspend || cs->resume) {
		pr_warn("Nonstop clocksource %s should not supply suspend/resume interfaces\n",
			cs->name);
	}

	/* Pick the best rating. */
	if (!suspend_clocksource || cs->rating > suspend_clocksource->rating)
		suspend_clocksource = cs;
}

/**
 * clocksource_suspend_select - Select the best clocksource for suspend timing
 * @fallback: if true, select a fallback clocksource
 */
static void clocksource_suspend_select(bool fallback)
{
	struct clocksource *cs, *old_suspend;

	old_suspend = suspend_clocksource;
	if (fallback)
		suspend_clocksource = NULL;

	list_for_each_entry(cs, &clocksource_list, list) {
		/* Skip current if we were requested for a fallback. */
		if (fallback && cs == old_suspend)
			continue;

		__clocksource_suspend_select(cs);
	}
}

/**
 * clocksource_start_suspend_timing - Start measuring the suspend timing
 * @cs:			current clocksource from timekeeping
 * @start_cycles:	current cycles from timekeeping
 *
 * This function saves the start cycle value of the suspend timer to
 * calculate the suspend time when resuming the system.
 *
 * This function is called late in the suspend process from timekeeping_suspend(),
 * which means processes are frozen, non-boot CPUs and interrupts are disabled
 * now. It is therefore possible to start the suspend timer without taking the
 * clocksource mutex.
 */
void clocksource_start_suspend_timing(struct clocksource *cs, u64 start_cycles)
{
	if (!suspend_clocksource)
		return;

	/*
	 * If the current clocksource is the suspend timer, use the
	 * tkr_mono.cycle_last value as suspend_start to avoid reading
	 * the suspend timer again.
	 */
	if (clocksource_is_suspend(cs)) {
		suspend_start = start_cycles;
		return;
	}

	if (suspend_clocksource->enable &&
	    suspend_clocksource->enable(suspend_clocksource)) {
		pr_warn_once("Failed to enable the non-suspend-able clocksource.\n");
		return;
	}

	suspend_start = suspend_clocksource->read(suspend_clocksource);
}

/**
 * clocksource_stop_suspend_timing - Stop measuring the suspend timing
 * @cs:		current clocksource from timekeeping
 * @cycle_now:	current cycles from timekeeping
 *
 * This function calculates the suspend time from the suspend timer.
 *
 * Returns nanoseconds since suspend started, 0 if no usable suspend clocksource.
 *
 * This function is called early in the resume process from timekeeping_resume(),
 * which means there is only one CPU, no processes are running and interrupts
 * are disabled. It is therefore possible to stop the suspend timer without
 * taking the clocksource mutex.
 */
u64 clocksource_stop_suspend_timing(struct clocksource *cs, u64 cycle_now)
{
	u64 now, nsec = 0;

	if (!suspend_clocksource)
		return 0;

	/*
	 * If the current clocksource is the suspend timer, use the
	 * tkr_mono.cycle_last value from timekeeping as the current cycle
	 * to avoid reading the suspend timer again.
	 */
	if (clocksource_is_suspend(cs))
		now = cycle_now;
	else
		now = suspend_clocksource->read(suspend_clocksource);

	if (now > suspend_start)
		nsec = cycles_to_nsec_safe(suspend_clocksource, suspend_start, now);

	/*
	 * Disable the suspend timer to save power if the current clocksource
	 * is not the suspend timer.
	 */
	if (!clocksource_is_suspend(cs) && suspend_clocksource->disable)
		suspend_clocksource->disable(suspend_clocksource);

	return nsec;
}
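
/*
 * Usage sketch (illustrative, simplified from the timekeeping
 * suspend/resume path): timekeeping_suspend() ends with roughly
 *
 *	clocksource_start_suspend_timing(clock, cycle_now);
 *
 * and timekeeping_resume() later recovers the sleep length with
 *
 *	sleep_ns = clocksource_stop_suspend_timing(clock, cycle_now);
 *
 * where clock and cycle_now come from the timekeeper, and sleep_ns is a
 * hypothetical local name. A zero return means no usable suspend
 * clocksource was available.
 */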

/**
 * clocksource_suspend - suspend the clocksource(s)
 */
void clocksource_suspend(void)
{
	struct clocksource *cs;

	list_for_each_entry_reverse(cs, &clocksource_list, list)
		if (cs->suspend)
			cs->suspend(cs);
}

/**
 * clocksource_resume - resume the clocksource(s)
 */
void clocksource_resume(void)
{
	struct clocksource *cs;

	list_for_each_entry(cs, &clocksource_list, list)
		if (cs->resume)
			cs->resume(cs);

	clocksource_resume_watchdog();
}

/**
 * clocksource_touch_watchdog - Update watchdog
 *
 * Update the watchdog after exception contexts such as kgdb so as not
 * to incorrectly trip the watchdog. This might fail when the kernel
 * was stopped in code which holds watchdog_lock.
 */
void clocksource_touch_watchdog(void)
{
	clocksource_resume_watchdog();
}

/**
 * clocksource_max_adjustment - Returns max adjustment amount
 * @cs: Pointer to clocksource
 *
 */
static u32 clocksource_max_adjustment(struct clocksource *cs)
{
	u64 ret;

	/*
	 * We won't try to correct for more than 11% adjustments (110,000 ppm).
	 */
	ret = (u64)cs->mult * 11;
	do_div(ret, 100);
	return (u32)ret;
}
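
/*
 * Worked example (illustrative numbers): for a clocksource with
 * mult = 400000000, maxadj = 400000000 * 11 / 100 = 44000000, i.e. NTP
 * frequency steering may move mult by at most ~11% before the bounds
 * mult + maxadj and mult - maxadj used in clocks_calc_max_nsecs()
 * below come into play.
 */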
943d65670a7SJohn Stultz
944d65670a7SJohn Stultz /**
94587d8b9ebSStephen Boyd * clocks_calc_max_nsecs - Returns maximum nanoseconds that can be converted
94687d8b9ebSStephen Boyd * @mult: cycle to nanosecond multiplier
94787d8b9ebSStephen Boyd * @shift: cycle to nanosecond divisor (power of two)
94887d8b9ebSStephen Boyd * @maxadj: maximum adjustment value to mult (~11%)
94987d8b9ebSStephen Boyd * @mask: bitmask for two's complement subtraction of non 64 bit counters
950fb82fe2fSJohn Stultz * @max_cyc: maximum cycle value before potential overflow (does not include
951fb82fe2fSJohn Stultz * any safety margin)
952362fde04SJohn Stultz *
9538e56f33fSJohn Stultz * NOTE: This function includes a safety margin of 50%, in other words, we
9548e56f33fSJohn Stultz * return half the number of nanoseconds the hardware counter can technically
9558e56f33fSJohn Stultz * cover. This is done so that we can potentially detect problems caused by
9568e56f33fSJohn Stultz * delayed timers or bad hardware, which might result in time intervals that
957571af55aSZhen Lei * are larger than what the math used can handle without overflows.
95898962465SJon Hunter */
clocks_calc_max_nsecs(u32 mult,u32 shift,u32 maxadj,u64 mask,u64 * max_cyc)959fb82fe2fSJohn Stultz u64 clocks_calc_max_nsecs(u32 mult, u32 shift, u32 maxadj, u64 mask, u64 *max_cyc)
96098962465SJon Hunter {
96198962465SJon Hunter u64 max_nsecs, max_cycles;
96298962465SJon Hunter
96398962465SJon Hunter /*
96498962465SJon Hunter * Calculate the maximum number of cycles that we can pass to the
9656086e346SJohn Stultz * cyc2ns() function without overflowing a 64-bit result.
96698962465SJon Hunter */
9676086e346SJohn Stultz max_cycles = ULLONG_MAX;
9686086e346SJohn Stultz do_div(max_cycles, mult+maxadj);
96998962465SJon Hunter
97098962465SJon Hunter /*
97198962465SJon Hunter * The actual maximum number of cycles we can defer the clocksource is
97287d8b9ebSStephen Boyd * determined by the minimum of max_cycles and mask.
973d65670a7SJohn Stultz * Note: Here we subtract the maxadj to make sure we don't sleep for
974d65670a7SJohn Stultz * too long if there's a large negative adjustment.
97598962465SJon Hunter */
97687d8b9ebSStephen Boyd max_cycles = min(max_cycles, mask);
97787d8b9ebSStephen Boyd max_nsecs = clocksource_cyc2ns(max_cycles, mult - maxadj, shift);
97898962465SJon Hunter
979fb82fe2fSJohn Stultz /* return the max_cycles value as well if requested */
980fb82fe2fSJohn Stultz if (max_cyc)
981fb82fe2fSJohn Stultz *max_cyc = max_cycles;
982fb82fe2fSJohn Stultz
983362fde04SJohn Stultz /* Return 50% of the actual maximum, so we can detect bad values */
984362fde04SJohn Stultz max_nsecs >>= 1;
985362fde04SJohn Stultz
98687d8b9ebSStephen Boyd return max_nsecs;
98787d8b9ebSStephen Boyd }
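
/*
 * Worked example (illustrative, hypothetical numbers): a 32-bit counter at
 * 1 MHz might use shift = 21 and mult = 2097152000 (2097152000 >> 21 ==
 * 1000 ns per cycle), giving maxadj ~= 230686720. Then:
 *
 *	max_cycles = ULLONG_MAX / (mult + maxadj) ~= 7.9e9
 *	max_cycles = min(max_cycles, mask)         = 0xffffffff
 *	max_nsecs  = cyc2ns(0xffffffff, mult - maxadj, 21) ~= 3823 s
 *
 * After the 50% safety margin this yields roughly 1911 seconds, i.e. about
 * half of the counter's ~71 minute wrap time.
 */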
98887d8b9ebSStephen Boyd
98987d8b9ebSStephen Boyd /**
990fb82fe2fSJohn Stultz * clocksource_update_max_deferment - Updates the clocksource max_idle_ns & max_cycles
991fb82fe2fSJohn Stultz * @cs: Pointer to clocksource to be updated
99287d8b9ebSStephen Boyd *
99387d8b9ebSStephen Boyd */
994fb82fe2fSJohn Stultz static inline void clocksource_update_max_deferment(struct clocksource *cs)
99587d8b9ebSStephen Boyd {
996fb82fe2fSJohn Stultz cs->max_idle_ns = clocks_calc_max_nsecs(cs->mult, cs->shift,
997fb82fe2fSJohn Stultz cs->maxadj, cs->mask,
998fb82fe2fSJohn Stultz &cs->max_cycles);
99976031d95SThomas Gleixner
100076031d95SThomas Gleixner /*
100176031d95SThomas Gleixner * Threshold for detecting negative motion in clocksource_delta().
100276031d95SThomas Gleixner *
100376031d95SThomas Gleixner * Allow for 0.875 of the counter width so that overly long idle
100476031d95SThomas Gleixner * sleeps, which go slightly over mask/2, do not trigger the
100576031d95SThomas Gleixner * negative motion detection.
100676031d95SThomas Gleixner */
100776031d95SThomas Gleixner cs->max_raw_delta = (cs->mask >> 1) + (cs->mask >> 2) + (cs->mask >> 3);
100898962465SJon Hunter }
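
/*
 * Sketch of the threshold arithmetic above (illustrative): for a 32-bit
 * counter,
 *
 *	max_raw_delta = (mask >> 1) + (mask >> 2) + (mask >> 3)
 *	              = 0x7fffffff + 0x3fffffff + 0x1fffffff
 *	              = 0xdffffffd
 *
 * which is ~0.875 * 2^32, i.e. 7/8th of the counter width.
 */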
100998962465SJon Hunter
1010f5a2e343SThomas Gleixner static struct clocksource *clocksource_find_best(bool oneshot, bool skipcur)
10115d33b883SThomas Gleixner {
10125d33b883SThomas Gleixner struct clocksource *cs;
10135d33b883SThomas Gleixner
10145d33b883SThomas Gleixner if (!finished_booting || list_empty(&clocksource_list))
10155d33b883SThomas Gleixner return NULL;
10165d33b883SThomas Gleixner
10175d33b883SThomas Gleixner /*
10185d33b883SThomas Gleixner * We pick the clocksource with the highest rating. If oneshot
10195d33b883SThomas Gleixner * mode is active, we pick the highres valid clocksource with
10205d33b883SThomas Gleixner * the best rating.
10215d33b883SThomas Gleixner */
10225d33b883SThomas Gleixner list_for_each_entry(cs, &clocksource_list, list) {
1023f5a2e343SThomas Gleixner if (skipcur && cs == curr_clocksource)
1024f5a2e343SThomas Gleixner continue;
10255d33b883SThomas Gleixner if (oneshot && !(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES))
10265d33b883SThomas Gleixner continue;
10275d33b883SThomas Gleixner return cs;
10285d33b883SThomas Gleixner }
10295d33b883SThomas Gleixner return NULL;
10305d33b883SThomas Gleixner }
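
/*
 * Rating example (informational; values as used by typical x86 drivers):
 * with tsc (rating 300), hpet (250) and acpi_pm (200) registered, the
 * rating-sorted list makes the loop above return tsc. If oneshot mode is
 * active and tsc lacked CLOCK_SOURCE_VALID_FOR_HRES, hpet would be picked
 * instead.
 */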
10315d33b883SThomas Gleixner
1032f5a2e343SThomas Gleixner static void __clocksource_select(bool skipcur)
1033734efb46Sjohn stultz {
10345d33b883SThomas Gleixner bool oneshot = tick_oneshot_mode_active();
1035f1b82746SMartin Schwidefsky struct clocksource *best, *cs;
10365d8b34fdSThomas Gleixner
10375d33b883SThomas Gleixner /* Find the best suitable clocksource */
1038f5a2e343SThomas Gleixner best = clocksource_find_best(oneshot, skipcur);
10395d33b883SThomas Gleixner if (!best)
1040f1b82746SMartin Schwidefsky return;
10415d33b883SThomas Gleixner
10427f852afeSBaolin Wang if (!strlen(override_name))
10437f852afeSBaolin Wang goto found;
10447f852afeSBaolin Wang
1045f1b82746SMartin Schwidefsky /* Check for the override clocksource. */
1046f1b82746SMartin Schwidefsky list_for_each_entry(cs, &clocksource_list, list) {
1047f5a2e343SThomas Gleixner if (skipcur && cs == curr_clocksource)
1048f5a2e343SThomas Gleixner continue;
1049f1b82746SMartin Schwidefsky if (strcmp(cs->name, override_name) != 0)
1050f1b82746SMartin Schwidefsky continue;
1051f1b82746SMartin Schwidefsky /*
1052f1b82746SMartin Schwidefsky * Check to make sure we don't switch to a non-highres
1053f1b82746SMartin Schwidefsky * capable clocksource if the tick code is in oneshot
1054f1b82746SMartin Schwidefsky * mode (highres or nohz)
1055f1b82746SMartin Schwidefsky */
10565d33b883SThomas Gleixner if (!(cs->flags & CLOCK_SOURCE_VALID_FOR_HRES) && oneshot) {
1057f1b82746SMartin Schwidefsky /* Override clocksource cannot be used. */
105836374583SKyle Walker if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
105936374583SKyle Walker pr_warn("Override clocksource %s is unstable and not HRT compatible - cannot switch while in HRT/NOHZ mode\n",
106045bbfe64SJoe Perches cs->name);
1061f1b82746SMartin Schwidefsky override_name[0] = 0;
106236374583SKyle Walker } else {
106336374583SKyle Walker /*
106436374583SKyle Walker * The override cannot currently be verified.
106536374583SKyle Walker * Defer the switch and let the watchdog check it.
106636374583SKyle Walker */
106736374583SKyle Walker pr_info("Override clocksource %s is not currently HRT compatible - deferring\n",
106836374583SKyle Walker cs->name);
106936374583SKyle Walker }
1070f1b82746SMartin Schwidefsky } else {
1071f1b82746SMartin Schwidefsky /* Override clocksource can be used. */
1072f1b82746SMartin Schwidefsky best = cs;
}
1073f1b82746SMartin Schwidefsky break;
1074734efb46Sjohn stultz }
1075ba919d1cSThomas Gleixner
10767f852afeSBaolin Wang found:
1077ba919d1cSThomas Gleixner if (curr_clocksource != best && !timekeeping_notify(best)) {
1078ba919d1cSThomas Gleixner pr_info("Switched to clocksource %s\n", best->name);
107975c5158fSMartin Schwidefsky curr_clocksource = best;
1080f1b82746SMartin Schwidefsky }
108175c5158fSMartin Schwidefsky }
108275c5158fSMartin Schwidefsky
1083f5a2e343SThomas Gleixner /**
1084f5a2e343SThomas Gleixner * clocksource_select - Select the best clocksource available
1085f5a2e343SThomas Gleixner *
1086f5a2e343SThomas Gleixner * Private function. Must hold clocksource_mutex when called.
1087f5a2e343SThomas Gleixner *
1088f5a2e343SThomas Gleixner * Select the clocksource with the best rating, or the clocksource,
1089f5a2e343SThomas Gleixner * which is selected by userspace override.
1090f5a2e343SThomas Gleixner */
1091f5a2e343SThomas Gleixner static void clocksource_select(void)
1092f5a2e343SThomas Gleixner {
1093cfed432dSGuillaume Gomez __clocksource_select(false);
1094f5a2e343SThomas Gleixner }
1095f5a2e343SThomas Gleixner
10967eaeb343SThomas Gleixner static void clocksource_select_fallback(void)
10977eaeb343SThomas Gleixner {
1098cfed432dSGuillaume Gomez __clocksource_select(true);
10997eaeb343SThomas Gleixner }
11007eaeb343SThomas Gleixner
110175c5158fSMartin Schwidefsky /*
110275c5158fSMartin Schwidefsky * clocksource_done_booting - Called near the end of core bootup
110375c5158fSMartin Schwidefsky *
110475c5158fSMartin Schwidefsky * Hack to avoid lots of clocksource churn at boot time.
110575c5158fSMartin Schwidefsky * We use fs_initcall because we want this to start before
110675c5158fSMartin Schwidefsky * device_initcall but after subsys_initcall.
110775c5158fSMartin Schwidefsky */
110875c5158fSMartin Schwidefsky static int __init clocksource_done_booting(void)
110975c5158fSMartin Schwidefsky {
1110ad6759fbSjohn stultz mutex_lock(&clocksource_mutex);
1111ad6759fbSjohn stultz curr_clocksource = clocksource_default_clock();
111275c5158fSMartin Schwidefsky finished_booting = 1;
111354a6bc0bSThomas Gleixner /*
111454a6bc0bSThomas Gleixner * Run the watchdog first to eliminate unstable clock sources
111554a6bc0bSThomas Gleixner */
1116e2c631baSPeter Zijlstra __clocksource_watchdog_kthread();
111775c5158fSMartin Schwidefsky clocksource_select();
1118e6c73305SThomas Gleixner mutex_unlock(&clocksource_mutex);
111975c5158fSMartin Schwidefsky return 0;
112075c5158fSMartin Schwidefsky }
112175c5158fSMartin Schwidefsky fs_initcall(clocksource_done_booting);
1122f1b82746SMartin Schwidefsky
112392c7e002SThomas Gleixner /*
112492c7e002SThomas Gleixner * Enqueue the clocksource sorted by rating
1125734efb46Sjohn stultz */
1126f1b82746SMartin Schwidefsky static void clocksource_enqueue(struct clocksource *cs)
1127734efb46Sjohn stultz {
1128f1b82746SMartin Schwidefsky struct list_head *entry = &clocksource_list;
1129f1b82746SMartin Schwidefsky struct clocksource *tmp;
1130734efb46Sjohn stultz
11310fb71d34SMinfei Huang list_for_each_entry(tmp, &clocksource_list, list) {
113292c7e002SThomas Gleixner /* Keep track of the place, where to insert */
11330fb71d34SMinfei Huang if (tmp->rating < cs->rating)
11340fb71d34SMinfei Huang break;
1135f1b82746SMartin Schwidefsky entry = &tmp->list;
11360fb71d34SMinfei Huang }
1137f1b82746SMartin Schwidefsky list_add(&cs->list, entry);
1138734efb46Sjohn stultz }
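
/*
 * Example (illustrative): enqueueing clocksources with ratings 300, 120
 * and 250, in that order, leaves clocksource_list sorted as
 * 300 -> 250 -> 120, which is why clocksource_find_best() can simply take
 * the first usable entry.
 */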
1139734efb46Sjohn stultz
1140d7e81c26SJohn Stultz /**
1141fba9e072SJohn Stultz * __clocksource_update_freq_scale - Used to update the clocksource with a new frequency
1142b1b73d09SKusanagi Kouichi * @cs: clocksource to be registered
1143852db46dSJohn Stultz * @scale: Scale factor multiplied against freq to get clocksource hz
1144852db46dSJohn Stultz * @freq: clocksource frequency (cycles per second) divided by scale
1145852db46dSJohn Stultz *
1146852db46dSJohn Stultz * This should only be called from the clocksource->enable() method.
1147852db46dSJohn Stultz *
1148852db46dSJohn Stultz * This *SHOULD NOT* be called directly! Please use the
1149fba9e072SJohn Stultz * __clocksource_update_freq_hz() or __clocksource_update_freq_khz() helper
1150fba9e072SJohn Stultz * functions.
1151852db46dSJohn Stultz */
1152fba9e072SJohn Stultz void __clocksource_update_freq_scale(struct clocksource *cs, u32 scale, u32 freq)
1153852db46dSJohn Stultz {
1154c0e299b1SThomas Gleixner u64 sec;
1155f8935983SJohn Stultz
1156f8935983SJohn Stultz /*
1157f8935983SJohn Stultz * Default clocksources are *special* and self-define their mult/shift.
1158f8935983SJohn Stultz * But you're not special, so you should specify a freq value.
1159f8935983SJohn Stultz */
1160f8935983SJohn Stultz if (freq) {
1161852db46dSJohn Stultz /*
1162724ed53eSThomas Gleixner * Calc the maximum number of seconds which we can run before
1163f8935983SJohn Stultz * wrapping around. For clocksources which have a mask > 32-bit
1164724ed53eSThomas Gleixner * we need to limit the max sleep time to have a good
1165724ed53eSThomas Gleixner * conversion precision. 10 minutes is still a reasonable
1166724ed53eSThomas Gleixner * amount. That results in a shift value of 24 for a
1167f8935983SJohn Stultz * clocksource with mask >= 40-bit and f >= 4GHz. That maps to
1168362fde04SJohn Stultz * ~ 0.06ppm granularity for NTP.
1169852db46dSJohn Stultz */
1170362fde04SJohn Stultz sec = cs->mask;
1171724ed53eSThomas Gleixner do_div(sec, freq);
1172724ed53eSThomas Gleixner do_div(sec, scale);
1173724ed53eSThomas Gleixner if (!sec)
1174724ed53eSThomas Gleixner sec = 1;
1175724ed53eSThomas Gleixner else if (sec > 600 && cs->mask > UINT_MAX)
1176724ed53eSThomas Gleixner sec = 600;
1177724ed53eSThomas Gleixner
1178852db46dSJohn Stultz clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
1179724ed53eSThomas Gleixner NSEC_PER_SEC / scale, sec * scale);
1180f8935983SJohn Stultz }
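
/*
 * Worked example of the clamping above (hypothetical numbers): for a
 * 56-bit counter at 19.2 MHz, sec = 2^56 / 19200000 ~= 3.75e9, which is
 * clamped to 600 because the mask exceeds UINT_MAX.
 * clocks_calc_mult_shift() then picks mult/shift so that
 * (cycles * mult) >> shift cannot overflow 64 bits within a 600 second
 * interval.
 */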
11812e27e793SPaul E. McKenney
11822e27e793SPaul E. McKenney /*
118317915131SBorislav Petkov * If the uncertainty margin is not specified, calculate it. If
118417915131SBorislav Petkov * both scale and freq are non-zero, calculate the clock period, but
118517915131SBorislav Petkov * bound below at 2*WATCHDOG_MAX_SKEW, that is, 500ppm by default.
118617915131SBorislav Petkov * However, if either of scale or freq is zero, be very conservative
118717915131SBorislav Petkov * and take the tens-of-milliseconds WATCHDOG_THRESHOLD value
118817915131SBorislav Petkov * for the uncertainty margin. Allow stupidly small uncertainty
118917915131SBorislav Petkov * margins to be specified by the caller for testing purposes,
119017915131SBorislav Petkov * but warn to discourage production use of this capability.
119117915131SBorislav Petkov *
119217915131SBorislav Petkov * Bottom line: The sum of the uncertainty margins of the
119317915131SBorislav Petkov * watchdog clocksource and the clocksource under test will be at
119417915131SBorislav Petkov * least 500ppm by default. For more information, please see the
119517915131SBorislav Petkov * comment preceding CONFIG_CLOCKSOURCE_WATCHDOG_MAX_SKEW_US above.
11962e27e793SPaul E. McKenney */
11972e27e793SPaul E. McKenney if (scale && freq && !cs->uncertainty_margin) {
11982e27e793SPaul E. McKenney cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
11992e27e793SPaul E. McKenney if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
12002e27e793SPaul E. McKenney cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
12012e27e793SPaul E. McKenney } else if (!cs->uncertainty_margin) {
12022e27e793SPaul E. McKenney cs->uncertainty_margin = WATCHDOG_THRESHOLD;
12032e27e793SPaul E. McKenney }
12042e27e793SPaul E. McKenney WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
12052e27e793SPaul E. McKenney
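/*
 * Margin example (illustrative; assumes the typical WATCHDOG_MAX_SKEW of
 * 125 us): a 1 MHz clocksource (scale == 1, freq == 1000000) computes an
 * uncertainty_margin of NSEC_PER_SEC / 1000000 == 1000 ns, which is then
 * raised to the 2 * WATCHDOG_MAX_SKEW == 250 us floor.
 */
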
1206d65670a7SJohn Stultz /*
1207362fde04SJohn Stultz * Ensure clocksources that have large 'mult' values don't overflow
1208362fde04SJohn Stultz * when adjusted.
1209d65670a7SJohn Stultz */
1210d65670a7SJohn Stultz cs->maxadj = clocksource_max_adjustment(cs);
1211f8935983SJohn Stultz while (freq && ((cs->mult + cs->maxadj < cs->mult)
1212f8935983SJohn Stultz || (cs->mult - cs->maxadj > cs->mult))) {
1213d65670a7SJohn Stultz cs->mult >>= 1;
1214d65670a7SJohn Stultz cs->shift--;
1215d65670a7SJohn Stultz cs->maxadj = clocksource_max_adjustment(cs);
1216d65670a7SJohn Stultz }
1217d65670a7SJohn Stultz
1218f8935983SJohn Stultz /*
1219f8935983SJohn Stultz * Only warn for *special* clocksources that self-define
1220f8935983SJohn Stultz * their mult/shift values and don't specify a freq.
1221f8935983SJohn Stultz */
1222f8935983SJohn Stultz WARN_ONCE(cs->mult + cs->maxadj < cs->mult,
1223f8935983SJohn Stultz "timekeeping: Clocksource %s might overflow on 11%% adjustment\n",
1224f8935983SJohn Stultz cs->name);
1225f8935983SJohn Stultz
1226fb82fe2fSJohn Stultz clocksource_update_max_deferment(cs);
12278cc8c525SJohn Stultz
122845bbfe64SJoe Perches pr_info("%s: mask: 0x%llx max_cycles: 0x%llx, max_idle_ns: %lld ns\n",
12298cc8c525SJohn Stultz cs->name, cs->mask, cs->max_cycles, cs->max_idle_ns);
1230852db46dSJohn Stultz }
1231fba9e072SJohn Stultz EXPORT_SYMBOL_GPL(__clocksource_update_freq_scale);
1232852db46dSJohn Stultz
1233852db46dSJohn Stultz /**
1234d7e81c26SJohn Stultz * __clocksource_register_scale - Used to install new clocksources
1235b1b73d09SKusanagi Kouichi * @cs: clocksource to be registered
1236d7e81c26SJohn Stultz * @scale: Scale factor multiplied against freq to get clocksource hz
1237d7e81c26SJohn Stultz * @freq: clocksource frequency (cycles per second) divided by scale
1238d7e81c26SJohn Stultz *
1239d7e81c26SJohn Stultz * Returns -EBUSY if registration fails, zero otherwise.
1240d7e81c26SJohn Stultz *
1241d7e81c26SJohn Stultz * This *SHOULD NOT* be called directly! Please use the
1242d7e81c26SJohn Stultz * clocksource_register_hz() or clocksource_register_khz() helper functions.
1243d7e81c26SJohn Stultz */
1244d7e81c26SJohn Stultz int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
1245d7e81c26SJohn Stultz {
12462aae7bcfSPeter Zijlstra unsigned long flags;
1247d7e81c26SJohn Stultz
1248d67f34c1SThomas Gleixner clocksource_arch_init(cs);
1249d67f34c1SThomas Gleixner
1250b2c67cbeSThomas Gleixner if (WARN_ON_ONCE((unsigned int)cs->id >= CSID_MAX))
1251b2c67cbeSThomas Gleixner cs->id = CSID_GENERIC;
12525d51bee7SThomas Gleixner if (cs->vdso_clock_mode < 0 ||
12535d51bee7SThomas Gleixner cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
12545d51bee7SThomas Gleixner pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
12555d51bee7SThomas Gleixner cs->name, cs->vdso_clock_mode);
12565d51bee7SThomas Gleixner cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
12575d51bee7SThomas Gleixner }
12585d51bee7SThomas Gleixner
1259b595076aSUwe Kleine-König /* Initialize mult/shift and max_idle_ns */
1260fba9e072SJohn Stultz __clocksource_update_freq_scale(cs, scale, freq);
1261d7e81c26SJohn Stultz
1262be278e98SJames Hartley /* Add clocksource to the clocksource list */
1263d7e81c26SJohn Stultz mutex_lock(&clocksource_mutex);
12642aae7bcfSPeter Zijlstra
12652aae7bcfSPeter Zijlstra clocksource_watchdog_lock(&flags);
1266d7e81c26SJohn Stultz clocksource_enqueue(cs);
1267d7e81c26SJohn Stultz clocksource_enqueue_watchdog(cs);
12682aae7bcfSPeter Zijlstra clocksource_watchdog_unlock(&flags);
12692aae7bcfSPeter Zijlstra
1270e05b2efbSjohn stultz clocksource_select();
1271bbf66d89SVitaly Kuznetsov clocksource_select_watchdog(false);
127239232ed5SBaolin Wang __clocksource_suspend_select(cs);
1273d7e81c26SJohn Stultz mutex_unlock(&clocksource_mutex);
1274d7e81c26SJohn Stultz return 0;
1275d7e81c26SJohn Stultz }
1276d7e81c26SJohn Stultz EXPORT_SYMBOL_GPL(__clocksource_register_scale);
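
/*
 * Usage sketch (hypothetical driver, not part of this file): drivers do not
 * call __clocksource_register_scale() directly but use the
 * clocksource_register_hz()/clocksource_register_khz() wrappers. The MMIO
 * base, register offset and 24 MHz frequency below are made up for
 * illustration:
 *
 *	static void __iomem *example_base;
 *
 *	static u64 example_cs_read(struct clocksource *cs)
 *	{
 *		return readl_relaxed(example_base + 0x10);
 *	}
 *
 *	static struct clocksource example_cs = {
 *		.name	= "example",
 *		.rating	= 200,
 *		.read	= example_cs_read,
 *		.mask	= CLOCKSOURCE_MASK(32),
 *		.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
 *	};
 *
 *	...
 *	return clocksource_register_hz(&example_cs, 24000000);
 */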
1277d7e81c26SJohn Stultz
12787eaeb343SThomas Gleixner /*
12797eaeb343SThomas Gleixner * Unbind clocksource @cs. Called with clocksource_mutex held
12807eaeb343SThomas Gleixner */
12817eaeb343SThomas Gleixner static int clocksource_unbind(struct clocksource *cs)
12827eaeb343SThomas Gleixner {
12832aae7bcfSPeter Zijlstra unsigned long flags;
12842aae7bcfSPeter Zijlstra
1285bbf66d89SVitaly Kuznetsov if (clocksource_is_watchdog(cs)) {
1286bbf66d89SVitaly Kuznetsov /* Select and try to install a replacement watchdog. */
1287bbf66d89SVitaly Kuznetsov clocksource_select_watchdog(true);
12887eaeb343SThomas Gleixner if (clocksource_is_watchdog(cs))
12897eaeb343SThomas Gleixner return -EBUSY;
1290bbf66d89SVitaly Kuznetsov }
12917eaeb343SThomas Gleixner
12927eaeb343SThomas Gleixner if (cs == curr_clocksource) {
12937eaeb343SThomas Gleixner /* Select and try to install a replacement clock source */
12947eaeb343SThomas Gleixner clocksource_select_fallback();
12957eaeb343SThomas Gleixner if (curr_clocksource == cs)
12967eaeb343SThomas Gleixner return -EBUSY;
12977eaeb343SThomas Gleixner }
12982aae7bcfSPeter Zijlstra
129939232ed5SBaolin Wang if (clocksource_is_suspend(cs)) {
130039232ed5SBaolin Wang /*
130139232ed5SBaolin Wang * Select and try to install a replacement suspend clocksource.
130239232ed5SBaolin Wang * If no replacement suspend clocksource, we will just let the
130339232ed5SBaolin Wang * clocksource go and have no suspend clocksource.
130439232ed5SBaolin Wang */
130539232ed5SBaolin Wang clocksource_suspend_select(true);
130639232ed5SBaolin Wang }
130739232ed5SBaolin Wang
13082aae7bcfSPeter Zijlstra clocksource_watchdog_lock(&flags);
13097eaeb343SThomas Gleixner clocksource_dequeue_watchdog(cs);
13107eaeb343SThomas Gleixner list_del_init(&cs->list);
13112aae7bcfSPeter Zijlstra clocksource_watchdog_unlock(&flags);
13122aae7bcfSPeter Zijlstra
13137eaeb343SThomas Gleixner return 0;
13147eaeb343SThomas Gleixner }
13157eaeb343SThomas Gleixner
13164713e22cSThomas Gleixner /**
13174713e22cSThomas Gleixner * clocksource_unregister - remove a registered clocksource
1318b1b73d09SKusanagi Kouichi * @cs: clocksource to be unregistered
13194713e22cSThomas Gleixner */
1320a89c7edbSThomas Gleixner int clocksource_unregister(struct clocksource *cs)
13214713e22cSThomas Gleixner {
1322a89c7edbSThomas Gleixner int ret = 0;
1323a89c7edbSThomas Gleixner
132475c5158fSMartin Schwidefsky mutex_lock(&clocksource_mutex);
1325a89c7edbSThomas Gleixner if (!list_empty(&cs->list))
1326a89c7edbSThomas Gleixner ret = clocksource_unbind(cs);
132775c5158fSMartin Schwidefsky mutex_unlock(&clocksource_mutex);
1328a89c7edbSThomas Gleixner return ret;
13294713e22cSThomas Gleixner }
1330fb63a0ebSMartin Schwidefsky EXPORT_SYMBOL(clocksource_unregister);
13314713e22cSThomas Gleixner
13322b013700SDaniel Walker #ifdef CONFIG_SYSFS
1333734efb46Sjohn stultz /**
1334e87821d1SBaolin Wang * current_clocksource_show - sysfs interface for current clocksource
1335734efb46Sjohn stultz * @dev: unused
1336b1b73d09SKusanagi Kouichi * @attr: unused
1337734efb46Sjohn stultz * @buf: char buffer to be filled with the name of the current clocksource
1338734efb46Sjohn stultz *
1339734efb46Sjohn stultz * Provides the sysfs interface for showing the current clocksource.
1340734efb46Sjohn stultz */
1341e87821d1SBaolin Wang static ssize_t current_clocksource_show(struct device *dev,
1342e87821d1SBaolin Wang struct device_attribute *attr,
1343e87821d1SBaolin Wang char *buf)
1344734efb46Sjohn stultz {
13455e2cb101SMiao Xie ssize_t count = 0;
1346734efb46Sjohn stultz
134775c5158fSMartin Schwidefsky mutex_lock(&clocksource_mutex);
13488f0acb7fSLi Zhijian count = sysfs_emit(buf, "%s\n", curr_clocksource->name);
134975c5158fSMartin Schwidefsky mutex_unlock(&clocksource_mutex);
1350734efb46Sjohn stultz
13515e2cb101SMiao Xie return count;
1352734efb46Sjohn stultz }
1353734efb46Sjohn stultz
1354891292a7SPatrick Palka ssize_t sysfs_get_uname(const char *buf, char *dst, size_t cnt)
135529b54078SThomas Gleixner {
135629b54078SThomas Gleixner size_t ret = cnt;
135729b54078SThomas Gleixner
135829b54078SThomas Gleixner /* strings from sysfs write are not 0 terminated! */
135929b54078SThomas Gleixner if (!cnt || cnt >= CS_NAME_LEN)
136029b54078SThomas Gleixner return -EINVAL;
136129b54078SThomas Gleixner
136229b54078SThomas Gleixner /* strip off the trailing \n: */
136329b54078SThomas Gleixner if (buf[cnt-1] == '\n')
136429b54078SThomas Gleixner cnt--;
136529b54078SThomas Gleixner if (cnt > 0)
136629b54078SThomas Gleixner memcpy(dst, buf, cnt);
136729b54078SThomas Gleixner dst[cnt] = 0;
136829b54078SThomas Gleixner return ret;
136929b54078SThomas Gleixner }
137029b54078SThomas Gleixner
1371734efb46Sjohn stultz /**
1372e87821d1SBaolin Wang * current_clocksource_store - interface for manually overriding clocksource
1373734efb46Sjohn stultz * @dev: unused
1374b1b73d09SKusanagi Kouichi * @attr: unused
1375734efb46Sjohn stultz * @buf: name of override clocksource
1376734efb46Sjohn stultz * @count: length of buffer
1377734efb46Sjohn stultz *
1378734efb46Sjohn stultz * Takes input from sysfs interface for manually overriding the default
1379b71a8eb0SUwe Kleine-König * clocksource selection.
1380734efb46Sjohn stultz */
1381e87821d1SBaolin Wang static ssize_t current_clocksource_store(struct device *dev,
1382d369a5d8SKay Sievers struct device_attribute *attr,
1383734efb46Sjohn stultz const char *buf, size_t count)
1384734efb46Sjohn stultz {
1385233bcb41SElad Wexler ssize_t ret;
1386734efb46Sjohn stultz
138775c5158fSMartin Schwidefsky mutex_lock(&clocksource_mutex);
1388734efb46Sjohn stultz
138903e13cf5SThomas Gleixner ret = sysfs_get_uname(buf, override_name, count);
139029b54078SThomas Gleixner if (ret >= 0)
1391f1b82746SMartin Schwidefsky clocksource_select();
1392734efb46Sjohn stultz
139375c5158fSMartin Schwidefsky mutex_unlock(&clocksource_mutex);
1394734efb46Sjohn stultz
1395734efb46Sjohn stultz return ret;
1396734efb46Sjohn stultz }
1397e87821d1SBaolin Wang static DEVICE_ATTR_RW(current_clocksource);
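
/*
 * Example interaction from userspace (illustrative):
 *
 *	$ cat /sys/devices/system/clocksource/clocksource0/current_clocksource
 *	tsc
 *	# echo hpet > /sys/devices/system/clocksource/clocksource0/current_clocksource
 */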
1398734efb46Sjohn stultz
1399734efb46Sjohn stultz /**
1400e87821d1SBaolin Wang * unbind_clocksource_store - interface for manually unbinding clocksource
14017eaeb343SThomas Gleixner * @dev: unused
14027eaeb343SThomas Gleixner * @attr: unused
14037eaeb343SThomas Gleixner * @buf: unused
14047eaeb343SThomas Gleixner * @count: length of buffer
14057eaeb343SThomas Gleixner *
14067eaeb343SThomas Gleixner * Takes input from sysfs interface for manually unbinding a clocksource.
14077eaeb343SThomas Gleixner */
1408e87821d1SBaolin Wang static ssize_t unbind_clocksource_store(struct device *dev,
14097eaeb343SThomas Gleixner struct device_attribute *attr,
14107eaeb343SThomas Gleixner const char *buf, size_t count)
14117eaeb343SThomas Gleixner {
14127eaeb343SThomas Gleixner struct clocksource *cs;
14137eaeb343SThomas Gleixner char name[CS_NAME_LEN];
1414233bcb41SElad Wexler ssize_t ret;
14157eaeb343SThomas Gleixner
141603e13cf5SThomas Gleixner ret = sysfs_get_uname(buf, name, count);
14177eaeb343SThomas Gleixner if (ret < 0)
14187eaeb343SThomas Gleixner return ret;
14197eaeb343SThomas Gleixner
14207eaeb343SThomas Gleixner ret = -ENODEV;
14217eaeb343SThomas Gleixner mutex_lock(&clocksource_mutex);
14227eaeb343SThomas Gleixner list_for_each_entry(cs, &clocksource_list, list) {
14237eaeb343SThomas Gleixner if (strcmp(cs->name, name))
14247eaeb343SThomas Gleixner continue;
14257eaeb343SThomas Gleixner ret = clocksource_unbind(cs);
14267eaeb343SThomas Gleixner break;
14277eaeb343SThomas Gleixner }
14287eaeb343SThomas Gleixner mutex_unlock(&clocksource_mutex);
14297eaeb343SThomas Gleixner
14307eaeb343SThomas Gleixner return ret ? ret : count;
14317eaeb343SThomas Gleixner }
1432e87821d1SBaolin Wang static DEVICE_ATTR_WO(unbind_clocksource);
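
/*
 * Example (illustrative): forcibly unregister a clocksource by name,
 * which only succeeds if a replacement can be installed:
 *
 *	# echo hpet > /sys/devices/system/clocksource/clocksource0/unbind_clocksource
 */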
14337eaeb343SThomas Gleixner
14347eaeb343SThomas Gleixner /**
1435e87821d1SBaolin Wang * available_clocksource_show - sysfs interface for listing clocksources
1436734efb46Sjohn stultz * @dev: unused
1437b1b73d09SKusanagi Kouichi * @attr: unused
1438734efb46Sjohn stultz * @buf: char buffer to be filled with clocksource list
1439734efb46Sjohn stultz *
1440734efb46Sjohn stultz * Provides sysfs interface for listing registered clocksources
1441734efb46Sjohn stultz */
1442e87821d1SBaolin Wang static ssize_t available_clocksource_show(struct device *dev,
1443d369a5d8SKay Sievers struct device_attribute *attr,
14444a0b2b4dSAndi Kleen char *buf)
1445734efb46Sjohn stultz {
14462e197586SMatthias Kaehlcke struct clocksource *src;
14475e2cb101SMiao Xie ssize_t count = 0;
1448734efb46Sjohn stultz
144975c5158fSMartin Schwidefsky mutex_lock(&clocksource_mutex);
14502e197586SMatthias Kaehlcke list_for_each_entry(src, &clocksource_list, list) {
1451cd6d95d8SThomas Gleixner /*
1452cd6d95d8SThomas Gleixner * Don't show non-HRES clocksource if the tick code is
1453cd6d95d8SThomas Gleixner * in one shot mode (highres=on or nohz=on)
1454cd6d95d8SThomas Gleixner */
1455cd6d95d8SThomas Gleixner if (!tick_oneshot_mode_active() ||
14563f68535aSjohn stultz (src->flags & CLOCK_SOURCE_VALID_FOR_HRES))
14575e2cb101SMiao Xie count += snprintf(buf + count,
14585e2cb101SMiao Xie max((ssize_t)PAGE_SIZE - count, (ssize_t)0),
14595e2cb101SMiao Xie "%s ", src->name);
1460734efb46Sjohn stultz }
146175c5158fSMartin Schwidefsky mutex_unlock(&clocksource_mutex);
1462734efb46Sjohn stultz
14635e2cb101SMiao Xie count += snprintf(buf + count,
14645e2cb101SMiao Xie max((ssize_t)PAGE_SIZE - count, (ssize_t)0), "\n");
1465734efb46Sjohn stultz
14665e2cb101SMiao Xie return count;
1467734efb46Sjohn stultz }
1468e87821d1SBaolin Wang static DEVICE_ATTR_RO(available_clocksource);
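
/*
 * Example output (illustrative):
 *
 *	$ cat /sys/devices/system/clocksource/clocksource0/available_clocksource
 *	tsc hpet acpi_pm
 */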
1469734efb46Sjohn stultz
147027263e8dSBaolin Wang static struct attribute *clocksource_attrs[] = {
147127263e8dSBaolin Wang &dev_attr_current_clocksource.attr,
147227263e8dSBaolin Wang &dev_attr_unbind_clocksource.attr,
147327263e8dSBaolin Wang &dev_attr_available_clocksource.attr,
147427263e8dSBaolin Wang NULL
147527263e8dSBaolin Wang };
147627263e8dSBaolin Wang ATTRIBUTE_GROUPS(clocksource);
147727263e8dSBaolin Wang
14782bc7fc24SRicardo B. Marliere static const struct bus_type clocksource_subsys = {
1479af5ca3f4SKay Sievers .name = "clocksource",
1480d369a5d8SKay Sievers .dev_name = "clocksource",
1481734efb46Sjohn stultz };
1482734efb46Sjohn stultz
1483d369a5d8SKay Sievers static struct device device_clocksource = {
1484734efb46Sjohn stultz .id = 0,
1485d369a5d8SKay Sievers .bus = &clocksource_subsys,
148627263e8dSBaolin Wang .groups = clocksource_groups,
1487734efb46Sjohn stultz };
1488734efb46Sjohn stultz
1489ad596171Sjohn stultz static int __init init_clocksource_sysfs(void)
1490734efb46Sjohn stultz {
1491d369a5d8SKay Sievers int error = subsys_system_register(&clocksource_subsys, NULL);
1492734efb46Sjohn stultz
1493734efb46Sjohn stultz if (!error)
1494d369a5d8SKay Sievers error = device_register(&device_clocksource);
149527263e8dSBaolin Wang
1496734efb46Sjohn stultz return error;
1497734efb46Sjohn stultz }
1498734efb46Sjohn stultz
1499734efb46Sjohn stultz device_initcall(init_clocksource_sysfs);
15002b013700SDaniel Walker #endif /* CONFIG_SYSFS */
1501734efb46Sjohn stultz
1502734efb46Sjohn stultz /**
1503734efb46Sjohn stultz * boot_override_clocksource - boot clock override
1504734efb46Sjohn stultz * @str: override name
1505734efb46Sjohn stultz *
1506734efb46Sjohn stultz * Takes a clocksource= boot argument and uses it
1507734efb46Sjohn stultz * as the clocksource override name.
1508734efb46Sjohn stultz */
1509734efb46Sjohn stultz static int __init boot_override_clocksource(char *str)
1510734efb46Sjohn stultz {
151175c5158fSMartin Schwidefsky mutex_lock(&clocksource_mutex);
1512734efb46Sjohn stultz if (str)
1513fc661d0aSThorsten Blum strscpy(override_name, str);
151475c5158fSMartin Schwidefsky mutex_unlock(&clocksource_mutex);
1515734efb46Sjohn stultz return 1;
1516734efb46Sjohn stultz }
1517734efb46Sjohn stultz
1518734efb46Sjohn stultz __setup("clocksource=", boot_override_clocksource);
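
/*
 * Example (illustrative): booting with "clocksource=hpet" on the kernel
 * command line makes hpet the override; it is then selected whenever it is
 * registered and usable in the current tick mode.
 */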
1519734efb46Sjohn stultz
1520734efb46Sjohn stultz /**
1521734efb46Sjohn stultz * boot_override_clock - Compatibility layer for deprecated boot option
1522734efb46Sjohn stultz * @str: override name
1523734efb46Sjohn stultz *
1524734efb46Sjohn stultz * DEPRECATED! Takes a clock= boot argument and uses it
1525734efb46Sjohn stultz * as the clocksource override name.
1526734efb46Sjohn stultz */
1527734efb46Sjohn stultz static int __init boot_override_clock(char *str)
1528734efb46Sjohn stultz {
15295d0cf410Sjohn stultz if (!strcmp(str, "pmtmr")) {
153045bbfe64SJoe Perches pr_warn("clock=pmtmr is deprecated - use clocksource=acpi_pm\n");
15315d0cf410Sjohn stultz return boot_override_clocksource("acpi_pm");
15325d0cf410Sjohn stultz }
153345bbfe64SJoe Perches pr_warn("clock= boot option is deprecated - use clocksource=xyz\n");
1534734efb46Sjohn stultz return boot_override_clocksource(str);
1535734efb46Sjohn stultz }
1536734efb46Sjohn stultz
1537734efb46Sjohn stultz __setup("clock=", boot_override_clock);
1538