// SPDX-License-Identifier: GPL-2.0
/*
 *  Kernel internal timers
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-12-24  Fixed an xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 *  2002-05-31  Move sys_sysinfo here and make its locking sane, Robert Love
 *  2000-10-05  Implemented scalable SMP per-CPU timer handling.
 *                              Copyright (C) 2000, 2001, 2002  Ingo Molnar
 *              Designed by David S. Miller, Alexey Kuznetsov and Ingo Molnar
 */

#include <linux/kernel_stat.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/pid_namespace.h>
#include <linux/notifier.h>
#include <linux/thread_info.h>
#include <linux/time.h>
#include <linux/jiffies.h>
#include <linux/posix-timers.h>
#include <linux/cpu.h>
#include <linux/syscalls.h>
#include <linux/delay.h>
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/irq_work.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
#include <linux/random.h>

#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/div64.h>
#include <asm/timex.h>
#include <asm/io.h>

#include "tick-internal.h"

#define CREATE_TRACE_POINTS
#include <trace/events/timer.h>

__visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;

EXPORT_SYMBOL(jiffies_64);

/*
 * The timer wheel has LVL_DEPTH array levels. Each level provides an array of
 * LVL_SIZE buckets. Each level is driven by its own clock and therefore each
 * level has a different granularity.
 *
 * The level granularity is:		LVL_CLK_DIV ^ level
 * The level clock frequency is:	HZ / (LVL_CLK_DIV ^ level)
 *
 * The array level of a newly armed timer depends on the relative expiry
 * time. The farther the expiry time is away, the higher the array level and
 * therefore the coarser the granularity becomes.
 *
 * Contrary to the original timer wheel implementation, which aims for 'exact'
 * expiry of the timers, this implementation removes the need for recascading
 * the timers into the lower array levels. The previous 'classic' timer wheel
 * implementation of the kernel already violated the 'exact' expiry by adding
 * slack to the expiry time to provide batched expiration. The granularity
 * levels provide implicit batching.
 *
 * This is an optimization of the original timer wheel implementation for the
 * majority of the timer wheel use cases: timeouts. The vast majority of
 * timeout timers (networking, disk I/O ...) are canceled before expiry. If
 * the timeout expires it indicates that normal operation is disturbed, so it
 * does not matter much whether the timeout comes with a slight delay.
 *
 * The only exception to this are networking timers with a small expiry
 * time. They rely on the granularity. Those fit into the first wheel level,
 * which has HZ granularity.
 *
 * We don't have cascading anymore. Timers with an expiry time above the
 * capacity of the last wheel level are force expired at the maximum timeout
 * value of the last wheel level. From data sampling we know that the maximum
 * value observed is 5 days (network connection tracking), so this should not
 * be an issue.
 *
 * The currently chosen array constants are a good compromise between
 * array size and granularity.
 *
 * This results in the following granularity and range levels:
 *
 * HZ 1000 steps
 * Level Offset  Granularity            Range
 *  0      0         1 ms                0 ms -         63 ms
 *  1     64         8 ms               64 ms -        511 ms
 *  2    128        64 ms              512 ms -       4095 ms (512ms - ~4s)
 *  3    192       512 ms             4096 ms -      32767 ms (~4s - ~32s)
 *  4    256      4096 ms (~4s)      32768 ms -     262143 ms (~32s - ~4m)
 *  5    320     32768 ms (~32s)    262144 ms -    2097151 ms (~4m - ~34m)
 *  6    384    262144 ms (~4m)    2097152 ms -   16777215 ms (~34m - ~4h)
 *  7    448   2097152 ms (~34m)  16777216 ms -  134217727 ms (~4h - ~1d)
 *  8    512  16777216 ms (~4h)  134217728 ms - 1073741822 ms (~1d - ~12d)
 *
 * HZ  300
 * Level Offset  Granularity            Range
 *  0      0         3 ms                0 ms -        210 ms
 *  1     64        26 ms              213 ms -       1703 ms (213ms - ~1s)
 *  2    128       213 ms             1706 ms -      13650 ms (~1s - ~13s)
 *  3    192      1706 ms (~1s)      13653 ms -     109223 ms (~13s - ~1m)
 *  4    256     13653 ms (~13s)    109226 ms -     873810 ms (~1m - ~14m)
 *  5    320    109226 ms (~1m)     873813 ms -    6990503 ms (~14m - ~1h)
 *  6    384    873813 ms (~14m)   6990506 ms -   55924050 ms (~1h - ~15h)
 *  7    448   6990506 ms (~1h)   55924053 ms -  447392423 ms (~15h - ~5d)
 *  8    512  55924053 ms (~15h) 447392426 ms - 3579139406 ms (~5d - ~41d)
 *
 * HZ  250
 * Level Offset  Granularity            Range
 *  0      0         4 ms                0 ms -        255 ms
 *  1     64        32 ms              256 ms -       2047 ms (256ms - ~2s)
 *  2    128       256 ms             2048 ms -      16383 ms (~2s - ~16s)
 *  3    192      2048 ms (~2s)      16384 ms -     131071 ms (~16s - ~2m)
 *  4    256     16384 ms (~16s)    131072 ms -    1048575 ms (~2m - ~17m)
 *  5    320    131072 ms (~2m)    1048576 ms -    8388607 ms (~17m - ~2h)
 *  6    384   1048576 ms (~17m)   8388608 ms -   67108863 ms (~2h - ~18h)
 *  7    448   8388608 ms (~2h)   67108864 ms -  536870911 ms (~18h - ~6d)
 *  8    512  67108864 ms (~18h) 536870912 ms - 4294967288 ms (~6d - ~49d)
 *
 * HZ  100
 * Level Offset  Granularity            Range
 *  0      0        10 ms                0 ms -        630 ms
 *  1     64        80 ms              640 ms -       5110 ms (640ms - ~5s)
 *  2    128       640 ms             5120 ms -      40950 ms (~5s - ~40s)
 *  3    192      5120 ms (~5s)      40960 ms -     327670 ms (~40s - ~5m)
 *  4    256     40960 ms (~40s)    327680 ms -    2621430 ms (~5m - ~43m)
 *  5    320    327680 ms (~5m)    2621440 ms -   20971510 ms (~43m - ~5h)
 *  6    384   2621440 ms (~43m)  20971520 ms -  167772150 ms (~5h - ~1d)
 *  7    448  20971520 ms (~5h)  167772160 ms - 1342177270 ms (~1d - ~15d)
 */

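/*
 * Illustrative worked example of the level selection described above (not
 * from this file's logic, just the same arithmetic): with HZ=1000, a timer
 * armed 200 jiffies ahead has LVL_START(1) == 63 <= 200 < LVL_START(2) ==
 * 504, so it lands in level 1 with LVL_GRAN(1) == 8 jiffies (8 ms)
 * granularity, matching row 1 of the HZ 1000 table.
 */
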
/* Clock divisor for the next level */
#define LVL_CLK_SHIFT	3
#define LVL_CLK_DIV	(1UL << LVL_CLK_SHIFT)
#define LVL_CLK_MASK	(LVL_CLK_DIV - 1)
#define LVL_SHIFT(n)	((n) * LVL_CLK_SHIFT)
#define LVL_GRAN(n)	(1UL << LVL_SHIFT(n))

/*
 * The time start value for each level to select the bucket at enqueue
 * time. We start from the last possible delta of the previous level
 * so that we can later add an extra LVL_GRAN(n) to n (see calc_index()).
 */
#define LVL_START(n)	((LVL_SIZE - 1) << (((n) - 1) * LVL_CLK_SHIFT))

/* Size of each clock level */
#define LVL_BITS	6
#define LVL_SIZE	(1UL << LVL_BITS)
#define LVL_MASK	(LVL_SIZE - 1)
#define LVL_OFFS(n)	((n) * LVL_SIZE)

/* Level depth */
#if HZ > 100
# define LVL_DEPTH	9
# else
# define LVL_DEPTH	8
#endif

/* The cutoff (max. capacity of the wheel) */
#define WHEEL_TIMEOUT_CUTOFF	(LVL_START(LVL_DEPTH))
#define WHEEL_TIMEOUT_MAX	(WHEEL_TIMEOUT_CUTOFF - LVL_GRAN(LVL_DEPTH - 1))

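/*
 * Illustrative: for HZ=1000 (LVL_DEPTH == 9) the cutoff evaluates to
 * WHEEL_TIMEOUT_CUTOFF == LVL_START(9) == 63 << 24 == 1056964608 jiffies,
 * i.e. roughly 12 days, matching the end of the HZ 1000 table above.
 */
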
/*
 * The resulting wheel size. If NOHZ is configured we allocate two
 * wheels so we have a separate storage for the deferrable timers.
 */
#define WHEEL_SIZE	(LVL_SIZE * LVL_DEPTH)

#ifdef CONFIG_NO_HZ_COMMON
# define NR_BASES	2
# define BASE_STD	0
# define BASE_DEF	1
#else
# define NR_BASES	1
# define BASE_STD	0
# define BASE_DEF	0
#endif

struct timer_base {
	raw_spinlock_t		lock;
	struct timer_list	*running_timer;
#ifdef CONFIG_PREEMPT_RT
	spinlock_t		expiry_lock;
	atomic_t		timer_waiters;
#endif
	unsigned long		clk;
	unsigned long		next_expiry;
	unsigned int		cpu;
	bool			next_expiry_recalc;
	bool			is_idle;
	bool			timers_pending;
	DECLARE_BITMAP(pending_map, WHEEL_SIZE);
	struct hlist_head	vectors[WHEEL_SIZE];
} ____cacheline_aligned;

static DEFINE_PER_CPU(struct timer_base, timer_bases[NR_BASES]);

#ifdef CONFIG_NO_HZ_COMMON

static DEFINE_STATIC_KEY_FALSE(timers_nohz_active);
static DEFINE_MUTEX(timer_keys_mutex);

static void timer_update_keys(struct work_struct *work);
static DECLARE_WORK(timer_update_work, timer_update_keys);

#ifdef CONFIG_SMP
unsigned int sysctl_timer_migration = 1;

DEFINE_STATIC_KEY_FALSE(timers_migration_enabled);

static void timers_update_migration(void)
{
	if (sysctl_timer_migration && tick_nohz_active)
		static_branch_enable(&timers_migration_enabled);
	else
		static_branch_disable(&timers_migration_enabled);
}
#else
static inline void timers_update_migration(void) { }
#endif /* !CONFIG_SMP */

static void timer_update_keys(struct work_struct *work)
{
	mutex_lock(&timer_keys_mutex);
	timers_update_migration();
	static_branch_enable(&timers_nohz_active);
	mutex_unlock(&timer_keys_mutex);
}

void timers_update_nohz(void)
{
	schedule_work(&timer_update_work);
}

int timer_migration_handler(struct ctl_table *table, int write,
			    void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret;

	mutex_lock(&timer_keys_mutex);
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (!ret && write)
		timers_update_migration();
	mutex_unlock(&timer_keys_mutex);
	return ret;
}

static inline bool is_timers_nohz_active(void)
{
	return static_branch_unlikely(&timers_nohz_active);
}
#else
static inline bool is_timers_nohz_active(void) { return false; }
#endif /* NO_HZ_COMMON */

static unsigned long round_jiffies_common(unsigned long j, int cpu,
					  bool force_up)
{
	int rem;
	unsigned long original = j;

	/*
	 * We don't want all CPUs firing their timers at once hitting the
	 * same lock or cachelines, so we skew each extra CPU with an extra
	 * 3 jiffies. This 3 jiffies came originally from the mm/ code which
	 * already did this.
	 * The skew is done by adding 3*cpunr, then rounding, then subtracting
	 * this extra offset again.
	 */
	j += cpu * 3;

	rem = j % HZ;

	/*
	 * If the target jiffy is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc.) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */
	if (rem < HZ/4 && !force_up) /* round down */
		j = j - rem;
	else /* round up */
		j = j - rem + HZ;

	/* now that we have rounded, subtract the extra skew again */
	j -= cpu * 3;

	/*
	 * Make sure j is still in the future. Otherwise return the
	 * unmodified value.
	 */
	return time_is_after_jiffies(j) ? j : original;
}
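
/*
 * Illustrative numbers for the rounding above: with HZ=1000 and cpu == 0,
 * a target of j == 5100 has rem == 100 < HZ/4 and rounds down to 5000,
 * while j == 5600 has rem == 600 and rounds up to 6000. With @force_up
 * set, j == 5100 rounds up to 6000 as well.
 */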

/**
 * __round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, false);
}
EXPORT_SYMBOL_GPL(__round_jiffies);

/**
 * __round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * __round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The exact rounding is skewed for each processor to avoid all
 * processors firing at the exact same time, which could lead
 * to lock contention or spurious cache line bouncing.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long __round_jiffies_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, false) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_relative);

/**
 * round_jiffies - function to round jiffies to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * round_jiffies() rounds an absolute time in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), false);
}
EXPORT_SYMBOL_GPL(round_jiffies);

/**
 * round_jiffies_relative - function to round jiffies to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * round_jiffies_relative() rounds a time delta in the future (in jiffies)
 * up or down to (approximately) full seconds. This is useful for timers
 * for which the exact time they fire does not matter too much, as long as
 * they fire approximately every X seconds.
 *
 * By rounding these timers to whole seconds, all such timers will fire
 * at the same time, rather than at various times spread out. The goal
 * of this is to have the CPU wake up less, which saves power.
 *
 * The return value is the rounded version of the @j parameter.
 */
unsigned long round_jiffies_relative(unsigned long j)
{
	return __round_jiffies_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_relative);

/**
 * __round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up(unsigned long j, int cpu)
{
	return round_jiffies_common(j, cpu, true);
}
EXPORT_SYMBOL_GPL(__round_jiffies_up);

/**
 * __round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 * @cpu: the processor number on which the timeout will happen
 *
 * This is the same as __round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long __round_jiffies_up_relative(unsigned long j, int cpu)
{
	unsigned long j0 = jiffies;

	/* Use j0 because jiffies might change while we run */
	return round_jiffies_common(j + j0, cpu, true) - j0;
}
EXPORT_SYMBOL_GPL(__round_jiffies_up_relative);
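
/*
 * Usage sketch (illustrative, not part of this file): a driver that only
 * needs coarse periodic housekeeping can batch its wakeups with others:
 *
 *	mod_timer(&mydev->housekeeping_timer, round_jiffies(jiffies + 5 * HZ));
 *
 * or, if firing early would be a correctness problem:
 *
 *	mod_timer(&mydev->housekeeping_timer,
 *		  round_jiffies_up(jiffies + 5 * HZ));
 *
 * 'mydev' and 'housekeeping_timer' are hypothetical names.
 */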

/**
 * round_jiffies_up - function to round jiffies up to a full second
 * @j: the time in (absolute) jiffies that should be rounded
 *
 * This is the same as round_jiffies() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies_common(j, raw_smp_processor_id(), true);
}
EXPORT_SYMBOL_GPL(round_jiffies_up);

/**
 * round_jiffies_up_relative - function to round jiffies up to a full second
 * @j: the time in (relative) jiffies that should be rounded
 *
 * This is the same as round_jiffies_relative() except that it will never
 * round down. This is useful for timeouts for which the exact time
 * of firing does not matter too much, as long as they don't fire too
 * early.
 */
unsigned long round_jiffies_up_relative(unsigned long j)
{
	return __round_jiffies_up_relative(j, raw_smp_processor_id());
}
EXPORT_SYMBOL_GPL(round_jiffies_up_relative);


static inline unsigned int timer_get_idx(struct timer_list *timer)
{
	return (timer->flags & TIMER_ARRAYMASK) >> TIMER_ARRAYSHIFT;
}

static inline void timer_set_idx(struct timer_list *timer, unsigned int idx)
{
	timer->flags = (timer->flags & ~TIMER_ARRAYMASK) |
			idx << TIMER_ARRAYSHIFT;
}

/*
 * Helper function to calculate the array index for a given expiry
 * time.
 */
static inline unsigned calc_index(unsigned long expires, unsigned lvl,
				  unsigned long *bucket_expiry)
{
	/*
	 * The timer wheel has to guarantee that a timer does not fire
	 * early. Early expiry can happen due to:
	 * - Timer is armed at the edge of a tick
	 * - Truncation of the expiry time in the outer wheel levels
	 *
	 * Round up with level granularity to prevent this.
	 */
	expires = (expires + LVL_GRAN(lvl)) >> LVL_SHIFT(lvl);
	*bucket_expiry = expires << LVL_SHIFT(lvl);
	return LVL_OFFS(lvl) + (expires & LVL_MASK);
}

static int calc_wheel_index(unsigned long expires, unsigned long clk,
			    unsigned long *bucket_expiry)
{
	unsigned long delta = expires - clk;
	unsigned int idx;

	if (delta < LVL_START(1)) {
		idx = calc_index(expires, 0, bucket_expiry);
	} else if (delta < LVL_START(2)) {
		idx = calc_index(expires, 1, bucket_expiry);
	} else if (delta < LVL_START(3)) {
		idx = calc_index(expires, 2, bucket_expiry);
	} else if (delta < LVL_START(4)) {
		idx = calc_index(expires, 3, bucket_expiry);
	} else if (delta < LVL_START(5)) {
		idx = calc_index(expires, 4, bucket_expiry);
	} else if (delta < LVL_START(6)) {
		idx = calc_index(expires, 5, bucket_expiry);
	} else if (delta < LVL_START(7)) {
		idx = calc_index(expires, 6, bucket_expiry);
	} else if (LVL_DEPTH > 8 && delta < LVL_START(8)) {
		idx = calc_index(expires, 7, bucket_expiry);
	} else if ((long) delta < 0) {
		idx = clk & LVL_MASK;
		*bucket_expiry = clk;
	} else {
		/*
		 * Force expire obscenely large timeouts to expire at the
		 * capacity limit of the wheel.
		 */
		if (delta >= WHEEL_TIMEOUT_CUTOFF)
			expires = clk + WHEEL_TIMEOUT_MAX;

		idx = calc_index(expires, LVL_DEPTH - 1, bucket_expiry);
	}
	return idx;
}
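
/*
 * Illustrative walk-through of the index calculation: with clk == 1000 and
 * expires == 1070, delta == 70 selects level 1 (63 <= 70 < 504). calc_index()
 * then computes (1070 + 8) >> 3 == 134, so *bucket_expiry == 134 << 3 ==
 * 1072 (never before the requested 1070) and the returned index is
 * LVL_OFFS(1) + (134 & 63) == 64 + 6 == 70.
 */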

static void
trigger_dyntick_cpu(struct timer_base *base, struct timer_list *timer)
{
	if (!is_timers_nohz_active())
		return;

	/*
	 * TODO: This wants some optimizing similar to the code below, but we
	 * will do that when we switch from push to pull for deferrable timers.
	 */
	if (timer->flags & TIMER_DEFERRABLE) {
		if (tick_nohz_full_cpu(base->cpu))
			wake_up_nohz_cpu(base->cpu);
		return;
	}

	/*
	 * We might have to IPI the remote CPU if the base is idle and the
	 * timer is not deferrable. If the other CPU is on the way to idle
	 * then it can't set base->is_idle as we hold the base lock:
	 */
	if (base->is_idle)
		wake_up_nohz_cpu(base->cpu);
}
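
/*
 * Illustrative: a timer is marked deferrable at init time, e.g.
 *
 *	timer_setup(&mydev->poll_timer, mydev_poll, TIMER_DEFERRABLE);
 *
 * Such a timer does not wake an idle CPU on its own; it is serviced when
 * the CPU wakes up for some other reason. 'mydev' and 'mydev_poll' are
 * hypothetical names.
 */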

/*
 * Enqueue the timer into the hash bucket, mark it pending in
 * the bitmap, store the index in the timer flags then wake up
 * the target CPU if needed.
 */
static void enqueue_timer(struct timer_base *base, struct timer_list *timer,
			  unsigned int idx, unsigned long bucket_expiry)
{
	hlist_add_head(&timer->entry, base->vectors + idx);
	__set_bit(idx, base->pending_map);
	timer_set_idx(timer, idx);

	trace_timer_start(timer, timer->expires, timer->flags);

	/*
	 * Check whether this is the new first expiring timer. The
	 * effective expiry time of the timer is required here
	 * (bucket_expiry) instead of timer->expires.
	 */
	if (time_before(bucket_expiry, base->next_expiry)) {
		/*
		 * Set the next expiry time and kick the CPU so it
		 * can reevaluate the wheel:
		 */
		base->next_expiry = bucket_expiry;
		base->timers_pending = true;
		base->next_expiry_recalc = false;
		trigger_dyntick_cpu(base, timer);
	}
}

static void internal_add_timer(struct timer_base *base, struct timer_list *timer)
{
	unsigned long bucket_expiry;
	unsigned int idx;

	idx = calc_wheel_index(timer->expires, base->clk, &bucket_expiry);
	enqueue_timer(base, timer, idx, bucket_expiry);
}

#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static const struct debug_obj_descr timer_debug_descr;

static void *timer_debug_hint(void *addr)
{
	return ((struct timer_list *) addr)->function;
}

static bool timer_is_static_object(void *addr)
{
	struct timer_list *timer = addr;

	return (timer->entry.pprev == NULL &&
		timer->entry.next == TIMER_ENTRY_STATIC);
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool timer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_init(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/* Stub timer callback for improperly used timers. */
static void stub_timer(struct timer_list *unused)
{
	WARN_ON(1);
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool timer_fixup_activate(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;

	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);
		fallthrough;
	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool timer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		del_timer_sync(timer);
		debug_object_free(timer, &timer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_assert_init is called when:
 * - an untracked/uninit-ed object is found
 */
static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
{
	struct timer_list *timer = addr;

	switch (state) {
	case ODEBUG_STATE_NOTAVAILABLE:
		timer_setup(timer, stub_timer, 0);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr timer_debug_descr = {
	.name			= "timer_list",
	.debug_hint		= timer_debug_hint,
	.is_static_object	= timer_is_static_object,
	.fixup_init		= timer_fixup_init,
	.fixup_activate		= timer_fixup_activate,
	.fixup_free		= timer_fixup_free,
	.fixup_assert_init	= timer_fixup_assert_init,
};

static inline void debug_timer_init(struct timer_list *timer)
{
	debug_object_init(timer, &timer_debug_descr);
}

static inline void debug_timer_activate(struct timer_list *timer)
{
	debug_object_activate(timer, &timer_debug_descr);
}

static inline void debug_timer_deactivate(struct timer_list *timer)
{
	debug_object_deactivate(timer, &timer_debug_descr);
}

static inline void debug_timer_assert_init(struct timer_list *timer)
{
	debug_object_assert_init(timer, &timer_debug_descr);
}

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key);

void init_timer_on_stack_key(struct timer_list *timer,
			     void (*func)(struct timer_list *),
			     unsigned int flags,
			     const char *name, struct lock_class_key *key)
{
	debug_object_init_on_stack(timer, &timer_debug_descr);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL_GPL(init_timer_on_stack_key);

void destroy_timer_on_stack(struct timer_list *timer)
{
	debug_object_free(timer, &timer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_timer_on_stack);

#else
static inline void debug_timer_init(struct timer_list *timer) { }
static inline void debug_timer_activate(struct timer_list *timer) { }
static inline void debug_timer_deactivate(struct timer_list *timer) { }
static inline void debug_timer_assert_init(struct timer_list *timer) { }
#endif

static inline void debug_init(struct timer_list *timer)
{
	debug_timer_init(timer);
	trace_timer_init(timer);
}

static inline void debug_deactivate(struct timer_list *timer)
{
	debug_timer_deactivate(timer);
	trace_timer_cancel(timer);
}

static inline void debug_assert_init(struct timer_list *timer)
{
	debug_timer_assert_init(timer);
}
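
/*
 * Illustrative use of an on-stack timer (all names hypothetical): the
 * on-stack variants keep CONFIG_DEBUG_OBJECTS_TIMERS informed that the
 * object lives on the stack:
 *
 *	struct timer_list t;
 *
 *	timer_setup_on_stack(&t, my_callback, 0);
 *	mod_timer(&t, jiffies + HZ);
 *	...
 *	del_timer_sync(&t);
 *	destroy_timer_on_stack(&t);
 *
 * 'my_callback' stands for a function with the
 * void (*)(struct timer_list *) signature.
 */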

static void do_init_timer(struct timer_list *timer,
			  void (*func)(struct timer_list *),
			  unsigned int flags,
			  const char *name, struct lock_class_key *key)
{
	timer->entry.pprev = NULL;
	timer->function = func;
	if (WARN_ON_ONCE(flags & ~TIMER_INIT_FLAGS))
		flags &= TIMER_INIT_FLAGS;
	timer->flags = flags | raw_smp_processor_id();
	lockdep_init_map(&timer->lockdep_map, name, key, 0);
}

/**
 * init_timer_key - initialize a timer
 * @timer: the timer to be initialized
 * @func: timer callback function
 * @flags: timer flags
 * @name: name of the timer
 * @key: lockdep class key of the fake lock used for tracking timer
 *       sync lock dependencies
 *
 * init_timer_key() must be done to a timer prior to calling *any* of the
 * other timer functions.
 */
void init_timer_key(struct timer_list *timer,
		    void (*func)(struct timer_list *), unsigned int flags,
		    const char *name, struct lock_class_key *key)
{
	debug_init(timer);
	do_init_timer(timer, func, flags, name, key);
}
EXPORT_SYMBOL(init_timer_key);
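
/*
 * Typical usage sketch (illustrative; 'foo', 'foo_timeout' and the
 * surrounding driver are hypothetical). Callers normally go through the
 * timer_setup() wrapper rather than calling init_timer_key() directly,
 * and recover their containing object with from_timer():
 *
 *	struct foo {
 *		struct timer_list timer;
 *		...
 *	};
 *
 *	static void foo_timeout(struct timer_list *t)
 *	{
 *		struct foo *foo = from_timer(foo, t, timer);
 *		...
 *	}
 *
 *	timer_setup(&foo->timer, foo_timeout, 0);
 *	mod_timer(&foo->timer, jiffies + msecs_to_jiffies(100));
 */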

static inline void detach_timer(struct timer_list *timer, bool clear_pending)
{
	struct hlist_node *entry = &timer->entry;

	debug_deactivate(timer);

	__hlist_del(entry);
	if (clear_pending)
		entry->pprev = NULL;
	entry->next = LIST_POISON2;
}

static int detach_if_pending(struct timer_list *timer, struct timer_base *base,
			     bool clear_pending)
{
	unsigned idx = timer_get_idx(timer);

	if (!timer_pending(timer))
		return 0;

	if (hlist_is_singular_node(&timer->entry, base->vectors + idx)) {
		__clear_bit(idx, base->pending_map);
		base->next_expiry_recalc = true;
	}

	detach_timer(timer, clear_pending);
	return 1;
}

static inline struct timer_base *get_timer_cpu_base(u32 tflags, u32 cpu)
{
	struct timer_base *base = per_cpu_ptr(&timer_bases[BASE_STD], cpu);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = per_cpu_ptr(&timer_bases[BASE_DEF], cpu);
	return base;
}

static inline struct timer_base *get_timer_this_cpu_base(u32 tflags)
{
	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);

	/*
	 * If the timer is deferrable and NO_HZ_COMMON is set then we need
	 * to use the deferrable base.
	 */
	if (IS_ENABLED(CONFIG_NO_HZ_COMMON) && (tflags & TIMER_DEFERRABLE))
		base = this_cpu_ptr(&timer_bases[BASE_DEF]);
	return base;
}

static inline struct timer_base *get_timer_base(u32 tflags)
{
	return get_timer_cpu_base(tflags, tflags & TIMER_CPUMASK);
}

static inline struct timer_base *
get_target_base(struct timer_base *base, unsigned tflags)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) &&
	    !(tflags & TIMER_PINNED))
		return get_timer_cpu_base(tflags, get_nohz_timer_target());
#endif
	return get_timer_this_cpu_base(tflags);
}
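
/*
 * Illustrative: with timer migration enabled, get_target_base() may queue
 * a non-pinned timer on a busy CPU's base instead of the local one for
 * power reasons. A timer that must stay on a particular CPU opts out:
 *
 *	timer_setup(&mydev->timer, mydev_fn, TIMER_PINNED);
 *	add_timer_on(&mydev->timer, cpu);
 *
 * 'mydev' and 'mydev_fn' are hypothetical names.
 */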

static inline void forward_timer_base(struct timer_base *base)
{
	unsigned long jnow = READ_ONCE(jiffies);

	/*
	 * No need to forward if we are close enough below jiffies.
	 * Also while executing timers, base->clk is 1 offset ahead
	 * of jiffies to avoid endless requeuing to current jiffies.
	 */
	if ((long)(jnow - base->clk) < 1)
		return;

	/*
	 * If the next expiry value is > jiffies, then we fast forward to
	 * jiffies otherwise we forward to the next expiry value.
	 */
	if (time_after(base->next_expiry, jnow)) {
		base->clk = jnow;
	} else {
		if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
			return;
		base->clk = base->next_expiry;
	}
}
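
/*
 * Illustrative: with base->clk == 1000 and jiffies == 1005, the base is
 * forwarded to clk == 1005 if base->next_expiry is 1010 (still in the
 * future), but only to clk == 1003 if base->next_expiry is 1003, so the
 * pending bucket at 1003 is not skipped over.
 */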

/*
 * We are using hashed locking: Holding per_cpu(timer_bases[x]).lock means
 * that all timers which are tied to this base are locked, and the base
 * itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found in the base->vectors array.
 *
 * When a timer is migrating then the TIMER_MIGRATING flag is set and we need
 * to wait until the migration is done.
 */
static struct timer_base *lock_timer_base(struct timer_list *timer,
					  unsigned long *flags)
	__acquires(timer->base->lock)
{
	for (;;) {
		struct timer_base *base;
		u32 tf;

		/*
		 * We need to use READ_ONCE() here, otherwise the compiler
		 * might re-read @tf between the check for TIMER_MIGRATING
		 * and spin_lock().
		 */
		tf = READ_ONCE(timer->flags);

		if (!(tf & TIMER_MIGRATING)) {
			base = get_timer_base(tf);
			raw_spin_lock_irqsave(&base->lock, *flags);
			if (timer->flags == tf)
				return base;
			raw_spin_unlock_irqrestore(&base->lock, *flags);
		}
		cpu_relax();
	}
}

#define MOD_TIMER_PENDING_ONLY		0x01
#define MOD_TIMER_REDUCE		0x02
#define MOD_TIMER_NOTPENDING		0x04

static inline int
__mod_timer(struct timer_list *timer, unsigned long expires, unsigned int options)
{
	unsigned long clk = 0, flags, bucket_expiry;
	struct timer_base *base, *new_base;
	unsigned int idx = UINT_MAX;
	int ret = 0;

	BUG_ON(!timer->function);

	/*
	 * This is a common optimization triggered by the networking code - if
	 * the timer is re-modified to have the same timeout or ends up in the
	 * same array bucket then just return:
	 */
	if (!(options & MOD_TIMER_NOTPENDING) && timer_pending(timer)) {
		/*
		 * The downside of this optimization is that it can result in
		 * larger granularity than you would get from adding a new
		 * timer with this expiry.
		 */
		long diff = timer->expires - expires;

		if (!diff)
			return 1;
		if (options & MOD_TIMER_REDUCE && diff <= 0)
			return 1;

		/*
		 * We lock the timer base and calculate the bucket index right
		 * here. If the timer ends up in the same bucket, then we
		 * just update the expiry time and avoid the whole
		 * dequeue/enqueue dance.
		 */
		base = lock_timer_base(timer, &flags);
		forward_timer_base(base);

		if (timer_pending(timer) && (options & MOD_TIMER_REDUCE) &&
		    time_before_eq(timer->expires, expires)) {
			ret = 1;
			goto out_unlock;
		}

		clk = base->clk;
		idx = calc_wheel_index(expires, clk, &bucket_expiry);

		/*
		 * Retrieve and compare the array index of the pending
		 * timer. If it matches, set the expiry to the new value so a
		 * subsequent call will exit in the expires check above.
		 */
If it matches, set the expiry to the new value so a 1008f00c0afdSAnna-Maria Gleixner * subsequent call will exit in the expires check above. 1009f00c0afdSAnna-Maria Gleixner */ 1010f00c0afdSAnna-Maria Gleixner if (idx == timer_get_idx(timer)) { 1011b24591e2SDavid Howells if (!(options & MOD_TIMER_REDUCE)) 1012b24591e2SDavid Howells timer->expires = expires; 1013b24591e2SDavid Howells else if (time_after(timer->expires, expires)) 1014f00c0afdSAnna-Maria Gleixner timer->expires = expires; 10154da9152aSThomas Gleixner ret = 1; 10164da9152aSThomas Gleixner goto out_unlock; 1017f00c0afdSAnna-Maria Gleixner } 10184da9152aSThomas Gleixner } else { 10194da9152aSThomas Gleixner base = lock_timer_base(timer, &flags); 10202fe59f50SNicholas Piggin forward_timer_base(base); 1021500462a9SThomas Gleixner } 1022500462a9SThomas Gleixner 10235cee9645SThomas Gleixner ret = detach_if_pending(timer, base, false); 1024b24591e2SDavid Howells if (!ret && (options & MOD_TIMER_PENDING_ONLY)) 10255cee9645SThomas Gleixner goto out_unlock; 10265cee9645SThomas Gleixner 1027500462a9SThomas Gleixner new_base = get_target_base(base, timer->flags); 10285cee9645SThomas Gleixner 10295cee9645SThomas Gleixner if (base != new_base) { 10305cee9645SThomas Gleixner /* 1031500462a9SThomas Gleixner * We are trying to schedule the timer on the new base. 10325cee9645SThomas Gleixner * However, we can't change the timer's base while it is running, 10335cee9645SThomas Gleixner * otherwise del_timer_sync() can't detect that the timer's 1034500462a9SThomas Gleixner * handler has not finished yet. This also guarantees that the 1035500462a9SThomas Gleixner * timer is serialized wrt itself. 10365cee9645SThomas Gleixner */ 10375cee9645SThomas Gleixner if (likely(base->running_timer != timer)) { 10385cee9645SThomas Gleixner /* See the comment in lock_timer_base() */ 10390eeda71bSThomas Gleixner timer->flags |= TIMER_MIGRATING; 10400eeda71bSThomas Gleixner 10412287d866SSebastian Andrzej Siewior raw_spin_unlock(&base->lock); 10425cee9645SThomas Gleixner base = new_base; 10432287d866SSebastian Andrzej Siewior raw_spin_lock(&base->lock); 1044d0023a14SEric Dumazet WRITE_ONCE(timer->flags, 1045d0023a14SEric Dumazet (timer->flags & ~TIMER_BASEMASK) | base->cpu); 10466bad6bccSThomas Gleixner forward_timer_base(base); 10472fe59f50SNicholas Piggin } 10482fe59f50SNicholas Piggin } 10496bad6bccSThomas Gleixner 1050dc1e7dc5SAnna-Maria Gleixner debug_timer_activate(timer); 1051fd45bb77SThomas Gleixner 10525cee9645SThomas Gleixner timer->expires = expires; 1053f00c0afdSAnna-Maria Gleixner /* 1054f00c0afdSAnna-Maria Gleixner * If 'idx' was calculated above and the base time did not advance 10554da9152aSThomas Gleixner * between calculating 'idx' and possibly switching the base, only 10569a2b764bSFrederic Weisbecker * enqueue_timer() is required. Otherwise we need to (re)calculate 10579a2b764bSFrederic Weisbecker * the wheel index via internal_add_timer().
1058f00c0afdSAnna-Maria Gleixner */ 10599a2b764bSFrederic Weisbecker if (idx != UINT_MAX && clk == base->clk) 10609a2b764bSFrederic Weisbecker enqueue_timer(base, timer, idx, bucket_expiry); 10619a2b764bSFrederic Weisbecker else 10625cee9645SThomas Gleixner internal_add_timer(base, timer); 10635cee9645SThomas Gleixner 10645cee9645SThomas Gleixner out_unlock: 10652287d866SSebastian Andrzej Siewior raw_spin_unlock_irqrestore(&base->lock, flags); 10665cee9645SThomas Gleixner 10675cee9645SThomas Gleixner return ret; 10685cee9645SThomas Gleixner } 10695cee9645SThomas Gleixner 10705cee9645SThomas Gleixner /** 10715cee9645SThomas Gleixner * mod_timer_pending - modify a pending timer's timeout 10725cee9645SThomas Gleixner * @timer: the pending timer to be modified 10735cee9645SThomas Gleixner * @expires: new timeout in jiffies 10745cee9645SThomas Gleixner * 10755cee9645SThomas Gleixner * mod_timer_pending() is the same for pending timers as mod_timer(), 10765cee9645SThomas Gleixner * but will not re-activate and modify already deleted timers. 10775cee9645SThomas Gleixner * 10785cee9645SThomas Gleixner * It is useful for unserialized use of timers. 10795cee9645SThomas Gleixner */ 10805cee9645SThomas Gleixner int mod_timer_pending(struct timer_list *timer, unsigned long expires) 10815cee9645SThomas Gleixner { 1082b24591e2SDavid Howells return __mod_timer(timer, expires, MOD_TIMER_PENDING_ONLY); 10835cee9645SThomas Gleixner } 10845cee9645SThomas Gleixner EXPORT_SYMBOL(mod_timer_pending); 10855cee9645SThomas Gleixner 10865cee9645SThomas Gleixner /** 10875cee9645SThomas Gleixner * mod_timer - modify a timer's timeout 10885cee9645SThomas Gleixner * @timer: the timer to be modified 10895cee9645SThomas Gleixner * @expires: new timeout in jiffies 10905cee9645SThomas Gleixner * 10915cee9645SThomas Gleixner * mod_timer() is a more efficient way to update the expire field of an 10925cee9645SThomas Gleixner * active timer (if the timer is inactive it will be activated) 10935cee9645SThomas Gleixner * 10945cee9645SThomas Gleixner * mod_timer(timer, expires) is equivalent to: 10955cee9645SThomas Gleixner * 10965cee9645SThomas Gleixner * del_timer(timer); timer->expires = expires; add_timer(timer); 10975cee9645SThomas Gleixner * 10985cee9645SThomas Gleixner * Note that if there are multiple unserialized concurrent users of the 10995cee9645SThomas Gleixner * same timer, then mod_timer() is the only safe way to modify the timeout, 11005cee9645SThomas Gleixner * since add_timer() cannot modify an already running timer. 11015cee9645SThomas Gleixner * 11025cee9645SThomas Gleixner * The function returns whether it has modified a pending timer or not. 11035cee9645SThomas Gleixner * (ie. mod_timer() of an inactive timer returns 0, mod_timer() of an 11045cee9645SThomas Gleixner * active timer returns 1.) 
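 *
 * Example (editor's sketch; my_obj, my_poll_fn and do_poll_work are
 * hypothetical names): re-arming a periodic poll timer from its own
 * callback, assuming it was set up with
 * timer_setup(&obj->timer, my_poll_fn, 0):
 *
 *	static void my_poll_fn(struct timer_list *t)
 *	{
 *		struct my_obj *obj = from_timer(obj, t, timer);
 *
 *		do_poll_work(obj);
 *		mod_timer(&obj->timer, jiffies + msecs_to_jiffies(100));
 *	}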
11055cee9645SThomas Gleixner */ 11065cee9645SThomas Gleixner int mod_timer(struct timer_list *timer, unsigned long expires) 11075cee9645SThomas Gleixner { 1108b24591e2SDavid Howells return __mod_timer(timer, expires, 0); 11095cee9645SThomas Gleixner } 11105cee9645SThomas Gleixner EXPORT_SYMBOL(mod_timer); 11115cee9645SThomas Gleixner 11125cee9645SThomas Gleixner /** 1113b24591e2SDavid Howells * timer_reduce - Modify a timer's timeout if it would reduce the timeout 1114b24591e2SDavid Howells * @timer: The timer to be modified 1115b24591e2SDavid Howells * @expires: New timeout in jiffies 1116b24591e2SDavid Howells * 1117b24591e2SDavid Howells * timer_reduce() is very similar to mod_timer(), except that it will only 1118b24591e2SDavid Howells * modify a pending timer if that would reduce the expiration time (it will 1119b24591e2SDavid Howells * start a timer that isn't pending). 1120b24591e2SDavid Howells */ 1121b24591e2SDavid Howells int timer_reduce(struct timer_list *timer, unsigned long expires) 1122b24591e2SDavid Howells { 1123b24591e2SDavid Howells return __mod_timer(timer, expires, MOD_TIMER_REDUCE); 1124b24591e2SDavid Howells } 1125b24591e2SDavid Howells EXPORT_SYMBOL(timer_reduce); 1126b24591e2SDavid Howells 1127b24591e2SDavid Howells /** 11285cee9645SThomas Gleixner * add_timer - start a timer 11295cee9645SThomas Gleixner * @timer: the timer to be added 11305cee9645SThomas Gleixner * 1131c1eba5bcSKees Cook * The kernel will do a ->function(@timer) callback from the 11325cee9645SThomas Gleixner * timer interrupt at the ->expires point in the future. The 11335cee9645SThomas Gleixner * current time is 'jiffies'. 11345cee9645SThomas Gleixner * 1135c1eba5bcSKees Cook * The timer's ->expires, ->function fields must be set prior to calling 1136c1eba5bcSKees Cook * this function. 11375cee9645SThomas Gleixner * 11385cee9645SThomas Gleixner * Timers with an ->expires field in the past will be executed in the next 11395cee9645SThomas Gleixner * timer tick. 11405cee9645SThomas Gleixner */ 11415cee9645SThomas Gleixner void add_timer(struct timer_list *timer) 11425cee9645SThomas Gleixner { 11435cee9645SThomas Gleixner BUG_ON(timer_pending(timer)); 114490c01894SEric Dumazet __mod_timer(timer, timer->expires, MOD_TIMER_NOTPENDING); 11455cee9645SThomas Gleixner } 11465cee9645SThomas Gleixner EXPORT_SYMBOL(add_timer); 11475cee9645SThomas Gleixner 11485cee9645SThomas Gleixner /** 11495cee9645SThomas Gleixner * add_timer_on - start a timer on a particular CPU 11505cee9645SThomas Gleixner * @timer: the timer to be added 11515cee9645SThomas Gleixner * @cpu: the CPU to start it on 11525cee9645SThomas Gleixner * 11535cee9645SThomas Gleixner * This is not very scalable on SMP. Double adds are not possible. 11545cee9645SThomas Gleixner */ 11555cee9645SThomas Gleixner void add_timer_on(struct timer_list *timer, int cpu) 11565cee9645SThomas Gleixner { 1157500462a9SThomas Gleixner struct timer_base *new_base, *base; 11585cee9645SThomas Gleixner unsigned long flags; 11595cee9645SThomas Gleixner 11605cee9645SThomas Gleixner BUG_ON(timer_pending(timer) || !timer->function); 116122b886ddSTejun Heo 1162500462a9SThomas Gleixner new_base = get_timer_cpu_base(timer->flags, cpu); 1163500462a9SThomas Gleixner 116422b886ddSTejun Heo /* 116522b886ddSTejun Heo * If @timer was on a different CPU, it should be migrated with the 116622b886ddSTejun Heo * old base locked to prevent other operations proceeding with the 116722b886ddSTejun Heo * wrong base locked. See lock_timer_base().
116822b886ddSTejun Heo */ 116922b886ddSTejun Heo base = lock_timer_base(timer, &flags); 117022b886ddSTejun Heo if (base != new_base) { 117122b886ddSTejun Heo timer->flags |= TIMER_MIGRATING; 117222b886ddSTejun Heo 11732287d866SSebastian Andrzej Siewior raw_spin_unlock(&base->lock); 117422b886ddSTejun Heo base = new_base; 11752287d866SSebastian Andrzej Siewior raw_spin_lock(&base->lock); 117622b886ddSTejun Heo WRITE_ONCE(timer->flags, 117722b886ddSTejun Heo (timer->flags & ~TIMER_BASEMASK) | cpu); 117822b886ddSTejun Heo } 11792fe59f50SNicholas Piggin forward_timer_base(base); 118022b886ddSTejun Heo 1181dc1e7dc5SAnna-Maria Gleixner debug_timer_activate(timer); 11825cee9645SThomas Gleixner internal_add_timer(base, timer); 11832287d866SSebastian Andrzej Siewior raw_spin_unlock_irqrestore(&base->lock, flags); 11845cee9645SThomas Gleixner } 11855cee9645SThomas Gleixner EXPORT_SYMBOL_GPL(add_timer_on); 11865cee9645SThomas Gleixner 11875cee9645SThomas Gleixner /** 11880ba42a59SMasanari Iida * del_timer - deactivate a timer. 11895cee9645SThomas Gleixner * @timer: the timer to be deactivated 11905cee9645SThomas Gleixner * 11915cee9645SThomas Gleixner * del_timer() deactivates a timer - this works on both active and inactive 11925cee9645SThomas Gleixner * timers. 11935cee9645SThomas Gleixner * 11945cee9645SThomas Gleixner * The function returns whether it has deactivated a pending timer or not. 11955cee9645SThomas Gleixner * (ie. del_timer() of an inactive timer returns 0, del_timer() of an 11965cee9645SThomas Gleixner * active timer returns 1.) 11975cee9645SThomas Gleixner */ 11985cee9645SThomas Gleixner int del_timer(struct timer_list *timer) 11995cee9645SThomas Gleixner { 1200494af3edSThomas Gleixner struct timer_base *base; 12015cee9645SThomas Gleixner unsigned long flags; 12025cee9645SThomas Gleixner int ret = 0; 12035cee9645SThomas Gleixner 12045cee9645SThomas Gleixner debug_assert_init(timer); 12055cee9645SThomas Gleixner 12065cee9645SThomas Gleixner if (timer_pending(timer)) { 12075cee9645SThomas Gleixner base = lock_timer_base(timer, &flags); 12085cee9645SThomas Gleixner ret = detach_if_pending(timer, base, true); 12092287d866SSebastian Andrzej Siewior raw_spin_unlock_irqrestore(&base->lock, flags); 12105cee9645SThomas Gleixner } 12115cee9645SThomas Gleixner 12125cee9645SThomas Gleixner return ret; 12135cee9645SThomas Gleixner } 12145cee9645SThomas Gleixner EXPORT_SYMBOL(del_timer); 12155cee9645SThomas Gleixner 12165cee9645SThomas Gleixner /** 12175cee9645SThomas Gleixner * try_to_del_timer_sync - Try to deactivate a timer 1218d15bc69aSPeter Meerwald-Stadler * @timer: timer to delete 12195cee9645SThomas Gleixner * 12205cee9645SThomas Gleixner * This function tries to deactivate a timer. Upon successful (ret >= 0) 12215cee9645SThomas Gleixner * exit the timer is not queued and the handler is not running on any CPU. 
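 *
 * The possible return values are -1 when the timer callback is
 * currently running (and nothing was changed), 0 when the timer was
 * not pending, and 1 when a pending timer was deactivated.
 *
 * Example (editor's sketch): busy-wait until the timer is gone, which
 * is the core of what del_timer_sync() does below:
 *
 *	while (try_to_del_timer_sync(&obj->timer) < 0)
 *		cpu_relax();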
12225cee9645SThomas Gleixner */ 12235cee9645SThomas Gleixner int try_to_del_timer_sync(struct timer_list *timer) 12245cee9645SThomas Gleixner { 1225494af3edSThomas Gleixner struct timer_base *base; 12265cee9645SThomas Gleixner unsigned long flags; 12275cee9645SThomas Gleixner int ret = -1; 12285cee9645SThomas Gleixner 12295cee9645SThomas Gleixner debug_assert_init(timer); 12305cee9645SThomas Gleixner 12315cee9645SThomas Gleixner base = lock_timer_base(timer, &flags); 12325cee9645SThomas Gleixner 1233dfb4357dSKees Cook if (base->running_timer != timer) 12345cee9645SThomas Gleixner ret = detach_if_pending(timer, base, true); 1235dfb4357dSKees Cook 12362287d866SSebastian Andrzej Siewior raw_spin_unlock_irqrestore(&base->lock, flags); 12375cee9645SThomas Gleixner 12385cee9645SThomas Gleixner return ret; 12395cee9645SThomas Gleixner } 12405cee9645SThomas Gleixner EXPORT_SYMBOL(try_to_del_timer_sync); 12415cee9645SThomas Gleixner 1242030dcdd1SAnna-Maria Gleixner #ifdef CONFIG_PREEMPT_RT 1243030dcdd1SAnna-Maria Gleixner static __init void timer_base_init_expiry_lock(struct timer_base *base) 1244030dcdd1SAnna-Maria Gleixner { 1245030dcdd1SAnna-Maria Gleixner spin_lock_init(&base->expiry_lock); 1246030dcdd1SAnna-Maria Gleixner } 1247030dcdd1SAnna-Maria Gleixner 1248030dcdd1SAnna-Maria Gleixner static inline void timer_base_lock_expiry(struct timer_base *base) 1249030dcdd1SAnna-Maria Gleixner { 1250030dcdd1SAnna-Maria Gleixner spin_lock(&base->expiry_lock); 1251030dcdd1SAnna-Maria Gleixner } 1252030dcdd1SAnna-Maria Gleixner 1253030dcdd1SAnna-Maria Gleixner static inline void timer_base_unlock_expiry(struct timer_base *base) 1254030dcdd1SAnna-Maria Gleixner { 1255030dcdd1SAnna-Maria Gleixner spin_unlock(&base->expiry_lock); 1256030dcdd1SAnna-Maria Gleixner } 1257030dcdd1SAnna-Maria Gleixner 1258030dcdd1SAnna-Maria Gleixner /* 1259030dcdd1SAnna-Maria Gleixner * The counterpart to del_timer_wait_running(). 1260030dcdd1SAnna-Maria Gleixner * 1261030dcdd1SAnna-Maria Gleixner * If there is a waiter for base->expiry_lock, then it was waiting for the 12624bf07f65SIngo Molnar * timer callback to finish. Drop expiry_lock and reacquire it. That allows 1263030dcdd1SAnna-Maria Gleixner * the waiter to acquire the lock and make progress. 1264030dcdd1SAnna-Maria Gleixner */ 1265030dcdd1SAnna-Maria Gleixner static void timer_sync_wait_running(struct timer_base *base) 1266030dcdd1SAnna-Maria Gleixner { 1267030dcdd1SAnna-Maria Gleixner if (atomic_read(&base->timer_waiters)) { 1268*bb7262b2SThomas Gleixner raw_spin_unlock_irq(&base->lock); 1269030dcdd1SAnna-Maria Gleixner spin_unlock(&base->expiry_lock); 1270030dcdd1SAnna-Maria Gleixner spin_lock(&base->expiry_lock); 1271*bb7262b2SThomas Gleixner raw_spin_lock_irq(&base->lock); 1272030dcdd1SAnna-Maria Gleixner } 1273030dcdd1SAnna-Maria Gleixner } 1274030dcdd1SAnna-Maria Gleixner 1275030dcdd1SAnna-Maria Gleixner /* 1276030dcdd1SAnna-Maria Gleixner * This function is called on PREEMPT_RT kernels when the fast path 1277030dcdd1SAnna-Maria Gleixner * deletion of a timer failed because the timer callback function was 1278030dcdd1SAnna-Maria Gleixner * running. 1279030dcdd1SAnna-Maria Gleixner * 1280030dcdd1SAnna-Maria Gleixner * This prevents priority inversion, if the softirq thread on a remote CPU 1281030dcdd1SAnna-Maria Gleixner * got preempted, and it prevents a livelock when the task which tries to 1282030dcdd1SAnna-Maria Gleixner * delete a timer has preempted the softirq thread which runs the timer 1283030dcdd1SAnna-Maria Gleixner * callback function.
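 *
 * Editor's illustration of that livelock: a high-priority task
 * preempts the softirq thread mid-callback and then spins in
 * try_to_del_timer_sync(); the callback can never finish because its
 * thread never runs again. Blocking on base->expiry_lock instead
 * (spinlocks are priority-inheriting rtmutexes on PREEMPT_RT) boosts
 * the softirq thread so the callback can complete.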
1284030dcdd1SAnna-Maria Gleixner */ 1285030dcdd1SAnna-Maria Gleixner static void del_timer_wait_running(struct timer_list *timer) 1286030dcdd1SAnna-Maria Gleixner { 1287030dcdd1SAnna-Maria Gleixner u32 tf; 1288030dcdd1SAnna-Maria Gleixner 1289030dcdd1SAnna-Maria Gleixner tf = READ_ONCE(timer->flags); 1290c725dafcSSebastian Andrzej Siewior if (!(tf & (TIMER_MIGRATING | TIMER_IRQSAFE))) { 1291030dcdd1SAnna-Maria Gleixner struct timer_base *base = get_timer_base(tf); 1292030dcdd1SAnna-Maria Gleixner 1293030dcdd1SAnna-Maria Gleixner /* 1294030dcdd1SAnna-Maria Gleixner * Mark the base as contended and grab the expiry lock, 1295030dcdd1SAnna-Maria Gleixner * which is held by the softirq across the timer 1296030dcdd1SAnna-Maria Gleixner * callback. Drop the lock immediately so the softirq can 1297030dcdd1SAnna-Maria Gleixner * expire the next timer. In theory the timer could already 1298030dcdd1SAnna-Maria Gleixner * be running again, but that's more than unlikely and just 1299030dcdd1SAnna-Maria Gleixner * causes another wait loop. 1300030dcdd1SAnna-Maria Gleixner */ 1301030dcdd1SAnna-Maria Gleixner atomic_inc(&base->timer_waiters); 1302030dcdd1SAnna-Maria Gleixner spin_lock_bh(&base->expiry_lock); 1303030dcdd1SAnna-Maria Gleixner atomic_dec(&base->timer_waiters); 1304030dcdd1SAnna-Maria Gleixner spin_unlock_bh(&base->expiry_lock); 1305030dcdd1SAnna-Maria Gleixner } 1306030dcdd1SAnna-Maria Gleixner } 1307030dcdd1SAnna-Maria Gleixner #else 1308030dcdd1SAnna-Maria Gleixner static inline void timer_base_init_expiry_lock(struct timer_base *base) { } 1309030dcdd1SAnna-Maria Gleixner static inline void timer_base_lock_expiry(struct timer_base *base) { } 1310030dcdd1SAnna-Maria Gleixner static inline void timer_base_unlock_expiry(struct timer_base *base) { } 1311030dcdd1SAnna-Maria Gleixner static inline void timer_sync_wait_running(struct timer_base *base) { } 1312030dcdd1SAnna-Maria Gleixner static inline void del_timer_wait_running(struct timer_list *timer) { } 1313030dcdd1SAnna-Maria Gleixner #endif 1314030dcdd1SAnna-Maria Gleixner 1315030dcdd1SAnna-Maria Gleixner #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) 13165cee9645SThomas Gleixner /** 13175cee9645SThomas Gleixner * del_timer_sync - deactivate a timer and wait for the handler to finish. 13185cee9645SThomas Gleixner * @timer: the timer to be deactivated 13195cee9645SThomas Gleixner * 13205cee9645SThomas Gleixner * This function only differs from del_timer() on SMP: besides deactivating 13215cee9645SThomas Gleixner * the timer it also makes sure the handler has finished executing on other 13225cee9645SThomas Gleixner * CPUs. 13235cee9645SThomas Gleixner * 13245cee9645SThomas Gleixner * Synchronization rules: Callers must prevent restarting of the timer, 13255cee9645SThomas Gleixner * otherwise this function is meaningless. It must not be called from 13265cee9645SThomas Gleixner * interrupt contexts unless the timer is an irqsafe one. The caller must 13275cee9645SThomas Gleixner * not hold locks which would prevent completion of the timer's 13285cee9645SThomas Gleixner * handler. The timer's handler must not call add_timer_on(). Upon exit the 13295cee9645SThomas Gleixner * timer is not queued and the handler is not running on any CPU. 13305cee9645SThomas Gleixner * 13315cee9645SThomas Gleixner * Note: For !irqsafe timers, you must not hold locks that are held in 13325cee9645SThomas Gleixner * interrupt context while calling this function. Even if the lock has 1333bf9c96beSMauro Carvalho Chehab * nothing to do with the timer in question. 
Here's why:: 13345cee9645SThomas Gleixner * 13355cee9645SThomas Gleixner * CPU0 CPU1 13365cee9645SThomas Gleixner * ---- ---- 13375cee9645SThomas Gleixner * <SOFTIRQ> 13385cee9645SThomas Gleixner * call_timer_fn(); 13395cee9645SThomas Gleixner * base->running_timer = mytimer; 13405cee9645SThomas Gleixner * spin_lock_irq(somelock); 13415cee9645SThomas Gleixner * <IRQ> 13425cee9645SThomas Gleixner * spin_lock(somelock); 13435cee9645SThomas Gleixner * del_timer_sync(mytimer); 13445cee9645SThomas Gleixner * while (base->running_timer == mytimer); 13455cee9645SThomas Gleixner * 13465cee9645SThomas Gleixner * Now del_timer_sync() will never return and never release somelock. 13475cee9645SThomas Gleixner * The interrupt on the other CPU is waiting to grab somelock but 13485cee9645SThomas Gleixner * it has interrupted the softirq that CPU0 is waiting to finish. 13495cee9645SThomas Gleixner * 13505cee9645SThomas Gleixner * The function returns whether it has deactivated a pending timer or not. 13515cee9645SThomas Gleixner */ 13525cee9645SThomas Gleixner int del_timer_sync(struct timer_list *timer) 13535cee9645SThomas Gleixner { 1354030dcdd1SAnna-Maria Gleixner int ret; 1355030dcdd1SAnna-Maria Gleixner 13565cee9645SThomas Gleixner #ifdef CONFIG_LOCKDEP 13575cee9645SThomas Gleixner unsigned long flags; 13585cee9645SThomas Gleixner 13595cee9645SThomas Gleixner /* 13605cee9645SThomas Gleixner * If lockdep gives a backtrace here, please reference 13615cee9645SThomas Gleixner * the synchronization rules above. 13625cee9645SThomas Gleixner */ 13635cee9645SThomas Gleixner local_irq_save(flags); 13645cee9645SThomas Gleixner lock_map_acquire(&timer->lockdep_map); 13655cee9645SThomas Gleixner lock_map_release(&timer->lockdep_map); 13665cee9645SThomas Gleixner local_irq_restore(flags); 13675cee9645SThomas Gleixner #endif 13685cee9645SThomas Gleixner /* 13695cee9645SThomas Gleixner * don't use it in hardirq context, because it 13705cee9645SThomas Gleixner * could lead to deadlock. 13715cee9645SThomas Gleixner */ 13720eeda71bSThomas Gleixner WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE)); 1373030dcdd1SAnna-Maria Gleixner 1374c725dafcSSebastian Andrzej Siewior /* 1375c725dafcSSebastian Andrzej Siewior * Must be able to sleep on PREEMPT_RT because of the slowpath in 1376c725dafcSSebastian Andrzej Siewior * del_timer_wait_running(). 
1377c725dafcSSebastian Andrzej Siewior */ 1378c725dafcSSebastian Andrzej Siewior if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(timer->flags & TIMER_IRQSAFE)) 1379c725dafcSSebastian Andrzej Siewior lockdep_assert_preemption_enabled(); 1380c725dafcSSebastian Andrzej Siewior 1381030dcdd1SAnna-Maria Gleixner do { 1382030dcdd1SAnna-Maria Gleixner ret = try_to_del_timer_sync(timer); 1383030dcdd1SAnna-Maria Gleixner 1384030dcdd1SAnna-Maria Gleixner if (unlikely(ret < 0)) { 1385030dcdd1SAnna-Maria Gleixner del_timer_wait_running(timer); 13865cee9645SThomas Gleixner cpu_relax(); 13875cee9645SThomas Gleixner } 1388030dcdd1SAnna-Maria Gleixner } while (ret < 0); 1389030dcdd1SAnna-Maria Gleixner 1390030dcdd1SAnna-Maria Gleixner return ret; 13915cee9645SThomas Gleixner } 13925cee9645SThomas Gleixner EXPORT_SYMBOL(del_timer_sync); 13935cee9645SThomas Gleixner #endif 13945cee9645SThomas Gleixner 1395f28d3d53SAnna-Maria Gleixner static void call_timer_fn(struct timer_list *timer, 1396f28d3d53SAnna-Maria Gleixner void (*fn)(struct timer_list *), 1397f28d3d53SAnna-Maria Gleixner unsigned long baseclk) 13985cee9645SThomas Gleixner { 13995cee9645SThomas Gleixner int count = preempt_count(); 14005cee9645SThomas Gleixner 14015cee9645SThomas Gleixner #ifdef CONFIG_LOCKDEP 14025cee9645SThomas Gleixner /* 14035cee9645SThomas Gleixner * It is permissible to free the timer from inside the 14045cee9645SThomas Gleixner * function that is called from it; we need to take this into 14055cee9645SThomas Gleixner * account for lockdep too. To avoid bogus "held lock freed" 14065cee9645SThomas Gleixner * warnings as well as problems when looking into 14075cee9645SThomas Gleixner * timer->lockdep_map, make a copy and use that here. 14085cee9645SThomas Gleixner */ 14095cee9645SThomas Gleixner struct lockdep_map lockdep_map; 14105cee9645SThomas Gleixner 14115cee9645SThomas Gleixner lockdep_copy_map(&lockdep_map, &timer->lockdep_map); 14125cee9645SThomas Gleixner #endif 14135cee9645SThomas Gleixner /* 14145cee9645SThomas Gleixner * Couple the lock chain with the lock chain at 14155cee9645SThomas Gleixner * del_timer_sync() by acquiring the lock_map around the fn() 14165cee9645SThomas Gleixner * call here and in del_timer_sync(). 14175cee9645SThomas Gleixner */ 14185cee9645SThomas Gleixner lock_map_acquire(&lockdep_map); 14195cee9645SThomas Gleixner 1420f28d3d53SAnna-Maria Gleixner trace_timer_expire_entry(timer, baseclk); 1421354b46b1SKees Cook fn(timer); 14225cee9645SThomas Gleixner trace_timer_expire_exit(timer); 14235cee9645SThomas Gleixner 14245cee9645SThomas Gleixner lock_map_release(&lockdep_map); 14255cee9645SThomas Gleixner 14265cee9645SThomas Gleixner if (count != preempt_count()) { 1427d75f773cSSakari Ailus WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n", 14285cee9645SThomas Gleixner fn, count, preempt_count()); 14295cee9645SThomas Gleixner /* 14305cee9645SThomas Gleixner * Restore the preempt count. That gives us a decent 14315cee9645SThomas Gleixner * chance to survive and extract information. If the 14325cee9645SThomas Gleixner * callback kept a lock held, bad luck, but not worse 14335cee9645SThomas Gleixner * than the BUG() we had.
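 *
 * Editor's illustration (hypothetical callback, not from the source)
 * of a bug that would trip the WARN_ONCE() above:
 *
 *	static void leaky_cb(struct timer_list *t)
 *	{
 *		spin_lock(&my_lock);
 *	}
 *
 * The missing spin_unlock() leaves preempt_count() elevated, the
 * warning fires, and preempt_count_set() below repairs the damage.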
14345cee9645SThomas Gleixner */ 14355cee9645SThomas Gleixner preempt_count_set(count); 14365cee9645SThomas Gleixner } 14375cee9645SThomas Gleixner } 14385cee9645SThomas Gleixner 1439500462a9SThomas Gleixner static void expire_timers(struct timer_base *base, struct hlist_head *head) 14405cee9645SThomas Gleixner { 1441f28d3d53SAnna-Maria Gleixner /* 1442f28d3d53SAnna-Maria Gleixner * This value is required only for tracing. base->clk was 1443f28d3d53SAnna-Maria Gleixner * incremented directly before expire_timers was called. But expiry 1444f28d3d53SAnna-Maria Gleixner * is related to the old base->clk value. 1445f28d3d53SAnna-Maria Gleixner */ 1446f28d3d53SAnna-Maria Gleixner unsigned long baseclk = base->clk - 1; 1447f28d3d53SAnna-Maria Gleixner 14481dabbcecSThomas Gleixner while (!hlist_empty(head)) { 1449500462a9SThomas Gleixner struct timer_list *timer; 1450354b46b1SKees Cook void (*fn)(struct timer_list *); 14515cee9645SThomas Gleixner 14521dabbcecSThomas Gleixner timer = hlist_entry(head->first, struct timer_list, entry); 14535cee9645SThomas Gleixner 14545cee9645SThomas Gleixner base->running_timer = timer; 1455500462a9SThomas Gleixner detach_timer(timer, true); 14565cee9645SThomas Gleixner 1457500462a9SThomas Gleixner fn = timer->function; 1458500462a9SThomas Gleixner 1459500462a9SThomas Gleixner if (timer->flags & TIMER_IRQSAFE) { 14602287d866SSebastian Andrzej Siewior raw_spin_unlock(&base->lock); 1461f28d3d53SAnna-Maria Gleixner call_timer_fn(timer, fn, baseclk); 14622287d866SSebastian Andrzej Siewior raw_spin_lock(&base->lock); 1463*bb7262b2SThomas Gleixner base->running_timer = NULL; 14645cee9645SThomas Gleixner } else { 14652287d866SSebastian Andrzej Siewior raw_spin_unlock_irq(&base->lock); 1466f28d3d53SAnna-Maria Gleixner call_timer_fn(timer, fn, baseclk); 1467*bb7262b2SThomas Gleixner raw_spin_lock_irq(&base->lock); 1468030dcdd1SAnna-Maria Gleixner base->running_timer = NULL; 1469030dcdd1SAnna-Maria Gleixner timer_sync_wait_running(base); 14705cee9645SThomas Gleixner } 14715cee9645SThomas Gleixner } 14725cee9645SThomas Gleixner } 1473500462a9SThomas Gleixner 1474d4f7dae8SFrederic Weisbecker static int collect_expired_timers(struct timer_base *base, 1475500462a9SThomas Gleixner struct hlist_head *heads) 1476500462a9SThomas Gleixner { 1477d4f7dae8SFrederic Weisbecker unsigned long clk = base->clk = base->next_expiry; 1478500462a9SThomas Gleixner struct hlist_head *vec; 1479500462a9SThomas Gleixner int i, levels = 0; 1480500462a9SThomas Gleixner unsigned int idx; 1481500462a9SThomas Gleixner 1482500462a9SThomas Gleixner for (i = 0; i < LVL_DEPTH; i++) { 1483500462a9SThomas Gleixner idx = (clk & LVL_MASK) + i * LVL_SIZE; 1484500462a9SThomas Gleixner 1485500462a9SThomas Gleixner if (__test_and_clear_bit(idx, base->pending_map)) { 1486500462a9SThomas Gleixner vec = base->vectors + idx; 1487500462a9SThomas Gleixner hlist_move_list(vec, heads++); 1488500462a9SThomas Gleixner levels++; 1489500462a9SThomas Gleixner } 1490500462a9SThomas Gleixner /* Is it time to look at the next level? */ 1491500462a9SThomas Gleixner if (clk & LVL_CLK_MASK) 1492500462a9SThomas Gleixner break; 1493500462a9SThomas Gleixner /* Shift clock for the next level granularity */ 1494500462a9SThomas Gleixner clk >>= LVL_CLK_SHIFT; 1495500462a9SThomas Gleixner } 1496500462a9SThomas Gleixner return levels; 14975cee9645SThomas Gleixner } 14985cee9645SThomas Gleixner 14995cee9645SThomas Gleixner /* 150023696838SAnna-Maria Gleixner * Find the next pending bucket of a level. 
Search from level start (@offset) 150123696838SAnna-Maria Gleixner * + @clk upwards and if nothing there, search from start of the level 150223696838SAnna-Maria Gleixner * (@offset) up to @offset + clk. 15035cee9645SThomas Gleixner */ 1504500462a9SThomas Gleixner static int next_pending_bucket(struct timer_base *base, unsigned offset, 1505500462a9SThomas Gleixner unsigned clk) 15065cee9645SThomas Gleixner { 1507500462a9SThomas Gleixner unsigned pos, start = offset + clk; 1508500462a9SThomas Gleixner unsigned end = offset + LVL_SIZE; 15095cee9645SThomas Gleixner 1510500462a9SThomas Gleixner pos = find_next_bit(base->pending_map, end, start); 1511500462a9SThomas Gleixner if (pos < end) 1512500462a9SThomas Gleixner return pos - start; 15135cee9645SThomas Gleixner 1514500462a9SThomas Gleixner pos = find_next_bit(base->pending_map, start, offset); 1515500462a9SThomas Gleixner return pos < start ? pos + LVL_SIZE - start : -1; 15165cee9645SThomas Gleixner } 15175cee9645SThomas Gleixner 1518500462a9SThomas Gleixner /* 151923696838SAnna-Maria Gleixner * Search the first expiring timer in the various clock levels. Caller must 152023696838SAnna-Maria Gleixner * hold base->lock. 15215cee9645SThomas Gleixner */ 1522494af3edSThomas Gleixner static unsigned long __next_timer_interrupt(struct timer_base *base) 15235cee9645SThomas Gleixner { 1524500462a9SThomas Gleixner unsigned long clk, next, adj; 1525500462a9SThomas Gleixner unsigned lvl, offset = 0; 15265cee9645SThomas Gleixner 1527500462a9SThomas Gleixner next = base->clk + NEXT_TIMER_MAX_DELTA; 1528500462a9SThomas Gleixner clk = base->clk; 1529500462a9SThomas Gleixner for (lvl = 0; lvl < LVL_DEPTH; lvl++, offset += LVL_SIZE) { 1530500462a9SThomas Gleixner int pos = next_pending_bucket(base, offset, clk & LVL_MASK); 1531001ec1b3SFrederic Weisbecker unsigned long lvl_clk = clk & LVL_CLK_MASK; 15325cee9645SThomas Gleixner 1533500462a9SThomas Gleixner if (pos >= 0) { 1534500462a9SThomas Gleixner unsigned long tmp = clk + (unsigned long) pos; 15355cee9645SThomas Gleixner 1536500462a9SThomas Gleixner tmp <<= LVL_SHIFT(lvl); 1537500462a9SThomas Gleixner if (time_before(tmp, next)) 1538500462a9SThomas Gleixner next = tmp; 1539001ec1b3SFrederic Weisbecker 1540001ec1b3SFrederic Weisbecker /* 1541001ec1b3SFrederic Weisbecker * If the next expiration happens before we reach 1542001ec1b3SFrederic Weisbecker * the next level, no need to check further. 1543001ec1b3SFrederic Weisbecker */ 1544001ec1b3SFrederic Weisbecker if (pos <= ((LVL_CLK_DIV - lvl_clk) & LVL_CLK_MASK)) 1545001ec1b3SFrederic Weisbecker break; 15465cee9645SThomas Gleixner } 15475cee9645SThomas Gleixner /* 1548500462a9SThomas Gleixner * Clock for the next level. If the current level clock lower 1549500462a9SThomas Gleixner * bits are zero, we look at the next level as is. If not we 1550500462a9SThomas Gleixner * need to advance it by one because that's going to be the 1551500462a9SThomas Gleixner * next expiring bucket in that level. base->clk is the next 1552500462a9SThomas Gleixner * expiring jiffie. So in case of: 1553500462a9SThomas Gleixner * 1554500462a9SThomas Gleixner * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 1555500462a9SThomas Gleixner * 0 0 0 0 0 0 1556500462a9SThomas Gleixner * 1557500462a9SThomas Gleixner * we have to look at all levels @index 0. With 1558500462a9SThomas Gleixner * 1559500462a9SThomas Gleixner * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 1560500462a9SThomas Gleixner * 0 0 0 0 0 2 1561500462a9SThomas Gleixner * 1562500462a9SThomas Gleixner * LVL0 has the next expiring bucket @index 2. 
The upper 1563500462a9SThomas Gleixner * levels have the next expiring bucket @index 1. 1564500462a9SThomas Gleixner * 1565500462a9SThomas Gleixner * In case that the propagation wraps the next level the same 1566500462a9SThomas Gleixner * rules apply: 1567500462a9SThomas Gleixner * 1568500462a9SThomas Gleixner * LVL5 LVL4 LVL3 LVL2 LVL1 LVL0 1569500462a9SThomas Gleixner * 0 0 0 0 F 2 1570500462a9SThomas Gleixner * 1571500462a9SThomas Gleixner * So after looking at LVL0 we get: 1572500462a9SThomas Gleixner * 1573500462a9SThomas Gleixner * LVL5 LVL4 LVL3 LVL2 LVL1 1574500462a9SThomas Gleixner * 0 0 0 1 0 1575500462a9SThomas Gleixner * 1576500462a9SThomas Gleixner * So no propagation from LVL1 to LVL2 because that happened 1577500462a9SThomas Gleixner * with the add already, but then we need to propagate further 1578500462a9SThomas Gleixner * from LVL2 to LVL3. 1579500462a9SThomas Gleixner * 1580500462a9SThomas Gleixner * So the simple check whether the lower bits of the current 1581500462a9SThomas Gleixner * level are 0 or not is sufficient for all cases. 15825cee9645SThomas Gleixner */ 1583001ec1b3SFrederic Weisbecker adj = lvl_clk ? 1 : 0; 1584500462a9SThomas Gleixner clk >>= LVL_CLK_SHIFT; 1585500462a9SThomas Gleixner clk += adj; 15865cee9645SThomas Gleixner } 158731cd0e11SFrederic Weisbecker 158831cd0e11SFrederic Weisbecker base->next_expiry_recalc = false; 1589aebacb7fSNicolas Saenz Julienne base->timers_pending = !(next == base->clk + NEXT_TIMER_MAX_DELTA); 159031cd0e11SFrederic Weisbecker 1591500462a9SThomas Gleixner return next; 15925cee9645SThomas Gleixner } 15935cee9645SThomas Gleixner 1594dc2a0f1fSFrederic Weisbecker #ifdef CONFIG_NO_HZ_COMMON 15955cee9645SThomas Gleixner /* 15965cee9645SThomas Gleixner * Check, if the next hrtimer event is before the next timer wheel 15975cee9645SThomas Gleixner * event: 15985cee9645SThomas Gleixner */ 1599c1ad348bSThomas Gleixner static u64 cmp_next_hrtimer_event(u64 basem, u64 expires) 16005cee9645SThomas Gleixner { 1601c1ad348bSThomas Gleixner u64 nextevt = hrtimer_get_next_event(); 16025cee9645SThomas Gleixner 1603c1ad348bSThomas Gleixner /* 1604c1ad348bSThomas Gleixner * If high resolution timers are enabled 1605c1ad348bSThomas Gleixner * hrtimer_get_next_event() returns KTIME_MAX. 1606c1ad348bSThomas Gleixner */ 1607c1ad348bSThomas Gleixner if (expires <= nextevt) 16085cee9645SThomas Gleixner return expires; 16095cee9645SThomas Gleixner 16105cee9645SThomas Gleixner /* 1611c1ad348bSThomas Gleixner * If the next timer is already expired, return the tick base 1612c1ad348bSThomas Gleixner * time so the tick is fired immediately. 16135cee9645SThomas Gleixner */ 1614c1ad348bSThomas Gleixner if (nextevt <= basem) 1615c1ad348bSThomas Gleixner return basem; 16165cee9645SThomas Gleixner 16175cee9645SThomas Gleixner /* 1618c1ad348bSThomas Gleixner * Round up to the next jiffie. High resolution timers are 1619c1ad348bSThomas Gleixner * off, so the hrtimers are expired in the tick and we need to 1620c1ad348bSThomas Gleixner * make sure that this tick really expires the timer to avoid 1621c1ad348bSThomas Gleixner * a ping pong of the nohz stop code. 
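 *
 * Worked example (editor's addition): with HZ=250, TICK_NSEC is
 * 4000000, so a nextevt of 9000000 ns is rounded up to 12000000 ns,
 * i.e. the third tick edge.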
1622c1ad348bSThomas Gleixner * 1623c1ad348bSThomas Gleixner * Use DIV_ROUND_UP_ULL to prevent gcc calling __divdi3 16245cee9645SThomas Gleixner */ 1625c1ad348bSThomas Gleixner return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC; 16265cee9645SThomas Gleixner } 16275cee9645SThomas Gleixner 16285cee9645SThomas Gleixner /** 1629c1ad348bSThomas Gleixner * get_next_timer_interrupt - return the time (clock mono) of the next timer 1630c1ad348bSThomas Gleixner * @basej: base time jiffies 1631c1ad348bSThomas Gleixner * @basem: base time clock monotonic 1632c1ad348bSThomas Gleixner * 1633c1ad348bSThomas Gleixner * Returns the tick aligned clock monotonic time of the next pending 1634c1ad348bSThomas Gleixner * timer or KTIME_MAX if no timer is pending. 16355cee9645SThomas Gleixner */ 1636c1ad348bSThomas Gleixner u64 get_next_timer_interrupt(unsigned long basej, u64 basem) 16375cee9645SThomas Gleixner { 1638500462a9SThomas Gleixner struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 1639c1ad348bSThomas Gleixner u64 expires = KTIME_MAX; 1640c1ad348bSThomas Gleixner unsigned long nextevt; 16415cee9645SThomas Gleixner 16425cee9645SThomas Gleixner /* 16435cee9645SThomas Gleixner * Pretend that there is no timer pending if the cpu is offline. 16445cee9645SThomas Gleixner * Possible pending timers will be migrated later to an active cpu. 16455cee9645SThomas Gleixner */ 16465cee9645SThomas Gleixner if (cpu_is_offline(smp_processor_id())) 16475cee9645SThomas Gleixner return expires; 16485cee9645SThomas Gleixner 16492287d866SSebastian Andrzej Siewior raw_spin_lock(&base->lock); 165031cd0e11SFrederic Weisbecker if (base->next_expiry_recalc) 165131cd0e11SFrederic Weisbecker base->next_expiry = __next_timer_interrupt(base); 165231cd0e11SFrederic Weisbecker nextevt = base->next_expiry; 165331cd0e11SFrederic Weisbecker 1654a683f390SThomas Gleixner /* 1655041ad7bcSThomas Gleixner * We have a fresh next event. Check whether we can forward the 1656041ad7bcSThomas Gleixner * base. We can only do that when @basej is past base->clk 1657041ad7bcSThomas Gleixner * otherwise we might rewind base->clk. 1658a683f390SThomas Gleixner */ 1659041ad7bcSThomas Gleixner if (time_after(basej, base->clk)) { 1660041ad7bcSThomas Gleixner if (time_after(nextevt, basej)) 1661041ad7bcSThomas Gleixner base->clk = basej; 1662a683f390SThomas Gleixner else if (time_after(nextevt, base->clk)) 1663a683f390SThomas Gleixner base->clk = nextevt; 1664041ad7bcSThomas Gleixner } 1665a683f390SThomas Gleixner 1666a683f390SThomas Gleixner if (time_before_eq(nextevt, basej)) { 1667c1ad348bSThomas Gleixner expires = basem; 1668a683f390SThomas Gleixner base->is_idle = false; 1669a683f390SThomas Gleixner } else { 1670aebacb7fSNicolas Saenz Julienne if (base->timers_pending) 167134f41c03SMatija Glavinic Pecotic expires = basem + (u64)(nextevt - basej) * TICK_NSEC; 1672a683f390SThomas Gleixner /* 16732fe59f50SNicholas Piggin * If we expect to sleep more than a tick, mark the base idle. 16742fe59f50SNicholas Piggin * Also the tick is stopped so any added timer must forward 16752fe59f50SNicholas Piggin * the base clk itself to keep granularity small. This idle 16762fe59f50SNicholas Piggin * logic is only maintained for the BASE_STD base, deferrable 16772fe59f50SNicholas Piggin * timers may still see large granularity skew (by design). 
1678a683f390SThomas Gleixner */ 16791f8a4212SFrederic Weisbecker if ((expires - basem) > TICK_NSEC) 1680a683f390SThomas Gleixner base->is_idle = true; 16815cee9645SThomas Gleixner } 16822287d866SSebastian Andrzej Siewior raw_spin_unlock(&base->lock); 16835cee9645SThomas Gleixner 1684c1ad348bSThomas Gleixner return cmp_next_hrtimer_event(basem, expires); 16855cee9645SThomas Gleixner } 168623696838SAnna-Maria Gleixner 1687a683f390SThomas Gleixner /** 1688a683f390SThomas Gleixner * timer_clear_idle - Clear the idle state of the timer base 1689a683f390SThomas Gleixner * 1690a683f390SThomas Gleixner * Called with interrupts disabled 1691a683f390SThomas Gleixner */ 1692a683f390SThomas Gleixner void timer_clear_idle(void) 1693a683f390SThomas Gleixner { 1694a683f390SThomas Gleixner struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 1695a683f390SThomas Gleixner 1696a683f390SThomas Gleixner /* 1697a683f390SThomas Gleixner * We do this unlocked. The worst outcome is a remote enqueue sending 1698a683f390SThomas Gleixner * a pointless IPI, but taking the lock would just make the window for 1699a683f390SThomas Gleixner * sending the IPI a few instructions smaller for the cost of taking 1700a683f390SThomas Gleixner * the lock in the exit from idle path. 1701a683f390SThomas Gleixner */ 1702a683f390SThomas Gleixner base->is_idle = false; 1703a683f390SThomas Gleixner } 17045cee9645SThomas Gleixner #endif 17055cee9645SThomas Gleixner 170673420feaSAnna-Maria Gleixner /** 170773420feaSAnna-Maria Gleixner * __run_timers - run all expired timers (if any) on this CPU. 170873420feaSAnna-Maria Gleixner * @base: the timer vector to be processed. 170973420feaSAnna-Maria Gleixner */ 171073420feaSAnna-Maria Gleixner static inline void __run_timers(struct timer_base *base) 171173420feaSAnna-Maria Gleixner { 171273420feaSAnna-Maria Gleixner struct hlist_head heads[LVL_DEPTH]; 171373420feaSAnna-Maria Gleixner int levels; 171473420feaSAnna-Maria Gleixner 1715d4f7dae8SFrederic Weisbecker if (time_before(jiffies, base->next_expiry)) 171673420feaSAnna-Maria Gleixner return; 171773420feaSAnna-Maria Gleixner 1718030dcdd1SAnna-Maria Gleixner timer_base_lock_expiry(base); 17192287d866SSebastian Andrzej Siewior raw_spin_lock_irq(&base->lock); 172073420feaSAnna-Maria Gleixner 1721d4f7dae8SFrederic Weisbecker while (time_after_eq(jiffies, base->clk) && 1722d4f7dae8SFrederic Weisbecker time_after_eq(jiffies, base->next_expiry)) { 172373420feaSAnna-Maria Gleixner levels = collect_expired_timers(base, heads); 172431cd0e11SFrederic Weisbecker /* 172531cd0e11SFrederic Weisbecker * The only possible reason for not finding any expired 172631cd0e11SFrederic Weisbecker * timer at this clk is that all matching timers have been 172731cd0e11SFrederic Weisbecker * dequeued. 172831cd0e11SFrederic Weisbecker */ 172931cd0e11SFrederic Weisbecker WARN_ON_ONCE(!levels && !base->next_expiry_recalc); 173073420feaSAnna-Maria Gleixner base->clk++; 1731dc2a0f1fSFrederic Weisbecker base->next_expiry = __next_timer_interrupt(base); 173273420feaSAnna-Maria Gleixner 173373420feaSAnna-Maria Gleixner while (levels--) 173473420feaSAnna-Maria Gleixner expire_timers(base, heads + levels); 173573420feaSAnna-Maria Gleixner } 17362287d866SSebastian Andrzej Siewior raw_spin_unlock_irq(&base->lock); 1737030dcdd1SAnna-Maria Gleixner timer_base_unlock_expiry(base); 173873420feaSAnna-Maria Gleixner } 173973420feaSAnna-Maria Gleixner 17405cee9645SThomas Gleixner /* 17415cee9645SThomas Gleixner * This function runs timers and the timer-tq in bottom half context. 
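 * Editor's note: on NO_HZ_COMMON kernels this expires the standard
 * base and then the deferrable base; without NO_HZ_COMMON there is
 * only a single base and one pass suffices.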
17425cee9645SThomas Gleixner */ 17430766f788SEmese Revfy static __latent_entropy void run_timer_softirq(struct softirq_action *h) 17445cee9645SThomas Gleixner { 1745500462a9SThomas Gleixner struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 17465cee9645SThomas Gleixner 17475cee9645SThomas Gleixner __run_timers(base); 1748ced6d5c1SAnna-Maria Gleixner if (IS_ENABLED(CONFIG_NO_HZ_COMMON)) 1749500462a9SThomas Gleixner __run_timers(this_cpu_ptr(&timer_bases[BASE_DEF])); 17505cee9645SThomas Gleixner } 17515cee9645SThomas Gleixner 17525cee9645SThomas Gleixner /* 17535cee9645SThomas Gleixner * Called by the local, per-CPU timer interrupt on SMP. 17545cee9645SThomas Gleixner */ 1755cc947f2bSThomas Gleixner static void run_local_timers(void) 17565cee9645SThomas Gleixner { 17574e85876aSThomas Gleixner struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]); 17584e85876aSThomas Gleixner 17595cee9645SThomas Gleixner hrtimer_run_queues(); 17604e85876aSThomas Gleixner /* Raise the softirq only if required. */ 1761d4f7dae8SFrederic Weisbecker if (time_before(jiffies, base->next_expiry)) { 1762ed4bbf79SThomas Gleixner if (!IS_ENABLED(CONFIG_NO_HZ_COMMON)) 17634e85876aSThomas Gleixner return; 17644e85876aSThomas Gleixner /* CPU is awake, so check the deferrable base. */ 17654e85876aSThomas Gleixner base++; 1766d4f7dae8SFrederic Weisbecker if (time_before(jiffies, base->next_expiry)) 17674e85876aSThomas Gleixner return; 17684e85876aSThomas Gleixner } 17695cee9645SThomas Gleixner raise_softirq(TIMER_SOFTIRQ); 17705cee9645SThomas Gleixner } 17715cee9645SThomas Gleixner 177258e1177bSKees Cook /* 1773cc947f2bSThomas Gleixner * Called from the timer interrupt handler to charge one tick to the current 1774cc947f2bSThomas Gleixner * process. user_tick is 1 if the tick is user time, 0 for system. 1775cc947f2bSThomas Gleixner */ 1776cc947f2bSThomas Gleixner void update_process_times(int user_tick) 1777cc947f2bSThomas Gleixner { 1778cc947f2bSThomas Gleixner struct task_struct *p = current; 1779cc947f2bSThomas Gleixner 1780cc947f2bSThomas Gleixner PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0); 1781cc947f2bSThomas Gleixner 1782cc947f2bSThomas Gleixner /* Note: this timer irq context must be accounted for as well. */ 1783cc947f2bSThomas Gleixner account_process_tick(p, user_tick); 1784cc947f2bSThomas Gleixner run_local_timers(); 1785cc947f2bSThomas Gleixner rcu_sched_clock_irq(user_tick); 1786cc947f2bSThomas Gleixner #ifdef CONFIG_IRQ_WORK 1787cc947f2bSThomas Gleixner if (in_irq()) 1788cc947f2bSThomas Gleixner irq_work_tick(); 1789cc947f2bSThomas Gleixner #endif 1790cc947f2bSThomas Gleixner scheduler_tick(); 1791cc947f2bSThomas Gleixner if (IS_ENABLED(CONFIG_POSIX_TIMERS)) 1792cc947f2bSThomas Gleixner run_posix_cpu_timers(); 1793cc947f2bSThomas Gleixner } 1794cc947f2bSThomas Gleixner 1795cc947f2bSThomas Gleixner /* 179658e1177bSKees Cook * Since schedule_timeout()'s timer is defined on the stack, it must store 179758e1177bSKees Cook * the target task on the stack as well. 
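 * The callback below recovers the enclosing structure with
 * from_timer(), a container_of() wrapper keyed on the timer_list
 * member.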
179858e1177bSKees Cook */ 179958e1177bSKees Cook struct process_timer { 180058e1177bSKees Cook struct timer_list timer; 180158e1177bSKees Cook struct task_struct *task; 180258e1177bSKees Cook }; 180358e1177bSKees Cook 180458e1177bSKees Cook static void process_timeout(struct timer_list *t) 18055cee9645SThomas Gleixner { 180658e1177bSKees Cook struct process_timer *timeout = from_timer(timeout, t, timer); 180758e1177bSKees Cook 180858e1177bSKees Cook wake_up_process(timeout->task); 18095cee9645SThomas Gleixner } 18105cee9645SThomas Gleixner 18115cee9645SThomas Gleixner /** 18125cee9645SThomas Gleixner * schedule_timeout - sleep until timeout 18135cee9645SThomas Gleixner * @timeout: timeout value in jiffies 18145cee9645SThomas Gleixner * 18156e317c32SAlexander Popov * Make the current task sleep until @timeout jiffies have elapsed. 18166e317c32SAlexander Popov * The function behavior depends on the current task state 18176e317c32SAlexander Popov * (see also set_current_state() description): 18185cee9645SThomas Gleixner * 18196e317c32SAlexander Popov * %TASK_RUNNING - the scheduler is called, but the task does not sleep 18206e317c32SAlexander Popov * at all. That happens because sched_submit_work() does nothing for 18216e317c32SAlexander Popov * tasks in %TASK_RUNNING state. 18225cee9645SThomas Gleixner * 18235cee9645SThomas Gleixner * %TASK_UNINTERRUPTIBLE - at least @timeout jiffies are guaranteed to 18244b7e9cf9SDouglas Anderson * pass before the routine returns unless the current task is explicitly 18256e317c32SAlexander Popov * woken up (e.g. by wake_up_process()). 18265cee9645SThomas Gleixner * 18275cee9645SThomas Gleixner * %TASK_INTERRUPTIBLE - the routine may return early if a signal is 18284b7e9cf9SDouglas Anderson * delivered to the current task or the current task is explicitly woken 18294b7e9cf9SDouglas Anderson * up. 18305cee9645SThomas Gleixner * 18316e317c32SAlexander Popov * The current task state is guaranteed to be %TASK_RUNNING when this 18325cee9645SThomas Gleixner * routine returns. 18335cee9645SThomas Gleixner * 18345cee9645SThomas Gleixner * Specifying a @timeout value of %MAX_SCHEDULE_TIMEOUT will schedule 18355cee9645SThomas Gleixner * the CPU away without a bound on the timeout. In this case the return 18365cee9645SThomas Gleixner * value will be %MAX_SCHEDULE_TIMEOUT. 18375cee9645SThomas Gleixner * 18384b7e9cf9SDouglas Anderson * Returns 0 when the timer has expired, otherwise the remaining time in 18394b7e9cf9SDouglas Anderson * jiffies will be returned. In all cases the return value is guaranteed 18404b7e9cf9SDouglas Anderson * to be non-negative. 18415cee9645SThomas Gleixner */ 18425cee9645SThomas Gleixner signed long __sched schedule_timeout(signed long timeout) 18435cee9645SThomas Gleixner { 184458e1177bSKees Cook struct process_timer timer; 18455cee9645SThomas Gleixner unsigned long expire; 18465cee9645SThomas Gleixner 18475cee9645SThomas Gleixner switch (timeout) 18485cee9645SThomas Gleixner { 18495cee9645SThomas Gleixner case MAX_SCHEDULE_TIMEOUT: 18505cee9645SThomas Gleixner /* 18515cee9645SThomas Gleixner * These two special cases are useful to be comfortable 18525cee9645SThomas Gleixner * in the caller. Nothing more. We could take 18535cee9645SThomas Gleixner * MAX_SCHEDULE_TIMEOUT from one of the negative values, 18545cee9645SThomas Gleixner * but I'd like to return a valid offset (>=0) to allow 18555cee9645SThomas Gleixner * the caller to do everything it wants with the retval.
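 *
 * Editor's sketch of the intended use (hypothetical caller):
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	ret = schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 *
 * This sleeps until the task is explicitly woken and then returns
 * %MAX_SCHEDULE_TIMEOUT, so the caller never sees a negative value.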
18565cee9645SThomas Gleixner */ 18575cee9645SThomas Gleixner schedule(); 18585cee9645SThomas Gleixner goto out; 18595cee9645SThomas Gleixner default: 18605cee9645SThomas Gleixner /* 18615cee9645SThomas Gleixner * Another bit of paranoia. Note that the retval will be 18625cee9645SThomas Gleixner * 0 since no piece of the kernel is supposed to check 18635cee9645SThomas Gleixner * for a negative retval of schedule_timeout() (since it 18645cee9645SThomas Gleixner * should never happen anyway). You just have the printk() 18655cee9645SThomas Gleixner * that will tell you if something has gone wrong and where. 18665cee9645SThomas Gleixner */ 18675cee9645SThomas Gleixner if (timeout < 0) { 18685cee9645SThomas Gleixner printk(KERN_ERR "schedule_timeout: wrong timeout " 18695cee9645SThomas Gleixner "value %lx\n", timeout); 18705cee9645SThomas Gleixner dump_stack(); 1871600642aeSPeter Zijlstra __set_current_state(TASK_RUNNING); 18725cee9645SThomas Gleixner goto out; 18735cee9645SThomas Gleixner } 18745cee9645SThomas Gleixner } 18755cee9645SThomas Gleixner 18765cee9645SThomas Gleixner expire = timeout + jiffies; 18775cee9645SThomas Gleixner 187858e1177bSKees Cook timer.task = current; 187958e1177bSKees Cook timer_setup_on_stack(&timer.timer, process_timeout, 0); 188090c01894SEric Dumazet __mod_timer(&timer.timer, expire, MOD_TIMER_NOTPENDING); 18815cee9645SThomas Gleixner schedule(); 188258e1177bSKees Cook del_singleshot_timer_sync(&timer.timer); 18835cee9645SThomas Gleixner 18845cee9645SThomas Gleixner /* Remove the timer from the object tracker */ 188558e1177bSKees Cook destroy_timer_on_stack(&timer.timer); 18865cee9645SThomas Gleixner 18875cee9645SThomas Gleixner timeout = expire - jiffies; 18885cee9645SThomas Gleixner 18895cee9645SThomas Gleixner out: 18905cee9645SThomas Gleixner return timeout < 0 ? 0 : timeout; 18915cee9645SThomas Gleixner } 18925cee9645SThomas Gleixner EXPORT_SYMBOL(schedule_timeout); 18935cee9645SThomas Gleixner 18945cee9645SThomas Gleixner /* 18955cee9645SThomas Gleixner * We can use __set_current_state() here because schedule_timeout() calls 18965cee9645SThomas Gleixner * schedule() unconditionally.
18975cee9645SThomas Gleixner */ 18985cee9645SThomas Gleixner signed long __sched schedule_timeout_interruptible(signed long timeout) 18995cee9645SThomas Gleixner { 19005cee9645SThomas Gleixner __set_current_state(TASK_INTERRUPTIBLE); 19015cee9645SThomas Gleixner return schedule_timeout(timeout); 19025cee9645SThomas Gleixner } 19035cee9645SThomas Gleixner EXPORT_SYMBOL(schedule_timeout_interruptible); 19045cee9645SThomas Gleixner 19055cee9645SThomas Gleixner signed long __sched schedule_timeout_killable(signed long timeout) 19065cee9645SThomas Gleixner { 19075cee9645SThomas Gleixner __set_current_state(TASK_KILLABLE); 19085cee9645SThomas Gleixner return schedule_timeout(timeout); 19095cee9645SThomas Gleixner } 19105cee9645SThomas Gleixner EXPORT_SYMBOL(schedule_timeout_killable); 19115cee9645SThomas Gleixner 19125cee9645SThomas Gleixner signed long __sched schedule_timeout_uninterruptible(signed long timeout) 19135cee9645SThomas Gleixner { 19145cee9645SThomas Gleixner __set_current_state(TASK_UNINTERRUPTIBLE); 19155cee9645SThomas Gleixner return schedule_timeout(timeout); 19165cee9645SThomas Gleixner } 19175cee9645SThomas Gleixner EXPORT_SYMBOL(schedule_timeout_uninterruptible); 19185cee9645SThomas Gleixner 191969b27bafSAndrew Morton /* 192069b27bafSAndrew Morton * Like schedule_timeout_uninterruptible(), except this task will not contribute 192169b27bafSAndrew Morton * to load average. 192269b27bafSAndrew Morton */ 192369b27bafSAndrew Morton signed long __sched schedule_timeout_idle(signed long timeout) 192469b27bafSAndrew Morton { 192569b27bafSAndrew Morton __set_current_state(TASK_IDLE); 192669b27bafSAndrew Morton return schedule_timeout(timeout); 192769b27bafSAndrew Morton } 192869b27bafSAndrew Morton EXPORT_SYMBOL(schedule_timeout_idle); 192969b27bafSAndrew Morton 19305cee9645SThomas Gleixner #ifdef CONFIG_HOTPLUG_CPU 1931494af3edSThomas Gleixner static void migrate_timer_list(struct timer_base *new_base, struct hlist_head *head) 19325cee9645SThomas Gleixner { 19335cee9645SThomas Gleixner struct timer_list *timer; 19340eeda71bSThomas Gleixner int cpu = new_base->cpu; 19355cee9645SThomas Gleixner 19361dabbcecSThomas Gleixner while (!hlist_empty(head)) { 19371dabbcecSThomas Gleixner timer = hlist_entry(head->first, struct timer_list, entry); 19385cee9645SThomas Gleixner detach_timer(timer, false); 19390eeda71bSThomas Gleixner timer->flags = (timer->flags & ~TIMER_BASEMASK) | cpu; 19405cee9645SThomas Gleixner internal_add_timer(new_base, timer); 19415cee9645SThomas Gleixner } 19425cee9645SThomas Gleixner } 19435cee9645SThomas Gleixner 194426456f87SThomas Gleixner int timers_prepare_cpu(unsigned int cpu) 194526456f87SThomas Gleixner { 194626456f87SThomas Gleixner struct timer_base *base; 194726456f87SThomas Gleixner int b; 194826456f87SThomas Gleixner 194926456f87SThomas Gleixner for (b = 0; b < NR_BASES; b++) { 195026456f87SThomas Gleixner base = per_cpu_ptr(&timer_bases[b], cpu); 195126456f87SThomas Gleixner base->clk = jiffies; 195226456f87SThomas Gleixner base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; 1953aebacb7fSNicolas Saenz Julienne base->timers_pending = false; 195426456f87SThomas Gleixner base->is_idle = false; 195526456f87SThomas Gleixner } 195626456f87SThomas Gleixner return 0; 195726456f87SThomas Gleixner } 195826456f87SThomas Gleixner 195924f73b99SRichard Cochran int timers_dead_cpu(unsigned int cpu) 19605cee9645SThomas Gleixner { 1961494af3edSThomas Gleixner struct timer_base *old_base; 1962494af3edSThomas Gleixner struct timer_base *new_base; 1963500462a9SThomas 
Gleixner int b, i; 19645cee9645SThomas Gleixner 19655cee9645SThomas Gleixner BUG_ON(cpu_online(cpu)); 1966500462a9SThomas Gleixner 1967500462a9SThomas Gleixner for (b = 0; b < NR_BASES; b++) { 1968500462a9SThomas Gleixner old_base = per_cpu_ptr(&timer_bases[b], cpu); 1969500462a9SThomas Gleixner new_base = get_cpu_ptr(&timer_bases[b]); 19705cee9645SThomas Gleixner /* 19715cee9645SThomas Gleixner * The caller is globally serialized and nobody else 19725cee9645SThomas Gleixner * takes two locks at once, so deadlock is not possible. 19735cee9645SThomas Gleixner */ 19742287d866SSebastian Andrzej Siewior raw_spin_lock_irq(&new_base->lock); 19752287d866SSebastian Andrzej Siewior raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); 19765cee9645SThomas Gleixner 1977c52232a4SLingutla Chandrasekhar /* 1978c52232a4SLingutla Chandrasekhar * The current CPU's base clock might be stale. Update it 1979c52232a4SLingutla Chandrasekhar * before moving the timers over. 1980c52232a4SLingutla Chandrasekhar */ 1981c52232a4SLingutla Chandrasekhar forward_timer_base(new_base); 1982c52232a4SLingutla Chandrasekhar 19835cee9645SThomas Gleixner BUG_ON(old_base->running_timer); 19845cee9645SThomas Gleixner 1985500462a9SThomas Gleixner for (i = 0; i < WHEEL_SIZE; i++) 1986500462a9SThomas Gleixner migrate_timer_list(new_base, old_base->vectors + i); 19878def9060SViresh Kumar 19882287d866SSebastian Andrzej Siewior raw_spin_unlock(&old_base->lock); 19892287d866SSebastian Andrzej Siewior raw_spin_unlock_irq(&new_base->lock); 1990494af3edSThomas Gleixner put_cpu_ptr(&timer_bases); 19915cee9645SThomas Gleixner } 199224f73b99SRichard Cochran return 0; 19935cee9645SThomas Gleixner } 19945cee9645SThomas Gleixner 19953650b57fSPeter Zijlstra #endif /* CONFIG_HOTPLUG_CPU */ 19965cee9645SThomas Gleixner 19970eeda71bSThomas Gleixner static void __init init_timer_cpu(int cpu) 19988def9060SViresh Kumar { 1999500462a9SThomas Gleixner struct timer_base *base; 2000500462a9SThomas Gleixner int i; 20013650b57fSPeter Zijlstra 2002500462a9SThomas Gleixner for (i = 0; i < NR_BASES; i++) { 2003500462a9SThomas Gleixner base = per_cpu_ptr(&timer_bases[i], cpu); 20048def9060SViresh Kumar base->cpu = cpu; 20052287d866SSebastian Andrzej Siewior raw_spin_lock_init(&base->lock); 2006494af3edSThomas Gleixner base->clk = jiffies; 2007dc2a0f1fSFrederic Weisbecker base->next_expiry = base->clk + NEXT_TIMER_MAX_DELTA; 2008030dcdd1SAnna-Maria Gleixner timer_base_init_expiry_lock(base); 2009500462a9SThomas Gleixner } 20108def9060SViresh Kumar } 20118def9060SViresh Kumar 20128def9060SViresh Kumar static void __init init_timer_cpus(void) 20138def9060SViresh Kumar { 20148def9060SViresh Kumar int cpu; 20158def9060SViresh Kumar 20160eeda71bSThomas Gleixner for_each_possible_cpu(cpu) 20170eeda71bSThomas Gleixner init_timer_cpu(cpu); 20188def9060SViresh Kumar } 20195cee9645SThomas Gleixner 20205cee9645SThomas Gleixner void __init init_timers(void) 20215cee9645SThomas Gleixner { 20228def9060SViresh Kumar init_timer_cpus(); 20231fb497ddSThomas Gleixner posix_cputimers_init_work(); 20245cee9645SThomas Gleixner open_softirq(TIMER_SOFTIRQ, run_timer_softirq); 20255cee9645SThomas Gleixner } 20265cee9645SThomas Gleixner 20275cee9645SThomas Gleixner /** 20285cee9645SThomas Gleixner * msleep - sleep safely even with waitqueue interruptions 20295cee9645SThomas Gleixner * @msecs: Time in milliseconds to sleep for 20305cee9645SThomas Gleixner */ 20315cee9645SThomas Gleixner void msleep(unsigned int msecs) 20325cee9645SThomas Gleixner { 20335cee9645SThomas Gleixner
unsigned long timeout = msecs_to_jiffies(msecs) + 1; 20345cee9645SThomas Gleixner 20355cee9645SThomas Gleixner while (timeout) 20365cee9645SThomas Gleixner timeout = schedule_timeout_uninterruptible(timeout); 20375cee9645SThomas Gleixner } 20385cee9645SThomas Gleixner 20395cee9645SThomas Gleixner EXPORT_SYMBOL(msleep); 20405cee9645SThomas Gleixner 20415cee9645SThomas Gleixner /** 20425cee9645SThomas Gleixner * msleep_interruptible - sleep waiting for signals 20435cee9645SThomas Gleixner * @msecs: Time in milliseconds to sleep for 20445cee9645SThomas Gleixner */ 20455cee9645SThomas Gleixner unsigned long msleep_interruptible(unsigned int msecs) 20465cee9645SThomas Gleixner { 20475cee9645SThomas Gleixner unsigned long timeout = msecs_to_jiffies(msecs) + 1; 20485cee9645SThomas Gleixner 20495cee9645SThomas Gleixner while (timeout && !signal_pending(current)) 20505cee9645SThomas Gleixner timeout = schedule_timeout_interruptible(timeout); 20515cee9645SThomas Gleixner return jiffies_to_msecs(timeout); 20525cee9645SThomas Gleixner } 20535cee9645SThomas Gleixner 20545cee9645SThomas Gleixner EXPORT_SYMBOL(msleep_interruptible); 20555cee9645SThomas Gleixner 20565cee9645SThomas Gleixner /** 2057b5227d03SBjorn Helgaas * usleep_range - Sleep for an approximate time 20585cee9645SThomas Gleixner * @min: Minimum time in usecs to sleep 20595cee9645SThomas Gleixner * @max: Maximum time in usecs to sleep 2060b5227d03SBjorn Helgaas * 2061b5227d03SBjorn Helgaas * In non-atomic context where the exact wakeup time is flexible, use 2062b5227d03SBjorn Helgaas * usleep_range() instead of udelay(). The sleep improves responsiveness 2063b5227d03SBjorn Helgaas * by avoiding the CPU-hogging busy-wait of udelay(), and the range reduces 2064b5227d03SBjorn Helgaas * power usage by allowing hrtimers to take advantage of an already- 2065b5227d03SBjorn Helgaas * scheduled interrupt instead of scheduling a new one just for this sleep. 20665cee9645SThomas Gleixner */ 20672ad5d327SThomas Gleixner void __sched usleep_range(unsigned long min, unsigned long max) 20685cee9645SThomas Gleixner { 20696c5e9059SDouglas Anderson ktime_t exp = ktime_add_us(ktime_get(), min); 20706c5e9059SDouglas Anderson u64 delta = (u64)(max - min) * NSEC_PER_USEC; 20716c5e9059SDouglas Anderson 20726c5e9059SDouglas Anderson for (;;) { 20735cee9645SThomas Gleixner __set_current_state(TASK_UNINTERRUPTIBLE); 20746c5e9059SDouglas Anderson /* Do not return before the requested sleep time has elapsed */ 20756c5e9059SDouglas Anderson if (!schedule_hrtimeout_range(&exp, delta, HRTIMER_MODE_ABS)) 20766c5e9059SDouglas Anderson break; 20776c5e9059SDouglas Anderson } 20785cee9645SThomas Gleixner } 20795cee9645SThomas Gleixner EXPORT_SYMBOL(usleep_range); 2080
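/*
 * Editor's usage sketch, not part of the original file: picking a sleep
 * primitive by scale, per the usleep_range() guidance above and the
 * kernel's timers-howto documentation.
 *
 *	udelay(10);		busy-wait; usable in atomic context
 *	usleep_range(50, 200);	non-atomic, microsecond-range sleeps
 *	msleep(20);		non-atomic, roughly 10 ms and up,
 *				jiffies granularity
 */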