/* CPU control.
 * (C) 2001, 2002, 2003, 2004 Rusty Russell
 *
 * This code is licenced under the GPL.
 */
#include <linux/proc_fs.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/unistd.h>
#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/kthread.h>
#include <linux/stop_machine.h>
#include <linux/mutex.h>
#include <linux/gfp.h>
#include <linux/suspend.h>

#include "smpboot.h"

#ifdef CONFIG_SMP
/* Serializes the updates to cpu_online_mask, cpu_present_mask */
static DEFINE_MUTEX(cpu_add_remove_lock);

/*
 * The following two APIs must be used when attempting
 * to serialize the updates to cpu_online_mask, cpu_present_mask.
 */
void cpu_maps_update_begin(void)
{
	mutex_lock(&cpu_add_remove_lock);
}

void cpu_maps_update_done(void)
{
	mutex_unlock(&cpu_add_remove_lock);
}

static RAW_NOTIFIER_HEAD(cpu_chain);

/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
 * Should always be manipulated under cpu_add_remove_lock
 */
static int cpu_hotplug_disabled;

#ifdef CONFIG_HOTPLUG_CPU

static struct {
	struct task_struct *active_writer;
	struct mutex lock; /* Synchronizes accesses to refcount, */
	/*
	 * Also blocks the new readers during
	 * an ongoing cpu hotplug operation.
	 */
	int refcount;
} cpu_hotplug = {
	.active_writer = NULL,
	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
	.refcount = 0,
};

void get_online_cpus(void)
{
	might_sleep();
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	cpu_hotplug.refcount++;
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);

void put_online_cpus(void)
{
	if (cpu_hotplug.active_writer == current)
		return;
	mutex_lock(&cpu_hotplug.lock);
	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
		wake_up_process(cpu_hotplug.active_writer);
	mutex_unlock(&cpu_hotplug.lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);
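
/*
 * Illustrative sketch only (not part of this file's control flow): a
 * typical reader-side use of the two helpers above brackets any walk of
 * cpu_online_mask, e.g.
 *
 *	get_online_cpus();
 *	for_each_online_cpu(cpu)
 *		do_something(cpu);
 *	put_online_cpus();
 *
 * (do_something() is just a stand-in for whatever per-cpu work the caller
 * does.)  While the refcount is held, _cpu_down()/_cpu_up() below cannot
 * take CPUs offline or bring them online underneath the reader.
 */
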
/*
 * This ensures that the hotplug operation can begin only when the
 * refcount goes to zero.
 *
 * Note that during a cpu-hotplug operation, the new readers, if any,
 * will be blocked by the cpu_hotplug.lock.
 *
 * Since cpu_hotplug_begin() is always called after invoking
 * cpu_maps_update_begin(), we can be sure that only one writer is active.
 *
 * Note that theoretically, there is a possibility of a livelock:
 * - Refcount goes to zero, last reader wakes up the sleeping
 *   writer.
 * - Last reader unlocks the cpu_hotplug.lock.
 * - A new reader arrives at this moment, bumps up the refcount.
 * - The writer acquires the cpu_hotplug.lock, finds the refcount
 *   non-zero and goes to sleep again.
 *
 * However, this is very difficult to achieve in practice since
 * get_online_cpus() is not an API that is called all that often.
 */
static void cpu_hotplug_begin(void)
{
	cpu_hotplug.active_writer = current;

	for (;;) {
		mutex_lock(&cpu_hotplug.lock);
		if (likely(!cpu_hotplug.refcount))
			break;
		__set_current_state(TASK_UNINTERRUPTIBLE);
		mutex_unlock(&cpu_hotplug.lock);
		schedule();
	}
}

static void cpu_hotplug_done(void)
{
	cpu_hotplug.active_writer = NULL;
	mutex_unlock(&cpu_hotplug.lock);
}

#else /* #if CONFIG_HOTPLUG_CPU */
static void cpu_hotplug_begin(void) {}
static void cpu_hotplug_done(void) {}
#endif	/* #else #if CONFIG_HOTPLUG_CPU */

/* Need to know about CPUs going up/down? */
int __ref register_cpu_notifier(struct notifier_block *nb)
{
	int ret;
	cpu_maps_update_begin();
	ret = raw_notifier_chain_register(&cpu_chain, nb);
	cpu_maps_update_done();
	return ret;
}

static int __cpu_notify(unsigned long val, void *v, int nr_to_call,
			int *nr_calls)
{
	int ret;

	ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call,
					nr_calls);

	return notifier_to_errno(ret);
}

static int cpu_notify(unsigned long val, void *v)
{
	return __cpu_notify(val, v, -1, NULL);
}

#ifdef CONFIG_HOTPLUG_CPU

static void cpu_notify_nofail(unsigned long val, void *v)
{
	BUG_ON(cpu_notify(val, v));
}
EXPORT_SYMBOL(register_cpu_notifier);

void __ref unregister_cpu_notifier(struct notifier_block *nb)
{
	cpu_maps_update_begin();
	raw_notifier_chain_unregister(&cpu_chain, nb);
	cpu_maps_update_done();
}
EXPORT_SYMBOL(unregister_cpu_notifier);

static inline void check_for_tasks(int cpu)
{
	struct task_struct *p;

	write_lock_irq(&tasklist_lock);
	for_each_process(p) {
		if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
		    (p->utime || p->stime))
			printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d "
				"(state = %ld, flags = %x)\n",
				p->comm, task_pid_nr(p), cpu,
				p->state, p->flags);
	}
	write_unlock_irq(&tasklist_lock);
}

struct take_cpu_down_param {
	unsigned long mod;
	void *hcpu;
};

/* Take this CPU down. */
static int __ref take_cpu_down(void *_param)
{
	struct take_cpu_down_param *param = _param;
	int err;

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	cpu_notify(CPU_DYING | param->mod, param->hcpu);
	return 0;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
{
	int err, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct take_cpu_down_param tcd_param = {
		.mod = mod,
		.hcpu = hcpu,
	};

	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_online(cpu))
		return -EINVAL;

	cpu_hotplug_begin();

	err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls);
	if (err) {
		nr_calls--;
		__cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL);
		printk("%s: attempt to take down CPU %u failed\n",
				__func__, cpu);
		goto out_release;
	}
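
	/*
	 * take_cpu_down() runs on the dying CPU via stop_machine(): every
	 * other online CPU spins with interrupts disabled while it calls
	 * __cpu_disable() and sends the CPU_DYING notification, so that
	 * step is effectively atomic with respect to the rest of the system.
	 */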
	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
	if (err) {
		/* CPU didn't die: tell everyone. Can't complain. */
		cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu);

		goto out_release;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The migration_call() CPU_DYING callback will have removed all
	 * runnable tasks from the cpu, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
	 */
	while (!idle_cpu(cpu))
		cpu_relax();

	/* This actually kills the CPU. */
	__cpu_die(cpu);

	/* CPU is completely dead: tell everyone. Too late to complain. */
	cpu_notify_nofail(CPU_DEAD | mod, hcpu);

	check_for_tasks(cpu);

out_release:
	cpu_hotplug_done();
	if (!err)
		cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu);
	return err;
}

int __ref cpu_down(unsigned int cpu)
{
	int err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_down(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL(cpu_down);
#endif /*CONFIG_HOTPLUG_CPU*/

/* Requires cpu_add_remove_lock to be held */
static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
{
	int ret, nr_calls = 0;
	void *hcpu = (void *)(long)cpu;
	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
	struct task_struct *idle;

	if (cpu_online(cpu) || !cpu_present(cpu))
		return -EINVAL;

	cpu_hotplug_begin();

	idle = idle_thread_get(cpu);
	if (IS_ERR(idle)) {
		ret = PTR_ERR(idle);
		goto out;
	}

	ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls);
	if (ret) {
		nr_calls--;
		printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n",
				__func__, cpu);
		goto out_notify;
	}

	/* Arch-specific enabling code. */
	ret = __cpu_up(cpu, idle);
	if (ret != 0)
		goto out_notify;
	BUG_ON(!cpu_online(cpu));
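
	/*
	 * At this point the new CPU is running its idle thread and is
	 * already set in cpu_online_mask (hence the BUG_ON above); the
	 * CPU_ONLINE notification below lets the other subsystems start
	 * making use of it.
	 */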
	/* Now call notifier in preparation. */
	cpu_notify(CPU_ONLINE | mod, hcpu);

out_notify:
	if (ret != 0)
		__cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
out:
	cpu_hotplug_done();

	return ret;
}

int __cpuinit cpu_up(unsigned int cpu)
{
	int err = 0;

#ifdef CONFIG_MEMORY_HOTPLUG
	int nid;
	pg_data_t *pgdat;
#endif

	if (!cpu_possible(cpu)) {
		printk(KERN_ERR "can't online cpu %d because it is not "
			"configured as may-hotadd at boot time\n", cpu);
#if defined(CONFIG_IA64)
		printk(KERN_ERR "please check additional_cpus= boot "
				"parameter\n");
#endif
		return -EINVAL;
	}

#ifdef CONFIG_MEMORY_HOTPLUG
	nid = cpu_to_node(cpu);
	if (!node_online(nid)) {
		err = mem_online_node(nid);
		if (err)
			return err;
	}

	pgdat = NODE_DATA(nid);
	if (!pgdat) {
		printk(KERN_ERR
			"Can't online cpu %d due to NULL pgdat\n", cpu);
		return -ENOMEM;
	}

	if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
		mutex_lock(&zonelists_mutex);
		build_all_zonelists(NULL);
		mutex_unlock(&zonelists_mutex);
	}
#endif

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}

	err = _cpu_up(cpu, 0);

out:
	cpu_maps_update_done();
	return err;
}
EXPORT_SYMBOL_GPL(cpu_up);

#ifdef CONFIG_PM_SLEEP_SMP
static cpumask_var_t frozen_cpus;

void __weak arch_disable_nonboot_cpus_begin(void)
{
}

void __weak arch_disable_nonboot_cpus_end(void)
{
}

int disable_nonboot_cpus(void)
{
	int cpu, first_cpu, error = 0;

	cpu_maps_update_begin();
	first_cpu = cpumask_first(cpu_online_mask);
	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);
	arch_disable_nonboot_cpus_begin();

	printk("Disabling non-boot CPUs ...\n");
	for_each_online_cpu(cpu) {
		if (cpu == first_cpu)
			continue;
		error = _cpu_down(cpu, 1);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			printk(KERN_ERR "Error taking CPU%d down: %d\n",
				cpu, error);
			break;
		}
	}

	arch_disable_nonboot_cpus_end();

	if (!error) {
		BUG_ON(num_online_cpus() > 1);
		/* Make sure the CPUs won't be enabled by someone else */
		cpu_hotplug_disabled = 1;
	} else {
		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
	}
	cpu_maps_update_done();
	return error;
}

void __weak arch_enable_nonboot_cpus_begin(void)
{
}

void __weak arch_enable_nonboot_cpus_end(void)
{
}

void __ref enable_nonboot_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	if (cpumask_empty(frozen_cpus))
		goto out;

	printk(KERN_INFO "Enabling non-boot CPUs ...\n");

	arch_enable_nonboot_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		error = _cpu_up(cpu, 1);
		if (!error) {
			printk(KERN_INFO "CPU%d is up\n", cpu);
			continue;
		}
		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_enable_nonboot_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}
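
/*
 * Illustrative only: the suspend/hibernate core calls the two helpers
 * above roughly like this around entering a sleep state:
 *
 *	error = disable_nonboot_cpus();
 *	if (!error) {
 *		... enter the sleep state on the boot CPU ...
 *		enable_nonboot_cpus();
 *	}
 *
 * frozen_cpus remembers which CPUs were taken down, so that only those
 * are brought back online on resume.
 */
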
static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU
 * hotplug when tasks are about to be frozen. Also, don't allow the freezer
 * to continue until any currently running CPU hotplug operation gets
 * completed.
 * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the
 * 'cpu_add_remove_lock'. And this same lock is also taken by the regular
 * CPU hotplug path and released only after it is complete. Thus, we
 * (and hence the freezer) will block here until any currently running CPU
 * hotplug operation gets completed.
 */
void cpu_hotplug_disable_before_freeze(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 1;
	cpu_maps_update_done();
}

/*
 * When tasks have been thawed, re-enable regular CPU hotplug (which had been
 * disabled while beginning to freeze tasks).
 */
void cpu_hotplug_enable_after_thaw(void)
{
	cpu_maps_update_begin();
	cpu_hotplug_disabled = 0;
	cpu_maps_update_done();
}

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
 *
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable_before_freeze();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable_after_thaw();
		break;

	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}

static int __init cpu_hotplug_pm_sync_init(void)
{
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

/**
 * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers
 * @cpu: cpu that just started
 *
 * This function calls the cpu_chain notifiers with CPU_STARTING.
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void __cpuinit notify_cpu_starting(unsigned int cpu)
{
	unsigned long val = CPU_STARTING;

#ifdef CONFIG_PM_SLEEP_SMP
	if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus))
		val = CPU_STARTING_FROZEN;
#endif /* CONFIG_PM_SLEEP_SMP */
	cpu_notify(val, (void *)(long)cpu);
}

#endif /* CONFIG_SMP */

/*
 * cpu_bit_bitmap[] is a special, "compressed" data structure that
 * represents all NR_CPUS bits binary values of 1<<nr.
 *
 * It is used by cpumask_of() to get a constant address to a CPU
 * mask value that has a single bit set only.
 */
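
/*
 * For illustration, cpumask_of(cpu) is built on top of this table roughly
 * as the get_cpu_mask() helper in include/linux/cpumask.h does it:
 *
 *	const unsigned long *p = cpu_bit_bitmap[1 + cpu % BITS_PER_LONG];
 *	p -= cpu / BITS_PER_LONG;
 *	return to_cpumask(p);
 *
 * The row picked by (cpu % BITS_PER_LONG) has the right bit set in its
 * first word; backing the pointer up by (cpu / BITS_PER_LONG) words moves
 * that word into the right position inside the returned mask. Row 0 is
 * kept empty precisely so the pointer may back up into it.
 */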

/* cpu_bit_bitmap[0] is empty - so we can back into it */
#define MASK_DECLARE_1(x)	[x+1][0] = (1UL << (x))
#define MASK_DECLARE_2(x)	MASK_DECLARE_1(x), MASK_DECLARE_1(x+1)
#define MASK_DECLARE_4(x)	MASK_DECLARE_2(x), MASK_DECLARE_2(x+2)
#define MASK_DECLARE_8(x)	MASK_DECLARE_4(x), MASK_DECLARE_4(x+4)

const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = {

	MASK_DECLARE_8(0),	MASK_DECLARE_8(8),
	MASK_DECLARE_8(16),	MASK_DECLARE_8(24),
#if BITS_PER_LONG > 32
	MASK_DECLARE_8(32),	MASK_DECLARE_8(40),
	MASK_DECLARE_8(48),	MASK_DECLARE_8(56),
#endif
};
EXPORT_SYMBOL_GPL(cpu_bit_bitmap);

const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL;
EXPORT_SYMBOL(cpu_all_bits);

#ifdef CONFIG_INIT_ALL_POSSIBLE
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly
	= CPU_BITS_ALL;
#else
static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly;
#endif
const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits);
EXPORT_SYMBOL(cpu_possible_mask);

static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits);
EXPORT_SYMBOL(cpu_online_mask);

static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits);
EXPORT_SYMBOL(cpu_present_mask);

static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly;
const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits);
EXPORT_SYMBOL(cpu_active_mask);

void set_cpu_possible(unsigned int cpu, bool possible)
{
	if (possible)
		cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits));
}

void set_cpu_present(unsigned int cpu, bool present)
{
	if (present)
		cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits));
}

void set_cpu_online(unsigned int cpu, bool online)
{
	if (online)
		cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits));
}

void set_cpu_active(unsigned int cpu, bool active)
{
	if (active)
		cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits));
	else
		cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits));
}

void init_cpu_present(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_present_bits), src);
}

void init_cpu_possible(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_possible_bits), src);
}

void init_cpu_online(const struct cpumask *src)
{
	cpumask_copy(to_cpumask(cpu_online_bits), src);
}
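
/*
 * Illustrative only: architecture boot code typically fills in the
 * possible/present masks while enumerating CPUs, along the lines of
 *
 *	for (i = 0; i < ncpus; i++) {		(ncpus: firmware-reported count)
 *		set_cpu_possible(i, true);
 *		set_cpu_present(i, true);
 *	}
 *
 * while cpu_online_bits and cpu_active_bits are flipped at runtime through
 * set_cpu_online()/set_cpu_active() as CPUs are brought up and torn down.
 */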