/*
 * kexec.c - kexec system call core code.
 * Copyright (C) 2002-2004 Eric Biederman <[email protected]>
 *
 * This source code is licensed under the GNU General Public License,
 * Version 2.  See the file COPYING for more details.
 */

#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt

#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/syscalls.h>
#include <linux/reboot.h>
#include <linux/ioport.h>
#include <linux/hardirq.h>
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/utsname.h>
#include <linux/numa.h>
#include <linux/suspend.h>
#include <linux/device.h>
#include <linux/freezer.h>
#include <linux/pm.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/console.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/syscore_ops.h>
#include <linux/compiler.h>
#include <linux/hugetlb.h>

#include <asm/page.h>
#include <asm/sections.h>

#include <crypto/hash.h>
#include <crypto/sha.h>
#include "kexec_internal.h"

DEFINE_MUTEX(kexec_mutex);

/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;

/* vmcoreinfo stuff */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
size_t vmcoreinfo_size;
size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);

/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;


/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
struct resource crashk_low_res = {
	.name  = "Crash kernel",
	.start = 0,
	.end   = 0,
	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
};

int kexec_should_crash(struct task_struct *p)
{
	/*
	 * If crash_kexec_post_notifiers is enabled, don't run
	 * crash_kexec() here yet, which must be run after panic
	 * notifiers in panic().
	 */
	if (crash_kexec_post_notifiers)
		return 0;
	/*
	 * There are 4 panic() calls in do_exit() path, each of which
	 * corresponds to each of these 4 conditions.
	 */
	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
		return 1;
	return 0;
}

/*
 * When kexec transitions to the new kernel there is a one-to-one
 * mapping between physical and virtual addresses.  On processors
 * where you can disable the MMU this is trivial, and easy.  For
 * others it is still a simple predictable page table to setup.
 *
 * In that environment kexec copies the new kernel to its final
 * resting place.  This means I can only support memory whose
 * physical address can fit in an unsigned long.  In particular
 * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
 * If the assembly stub has more restrictive requirements
 * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
 * defined more restrictively in <asm/kexec.h>.
 *
 * The code for the transition from the current kernel to the
 * new kernel is placed in the control_code_buffer, whose size
 * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
 * page of memory is necessary, but some architectures require more.
 * Because this memory must be identity mapped in the transition from
 * virtual to physical addresses it must live in the range
 * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
 * modifiable.
 *
 * The assembly stub in the control code buffer is passed a linked list
 * of descriptor pages detailing the source pages of the new kernel,
 * and the destination addresses of those source pages.  As this data
 * structure is not used in the context of the current OS, it must
 * be self-contained.
 *
 * The code has been made to work with highmem pages and will use a
 * destination page in its final resting place (if it happens
 * to allocate it).  The end product of this is that most of the
 * physical address space, and most of RAM can be used.
 *
 * Future directions include:
 *  - allocating a page table with the control code buffer identity
 *    mapped, to simplify machine_kexec and make kexec_on_panic more
 *    reliable.
 */

/*
 * KIMAGE_NO_DEST is an impossible destination address..., for
 * allocating pages whose destination address we do not care about.
 */
#define KIMAGE_NO_DEST (-1UL)

static struct page *kimage_alloc_page(struct kimage *image,
				       gfp_t gfp_mask,
				       unsigned long dest);

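/*
 * sanity_check_segment_list - check the caller-supplied segment layout.
 *
 * In short, the checks below require that every segment is page aligned
 * and below the architecture's destination limit, that no two segments
 * overlap, that bufsz never exceeds memsz, and that crash images only
 * touch the memory reserved in crashk_res.
 */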
int sanity_check_segment_list(struct kimage *image)
{
	int result, i;
	unsigned long nr_segments = image->nr_segments;

	/*
	 * Verify we have good destination addresses.  The caller is
	 * responsible for making certain we don't attempt to load
	 * the new image into invalid or reserved areas of RAM.  This
	 * just verifies it is an address we can use.
	 *
	 * Since the kernel does everything in page size chunks ensure
	 * the destination addresses are page aligned.  Too many
	 * special cases crop up when we don't do this.  The most
	 * insidious is getting overlapping destination addresses
	 * simply because addresses are changed to page size
	 * granularity.
	 */
	result = -EADDRNOTAVAIL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
			return result;
		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
			return result;
	}

	/* Verify our destination addresses do not overlap.
	 * If we allowed overlapping destination addresses
	 * through, very weird things can happen with no
	 * easy explanation as one segment stops on another.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		unsigned long mstart, mend;
		unsigned long j;

		mstart = image->segment[i].mem;
		mend   = mstart + image->segment[i].memsz;
		for (j = 0; j < i; j++) {
			unsigned long pstart, pend;

			pstart = image->segment[j].mem;
			pend   = pstart + image->segment[j].memsz;
			/* Do the segments overlap ? */
			if ((mend > pstart) && (mstart < pend))
				return result;
		}
	}

	/* Ensure our buffer sizes are strictly less than
	 * our memory sizes.  This should always be the case,
	 * and it is easier to check up front than to be surprised
	 * later on.
	 */
	result = -EINVAL;
	for (i = 0; i < nr_segments; i++) {
		if (image->segment[i].bufsz > image->segment[i].memsz)
			return result;
	}

	/*
	 * Verify we have good destination addresses.  Normally
	 * the caller is responsible for making certain we don't
	 * attempt to load the new image into invalid or reserved
	 * areas of RAM.  But crash kernels are preloaded into a
	 * reserved area of RAM.  We must ensure the addresses
	 * are in the reserved area otherwise preloading the
	 * kernel could corrupt things.
	 */

	if (image->type == KEXEC_TYPE_CRASH) {
		result = -EADDRNOTAVAIL;
		for (i = 0; i < nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend = mstart + image->segment[i].memsz - 1;
			/* Ensure we are within the crash kernel limits */
			if ((mstart < crashk_res.start) ||
			    (mend > crashk_res.end))
				return result;
		}
	}

	return 0;
}

struct kimage *do_kimage_alloc_init(void)
{
	struct kimage *image;

	/* Allocate a controlling structure */
	image = kzalloc(sizeof(*image), GFP_KERNEL);
	if (!image)
		return NULL;

	image->head = 0;
	image->entry = &image->head;
	image->last_entry = &image->head;
	image->control_page = ~0; /* By default this does not apply */
	image->type = KEXEC_TYPE_DEFAULT;

	/* Initialize the list of control pages */
	INIT_LIST_HEAD(&image->control_pages);

	/* Initialize the list of destination pages */
	INIT_LIST_HEAD(&image->dest_pages);

	/* Initialize the list of unusable pages */
	INIT_LIST_HEAD(&image->unusable_pages);

	return image;
}

int kimage_is_destination_range(struct kimage *image,
					unsigned long start,
					unsigned long end)
{
	unsigned long i;

	for (i = 0; i < image->nr_segments; i++) {
		unsigned long mstart, mend;

		mstart = image->segment[i].mem;
		mend = mstart + image->segment[i].memsz;
		if ((end > mstart) && (start < mend))
			return 1;
	}

	return 0;
}

static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *pages;

	pages = alloc_pages(gfp_mask, order);
	if (pages) {
		unsigned int count, i;

		pages->mapping = NULL;
		set_page_private(pages, order);
		count = 1 << order;
		for (i = 0; i < count; i++)
			SetPageReserved(pages + i);
	}

	return pages;
}

static void kimage_free_pages(struct page *page)
{
	unsigned int order, count, i;

	order = page_private(page);
	count = 1 << order;
	for (i = 0; i < count; i++)
		ClearPageReserved(page + i);
	__free_pages(page, order);
}

void kimage_free_page_list(struct list_head *list)
{
	struct list_head *pos, *next;

	list_for_each_safe(pos, next, list) {
		struct page *page;

		page = list_entry(pos, struct page, lru);
		list_del(&page->lru);
		kimage_free_pages(page);
	}
}

static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
							unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * At worst this runs in O(N) of the image size.
	 */
	struct list_head extra_pages;
	struct page *pages;
	unsigned int count;

	count = 1 << order;
	INIT_LIST_HEAD(&extra_pages);

	/* Loop while I can allocate a page and the page allocated
	 * is a destination page.
	 */
	do {
		unsigned long pfn, epfn, addr, eaddr;

		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
		if (!pages)
			break;
		pfn   = page_to_pfn(pages);
		epfn  = pfn + count;
		addr  = pfn << PAGE_SHIFT;
		eaddr = epfn << PAGE_SHIFT;
		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
			      kimage_is_destination_range(image, addr, eaddr)) {
			list_add(&pages->lru, &extra_pages);
			pages = NULL;
		}
	} while (!pages);

	if (pages) {
		/* Remember the allocated page... */
		list_add(&pages->lru, &image->control_pages);

		/* Because the page is already in its destination
		 * location we will never allocate another page at
		 * that address.  Therefore kimage_alloc_pages
		 * will not return it (again) and we don't need
		 * to give it an entry in image->segment[].
		 */
	}
	/* Deal with the destination pages I have inadvertently allocated.
	 *
	 * Ideally I would convert multi-page allocations into single
	 * page allocations, and add everything to image->dest_pages.
	 *
	 * For now it is simpler to just free the pages.
	 */
	kimage_free_page_list(&extra_pages);

	return pages;
}

static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
						      unsigned int order)
{
	/* Control pages are special, they are the intermediaries
	 * that are needed while we copy the rest of the pages
	 * to their final resting place.  As such they must
	 * not conflict with either the destination addresses
	 * or memory the kernel is already using.
	 *
	 * Control pages are also the only pages we must allocate
	 * when loading a crash kernel.  All of the other pages
	 * are specified by the segments and we just memcpy
	 * into them directly.
	 *
	 * The only case where we really need more than one of
	 * these is for architectures where we cannot disable
	 * the MMU and must instead generate an identity mapped
	 * page table for all of the memory.
	 *
	 * Given the low demand this implements a very simple
	 * allocator that finds the first hole of the appropriate
	 * size in the reserved memory region, and allocates all
	 * of the memory up to and including the hole.
	 */
	unsigned long hole_start, hole_end, size;
	struct page *pages;

	pages = NULL;
	size = (1 << order) << PAGE_SHIFT;
	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
	hole_end   = hole_start + size - 1;
	while (hole_end <= crashk_res.end) {
		unsigned long i;

		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
			break;
		/* See if I overlap any of the segments */
		for (i = 0; i < image->nr_segments; i++) {
			unsigned long mstart, mend;

			mstart = image->segment[i].mem;
			mend   = mstart + image->segment[i].memsz - 1;
			if ((hole_end >= mstart) && (hole_start <= mend)) {
				/* Advance the hole to the end of the segment */
				hole_start = (mend + (size - 1)) & ~(size - 1);
				hole_end   = hole_start + size - 1;
				break;
			}
		}
		/* If I don't overlap any segments I have found my hole! */
		if (i == image->nr_segments) {
			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
			image->control_page = hole_end;
			break;
		}
	}

	return pages;
}


struct page *kimage_alloc_control_pages(struct kimage *image,
					 unsigned int order)
{
	struct page *pages = NULL;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		pages = kimage_alloc_normal_control_pages(image, order);
		break;
	case KEXEC_TYPE_CRASH:
		pages = kimage_alloc_crash_control_pages(image, order);
		break;
	}

	return pages;
}

static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
{
	if (*image->entry != 0)
		image->entry++;

	if (image->entry == image->last_entry) {
		kimage_entry_t *ind_page;
		struct page *page;

		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
		if (!page)
			return -ENOMEM;

		ind_page = page_address(page);
		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
		image->entry = ind_page;
		image->last_entry = ind_page +
				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
	}
	*image->entry = entry;
	image->entry++;
	*image->entry = 0;

	return 0;
}

static int kimage_set_destination(struct kimage *image,
				   unsigned long destination)
{
	int result;

	destination &= PAGE_MASK;
	result = kimage_add_entry(image, destination | IND_DESTINATION);

	return result;
}


static int kimage_add_page(struct kimage *image, unsigned long page)
{
	int result;

	page &= PAGE_MASK;
	result = kimage_add_entry(image, page | IND_SOURCE);

	return result;
}


static void kimage_free_extra_pages(struct kimage *image)
{
	/* Walk through and free any extra destination pages I may have */
	kimage_free_page_list(&image->dest_pages);

	/* Walk through and free any unusable pages I have cached */
	kimage_free_page_list(&image->unusable_pages);

}

void kimage_terminate(struct kimage *image)
{
	if (*image->entry != 0)
		image->entry++;

	*image->entry = IND_DONE;
}
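/*
 * A quick sketch of the entry list built by kimage_add_entry() above:
 * each kimage_entry_t is a page aligned physical address with one of
 * the IND_* type bits set in its low bits.  IND_DESTINATION starts a
 * run of destination pages, every IND_SOURCE that follows names a
 * source page to be copied to the next destination address in that
 * run, IND_INDIRECTION chains to the next page full of entries, and
 * IND_DONE terminates the list.  This is the linked list of descriptor
 * pages, mentioned at the top of this file, that the assembly stub
 * consumes; the macro below walks it in the same way.
 */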
#define for_each_kimage_entry(image, ptr, entry) \
	for (ptr = &image->head; (entry = *ptr) && !(entry & IND_DONE); \
		ptr = (entry & IND_INDIRECTION) ? \
			phys_to_virt((entry & PAGE_MASK)) : ptr + 1)

static void kimage_free_entry(kimage_entry_t entry)
{
	struct page *page;

	page = pfn_to_page(entry >> PAGE_SHIFT);
	kimage_free_pages(page);
}

void kimage_free(struct kimage *image)
{
	kimage_entry_t *ptr, entry;
	kimage_entry_t ind = 0;

	if (!image)
		return;

	kimage_free_extra_pages(image);
	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_INDIRECTION) {
			/* Free the previous indirection page */
			if (ind & IND_INDIRECTION)
				kimage_free_entry(ind);
			/* Save this indirection page until we are
			 * done with it.
			 */
			ind = entry;
		} else if (entry & IND_SOURCE)
			kimage_free_entry(entry);
	}
	/* Free the final indirection page */
	if (ind & IND_INDIRECTION)
		kimage_free_entry(ind);

	/* Handle any machine specific cleanup */
	machine_kexec_cleanup(image);

	/* Free the kexec control pages... */
	kimage_free_page_list(&image->control_pages);

	/*
	 * Free up any temporary buffers allocated.  This might hit if
	 * error occurred much later after buffer allocation.
	 */
	if (image->file_mode)
		kimage_file_post_load_cleanup(image);

	kfree(image);
}

static kimage_entry_t *kimage_dst_used(struct kimage *image,
					unsigned long page)
{
	kimage_entry_t *ptr, entry;
	unsigned long destination = 0;

	for_each_kimage_entry(image, ptr, entry) {
		if (entry & IND_DESTINATION)
			destination = entry & PAGE_MASK;
		else if (entry & IND_SOURCE) {
			if (page == destination)
				return ptr;
			destination += PAGE_SIZE;
		}
	}

	return NULL;
}

static struct page *kimage_alloc_page(struct kimage *image,
					gfp_t gfp_mask,
					unsigned long destination)
{
	/*
	 * Here we implement safeguards to ensure that a source page
	 * is not copied to its destination page before the data on
	 * the destination page is no longer useful.
	 *
	 * To do this we maintain the invariant that a source page is
	 * either its own destination page, or it is not a
	 * destination page at all.
	 *
	 * That is slightly stronger than required, but the proof
	 * that no problems will occur is trivial, and the
	 * implementation is simple to verify.
	 *
	 * When allocating all pages normally this algorithm will run
	 * in O(N) time, but in the worst case it will run in O(N^2)
	 * time.  If the runtime is a problem the data structures can
	 * be fixed.
	 */
	struct page *page;
	unsigned long addr;

	/*
	 * Walk through the list of destination pages, and see if I
	 * have a match.
	 */
	list_for_each_entry(page, &image->dest_pages, lru) {
		addr = page_to_pfn(page) << PAGE_SHIFT;
		if (addr == destination) {
			list_del(&page->lru);
			return page;
		}
	}
	page = NULL;
	while (1) {
		kimage_entry_t *old;

		/* Allocate a page, if we run out of memory give up */
		page = kimage_alloc_pages(gfp_mask, 0);
		if (!page)
			return NULL;
		/* If the page cannot be used file it away */
		if (page_to_pfn(page) >
				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
			list_add(&page->lru, &image->unusable_pages);
			continue;
		}
		addr = page_to_pfn(page) << PAGE_SHIFT;

		/* If it is the destination page we want use it */
		if (addr == destination)
			break;

		/* If the page is not a destination page use it */
		if (!kimage_is_destination_range(image, addr,
						  addr + PAGE_SIZE))
			break;

		/*
		 * I know that the page is someone's destination page.
		 * See if there is already a source page for this
		 * destination page.  And if so swap the source pages.
		 */
		old = kimage_dst_used(image, addr);
		if (old) {
			/* If so move it */
			unsigned long old_addr;
			struct page *old_page;

			old_addr = *old & PAGE_MASK;
			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
			copy_highpage(page, old_page);
			*old = addr | (*old & ~PAGE_MASK);

			/* The old page I have found cannot be a
			 * destination page, so return it if its
			 * gfp_flags honor the ones passed in.
			 */
			if (!(gfp_mask & __GFP_HIGHMEM) &&
			    PageHighMem(old_page)) {
				kimage_free_pages(old_page);
				continue;
			}
			addr = old_addr;
			page = old_page;
			break;
		}
		/* Place the page on the destination list, to be used later */
		list_add(&page->lru, &image->dest_pages);
	}

	return page;
}

static int kimage_load_normal_segment(struct kimage *image,
					 struct kexec_segment *segment)
{
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;

	result = kimage_set_destination(image, maddr);
	if (result < 0)
		goto out;

	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		result = kimage_add_page(image, page_to_pfn(page)
								<< PAGE_SHIFT);
		if (result < 0)
			goto out;

		ptr = kmap(page);
		/* Start with a clear page */
		clear_page(ptr);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

static int kimage_load_crash_segment(struct kimage *image,
					struct kexec_segment *segment)
{
	/* For crash dump kernels we simply copy the data from
	 * user space to its destination.
	 * We do things a page at a time for the sake of kmap.
	 */
	unsigned long maddr;
	size_t ubytes, mbytes;
	int result;
	unsigned char __user *buf = NULL;
	unsigned char *kbuf = NULL;

	result = 0;
	if (image->file_mode)
		kbuf = segment->kbuf;
	else
		buf = segment->buf;
	ubytes = segment->bufsz;
	mbytes = segment->memsz;
	maddr = segment->mem;
	while (mbytes) {
		struct page *page;
		char *ptr;
		size_t uchunk, mchunk;

		page = pfn_to_page(maddr >> PAGE_SHIFT);
		if (!page) {
			result  = -ENOMEM;
			goto out;
		}
		ptr = kmap(page);
		ptr += maddr & ~PAGE_MASK;
		mchunk = min_t(size_t, mbytes,
				PAGE_SIZE - (maddr & ~PAGE_MASK));
		uchunk = min(ubytes, mchunk);
		if (mchunk > uchunk) {
			/* Zero the trailing part of the page */
			memset(ptr + uchunk, 0, mchunk - uchunk);
		}

		/* For file based kexec, source pages are in kernel memory */
		if (image->file_mode)
			memcpy(ptr, kbuf, uchunk);
		else
			result = copy_from_user(ptr, buf, uchunk);
		kexec_flush_icache_page(page);
		kunmap(page);
		if (result) {
			result = -EFAULT;
			goto out;
		}
		ubytes -= uchunk;
		maddr  += mchunk;
		if (image->file_mode)
			kbuf += mchunk;
		else
			buf += mchunk;
		mbytes -= mchunk;
	}
out:
	return result;
}

int kimage_load_segment(struct kimage *image,
				struct kexec_segment *segment)
{
	int result = -ENOMEM;

	switch (image->type) {
	case KEXEC_TYPE_DEFAULT:
		result = kimage_load_normal_segment(image, segment);
		break;
	case KEXEC_TYPE_CRASH:
		result = kimage_load_crash_segment(image, segment);
		break;
	}

	return result;
}

struct kimage *kexec_image;
struct kimage *kexec_crash_image;
int kexec_load_disabled;

/*
 * No panic_cpu check version of crash_kexec().  This function is called
 * only when panic_cpu holds the current CPU number; this is the only CPU
 * which processes crash_kexec routines.
 */
void __crash_kexec(struct pt_regs *regs)
{
	/* Take the kexec_mutex here to prevent sys_kexec_load
	 * running on one cpu from replacing the crash kernel
	 * we are using after a panic on a different cpu.
	 *
	 * If the crash kernel was not located in a fixed area
	 * of memory the xchg(&kexec_crash_image) would be
	 * sufficient.  But since I reuse the memory...
	 */
	if (mutex_trylock(&kexec_mutex)) {
		if (kexec_crash_image) {
			struct pt_regs fixed_regs;

			crash_setup_regs(&fixed_regs, regs);
			crash_save_vmcoreinfo();
			machine_crash_shutdown(&fixed_regs);
			machine_kexec(kexec_crash_image);
		}
		mutex_unlock(&kexec_mutex);
	}
}

void crash_kexec(struct pt_regs *regs)
{
	int old_cpu, this_cpu;

	/*
	 * Only one CPU is allowed to execute the crash_kexec() code as with
	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
	 * may stop each other.  To exclude them, we use panic_cpu here too.
	 */
	this_cpu = raw_smp_processor_id();
	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
	if (old_cpu == PANIC_CPU_INVALID) {
		/* This is the 1st CPU which comes here, so go ahead. */
		__crash_kexec(regs);

		/*
		 * Reset panic_cpu to allow another panic()/crash_kexec()
		 * call.
		 */
		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
	}
}

size_t crash_get_memory_size(void)
{
	size_t size = 0;

	mutex_lock(&kexec_mutex);
	if (crashk_res.end != crashk_res.start)
		size = resource_size(&crashk_res);
	mutex_unlock(&kexec_mutex);
	return size;
}

void __weak crash_free_reserved_phys_range(unsigned long begin,
					   unsigned long end)
{
	unsigned long addr;

	for (addr = begin; addr < end; addr += PAGE_SIZE)
		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}

int crash_shrink_memory(unsigned long new_size)
{
	int ret = 0;
	unsigned long start, end;
	unsigned long old_size;
	struct resource *ram_res;

	mutex_lock(&kexec_mutex);

	if (kexec_crash_image) {
		ret = -ENOENT;
		goto unlock;
	}
	start = crashk_res.start;
	end = crashk_res.end;
	old_size = (end == 0) ? 0 : end - start + 1;
	if (new_size >= old_size) {
		ret = (new_size == old_size) ? 0 : -EINVAL;
		goto unlock;
	}

	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
	if (!ram_res) {
		ret = -ENOMEM;
		goto unlock;
	}

	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);

	crash_map_reserved_pages();
	crash_free_reserved_phys_range(end, crashk_res.end);

	if ((start == end) && (crashk_res.parent != NULL))
		release_resource(&crashk_res);

	ram_res->start = end;
	ram_res->end = crashk_res.end;
	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
	ram_res->name = "System RAM";

	crashk_res.end = end - 1;

	insert_resource(&iomem_resource, ram_res);
	crash_unmap_reserved_pages();

unlock:
	mutex_unlock(&kexec_mutex);
	return ret;
}
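/*
 * A short sketch of what the two helpers below produce.  Each note is
 * laid out as the Elf note header (n_namesz, n_descsz, n_type) followed
 * by the name string and then the descriptor data, with the name and
 * descriptor each padded out to a 4-byte boundary -- hence the
 * "(len + 3)/4" steps through the u32 buffer.  final_note() terminates
 * the buffer with an all-zero header.
 */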
static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
			    size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;
	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}

static void final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}

void crash_save_cpu(struct pt_regs *regs, int cpu)
{
	struct elf_prstatus prstatus;
	u32 *buf;

	if ((cpu < 0) || (cpu >= nr_cpu_ids))
		return;

	/* Using ELF notes here is opportunistic.
	 * I need a well defined structure format
	 * for the data I pass, and I need tags
	 * on the data to indicate what information I have
	 * squirrelled away.  ELF notes happen to provide
	 * all of that, so there is no need to invent something new.
	 */
	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	memset(&prstatus, 0, sizeof(prstatus));
	prstatus.pr_pid = current->pid;
	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
			      &prstatus, sizeof(prstatus));
	final_note(buf);
}

static int __init crash_notes_memory_init(void)
{
	/* Allocate memory for saving cpu registers. */
	size_t size, align;

	/*
	 * crash_notes could be allocated across 2 vmalloc pages when percpu
	 * is vmalloc based.  vmalloc doesn't guarantee that 2 contiguous
	 * vmalloc pages are also on 2 contiguous physical pages.  In that
	 * case the 2nd part of crash_notes in the 2nd page could be lost,
	 * since only the starting address and size of crash_notes are
	 * exported through sysfs.  Here round up the size of crash_notes
	 * to the nearest power of two and pass it to __alloc_percpu as the
	 * align value.  This makes sure crash_notes is allocated inside one
	 * physical page.
	 */
	size = sizeof(note_buf_t);
	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);

	/*
	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
	 * would then definitely span 2 pages.
	 */
	BUILD_BUG_ON(size > PAGE_SIZE);

	crash_notes = __alloc_percpu(size, align);
	if (!crash_notes) {
		pr_warn("Memory allocation for saving cpu register states failed\n");
		return -ENOMEM;
	}
	return 0;
}
subsys_initcall(crash_notes_memory_init);


/*
 * parsing the "crashkernel" commandline
 *
 * this code is intended to be called from architecture specific code
 */


/*
 * This function parses command lines in the format
 *
 *	crashkernel=ramsize-range:size[,...][@offset]
 *
 * The function returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_mem(char *cmdline,
					unsigned long long system_ram,
					unsigned long long *crash_size,
					unsigned long long *crash_base)
{
	char *cur = cmdline, *tmp;

	/* for each entry of the comma-separated list */
	do {
		unsigned long long start, end = ULLONG_MAX, size;

		/* get the start of the range */
		start = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("crashkernel: Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (*cur != '-') {
			pr_warn("crashkernel: '-' expected\n");
			return -EINVAL;
		}
		cur++;

		/* if no ':' is here, then we read the end */
		if (*cur != ':') {
			end = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("crashkernel: Memory value expected\n");
				return -EINVAL;
			}
			cur = tmp;
			if (end <= start) {
				pr_warn("crashkernel: end <= start\n");
				return -EINVAL;
			}
		}

		if (*cur != ':') {
			pr_warn("crashkernel: ':' expected\n");
			return -EINVAL;
		}
		cur++;

		size = memparse(cur, &tmp);
		if (cur == tmp) {
			pr_warn("Memory value expected\n");
			return -EINVAL;
		}
		cur = tmp;
		if (size >= system_ram) {
			pr_warn("crashkernel: invalid size\n");
			return -EINVAL;
		}

		/* match ? */
		if (system_ram >= start && system_ram < end) {
			*crash_size = size;
			break;
		}
	} while (*cur++ == ',');

	if (*crash_size > 0) {
		while (*cur && *cur != ' ' && *cur != '@')
			cur++;
		if (*cur == '@') {
			cur++;
			*crash_base = memparse(cur, &tmp);
			if (cur == tmp) {
				pr_warn("Memory value expected after '@'\n");
				return -EINVAL;
			}
		}
	}

	return 0;
}
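/*
 * A worked example of the range syntax handled above (the numbers are
 * only an illustration): with
 *
 *	crashkernel=512M-2G:64M,2G-:128M
 *
 * a machine whose System RAM falls in the 512M-2G range reserves 64M,
 * one with 2G or more reserves 128M, and anything below 512M matches
 * no range and reserves nothing.  An optional "@offset" after the list
 * pins the reservation to a fixed physical address.
 */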
/*
 * This function parses "simple" (old) crashkernel command lines like
 *
 *	crashkernel=size[@offset]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_simple(char *cmdline,
					   unsigned long long *crash_size,
					   unsigned long long *crash_base)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	if (*cur == '@')
		*crash_base = memparse(cur+1, &cur);
	else if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

#define SUFFIX_HIGH 0
#define SUFFIX_LOW  1
#define SUFFIX_NULL 2
static __initdata char *suffix_tbl[] = {
	[SUFFIX_HIGH] = ",high",
	[SUFFIX_LOW]  = ",low",
	[SUFFIX_NULL] = NULL,
};

/*
 * This function parses "suffix" crashkernel command lines like
 *
 *	crashkernel=size,[high|low]
 *
 * It returns 0 on success and -EINVAL on failure.
 */
static int __init parse_crashkernel_suffix(char *cmdline,
					   unsigned long long *crash_size,
					   const char *suffix)
{
	char *cur = cmdline;

	*crash_size = memparse(cmdline, &cur);
	if (cmdline == cur) {
		pr_warn("crashkernel: memory value expected\n");
		return -EINVAL;
	}

	/* check with suffix */
	if (strncmp(cur, suffix, strlen(suffix))) {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}
	cur += strlen(suffix);
	if (*cur != ' ' && *cur != '\0') {
		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
		return -EINVAL;
	}

	return 0;
}

static __init char *get_last_crashkernel(char *cmdline,
			     const char *name,
			     const char *suffix)
{
	char *p = cmdline, *ck_cmdline = NULL;

	/* find crashkernel and use the last one if there are more */
	p = strstr(p, name);
	while (p) {
		char *end_p = strchr(p, ' ');
		char *q;

		if (!end_p)
			end_p = p + strlen(p);

		if (!suffix) {
			int i;

			/* skip the one with any known suffix */
			for (i = 0; suffix_tbl[i]; i++) {
				q = end_p - strlen(suffix_tbl[i]);
				if (!strncmp(q, suffix_tbl[i],
					     strlen(suffix_tbl[i])))
					goto next;
			}
			ck_cmdline = p;
		} else {
			q = end_p - strlen(suffix);
			if (!strncmp(q, suffix, strlen(suffix)))
				ck_cmdline = p;
		}
next:
		p = strstr(p+1, name);
	}

	if (!ck_cmdline)
		return NULL;

	return ck_cmdline;
}

static int __init __parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base,
			     const char *name,
			     const char *suffix)
{
	char	*first_colon, *first_space;
	char	*ck_cmdline;

	BUG_ON(!crash_size || !crash_base);
	*crash_size = 0;
	*crash_base = 0;

	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);

	if (!ck_cmdline)
		return -EINVAL;

	ck_cmdline += strlen(name);

	if (suffix)
		return parse_crashkernel_suffix(ck_cmdline, crash_size,
				suffix);
	/*
	 * if the commandline contains a ':', then that's the extended
	 * syntax -- if not, it must be the classic syntax
	 */
	first_colon = strchr(ck_cmdline, ':');
	first_space = strchr(ck_cmdline, ' ');
	if (first_colon && (!first_space || first_colon < first_space))
		return parse_crashkernel_mem(ck_cmdline, system_ram,
				crash_size, crash_base);

	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
}

/*
 * This function is the entry point for command line parsing and should be
 * called from the arch-specific code.
 */
int __init parse_crashkernel(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
					"crashkernel=", NULL);
}

int __init parse_crashkernel_high(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
}

int __init parse_crashkernel_low(char *cmdline,
			     unsigned long long system_ram,
			     unsigned long long *crash_size,
			     unsigned long long *crash_base)
{
	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
}
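/*
 * vmcoreinfo handling.  vmcoreinfo_data is a plain text buffer of
 * "KEY=value" lines filled in by the VMCOREINFO_* macros and
 * vmcoreinfo_append_str() below; update_vmcoreinfo_note() wraps that
 * buffer in a single ELF note so the crash dump path can hand one
 * well-formed note to whatever captures the dump (typically a tool
 * such as makedumpfile).  paddr_vmcoreinfo_note() exposes the note's
 * physical address for that purpose.
 */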
static void update_vmcoreinfo_note(void)
{
	u32 *buf = vmcoreinfo_note;

	if (!vmcoreinfo_size)
		return;
	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
			      vmcoreinfo_size);
	final_note(buf);
}

void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
	update_vmcoreinfo_note();
}

void vmcoreinfo_append_str(const char *fmt, ...)
{
	va_list args;
	char buf[0x50];
	size_t r;

	va_start(args, fmt);
	r = vscnprintf(buf, sizeof(buf), fmt, args);
	va_end(args);

	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);

	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);

	vmcoreinfo_size += r;
}

/*
 * provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak arch_crash_save_vmcoreinfo(void)
{}

unsigned long __weak paddr_vmcoreinfo_note(void)
{
	return __pa((unsigned long)(char *)&vmcoreinfo_note);
}

static int __init crash_save_vmcoreinfo_init(void)
{
	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
	VMCOREINFO_PAGESIZE(PAGE_SIZE);

	VMCOREINFO_SYMBOL(init_uts_ns);
	VMCOREINFO_SYMBOL(node_online_map);
#ifdef CONFIG_MMU
	VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
	VMCOREINFO_SYMBOL(_stext);
	VMCOREINFO_SYMBOL(vmap_area_list);

#ifndef CONFIG_NEED_MULTIPLE_NODES
	VMCOREINFO_SYMBOL(mem_map);
	VMCOREINFO_SYMBOL(contig_page_data);
#endif
#ifdef CONFIG_SPARSEMEM
	VMCOREINFO_SYMBOL(mem_section);
	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
	VMCOREINFO_STRUCT_SIZE(mem_section);
	VMCOREINFO_OFFSET(mem_section, section_mem_map);
#endif
	VMCOREINFO_STRUCT_SIZE(page);
	VMCOREINFO_STRUCT_SIZE(pglist_data);
	VMCOREINFO_STRUCT_SIZE(zone);
	VMCOREINFO_STRUCT_SIZE(free_area);
	VMCOREINFO_STRUCT_SIZE(list_head);
	VMCOREINFO_SIZE(nodemask_t);
	VMCOREINFO_OFFSET(page, flags);
	VMCOREINFO_OFFSET(page, _count);
	VMCOREINFO_OFFSET(page, mapping);
	VMCOREINFO_OFFSET(page, lru);
	VMCOREINFO_OFFSET(page, _mapcount);
	VMCOREINFO_OFFSET(page, private);
	VMCOREINFO_OFFSET(pglist_data, node_zones);
	VMCOREINFO_OFFSET(pglist_data, nr_zones);
#ifdef CONFIG_FLAT_NODE_MEM_MAP
	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
#endif
	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
	VMCOREINFO_OFFSET(pglist_data, node_id);
	VMCOREINFO_OFFSET(zone, free_area);
	VMCOREINFO_OFFSET(zone, vm_stat);
	VMCOREINFO_OFFSET(zone, spanned_pages);
	VMCOREINFO_OFFSET(free_area, free_list);
	VMCOREINFO_OFFSET(list_head, next);
	VMCOREINFO_OFFSET(list_head, prev);
	VMCOREINFO_OFFSET(vmap_area, va_start);
	VMCOREINFO_OFFSET(vmap_area, list);
	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
	log_buf_kexec_setup();
	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
	VMCOREINFO_NUMBER(NR_FREE_PAGES);
	VMCOREINFO_NUMBER(PG_lru);
	VMCOREINFO_NUMBER(PG_private);
	VMCOREINFO_NUMBER(PG_swapcache);
	VMCOREINFO_NUMBER(PG_slab);
#ifdef CONFIG_MEMORY_FAILURE
	VMCOREINFO_NUMBER(PG_hwpoison);
#endif
	VMCOREINFO_NUMBER(PG_head_mask);
	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
#ifdef CONFIG_X86
	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HUGETLBFS
	VMCOREINFO_SYMBOL(free_huge_page);
#endif

	arch_crash_save_vmcoreinfo();
	update_vmcoreinfo_note();

	return 0;
}

subsys_initcall(crash_save_vmcoreinfo_init);

/*
 * Move into place and start executing a preloaded standalone
 * executable.  If nothing was preloaded return an error.
 */
int kernel_kexec(void)
{
	int error = 0;

	if (!mutex_trylock(&kexec_mutex))
		return -EBUSY;
	if (!kexec_image) {
		error = -EINVAL;
		goto Unlock;
	}

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		lock_system_sleep();
		pm_prepare_console();
		error = freeze_processes();
		if (error) {
			error = -EBUSY;
			goto Restore_console;
		}
		suspend_console();
		error = dpm_suspend_start(PMSG_FREEZE);
		if (error)
			goto Resume_console;
		/* At this point, dpm_suspend_start() has been called,
		 * but *not* dpm_suspend_end().  We *must* call
		 * dpm_suspend_end() now.  Otherwise, drivers for
		 * some devices (e.g. interrupt controllers) become
		 * desynchronized with the actual state of the
		 * hardware at resume time, and evil weirdness ensues.
		 */
		error = dpm_suspend_end(PMSG_FREEZE);
		if (error)
			goto Resume_devices;
		error = disable_nonboot_cpus();
		if (error)
			goto Enable_cpus;
		local_irq_disable();
		error = syscore_suspend();
		if (error)
			goto Enable_irqs;
	} else
#endif
	{
		kexec_in_progress = true;
		kernel_restart_prepare(NULL);
		migrate_to_reboot_cpu();

		/*
		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
		 * no further code needs to use CPU hotplug (which is true in
		 * the reboot case). However, the kexec path depends on using
		 * CPU hotplug again; so re-enable it here.
		 */
		cpu_hotplug_enable();
		pr_emerg("Starting new kernel\n");
		machine_shutdown();
	}

	machine_kexec(kexec_image);

#ifdef CONFIG_KEXEC_JUMP
	if (kexec_image->preserve_context) {
		syscore_resume();
 Enable_irqs:
		local_irq_enable();
 Enable_cpus:
		enable_nonboot_cpus();
		dpm_resume_start(PMSG_RESTORE);
 Resume_devices:
		dpm_resume_end(PMSG_RESTORE);
 Resume_console:
		resume_console();
		thaw_processes();
 Restore_console:
		pm_restore_console();
		unlock_system_sleep();
	}
#endif

 Unlock:
	mutex_unlock(&kexec_mutex);
	return error;
}

/*
 * Add and remove page tables for crashkernel memory
 *
 * Provide an empty default implementation here -- architecture
 * code may override this
 */
void __weak crash_map_reserved_pages(void)
{}

void __weak crash_unmap_reserved_pages(void)
{}