xref: /linux-6.15/kernel/kexec_core.c (revision 1f330c32)
1 /*
2  * kexec.c - kexec system call core code.
3  * Copyright (C) 2002-2004 Eric Biederman  <[email protected]>
4  *
5  * This source code is licensed under the GNU General Public License,
6  * Version 2.  See the file COPYING for more details.
7  */
8 
9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
10 
11 #include <linux/capability.h>
12 #include <linux/mm.h>
13 #include <linux/file.h>
14 #include <linux/slab.h>
15 #include <linux/fs.h>
16 #include <linux/kexec.h>
17 #include <linux/mutex.h>
18 #include <linux/list.h>
19 #include <linux/highmem.h>
20 #include <linux/syscalls.h>
21 #include <linux/reboot.h>
22 #include <linux/ioport.h>
23 #include <linux/hardirq.h>
24 #include <linux/elf.h>
25 #include <linux/elfcore.h>
26 #include <linux/utsname.h>
27 #include <linux/numa.h>
28 #include <linux/suspend.h>
29 #include <linux/device.h>
30 #include <linux/freezer.h>
31 #include <linux/pm.h>
32 #include <linux/cpu.h>
33 #include <linux/uaccess.h>
34 #include <linux/io.h>
35 #include <linux/console.h>
36 #include <linux/vmalloc.h>
37 #include <linux/swap.h>
38 #include <linux/syscore_ops.h>
39 #include <linux/compiler.h>
40 #include <linux/hugetlb.h>
41 
42 #include <asm/page.h>
43 #include <asm/sections.h>
44 
45 #include <crypto/hash.h>
46 #include <crypto/sha.h>
47 #include "kexec_internal.h"
48 
49 DEFINE_MUTEX(kexec_mutex);
50 
51 /* Per cpu memory for storing cpu states in case of system crash. */
52 note_buf_t __percpu *crash_notes;
53 
54 /* vmcoreinfo stuff */
55 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
56 u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
57 size_t vmcoreinfo_size;
58 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
59 
60 /* Flag to indicate we are going to kexec a new kernel */
61 bool kexec_in_progress = false;
62 
63 
64 /* Location of the reserved area for the crash kernel */
65 struct resource crashk_res = {
66 	.name  = "Crash kernel",
67 	.start = 0,
68 	.end   = 0,
69 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
70 };
71 struct resource crashk_low_res = {
72 	.name  = "Crash kernel",
73 	.start = 0,
74 	.end   = 0,
75 	.flags = IORESOURCE_BUSY | IORESOURCE_MEM
76 };
77 
78 int kexec_should_crash(struct task_struct *p)
79 {
80 	/*
81 	 * If crash_kexec_post_notifiers is enabled, don't run
82 	 * crash_kexec() here yet, which must be run after panic
83 	 * notifiers in panic().
84 	 */
85 	if (crash_kexec_post_notifiers)
86 		return 0;
87 	/*
88 	 * There are 4 panic() calls in do_exit() path, each of which
89 	 * corresponds to each of these 4 conditions.
90 	 */
91 	if (in_interrupt() || !p->pid || is_global_init(p) || panic_on_oops)
92 		return 1;
93 	return 0;
94 }
95 
96 /*
97  * When kexec transitions to the new kernel there is a one-to-one
98  * mapping between physical and virtual addresses.  On processors
99  * where you can disable the MMU this is trivial, and easy.  For
100  * others it is still a simple, predictable page table to set up.
101  *
102  * In that environment kexec copies the new kernel to its final
103  * resting place.  This means I can only support memory whose
104  * physical address can fit in an unsigned long.  In particular
105  * addresses where (pfn << PAGE_SHIFT) > ULONG_MAX cannot be handled.
106  * If the assembly stub has more restrictive requirements
107  * KEXEC_SOURCE_MEMORY_LIMIT and KEXEC_DEST_MEMORY_LIMIT can be
108  * defined more restrictively in <asm/kexec.h>.
109  *
110  * The code for the transition from the current kernel to
111  * the new kernel is placed in the control_code_buffer, whose size
112  * is given by KEXEC_CONTROL_PAGE_SIZE.  In the best case only a single
113  * page of memory is necessary, but some architectures require more.
114  * Because this memory must be identity mapped in the transition from
115  * virtual to physical addresses it must live in the range
116  * 0 - TASK_SIZE, as only the user space mappings are arbitrarily
117  * modifiable.
118  *
119  * The assembly stub in the control code buffer is passed a linked list
120  * of descriptor pages detailing the source pages of the new kernel,
121  * and the destination addresses of those source pages.  As this data
122  * structure is not used in the context of the current OS, it must
123  * be self-contained.
124  *
125  * The code has been made to work with highmem pages and will use a
126  * destination page in its final resting place (if it happens
127  * to allocate it).  The end product of this is that most of the
128  * physical address space, and most of RAM can be used.
129  *
130  * Future directions include:
131  *  - allocating a page table with the control code buffer identity
132  *    mapped, to simplify machine_kexec and make kexec_on_panic more
133  *    reliable.
134  */
135 
136 /*
137  * KIMAGE_NO_DEST is an impossible destination address..., for
138  * allocating pages whose destination address we do not care about.
139  */
140 #define KIMAGE_NO_DEST (-1UL)
141 
142 static struct page *kimage_alloc_page(struct kimage *image,
143 				       gfp_t gfp_mask,
144 				       unsigned long dest);
145 
146 int sanity_check_segment_list(struct kimage *image)
147 {
148 	int result, i;
149 	unsigned long nr_segments = image->nr_segments;
150 
151 	/*
152 	 * Verify we have good destination addresses.  The caller is
153 	 * responsible for making certain we don't attempt to load
154 	 * the new image into invalid or reserved areas of RAM.  This
155 	 * just verifies it is an address we can use.
156 	 *
157 	 * Since the kernel does everything in page size chunks ensure
158 	 * the destination addresses are page aligned.  Too many
159 	 * special cases crop of when we don't do this.  The most
160 	 * insidious is getting overlapping destination addresses
161 	 * simply because addresses are changed to page size
162 	 * granularity.
163 	 */
164 	result = -EADDRNOTAVAIL;
165 	for (i = 0; i < nr_segments; i++) {
166 		unsigned long mstart, mend;
167 
168 		mstart = image->segment[i].mem;
169 		mend   = mstart + image->segment[i].memsz;
170 		if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
171 			return result;
172 		if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
173 			return result;
174 	}
175 
176 	/* Verify our destination addresses do not overlap.
177 	 * If we alloed overlapping destination addresses
178 	 * through very weird things can happen with no
179 	 * easy explanation as one segment stops on another.
180 	 */
181 	result = -EINVAL;
182 	for (i = 0; i < nr_segments; i++) {
183 		unsigned long mstart, mend;
184 		unsigned long j;
185 
186 		mstart = image->segment[i].mem;
187 		mend   = mstart + image->segment[i].memsz;
188 		for (j = 0; j < i; j++) {
189 			unsigned long pstart, pend;
190 
191 			pstart = image->segment[j].mem;
192 			pend   = pstart + image->segment[j].memsz;
193 			/* Do the segments overlap ? */
194 			if ((mend > pstart) && (mstart < pend))
195 				return result;
196 		}
197 	}
198 
199 	/* Ensure our buffer sizes are strictly less than
200 	 * our memory sizes.  This should always be the case,
201 	 * and it is easier to check up front than to be surprised
202 	 * later on.
203 	 */
204 	result = -EINVAL;
205 	for (i = 0; i < nr_segments; i++) {
206 		if (image->segment[i].bufsz > image->segment[i].memsz)
207 			return result;
208 	}
209 
210 	/*
211 	 * Verify we have good destination addresses.  Normally
212 	 * the caller is responsible for making certain we don't
213 	 * attempt to load the new image into invalid or reserved
214 	 * areas of RAM.  But crash kernels are preloaded into a
215 	 * reserved area of ram.  We must ensure the addresses
216 	 * are in the reserved area otherwise preloading the
217 	 * kernel could corrupt things.
218 	 */
219 
220 	if (image->type == KEXEC_TYPE_CRASH) {
221 		result = -EADDRNOTAVAIL;
222 		for (i = 0; i < nr_segments; i++) {
223 			unsigned long mstart, mend;
224 
225 			mstart = image->segment[i].mem;
226 			mend = mstart + image->segment[i].memsz - 1;
227 			/* Ensure we are within the crash kernel limits */
228 			if ((mstart < crashk_res.start) ||
229 			    (mend > crashk_res.end))
230 				return result;
231 		}
232 	}
233 
234 	return 0;
235 }
236 
237 struct kimage *do_kimage_alloc_init(void)
238 {
239 	struct kimage *image;
240 
241 	/* Allocate a controlling structure */
242 	image = kzalloc(sizeof(*image), GFP_KERNEL);
243 	if (!image)
244 		return NULL;
245 
246 	image->head = 0;
247 	image->entry = &image->head;
248 	image->last_entry = &image->head;
249 	image->control_page = ~0; /* By default this does not apply */
250 	image->type = KEXEC_TYPE_DEFAULT;
251 
252 	/* Initialize the list of control pages */
253 	INIT_LIST_HEAD(&image->control_pages);
254 
255 	/* Initialize the list of destination pages */
256 	INIT_LIST_HEAD(&image->dest_pages);
257 
258 	/* Initialize the list of unusable pages */
259 	INIT_LIST_HEAD(&image->unusable_pages);
260 
261 	return image;
262 }
263 
264 int kimage_is_destination_range(struct kimage *image,
265 					unsigned long start,
266 					unsigned long end)
267 {
268 	unsigned long i;
269 
270 	for (i = 0; i < image->nr_segments; i++) {
271 		unsigned long mstart, mend;
272 
273 		mstart = image->segment[i].mem;
274 		mend = mstart + image->segment[i].memsz;
275 		if ((end > mstart) && (start < mend))
276 			return 1;
277 	}
278 
279 	return 0;
280 }
281 
282 static struct page *kimage_alloc_pages(gfp_t gfp_mask, unsigned int order)
283 {
284 	struct page *pages;
285 
286 	pages = alloc_pages(gfp_mask, order);
287 	if (pages) {
288 		unsigned int count, i;
289 
290 		pages->mapping = NULL;
291 		set_page_private(pages, order);
292 		count = 1 << order;
293 		for (i = 0; i < count; i++)
294 			SetPageReserved(pages + i);
295 	}
296 
297 	return pages;
298 }
299 
300 static void kimage_free_pages(struct page *page)
301 {
302 	unsigned int order, count, i;
303 
304 	order = page_private(page);
305 	count = 1 << order;
306 	for (i = 0; i < count; i++)
307 		ClearPageReserved(page + i);
308 	__free_pages(page, order);
309 }
310 
311 void kimage_free_page_list(struct list_head *list)
312 {
313 	struct list_head *pos, *next;
314 
315 	list_for_each_safe(pos, next, list) {
316 		struct page *page;
317 
318 		page = list_entry(pos, struct page, lru);
319 		list_del(&page->lru);
320 		kimage_free_pages(page);
321 	}
322 }
323 
324 static struct page *kimage_alloc_normal_control_pages(struct kimage *image,
325 							unsigned int order)
326 {
327 	/* Control pages are special, they are the intermediaries
328 	 * that are needed while we copy the rest of the pages
329 	 * to their final resting place.  As such they must
330 	 * not conflict with either the destination addresses
331 	 * or memory the kernel is already using.
332 	 *
333 	 * The only case where we really need more than one of
334 	 * these are for architectures where we cannot disable
335 	 * the MMU and must instead generate an identity mapped
336 	 * page table for all of the memory.
337 	 *
338 	 * At worst this runs in O(N) of the image size.
339 	 */
340 	struct list_head extra_pages;
341 	struct page *pages;
342 	unsigned int count;
343 
344 	count = 1 << order;
345 	INIT_LIST_HEAD(&extra_pages);
346 
347 	/* Loop while I can allocate a page and the page allocated
348 	 * is a destination page.
349 	 */
350 	do {
351 		unsigned long pfn, epfn, addr, eaddr;
352 
353 		pages = kimage_alloc_pages(KEXEC_CONTROL_MEMORY_GFP, order);
354 		if (!pages)
355 			break;
356 		pfn   = page_to_pfn(pages);
357 		epfn  = pfn + count;
358 		addr  = pfn << PAGE_SHIFT;
359 		eaddr = epfn << PAGE_SHIFT;
360 		if ((epfn >= (KEXEC_CONTROL_MEMORY_LIMIT >> PAGE_SHIFT)) ||
361 			      kimage_is_destination_range(image, addr, eaddr)) {
362 			list_add(&pages->lru, &extra_pages);
363 			pages = NULL;
364 		}
365 	} while (!pages);
366 
367 	if (pages) {
368 		/* Remember the allocated page... */
369 		list_add(&pages->lru, &image->control_pages);
370 
371 		/* Because the page is already in it's destination
372 		 * location we will never allocate another page at
373 		 * that address.  Therefore kimage_alloc_pages
374 		 * will not return it (again) and we don't need
375 		 * to give it an entry in image->segment[].
376 		 */
377 	}
378 	/* Deal with the destination pages I have inadvertently allocated.
379 	 *
380 	 * Ideally I would convert multi-page allocations into single
381 	 * page allocations, and add everything to image->dest_pages.
382 	 *
383 	 * For now it is simpler to just free the pages.
384 	 */
385 	kimage_free_page_list(&extra_pages);
386 
387 	return pages;
388 }
389 
390 static struct page *kimage_alloc_crash_control_pages(struct kimage *image,
391 						      unsigned int order)
392 {
393 	/* Control pages are special, they are the intermediaries
394 	 * that are needed while we copy the rest of the pages
395 	 * to their final resting place.  As such they must
396 	 * not conflict with either the destination addresses
397 	 * or memory the kernel is already using.
398 	 *
399 	 * Control pages are also the only pags we must allocate
400 	 * when loading a crash kernel.  All of the other pages
401 	 * are specified by the segments and we just memcpy
402 	 * into them directly.
403 	 *
404 	 * The only case where we really need more than one of
405 	 * these are for architectures where we cannot disable
406 	 * the MMU and must instead generate an identity mapped
407 	 * page table for all of the memory.
408 	 *
409 	 * Given the low demand this implements a very simple
410 	 * allocator that finds the first hole of the appropriate
411 	 * size in the reserved memory region, and allocates all
412 	 * of the memory up to and including the hole.
413 	 */
414 	unsigned long hole_start, hole_end, size;
415 	struct page *pages;
416 
417 	pages = NULL;
418 	size = (1 << order) << PAGE_SHIFT;
419 	hole_start = (image->control_page + (size - 1)) & ~(size - 1);
420 	hole_end   = hole_start + size - 1;
421 	while (hole_end <= crashk_res.end) {
422 		unsigned long i;
423 
424 		if (hole_end > KEXEC_CRASH_CONTROL_MEMORY_LIMIT)
425 			break;
426 		/* See if I overlap any of the segments */
427 		for (i = 0; i < image->nr_segments; i++) {
428 			unsigned long mstart, mend;
429 
430 			mstart = image->segment[i].mem;
431 			mend   = mstart + image->segment[i].memsz - 1;
432 			if ((hole_end >= mstart) && (hole_start <= mend)) {
433 				/* Advance the hole to the end of the segment */
434 				hole_start = (mend + (size - 1)) & ~(size - 1);
435 				hole_end   = hole_start + size - 1;
436 				break;
437 			}
438 		}
439 		/* If I don't overlap any segments I have found my hole! */
440 		if (i == image->nr_segments) {
441 			pages = pfn_to_page(hole_start >> PAGE_SHIFT);
442 			image->control_page = hole_end;
443 			break;
444 		}
445 	}
446 
447 	return pages;
448 }
449 
450 
451 struct page *kimage_alloc_control_pages(struct kimage *image,
452 					 unsigned int order)
453 {
454 	struct page *pages = NULL;
455 
456 	switch (image->type) {
457 	case KEXEC_TYPE_DEFAULT:
458 		pages = kimage_alloc_normal_control_pages(image, order);
459 		break;
460 	case KEXEC_TYPE_CRASH:
461 		pages = kimage_alloc_crash_control_pages(image, order);
462 		break;
463 	}
464 
465 	return pages;
466 }
467 
468 static int kimage_add_entry(struct kimage *image, kimage_entry_t entry)
469 {
470 	if (*image->entry != 0)
471 		image->entry++;
472 
473 	if (image->entry == image->last_entry) {
474 		kimage_entry_t *ind_page;
475 		struct page *page;
476 
477 		page = kimage_alloc_page(image, GFP_KERNEL, KIMAGE_NO_DEST);
478 		if (!page)
479 			return -ENOMEM;
480 
481 		ind_page = page_address(page);
482 		*image->entry = virt_to_phys(ind_page) | IND_INDIRECTION;
483 		image->entry = ind_page;
484 		image->last_entry = ind_page +
485 				      ((PAGE_SIZE/sizeof(kimage_entry_t)) - 1);
486 	}
487 	*image->entry = entry;
488 	image->entry++;
489 	*image->entry = 0;
490 
491 	return 0;
492 }
493 
494 static int kimage_set_destination(struct kimage *image,
495 				   unsigned long destination)
496 {
497 	int result;
498 
499 	destination &= PAGE_MASK;
500 	result = kimage_add_entry(image, destination | IND_DESTINATION);
501 
502 	return result;
503 }
504 
505 
506 static int kimage_add_page(struct kimage *image, unsigned long page)
507 {
508 	int result;
509 
510 	page &= PAGE_MASK;
511 	result = kimage_add_entry(image, page | IND_SOURCE);
512 
513 	return result;
514 }
515 
516 
517 static void kimage_free_extra_pages(struct kimage *image)
518 {
519 	/* Walk through and free any extra destination pages I may have */
520 	kimage_free_page_list(&image->dest_pages);
521 
522 	/* Walk through and free any unusable pages I have cached */
523 	kimage_free_page_list(&image->unusable_pages);
524 
525 }
526 void kimage_terminate(struct kimage *image)
527 {
528 	if (*image->entry != 0)
529 		image->entry++;
530 
531 	*image->entry = IND_DONE;
532 }
533 
/*
 * for_each_kimage_entry - walk the entry list of a kimage
 * @image: expression yielding a struct kimage pointer; ->head starts the
 *         list
 * @ptr:   kimage_entry_t * cursor; points at the current slot in the body
 * @entry: kimage_entry_t lvalue; holds the value of the current slot
 *
 * The walk stops at the first zero entry or the first entry that has
 * IND_DONE set.  IND_INDIRECTION entries are visited like any other;
 * after one, the cursor jumps to the page it names (via phys_to_virt()),
 * otherwise it moves to the next slot.  The step expression re-reads
 * @entry, so the loop body must not modify it.
 *
 * Every argument is parenthesised so that non-trivial expressions (for
 * example "*pp") expand correctly.
 */
#define for_each_kimage_entry(image, ptr, entry) \
	for ((ptr) = &(image)->head; \
	     ((entry) = *(ptr)) && !((entry) & IND_DONE); \
	     (ptr) = ((entry) & IND_INDIRECTION) ? \
			phys_to_virt(((entry) & PAGE_MASK)) : (ptr) + 1)
538 
539 static void kimage_free_entry(kimage_entry_t entry)
540 {
541 	struct page *page;
542 
543 	page = pfn_to_page(entry >> PAGE_SHIFT);
544 	kimage_free_pages(page);
545 }
546 
547 void kimage_free(struct kimage *image)
548 {
549 	kimage_entry_t *ptr, entry;
550 	kimage_entry_t ind = 0;
551 
552 	if (!image)
553 		return;
554 
555 	kimage_free_extra_pages(image);
556 	for_each_kimage_entry(image, ptr, entry) {
557 		if (entry & IND_INDIRECTION) {
558 			/* Free the previous indirection page */
559 			if (ind & IND_INDIRECTION)
560 				kimage_free_entry(ind);
561 			/* Save this indirection page until we are
562 			 * done with it.
563 			 */
564 			ind = entry;
565 		} else if (entry & IND_SOURCE)
566 			kimage_free_entry(entry);
567 	}
568 	/* Free the final indirection page */
569 	if (ind & IND_INDIRECTION)
570 		kimage_free_entry(ind);
571 
572 	/* Handle any machine specific cleanup */
573 	machine_kexec_cleanup(image);
574 
575 	/* Free the kexec control pages... */
576 	kimage_free_page_list(&image->control_pages);
577 
578 	/*
579 	 * Free up any temporary buffers allocated. This might hit if
580 	 * error occurred much later after buffer allocation.
581 	 */
582 	if (image->file_mode)
583 		kimage_file_post_load_cleanup(image);
584 
585 	kfree(image);
586 }
587 
588 static kimage_entry_t *kimage_dst_used(struct kimage *image,
589 					unsigned long page)
590 {
591 	kimage_entry_t *ptr, entry;
592 	unsigned long destination = 0;
593 
594 	for_each_kimage_entry(image, ptr, entry) {
595 		if (entry & IND_DESTINATION)
596 			destination = entry & PAGE_MASK;
597 		else if (entry & IND_SOURCE) {
598 			if (page == destination)
599 				return ptr;
600 			destination += PAGE_SIZE;
601 		}
602 	}
603 
604 	return NULL;
605 }
606 
607 static struct page *kimage_alloc_page(struct kimage *image,
608 					gfp_t gfp_mask,
609 					unsigned long destination)
610 {
611 	/*
612 	 * Here we implement safeguards to ensure that a source page
613 	 * is not copied to its destination page before the data on
614 	 * the destination page is no longer useful.
615 	 *
616 	 * To do this we maintain the invariant that a source page is
617 	 * either its own destination page, or it is not a
618 	 * destination page at all.
619 	 *
620 	 * That is slightly stronger than required, but the proof
621 	 * that no problems will not occur is trivial, and the
622 	 * implementation is simply to verify.
623 	 *
624 	 * When allocating all pages normally this algorithm will run
625 	 * in O(N) time, but in the worst case it will run in O(N^2)
626 	 * time.   If the runtime is a problem the data structures can
627 	 * be fixed.
628 	 */
629 	struct page *page;
630 	unsigned long addr;
631 
632 	/*
633 	 * Walk through the list of destination pages, and see if I
634 	 * have a match.
635 	 */
636 	list_for_each_entry(page, &image->dest_pages, lru) {
637 		addr = page_to_pfn(page) << PAGE_SHIFT;
638 		if (addr == destination) {
639 			list_del(&page->lru);
640 			return page;
641 		}
642 	}
643 	page = NULL;
644 	while (1) {
645 		kimage_entry_t *old;
646 
647 		/* Allocate a page, if we run out of memory give up */
648 		page = kimage_alloc_pages(gfp_mask, 0);
649 		if (!page)
650 			return NULL;
651 		/* If the page cannot be used file it away */
652 		if (page_to_pfn(page) >
653 				(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
654 			list_add(&page->lru, &image->unusable_pages);
655 			continue;
656 		}
657 		addr = page_to_pfn(page) << PAGE_SHIFT;
658 
659 		/* If it is the destination page we want use it */
660 		if (addr == destination)
661 			break;
662 
663 		/* If the page is not a destination page use it */
664 		if (!kimage_is_destination_range(image, addr,
665 						  addr + PAGE_SIZE))
666 			break;
667 
668 		/*
669 		 * I know that the page is someones destination page.
670 		 * See if there is already a source page for this
671 		 * destination page.  And if so swap the source pages.
672 		 */
673 		old = kimage_dst_used(image, addr);
674 		if (old) {
675 			/* If so move it */
676 			unsigned long old_addr;
677 			struct page *old_page;
678 
679 			old_addr = *old & PAGE_MASK;
680 			old_page = pfn_to_page(old_addr >> PAGE_SHIFT);
681 			copy_highpage(page, old_page);
682 			*old = addr | (*old & ~PAGE_MASK);
683 
684 			/* The old page I have found cannot be a
685 			 * destination page, so return it if it's
686 			 * gfp_flags honor the ones passed in.
687 			 */
688 			if (!(gfp_mask & __GFP_HIGHMEM) &&
689 			    PageHighMem(old_page)) {
690 				kimage_free_pages(old_page);
691 				continue;
692 			}
693 			addr = old_addr;
694 			page = old_page;
695 			break;
696 		}
697 		/* Place the page on the destination list, to be used later */
698 		list_add(&page->lru, &image->dest_pages);
699 	}
700 
701 	return page;
702 }
703 
704 static int kimage_load_normal_segment(struct kimage *image,
705 					 struct kexec_segment *segment)
706 {
707 	unsigned long maddr;
708 	size_t ubytes, mbytes;
709 	int result;
710 	unsigned char __user *buf = NULL;
711 	unsigned char *kbuf = NULL;
712 
713 	result = 0;
714 	if (image->file_mode)
715 		kbuf = segment->kbuf;
716 	else
717 		buf = segment->buf;
718 	ubytes = segment->bufsz;
719 	mbytes = segment->memsz;
720 	maddr = segment->mem;
721 
722 	result = kimage_set_destination(image, maddr);
723 	if (result < 0)
724 		goto out;
725 
726 	while (mbytes) {
727 		struct page *page;
728 		char *ptr;
729 		size_t uchunk, mchunk;
730 
731 		page = kimage_alloc_page(image, GFP_HIGHUSER, maddr);
732 		if (!page) {
733 			result  = -ENOMEM;
734 			goto out;
735 		}
736 		result = kimage_add_page(image, page_to_pfn(page)
737 								<< PAGE_SHIFT);
738 		if (result < 0)
739 			goto out;
740 
741 		ptr = kmap(page);
742 		/* Start with a clear page */
743 		clear_page(ptr);
744 		ptr += maddr & ~PAGE_MASK;
745 		mchunk = min_t(size_t, mbytes,
746 				PAGE_SIZE - (maddr & ~PAGE_MASK));
747 		uchunk = min(ubytes, mchunk);
748 
749 		/* For file based kexec, source pages are in kernel memory */
750 		if (image->file_mode)
751 			memcpy(ptr, kbuf, uchunk);
752 		else
753 			result = copy_from_user(ptr, buf, uchunk);
754 		kunmap(page);
755 		if (result) {
756 			result = -EFAULT;
757 			goto out;
758 		}
759 		ubytes -= uchunk;
760 		maddr  += mchunk;
761 		if (image->file_mode)
762 			kbuf += mchunk;
763 		else
764 			buf += mchunk;
765 		mbytes -= mchunk;
766 	}
767 out:
768 	return result;
769 }
770 
771 static int kimage_load_crash_segment(struct kimage *image,
772 					struct kexec_segment *segment)
773 {
774 	/* For crash dumps kernels we simply copy the data from
775 	 * user space to it's destination.
776 	 * We do things a page at a time for the sake of kmap.
777 	 */
778 	unsigned long maddr;
779 	size_t ubytes, mbytes;
780 	int result;
781 	unsigned char __user *buf = NULL;
782 	unsigned char *kbuf = NULL;
783 
784 	result = 0;
785 	if (image->file_mode)
786 		kbuf = segment->kbuf;
787 	else
788 		buf = segment->buf;
789 	ubytes = segment->bufsz;
790 	mbytes = segment->memsz;
791 	maddr = segment->mem;
792 	while (mbytes) {
793 		struct page *page;
794 		char *ptr;
795 		size_t uchunk, mchunk;
796 
797 		page = pfn_to_page(maddr >> PAGE_SHIFT);
798 		if (!page) {
799 			result  = -ENOMEM;
800 			goto out;
801 		}
802 		ptr = kmap(page);
803 		ptr += maddr & ~PAGE_MASK;
804 		mchunk = min_t(size_t, mbytes,
805 				PAGE_SIZE - (maddr & ~PAGE_MASK));
806 		uchunk = min(ubytes, mchunk);
807 		if (mchunk > uchunk) {
808 			/* Zero the trailing part of the page */
809 			memset(ptr + uchunk, 0, mchunk - uchunk);
810 		}
811 
812 		/* For file based kexec, source pages are in kernel memory */
813 		if (image->file_mode)
814 			memcpy(ptr, kbuf, uchunk);
815 		else
816 			result = copy_from_user(ptr, buf, uchunk);
817 		kexec_flush_icache_page(page);
818 		kunmap(page);
819 		if (result) {
820 			result = -EFAULT;
821 			goto out;
822 		}
823 		ubytes -= uchunk;
824 		maddr  += mchunk;
825 		if (image->file_mode)
826 			kbuf += mchunk;
827 		else
828 			buf += mchunk;
829 		mbytes -= mchunk;
830 	}
831 out:
832 	return result;
833 }
834 
835 int kimage_load_segment(struct kimage *image,
836 				struct kexec_segment *segment)
837 {
838 	int result = -ENOMEM;
839 
840 	switch (image->type) {
841 	case KEXEC_TYPE_DEFAULT:
842 		result = kimage_load_normal_segment(image, segment);
843 		break;
844 	case KEXEC_TYPE_CRASH:
845 		result = kimage_load_crash_segment(image, segment);
846 		break;
847 	}
848 
849 	return result;
850 }
851 
/* NOTE(review): presumably blocks further image loads when non-zero -- not used in this part of the file, confirm. */
int kexec_load_disabled;
/* NOTE(review): presumably the image for a regular kexec -- not used in this part of the file, confirm. */
struct kimage *kexec_image;
/* Image that __crash_kexec() boots; crash_shrink_memory() refuses to run while it is set. */
struct kimage *kexec_crash_image;
855 
856 /*
857  * No panic_cpu check version of crash_kexec().  This function is called
858  * only when panic_cpu holds the current CPU number; this is the only CPU
859  * which processes crash_kexec routines.
860  */
861 void __crash_kexec(struct pt_regs *regs)
862 {
863 	/* Take the kexec_mutex here to prevent sys_kexec_load
864 	 * running on one cpu from replacing the crash kernel
865 	 * we are using after a panic on a different cpu.
866 	 *
867 	 * If the crash kernel was not located in a fixed area
868 	 * of memory the xchg(&kexec_crash_image) would be
869 	 * sufficient.  But since I reuse the memory...
870 	 */
871 	if (mutex_trylock(&kexec_mutex)) {
872 		if (kexec_crash_image) {
873 			struct pt_regs fixed_regs;
874 
875 			crash_setup_regs(&fixed_regs, regs);
876 			crash_save_vmcoreinfo();
877 			machine_crash_shutdown(&fixed_regs);
878 			machine_kexec(kexec_crash_image);
879 		}
880 		mutex_unlock(&kexec_mutex);
881 	}
882 }
883 
884 void crash_kexec(struct pt_regs *regs)
885 {
886 	int old_cpu, this_cpu;
887 
888 	/*
889 	 * Only one CPU is allowed to execute the crash_kexec() code as with
890 	 * panic().  Otherwise parallel calls of panic() and crash_kexec()
891 	 * may stop each other.  To exclude them, we use panic_cpu here too.
892 	 */
893 	this_cpu = raw_smp_processor_id();
894 	old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
895 	if (old_cpu == PANIC_CPU_INVALID) {
896 		/* This is the 1st CPU which comes here, so go ahead. */
897 		__crash_kexec(regs);
898 
899 		/*
900 		 * Reset panic_cpu to allow another panic()/crash_kexec()
901 		 * call.
902 		 */
903 		atomic_set(&panic_cpu, PANIC_CPU_INVALID);
904 	}
905 }
906 
907 size_t crash_get_memory_size(void)
908 {
909 	size_t size = 0;
910 
911 	mutex_lock(&kexec_mutex);
912 	if (crashk_res.end != crashk_res.start)
913 		size = resource_size(&crashk_res);
914 	mutex_unlock(&kexec_mutex);
915 	return size;
916 }
917 
918 void __weak crash_free_reserved_phys_range(unsigned long begin,
919 					   unsigned long end)
920 {
921 	unsigned long addr;
922 
923 	for (addr = begin; addr < end; addr += PAGE_SIZE)
924 		free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
925 }
926 
927 int crash_shrink_memory(unsigned long new_size)
928 {
929 	int ret = 0;
930 	unsigned long start, end;
931 	unsigned long old_size;
932 	struct resource *ram_res;
933 
934 	mutex_lock(&kexec_mutex);
935 
936 	if (kexec_crash_image) {
937 		ret = -ENOENT;
938 		goto unlock;
939 	}
940 	start = crashk_res.start;
941 	end = crashk_res.end;
942 	old_size = (end == 0) ? 0 : end - start + 1;
943 	if (new_size >= old_size) {
944 		ret = (new_size == old_size) ? 0 : -EINVAL;
945 		goto unlock;
946 	}
947 
948 	ram_res = kzalloc(sizeof(*ram_res), GFP_KERNEL);
949 	if (!ram_res) {
950 		ret = -ENOMEM;
951 		goto unlock;
952 	}
953 
954 	start = roundup(start, KEXEC_CRASH_MEM_ALIGN);
955 	end = roundup(start + new_size, KEXEC_CRASH_MEM_ALIGN);
956 
957 	crash_map_reserved_pages();
958 	crash_free_reserved_phys_range(end, crashk_res.end);
959 
960 	if ((start == end) && (crashk_res.parent != NULL))
961 		release_resource(&crashk_res);
962 
963 	ram_res->start = end;
964 	ram_res->end = crashk_res.end;
965 	ram_res->flags = IORESOURCE_BUSY | IORESOURCE_MEM;
966 	ram_res->name = "System RAM";
967 
968 	crashk_res.end = end - 1;
969 
970 	insert_resource(&iomem_resource, ram_res);
971 	crash_unmap_reserved_pages();
972 
973 unlock:
974 	mutex_unlock(&kexec_mutex);
975 	return ret;
976 }
977 
978 static u32 *append_elf_note(u32 *buf, char *name, unsigned type, void *data,
979 			    size_t data_len)
980 {
981 	struct elf_note note;
982 
983 	note.n_namesz = strlen(name) + 1;
984 	note.n_descsz = data_len;
985 	note.n_type   = type;
986 	memcpy(buf, &note, sizeof(note));
987 	buf += (sizeof(note) + 3)/4;
988 	memcpy(buf, name, note.n_namesz);
989 	buf += (note.n_namesz + 3)/4;
990 	memcpy(buf, data, note.n_descsz);
991 	buf += (note.n_descsz + 3)/4;
992 
993 	return buf;
994 }
995 
996 static void final_note(u32 *buf)
997 {
998 	struct elf_note note;
999 
1000 	note.n_namesz = 0;
1001 	note.n_descsz = 0;
1002 	note.n_type   = 0;
1003 	memcpy(buf, &note, sizeof(note));
1004 }
1005 
1006 void crash_save_cpu(struct pt_regs *regs, int cpu)
1007 {
1008 	struct elf_prstatus prstatus;
1009 	u32 *buf;
1010 
1011 	if ((cpu < 0) || (cpu >= nr_cpu_ids))
1012 		return;
1013 
1014 	/* Using ELF notes here is opportunistic.
1015 	 * I need a well defined structure format
1016 	 * for the data I pass, and I need tags
1017 	 * on the data to indicate what information I have
1018 	 * squirrelled away.  ELF notes happen to provide
1019 	 * all of that, so there is no need to invent something new.
1020 	 */
1021 	buf = (u32 *)per_cpu_ptr(crash_notes, cpu);
1022 	if (!buf)
1023 		return;
1024 	memset(&prstatus, 0, sizeof(prstatus));
1025 	prstatus.pr_pid = current->pid;
1026 	elf_core_copy_kernel_regs(&prstatus.pr_reg, regs);
1027 	buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
1028 			      &prstatus, sizeof(prstatus));
1029 	final_note(buf);
1030 }
1031 
1032 static int __init crash_notes_memory_init(void)
1033 {
1034 	/* Allocate memory for saving cpu registers. */
1035 	size_t size, align;
1036 
1037 	/*
1038 	 * crash_notes could be allocated across 2 vmalloc pages when percpu
1039 	 * is vmalloc based . vmalloc doesn't guarantee 2 continuous vmalloc
1040 	 * pages are also on 2 continuous physical pages. In this case the
1041 	 * 2nd part of crash_notes in 2nd page could be lost since only the
1042 	 * starting address and size of crash_notes are exported through sysfs.
1043 	 * Here round up the size of crash_notes to the nearest power of two
1044 	 * and pass it to __alloc_percpu as align value. This can make sure
1045 	 * crash_notes is allocated inside one physical page.
1046 	 */
1047 	size = sizeof(note_buf_t);
1048 	align = min(roundup_pow_of_two(sizeof(note_buf_t)), PAGE_SIZE);
1049 
1050 	/*
1051 	 * Break compile if size is bigger than PAGE_SIZE since crash_notes
1052 	 * definitely will be in 2 pages with that.
1053 	 */
1054 	BUILD_BUG_ON(size > PAGE_SIZE);
1055 
1056 	crash_notes = __alloc_percpu(size, align);
1057 	if (!crash_notes) {
1058 		pr_warn("Memory allocation for saving cpu register states failed\n");
1059 		return -ENOMEM;
1060 	}
1061 	return 0;
1062 }
1063 subsys_initcall(crash_notes_memory_init);
1064 
1065 
/*
 * Parsing of the "crashkernel" command line option.
 *
 * This code is intended to be called from architecture-specific code.
 */
1071 
1072 
1073 /*
1074  * This function parses command lines in the format
1075  *
1076  *   crashkernel=ramsize-range:size[,...][@offset]
1077  *
1078  * The function returns 0 on success and -EINVAL on failure.
1079  */
1080 static int __init parse_crashkernel_mem(char *cmdline,
1081 					unsigned long long system_ram,
1082 					unsigned long long *crash_size,
1083 					unsigned long long *crash_base)
1084 {
1085 	char *cur = cmdline, *tmp;
1086 
1087 	/* for each entry of the comma-separated list */
1088 	do {
1089 		unsigned long long start, end = ULLONG_MAX, size;
1090 
1091 		/* get the start of the range */
1092 		start = memparse(cur, &tmp);
1093 		if (cur == tmp) {
1094 			pr_warn("crashkernel: Memory value expected\n");
1095 			return -EINVAL;
1096 		}
1097 		cur = tmp;
1098 		if (*cur != '-') {
1099 			pr_warn("crashkernel: '-' expected\n");
1100 			return -EINVAL;
1101 		}
1102 		cur++;
1103 
1104 		/* if no ':' is here, than we read the end */
1105 		if (*cur != ':') {
1106 			end = memparse(cur, &tmp);
1107 			if (cur == tmp) {
1108 				pr_warn("crashkernel: Memory value expected\n");
1109 				return -EINVAL;
1110 			}
1111 			cur = tmp;
1112 			if (end <= start) {
1113 				pr_warn("crashkernel: end <= start\n");
1114 				return -EINVAL;
1115 			}
1116 		}
1117 
1118 		if (*cur != ':') {
1119 			pr_warn("crashkernel: ':' expected\n");
1120 			return -EINVAL;
1121 		}
1122 		cur++;
1123 
1124 		size = memparse(cur, &tmp);
1125 		if (cur == tmp) {
1126 			pr_warn("Memory value expected\n");
1127 			return -EINVAL;
1128 		}
1129 		cur = tmp;
1130 		if (size >= system_ram) {
1131 			pr_warn("crashkernel: invalid size\n");
1132 			return -EINVAL;
1133 		}
1134 
1135 		/* match ? */
1136 		if (system_ram >= start && system_ram < end) {
1137 			*crash_size = size;
1138 			break;
1139 		}
1140 	} while (*cur++ == ',');
1141 
1142 	if (*crash_size > 0) {
1143 		while (*cur && *cur != ' ' && *cur != '@')
1144 			cur++;
1145 		if (*cur == '@') {
1146 			cur++;
1147 			*crash_base = memparse(cur, &tmp);
1148 			if (cur == tmp) {
1149 				pr_warn("Memory value expected after '@'\n");
1150 				return -EINVAL;
1151 			}
1152 		}
1153 	}
1154 
1155 	return 0;
1156 }
1157 
1158 /*
1159  * That function parses "simple" (old) crashkernel command lines like
1160  *
1161  *	crashkernel=size[@offset]
1162  *
1163  * It returns 0 on success and -EINVAL on failure.
1164  */
1165 static int __init parse_crashkernel_simple(char *cmdline,
1166 					   unsigned long long *crash_size,
1167 					   unsigned long long *crash_base)
1168 {
1169 	char *cur = cmdline;
1170 
1171 	*crash_size = memparse(cmdline, &cur);
1172 	if (cmdline == cur) {
1173 		pr_warn("crashkernel: memory value expected\n");
1174 		return -EINVAL;
1175 	}
1176 
1177 	if (*cur == '@')
1178 		*crash_base = memparse(cur+1, &cur);
1179 	else if (*cur != ' ' && *cur != '\0') {
1180 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1181 		return -EINVAL;
1182 	}
1183 
1184 	return 0;
1185 }
1186 
1187 #define SUFFIX_HIGH 0
1188 #define SUFFIX_LOW  1
1189 #define SUFFIX_NULL 2
1190 static __initdata char *suffix_tbl[] = {
1191 	[SUFFIX_HIGH] = ",high",
1192 	[SUFFIX_LOW]  = ",low",
1193 	[SUFFIX_NULL] = NULL,
1194 };
1195 
1196 /*
1197  * That function parses "suffix"  crashkernel command lines like
1198  *
1199  *	crashkernel=size,[high|low]
1200  *
1201  * It returns 0 on success and -EINVAL on failure.
1202  */
1203 static int __init parse_crashkernel_suffix(char *cmdline,
1204 					   unsigned long long	*crash_size,
1205 					   const char *suffix)
1206 {
1207 	char *cur = cmdline;
1208 
1209 	*crash_size = memparse(cmdline, &cur);
1210 	if (cmdline == cur) {
1211 		pr_warn("crashkernel: memory value expected\n");
1212 		return -EINVAL;
1213 	}
1214 
1215 	/* check with suffix */
1216 	if (strncmp(cur, suffix, strlen(suffix))) {
1217 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1218 		return -EINVAL;
1219 	}
1220 	cur += strlen(suffix);
1221 	if (*cur != ' ' && *cur != '\0') {
1222 		pr_warn("crashkernel: unrecognized char: %c\n", *cur);
1223 		return -EINVAL;
1224 	}
1225 
1226 	return 0;
1227 }
1228 
1229 static __init char *get_last_crashkernel(char *cmdline,
1230 			     const char *name,
1231 			     const char *suffix)
1232 {
1233 	char *p = cmdline, *ck_cmdline = NULL;
1234 
1235 	/* find crashkernel and use the last one if there are more */
1236 	p = strstr(p, name);
1237 	while (p) {
1238 		char *end_p = strchr(p, ' ');
1239 		char *q;
1240 
1241 		if (!end_p)
1242 			end_p = p + strlen(p);
1243 
1244 		if (!suffix) {
1245 			int i;
1246 
1247 			/* skip the one with any known suffix */
1248 			for (i = 0; suffix_tbl[i]; i++) {
1249 				q = end_p - strlen(suffix_tbl[i]);
1250 				if (!strncmp(q, suffix_tbl[i],
1251 					     strlen(suffix_tbl[i])))
1252 					goto next;
1253 			}
1254 			ck_cmdline = p;
1255 		} else {
1256 			q = end_p - strlen(suffix);
1257 			if (!strncmp(q, suffix, strlen(suffix)))
1258 				ck_cmdline = p;
1259 		}
1260 next:
1261 		p = strstr(p+1, name);
1262 	}
1263 
1264 	if (!ck_cmdline)
1265 		return NULL;
1266 
1267 	return ck_cmdline;
1268 }
1269 
1270 static int __init __parse_crashkernel(char *cmdline,
1271 			     unsigned long long system_ram,
1272 			     unsigned long long *crash_size,
1273 			     unsigned long long *crash_base,
1274 			     const char *name,
1275 			     const char *suffix)
1276 {
1277 	char	*first_colon, *first_space;
1278 	char	*ck_cmdline;
1279 
1280 	BUG_ON(!crash_size || !crash_base);
1281 	*crash_size = 0;
1282 	*crash_base = 0;
1283 
1284 	ck_cmdline = get_last_crashkernel(cmdline, name, suffix);
1285 
1286 	if (!ck_cmdline)
1287 		return -EINVAL;
1288 
1289 	ck_cmdline += strlen(name);
1290 
1291 	if (suffix)
1292 		return parse_crashkernel_suffix(ck_cmdline, crash_size,
1293 				suffix);
1294 	/*
1295 	 * if the commandline contains a ':', then that's the extended
1296 	 * syntax -- if not, it must be the classic syntax
1297 	 */
1298 	first_colon = strchr(ck_cmdline, ':');
1299 	first_space = strchr(ck_cmdline, ' ');
1300 	if (first_colon && (!first_space || first_colon < first_space))
1301 		return parse_crashkernel_mem(ck_cmdline, system_ram,
1302 				crash_size, crash_base);
1303 
1304 	return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
1305 }
1306 
1307 /*
1308  * That function is the entry point for command line parsing and should be
1309  * called from the arch-specific code.
1310  */
1311 int __init parse_crashkernel(char *cmdline,
1312 			     unsigned long long system_ram,
1313 			     unsigned long long *crash_size,
1314 			     unsigned long long *crash_base)
1315 {
1316 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1317 					"crashkernel=", NULL);
1318 }
1319 
1320 int __init parse_crashkernel_high(char *cmdline,
1321 			     unsigned long long system_ram,
1322 			     unsigned long long *crash_size,
1323 			     unsigned long long *crash_base)
1324 {
1325 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1326 				"crashkernel=", suffix_tbl[SUFFIX_HIGH]);
1327 }
1328 
1329 int __init parse_crashkernel_low(char *cmdline,
1330 			     unsigned long long system_ram,
1331 			     unsigned long long *crash_size,
1332 			     unsigned long long *crash_base)
1333 {
1334 	return __parse_crashkernel(cmdline, system_ram, crash_size, crash_base,
1335 				"crashkernel=", suffix_tbl[SUFFIX_LOW]);
1336 }
1337 
1338 static void update_vmcoreinfo_note(void)
1339 {
1340 	u32 *buf = vmcoreinfo_note;
1341 
1342 	if (!vmcoreinfo_size)
1343 		return;
1344 	buf = append_elf_note(buf, VMCOREINFO_NOTE_NAME, 0, vmcoreinfo_data,
1345 			      vmcoreinfo_size);
1346 	final_note(buf);
1347 }
1348 
/*
 * Append a CRASHTIME entry, taken from get_seconds(), to the vmcoreinfo
 * data and regenerate the vmcoreinfo note so that it includes the entry.
 */
void crash_save_vmcoreinfo(void)
{
	vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());

	update_vmcoreinfo_note();
}
1354 
1355 void vmcoreinfo_append_str(const char *fmt, ...)
1356 {
1357 	va_list args;
1358 	char buf[0x50];
1359 	size_t r;
1360 
1361 	va_start(args, fmt);
1362 	r = vscnprintf(buf, sizeof(buf), fmt, args);
1363 	va_end(args);
1364 
1365 	r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
1366 
1367 	memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
1368 
1369 	vmcoreinfo_size += r;
1370 }
1371 
1372 /*
1373  * provide an empty default implementation here -- architecture
1374  * code may override this
1375  */
1376 void __weak arch_crash_save_vmcoreinfo(void)
1377 {}
1378 
1379 unsigned long __weak paddr_vmcoreinfo_note(void)
1380 {
1381 	return __pa((unsigned long)(char *)&vmcoreinfo_note);
1382 }
1383 
1384 static int __init crash_save_vmcoreinfo_init(void)
1385 {
1386 	VMCOREINFO_OSRELEASE(init_uts_ns.name.release);
1387 	VMCOREINFO_PAGESIZE(PAGE_SIZE);
1388 
1389 	VMCOREINFO_SYMBOL(init_uts_ns);
1390 	VMCOREINFO_SYMBOL(node_online_map);
1391 #ifdef CONFIG_MMU
1392 	VMCOREINFO_SYMBOL(swapper_pg_dir);
1393 #endif
1394 	VMCOREINFO_SYMBOL(_stext);
1395 	VMCOREINFO_SYMBOL(vmap_area_list);
1396 
1397 #ifndef CONFIG_NEED_MULTIPLE_NODES
1398 	VMCOREINFO_SYMBOL(mem_map);
1399 	VMCOREINFO_SYMBOL(contig_page_data);
1400 #endif
1401 #ifdef CONFIG_SPARSEMEM
1402 	VMCOREINFO_SYMBOL(mem_section);
1403 	VMCOREINFO_LENGTH(mem_section, NR_SECTION_ROOTS);
1404 	VMCOREINFO_STRUCT_SIZE(mem_section);
1405 	VMCOREINFO_OFFSET(mem_section, section_mem_map);
1406 #endif
1407 	VMCOREINFO_STRUCT_SIZE(page);
1408 	VMCOREINFO_STRUCT_SIZE(pglist_data);
1409 	VMCOREINFO_STRUCT_SIZE(zone);
1410 	VMCOREINFO_STRUCT_SIZE(free_area);
1411 	VMCOREINFO_STRUCT_SIZE(list_head);
1412 	VMCOREINFO_SIZE(nodemask_t);
1413 	VMCOREINFO_OFFSET(page, flags);
1414 	VMCOREINFO_OFFSET(page, _count);
1415 	VMCOREINFO_OFFSET(page, mapping);
1416 	VMCOREINFO_OFFSET(page, lru);
1417 	VMCOREINFO_OFFSET(page, _mapcount);
1418 	VMCOREINFO_OFFSET(page, private);
1419 	VMCOREINFO_OFFSET(pglist_data, node_zones);
1420 	VMCOREINFO_OFFSET(pglist_data, nr_zones);
1421 #ifdef CONFIG_FLAT_NODE_MEM_MAP
1422 	VMCOREINFO_OFFSET(pglist_data, node_mem_map);
1423 #endif
1424 	VMCOREINFO_OFFSET(pglist_data, node_start_pfn);
1425 	VMCOREINFO_OFFSET(pglist_data, node_spanned_pages);
1426 	VMCOREINFO_OFFSET(pglist_data, node_id);
1427 	VMCOREINFO_OFFSET(zone, free_area);
1428 	VMCOREINFO_OFFSET(zone, vm_stat);
1429 	VMCOREINFO_OFFSET(zone, spanned_pages);
1430 	VMCOREINFO_OFFSET(free_area, free_list);
1431 	VMCOREINFO_OFFSET(list_head, next);
1432 	VMCOREINFO_OFFSET(list_head, prev);
1433 	VMCOREINFO_OFFSET(vmap_area, va_start);
1434 	VMCOREINFO_OFFSET(vmap_area, list);
1435 	VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
1436 	log_buf_kexec_setup();
1437 	VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
1438 	VMCOREINFO_NUMBER(NR_FREE_PAGES);
1439 	VMCOREINFO_NUMBER(PG_lru);
1440 	VMCOREINFO_NUMBER(PG_private);
1441 	VMCOREINFO_NUMBER(PG_swapcache);
1442 	VMCOREINFO_NUMBER(PG_slab);
1443 #ifdef CONFIG_MEMORY_FAILURE
1444 	VMCOREINFO_NUMBER(PG_hwpoison);
1445 #endif
1446 	VMCOREINFO_NUMBER(PG_head_mask);
1447 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
1448 #ifdef CONFIG_X86
1449 	VMCOREINFO_NUMBER(KERNEL_IMAGE_SIZE);
1450 #endif
1451 #ifdef CONFIG_HUGETLBFS
1452 	VMCOREINFO_SYMBOL(free_huge_page);
1453 #endif
1454 
1455 	arch_crash_save_vmcoreinfo();
1456 	update_vmcoreinfo_note();
1457 
1458 	return 0;
1459 }
1460 
1461 subsys_initcall(crash_save_vmcoreinfo_init);
1462 
1463 /*
1464  * Move into place and start executing a preloaded standalone
1465  * executable.  If nothing was preloaded return an error.
1466  */
1467 int kernel_kexec(void)
1468 {
1469 	int error = 0;
1470 
1471 	if (!mutex_trylock(&kexec_mutex))
1472 		return -EBUSY;
1473 	if (!kexec_image) {
1474 		error = -EINVAL;
1475 		goto Unlock;
1476 	}
1477 
1478 #ifdef CONFIG_KEXEC_JUMP
1479 	if (kexec_image->preserve_context) {
1480 		lock_system_sleep();
1481 		pm_prepare_console();
1482 		error = freeze_processes();
1483 		if (error) {
1484 			error = -EBUSY;
1485 			goto Restore_console;
1486 		}
1487 		suspend_console();
1488 		error = dpm_suspend_start(PMSG_FREEZE);
1489 		if (error)
1490 			goto Resume_console;
1491 		/* At this point, dpm_suspend_start() has been called,
1492 		 * but *not* dpm_suspend_end(). We *must* call
1493 		 * dpm_suspend_end() now.  Otherwise, drivers for
1494 		 * some devices (e.g. interrupt controllers) become
1495 		 * desynchronized with the actual state of the
1496 		 * hardware at resume time, and evil weirdness ensues.
1497 		 */
1498 		error = dpm_suspend_end(PMSG_FREEZE);
1499 		if (error)
1500 			goto Resume_devices;
1501 		error = disable_nonboot_cpus();
1502 		if (error)
1503 			goto Enable_cpus;
1504 		local_irq_disable();
1505 		error = syscore_suspend();
1506 		if (error)
1507 			goto Enable_irqs;
1508 	} else
1509 #endif
1510 	{
1511 		kexec_in_progress = true;
1512 		kernel_restart_prepare(NULL);
1513 		migrate_to_reboot_cpu();
1514 
1515 		/*
1516 		 * migrate_to_reboot_cpu() disables CPU hotplug assuming that
1517 		 * no further code needs to use CPU hotplug (which is true in
1518 		 * the reboot case). However, the kexec path depends on using
1519 		 * CPU hotplug again; so re-enable it here.
1520 		 */
1521 		cpu_hotplug_enable();
1522 		pr_emerg("Starting new kernel\n");
1523 		machine_shutdown();
1524 	}
1525 
1526 	machine_kexec(kexec_image);
1527 
1528 #ifdef CONFIG_KEXEC_JUMP
1529 	if (kexec_image->preserve_context) {
1530 		syscore_resume();
1531  Enable_irqs:
1532 		local_irq_enable();
1533  Enable_cpus:
1534 		enable_nonboot_cpus();
1535 		dpm_resume_start(PMSG_RESTORE);
1536  Resume_devices:
1537 		dpm_resume_end(PMSG_RESTORE);
1538  Resume_console:
1539 		resume_console();
1540 		thaw_processes();
1541  Restore_console:
1542 		pm_restore_console();
1543 		unlock_system_sleep();
1544 	}
1545 #endif
1546 
1547  Unlock:
1548 	mutex_unlock(&kexec_mutex);
1549 	return error;
1550 }
1551 
1552 /*
1553  * Add and remove page tables for crashkernel memory
1554  *
1555  * Provide an empty default implementation here -- architecture
1556  * code may override this
1557  */
1558 void __weak crash_map_reserved_pages(void)
1559 {}
1560 
1561 void __weak crash_unmap_reserved_pages(void)
1562 {}
1563