// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <[email protected]>
 *            Dave Hansen <[email protected]>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

#define MEMORY_CLASS_NAME	"memory"

static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};

int mhp_online_type_from_str(const char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
		if (sysfs_streq(str, online_type_to_str[i]))
			return i;
	}
	return -EINVAL;
}
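
/*
 * Illustrative mapping: because online_type_to_str[] is indexed by the
 * MMOP_* constants, the returned index *is* the MMOP_* value:
 *
 *	mhp_online_type_from_str("online_movable") == MMOP_ONLINE_MOVABLE
 *	mhp_online_type_from_str("bogus")          == -EINVAL
 */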

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long memory_block_id(unsigned long section_nr)
{
	return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	return memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
	return pfn_to_block_id(PFN_DOWN(phys));
}
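
/*
 * Worked example (assuming 4 KiB pages, 128 MiB sections and a 128 MiB
 * memory block, i.e. sections_per_block == 1, as on default x86-64):
 * physical address 0x10000000 (256 MiB) -> PFN 0x10000 -> section 2
 * -> block id 2.
 */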

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static const struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};
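
/*
 * Registering this subsystem (see memory_dev_init()) is what creates
 * /sys/devices/system/memory/; each memory block device then appears
 * as /sys/devices/system/memory/memoryN, where N is the block id.
 */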

/*
 * Memory blocks are cached in a local radix tree to avoid
 * a costly linear search for the corresponding device on
 * the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

/*
 * Memory groups, indexed by memory group id (mgid).
 */
static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);
#define MEMORY_GROUP_MARK_DYNAMIC	XA_MARK_1

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);
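
/*
 * Minimal client sketch (hypothetical, not part of this file): a driver
 * reacting to hotplug events registers on the chain above. The callback
 * receives a struct memory_notify describing the affected range.
 *
 *	static int my_mem_cb(struct notifier_block *nb, unsigned long action,
 *			     void *arg)
 *	{
 *		struct memory_notify *mn = arg;
 *
 *		if (action == MEM_ONLINE)
 *			pr_info("onlined %lu pages at pfn %lu\n",
 *				mn->nr_pages, mn->start_pfn);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_mem_nb = { .notifier_call = my_mem_cb };
 *	...
 *	register_memory_notifier(&my_mem_nb);
 */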

static void memory_block_release(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	/* Verify that the altmap is freed */
	WARN_ON(mem->altmap);
	kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
	return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);
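
/*
 * Architectures may override the __weak default above; e.g. x86-64
 * scales the block size up on large machines so that fewer sysfs
 * entries are created (see probe_memory_block_size() in
 * arch/x86/mm/init_64.c).
 */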

/* Show the memory block ID, relative to the memory block size */
static ssize_t phys_index_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);

	return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr));
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	const char *output;

	/*
	 * We can probably put these states in a nice little array
	 * so that they're not open-coded
	 */
	switch (mem->state) {
	case MEM_ONLINE:
		output = "online";
		break;
	case MEM_OFFLINE:
		output = "offline";
		break;
	case MEM_GOING_OFFLINE:
		output = "going-offline";
		break;
	default:
		WARN_ON(1);
		return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
	}

	return sysfs_emit(buf, "%s\n", output);
}
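
/*
 * Example read (hypothetical block number):
 *
 *	$ cat /sys/devices/system/memory/memory32/state
 *	online
 */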

int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}

#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
static unsigned long memblk_nr_poison(struct memory_block *mem);
#else
static inline unsigned long memblk_nr_poison(struct memory_block *mem)
{
	return 0;
}
#endif

/*
 * Must acquire mem_hotplug_lock in write mode.
 */
static int memory_block_online(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = 0;
	struct memory_notify arg;
	struct zone *zone;
	int ret;

	if (memblk_nr_poison(mem))
		return -EHWPOISON;

	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
				  start_pfn, nr_pages);

	/*
	 * Although vmemmap pages have a different lifecycle than the pages
	 * they describe (they remain until the memory is unplugged), doing
	 * their initialization and accounting at memory onlining/offlining
	 * stage helps to keep accounting easier to follow - e.g. vmemmap
	 * pages belong to the same zone as the memory they describe.
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	arg.altmap_start_pfn = start_pfn;
	arg.altmap_nr_pages = nr_vmemmap_pages;
	arg.start_pfn = start_pfn + nr_vmemmap_pages;
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
	mem_hotplug_begin();
	ret = memory_notify(MEM_PREPARE_ONLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
		goto out_notifier;

	if (nr_vmemmap_pages) {
		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
						zone, mem->altmap->inaccessible);
		if (ret)
			goto out;
	}

	ret = online_pages(start_pfn + nr_vmemmap_pages,
			   nr_pages - nr_vmemmap_pages, zone, mem->group);
	if (ret) {
		if (nr_vmemmap_pages)
			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
		goto out;
	}

	/*
	 * Account once onlining succeeded. If the zone was unpopulated, it is
	 * now already properly populated.
	 */
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  nr_vmemmap_pages);

	mem->zone = zone;
	mem_hotplug_done();
	return ret;
out:
	/* Undo the effects of MEM_PREPARE_ONLINE on failure. */
	memory_notify(MEM_FINISH_OFFLINE, &arg);
out_notifier:
	mem_hotplug_done();
	return ret;
}

/*
 * Must acquire mem_hotplug_lock in write mode.
 */
static int memory_block_offline(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = 0;
	struct memory_notify arg;
	int ret;

	if (!mem->zone)
		return -EINVAL;

	/*
	 * Unaccount before offlining, such that unpopulated zone and kthreads
	 * can properly be torn down in offline_pages().
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	mem_hotplug_begin();
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  -nr_vmemmap_pages);

	ret = offline_pages(start_pfn + nr_vmemmap_pages,
			    nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
	if (ret) {
		/* offline_pages() failed. Account back. */
		if (nr_vmemmap_pages)
			adjust_present_page_count(pfn_to_page(start_pfn),
						  mem->group, nr_vmemmap_pages);
		goto out;
	}

	if (nr_vmemmap_pages)
		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);

	mem->zone = NULL;
	arg.altmap_start_pfn = start_pfn;
	arg.altmap_nr_pages = nr_vmemmap_pages;
	arg.start_pfn = start_pfn + nr_vmemmap_pages;
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
	memory_notify(MEM_FINISH_OFFLINE, &arg);
out:
	mem_hotplug_done();
	return ret;
}

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(struct memory_block *mem, unsigned long action)
{
	int ret;

	switch (action) {
	case MEM_ONLINE:
		ret = memory_block_online(mem);
		break;
	case MEM_OFFLINE:
		ret = memory_block_offline(mem);
		break;
	default:
		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
		     "%ld\n", __func__, mem->start_section_nr, action, action);
		ret = -EINVAL;
	}

	return ret;
}

static int memory_block_change_state(struct memory_block *mem,
		unsigned long to_state, unsigned long from_state_req)
{
	int ret = 0;

	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;

	ret = memory_block_action(mem, to_state);
	mem->state = ret ? from_state_req : to_state;

	return ret;
}

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (mem->state == MEM_ONLINE)
		return 0;

	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	mem->online_type = MMOP_OFFLINE;

	return ret;
}

static int memory_subsys_offline(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	if (mem->state == MEM_OFFLINE)
		return 0;

	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	const int online_type = mhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (online_type < 0)
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
	case MMOP_ONLINE:
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
		break;
	case MMOP_OFFLINE:
		ret = device_offline(&mem->dev);
		break;
	default:
		ret = -EINVAL; /* should never happen */
	}

	unlock_device_hotplug();

	if (ret < 0)
		return ret;
	if (ret)
		return -EINVAL;

	return count;
}
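
/*
 * Example transitions from userspace (hypothetical block number):
 *
 *	# echo online_movable > /sys/devices/system/memory/memory32/state
 *	# echo offline > /sys/devices/system/memory/memory32/state
 *
 * Note that a positive (non-error) return from device_online() /
 * device_offline() is also reported to userspace as -EINVAL above.
 */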

/*
 * Legacy interface that we cannot remove: s390x exposes the storage increment
 * covered by a memory block, allowing for identifying which memory blocks
 * comprise a storage increment. Since a memory block spans complete
 * storage increments nowadays, this interface is basically unused. Other
 * archs never exposed != 0.
 */
static ssize_t phys_device_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);

	return sysfs_emit(buf, "%d\n",
			  arch_get_memory_phys_device(start_pfn));
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static int print_allowed_zone(char *buf, int len, int nid,
			      struct memory_group *group,
			      unsigned long start_pfn, unsigned long nr_pages,
			      int online_type, struct zone *default_zone)
{
	struct zone *zone;

	zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
	if (zone == default_zone)
		return 0;

	return sysfs_emit_at(buf, len, " %s", zone->name);
}

static ssize_t valid_zones_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct memory_group *group = mem->group;
	struct zone *default_zone;
	int nid = mem->nid;
	int len;

	/*
	 * Check the existing zone. Make sure that we do that only on the
	 * online nodes, otherwise the page_zone is not reliable.
	 */
	if (mem->state == MEM_ONLINE) {
		/*
		 * If !mem->zone, the memory block spans multiple zones and
		 * cannot get offlined.
		 */
		return sysfs_emit(buf, "%s\n",
				  mem->zone ? mem->zone->name : "none");
	}

	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
					  start_pfn, nr_pages);

	len = sysfs_emit(buf, "%s", default_zone->name);
	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
				  MMOP_ONLINE_KERNEL, default_zone);
	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
				  MMOP_ONLINE_MOVABLE, default_zone);
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR_RO(valid_zones);
#endif
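
/*
 * Example output for an offline block that could be onlined to either
 * zone (the default zone is printed first):
 *
 *	$ cat /sys/devices/system/memory/memory34/valid_zones
 *	Normal Movable
 *
 * For an online block, a single zone name (or "none") is printed.
 */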

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);
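
/*
 * Note the bare %lx above: the value is printed in hex without a "0x"
 * prefix, e.g. "8000000" for a 128 MiB memory block.
 */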

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  online_type_to_str[mhp_get_default_online_type()]);
}

static ssize_t auto_online_blocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	const int online_type = mhp_online_type_from_str(buf);

	if (online_type < 0)
		return -EINVAL;

	mhp_set_default_online_type(online_type);
	return count;
}

static DEVICE_ATTR_RW(auto_online_blocks);
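
/*
 * Example policy change (takes effect for subsequently hot-added memory):
 *
 *	# echo online_movable > /sys/devices/system/memory/auto_online_blocks
 */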

#ifdef CONFIG_CRASH_HOTPLUG
#include <linux/kexec.h>
static ssize_t crash_hotplug_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", crash_check_hotplug_support());
}
static DEVICE_ATTR_RO(crash_hotplug);
#endif

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace.  The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	u64 phys_addr;
	int nid, ret;
	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

	ret = kstrtoull(buf, 0, &phys_addr);
	if (ret)
		return ret;

	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	nid = memory_add_physaddr_to_nid(phys_addr);
	ret = __add_memory(nid, phys_addr,
			   MIN_MEMORY_BLOCK_SIZE * sections_per_block,
			   MHP_NONE);

	if (ret)
		goto out;

	ret = count;
out:
	unlock_device_hotplug();
	return ret;
}

static DEVICE_ATTR_WO(probe);
#endif
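
/*
 * Example (CONFIG_ARCH_MEMORY_PROBE only; the address is hypothetical
 * and must be memory-block aligned, as checked above):
 *
 *	# echo 0x100000000 > /sys/devices/system/memory/probe
 */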

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = soft_offline_page(pfn, 0);
	return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = memory_failure(pfn, MF_SW_SIMULATED);
	if (ret == -EOPNOTSUPP)
		ret = 0;
	return ret ? ret : count;
}

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif
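
/*
 * Both files take a physical *address*, not a PFN; the low bits are
 * shifted away above. Example (hypothetical address):
 *
 *	# echo 0x200000000 > /sys/devices/system/memory/soft_offline_page
 */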

/* See phys_device_show(). */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}

/*
 * A reference for the returned memory block device is acquired.
 *
 * Called under device_hotplug_lock.
 */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
	struct memory_block *mem;

	mem = xa_load(&memory_blocks, block_id);
	if (mem)
		get_device(&mem->dev);
	return mem;
}

/*
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(unsigned long section_nr)
{
	unsigned long block_id = memory_block_id(section_nr);

	return find_memory_block_by_id(block_id);
}

static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

static const struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};

static int __add_memory_block(struct memory_block *memory)
{
	int ret;

	memory->dev.bus = &memory_subsys;
	memory->dev.id = memory->start_section_nr / sections_per_block;
	memory->dev.release = memory_block_release;
	memory->dev.groups = memory_memblk_attr_groups;
	memory->dev.offline = memory->state == MEM_OFFLINE;

	ret = device_register(&memory->dev);
	if (ret) {
		put_device(&memory->dev);
		return ret;
	}
	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
			      GFP_KERNEL));
	if (ret)
		device_unregister(&memory->dev);

	return ret;
}

static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
						     int nid)
{
	const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct zone *zone, *matching_zone = NULL;
	pg_data_t *pgdat = NODE_DATA(nid);
	int i;

	/*
	 * This logic only works for early memory, when the applicable zones
	 * already span the memory block. We don't expect overlapping zones on
	 * a single node for early memory. So if we're told that some PFNs
	 * of a node fall into this memory block, we can assume that all node
	 * zones that intersect with the memory block are actually applicable.
	 * No need to look at the memmap.
	 */
	for (i = 0; i < MAX_NR_ZONES; i++) {
		zone = pgdat->node_zones + i;
		if (!populated_zone(zone))
			continue;
		if (!zone_intersects(zone, start_pfn, nr_pages))
			continue;
		if (!matching_zone) {
			matching_zone = zone;
			continue;
		}
		/* Spans multiple zones ... */
		matching_zone = NULL;
		break;
	}
	return matching_zone;
}

#ifdef CONFIG_NUMA
/**
 * memory_block_add_nid() - Indicate that system RAM falling into this memory
 *			    block device (partially) belongs to the given node.
 * @mem: The memory block device.
 * @nid: The node id.
 * @context: The memory initialization context.
 *
 * Indicate that system RAM falling into this memory block (partially) belongs
 * to the given node. If the context indicates ("early") that we are adding the
 * node during node device subsystem initialization, this will also properly
 * set/adjust mem->zone based on the zone ranges of the given node.
 */
void memory_block_add_nid(struct memory_block *mem, int nid,
			  enum meminit_context context)
{
	if (context == MEMINIT_EARLY && mem->nid != nid) {
		/*
		 * For early memory we have to determine the zone when setting
		 * the node id and handle multiple nodes spanning a single
		 * memory block by indicating via zone == NULL that we're not
		 * dealing with a single zone. So if we're setting the node id
		 * the first time, determine if there is a single zone. If we're
		 * setting the node id a second time to a different node,
		 * invalidate the single detected zone.
		 */
		if (mem->nid == NUMA_NO_NODE)
			mem->zone = early_node_zone_for_memory_block(mem, nid);
		else
			mem->zone = NULL;
	}

	/*
	 * If this memory block spans multiple nodes, we only indicate
	 * the last processed node. If we span multiple nodes (not applicable
	 * to hotplugged memory), zone == NULL will prohibit memory offlining
	 * and consequently unplug.
	 */
	mem->nid = nid;
}
#endif

static int add_memory_block(unsigned long block_id, unsigned long state,
			    struct vmem_altmap *altmap,
			    struct memory_group *group)
{
	struct memory_block *mem;
	int ret = 0;

	mem = find_memory_block_by_id(block_id);
	if (mem) {
		put_device(&mem->dev);
		return -EEXIST;
	}
	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	mem->nid = NUMA_NO_NODE;
	mem->altmap = altmap;
	INIT_LIST_HEAD(&mem->group_next);

#ifndef CONFIG_NUMA
	if (state == MEM_ONLINE)
		/*
		 * MEM_ONLINE at this point implies early memory. With NUMA,
		 * we'll determine the zone when setting the node id via
		 * memory_block_add_nid(). Memory hotplug updates the zone
		 * manually when memory onlining/offlining succeeds.
		 */
		mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
#endif /* CONFIG_NUMA */

	ret = __add_memory_block(mem);
	if (ret)
		return ret;

	if (group) {
		mem->group = group;
		list_add(&mem->group_next, &group->memory_blocks);
	}

	return 0;
}

static int add_hotplug_memory_block(unsigned long block_id,
				    struct vmem_altmap *altmap,
				    struct memory_group *group)
{
	return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
}

static void remove_memory_block(struct memory_block *memory)
{
	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
		return;

	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

	if (memory->group) {
		list_del(&memory->group_next);
		memory->group = NULL;
	}

	/* drop the ref. we got via find_memory_block() */
	put_device(&memory->dev);
	device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size,
				struct vmem_altmap *altmap,
				struct memory_group *group)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = add_hotplug_memory_block(block_id, altmap, group);
		if (ret)
			break;
	}
	if (ret) {
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
			remove_memory_block(mem);
		}
	}
	return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
		num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
		unregister_memory_block_under_nodes(mem);
		remove_memory_block(mem);
	}
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
#ifdef CONFIG_CRASH_HOTPLUG
	&dev_attr_crash_hotplug.attr,
#endif
	NULL
};

static const struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};

/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
	int ret;
	unsigned long block_sz, block_id, nr;

	/* Validate the configured memory block size */
	block_sz = memory_block_size_bytes();
	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	/*
	 * Create entries for memory sections that were found during boot
	 * and have been initialized. Use @block_id to track the last
	 * handled block and initialize it to an invalid value (ULONG_MAX)
	 * to bypass the block ID matching check for the first present
	 * block so that it can be covered.
	 */
	block_id = ULONG_MAX;
	for_each_present_section_nr(0, nr) {
		if (block_id != ULONG_MAX && memory_block_id(nr) == block_id)
			continue;

		block_id = memory_block_id(nr);
		ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
		if (ret) {
			panic("%s() failed to add memory block: %d\n",
			      __func__, ret);
		}
	}
}
977ea884641SDavid Hildenbrand 
978ea884641SDavid Hildenbrand /**
979ea884641SDavid Hildenbrand  * walk_memory_blocks - walk through all present memory blocks overlapped
980ea884641SDavid Hildenbrand  *			by the range [start, start + size)
981ea884641SDavid Hildenbrand  *
982ea884641SDavid Hildenbrand  * @start: start address of the memory range
983ea884641SDavid Hildenbrand  * @size: size of the memory range
984ea884641SDavid Hildenbrand  * @arg: argument passed to func
985ea884641SDavid Hildenbrand  * @func: callback for each memory section walked
986ea884641SDavid Hildenbrand  *
987ea884641SDavid Hildenbrand  * This function walks through all present memory blocks overlapped by the
988ea884641SDavid Hildenbrand  * range [start, start + size), calling func on each memory block.
989ea884641SDavid Hildenbrand  *
990ea884641SDavid Hildenbrand  * In case func() returns an error, walking is aborted and the error is
991ea884641SDavid Hildenbrand  * returned.
9924fb6eabfSScott Cheloha  *
9934fb6eabfSScott Cheloha  * Called under device_hotplug_lock.
994ea884641SDavid Hildenbrand  */
walk_memory_blocks(unsigned long start,unsigned long size,void * arg,walk_memory_blocks_func_t func)995ea884641SDavid Hildenbrand int walk_memory_blocks(unsigned long start, unsigned long size,
996ea884641SDavid Hildenbrand 		       void *arg, walk_memory_blocks_func_t func)
997ea884641SDavid Hildenbrand {
998ea884641SDavid Hildenbrand 	const unsigned long start_block_id = phys_to_block_id(start);
999ea884641SDavid Hildenbrand 	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
1000ea884641SDavid Hildenbrand 	struct memory_block *mem;
1001ea884641SDavid Hildenbrand 	unsigned long block_id;
1002ea884641SDavid Hildenbrand 	int ret = 0;
1003ea884641SDavid Hildenbrand 
1004dd625285SDavid Hildenbrand 	if (!size)
1005dd625285SDavid Hildenbrand 		return 0;
1006dd625285SDavid Hildenbrand 
1007ea884641SDavid Hildenbrand 	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
1008dd625285SDavid Hildenbrand 		mem = find_memory_block_by_id(block_id);
1009ea884641SDavid Hildenbrand 		if (!mem)
1010ea884641SDavid Hildenbrand 			continue;
1011ea884641SDavid Hildenbrand 
1012ea884641SDavid Hildenbrand 		ret = func(mem, arg);
1013ea884641SDavid Hildenbrand 		put_device(&mem->dev);
1014ea884641SDavid Hildenbrand 		if (ret)
1015ea884641SDavid Hildenbrand 			break;
1016ea884641SDavid Hildenbrand 	}
1017ea884641SDavid Hildenbrand 	return ret;
1018ea884641SDavid Hildenbrand }
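
/*
 * A hedged usage sketch, not part of the original file: count how many
 * memory blocks overlapping a physical range are online. The callback
 * matches walk_memory_blocks_func_t; per the comment above, the caller
 * is expected to hold device_hotplug_lock. Both function names here are
 * illustrative.
 */
static int count_online_cb(struct memory_block *mem, void *arg)
{
	unsigned long *count = arg;

	if (mem->state == MEM_ONLINE)
		(*count)++;
	return 0;	/* keep walking */
}

static unsigned long __maybe_unused count_online_blocks(unsigned long start,
							unsigned long size)
{
	unsigned long count = 0;

	walk_memory_blocks(start, size, &count, count_online_cb);
	return count;
}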
10192c91f8fcSDavid Hildenbrand 
10202c91f8fcSDavid Hildenbrand struct for_each_memory_block_cb_data {
10212c91f8fcSDavid Hildenbrand 	walk_memory_blocks_func_t func;
10222c91f8fcSDavid Hildenbrand 	void *arg;
10232c91f8fcSDavid Hildenbrand };
10242c91f8fcSDavid Hildenbrand 
for_each_memory_block_cb(struct device * dev,void * data)10252c91f8fcSDavid Hildenbrand static int for_each_memory_block_cb(struct device *dev, void *data)
10262c91f8fcSDavid Hildenbrand {
10272c91f8fcSDavid Hildenbrand 	struct memory_block *mem = to_memory_block(dev);
10282c91f8fcSDavid Hildenbrand 	struct for_each_memory_block_cb_data *cb_data = data;
10292c91f8fcSDavid Hildenbrand 
10302c91f8fcSDavid Hildenbrand 	return cb_data->func(mem, cb_data->arg);
10312c91f8fcSDavid Hildenbrand }
10322c91f8fcSDavid Hildenbrand 
10332c91f8fcSDavid Hildenbrand /**
10342c91f8fcSDavid Hildenbrand  * for_each_memory_block - walk through all present memory blocks
10352c91f8fcSDavid Hildenbrand  *
10362c91f8fcSDavid Hildenbrand  * @arg: argument passed to func
10372c91f8fcSDavid Hildenbrand  * @func: callback for each memory block walked
10382c91f8fcSDavid Hildenbrand  *
10392c91f8fcSDavid Hildenbrand  * This function walks through all present memory blocks, calling func on
10402c91f8fcSDavid Hildenbrand  * each memory block.
10412c91f8fcSDavid Hildenbrand  *
10422c91f8fcSDavid Hildenbrand  * In case func() returns an error, walking is aborted and the error is
10432c91f8fcSDavid Hildenbrand  * returned.
10442c91f8fcSDavid Hildenbrand  */
for_each_memory_block(void * arg,walk_memory_blocks_func_t func)10452c91f8fcSDavid Hildenbrand int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
10462c91f8fcSDavid Hildenbrand {
10472c91f8fcSDavid Hildenbrand 	struct for_each_memory_block_cb_data cb_data = {
10482c91f8fcSDavid Hildenbrand 		.func = func,
10492c91f8fcSDavid Hildenbrand 		.arg = arg,
10502c91f8fcSDavid Hildenbrand 	};
10512c91f8fcSDavid Hildenbrand 
10522c91f8fcSDavid Hildenbrand 	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
10532c91f8fcSDavid Hildenbrand 				for_each_memory_block_cb);
10542c91f8fcSDavid Hildenbrand }
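
/*
 * A minimal sketch, not part of the original file: log every present
 * memory block via for_each_memory_block(). It assumes dev.id carries
 * the block id, as assigned when the block device was created earlier
 * in this file. Unlike walk_memory_blocks(), no per-device reference
 * needs to be dropped here; bus_for_each_dev() manages that internally.
 */
static int log_block_cb(struct memory_block *mem, void *arg)
{
	pr_info("memory block %u: state %lu\n", mem->dev.id, mem->state);
	return 0;
}

static void __maybe_unused log_all_memory_blocks(void)
{
	for_each_memory_block(NULL, log_block_cb);
}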
1055028fc57aSDavid Hildenbrand 
1056028fc57aSDavid Hildenbrand /*
1057028fc57aSDavid Hildenbrand  * This is an internal helper to unify allocation and initialization of
1058028fc57aSDavid Hildenbrand  * memory groups. Note that the passed memory group will be copied to a
1059028fc57aSDavid Hildenbrand  * dynamically allocated memory group. After this call, the passed
1060028fc57aSDavid Hildenbrand  * memory group should no longer be used.
1061028fc57aSDavid Hildenbrand  */
memory_group_register(struct memory_group group)1062028fc57aSDavid Hildenbrand static int memory_group_register(struct memory_group group)
1063028fc57aSDavid Hildenbrand {
1064028fc57aSDavid Hildenbrand 	struct memory_group *new_group;
1065028fc57aSDavid Hildenbrand 	uint32_t mgid;
1066028fc57aSDavid Hildenbrand 	int ret;
1067028fc57aSDavid Hildenbrand 
1068028fc57aSDavid Hildenbrand 	if (!node_possible(group.nid))
1069028fc57aSDavid Hildenbrand 		return -EINVAL;
1070028fc57aSDavid Hildenbrand 
1071028fc57aSDavid Hildenbrand 	new_group = kzalloc(sizeof(group), GFP_KERNEL);
1072028fc57aSDavid Hildenbrand 	if (!new_group)
1073028fc57aSDavid Hildenbrand 		return -ENOMEM;
1074028fc57aSDavid Hildenbrand 	*new_group = group;
1075028fc57aSDavid Hildenbrand 	INIT_LIST_HEAD(&new_group->memory_blocks);
1076028fc57aSDavid Hildenbrand 
1077028fc57aSDavid Hildenbrand 	ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b,
1078028fc57aSDavid Hildenbrand 		       GFP_KERNEL);
1079028fc57aSDavid Hildenbrand 	if (ret) {
1080028fc57aSDavid Hildenbrand 		kfree(new_group);
1081028fc57aSDavid Hildenbrand 		return ret;
10823fcebf90SDavid Hildenbrand 	} else if (group.is_dynamic) {
10833fcebf90SDavid Hildenbrand 		xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC);
1084028fc57aSDavid Hildenbrand 	}
1085028fc57aSDavid Hildenbrand 	return mgid;
1086028fc57aSDavid Hildenbrand }
1087028fc57aSDavid Hildenbrand 
1088028fc57aSDavid Hildenbrand /**
1089028fc57aSDavid Hildenbrand  * memory_group_register_static() - Register a static memory group.
1090028fc57aSDavid Hildenbrand  * @nid: The node id.
1091028fc57aSDavid Hildenbrand  * @max_pages: The maximum number of pages we'll have in this static memory
1092028fc57aSDavid Hildenbrand  *	       group.
1093028fc57aSDavid Hildenbrand  *
1094028fc57aSDavid Hildenbrand  * Register a new static memory group and return the memory group id.
1095028fc57aSDavid Hildenbrand  * All memory in the group belongs to a single unit, such as a DIMM. All
1096028fc57aSDavid Hildenbrand  * memory belonging to a static memory group is added in one go and is
1097028fc57aSDavid Hildenbrand  * later removed in one go -- it's static.
1098028fc57aSDavid Hildenbrand  *
1099028fc57aSDavid Hildenbrand  * Returns an error if out of memory, if the node id is invalid, if no new
1100028fc57aSDavid Hildenbrand  * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
1101028fc57aSDavid Hildenbrand  * returns the new memory group id.
1102028fc57aSDavid Hildenbrand  */
memory_group_register_static(int nid,unsigned long max_pages)1103028fc57aSDavid Hildenbrand int memory_group_register_static(int nid, unsigned long max_pages)
1104028fc57aSDavid Hildenbrand {
1105028fc57aSDavid Hildenbrand 	struct memory_group group = {
1106028fc57aSDavid Hildenbrand 		.nid = nid,
1107028fc57aSDavid Hildenbrand 		.s = {
1108028fc57aSDavid Hildenbrand 			.max_pages = max_pages,
1109028fc57aSDavid Hildenbrand 		},
1110028fc57aSDavid Hildenbrand 	};
1111028fc57aSDavid Hildenbrand 
1112028fc57aSDavid Hildenbrand 	if (!max_pages)
1113028fc57aSDavid Hildenbrand 		return -EINVAL;
1114028fc57aSDavid Hildenbrand 	return memory_group_register(group);
1115028fc57aSDavid Hildenbrand }
1116028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_register_static);
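
/*
 * A hedged sketch of how a hypothetical DIMM driver might use the API
 * above: register a static group sized to the DIMM, then tag all of the
 * DIMM's memory with the returned mgid when adding it (the exact
 * add-memory call is driver specific and omitted). Names and the helper
 * itself are illustrative, not from the original file.
 */
static int __maybe_unused example_register_dimm_group(int nid, u64 dimm_size)
{
	int mgid = memory_group_register_static(nid, PHYS_PFN(dimm_size));

	if (mgid < 0)
		return mgid;	/* -EINVAL or -ENOMEM */
	/* ... add the DIMM's memory in one go, passing mgid ... */
	return mgid;
}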
1117028fc57aSDavid Hildenbrand 
1118028fc57aSDavid Hildenbrand /**
1119028fc57aSDavid Hildenbrand  * memory_group_register_dynamic() - Register a dynamic memory group.
1120028fc57aSDavid Hildenbrand  * @nid: The node id.
1121028fc57aSDavid Hildenbrand  * @unit_pages: Unit in pages in which memory is added/removed in this dynamic
1122028fc57aSDavid Hildenbrand  *		memory group.
1123028fc57aSDavid Hildenbrand  *
1124028fc57aSDavid Hildenbrand  * Register a new dynamic memory group and return the memory group id.
1125028fc57aSDavid Hildenbrand  * Memory within a dynamic memory group is added/removed dynamically
1126028fc57aSDavid Hildenbrand  * in unit_pages.
1127028fc57aSDavid Hildenbrand  * in units of unit_pages.
1128028fc57aSDavid Hildenbrand  * Returns an error if out of memory, if the node id is invalid, if no new
1129028fc57aSDavid Hildenbrand  * memory groups can be registered, or if unit_pages is invalid (0, not a
1130028fc57aSDavid Hildenbrand  * memory groups can be registered, or if unit_pages is invalid (0, not a
1131028fc57aSDavid Hildenbrand  * power of two, or smaller than a single memory block). Otherwise, returns
1131028fc57aSDavid Hildenbrand  * the new memory group id.
1132028fc57aSDavid Hildenbrand  */
memory_group_register_dynamic(int nid,unsigned long unit_pages)1133028fc57aSDavid Hildenbrand int memory_group_register_dynamic(int nid, unsigned long unit_pages)
1134028fc57aSDavid Hildenbrand {
1135028fc57aSDavid Hildenbrand 	struct memory_group group = {
1136028fc57aSDavid Hildenbrand 		.nid = nid,
1137028fc57aSDavid Hildenbrand 		.is_dynamic = true,
1138028fc57aSDavid Hildenbrand 		.d = {
1139028fc57aSDavid Hildenbrand 			.unit_pages = unit_pages,
1140028fc57aSDavid Hildenbrand 		},
1141028fc57aSDavid Hildenbrand 	};
1142028fc57aSDavid Hildenbrand 
1143028fc57aSDavid Hildenbrand 	if (!unit_pages || !is_power_of_2(unit_pages) ||
1144028fc57aSDavid Hildenbrand 	    unit_pages < PHYS_PFN(memory_block_size_bytes()))
1145028fc57aSDavid Hildenbrand 		return -EINVAL;
1146028fc57aSDavid Hildenbrand 	return memory_group_register(group);
1147028fc57aSDavid Hildenbrand }
1148028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_register_dynamic);
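
/*
 * A hedged sketch, not from the original file, mirroring how a
 * virtio-mem-style driver might register a dynamic group: memory is
 * plugged and unplugged in fixed-size units, which must be a power-of-2
 * number of pages and at least one memory block. Using exactly one
 * memory block per unit, as below, is the smallest legal choice.
 */
static int __maybe_unused example_register_dynamic_group(int nid)
{
	unsigned long unit_pages = PHYS_PFN(memory_block_size_bytes());

	return memory_group_register_dynamic(nid, unit_pages);
}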
1149028fc57aSDavid Hildenbrand 
1150028fc57aSDavid Hildenbrand /**
1151028fc57aSDavid Hildenbrand  * memory_group_unregister() - Unregister a memory group.
1152028fc57aSDavid Hildenbrand  * @mgid: the memory group id
1153028fc57aSDavid Hildenbrand  *
1154028fc57aSDavid Hildenbrand  * Unregister a memory group. If any memory block still belongs to this
1155028fc57aSDavid Hildenbrand  * memory group, unregistering will fail.
1156028fc57aSDavid Hildenbrand  *
1157028fc57aSDavid Hildenbrand  * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
1158028fc57aSDavid Hildenbrand  * memory blocks still belong to this memory group and returns 0 if
1159028fc57aSDavid Hildenbrand  * unregistering succeeded.
1160028fc57aSDavid Hildenbrand  */
memory_group_unregister(int mgid)1161028fc57aSDavid Hildenbrand int memory_group_unregister(int mgid)
1162028fc57aSDavid Hildenbrand {
1163028fc57aSDavid Hildenbrand 	struct memory_group *group;
1164028fc57aSDavid Hildenbrand 
1165028fc57aSDavid Hildenbrand 	if (mgid < 0)
1166028fc57aSDavid Hildenbrand 		return -EINVAL;
1167028fc57aSDavid Hildenbrand 
1168028fc57aSDavid Hildenbrand 	group = xa_load(&memory_groups, mgid);
1169028fc57aSDavid Hildenbrand 	if (!group)
1170028fc57aSDavid Hildenbrand 		return -EINVAL;
1171028fc57aSDavid Hildenbrand 	if (!list_empty(&group->memory_blocks))
1172028fc57aSDavid Hildenbrand 		return -EBUSY;
1173028fc57aSDavid Hildenbrand 	xa_erase(&memory_groups, mgid);
1174028fc57aSDavid Hildenbrand 	kfree(group);
1175028fc57aSDavid Hildenbrand 	return 0;
1176028fc57aSDavid Hildenbrand }
1177028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_unregister);
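
/*
 * A minimal teardown sketch, not part of the original file: a driver is
 * expected to remove all memory it added under @mgid before calling
 * memory_group_unregister(); -EBUSY means some block still belongs to
 * the group. The helper name is illustrative.
 */
static void __maybe_unused example_unregister_group(int mgid)
{
	int ret;

	/* ... offline and remove all memory belonging to mgid first ... */
	ret = memory_group_unregister(mgid);
	if (ret)
		pr_warn("memory group %d not unregistered: %d\n", mgid, ret);
}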
1178028fc57aSDavid Hildenbrand 
1179028fc57aSDavid Hildenbrand /*
1180028fc57aSDavid Hildenbrand  * This is an internal helper only to be used in core memory hotplug code to
1181028fc57aSDavid Hildenbrand  * lookup a memory group. We don't care about locking, as we don't expect a
1182028fc57aSDavid Hildenbrand  * memory group to get unregistered while adding memory to it -- because
1183028fc57aSDavid Hildenbrand  * the group and the memory is managed by the same driver.
1184028fc57aSDavid Hildenbrand  * the group and the memory are managed by the same driver.
memory_group_find_by_id(int mgid)1185028fc57aSDavid Hildenbrand struct memory_group *memory_group_find_by_id(int mgid)
1186028fc57aSDavid Hildenbrand {
1187028fc57aSDavid Hildenbrand 	return xa_load(&memory_groups, mgid);
1188028fc57aSDavid Hildenbrand }
11893fcebf90SDavid Hildenbrand 
11903fcebf90SDavid Hildenbrand /*
11913fcebf90SDavid Hildenbrand  * This is an internal helper only to be used in core memory hotplug code to
11923fcebf90SDavid Hildenbrand  * walk all dynamic memory groups excluding a given memory group, either
11933fcebf90SDavid Hildenbrand  * belonging to a specific node, or belonging to any node.
11943fcebf90SDavid Hildenbrand  */
walk_dynamic_memory_groups(int nid,walk_memory_groups_func_t func,struct memory_group * excluded,void * arg)11953fcebf90SDavid Hildenbrand int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
11963fcebf90SDavid Hildenbrand 			       struct memory_group *excluded, void *arg)
11973fcebf90SDavid Hildenbrand {
11983fcebf90SDavid Hildenbrand 	struct memory_group *group;
11993fcebf90SDavid Hildenbrand 	unsigned long index;
12003fcebf90SDavid Hildenbrand 	int ret = 0;
12013fcebf90SDavid Hildenbrand 
12023fcebf90SDavid Hildenbrand 	xa_for_each_marked(&memory_groups, index, group,
12033fcebf90SDavid Hildenbrand 			   MEMORY_GROUP_MARK_DYNAMIC) {
12043fcebf90SDavid Hildenbrand 		if (group == excluded)
12053fcebf90SDavid Hildenbrand 			continue;
12063fcebf90SDavid Hildenbrand #ifdef CONFIG_NUMA
12073fcebf90SDavid Hildenbrand 		if (nid != NUMA_NO_NODE && group->nid != nid)
12083fcebf90SDavid Hildenbrand 			continue;
12093fcebf90SDavid Hildenbrand #endif /* CONFIG_NUMA */
12103fcebf90SDavid Hildenbrand 		ret = func(group, arg);
12113fcebf90SDavid Hildenbrand 		if (ret)
12123fcebf90SDavid Hildenbrand 			break;
12133fcebf90SDavid Hildenbrand 	}
12143fcebf90SDavid Hildenbrand 	return ret;
12153fcebf90SDavid Hildenbrand }
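
/*
 * A hedged usage sketch, not part of the original file: sum the unit
 * sizes of all dynamic memory groups on a node (or on any node with
 * NUMA_NO_NODE), excluding none. Purely illustrative; core hotplug code
 * is the intended caller of the walker above.
 */
static int unit_pages_cb(struct memory_group *group, void *arg)
{
	unsigned long *total = arg;

	*total += group->d.unit_pages;
	return 0;
}

static unsigned long __maybe_unused total_dynamic_unit_pages(int nid)
{
	unsigned long total = 0;

	walk_dynamic_memory_groups(nid, unit_pages_cb, NULL, &total);
	return total;
}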
12165033091dSNaoya Horiguchi 
12175033091dSNaoya Horiguchi #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
memblk_nr_poison_inc(unsigned long pfn)12185033091dSNaoya Horiguchi void memblk_nr_poison_inc(unsigned long pfn)
12195033091dSNaoya Horiguchi {
12205033091dSNaoya Horiguchi 	const unsigned long block_id = pfn_to_block_id(pfn);
12215033091dSNaoya Horiguchi 	struct memory_block *mem = find_memory_block_by_id(block_id);
12225033091dSNaoya Horiguchi 
12235033091dSNaoya Horiguchi 	if (mem)
12245033091dSNaoya Horiguchi 		atomic_long_inc(&mem->nr_hwpoison);
12255033091dSNaoya Horiguchi }
12265033091dSNaoya Horiguchi 
memblk_nr_poison_sub(unsigned long pfn,long i)12275033091dSNaoya Horiguchi void memblk_nr_poison_sub(unsigned long pfn, long i)
12285033091dSNaoya Horiguchi {
12295033091dSNaoya Horiguchi 	const unsigned long block_id = pfn_to_block_id(pfn);
12305033091dSNaoya Horiguchi 	struct memory_block *mem = find_memory_block_by_id(block_id);
12315033091dSNaoya Horiguchi 
12325033091dSNaoya Horiguchi 	if (mem)
12335033091dSNaoya Horiguchi 		atomic_long_sub(i, &mem->nr_hwpoison);
12345033091dSNaoya Horiguchi }
12355033091dSNaoya Horiguchi 
memblk_nr_poison(struct memory_block * mem)12365033091dSNaoya Horiguchi static unsigned long memblk_nr_poison(struct memory_block *mem)
12375033091dSNaoya Horiguchi {
12385033091dSNaoya Horiguchi 	return atomic_long_read(&mem->nr_hwpoison);
12395033091dSNaoya Horiguchi }
12405033091dSNaoya Horiguchi #endif
1241