1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
23947be19SDave Hansen /*
310fbcf4cSKay Sievers * Memory subsystem support
43947be19SDave Hansen *
53947be19SDave Hansen * Written by Matt Tolentino <[email protected]>
63947be19SDave Hansen * Dave Hansen <[email protected]>
73947be19SDave Hansen *
83947be19SDave Hansen * This file provides the necessary infrastructure to represent
93947be19SDave Hansen * a SPARSEMEM-memory-model system's physical memory in /sysfs.
103947be19SDave Hansen * All arch-independent code that assumes MEMORY_HOTPLUG requires
113947be19SDave Hansen * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
123947be19SDave Hansen */
133947be19SDave Hansen
143947be19SDave Hansen #include <linux/module.h>
153947be19SDave Hansen #include <linux/init.h>
163947be19SDave Hansen #include <linux/topology.h>
17c59ede7bSRandy.Dunlap #include <linux/capability.h>
183947be19SDave Hansen #include <linux/device.h>
193947be19SDave Hansen #include <linux/memory.h>
203947be19SDave Hansen #include <linux/memory_hotplug.h>
213947be19SDave Hansen #include <linux/mm.h>
229f1b16a5SShaohua Li #include <linux/stat.h>
235a0e3ad6STejun Heo #include <linux/slab.h>
244fb6eabfSScott Cheloha #include <linux/xarray.h>
259f1b16a5SShaohua Li
2660063497SArun Sharma #include <linux/atomic.h>
277c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
283947be19SDave Hansen
293947be19SDave Hansen #define MEMORY_CLASS_NAME "memory"
300c2c99b1SNathan Fontenot
/*
 * Human-readable names for the MMOP_* online types, as exposed/parsed via
 * sysfs (the per-block "state" attribute and "auto_online_blocks").
 */
static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};
374dc8207bSDavid Hildenbrand
mhp_online_type_from_str(const char * str)381adf8b46SAnshuman Khandual int mhp_online_type_from_str(const char *str)
394dc8207bSDavid Hildenbrand {
404dc8207bSDavid Hildenbrand int i;
414dc8207bSDavid Hildenbrand
424dc8207bSDavid Hildenbrand for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
434dc8207bSDavid Hildenbrand if (sysfs_streq(str, online_type_to_str[i]))
444dc8207bSDavid Hildenbrand return i;
454dc8207bSDavid Hildenbrand }
464dc8207bSDavid Hildenbrand return -EINVAL;
474dc8207bSDavid Hildenbrand }
484dc8207bSDavid Hildenbrand
#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

/* Number of memory sections covered by a single memory block device. */
static int sections_per_block;

/* Convert a section number to the id of the memory block containing it. */
static inline unsigned long memory_block_id(unsigned long section_nr)
{
	return section_nr / sections_per_block;
}
573947be19SDave Hansen
/* Convert a PFN to the id of the memory block containing it. */
static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	return memory_block_id(pfn_to_section_nr(pfn));
}
62db051a0dSDavid Hildenbrand
/* Convert a physical address to the id of the memory block containing it. */
static inline unsigned long phys_to_block_id(unsigned long phys)
{
	return pfn_to_block_id(PFN_DOWN(phys));
}
67ea884641SDavid Hildenbrand
static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

/*
 * The "memory" subsystem bus. One device per memory block is registered on
 * it; the driver core routes sysfs online/offline requests to the
 * callbacks below.
 */
static const struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};
773947be19SDave Hansen
/*
 * Memory blocks are cached in a local radix tree to avoid
 * a costly linear search for the corresponding device on
 * the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

/*
 * Memory groups, indexed by memory group id (mgid).
 */
static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);
/* Entries for dynamic memory groups carry this xarray mark. */
#define MEMORY_GROUP_MARK_DYNAMIC XA_MARK_1

/* Notifier chain invoked by memory_notify() for MEM_* hotplug events. */
static BLOCKING_NOTIFIER_HEAD(memory_chain);
923947be19SDave Hansen
/*
 * register_memory_notifier - subscribe to memory hotplug events
 * @nb: notifier block to register
 *
 * Callbacks run on the blocking memory_chain and may therefore sleep.
 */
int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);
983947be19SDave Hansen
/*
 * unregister_memory_notifier - unsubscribe from memory hotplug events
 * @nb: notifier block previously registered via register_memory_notifier()
 */
void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);
1043947be19SDave Hansen
/*
 * Device release callback: frees the memory block once the last device
 * reference is dropped.
 */
static void memory_block_release(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	/* Verify that the altmap is freed */
	WARN_ON(mem->altmap);
	kfree(mem);
}
112fa7194ebSYasuaki Ishimatsu
/*
 * Default memory block size: a single section. Architectures may override
 * this weak definition to cover multiple sections per block.
 */
unsigned long __weak memory_block_size_bytes(void)
{
	return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);
1180c2c99b1SNathan Fontenot
1197c09f428SGavin Shan /* Show the memory block ID, relative to the memory block size */
phys_index_show(struct device * dev,struct device_attribute * attr,char * buf)1203f8e9178SDavid Hildenbrand static ssize_t phys_index_show(struct device *dev,
12110fbcf4cSKay Sievers struct device_attribute *attr, char *buf)
1223947be19SDave Hansen {
1237315f0ccSGu Zheng struct memory_block *mem = to_memory_block(dev);
124d3360164SNathan Fontenot
1257c09f428SGavin Shan return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr));
126d3360164SNathan Fontenot }
127d3360164SNathan Fontenot
1283947be19SDave Hansen /*
12953cdc1cbSDavid Hildenbrand * Legacy interface that we cannot remove. Always indicate "removable"
13053cdc1cbSDavid Hildenbrand * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
1315c755e9fSBadari Pulavarty */
removable_show(struct device * dev,struct device_attribute * attr,char * buf)1323f8e9178SDavid Hildenbrand static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
1333f8e9178SDavid Hildenbrand char *buf)
1345c755e9fSBadari Pulavarty {
135aa838896SJoe Perches return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
1365c755e9fSBadari Pulavarty }
1375c755e9fSBadari Pulavarty
1385c755e9fSBadari Pulavarty /*
1393947be19SDave Hansen * online, offline, going offline, etc.
1403947be19SDave Hansen */
state_show(struct device * dev,struct device_attribute * attr,char * buf)1413f8e9178SDavid Hildenbrand static ssize_t state_show(struct device *dev, struct device_attribute *attr,
1423f8e9178SDavid Hildenbrand char *buf)
1433947be19SDave Hansen {
1447315f0ccSGu Zheng struct memory_block *mem = to_memory_block(dev);
145973c3911SJoe Perches const char *output;
1463947be19SDave Hansen
1473947be19SDave Hansen /*
1483947be19SDave Hansen * We can probably put these states in a nice little array
1493947be19SDave Hansen * so that they're not open-coded
1503947be19SDave Hansen */
1513947be19SDave Hansen switch (mem->state) {
1523947be19SDave Hansen case MEM_ONLINE:
153973c3911SJoe Perches output = "online";
1543947be19SDave Hansen break;
1553947be19SDave Hansen case MEM_OFFLINE:
156973c3911SJoe Perches output = "offline";
1573947be19SDave Hansen break;
1583947be19SDave Hansen case MEM_GOING_OFFLINE:
159973c3911SJoe Perches output = "going-offline";
1603947be19SDave Hansen break;
1613947be19SDave Hansen default:
1623947be19SDave Hansen WARN_ON(1);
163973c3911SJoe Perches return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
1643947be19SDave Hansen }
1653947be19SDave Hansen
166973c3911SJoe Perches return sysfs_emit(buf, "%s\n", output);
1673947be19SDave Hansen }
1683947be19SDave Hansen
/*
 * memory_notify - invoke the memory hotplug notifier chain
 * @val: MEM_* event being signalled
 * @v: pointer describing the affected range, passed through to callbacks
 *
 * Returns the raw notifier result; use notifier_to_errno() to convert.
 */
int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}
1733947be19SDave Hansen
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
static unsigned long memblk_nr_poison(struct memory_block *mem);
#else
/* Without MEMORY_FAILURE + HOTPLUG no hwpoison pages are tracked per block. */
static inline unsigned long memblk_nr_poison(struct memory_block *mem)
{
	return 0;
}
#endif
1825033091dSNaoya Horiguchi
/*
 * Must acquire mem_hotplug_lock in write mode.
 */
static int memory_block_online(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = 0;
	struct memory_notify arg;
	struct zone *zone;
	int ret;

	/* Refuse to online a block that still contains hwpoisoned pages. */
	if (memblk_nr_poison(mem))
		return -EHWPOISON;

	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
				  start_pfn, nr_pages);

	/*
	 * Although vmemmap pages have a different lifecycle than the pages
	 * they describe (they remain until the memory is unplugged), doing
	 * their initialization and accounting at memory onlining/offlining
	 * stage helps to keep accounting easier to follow - e.g vmemmaps
	 * belong to the same zone as the memory they backed.
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	/* Notifiers see the usable range; altmap pages are reported apart. */
	arg.altmap_start_pfn = start_pfn;
	arg.altmap_nr_pages = nr_vmemmap_pages;
	arg.start_pfn = start_pfn + nr_vmemmap_pages;
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
	mem_hotplug_begin();
	ret = memory_notify(MEM_PREPARE_ONLINE, &arg);
	ret = notifier_to_errno(ret);
	if (ret)
		goto out_notifier;

	if (nr_vmemmap_pages) {
		/* Initialize the memmap that lives on the hotplugged range. */
		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
						zone, mem->altmap->inaccessible);
		if (ret)
			goto out;
	}

	ret = online_pages(start_pfn + nr_vmemmap_pages,
			   nr_pages - nr_vmemmap_pages, zone, mem->group);
	if (ret) {
		/* Undo the memmap initialization done above. */
		if (nr_vmemmap_pages)
			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
		goto out;
	}

	/*
	 * Account once onlining succeeded. If the zone was unpopulated, it is
	 * now already properly populated.
	 */
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  nr_vmemmap_pages);

	mem->zone = zone;
	mem_hotplug_done();
	return ret;
out:
	/* Tell notifiers the range prepared above will not come online. */
	memory_notify(MEM_FINISH_OFFLINE, &arg);
out_notifier:
	mem_hotplug_done();
	return ret;
}
2538736cc2dSOscar Salvador
/*
 * Must acquire mem_hotplug_lock in write mode.
 */
static int memory_block_offline(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = 0;
	struct memory_notify arg;
	int ret;

	/*
	 * mem->zone is only set while the block is online; without it the
	 * block cannot be offlined (see valid_zones_show()).
	 */
	if (!mem->zone)
		return -EINVAL;

	/*
	 * Unaccount before offlining, such that unpopulated zone and kthreads
	 * can properly be torn down in offline_pages().
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;

	mem_hotplug_begin();
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  -nr_vmemmap_pages);

	ret = offline_pages(start_pfn + nr_vmemmap_pages,
			    nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
	if (ret) {
		/* offline_pages() failed. Account back. */
		if (nr_vmemmap_pages)
			adjust_present_page_count(pfn_to_page(start_pfn),
						  mem->group, nr_vmemmap_pages);
		goto out;
	}

	/* Tear down the memmap that lived on the now-offline range. */
	if (nr_vmemmap_pages)
		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);

	mem->zone = NULL;
	/* Notifiers see the usable range; altmap pages are reported apart. */
	arg.altmap_start_pfn = start_pfn;
	arg.altmap_nr_pages = nr_vmemmap_pages;
	arg.start_pfn = start_pfn + nr_vmemmap_pages;
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
	memory_notify(MEM_FINISH_OFFLINE, &arg);
out:
	mem_hotplug_done();
	return ret;
}
3038736cc2dSOscar Salvador
/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(struct memory_block *mem, unsigned long action)
{
	int ret;

	/* Dispatch the requested state transition; anything else is a bug. */
	switch (action) {
	case MEM_ONLINE:
		ret = memory_block_online(mem);
		break;
	case MEM_OFFLINE:
		ret = memory_block_offline(mem);
		break;
	default:
		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
		     "%ld\n", __func__, mem->start_section_nr, action, action);
		ret = -EINVAL;
	}

	return ret;
}
3283947be19SDave Hansen
/*
 * Transition a memory block from @from_state_req to @to_state via
 * memory_block_action(). While an offline request is in flight the block
 * is marked MEM_GOING_OFFLINE; on failure the requested "from" state is
 * restored. Returns 0 on success or a negative errno.
 */
static int memory_block_change_state(struct memory_block *mem,
		unsigned long to_state, unsigned long from_state_req)
{
	int ret = 0;

	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;

	ret = memory_block_action(mem, to_state);
	mem->state = ret ? from_state_req : to_state;

	return ret;
}
3453947be19SDave Hansen
/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	/* Already online: nothing to do. */
	if (mem->state == MEM_ONLINE)
		return 0;

	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	/* Reset so the next device_online() starts from the default again. */
	mem->online_type = MMOP_OFFLINE;

	return ret;
}
3674960e05eSRafael J. Wysocki
memory_subsys_offline(struct device * dev)3684960e05eSRafael J. Wysocki static int memory_subsys_offline(struct device *dev)
3694960e05eSRafael J. Wysocki {
3707315f0ccSGu Zheng struct memory_block *mem = to_memory_block(dev);
3714960e05eSRafael J. Wysocki
372fa2be40fSSeth Jennings if (mem->state == MEM_OFFLINE)
373fa2be40fSSeth Jennings return 0;
3744960e05eSRafael J. Wysocki
375fa2be40fSSeth Jennings return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
3764960e05eSRafael J. Wysocki }
3774960e05eSRafael J. Wysocki
/*
 * Handle writes to the per-block "state" sysfs attribute. Accepts the
 * strings in online_type_to_str[] and routes the request through the
 * driver core (device_online()/device_offline()) so that dev->offline
 * stays consistent with the block state.
 */
static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	const int online_type = mhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (online_type < 0)
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
	case MMOP_ONLINE:
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
		break;
	case MMOP_OFFLINE:
		ret = device_offline(&mem->dev);
		break;
	default:
		ret = -EINVAL; /* should never happen */
	}

	unlock_device_hotplug();

	if (ret < 0)
		return ret;
	/* Any remaining non-zero result is still treated as failure. */
	if (ret)
		return -EINVAL;

	return count;
}
4163947be19SDave Hansen
4173947be19SDave Hansen /*
418e9a2e48eSDavid Hildenbrand * Legacy interface that we cannot remove: s390x exposes the storage increment
419e9a2e48eSDavid Hildenbrand * covered by a memory block, allowing for identifying which memory blocks
420e9a2e48eSDavid Hildenbrand * comprise a storage increment. Since a memory block spans complete
421e9a2e48eSDavid Hildenbrand * storage increments nowadays, this interface is basically unused. Other
422e9a2e48eSDavid Hildenbrand * archs never exposed != 0.
4233947be19SDave Hansen */
phys_device_show(struct device * dev,struct device_attribute * attr,char * buf)4243f8e9178SDavid Hildenbrand static ssize_t phys_device_show(struct device *dev,
42510fbcf4cSKay Sievers struct device_attribute *attr, char *buf)
4263947be19SDave Hansen {
4277315f0ccSGu Zheng struct memory_block *mem = to_memory_block(dev);
428e9a2e48eSDavid Hildenbrand unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
429948b3edbSJoe Perches
430e9a2e48eSDavid Hildenbrand return sysfs_emit(buf, "%d\n",
431e9a2e48eSDavid Hildenbrand arch_get_memory_phys_device(start_pfn));
4323947be19SDave Hansen }
4333947be19SDave Hansen
434ed2f2400SZhang Zhen #ifdef CONFIG_MEMORY_HOTREMOVE
print_allowed_zone(char * buf,int len,int nid,struct memory_group * group,unsigned long start_pfn,unsigned long nr_pages,int online_type,struct zone * default_zone)435973c3911SJoe Perches static int print_allowed_zone(char *buf, int len, int nid,
436445fcf7cSDavid Hildenbrand struct memory_group *group,
437973c3911SJoe Perches unsigned long start_pfn, unsigned long nr_pages,
438973c3911SJoe Perches int online_type, struct zone *default_zone)
439e5e68930SMichal Hocko {
440e5e68930SMichal Hocko struct zone *zone;
441e5e68930SMichal Hocko
442445fcf7cSDavid Hildenbrand zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
443973c3911SJoe Perches if (zone == default_zone)
444973c3911SJoe Perches return 0;
445948b3edbSJoe Perches
446973c3911SJoe Perches return sysfs_emit_at(buf, len, " %s", zone->name);
447e5e68930SMichal Hocko }
448e5e68930SMichal Hocko
/*
 * Show the zones this memory block belongs to / could be onlined to
 * ("valid_zones" sysfs attribute). For an online block the current zone
 * (or "none" if the block spans multiple zones) is reported; for an
 * offline block the default zone is listed first, then the alternatives.
 */
static ssize_t valid_zones_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct memory_group *group = mem->group;
	struct zone *default_zone;
	int nid = mem->nid;
	int len;

	/*
	 * Check the existing zone. Make sure that we do that only on the
	 * online nodes otherwise the page_zone is not reliable
	 */
	if (mem->state == MEM_ONLINE) {
		/*
		 * If !mem->zone, the memory block spans multiple zones and
		 * cannot get offlined.
		 */
		return sysfs_emit(buf, "%s\n",
				  mem->zone ? mem->zone->name : "none");
	}

	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
					  start_pfn, nr_pages);

	len = sysfs_emit(buf, "%s", default_zone->name);
	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
				  MMOP_ONLINE_KERNEL, default_zone);
	len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
				  MMOP_ONLINE_MOVABLE, default_zone);
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
4843f8e9178SDavid Hildenbrand static DEVICE_ATTR_RO(valid_zones);
485ed2f2400SZhang Zhen #endif
486ed2f2400SZhang Zhen
4873f8e9178SDavid Hildenbrand static DEVICE_ATTR_RO(phys_index);
4883f8e9178SDavid Hildenbrand static DEVICE_ATTR_RW(state);
4893f8e9178SDavid Hildenbrand static DEVICE_ATTR_RO(phys_device);
4903f8e9178SDavid Hildenbrand static DEVICE_ATTR_RO(removable);
4913947be19SDave Hansen
4923947be19SDave Hansen /*
493f915fb7fSDavid Hildenbrand * Show the memory block size (shared by all memory blocks).
4943947be19SDave Hansen */
block_size_bytes_show(struct device * dev,struct device_attribute * attr,char * buf)4953f8e9178SDavid Hildenbrand static ssize_t block_size_bytes_show(struct device *dev,
4963f8e9178SDavid Hildenbrand struct device_attribute *attr, char *buf)
4973947be19SDave Hansen {
498aa838896SJoe Perches return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
4993947be19SDave Hansen }
5003947be19SDave Hansen
5013f8e9178SDavid Hildenbrand static DEVICE_ATTR_RO(block_size_bytes);
5023947be19SDave Hansen
5033947be19SDave Hansen /*
50431bc3858SVitaly Kuznetsov * Memory auto online policy.
50531bc3858SVitaly Kuznetsov */
50631bc3858SVitaly Kuznetsov
auto_online_blocks_show(struct device * dev,struct device_attribute * attr,char * buf)5073f8e9178SDavid Hildenbrand static ssize_t auto_online_blocks_show(struct device *dev,
5083f8e9178SDavid Hildenbrand struct device_attribute *attr, char *buf)
50931bc3858SVitaly Kuznetsov {
510aa838896SJoe Perches return sysfs_emit(buf, "%s\n",
51144d46b76SGregory Price online_type_to_str[mhp_get_default_online_type()]);
51231bc3858SVitaly Kuznetsov }
51331bc3858SVitaly Kuznetsov
auto_online_blocks_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)5143f8e9178SDavid Hildenbrand static ssize_t auto_online_blocks_store(struct device *dev,
5153f8e9178SDavid Hildenbrand struct device_attribute *attr,
51631bc3858SVitaly Kuznetsov const char *buf, size_t count)
51731bc3858SVitaly Kuznetsov {
5181adf8b46SAnshuman Khandual const int online_type = mhp_online_type_from_str(buf);
5195f47adf7SDavid Hildenbrand
5205f47adf7SDavid Hildenbrand if (online_type < 0)
52131bc3858SVitaly Kuznetsov return -EINVAL;
52231bc3858SVitaly Kuznetsov
52344d46b76SGregory Price mhp_set_default_online_type(online_type);
52431bc3858SVitaly Kuznetsov return count;
52531bc3858SVitaly Kuznetsov }
52631bc3858SVitaly Kuznetsov
5273f8e9178SDavid Hildenbrand static DEVICE_ATTR_RW(auto_online_blocks);
52831bc3858SVitaly Kuznetsov
52988a6f899SEric DeVolder #ifdef CONFIG_CRASH_HOTPLUG
53088a6f899SEric DeVolder #include <linux/kexec.h>
crash_hotplug_show(struct device * dev,struct device_attribute * attr,char * buf)53188a6f899SEric DeVolder static ssize_t crash_hotplug_show(struct device *dev,
53288a6f899SEric DeVolder struct device_attribute *attr, char *buf)
53388a6f899SEric DeVolder {
53479365026SSourabh Jain return sysfs_emit(buf, "%d\n", crash_check_hotplug_support());
53588a6f899SEric DeVolder }
53688a6f899SEric DeVolder static DEVICE_ATTR_RO(crash_hotplug);
53788a6f899SEric DeVolder #endif
53888a6f899SEric DeVolder
53931bc3858SVitaly Kuznetsov /*
5403947be19SDave Hansen * Some architectures will have custom drivers to do this, and
5413947be19SDave Hansen * will not need to do it from userspace. The fake hot-add code
5423947be19SDave Hansen * as well as ppc64 will do all of their discovery in userspace
5433947be19SDave Hansen * and will require this interface.
5443947be19SDave Hansen */
5453947be19SDave Hansen #ifdef CONFIG_ARCH_MEMORY_PROBE
/*
 * Manually hot-add one memory block at the given physical address
 * ("probe" sysfs attribute). The address must be aligned to the memory
 * block size; the target node comes from memory_add_physaddr_to_nid().
 */
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	u64 phys_addr;
	int nid, ret;
	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

	ret = kstrtoull(buf, 0, &phys_addr);
	if (ret)
		return ret;

	/* Reject addresses that are not memory-block aligned. */
	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	nid = memory_add_physaddr_to_nid(phys_addr);
	ret = __add_memory(nid, phys_addr,
			   MIN_MEMORY_BLOCK_SIZE * sections_per_block,
			   MHP_NONE);

	if (ret)
		goto out;

	ret = count;
out:
	unlock_device_hotplug();
	return ret;
}
5773947be19SDave Hansen
5783f8e9178SDavid Hildenbrand static DEVICE_ATTR_WO(probe);
5793947be19SDave Hansen #endif
5803947be19SDave Hansen
581facb6011SAndi Kleen #ifdef CONFIG_MEMORY_FAILURE
582facb6011SAndi Kleen /*
583facb6011SAndi Kleen * Support for offlining pages of memory
584facb6011SAndi Kleen */
585facb6011SAndi Kleen
586facb6011SAndi Kleen /* Soft offline a page */
soft_offline_page_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)5873f8e9178SDavid Hildenbrand static ssize_t soft_offline_page_store(struct device *dev,
58810fbcf4cSKay Sievers struct device_attribute *attr,
58928812fe1SAndi Kleen const char *buf, size_t count)
590facb6011SAndi Kleen {
591facb6011SAndi Kleen int ret;
592facb6011SAndi Kleen u64 pfn;
593facb6011SAndi Kleen if (!capable(CAP_SYS_ADMIN))
594facb6011SAndi Kleen return -EPERM;
59534da5e67SJingoo Han if (kstrtoull(buf, 0, &pfn) < 0)
596facb6011SAndi Kleen return -EINVAL;
597facb6011SAndi Kleen pfn >>= PAGE_SHIFT;
598feec24a6SNaoya Horiguchi ret = soft_offline_page(pfn, 0);
599facb6011SAndi Kleen return ret == 0 ? count : ret;
600facb6011SAndi Kleen }
601facb6011SAndi Kleen
602facb6011SAndi Kleen /* Forcibly offline a page, including killing processes. */
hard_offline_page_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)6033f8e9178SDavid Hildenbrand static ssize_t hard_offline_page_store(struct device *dev,
60410fbcf4cSKay Sievers struct device_attribute *attr,
60528812fe1SAndi Kleen const char *buf, size_t count)
606facb6011SAndi Kleen {
607facb6011SAndi Kleen int ret;
608facb6011SAndi Kleen u64 pfn;
609facb6011SAndi Kleen if (!capable(CAP_SYS_ADMIN))
610facb6011SAndi Kleen return -EPERM;
61134da5e67SJingoo Han if (kstrtoull(buf, 0, &pfn) < 0)
612facb6011SAndi Kleen return -EINVAL;
613facb6011SAndi Kleen pfn >>= PAGE_SHIFT;
61467f22ba7Szhenwei pi ret = memory_failure(pfn, MF_SW_SIMULATED);
615d1fe111fSluofei if (ret == -EOPNOTSUPP)
616d1fe111fSluofei ret = 0;
617facb6011SAndi Kleen return ret ? ret : count;
618facb6011SAndi Kleen }
619facb6011SAndi Kleen
/* Write-only root attributes for injecting soft/hard page offlining. */
static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif	/* CONFIG_MEMORY_FAILURE */
623facb6011SAndi Kleen
/*
 * See phys_device_show().
 *
 * Weak default: architectures without a notion of a physical memory
 * device underlying a memory block report 0.
 */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}
6293947be19SDave Hansen
6304fb6eabfSScott Cheloha /*
6314fb6eabfSScott Cheloha * A reference for the returned memory block device is acquired.
6324fb6eabfSScott Cheloha *
6334fb6eabfSScott Cheloha * Called under device_hotplug_lock.
6344fb6eabfSScott Cheloha */
find_memory_block_by_id(unsigned long block_id)635dd625285SDavid Hildenbrand static struct memory_block *find_memory_block_by_id(unsigned long block_id)
63698383031SRobin Holt {
6374fb6eabfSScott Cheloha struct memory_block *mem;
63898383031SRobin Holt
6394fb6eabfSScott Cheloha mem = xa_load(&memory_blocks, block_id);
6404fb6eabfSScott Cheloha if (mem)
6414fb6eabfSScott Cheloha get_device(&mem->dev);
6424fb6eabfSScott Cheloha return mem;
643db051a0dSDavid Hildenbrand }
644db051a0dSDavid Hildenbrand
/*
 * Look up the memory block device covering @section_nr; acquires a
 * reference on the returned device (see find_memory_block_by_id()).
 *
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(unsigned long section_nr)
{
	return find_memory_block_by_id(memory_block_id(section_nr));
}
6543947be19SDave Hansen
/* sysfs attributes exposed by every memory block device. */
static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

static const struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

/* NULL-terminated group list assigned to each block's dev.groups. */
static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};
67496b2c0fcSNathan Fontenot
__add_memory_block(struct memory_block * memory)6752aa065f7SDavid Hildenbrand static int __add_memory_block(struct memory_block *memory)
67696b2c0fcSNathan Fontenot {
677085aa2deSArvind Yadav int ret;
678085aa2deSArvind Yadav
67996b2c0fcSNathan Fontenot memory->dev.bus = &memory_subsys;
68096b2c0fcSNathan Fontenot memory->dev.id = memory->start_section_nr / sections_per_block;
68196b2c0fcSNathan Fontenot memory->dev.release = memory_block_release;
68296b2c0fcSNathan Fontenot memory->dev.groups = memory_memblk_attr_groups;
683f991fae5SLinus Torvalds memory->dev.offline = memory->state == MEM_OFFLINE;
68496b2c0fcSNathan Fontenot
685085aa2deSArvind Yadav ret = device_register(&memory->dev);
6864fb6eabfSScott Cheloha if (ret) {
687085aa2deSArvind Yadav put_device(&memory->dev);
6884fb6eabfSScott Cheloha return ret;
6894fb6eabfSScott Cheloha }
6904fb6eabfSScott Cheloha ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
6914fb6eabfSScott Cheloha GFP_KERNEL));
692f47f758cSChristophe JAILLET if (ret)
6934fb6eabfSScott Cheloha device_unregister(&memory->dev);
694f47f758cSChristophe JAILLET
695085aa2deSArvind Yadav return ret;
69696b2c0fcSNathan Fontenot }
69796b2c0fcSNathan Fontenot
early_node_zone_for_memory_block(struct memory_block * mem,int nid)698395f6081SDavid Hildenbrand static struct zone *early_node_zone_for_memory_block(struct memory_block *mem,
699395f6081SDavid Hildenbrand int nid)
700395f6081SDavid Hildenbrand {
701395f6081SDavid Hildenbrand const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
702395f6081SDavid Hildenbrand const unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
703395f6081SDavid Hildenbrand struct zone *zone, *matching_zone = NULL;
704395f6081SDavid Hildenbrand pg_data_t *pgdat = NODE_DATA(nid);
705395f6081SDavid Hildenbrand int i;
706395f6081SDavid Hildenbrand
707395f6081SDavid Hildenbrand /*
708395f6081SDavid Hildenbrand * This logic only works for early memory, when the applicable zones
709395f6081SDavid Hildenbrand * already span the memory block. We don't expect overlapping zones on
710395f6081SDavid Hildenbrand * a single node for early memory. So if we're told that some PFNs
711395f6081SDavid Hildenbrand * of a node fall into this memory block, we can assume that all node
712395f6081SDavid Hildenbrand * zones that intersect with the memory block are actually applicable.
713395f6081SDavid Hildenbrand * No need to look at the memmap.
714395f6081SDavid Hildenbrand */
715395f6081SDavid Hildenbrand for (i = 0; i < MAX_NR_ZONES; i++) {
716395f6081SDavid Hildenbrand zone = pgdat->node_zones + i;
717395f6081SDavid Hildenbrand if (!populated_zone(zone))
718395f6081SDavid Hildenbrand continue;
719395f6081SDavid Hildenbrand if (!zone_intersects(zone, start_pfn, nr_pages))
720395f6081SDavid Hildenbrand continue;
721395f6081SDavid Hildenbrand if (!matching_zone) {
722395f6081SDavid Hildenbrand matching_zone = zone;
723395f6081SDavid Hildenbrand continue;
724395f6081SDavid Hildenbrand }
725395f6081SDavid Hildenbrand /* Spans multiple zones ... */
726395f6081SDavid Hildenbrand matching_zone = NULL;
727395f6081SDavid Hildenbrand break;
728395f6081SDavid Hildenbrand }
729395f6081SDavid Hildenbrand return matching_zone;
730395f6081SDavid Hildenbrand }
731395f6081SDavid Hildenbrand
#ifdef CONFIG_NUMA
/**
 * memory_block_add_nid() - Indicate that system RAM falling into this memory
 *			    block device (partially) belongs to the given node.
 * @mem: The memory block device.
 * @nid: The node id.
 * @context: The memory initialization context.
 *
 * Indicate that system RAM falling into this memory block (partially) belongs
 * to the given node. If the context indicates ("early") that we are adding the
 * node during node device subsystem initialization, this will also properly
 * set/adjust mem->zone based on the zone ranges of the given node.
 */
void memory_block_add_nid(struct memory_block *mem, int nid,
			  enum meminit_context context)
{
	if (context == MEMINIT_EARLY && mem->nid != nid) {
		/*
		 * For early memory we have to determine the zone when setting
		 * the node id and handle multiple nodes spanning a single
		 * memory block by indicate via zone == NULL that we're not
		 * dealing with a single zone. So if we're setting the node id
		 * the first time, determine if there is a single zone. If we're
		 * setting the node id a second time to a different node,
		 * invalidate the single detected zone.
		 */
		if (mem->nid == NUMA_NO_NODE)
			mem->zone = early_node_zone_for_memory_block(mem, nid);
		else
			mem->zone = NULL;
	}

	/*
	 * If this memory block spans multiple nodes, we only indicate
	 * the last processed node. If we span multiple nodes (not applicable
	 * to hotplugged memory), zone == NULL will prohibit memory offlining
	 * and consequently unplug.
	 */
	mem->nid = nid;
}
#endif	/* CONFIG_NUMA */
773395f6081SDavid Hildenbrand
add_memory_block(unsigned long block_id,unsigned long state,struct vmem_altmap * altmap,struct memory_group * group)7742aa065f7SDavid Hildenbrand static int add_memory_block(unsigned long block_id, unsigned long state,
7751a8c64e1SAneesh Kumar K.V struct vmem_altmap *altmap,
776028fc57aSDavid Hildenbrand struct memory_group *group)
777e4619c85SNathan Fontenot {
7780c2c99b1SNathan Fontenot struct memory_block *mem;
779e4619c85SNathan Fontenot int ret = 0;
780e4619c85SNathan Fontenot
781dd625285SDavid Hildenbrand mem = find_memory_block_by_id(block_id);
782db051a0dSDavid Hildenbrand if (mem) {
783db051a0dSDavid Hildenbrand put_device(&mem->dev);
784db051a0dSDavid Hildenbrand return -EEXIST;
785db051a0dSDavid Hildenbrand }
7860c2c99b1SNathan Fontenot mem = kzalloc(sizeof(*mem), GFP_KERNEL);
787e4619c85SNathan Fontenot if (!mem)
788e4619c85SNathan Fontenot return -ENOMEM;
789e4619c85SNathan Fontenot
79018115825SDavid Hildenbrand mem->start_section_nr = block_id * sections_per_block;
791e4619c85SNathan Fontenot mem->state = state;
792d84f2f5aSDavid Hildenbrand mem->nid = NUMA_NO_NODE;
7931a8c64e1SAneesh Kumar K.V mem->altmap = altmap;
794028fc57aSDavid Hildenbrand INIT_LIST_HEAD(&mem->group_next);
795028fc57aSDavid Hildenbrand
796395f6081SDavid Hildenbrand #ifndef CONFIG_NUMA
797395f6081SDavid Hildenbrand if (state == MEM_ONLINE)
798395f6081SDavid Hildenbrand /*
799395f6081SDavid Hildenbrand * MEM_ONLINE at this point implies early memory. With NUMA,
800395f6081SDavid Hildenbrand * we'll determine the zone when setting the node id via
801395f6081SDavid Hildenbrand * memory_block_add_nid(). Memory hotplug updated the zone
802395f6081SDavid Hildenbrand * manually when memory onlining/offlining succeeds.
803395f6081SDavid Hildenbrand */
804395f6081SDavid Hildenbrand mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
805395f6081SDavid Hildenbrand #endif /* CONFIG_NUMA */
806395f6081SDavid Hildenbrand
8072aa065f7SDavid Hildenbrand ret = __add_memory_block(mem);
8087ea0d2d7SDavid Hildenbrand if (ret)
8097ea0d2d7SDavid Hildenbrand return ret;
8107ea0d2d7SDavid Hildenbrand
811028fc57aSDavid Hildenbrand if (group) {
812028fc57aSDavid Hildenbrand mem->group = group;
813028fc57aSDavid Hildenbrand list_add(&mem->group_next, &group->memory_blocks);
814028fc57aSDavid Hildenbrand }
815e4619c85SNathan Fontenot
8167ea0d2d7SDavid Hildenbrand return 0;
8170c2c99b1SNathan Fontenot }
8180c2c99b1SNathan Fontenot
add_hotplug_memory_block(unsigned long block_id,struct vmem_altmap * altmap,struct memory_group * group)8192aa065f7SDavid Hildenbrand static int add_hotplug_memory_block(unsigned long block_id,
8201a8c64e1SAneesh Kumar K.V struct vmem_altmap *altmap,
8212aa065f7SDavid Hildenbrand struct memory_group *group)
8222aa065f7SDavid Hildenbrand {
8231a8c64e1SAneesh Kumar K.V return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
8242aa065f7SDavid Hildenbrand }
8252aa065f7SDavid Hildenbrand
remove_memory_block(struct memory_block * memory)8262aa065f7SDavid Hildenbrand static void remove_memory_block(struct memory_block *memory)
8274edd7cefSDavid Rientjes {
828db051a0dSDavid Hildenbrand if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
829db051a0dSDavid Hildenbrand return;
8304edd7cefSDavid Rientjes
8314fb6eabfSScott Cheloha WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);
8324fb6eabfSScott Cheloha
833028fc57aSDavid Hildenbrand if (memory->group) {
834028fc57aSDavid Hildenbrand list_del(&memory->group_next);
835028fc57aSDavid Hildenbrand memory->group = NULL;
836028fc57aSDavid Hildenbrand }
837028fc57aSDavid Hildenbrand
838cb7b3a36SDavid Hildenbrand /* drop the ref. we got via find_memory_block() */
839df2b717cSSeth Jennings put_device(&memory->dev);
8404edd7cefSDavid Rientjes device_unregister(&memory->dev);
8414edd7cefSDavid Rientjes }
8424edd7cefSDavid Rientjes
/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size,
				struct vmem_altmap *altmap,
				struct memory_group *group)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = add_hotplug_memory_block(block_id, altmap, group);
		if (ret)
			break;
	}
	if (ret) {
		/*
		 * Roll back: remove every block created so far, i.e.
		 * [start_block_id, block_id); the failing block itself was
		 * never added.
		 */
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
			remove_memory_block(mem);
		}
	}
	return ret;
}
881db051a0dSDavid Hildenbrand
/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
		/*
		 * NOTE(review): presumably subtracts this block's hwpoison
		 * count from the global tally before the block goes away --
		 * confirm against num_poisoned_pages_sub().
		 */
		num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
		unregister_memory_block_under_nodes(mem);
		remove_memory_block(mem);
	}
}
9093947be19SDave Hansen
/* Attributes of the root "memory" subsystem device (not per-block). */
static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
#ifdef CONFIG_CRASH_HOTPLUG
	&dev_attr_crash_hotplug.attr,
#endif
	NULL
};

static const struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

/* NULL-terminated group list passed to subsys_system_register(). */
static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};
93696b2c0fcSNathan Fontenot
/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
	int ret;
	unsigned long block_sz, block_id, nr;

	/* Validate the configured memory block size */
	block_sz = memory_block_size_bytes();
	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	/*
	 * Create entries for memory sections that were found during boot
	 * and have been initialized. Use @block_id to track the last
	 * handled block and initialize it to an invalid value (ULONG_MAX)
	 * to bypass the block ID matching check for the first present
	 * block so that it can be covered.
	 */
	block_id = ULONG_MAX;
	for_each_present_section_nr(0, nr) {
		/* Skip further sections of a block we already created. */
		if (block_id != ULONG_MAX && memory_block_id(nr) == block_id)
			continue;

		block_id = memory_block_id(nr);
		/* Boot memory is already online at this point. */
		ret = add_memory_block(block_id, MEM_ONLINE, NULL, NULL);
		if (ret) {
			panic("%s() failed to add memory block: %d\n",
			      __func__, ret);
		}
	}
}
977ea884641SDavid Hildenbrand
978ea884641SDavid Hildenbrand /**
979ea884641SDavid Hildenbrand * walk_memory_blocks - walk through all present memory blocks overlapped
980ea884641SDavid Hildenbrand * by the range [start, start + size)
981ea884641SDavid Hildenbrand *
982ea884641SDavid Hildenbrand * @start: start address of the memory range
983ea884641SDavid Hildenbrand * @size: size of the memory range
984ea884641SDavid Hildenbrand * @arg: argument passed to func
985ea884641SDavid Hildenbrand * @func: callback for each memory section walked
986ea884641SDavid Hildenbrand *
987ea884641SDavid Hildenbrand * This function walks through all present memory blocks overlapped by the
988ea884641SDavid Hildenbrand * range [start, start + size), calling func on each memory block.
989ea884641SDavid Hildenbrand *
990ea884641SDavid Hildenbrand * In case func() returns an error, walking is aborted and the error is
991ea884641SDavid Hildenbrand * returned.
9924fb6eabfSScott Cheloha *
9934fb6eabfSScott Cheloha * Called under device_hotplug_lock.
994ea884641SDavid Hildenbrand */
walk_memory_blocks(unsigned long start,unsigned long size,void * arg,walk_memory_blocks_func_t func)995ea884641SDavid Hildenbrand int walk_memory_blocks(unsigned long start, unsigned long size,
996ea884641SDavid Hildenbrand void *arg, walk_memory_blocks_func_t func)
997ea884641SDavid Hildenbrand {
998ea884641SDavid Hildenbrand const unsigned long start_block_id = phys_to_block_id(start);
999ea884641SDavid Hildenbrand const unsigned long end_block_id = phys_to_block_id(start + size - 1);
1000ea884641SDavid Hildenbrand struct memory_block *mem;
1001ea884641SDavid Hildenbrand unsigned long block_id;
1002ea884641SDavid Hildenbrand int ret = 0;
1003ea884641SDavid Hildenbrand
1004dd625285SDavid Hildenbrand if (!size)
1005dd625285SDavid Hildenbrand return 0;
1006dd625285SDavid Hildenbrand
1007ea884641SDavid Hildenbrand for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
1008dd625285SDavid Hildenbrand mem = find_memory_block_by_id(block_id);
1009ea884641SDavid Hildenbrand if (!mem)
1010ea884641SDavid Hildenbrand continue;
1011ea884641SDavid Hildenbrand
1012ea884641SDavid Hildenbrand ret = func(mem, arg);
1013ea884641SDavid Hildenbrand put_device(&mem->dev);
1014ea884641SDavid Hildenbrand if (ret)
1015ea884641SDavid Hildenbrand break;
1016ea884641SDavid Hildenbrand }
1017ea884641SDavid Hildenbrand return ret;
1018ea884641SDavid Hildenbrand }
10192c91f8fcSDavid Hildenbrand
/* Carries the user callback and its argument through bus_for_each_dev(). */
struct for_each_memory_block_cb_data {
	walk_memory_blocks_func_t func;
	void *arg;
};
10242c91f8fcSDavid Hildenbrand
for_each_memory_block_cb(struct device * dev,void * data)10252c91f8fcSDavid Hildenbrand static int for_each_memory_block_cb(struct device *dev, void *data)
10262c91f8fcSDavid Hildenbrand {
10272c91f8fcSDavid Hildenbrand struct memory_block *mem = to_memory_block(dev);
10282c91f8fcSDavid Hildenbrand struct for_each_memory_block_cb_data *cb_data = data;
10292c91f8fcSDavid Hildenbrand
10302c91f8fcSDavid Hildenbrand return cb_data->func(mem, cb_data->arg);
10312c91f8fcSDavid Hildenbrand }
10322c91f8fcSDavid Hildenbrand
10332c91f8fcSDavid Hildenbrand /**
10342c91f8fcSDavid Hildenbrand * for_each_memory_block - walk through all present memory blocks
10352c91f8fcSDavid Hildenbrand *
10362c91f8fcSDavid Hildenbrand * @arg: argument passed to func
10372c91f8fcSDavid Hildenbrand * @func: callback for each memory block walked
10382c91f8fcSDavid Hildenbrand *
10392c91f8fcSDavid Hildenbrand * This function walks through all present memory blocks, calling func on
10402c91f8fcSDavid Hildenbrand * each memory block.
10412c91f8fcSDavid Hildenbrand *
10422c91f8fcSDavid Hildenbrand * In case func() returns an error, walking is aborted and the error is
10432c91f8fcSDavid Hildenbrand * returned.
10442c91f8fcSDavid Hildenbrand */
for_each_memory_block(void * arg,walk_memory_blocks_func_t func)10452c91f8fcSDavid Hildenbrand int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
10462c91f8fcSDavid Hildenbrand {
10472c91f8fcSDavid Hildenbrand struct for_each_memory_block_cb_data cb_data = {
10482c91f8fcSDavid Hildenbrand .func = func,
10492c91f8fcSDavid Hildenbrand .arg = arg,
10502c91f8fcSDavid Hildenbrand };
10512c91f8fcSDavid Hildenbrand
10522c91f8fcSDavid Hildenbrand return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
10532c91f8fcSDavid Hildenbrand for_each_memory_block_cb);
10542c91f8fcSDavid Hildenbrand }
1055028fc57aSDavid Hildenbrand
/*
 * This is an internal helper to unify allocation and initialization of
 * memory groups. Note that the passed memory group will be copied to a
 * dynamically allocated memory group. After this call, the passed
 * memory group should no longer be used.
 */
static int memory_group_register(struct memory_group group)
{
	struct memory_group *new_group;
	uint32_t mgid;
	int ret;

	if (!node_possible(group.nid))
		return -EINVAL;

	new_group = kzalloc(sizeof(group), GFP_KERNEL);
	if (!new_group)
		return -ENOMEM;
	*new_group = group;
	INIT_LIST_HEAD(&new_group->memory_blocks);

	/* The allocated id doubles as the index into memory_groups. */
	ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b,
		       GFP_KERNEL);
	if (ret) {
		kfree(new_group);
		return ret;
	} else if (group.is_dynamic) {
		/* Mark dynamic groups so they can be found via the xarray mark. */
		xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC);
	}
	return mgid;
}
1087028fc57aSDavid Hildenbrand
1088028fc57aSDavid Hildenbrand /**
1089028fc57aSDavid Hildenbrand * memory_group_register_static() - Register a static memory group.
1090028fc57aSDavid Hildenbrand * @nid: The node id.
1091028fc57aSDavid Hildenbrand * @max_pages: The maximum number of pages we'll have in this static memory
1092028fc57aSDavid Hildenbrand * group.
1093028fc57aSDavid Hildenbrand *
1094028fc57aSDavid Hildenbrand * Register a new static memory group and return the memory group id.
1095028fc57aSDavid Hildenbrand * All memory in the group belongs to a single unit, such as a DIMM. All
1096028fc57aSDavid Hildenbrand * memory belonging to a static memory group is added in one go to be removed
1097028fc57aSDavid Hildenbrand * in one go -- it's static.
1098028fc57aSDavid Hildenbrand *
1099028fc57aSDavid Hildenbrand * Returns an error if out of memory, if the node id is invalid, if no new
1100028fc57aSDavid Hildenbrand * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
1101028fc57aSDavid Hildenbrand * returns the new memory group id.
1102028fc57aSDavid Hildenbrand */
memory_group_register_static(int nid,unsigned long max_pages)1103028fc57aSDavid Hildenbrand int memory_group_register_static(int nid, unsigned long max_pages)
1104028fc57aSDavid Hildenbrand {
1105028fc57aSDavid Hildenbrand struct memory_group group = {
1106028fc57aSDavid Hildenbrand .nid = nid,
1107028fc57aSDavid Hildenbrand .s = {
1108028fc57aSDavid Hildenbrand .max_pages = max_pages,
1109028fc57aSDavid Hildenbrand },
1110028fc57aSDavid Hildenbrand };
1111028fc57aSDavid Hildenbrand
1112028fc57aSDavid Hildenbrand if (!max_pages)
1113028fc57aSDavid Hildenbrand return -EINVAL;
1114028fc57aSDavid Hildenbrand return memory_group_register(group);
1115028fc57aSDavid Hildenbrand }
1116028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_register_static);
1117028fc57aSDavid Hildenbrand
1118028fc57aSDavid Hildenbrand /**
1119028fc57aSDavid Hildenbrand * memory_group_register_dynamic() - Register a dynamic memory group.
1120028fc57aSDavid Hildenbrand * @nid: The node id.
1121028fc57aSDavid Hildenbrand * @unit_pages: Unit in pages in which is memory added/removed in this dynamic
1122028fc57aSDavid Hildenbrand * memory group.
1123028fc57aSDavid Hildenbrand *
1124028fc57aSDavid Hildenbrand * Register a new dynamic memory group and return the memory group id.
1125028fc57aSDavid Hildenbrand * Memory within a dynamic memory group is added/removed dynamically
1126028fc57aSDavid Hildenbrand * in unit_pages.
1127028fc57aSDavid Hildenbrand *
1128028fc57aSDavid Hildenbrand * Returns an error if out of memory, if the node id is invalid, if no new
1129028fc57aSDavid Hildenbrand * memory groups can be registered, or if unit_pages is invalid (0, not a
1130028fc57aSDavid Hildenbrand * power of two, smaller than a single memory block). Otherwise, returns the
1131028fc57aSDavid Hildenbrand * new memory group id.
1132028fc57aSDavid Hildenbrand */
memory_group_register_dynamic(int nid,unsigned long unit_pages)1133028fc57aSDavid Hildenbrand int memory_group_register_dynamic(int nid, unsigned long unit_pages)
1134028fc57aSDavid Hildenbrand {
1135028fc57aSDavid Hildenbrand struct memory_group group = {
1136028fc57aSDavid Hildenbrand .nid = nid,
1137028fc57aSDavid Hildenbrand .is_dynamic = true,
1138028fc57aSDavid Hildenbrand .d = {
1139028fc57aSDavid Hildenbrand .unit_pages = unit_pages,
1140028fc57aSDavid Hildenbrand },
1141028fc57aSDavid Hildenbrand };
1142028fc57aSDavid Hildenbrand
1143028fc57aSDavid Hildenbrand if (!unit_pages || !is_power_of_2(unit_pages) ||
1144028fc57aSDavid Hildenbrand unit_pages < PHYS_PFN(memory_block_size_bytes()))
1145028fc57aSDavid Hildenbrand return -EINVAL;
1146028fc57aSDavid Hildenbrand return memory_group_register(group);
1147028fc57aSDavid Hildenbrand }
1148028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_register_dynamic);
1149028fc57aSDavid Hildenbrand
1150028fc57aSDavid Hildenbrand /**
1151028fc57aSDavid Hildenbrand * memory_group_unregister() - Unregister a memory group.
1152028fc57aSDavid Hildenbrand * @mgid: the memory group id
1153028fc57aSDavid Hildenbrand *
1154028fc57aSDavid Hildenbrand * Unregister a memory group. If any memory block still belongs to this
1155028fc57aSDavid Hildenbrand * memory group, unregistering will fail.
1156028fc57aSDavid Hildenbrand *
1157028fc57aSDavid Hildenbrand * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
1158028fc57aSDavid Hildenbrand * memory blocks still belong to this memory group and returns 0 if
1159028fc57aSDavid Hildenbrand * unregistering succeeded.
1160028fc57aSDavid Hildenbrand */
memory_group_unregister(int mgid)1161028fc57aSDavid Hildenbrand int memory_group_unregister(int mgid)
1162028fc57aSDavid Hildenbrand {
1163028fc57aSDavid Hildenbrand struct memory_group *group;
1164028fc57aSDavid Hildenbrand
1165028fc57aSDavid Hildenbrand if (mgid < 0)
1166028fc57aSDavid Hildenbrand return -EINVAL;
1167028fc57aSDavid Hildenbrand
1168028fc57aSDavid Hildenbrand group = xa_load(&memory_groups, mgid);
1169028fc57aSDavid Hildenbrand if (!group)
1170028fc57aSDavid Hildenbrand return -EINVAL;
1171028fc57aSDavid Hildenbrand if (!list_empty(&group->memory_blocks))
1172028fc57aSDavid Hildenbrand return -EBUSY;
1173028fc57aSDavid Hildenbrand xa_erase(&memory_groups, mgid);
1174028fc57aSDavid Hildenbrand kfree(group);
1175028fc57aSDavid Hildenbrand return 0;
1176028fc57aSDavid Hildenbrand }
1177028fc57aSDavid Hildenbrand EXPORT_SYMBOL_GPL(memory_group_unregister);
1178028fc57aSDavid Hildenbrand
1179028fc57aSDavid Hildenbrand /*
1180028fc57aSDavid Hildenbrand * This is an internal helper only to be used in core memory hotplug code to
1181028fc57aSDavid Hildenbrand * lookup a memory group. We don't care about locking, as we don't expect a
1182028fc57aSDavid Hildenbrand * memory group to get unregistered while adding memory to it -- because
1183028fc57aSDavid Hildenbrand * the group and the memory is managed by the same driver.
1184028fc57aSDavid Hildenbrand */
memory_group_find_by_id(int mgid)1185028fc57aSDavid Hildenbrand struct memory_group *memory_group_find_by_id(int mgid)
1186028fc57aSDavid Hildenbrand {
1187028fc57aSDavid Hildenbrand return xa_load(&memory_groups, mgid);
1188028fc57aSDavid Hildenbrand }
11893fcebf90SDavid Hildenbrand
11903fcebf90SDavid Hildenbrand /*
11913fcebf90SDavid Hildenbrand * This is an internal helper only to be used in core memory hotplug code to
11923fcebf90SDavid Hildenbrand * walk all dynamic memory groups excluding a given memory group, either
11933fcebf90SDavid Hildenbrand * belonging to a specific node, or belonging to any node.
11943fcebf90SDavid Hildenbrand */
int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
			       struct memory_group *excluded, void *arg)
{
	struct memory_group *grp;
	unsigned long idx;
	int rc = 0;

	/* Only dynamic groups carry MEMORY_GROUP_MARK_DYNAMIC in the xarray. */
	xa_for_each_marked(&memory_groups, idx, grp,
			   MEMORY_GROUP_MARK_DYNAMIC) {
		if (grp == excluded)
			continue;
#ifdef CONFIG_NUMA
		/* NUMA_NO_NODE means "any node"; otherwise filter by nid. */
		if (nid != NUMA_NO_NODE && grp->nid != nid)
			continue;
#endif /* CONFIG_NUMA */
		rc = func(grp, arg);
		if (rc)
			break;
	}
	return rc;
}
12165033091dSNaoya Horiguchi
12175033091dSNaoya Horiguchi #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
memblk_nr_poison_inc(unsigned long pfn)12185033091dSNaoya Horiguchi void memblk_nr_poison_inc(unsigned long pfn)
12195033091dSNaoya Horiguchi {
12205033091dSNaoya Horiguchi const unsigned long block_id = pfn_to_block_id(pfn);
12215033091dSNaoya Horiguchi struct memory_block *mem = find_memory_block_by_id(block_id);
12225033091dSNaoya Horiguchi
12235033091dSNaoya Horiguchi if (mem)
12245033091dSNaoya Horiguchi atomic_long_inc(&mem->nr_hwpoison);
12255033091dSNaoya Horiguchi }
12265033091dSNaoya Horiguchi
memblk_nr_poison_sub(unsigned long pfn,long i)12275033091dSNaoya Horiguchi void memblk_nr_poison_sub(unsigned long pfn, long i)
12285033091dSNaoya Horiguchi {
12295033091dSNaoya Horiguchi const unsigned long block_id = pfn_to_block_id(pfn);
12305033091dSNaoya Horiguchi struct memory_block *mem = find_memory_block_by_id(block_id);
12315033091dSNaoya Horiguchi
12325033091dSNaoya Horiguchi if (mem)
12335033091dSNaoya Horiguchi atomic_long_sub(i, &mem->nr_hwpoison);
12345033091dSNaoya Horiguchi }
12355033091dSNaoya Horiguchi
memblk_nr_poison(struct memory_block * mem)12365033091dSNaoya Horiguchi static unsigned long memblk_nr_poison(struct memory_block *mem)
12375033091dSNaoya Horiguchi {
12385033091dSNaoya Horiguchi return atomic_long_read(&mem->nr_hwpoison);
12395033091dSNaoya Horiguchi }
12405033091dSNaoya Horiguchi #endif
1241