// SPDX-License-Identifier: GPL-2.0-only
/*
 * NUMA support, based on the x86 implementation.
 *
 * Copyright (C) 2015 Cavium Inc.
 * Author: Ganapatrao Kulkarni <gkulkarni@cavium.com>
 */

#define pr_fmt(fmt) "NUMA: " fmt

#include <linux/acpi.h>
#include <linux/memblock.h>
#include <linux/module.h>
#include <linux/of.h>

#include <asm/sections.h>

struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
EXPORT_SYMBOL(node_data);
nodemask_t numa_nodes_parsed __initdata;
static int cpu_to_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };

static int numa_distance_cnt;
static u8 *numa_distance;
bool numa_off;

static __init int numa_parse_early_param(char *opt)
{
	if (!opt)
		return -EINVAL;
	if (str_has_prefix(opt, "off"))
		numa_off = true;

	return 0;
}
early_param("numa", numa_parse_early_param);

cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
EXPORT_SYMBOL(node_to_cpumask_map);

#ifdef CONFIG_DEBUG_PER_CPU_MAPS

/*
 * Returns a pointer to the bitmask of CPUs on Node 'node'.
 */
const struct cpumask *cpumask_of_node(int node)
{
	if (node == NUMA_NO_NODE)
		return cpu_all_mask;

	if (WARN_ON(node < 0 || node >= nr_node_ids))
		return cpu_none_mask;

	if (WARN_ON(node_to_cpumask_map[node] == NULL))
		return cpu_online_mask;

	return node_to_cpumask_map[node];
}
EXPORT_SYMBOL(cpumask_of_node);

#endif
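
/*
 * Keep node_to_cpumask_map coherent across CPU hotplug: add @cpu to, or
 * remove it from, the cpumask of the node it is currently mapped to.
 */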
static void numa_update_cpu(unsigned int cpu, bool remove)
{
	int nid = cpu_to_node(cpu);

	if (nid == NUMA_NO_NODE)
		return;

	if (remove)
		cpumask_clear_cpu(cpu, node_to_cpumask_map[nid]);
	else
		cpumask_set_cpu(cpu, node_to_cpumask_map[nid]);
}

void numa_add_cpu(unsigned int cpu)
{
	numa_update_cpu(cpu, false);
}

void numa_remove_cpu(unsigned int cpu)
{
	numa_update_cpu(cpu, true);
}

void numa_clear_node(unsigned int cpu)
{
	numa_remove_cpu(cpu);
	set_cpu_numa_node(cpu, NUMA_NO_NODE);
}

/*
 * Allocate node_to_cpumask_map based on number of available nodes
 * Requires node_possible_map to be valid.
 *
 * Note: cpumask_of_node() is not valid until after this is done.
 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
 */
static void __init setup_node_to_cpumask_map(void)
{
	int node;

	/* setup nr_node_ids if not done yet */
	if (nr_node_ids == MAX_NUMNODES)
		setup_nr_node_ids();

	/* allocate and clear the mapping */
	for (node = 0; node < nr_node_ids; node++) {
		alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]);
		cpumask_clear(node_to_cpumask_map[node]);
	}

	/* cpumask_of_node() will now work */
	pr_debug("Node to cpumask map for %u nodes\n", nr_node_ids);
}

/*
 * Set the cpu to node and mem mapping
 */
void numa_store_cpu_info(unsigned int cpu)
{
	set_cpu_numa_node(cpu, cpu_to_node_map[cpu]);
}

void __init early_map_cpu_to_node(unsigned int cpu, int nid)
{
	/* fallback to node 0 */
	if (nid < 0 || nid >= MAX_NUMNODES || numa_off)
		nid = 0;

	cpu_to_node_map[cpu] = nid;

	/*
	 * We should set the numa node of cpu0 as soon as possible, because it
	 * has already been set up online before. cpu_to_node(0) will soon be
	 * called.
	 */
	if (!cpu)
		set_cpu_numa_node(cpu, nid);
}

#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(__per_cpu_offset);
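
/*
 * Early lookup into the firmware-provided cpu_to_node_map, usable before
 * the per-cpu areas (and therefore cpu_to_node()) are initialized.
 */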
int __init early_cpu_to_node(int cpu)
{
	return cpu_to_node_map[cpu];
}

static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
{
	/*
	 * node_distance() expects node ids, not cpu ids; convert with
	 * early_cpu_to_node() so CPUs are grouped by their real proximity
	 * (passing raw cpu ids here used to make every CPU look remote).
	 */
	return node_distance(early_cpu_to_node(from), early_cpu_to_node(to));
}
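
/*
 * Set up the per-cpu areas: try the embed first-chunk allocator unless the
 * page allocator was explicitly requested on the command line, fall back to
 * page-granular mapping if embedding fails, then compute __per_cpu_offset[]
 * for each possible CPU.
 */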
void __init setup_per_cpu_areas(void)
{
	unsigned long delta;
	unsigned int cpu;
	int rc = -EINVAL;

	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
		/*
		 * Always reserve area for module percpu variables.  That's
		 * what the legacy allocator did.
		 */
		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
					    PERCPU_DYNAMIC_RESERVE, PAGE_SIZE,
					    pcpu_cpu_distance,
					    early_cpu_to_node);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
		if (rc < 0)
			pr_warn("PERCPU: %s allocator failed (%d), falling back to page size\n",
				pcpu_fc_names[pcpu_chosen_fc], rc);
#endif
	}

#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
	if (rc < 0)
		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, early_cpu_to_node);
#endif
	if (rc < 0)
		panic("Failed to initialize percpu areas (err=%d).", rc);

	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
	for_each_possible_cpu(cpu)
		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
}
#endif

/**
 * numa_add_memblk() - Assign a memory block to a NUMA node
 * @nid: NUMA node ID of the new memblk
 * @start: Start address of the new memblk
 * @end:  End address of the new memblk
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int __init numa_add_memblk(int nid, u64 start, u64 end)
{
	int ret;

	ret = memblock_set_node(start, (end - start), &memblock.memory, nid);
	if (ret < 0) {
		pr_err("memblock [0x%llx - 0x%llx] failed to add on node %d\n",
			start, (end - 1), nid);
		return ret;
	}

	node_set(nid, numa_nodes_parsed);
	return ret;
}

/*
 * Initialize NODE_DATA for a node, allocating it from that node's own memory
 */
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
{
	const size_t nd_size = roundup(sizeof(pg_data_t), SMP_CACHE_BYTES);
	u64 nd_pa;
	void *nd;
	int tnid;

	if (start_pfn >= end_pfn)
		pr_info("Initmem setup node %d [<memory-less node>]\n", nid);

	nd_pa = memblock_phys_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid);
	if (!nd_pa)
		panic("Cannot allocate %zu bytes for node %d data\n",
		      nd_size, nid);

	nd = __va(nd_pa);

	/* report and initialize */
	pr_info("NODE_DATA [mem %#010Lx-%#010Lx]\n",
		nd_pa, nd_pa + nd_size - 1);
	tnid = early_pfn_to_nid(nd_pa >> PAGE_SHIFT);
	if (tnid != nid)
		pr_info("NODE_DATA(%d) on node %d\n", nid, tnid);

	node_data[nid] = nd;
	memset(NODE_DATA(nid), 0, sizeof(pg_data_t));
	NODE_DATA(nid)->node_id = nid;
	NODE_DATA(nid)->node_start_pfn = start_pfn;
	NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
}

/*
 * numa_free_distance
 *
 * The current table is freed.
 */
void __init numa_free_distance(void)
{
	size_t size;

	if (!numa_distance)
		return;

	size = numa_distance_cnt * numa_distance_cnt *
		sizeof(numa_distance[0]);

	memblock_free(numa_distance, size);
	numa_distance_cnt = 0;
	numa_distance = NULL;
}
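
/*
 * The distance table is a flat numa_distance_cnt * numa_distance_cnt array
 * of u8: the distance from node i to node j lives at
 * numa_distance[i * numa_distance_cnt + j].
 */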

/*
 * Create a new NUMA distance table.
 */
static int __init numa_alloc_distance(void)
{
	size_t size;
	int i, j;

	size = nr_node_ids * nr_node_ids * sizeof(numa_distance[0]);
	numa_distance = memblock_alloc(size, PAGE_SIZE);
	if (WARN_ON(!numa_distance))
		return -ENOMEM;

	numa_distance_cnt = nr_node_ids;

	/* fill with the default distances */
	for (i = 0; i < numa_distance_cnt; i++)
		for (j = 0; j < numa_distance_cnt; j++)
			numa_distance[i * numa_distance_cnt + j] = i == j ?
				LOCAL_DISTANCE : REMOTE_DISTANCE;

	pr_debug("Initialized distance table, cnt=%d\n", numa_distance_cnt);

	return 0;
}

/**
 * numa_set_distance() - Set inter node NUMA distance from node to node.
 * @from: the 'from' node to set distance
 * @to: the 'to' node to set distance
 * @distance: NUMA distance
 *
 * Set the distance from node @from to @to to @distance.
 * If the distance table doesn't exist, a warning is printed.
 *
 * If @from or @to is higher than the highest known node or lower than zero,
 * or if @distance doesn't make sense, the call is ignored.
 */
void __init numa_set_distance(int from, int to, int distance)
{
	if (!numa_distance) {
		pr_warn_once("Warning: distance table not allocated yet\n");
		return;
	}

	if (from >= numa_distance_cnt || to >= numa_distance_cnt ||
			from < 0 || to < 0) {
		pr_warn_once("Warning: node ids are out of bounds, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	if ((u8)distance != distance ||
	    (from == to && distance != LOCAL_DISTANCE)) {
		pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n",
			     from, to, distance);
		return;
	}

	numa_distance[from * numa_distance_cnt + to] = distance;
}
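
/*
 * Example (hypothetical two-node topology, as firmware tables might
 * describe it):
 *
 *	numa_set_distance(0, 1, 20);
 *	numa_set_distance(1, 0, 20);
 *
 * Diagonal entries stay at LOCAL_DISTANCE (10); a value that does not fit
 * in a u8, or a diagonal entry other than LOCAL_DISTANCE, is rejected with
 * a one-time warning.
 */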

/*
 * Return NUMA distance @from to @to
 */
int __node_distance(int from, int to)
{
	if (from >= numa_distance_cnt || to >= numa_distance_cnt)
		return from == to ? LOCAL_DISTANCE : REMOTE_DISTANCE;
	return numa_distance[from * numa_distance_cnt + to];
}
EXPORT_SYMBOL(__node_distance);
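
/*
 * Verify that every memblock region carries a valid node id, then create
 * NODE_DATA for each parsed node and mark it online.
 */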
static int __init numa_register_nodes(void)
{
	int nid;
	struct memblock_region *mblk;

	/* Check that a valid nid is set for every memblk */
	for_each_mem_region(mblk) {
		int mblk_nid = memblock_get_region_node(mblk);
		phys_addr_t start = mblk->base;
		phys_addr_t end = mblk->base + mblk->size - 1;

		if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
			pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n",
				mblk_nid, &start, &end);
			return -EINVAL;
		}
	}

	/* Finally register nodes. */
	for_each_node_mask(nid, numa_nodes_parsed) {
		unsigned long start_pfn, end_pfn;

		get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
		setup_node_data(nid, start_pfn, end_pfn);
		node_set_online(nid);
	}

	/* Limit the possible nodes to those actually parsed */
	node_possible_map = numa_nodes_parsed;

	return 0;
}
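
/*
 * Common NUMA init path: reset node state, allocate the distance table,
 * run the firmware-specific @init_func, then register the parsed nodes.
 */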
static int __init numa_init(int (*init_func)(void))
{
	int ret;

	nodes_clear(numa_nodes_parsed);
	nodes_clear(node_possible_map);
	nodes_clear(node_online_map);

	ret = numa_alloc_distance();
	if (ret < 0)
		return ret;

	ret = init_func();
	if (ret < 0)
		goto out_free_distance;

	if (nodes_empty(numa_nodes_parsed)) {
		pr_info("No NUMA configuration found\n");
		ret = -EINVAL;
		goto out_free_distance;
	}

	ret = numa_register_nodes();
	if (ret < 0)
		goto out_free_distance;

	setup_node_to_cpumask_map();

	return 0;

out_free_distance:
	numa_free_distance();
	return ret;
}

/**
 * dummy_numa_init() - Fallback dummy NUMA init
 *
 * Used if there's no underlying NUMA architecture, NUMA initialization
 * fails, or NUMA is disabled on the command line.
 *
 * Must online at least one node (node 0) and add memory blocks that cover all
 * allowed memory. It is unlikely that this function fails.
 *
 * Return: 0 on success, -errno on failure.
 */
static int __init dummy_numa_init(void)
{
	phys_addr_t start = memblock_start_of_DRAM();
	phys_addr_t end = memblock_end_of_DRAM() - 1;
	int ret;

	if (numa_off)
		pr_info("NUMA disabled\n"); /* Forced off on command line. */
	pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);

	ret = numa_add_memblk(0, start, end + 1);
	if (ret) {
		pr_err("NUMA init failed\n");
		return ret;
	}

	numa_off = true;
	return 0;
}
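
/*
 * ACPI path: acpi_numa_init() parses the SRAT/SLIT tables; treat a disabled
 * SRAT as a failure so the caller falls back to the dummy single-node setup.
 */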
#ifdef CONFIG_ACPI_NUMA
static int __init arch_acpi_numa_init(void)
{
	int ret;

	ret = acpi_numa_init();
	if (ret) {
		pr_info("Failed to initialise from firmware\n");
		return ret;
	}

	return srat_disabled() ? -EINVAL : 0;
}
#else
static int __init arch_acpi_numa_init(void)
{
	return -EOPNOTSUPP;
}
#endif

/**
 * arch_numa_init() - Initialize NUMA
 *
 * Try each configured NUMA initialization method until one succeeds. The
 * last fallback is a dummy single-node config encompassing all of memory.
 */
void __init arch_numa_init(void)
{
	if (!numa_off) {
		if (!acpi_disabled && !numa_init(arch_acpi_numa_init))
			return;
		if (acpi_disabled && !numa_init(of_numa_init))
			return;
	}

	numa_init(dummy_numa_init);
}