2016-04-09 06:50:25 +08:00
|
|
|
/*
|
|
|
|
* OF NUMA Parsing support.
|
|
|
|
*
|
|
|
|
* Copyright (C) 2015 - 2016 Cavium Inc.
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
|
|
* published by the Free Software Foundation.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
*/
|
|
|
|
|
2016-09-01 14:54:58 +08:00
|
|
|
#define pr_fmt(fmt) "OF: NUMA: " fmt
|
|
|
|
|
2016-04-09 06:50:25 +08:00
|
|
|
#include <linux/of.h>
|
|
|
|
#include <linux/of_address.h>
|
|
|
|
#include <linux/nodemask.h>
|
|
|
|
|
|
|
|
#include <asm/numa.h>
|
|
|
|
|
|
|
|
/* define default numa node to 0 */
|
|
|
|
#define DEFAULT_NODE 0
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Even though we connect cpus to numa domains later in SMP
|
|
|
|
* init, we need to know the node ids now for all cpus.
|
|
|
|
*/
|
|
|
|
/*
 * Scan the children of "/cpus" and mark, in numa_nodes_parsed, every NUMA
 * node id that a CPU node advertises through its "numa-node-id" property.
 *
 * Children that are not of type "cpu", or that have no readable
 * "numa-node-id", are ignored. Ids at or above MAX_NUMNODES are reported
 * with a warning and not recorded.
 */
static void __init of_numa_parse_cpu_nodes(void)
{
	struct device_node *cpus_root;
	struct device_node *child = NULL;
	u32 node_id;

	cpus_root = of_find_node_by_path("/cpus");
	if (!cpus_root)
		return;

	for_each_child_of_node(cpus_root, child) {
		/* Only children whose type is "cpu" are of interest. */
		if (of_node_cmp(child->type, "cpu") != 0)
			continue;

		/* Any failure to read the property just skips this CPU. */
		if (of_property_read_u32(child, "numa-node-id", &node_id))
			continue;

		pr_debug("CPU on %u\n", node_id);

		if (node_id < MAX_NUMNODES)
			node_set(node_id, numa_nodes_parsed);
		else
			pr_warn("Node id %u exceeds maximum value\n", node_id);
	}

	/* Release the reference obtained from of_find_node_by_path(). */
	of_node_put(cpus_root);
}
|
|
|
|
|
|
|
|
/*
 * Register the address ranges of every "memory" device-tree node that
 * carries a "numa-node-id" property via numa_add_memblk().
 *
 * Memory nodes without the property are skipped. Parsing stops at the
 * first memory node whose property cannot be read, whose node id is
 * >= MAX_NUMNODES, that yields no address resource at all, or for which
 * numa_add_memblk() fails.
 *
 * Returns 0 on success, otherwise the failing call's error code (or
 * -EINVAL when the id is out of range or no address range was found).
 */
static int __init of_numa_parse_memory_nodes(void)
{
	struct device_node *np = NULL;
	struct resource rsrc;
	u32 nid;
	int i, r;

	for_each_node_by_type(np, "memory") {
		r = of_property_read_u32(np, "numa-node-id", &nid);
		if (r == -EINVAL)
			/*
			 * property doesn't exist if -EINVAL, continue
			 * looking for more memory nodes with
			 * "numa-node-id" property
			 */
			continue;

		/*
		 * nid is only written when the read succeeded; on any other
		 * error it must not be inspected, and the original error code
		 * is preserved for the caller.
		 */
		if (!r && nid >= MAX_NUMNODES) {
			pr_warn("Node id %u exceeds maximum value\n", nid);
			r = -EINVAL;
		}

		/* Add each address range of this node until one fails. */
		for (i = 0; !r && !of_address_to_resource(np, i, &rsrc); i++)
			r = numa_add_memblk(nid, rsrc.start, rsrc.end + 1);

		/* i == 0 means not a single range was registered. */
		if (!i || r) {
			/* Leaving the iterator early: drop the node reference. */
			of_node_put(np);
			pr_err("bad property in memory node\n");
			return r ? r : -EINVAL;
		}
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Parse the "distance-matrix" property of a "numa-distance-map-v1" node.
 *
 * The property is consumed as consecutive (node A, node B, distance)
 * triplets of 32-bit cells; each triplet is passed to numa_set_distance().
 * Trailing cells that do not form a complete triplet are ignored.
 *
 * @map: the distance-map device node.
 *
 * Returns 0 on success, -EINVAL if the property is missing or has no
 * usable u32 elements.
 */
static int __init of_numa_parse_distance_map_v1(struct device_node *map)
{
	const __be32 *matrix;
	int entry_count;
	int i;

	pr_info("parsing numa-distance-map-v1\n");

	matrix = of_get_property(map, "distance-matrix", NULL);
	if (!matrix) {
		pr_err("No distance-matrix property in distance-map\n");
		return -EINVAL;
	}

	entry_count = of_property_count_u32_elems(map, "distance-matrix");
	if (entry_count <= 0) {
		pr_err("Invalid distance-matrix\n");
		return -EINVAL;
	}

	for (i = 0; i + 2 < entry_count; i += 3) {
		u32 nodea, nodeb, distance;

		nodea = of_read_number(matrix, 1);
		matrix++;
		nodeb = of_read_number(matrix, 1);
		matrix++;
		distance = of_read_number(matrix, 1);
		matrix++;

		numa_set_distance(nodea, nodeb, distance);
		/* All three values are u32, so print them as unsigned. */
		pr_debug("distance[node%u -> node%u] = %u\n",
			 nodea, nodeb, distance);

		/* Set default distance of node B->A same as A->B */
		if (nodeb > nodea)
			numa_set_distance(nodeb, nodea, distance);
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Look up a node compatible with "numa-distance-map-v1" and, if one
 * exists, parse its distance matrix.
 *
 * Returns 0 when no such node is present, otherwise the result of
 * of_numa_parse_distance_map_v1().
 */
static int __init of_numa_parse_distance_map(void)
{
	struct device_node *map_node;
	int status;

	map_node = of_find_compatible_node(NULL, NULL,
					   "numa-distance-map-v1");

	status = map_node ? of_numa_parse_distance_map_v1(map_node) : 0;

	/* Called unconditionally, including when no node was found. */
	of_node_put(map_node);

	return status;
}
|
|
|
|
|
|
|
|
int of_node_to_nid(struct device_node *device)
|
|
|
|
{
|
|
|
|
struct device_node *np;
|
|
|
|
u32 nid;
|
|
|
|
int r = -ENODATA;
|
|
|
|
|
|
|
|
np = of_node_get(device);
|
|
|
|
|
|
|
|
while (np) {
|
|
|
|
r = of_property_read_u32(np, "numa-node-id", &nid);
|
|
|
|
/*
|
|
|
|
* -EINVAL indicates the property was not found, and
|
|
|
|
* we walk up the tree trying to find a parent with a
|
|
|
|
* "numa-node-id". Any other type of error indicates
|
|
|
|
* a bad device tree and we give up.
|
|
|
|
*/
|
|
|
|
if (r != -EINVAL)
|
|
|
|
break;
|
|
|
|
|
2016-09-01 14:54:57 +08:00
|
|
|
np = of_get_next_parent(np);
|
2016-04-09 06:50:25 +08:00
|
|
|
}
|
|
|
|
if (np && r)
|
2016-09-01 14:54:58 +08:00
|
|
|
pr_warn("Invalid \"numa-node-id\" property in node %s\n",
|
2016-04-09 06:50:25 +08:00
|
|
|
np->name);
|
|
|
|
of_node_put(np);
|
|
|
|
|
of, numa: Return NUMA_NO_NODE from disable of_node_to_nid() if nid not possible.
On arm64 NUMA kernels we can pass "numa=off" on the command line to
disable NUMA. A side effect of this is that kmalloc_node() calls to
non-zero nodes will crash the system with an OOPS:
[ 0.000000] ITS@0x0000901000020000: allocated 2097152 Devices @10002000000 (flat, esz 8, psz 64K, shr 1)
[ 0.000000] Unable to handle kernel NULL pointer dereference at virtual address 00001680
[ 0.000000] pgd = fffffc0009470000
[ 0.000000] [00001680] *pgd=0000010ffff90003, *pud=0000010ffff90003, *pmd=0000010ffff90003, *pte=0000000000000000
[ 0.000000] Internal error: Oops: 96000006 [#1] SMP
.
.
.
[ 0.000000] [<fffffc00081c8950>] __alloc_pages_nodemask+0xa4/0xe68
[ 0.000000] [<fffffc000821fa70>] new_slab+0xd0/0x564
[ 0.000000] [<fffffc0008221e24>] ___slab_alloc+0x2e4/0x514
[ 0.000000] [<fffffc0008239498>] __slab_alloc+0x48/0x58
[ 0.000000] [<fffffc0008222c20>] __kmalloc_node+0xd0/0x2dc
[ 0.000000] [<fffffc0008115374>] __irq_domain_add+0x7c/0x164
[ 0.000000] [<fffffc0008b461dc>] its_probe+0x784/0x81c
[ 0.000000] [<fffffc0008b462bc>] its_init+0x48/0x1b0
[ 0.000000] [<fffffc0008b4543c>] gic_init_bases+0x228/0x360
[ 0.000000] [<fffffc0008b456bc>] gic_of_init+0x148/0x1cc
[ 0.000000] [<fffffc0008b5aec8>] of_irq_init+0x184/0x298
[ 0.000000] [<fffffc0008b43f9c>] irqchip_init+0x14/0x38
[ 0.000000] [<fffffc0008b12d60>] init_IRQ+0xc/0x30
[ 0.000000] [<fffffc0008b10a3c>] start_kernel+0x240/0x3b8
[ 0.000000] [<fffffc0008b101c4>] __primary_switched+0x30/0x6c
[ 0.000000] Code: 912ec2a0 b9403809 0a0902fb 37b007db (f9400300)
.
.
.
This is caused by code like this in kernel/irq/irqdomain.c
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
When NUMA is disabled, the concept of a node is really undefined, so
of_node_to_nid() should unconditionally return NUMA_NO_NODE.
Fix by returning NUMA_NO_NODE when the nid is not in the set of
possible nodes.
Reported-by: Gilbert Netzer <noname@pdc.kth.se>
Signed-off-by: David Daney <david.daney@cavium.com>
Cc: stable@vger.kernel.org # 4.7+
Signed-off-by: Rob Herring <robh@kernel.org>
2016-10-29 05:15:02 +08:00
|
|
|
/*
|
|
|
|
* If numa=off passed on command line, or with a defective
|
|
|
|
* device tree, the nid may not be in the set of possible
|
|
|
|
* nodes. Check for this case and return NUMA_NO_NODE.
|
|
|
|
*/
|
|
|
|
if (!r && nid < MAX_NUMNODES && node_possible(nid))
|
2016-09-01 14:54:55 +08:00
|
|
|
return nid;
|
2016-04-09 06:50:25 +08:00
|
|
|
|
|
|
|
return NUMA_NO_NODE;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(of_node_to_nid);
|
|
|
|
|
|
|
|
/*
 * Entry point for device-tree NUMA parsing: CPU nodes first, then memory
 * nodes, then the distance map.
 *
 * Returns 0 on success, or the first error reported by the memory-node
 * or distance-map parsing step. CPU-node parsing reports no status.
 */
int __init of_numa_init(void)
{
	int err;

	of_numa_parse_cpu_nodes();

	err = of_numa_parse_memory_nodes();
	if (!err)
		err = of_numa_parse_distance_map();

	return err;
}
|