diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index 01d17046225a..bec481aaca16 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -31,6 +31,9 @@ static inline int pcibus_to_node(struct pci_bus *pbus) cpu_all_mask : \ cpumask_of_node(pcibus_to_node(bus))) +int __node_distance(int, int); +#define node_distance(a, b) __node_distance(a, b) + #else /* CONFIG_NUMA */ #include diff --git a/arch/sparc/include/uapi/asm/asi.h b/arch/sparc/include/uapi/asm/asi.h index aace6f313716..7ad7203deaec 100644 --- a/arch/sparc/include/uapi/asm/asi.h +++ b/arch/sparc/include/uapi/asm/asi.h @@ -279,7 +279,7 @@ * Most-Recently-Used, primary, * implicit */ -#define ASI_ST_BLKINIT_MRU_S 0xf2 /* (NG4) init-store, twin load, +#define ASI_ST_BLKINIT_MRU_S 0xf3 /* (NG4) init-store, twin load, * Most-Recently-Used, secondary, * implicit */ diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 5320689c06e9..37686828c3d9 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -161,7 +161,7 @@ static inline iopte_t *alloc_npages(struct device *dev, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; @@ -253,7 +253,7 @@ static void dma_4u_free_coherent(struct device *dev, size_t size, npages = IO_PAGE_ALIGN(size) >> IO_PAGE_SHIFT; iommu = dev->archdata.iommu; - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) @@ -426,7 +426,7 @@ static void dma_4u_unmap_page(struct device *dev, dma_addr_t bus_addr, iommu_free_ctx(iommu, ctx); spin_unlock_irqrestore(&iommu->lock, flags); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, @@ -492,7 +492,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -571,7 +571,7 @@ iommu_map_failed: iopte_make_dummy(iommu, base + j); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -648,7 +648,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, iopte_make_dummy(iommu, base + i); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 1ae5eb1bb045..59d503866431 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -1953,7 +1953,7 @@ static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu, entry = iommu_tbl_range_alloc(NULL, &iommu->iommu_map_table, npages, NULL, (unsigned long)-1, 0); - if (unlikely(entry < 0)) + if (unlikely(entry == IOMMU_ERROR_CODE)) return NULL; return iommu->page_table + entry; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index d2fe57dad433..836e8cef47e2 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -159,7 +159,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto range_alloc_fail; *dma_addrp = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -187,7 +187,7 @@ static void *dma_4v_alloc_coherent(struct device *dev, size_t size, return ret; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, *dma_addrp, npages, IOMMU_ERROR_CODE); range_alloc_fail: free_pages(first_page, order); @@ -226,7 +226,7 @@ static void dma_4v_free_coherent(struct device *dev, size_t size, void *cpu, devhandle = pbm->devhandle; entry = ((dvma - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT); dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, dvma, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, dvma, npages, IOMMU_ERROR_CODE); order = get_order(size); if (order < 10) free_pages((unsigned long)cpu, order); @@ -256,7 +256,7 @@ static dma_addr_t dma_4v_map_page(struct device *dev, struct page *page, entry = iommu_tbl_range_alloc(dev, &iommu->tbl, npages, NULL, (unsigned long)(-1), 0); - if (unlikely(entry == DMA_ERROR_CODE)) + if (unlikely(entry == IOMMU_ERROR_CODE)) goto bad; bus_addr = (iommu->tbl.table_map_base + (entry << IO_PAGE_SHIFT)); @@ -288,7 +288,7 @@ bad: return DMA_ERROR_CODE; iommu_map_fail: - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); return DMA_ERROR_CODE; } @@ -317,7 +317,7 @@ static void dma_4v_unmap_page(struct device *dev, dma_addr_t bus_addr, bus_addr &= IO_PAGE_MASK; entry = (bus_addr - iommu->tbl.table_map_base) >> IO_PAGE_SHIFT; dma_4v_iommu_demap(&devhandle, entry, npages); - iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, DMA_ERROR_CODE); + iommu_tbl_range_free(&iommu->tbl, bus_addr, npages, IOMMU_ERROR_CODE); } static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, @@ -376,7 +376,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, &handle, (unsigned long)(-1), 0); /* Handle failure */ - if (unlikely(entry == DMA_ERROR_CODE)) { + if (unlikely(entry == IOMMU_ERROR_CODE)) { if (printk_ratelimit()) printk(KERN_INFO "iommu_alloc failed, iommu %p paddr %lx" " npages %lx\n", iommu, paddr, npages); @@ -451,7 +451,7 @@ iommu_map_failed: npages = iommu_num_pages(s->dma_address, s->dma_length, IO_PAGE_SIZE); iommu_tbl_range_free(&iommu->tbl, vaddr, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); /* XXX demap? XXX */ s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -496,7 +496,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, entry = ((dma_handle - tbl->table_map_base) >> shift); dma_4v_iommu_demap(&devhandle, entry, npages); iommu_tbl_range_free(&iommu->tbl, dma_handle, npages, - DMA_ERROR_CODE); + IOMMU_ERROR_CODE); sg = sg_next(sg); } diff --git a/arch/sparc/kernel/unaligned_64.c b/arch/sparc/kernel/unaligned_64.c index 62098a89bbbf..d89e97b374cf 100644 --- a/arch/sparc/kernel/unaligned_64.c +++ b/arch/sparc/kernel/unaligned_64.c @@ -436,24 +436,26 @@ extern void sun4v_data_access_exception(struct pt_regs *regs, int handle_ldf_stq(u32 insn, struct pt_regs *regs) { unsigned long addr = compute_effective_address(regs, insn, 0); - int freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + int freg; struct fpustate *f = FPUSTATE; int asi = decode_asi(insn, regs); - int flag = (freg < 32) ? FPRS_DL : FPRS_DU; + int flag; perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); save_and_clear_fpu(); current_thread_info()->xfsr[0] &= ~0x1c000; - if (freg & 3) { - current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */; - do_fpother(regs); - return 0; - } if (insn & 0x200000) { /* STQ */ u64 first = 0, second = 0; + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + if (freg & 3) { + current_thread_info()->xfsr[0] |= (6 << 14) /* invalid_fp_register */; + do_fpother(regs); + return 0; + } if (current_thread_info()->fpsaved[0] & flag) { first = *(u64 *)&f->regs[freg]; second = *(u64 *)&f->regs[freg+2]; @@ -513,6 +515,12 @@ int handle_ldf_stq(u32 insn, struct pt_regs *regs) case 0x100000: size = 4; break; default: size = 2; break; } + if (size == 1) + freg = (insn >> 25) & 0x1f; + else + freg = ((insn >> 25) & 0x1e) | ((insn >> 20) & 0x20); + flag = (freg < 32) ? FPRS_DL : FPRS_DU; + for (i = 0; i < size; i++) data[i] = 0; diff --git a/arch/sparc/lib/VISsave.S b/arch/sparc/lib/VISsave.S index a063d84336d6..62c2647bd5ce 100644 --- a/arch/sparc/lib/VISsave.S +++ b/arch/sparc/lib/VISsave.S @@ -6,24 +6,23 @@ * Copyright (C) 1998 Jakub Jelinek (jj@ultra.linux.cz) */ +#include + #include #include #include #include #include - .text - .globl VISenter, VISenterhalf - /* On entry: %o5=current FPRS value, %g7 is callers address */ /* May clobber %o5, %g1, %g2, %g3, %g7, %icc, %xcc */ /* Nothing special need be done here to handle pre-emption, this * FPU save/restore mechanism is already preemption safe. */ - + .text .align 32 -VISenter: +ENTRY(VISenter) ldub [%g6 + TI_FPDEPTH], %g1 brnz,a,pn %g1, 1f cmp %g1, 1 @@ -79,3 +78,4 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3 .align 32 80: jmpl %g7 + %g0, %g0 nop +ENDPROC(VISenter) diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c index 4ac88b757514..3025bd57f7ab 100644 --- a/arch/sparc/mm/init_64.c +++ b/arch/sparc/mm/init_64.c @@ -93,6 +93,8 @@ static unsigned long cpu_pgsz_mask; static struct linux_prom64_registers pavail[MAX_BANKS]; static int pavail_ents; +u64 numa_latency[MAX_NUMNODES][MAX_NUMNODES]; + static int cmp_p64(const void *a, const void *b) { const struct linux_prom64_registers *x = a, *y = b; @@ -1157,6 +1159,48 @@ static struct mdesc_mlgroup * __init find_mlgroup(u64 node) return NULL; } +int __node_distance(int from, int to) +{ + if ((from >= MAX_NUMNODES) || (to >= MAX_NUMNODES)) { + pr_warn("Returning default NUMA distance value for %d->%d\n", + from, to); + return (from == to) ? LOCAL_DISTANCE : REMOTE_DISTANCE; + } + return numa_latency[from][to]; +} + +static int find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp) +{ + int i; + + for (i = 0; i < MAX_NUMNODES; i++) { + struct node_mem_mask *n = &node_masks[i]; + + if ((grp->mask == n->mask) && (grp->match == n->val)) + break; + } + return i; +} + +static void find_numa_latencies_for_group(struct mdesc_handle *md, u64 grp, + int index) +{ + u64 arc; + + mdesc_for_each_arc(arc, md, grp, MDESC_ARC_TYPE_FWD) { + int tnode; + u64 target = mdesc_arc_target(md, arc); + struct mdesc_mlgroup *m = find_mlgroup(target); + + if (!m) + continue; + tnode = find_best_numa_node_for_mlgroup(m); + if (tnode == MAX_NUMNODES) + continue; + numa_latency[index][tnode] = m->latency; + } +} + static int __init numa_attach_mlgroup(struct mdesc_handle *md, u64 grp, int index) { @@ -1220,9 +1264,16 @@ static int __init numa_parse_mdesc_group(struct mdesc_handle *md, u64 grp, static int __init numa_parse_mdesc(void) { struct mdesc_handle *md = mdesc_grab(); - int i, err, count; + int i, j, err, count; u64 node; + /* Some sane defaults for numa latency values */ + for (i = 0; i < MAX_NUMNODES; i++) { + for (j = 0; j < MAX_NUMNODES; j++) + numa_latency[i][j] = (i == j) ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + } + node = mdesc_node_by_name(md, MDESC_NODE_NULL, "latency-groups"); if (node == MDESC_NODE_NULL) { mdesc_release(md); @@ -1245,6 +1296,23 @@ static int __init numa_parse_mdesc(void) count++; } + count = 0; + mdesc_for_each_node_by_name(md, node, "group") { + find_numa_latencies_for_group(md, node, count); + count++; + } + + /* Normalize numa latency matrix according to ACPI SLIT spec. */ + for (i = 0; i < MAX_NUMNODES; i++) { + u64 self_latency = numa_latency[i][i]; + + for (j = 0; j < MAX_NUMNODES; j++) { + numa_latency[i][j] = + (numa_latency[i][j] * LOCAL_DISTANCE) / + self_latency; + } + } + add_node_ranges(); for (i = 0; i < num_node_masks; i++) { diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h index bbced83b32ee..376a27c9cc6a 100644 --- a/include/linux/iommu-common.h +++ b/include/linux/iommu-common.h @@ -7,6 +7,7 @@ #define IOMMU_POOL_HASHBITS 4 #define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) +#define IOMMU_ERROR_CODE (~(unsigned long) 0) struct iommu_pool { unsigned long start; diff --git a/lib/iommu-common.c b/lib/iommu-common.c index b1c93e94ca7a..858dc1aae478 100644 --- a/lib/iommu-common.c +++ b/lib/iommu-common.c @@ -11,10 +11,6 @@ #include #include -#ifndef DMA_ERROR_CODE -#define DMA_ERROR_CODE (~(dma_addr_t)0x0) -#endif - static unsigned long iommu_large_alloc = 15; static DEFINE_PER_CPU(unsigned int, iommu_hash_common); @@ -123,7 +119,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, /* Sanity check */ if (unlikely(npages == 0)) { WARN_ON_ONCE(1); - return DMA_ERROR_CODE; + return IOMMU_ERROR_CODE; } if (largealloc) { @@ -206,7 +202,7 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, goto again; } else { /* give up */ - n = DMA_ERROR_CODE; + n = IOMMU_ERROR_CODE; goto bail; } } @@ -259,7 +255,7 @@ void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, unsigned long flags; unsigned long shift = iommu->table_shift; - if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */ entry = (dma_addr - iommu->table_map_base) >> shift; pool = get_pool(iommu, entry);