MIPS: Add Cavium OCTEON processor support files to arch/mips/cavium-octeon.

These are the rest of the new files needed to add OCTEON processor support to the Linux kernel. Other than Makefile and Kconfig which should be obvious, we have: csrc-octeon.c -- Clock source driver for OCTEON. dma-octeon.c -- Helper functions for mapping DMA memory. flash_setup.c -- Register on-board flash with the MTD subsystem. octeon-irq.c -- OCTEON interrupt controller managment. octeon-memcpy.S -- Optimized memcpy() implementation. serial.c -- Register 8250 platform driver and early console. setup.c -- Early architecture initialization. smp.c -- OCTEON SMP support. octeon_switch.S -- Scheduler context switch for OCTEON. c-octeon.c -- OCTEON cache controller support. cex-oct.S -- OCTEON cache exception handler. asm/mach-cavium-octeon/*.h -- Architecture include files. Signed-off-by: Tomaso Paoletti <tpaoletti@caviumnetworks.com> Signed-off-by: David Daney <ddaney@caviumnetworks.com> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> create mode 100644 arch/mips/cavium-octeon/Kconfig create mode 100644 arch/mips/cavium-octeon/Makefile create mode 100644 arch/mips/cavium-octeon/csrc-octeon.c create mode 100644 arch/mips/cavium-octeon/dma-octeon.c create mode 100644 arch/mips/cavium-octeon/flash_setup.c create mode 100644 arch/mips/cavium-octeon/octeon-irq.c create mode 100644 arch/mips/cavium-octeon/octeon-memcpy.S create mode 100644 arch/mips/cavium-octeon/serial.c create mode 100644 arch/mips/cavium-octeon/setup.c create mode 100644 arch/mips/cavium-octeon/smp.c create mode 100644 arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h create mode 100644 arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h create mode 100644 arch/mips/include/asm/mach-cavium-octeon/irq.h create mode 100644 arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h create mode 100644 arch/mips/include/asm/mach-cavium-octeon/war.h create mode 100644 arch/mips/include/asm/octeon/octeon.h create mode 100644 arch/mips/kernel/octeon_switch.S create mode 100644 arch/mips/mm/c-octeon.c create mode 100644 arch/mips/mm/cex-oct.S
2024-11-18 23:54:26 +08:00 · 2009-01-08 16:46:40 -08:00 · 2009-01-08 16:46:40 -08:00 · 5b3b16880f
commit 5b3b16880f
parent 58f07778ce
19 changed files with 4243 additions and 0 deletions
--- a/arch/mips/cavium-octeon/Kconfig
+++ b/arch/mips/cavium-octeon/Kconfig
@ -0,0 +1,85 @@
+config CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	bool "Enable Octeon specific options"
+	depends on CPU_CAVIUM_OCTEON
+	default "y"
+
+config CAVIUM_OCTEON_2ND_KERNEL
+	bool "Build the kernel to be used as a 2nd kernel on the same chip"
+	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	default "n"
+	help
+	  This option configures this kernel to be linked at a different
+	  address and use the 2nd uart for output. This allows a kernel built
+	  with this option to be run at the same time as one built without this
+	  option.
+
+config CAVIUM_OCTEON_HW_FIX_UNALIGNED
+	bool "Enable hardware fixups of unaligned loads and stores"
+	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	default "y"
+	help
+	  Configure the Octeon hardware to automatically fix unaligned loads
+	  and stores. Normally unaligned accesses are fixed using a kernel
+	  exception handler. This option enables the hardware automatic fixups,
+	  which requires only an extra 3 cycles. Disable this option if you
+	  are running code that relies on address exceptions on unaligned
+	  accesses.
+
+config CAVIUM_OCTEON_CVMSEG_SIZE
+	int "Number of L1 cache lines reserved for CVMSEG memory"
+	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	range 0 54
+	default 1
+	help
+	  CVMSEG LM is a segment that accesses portions of the dcache as a
+	  local memory; the larger CVMSEG is, the smaller the cache is.
+	  This selects the size of CVMSEG LM, which is in cache blocks. The
+	  legally range is from zero to 54 cache blocks (i.e. CVMSEG LM is
+	  between zero and 6192 bytes).
+
+config CAVIUM_OCTEON_LOCK_L2
+	bool "Lock often used kernel code in the L2"
+	depends on CAVIUM_OCTEON_SPECIFIC_OPTIONS
+	default "y"
+	help
+	  Enable locking parts of the kernel into the L2 cache.
+
+config CAVIUM_OCTEON_LOCK_L2_TLB
+	bool "Lock the TLB handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level TLB fast path into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_EXCEPTION
+	bool "Lock the exception handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level exception handler into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT
+	bool "Lock the interrupt handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the low level interrupt handler into L2.
+
+config CAVIUM_OCTEON_LOCK_L2_INTERRUPT
+	bool "Lock the 2nd level interrupt handler in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the 2nd level interrupt handler in L2.
+
+config CAVIUM_OCTEON_LOCK_L2_MEMCPY
+	bool "Lock memcpy() in L2"
+	depends on CAVIUM_OCTEON_LOCK_L2
+	default "y"
+	help
+	  Lock the kernel's implementation of memcpy() into L2.
+
+config ARCH_SPARSEMEM_ENABLE
+	def_bool y
+	select SPARSEMEM_STATIC
+	depends on CPU_CAVIUM_OCTEON
--- a/arch/mips/cavium-octeon/Makefile
+++ b/arch/mips/cavium-octeon/Makefile
@ -0,0 +1,16 @@
+#
+# Makefile for the Cavium Octeon specific kernel interface routines
+# under Linux.
+#
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (C) 2005-2008 Cavium Networks
+#
+
+obj-y := setup.o serial.o octeon-irq.o csrc-octeon.o
+obj-y += dma-octeon.o flash_setup.o
+obj-y += octeon-memcpy.o
+
+obj-$(CONFIG_SMP)                     += smp.o
--- a/arch/mips/cavium-octeon/csrc-octeon.c
+++ b/arch/mips/cavium-octeon/csrc-octeon.c
@ -0,0 +1,58 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007 by Ralf Baechle
+ */
+#include <linux/clocksource.h>
+#include <linux/init.h>
+
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-ipd-defs.h>
+
+/*
+ * Set the current core's cvmcount counter to the value of the
+ * IPD_CLK_COUNT.  We do this on all cores as they are brought
+ * on-line.  This allows for a read from a local cpu register to
+ * access a synchronized counter.
+ *
+ */
+void octeon_init_cvmcount(void)
+{
+	unsigned long flags;
+	unsigned loops = 2;
+
+	/* Clobber loops so GCC will not unroll the following while loop. */
+	asm("" : "+r" (loops));
+
+	local_irq_save(flags);
+	/*
+	 * Loop several times so we are executing from the cache,
+	 * which should give more deterministic timing.
+	 */
+	while (loops--)
+		write_c0_cvmcount(cvmx_read_csr(CVMX_IPD_CLK_COUNT));
+	local_irq_restore(flags);
+}
+
+static cycle_t octeon_cvmcount_read(void)
+{
+	return read_c0_cvmcount();
+}
+
+static struct clocksource clocksource_mips = {
+	.name		= "OCTEON_CVMCOUNT",
+	.read		= octeon_cvmcount_read,
+	.mask		= CLOCKSOURCE_MASK(64),
+	.flags		= CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+void __init plat_time_init(void)
+{
+	clocksource_mips.rating = 300;
+	clocksource_set_clock(&clocksource_mips, mips_hpt_frequency);
+	clocksource_register(&clocksource_mips);
+}
--- a/arch/mips/cavium-octeon/dma-octeon.c
+++ b/arch/mips/cavium-octeon/dma-octeon.c
@ -0,0 +1,32 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000  Ani Joshi <ajoshi@unixbox.com>
+ * Copyright (C) 2000, 2001  Ralf Baechle <ralf@gnu.org>
+ * Copyright (C) 2005 Ilya A. Volynets-Evenbakh <ilya@total-knowledge.com>
+ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf.
+ * IP32 changes by Ilya.
+ * Cavium Networks: Create new dma setup for Cavium Networks Octeon based on
+ * the kernels original.
+ */
+#include <linux/types.h>
+#include <linux/mm.h>
+
+#include <dma-coherence.h>
+
+dma_addr_t octeon_map_dma_mem(struct device *dev, void *ptr, size_t size)
+{
+	/* Without PCI/PCIe this function can be called for Octeon internal
+	   devices such as USB. These devices all support 64bit addressing */
+	mb();
+	return virt_to_phys(ptr);
+}
+
+void octeon_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr)
+{
+	/* Without PCI/PCIe this function can be called for Octeon internal
+	 * devices such as USB. These devices all support 64bit addressing */
+	return;
+}
--- a/arch/mips/cavium-octeon/flash_setup.c
+++ b/arch/mips/cavium-octeon/flash_setup.c
@ -0,0 +1,84 @@
+/*
+ *   Octeon Bootbus flash setup
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2007, 2008 Cavium Networks
+ */
+#include <linux/kernel.h>
+#include <linux/mtd/mtd.h>
+#include <linux/mtd/map.h>
+#include <linux/mtd/partitions.h>
+
+#include <asm/octeon/octeon.h>
+
+static struct map_info flash_map;
+static struct mtd_info *mymtd;
+#ifdef CONFIG_MTD_PARTITIONS
+static int nr_parts;
+static struct mtd_partition *parts;
+static const char *part_probe_types[] = {
+	"cmdlinepart",
+#ifdef CONFIG_MTD_REDBOOT_PARTS
+	"RedBoot",
+#endif
+	NULL
+};
+#endif
+
+/**
+ * Module/ driver initialization.
+ *
+ * Returns Zero on success
+ */
+static int __init flash_init(void)
+{
+	/*
+	 * Read the bootbus region 0 setup to determine the base
+	 * address of the flash.
+	 */
+	union cvmx_mio_boot_reg_cfgx region_cfg;
+	region_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(0));
+	if (region_cfg.s.en) {
+		/*
+		 * The bootloader always takes the flash and sets its
+		 * address so the entire flash fits below
+		 * 0x1fc00000. This way the flash aliases to
+		 * 0x1fc00000 for booting. Software can access the
+		 * full flash at the true address, while core boot can
+		 * access 4MB.
+		 */
+		/* Use this name so old part lines work */
+		flash_map.name = "phys_mapped_flash";
+		flash_map.phys = region_cfg.s.base << 16;
+		flash_map.size = 0x1fc00000 - flash_map.phys;
+		flash_map.bankwidth = 1;
+		flash_map.virt = ioremap(flash_map.phys, flash_map.size);
+		pr_notice("Bootbus flash: Setting flash for %luMB flash at "
+			  "0x%08lx\n", flash_map.size >> 20, flash_map.phys);
+		simple_map_init(&flash_map);
+		mymtd = do_map_probe("cfi_probe", &flash_map);
+		if (mymtd) {
+			mymtd->owner = THIS_MODULE;
+
+#ifdef CONFIG_MTD_PARTITIONS
+			nr_parts = parse_mtd_partitions(mymtd,
+							part_probe_types,
+							&parts, 0);
+			if (nr_parts > 0)
+				add_mtd_partitions(mymtd, parts, nr_parts);
+			else
+				add_mtd_device(mymtd);
+#else
+			add_mtd_device(mymtd);
+#endif
+		} else {
+			pr_err("Failed to register MTD device for flash\n");
+		}
+	}
+	return 0;
+}
+
+late_initcall(flash_init);
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@ -0,0 +1,497 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Cavium Networks
+ */
+#include <linux/irq.h>
+#include <linux/interrupt.h>
+#include <linux/hardirq.h>
+
+#include <asm/octeon/octeon.h>
+
+DEFINE_RWLOCK(octeon_irq_ciu0_rwlock);
+DEFINE_RWLOCK(octeon_irq_ciu1_rwlock);
+DEFINE_SPINLOCK(octeon_irq_msi_lock);
+
+static void octeon_irq_core_ack(unsigned int irq)
+{
+	unsigned int bit = irq - OCTEON_IRQ_SW0;
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	clear_c0_status(0x100 << bit);
+	/* The two user interrupts must be cleared manually. */
+	if (bit < 2)
+		clear_c0_cause(0x100 << bit);
+}
+
+static void octeon_irq_core_eoi(unsigned int irq)
+{
+	irq_desc_t *desc = irq_desc + irq;
+	unsigned int bit = irq - OCTEON_IRQ_SW0;
+	/*
+	 * If an IRQ is being processed while we are disabling it the
+	 * handler will attempt to unmask the interrupt after it has
+	 * been disabled.
+	 */
+	if (desc->status & IRQ_DISABLED)
+		return;
+
+	/* There is a race here.  We should fix it.  */
+
+	/*
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	set_c0_status(0x100 << bit);
+}
+
+static void octeon_irq_core_enable(unsigned int irq)
+{
+	unsigned long flags;
+	unsigned int bit = irq - OCTEON_IRQ_SW0;
+
+	/*
+	 * We need to disable interrupts to make sure our updates are
+	 * atomic.
+	 */
+	local_irq_save(flags);
+	set_c0_status(0x100 << bit);
+	local_irq_restore(flags);
+}
+
+static void octeon_irq_core_disable_local(unsigned int irq)
+{
+	unsigned long flags;
+	unsigned int bit = irq - OCTEON_IRQ_SW0;
+	/*
+	 * We need to disable interrupts to make sure our updates are
+	 * atomic.
+	 */
+	local_irq_save(flags);
+	clear_c0_status(0x100 << bit);
+	local_irq_restore(flags);
+}
+
+static void octeon_irq_core_disable(unsigned int irq)
+{
+#ifdef CONFIG_SMP
+	on_each_cpu((void (*)(void *)) octeon_irq_core_disable_local,
+		    (void *) (long) irq, 1);
+#else
+	octeon_irq_core_disable_local(irq);
+#endif
+}
+
+static struct irq_chip octeon_irq_chip_core = {
+	.name = "Core",
+	.enable = octeon_irq_core_enable,
+	.disable = octeon_irq_core_disable,
+	.ack = octeon_irq_core_ack,
+	.eoi = octeon_irq_core_eoi,
+};
+
+
+static void octeon_irq_ciu0_ack(unsigned int irq)
+{
+	/*
+	 * In order to avoid any locking accessing the CIU, we
+	 * acknowledge CIU interrupts by disabling all of them.  This
+	 * way we can use a per core register and avoid any out of
+	 * core locking requirements.  This has the side affect that
+	 * CIU interrupts can't be processed recursively.
+	 *
+	 * We don't need to disable IRQs to make these atomic since
+	 * they are already disabled earlier in the low level
+	 * interrupt code.
+	 */
+	clear_c0_status(0x100 << 2);
+}
+
+static void octeon_irq_ciu0_eoi(unsigned int irq)
+{
+	/*
+	 * Enable all CIU interrupts again.  We don't need to disable
+	 * IRQs to make these atomic since they are already disabled
+	 * earlier in the low level interrupt code.
+	 */
+	set_c0_status(0x100 << 2);
+}
+
+static void octeon_irq_ciu0_enable(unsigned int irq)
+{
+	int coreid = cvmx_get_core_num();
+	unsigned long flags;
+	uint64_t en0;
+	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
+
+	/*
+	 * A read lock is used here to make sure only one core is ever
+	 * updating the CIU enable bits at a time. During an enable
+	 * the cores don't interfere with each other. During a disable
+	 * the write lock stops any enables that might cause a
+	 * problem.
+	 */
+	read_lock_irqsave(&octeon_irq_ciu0_rwlock, flags);
+	en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+	en0 |= 1ull << bit;
+	cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0);
+	cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+	read_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags);
+}
+
+static void octeon_irq_ciu0_disable(unsigned int irq)
+{
+	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
+	unsigned long flags;
+	uint64_t en0;
+#ifdef CONFIG_SMP
+	int cpu;
+	write_lock_irqsave(&octeon_irq_ciu0_rwlock, flags);
+	for_each_online_cpu(cpu) {
+		int coreid = cpu_logical_map(cpu);
+		en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+		en0 &= ~(1ull << bit);
+		cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0);
+	}
+	/*
+	 * We need to do a read after the last update to make sure all
+	 * of them are done.
+	 */
+	cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
+	write_unlock_irqrestore(&octeon_irq_ciu0_rwlock, flags);
+#else
+	int coreid = cvmx_get_core_num();
+	local_irq_save(flags);
+	en0 = cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+	en0 &= ~(1ull << bit);
+	cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0);
+	cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+	local_irq_restore(flags);
+#endif
+}
+
+#ifdef CONFIG_SMP
+static void octeon_irq_ciu0_set_affinity(unsigned int irq, const struct cpumask *dest)
+{
+	int cpu;
+	int bit = irq - OCTEON_IRQ_WORKQ0;	/* Bit 0-63 of EN0 */
+
+	write_lock(&octeon_irq_ciu0_rwlock);
+	for_each_online_cpu(cpu) {
+		int coreid = cpu_logical_map(cpu);
+		uint64_t en0 =
+			cvmx_read_csr(CVMX_CIU_INTX_EN0(coreid * 2));
+		if (cpumask_test_cpu(cpu, dest))
+			en0 |= 1ull << bit;
+		else
+			en0 &= ~(1ull << bit);
+		cvmx_write_csr(CVMX_CIU_INTX_EN0(coreid * 2), en0);
+	}
+	/*
+	 * We need to do a read after the last update to make sure all
+	 * of them are done.
+	 */
+	cvmx_read_csr(CVMX_CIU_INTX_EN0(cvmx_get_core_num() * 2));
+	write_unlock(&octeon_irq_ciu0_rwlock);
+}
+#endif
+
+static struct irq_chip octeon_irq_chip_ciu0 = {
+	.name = "CIU0",
+	.enable = octeon_irq_ciu0_enable,
+	.disable = octeon_irq_ciu0_disable,
+	.ack = octeon_irq_ciu0_ack,
+	.eoi = octeon_irq_ciu0_eoi,
+#ifdef CONFIG_SMP
+	.set_affinity = octeon_irq_ciu0_set_affinity,
+#endif
+};
+
+
+static void octeon_irq_ciu1_ack(unsigned int irq)
+{
+	/*
+	 * In order to avoid any locking accessing the CIU, we
+	 * acknowledge CIU interrupts by disabling all of them.  This
+	 * way we can use a per core register and avoid any out of
+	 * core locking requirements.  This has the side affect that
+	 * CIU interrupts can't be processed recursively.  We don't
+	 * need to disable IRQs to make these atomic since they are
+	 * already disabled earlier in the low level interrupt code.
+	 */
+	clear_c0_status(0x100 << 3);
+}
+
+static void octeon_irq_ciu1_eoi(unsigned int irq)
+{
+	/*
+	 * Enable all CIU interrupts again.  We don't need to disable
+	 * IRQs to make these atomic since they are already disabled
+	 * earlier in the low level interrupt code.
+	 */
+	set_c0_status(0x100 << 3);
+}
+
+static void octeon_irq_ciu1_enable(unsigned int irq)
+{
+	int coreid = cvmx_get_core_num();
+	unsigned long flags;
+	uint64_t en1;
+	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
+
+	/*
+	 * A read lock is used here to make sure only one core is ever
+	 * updating the CIU enable bits at a time.  During an enable
+	 * the cores don't interfere with each other.  During a disable
+	 * the write lock stops any enables that might cause a
+	 * problem.
+	 */
+	read_lock_irqsave(&octeon_irq_ciu1_rwlock, flags);
+	en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1));
+	en1 |= 1ull << bit;
+	cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1);
+	cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1));
+	read_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags);
+}
+
+static void octeon_irq_ciu1_disable(unsigned int irq)
+{
+	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
+	unsigned long flags;
+	uint64_t en1;
+#ifdef CONFIG_SMP
+	int cpu;
+	write_lock_irqsave(&octeon_irq_ciu1_rwlock, flags);
+	for_each_online_cpu(cpu) {
+		int coreid = cpu_logical_map(cpu);
+		en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1));
+		en1 &= ~(1ull << bit);
+		cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1);
+	}
+	/*
+	 * We need to do a read after the last update to make sure all
+	 * of them are done.
+	 */
+	cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
+	write_unlock_irqrestore(&octeon_irq_ciu1_rwlock, flags);
+#else
+	int coreid = cvmx_get_core_num();
+	local_irq_save(flags);
+	en1 = cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1));
+	en1 &= ~(1ull << bit);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1);
+	cvmx_read_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1));
+	local_irq_restore(flags);
+#endif
+}
+
+#ifdef CONFIG_SMP
+static void octeon_irq_ciu1_set_affinity(unsigned int irq, const struct cpumask *dest)
+{
+	int cpu;
+	int bit = irq - OCTEON_IRQ_WDOG0;	/* Bit 0-63 of EN1 */
+
+	write_lock(&octeon_irq_ciu1_rwlock);
+	for_each_online_cpu(cpu) {
+		int coreid = cpu_logical_map(cpu);
+		uint64_t en1 =
+			cvmx_read_csr(CVMX_CIU_INTX_EN1
+				(coreid * 2 + 1));
+		if (cpumask_test_cpu(cpu, dest))
+			en1 |= 1ull << bit;
+		else
+			en1 &= ~(1ull << bit);
+		cvmx_write_csr(CVMX_CIU_INTX_EN1(coreid * 2 + 1), en1);
+	}
+	/*
+	 * We need to do a read after the last update to make sure all
+	 * of them are done.
+	 */
+	cvmx_read_csr(CVMX_CIU_INTX_EN1(cvmx_get_core_num() * 2 + 1));
+	write_unlock(&octeon_irq_ciu1_rwlock);
+}
+#endif
+
+static struct irq_chip octeon_irq_chip_ciu1 = {
+	.name = "CIU1",
+	.enable = octeon_irq_ciu1_enable,
+	.disable = octeon_irq_ciu1_disable,
+	.ack = octeon_irq_ciu1_ack,
+	.eoi = octeon_irq_ciu1_eoi,
+#ifdef CONFIG_SMP
+	.set_affinity = octeon_irq_ciu1_set_affinity,
+#endif
+};
+
+#ifdef CONFIG_PCI_MSI
+
+static void octeon_irq_msi_ack(unsigned int irq)
+{
+	if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) {
+		/* These chips have PCI */
+		cvmx_write_csr(CVMX_NPI_NPI_MSI_RCV,
+			       1ull << (irq - OCTEON_IRQ_MSI_BIT0));
+	} else {
+		/*
+		 * These chips have PCIe. Thankfully the ACK doesn't
+		 * need any locking.
+		 */
+		cvmx_write_csr(CVMX_PEXP_NPEI_MSI_RCV0,
+			       1ull << (irq - OCTEON_IRQ_MSI_BIT0));
+	}
+}
+
+static void octeon_irq_msi_eoi(unsigned int irq)
+{
+	/* Nothing needed */
+}
+
+static void octeon_irq_msi_enable(unsigned int irq)
+{
+	if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) {
+		/*
+		 * Octeon PCI doesn't have the ability to mask/unmask
+		 * MSI interrupts individually.  Instead of
+		 * masking/unmasking them in groups of 16, we simple
+		 * assume MSI devices are well behaved.  MSI
+		 * interrupts are always enable and the ACK is assumed
+		 * to be enough.
+		 */
+	} else {
+		/* These chips have PCIe.  Note that we only support
+		 * the first 64 MSI interrupts.  Unfortunately all the
+		 * MSI enables are in the same register.  We use
+		 * MSI0's lock to control access to them all.
+		 */
+		uint64_t en;
+		unsigned long flags;
+		spin_lock_irqsave(&octeon_irq_msi_lock, flags);
+		en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0);
+		en |= 1ull << (irq - OCTEON_IRQ_MSI_BIT0);
+		cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en);
+		cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0);
+		spin_unlock_irqrestore(&octeon_irq_msi_lock, flags);
+	}
+}
+
+static void octeon_irq_msi_disable(unsigned int irq)
+{
+	if (!octeon_has_feature(OCTEON_FEATURE_PCIE)) {
+		/* See comment in enable */
+	} else {
+		/*
+		 * These chips have PCIe.  Note that we only support
+		 * the first 64 MSI interrupts.  Unfortunately all the
+		 * MSI enables are in the same register.  We use
+		 * MSI0's lock to control access to them all.
+		 */
+		uint64_t en;
+		unsigned long flags;
+		spin_lock_irqsave(&octeon_irq_msi_lock, flags);
+		en = cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0);
+		en &= ~(1ull << (irq - OCTEON_IRQ_MSI_BIT0));
+		cvmx_write_csr(CVMX_PEXP_NPEI_MSI_ENB0, en);
+		cvmx_read_csr(CVMX_PEXP_NPEI_MSI_ENB0);
+		spin_unlock_irqrestore(&octeon_irq_msi_lock, flags);
+	}
+}
+
+static struct irq_chip octeon_irq_chip_msi = {
+	.name = "MSI",
+	.enable = octeon_irq_msi_enable,
+	.disable = octeon_irq_msi_disable,
+	.ack = octeon_irq_msi_ack,
+	.eoi = octeon_irq_msi_eoi,
+};
+#endif
+
+void __init arch_init_irq(void)
+{
+	int irq;
+
+#ifdef CONFIG_SMP
+	/* Set the default affinity to the boot cpu. */
+	cpumask_clear(irq_default_affinity);
+	cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+#endif
+
+	if (NR_IRQS < OCTEON_IRQ_LAST)
+		pr_err("octeon_irq_init: NR_IRQS is set too low\n");
+
+	/* 0 - 15 reserved for i8259 master and slave controller. */
+
+	/* 17 - 23 Mips internal */
+	for (irq = OCTEON_IRQ_SW0; irq <= OCTEON_IRQ_TIMER; irq++) {
+		set_irq_chip_and_handler(irq, &octeon_irq_chip_core,
+					 handle_percpu_irq);
+	}
+
+	/* 24 - 87 CIU_INT_SUM0 */
+	for (irq = OCTEON_IRQ_WORKQ0; irq <= OCTEON_IRQ_BOOTDMA; irq++) {
+		set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu0,
+					 handle_percpu_irq);
+	}
+
+	/* 88 - 151 CIU_INT_SUM1 */
+	for (irq = OCTEON_IRQ_WDOG0; irq <= OCTEON_IRQ_RESERVED151; irq++) {
+		set_irq_chip_and_handler(irq, &octeon_irq_chip_ciu1,
+					 handle_percpu_irq);
+	}
+
+#ifdef CONFIG_PCI_MSI
+	/* 152 - 215 PCI/PCIe MSI interrupts */
+	for (irq = OCTEON_IRQ_MSI_BIT0; irq <= OCTEON_IRQ_MSI_BIT63; irq++) {
+		set_irq_chip_and_handler(irq, &octeon_irq_chip_msi,
+					 handle_percpu_irq);
+	}
+#endif
+	set_c0_status(0x300 << 2);
+}
+
+asmlinkage void plat_irq_dispatch(void)
+{
+	const unsigned long core_id = cvmx_get_core_num();
+	const uint64_t ciu_sum0_address = CVMX_CIU_INTX_SUM0(core_id * 2);
+	const uint64_t ciu_en0_address = CVMX_CIU_INTX_EN0(core_id * 2);
+	const uint64_t ciu_sum1_address = CVMX_CIU_INT_SUM1;
+	const uint64_t ciu_en1_address = CVMX_CIU_INTX_EN1(core_id * 2 + 1);
+	unsigned long cop0_cause;
+	unsigned long cop0_status;
+	uint64_t ciu_en;
+	uint64_t ciu_sum;
+
+	while (1) {
+		cop0_cause = read_c0_cause();
+		cop0_status = read_c0_status();
+		cop0_cause &= cop0_status;
+		cop0_cause &= ST0_IM;
+
+		if (unlikely(cop0_cause & STATUSF_IP2)) {
+			ciu_sum = cvmx_read_csr(ciu_sum0_address);
+			ciu_en = cvmx_read_csr(ciu_en0_address);
+			ciu_sum &= ciu_en;
+			if (likely(ciu_sum))
+				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WORKQ0 - 1);
+			else
+				spurious_interrupt();
+		} else if (unlikely(cop0_cause & STATUSF_IP3)) {
+			ciu_sum = cvmx_read_csr(ciu_sum1_address);
+			ciu_en = cvmx_read_csr(ciu_en1_address);
+			ciu_sum &= ciu_en;
+			if (likely(ciu_sum))
+				do_IRQ(fls64(ciu_sum) + OCTEON_IRQ_WDOG0 - 1);
+			else
+				spurious_interrupt();
+		} else if (likely(cop0_cause)) {
+			do_IRQ(fls(cop0_cause) - 9 + MIPS_CPU_IRQ_BASE);
+		} else {
+			break;
+		}
+	}
+}
--- a/arch/mips/cavium-octeon/octeon-memcpy.S
+++ b/arch/mips/cavium-octeon/octeon-memcpy.S
@ -0,0 +1,521 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Unified implementation of memcpy, memmove and the __copy_user backend.
+ *
+ * Copyright (C) 1998, 99, 2000, 01, 2002 Ralf Baechle (ralf@gnu.org)
+ * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
+ * Copyright (C) 2002 Broadcom, Inc.
+ *   memcpy/copy_user author: Mark Vandevoorde
+ *
+ * Mnemonic names for arguments to memcpy/__copy_user
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+#define dst a0
+#define src a1
+#define len a2
+
+/*
+ * Spec
+ *
+ * memcpy copies len bytes from src to dst and sets v0 to dst.
+ * It assumes that
+ *   - src and dst don't overlap
+ *   - src is readable
+ *   - dst is writable
+ * memcpy uses the standard calling convention
+ *
+ * __copy_user copies up to len bytes from src to dst and sets a2 (len) to
+ * the number of uncopied bytes due to an exception caused by a read or write.
+ * __copy_user assumes that src and dst don't overlap, and that the call is
+ * implementing one of the following:
+ *   copy_to_user
+ *     - src is readable  (no exceptions when reading src)
+ *   copy_from_user
+ *     - dst is writable  (no exceptions when writing dst)
+ * __copy_user uses a non-standard calling convention; see
+ * arch/mips/include/asm/uaccess.h
+ *
+ * When an exception happens on a load, the handler must
+ # ensure that all of the destination buffer is overwritten to prevent
+ * leaking information to user mode programs.
+ */
+
+/*
+ * Implementation
+ */
+
+/*
+ * The exception handler for loads requires that:
+ *  1- AT contain the address of the byte just past the end of the source
+ *     of the copy,
+ *  2- src_entry <= src < AT, and
+ *  3- (dst - src) == (dst_entry - src_entry),
+ * The _entry suffix denotes values when __copy_user was called.
+ *
+ * (1) is set up up by uaccess.h and maintained by not writing AT in copy_user
+ * (2) is met by incrementing src by the number of bytes copied
+ * (3) is met by not doing loads between a pair of increments of dst and src
+ *
+ * The exception handlers for stores adjust len (if necessary) and return.
+ * These handlers do not need to overwrite any data.
+ *
+ * For __rmemcpy and memmove an exception is always a kernel bug, therefore
+ * they're not protected.
+ */
+
+#define EXC(inst_reg,addr,handler)		\
+9:	inst_reg, addr;				\
+	.section __ex_table,"a";		\
+	PTR	9b, handler;			\
+	.previous
+
+/*
+ * Only on the 64-bit kernel we can made use of 64-bit registers.
+ */
+#ifdef CONFIG_64BIT
+#define USE_DOUBLE
+#endif
+
+#ifdef USE_DOUBLE
+
+#define LOAD   ld
+#define LOADL  ldl
+#define LOADR  ldr
+#define STOREL sdl
+#define STORER sdr
+#define STORE  sd
+#define ADD    daddu
+#define SUB    dsubu
+#define SRL    dsrl
+#define SRA    dsra
+#define SLL    dsll
+#define SLLV   dsllv
+#define SRLV   dsrlv
+#define NBYTES 8
+#define LOG_NBYTES 3
+
+/*
+ * As we are sharing code base with the mips32 tree (which use the o32 ABI
+ * register definitions). We need to redefine the register definitions from
+ * the n64 ABI register naming to the o32 ABI register naming.
+ */
+#undef t0
+#undef t1
+#undef t2
+#undef t3
+#define t0	$8
+#define t1	$9
+#define t2	$10
+#define t3	$11
+#define t4	$12
+#define t5	$13
+#define t6	$14
+#define t7	$15
+
+#else
+
+#define LOAD   lw
+#define LOADL  lwl
+#define LOADR  lwr
+#define STOREL swl
+#define STORER swr
+#define STORE  sw
+#define ADD    addu
+#define SUB    subu
+#define SRL    srl
+#define SLL    sll
+#define SRA    sra
+#define SLLV   sllv
+#define SRLV   srlv
+#define NBYTES 4
+#define LOG_NBYTES 2
+
+#endif /* USE_DOUBLE */
+
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+#define LDFIRST LOADR
+#define LDREST  LOADL
+#define STFIRST STORER
+#define STREST  STOREL
+#define SHIFT_DISCARD SLLV
+#else
+#define LDFIRST LOADL
+#define LDREST  LOADR
+#define STFIRST STOREL
+#define STREST  STORER
+#define SHIFT_DISCARD SRLV
+#endif
+
+#define FIRST(unit) ((unit)*NBYTES)
+#define REST(unit)  (FIRST(unit)+NBYTES-1)
+#define UNIT(unit)  FIRST(unit)
+
+#define ADDRMASK (NBYTES-1)
+
+	.text
+	.set	noreorder
+	.set	noat
+
+/*
+ * A combined memcpy/__copy_user
+ * __copy_user sets len to 0 for success; else to an upper bound of
+ * the number of uncopied bytes.
+ * memcpy sets v0 to dst.
+ */
+	.align	5
+LEAF(memcpy)					/* a0=dst a1=src a2=len */
+	move	v0, dst				/* return value */
+__memcpy:
+FEXPORT(__copy_user)
+	/*
+	 * Note: dst & src may be unaligned, len may be 0
+	 * Temps
+	 */
+	#
+	# Octeon doesn't care if the destination is unaligned. The hardware
+	# can fix it faster than we can special case the assembly.
+	#
+	pref	0, 0(src)
+	sltu	t0, len, NBYTES		# Check if < 1 word
+	bnez	t0, copy_bytes_checklen
+	 and	t0, src, ADDRMASK	# Check if src unaligned
+	bnez	t0, src_unaligned
+	 sltu	t0, len, 4*NBYTES	# Check if < 4 words
+	bnez	t0, less_than_4units
+	 sltu	t0, len, 8*NBYTES	# Check if < 8 words
+	bnez	t0, less_than_8units
+	 sltu	t0, len, 16*NBYTES	# Check if < 16 words
+	bnez	t0, cleanup_both_aligned
+	 sltu	t0, len, 128+1		# Check if len < 129
+	bnez	t0, 1f			# Skip prefetch if len is too short
+	 sltu	t0, len, 256+1		# Check if len < 257
+	bnez	t0, 1f			# Skip prefetch if len is too short
+	 pref	0, 128(src)		# We must not prefetch invalid addresses
+	#
+	# This is where we loop if there is more than 128 bytes left
+2:	pref	0, 256(src)		# We must not prefetch invalid addresses
+	#
+	# This is where we loop if we can't prefetch anymore
+1:
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 16*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p16u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p15u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p14u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p13u)
+EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p12u)
+EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p11u)
+EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p10u)
+	ADD	src, src, 16*NBYTES
+EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p9u)
+	ADD	dst, dst, 16*NBYTES
+EXC(	LOAD	t0, UNIT(-8)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(-7)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(-6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(-5)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(-8)(dst),	s_exc_p8u)
+EXC(	STORE	t1, UNIT(-7)(dst),	s_exc_p7u)
+EXC(	STORE	t2, UNIT(-6)(dst),	s_exc_p6u)
+EXC(	STORE	t3, UNIT(-5)(dst),	s_exc_p5u)
+EXC(	LOAD	t0, UNIT(-4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(-3)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(-2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(-1)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(-4)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(-3)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(-2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(-1)(dst),	s_exc_p1u)
+	sltu	t0, len, 256+1		# See if we can prefetch more
+	beqz	t0, 2b
+	 sltu	t0, len, 128		# See if we can loop more time
+	beqz	t0, 1b
+	 nop
+	#
+	# Jump here if there are less than 16*NBYTES left.
+	#
+cleanup_both_aligned:
+	beqz	len, done
+	 sltu	t0, len, 8*NBYTES
+	bnez	t0, less_than_8units
+	 nop
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 8*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p8u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p7u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p6u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p5u)
+EXC(	LOAD	t0, UNIT(4)(src),	l_exc_copy)
+EXC(	LOAD	t1, UNIT(5)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(6)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(7)(src),	l_exc_copy)
+EXC(	STORE	t0, UNIT(4)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(5)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(6)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(7)(dst),	s_exc_p1u)
+	ADD	src, src, 8*NBYTES
+	beqz	len, done
+	 ADD	dst, dst, 8*NBYTES
+	#
+	# Jump here if there are less than 8*NBYTES left.
+	#
+less_than_8units:
+	sltu	t0, len, 4*NBYTES
+	bnez	t0, less_than_4units
+	 nop
+EXC(	LOAD	t0, UNIT(0)(src),	l_exc)
+EXC(	LOAD	t1, UNIT(1)(src),	l_exc_copy)
+EXC(	LOAD	t2, UNIT(2)(src),	l_exc_copy)
+EXC(	LOAD	t3, UNIT(3)(src),	l_exc_copy)
+	SUB	len, len, 4*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	ADD	src, src, 4*NBYTES
+	beqz	len, done
+	 ADD	dst, dst, 4*NBYTES
+	#
+	# Jump here if there are less than 4*NBYTES left. This means
+	# we may need to copy up to 3 NBYTES words.
+	#
+less_than_4units:
+	sltu	t0, len, 1*NBYTES
+	bnez	t0, copy_bytes_checklen
+	 nop
+	#
+	# 1) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	sltu	t1, len, 8
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bnez	t1, copy_bytes_checklen
+	 ADD	dst, dst, NBYTES
+	#
+	# 2) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	sltu	t1, len, 8
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bnez	t1, copy_bytes_checklen
+	 ADD	dst, dst, NBYTES
+	#
+	# 3) Copy NBYTES, then check length again
+	#
+EXC(	LOAD	t0, 0(src),		l_exc)
+	SUB	len, len, NBYTES
+	ADD	src, src, NBYTES
+	ADD	dst, dst, NBYTES
+	b copy_bytes_checklen
+EXC(	 STORE	t0, -8(dst),		s_exc_p1u)
+
+src_unaligned:
+#define rem t8
+	SRL	t0, len, LOG_NBYTES+2    # +2 for 4 units/iter
+	beqz	t0, cleanup_src_unaligned
+	 and	rem, len, (4*NBYTES-1)   # rem = len % 4*NBYTES
+1:
+/*
+ * Avoid consecutive LD*'s to the same register since some mips
+ * implementations can't issue them in the same cycle.
+ * It's OK to load FIRST(N+1) before REST(N) because the two addresses
+ * are to the same unit (unless src is aligned, but it's not).
+ */
+EXC(	LDFIRST	t0, FIRST(0)(src),	l_exc)
+EXC(	LDFIRST	t1, FIRST(1)(src),	l_exc_copy)
+	SUB     len, len, 4*NBYTES
+EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+EXC(	LDREST	t1, REST(1)(src),	l_exc_copy)
+EXC(	LDFIRST	t2, FIRST(2)(src),	l_exc_copy)
+EXC(	LDFIRST	t3, FIRST(3)(src),	l_exc_copy)
+EXC(	LDREST	t2, REST(2)(src),	l_exc_copy)
+EXC(	LDREST	t3, REST(3)(src),	l_exc_copy)
+	ADD	src, src, 4*NBYTES
+EXC(	STORE	t0, UNIT(0)(dst),	s_exc_p4u)
+EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
+EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
+EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
+	bne	len, rem, 1b
+	 ADD	dst, dst, 4*NBYTES
+
+cleanup_src_unaligned:
+	beqz	len, done
+	 and	rem, len, NBYTES-1  # rem = len % NBYTES
+	beq	rem, len, copy_bytes
+	 nop
+1:
+EXC(	LDFIRST t0, FIRST(0)(src),	l_exc)
+EXC(	LDREST	t0, REST(0)(src),	l_exc_copy)
+	SUB	len, len, NBYTES
+EXC(	STORE	t0, 0(dst),		s_exc_p1u)
+	ADD	src, src, NBYTES
+	bne	len, rem, 1b
+	 ADD	dst, dst, NBYTES
+
+copy_bytes_checklen:
+	beqz	len, done
+	 nop
+copy_bytes:
+	/* 0 < len < NBYTES  */
+#define COPY_BYTE(N)			\
+EXC(	lb	t0, N(src), l_exc);	\
+	SUB	len, len, 1;		\
+	beqz	len, done;		\
+EXC(	 sb	t0, N(dst), s_exc_p1)
+
+	COPY_BYTE(0)
+	COPY_BYTE(1)
+#ifdef USE_DOUBLE
+	COPY_BYTE(2)
+	COPY_BYTE(3)
+	COPY_BYTE(4)
+	COPY_BYTE(5)
+#endif
+EXC(	lb	t0, NBYTES-2(src), l_exc)
+	SUB	len, len, 1
+	jr	ra
+EXC(	 sb	t0, NBYTES-2(dst), s_exc_p1)
+done:
+	jr	ra
+	 nop
+	END(memcpy)
+
+l_exc_copy:
+	/*
+	 * Copy bytes from src until faulting load address (or until a
+	 * lb faults)
+	 *
+	 * When reached by a faulting LDFIRST/LDREST, THREAD_BUADDR($28)
+	 * may be more than a byte beyond the last address.
+	 * Hence, the lb below may get an exception.
+	 *
+	 * Assumes src < THREAD_BUADDR($28)
+	 */
+	LOAD	t0, TI_TASK($28)
+	 nop
+	LOAD	t0, THREAD_BUADDR(t0)
+1:
+EXC(	lb	t1, 0(src),	l_exc)
+	ADD	src, src, 1
+	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
+	bne	src, t0, 1b
+	 ADD	dst, dst, 1
+l_exc:
+	LOAD	t0, TI_TASK($28)
+	 nop
+	LOAD	t0, THREAD_BUADDR(t0)	# t0 is just past last good address
+	 nop
+	SUB	len, AT, t0		# len number of uncopied bytes
+	/*
+	 * Here's where we rely on src and dst being incremented in tandem,
+	 *   See (3) above.
+	 * dst += (fault addr - src) to put dst at first byte to clear
+	 */
+	ADD	dst, t0			# compute start address in a1
+	SUB	dst, src
+	/*
+	 * Clear len bytes starting at dst.  Can't call __bzero because it
+	 * might modify len.  An inefficient loop for these rare times...
+	 */
+	beqz	len, done
+	 SUB	src, len, 1
+1:	sb	zero, 0(dst)
+	ADD	dst, dst, 1
+	bnez	src, 1b
+	 SUB	src, src, 1
+	jr	ra
+	 nop
+
+
+#define SEXC(n)				\
+s_exc_p ## n ## u:			\
+	jr	ra;			\
+	 ADD	len, len, n*NBYTES
+
+SEXC(16)
+SEXC(15)
+SEXC(14)
+SEXC(13)
+SEXC(12)
+SEXC(11)
+SEXC(10)
+SEXC(9)
+SEXC(8)
+SEXC(7)
+SEXC(6)
+SEXC(5)
+SEXC(4)
+SEXC(3)
+SEXC(2)
+SEXC(1)
+
+s_exc_p1:
+	jr	ra
+	 ADD	len, len, 1
+s_exc:
+	jr	ra
+	 nop
+
+	.align	5
+LEAF(memmove)
+	ADD	t0, a0, a2
+	ADD	t1, a1, a2
+	sltu	t0, a1, t0			# dst + len <= src -> memcpy
+	sltu	t1, a0, t1			# dst >= src + len -> memcpy
+	and	t0, t1
+	beqz	t0, __memcpy
+	 move	v0, a0				/* return value */
+	beqz	a2, r_out
+	END(memmove)
+
+	/* fall through to __rmemcpy */
+LEAF(__rmemcpy)					/* a0=dst a1=src a2=len */
+	 sltu	t0, a1, a0
+	beqz	t0, r_end_bytes_up		# src >= dst
+	 nop
+	ADD	a0, a2				# dst = dst + len
+	ADD	a1, a2				# src = src + len
+
+r_end_bytes:
+	lb	t0, -1(a1)
+	SUB	a2, a2, 0x1
+	sb	t0, -1(a0)
+	SUB	a1, a1, 0x1
+	bnez	a2, r_end_bytes
+	 SUB	a0, a0, 0x1
+
+r_out:
+	jr	ra
+	 move	a2, zero
+
+r_end_bytes_up:
+	lb	t0, (a1)
+	SUB	a2, a2, 0x1
+	sb	t0, (a0)
+	ADD	a1, a1, 0x1
+	bnez	a2, r_end_bytes_up
+	 ADD	a0, a0, 0x1
+
+	jr	ra
+	 move	a2, zero
+	END(__rmemcpy)
--- a/arch/mips/cavium-octeon/serial.c
+++ b/arch/mips/cavium-octeon/serial.c
@ -0,0 +1,136 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2007 Cavium Networks
+ */
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/serial.h>
+#include <linux/serial_8250.h>
+#include <linux/serial_reg.h>
+#include <linux/tty.h>
+
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+
+#ifdef CONFIG_GDB_CONSOLE
+#define DEBUG_UART 0
+#else
+#define DEBUG_UART 1
+#endif
+
+unsigned int octeon_serial_in(struct uart_port *up, int offset)
+{
+	int rv = cvmx_read_csr((uint64_t)(up->membase + (offset << 3)));
+	if (offset == UART_IIR && (rv & 0xf) == 7) {
+		/* Busy interrupt, read the USR (39) and try again. */
+		cvmx_read_csr((uint64_t)(up->membase + (39 << 3)));
+		rv = cvmx_read_csr((uint64_t)(up->membase + (offset << 3)));
+	}
+	return rv;
+}
+
+void octeon_serial_out(struct uart_port *up, int offset, int value)
+{
+	/*
+	 * If bits 6 or 7 of the OCTEON UART's LCR are set, it quits
+	 * working.
+	 */
+	if (offset == UART_LCR)
+		value &= 0x9f;
+	cvmx_write_csr((uint64_t)(up->membase + (offset << 3)), (u8)value);
+}
+
+/*
+ * Allocated in .bss, so it is all zeroed.
+ */
+#define OCTEON_MAX_UARTS 3
+static struct plat_serial8250_port octeon_uart8250_data[OCTEON_MAX_UARTS + 1];
+static struct platform_device octeon_uart8250_device = {
+	.name			= "serial8250",
+	.id			= PLAT8250_DEV_PLATFORM,
+	.dev			= {
+		.platform_data	= octeon_uart8250_data,
+	},
+};
+
+static void __init octeon_uart_set_common(struct plat_serial8250_port *p)
+{
+	p->flags = ASYNC_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE;
+	p->type = PORT_OCTEON;
+	p->iotype = UPIO_MEM;
+	p->regshift = 3;	/* I/O addresses are every 8 bytes */
+	p->uartclk = mips_hpt_frequency;
+	p->serial_in = octeon_serial_in;
+	p->serial_out = octeon_serial_out;
+}
+
+static int __init octeon_serial_init(void)
+{
+	int enable_uart0;
+	int enable_uart1;
+	int enable_uart2;
+	struct plat_serial8250_port *p;
+
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+	/*
+	 * If we are configured to run as the second of two kernels,
+	 * disable uart0 and enable uart1. Uart0 is owned by the first
+	 * kernel
+	 */
+	enable_uart0 = 0;
+	enable_uart1 = 1;
+#else
+	/*
+	 * We are configured for the first kernel. We'll enable uart0
+	 * if the bootloader told us to use 0, otherwise will enable
+	 * uart 1.
+	 */
+	enable_uart0 = (octeon_get_boot_uart() == 0);
+	enable_uart1 = (octeon_get_boot_uart() == 1);
+#ifdef CONFIG_KGDB
+	enable_uart1 = 1;
+#endif
+#endif
+
+	/* Right now CN52XX is the only chip with a third uart */
+	enable_uart2 = OCTEON_IS_MODEL(OCTEON_CN52XX);
+
+	p = octeon_uart8250_data;
+	if (enable_uart0) {
+		/* Add a ttyS device for hardware uart 0 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UARTX_RBR(0);
+		p->mapbase = CVMX_MIO_UARTX_RBR(0) & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART0;
+		p++;
+	}
+
+	if (enable_uart1) {
+		/* Add a ttyS device for hardware uart 1 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UARTX_RBR(1);
+		p->mapbase = CVMX_MIO_UARTX_RBR(1) & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART1;
+		p++;
+	}
+	if (enable_uart2) {
+		/* Add a ttyS device for hardware uart 2 */
+		octeon_uart_set_common(p);
+		p->membase = (void *) CVMX_MIO_UART2_RBR;
+		p->mapbase = CVMX_MIO_UART2_RBR & ((1ull << 49) - 1);
+		p->irq = OCTEON_IRQ_UART2;
+		p++;
+	}
+
+	BUG_ON(p > &octeon_uart8250_data[OCTEON_MAX_UARTS]);
+
+	return platform_device_register(&octeon_uart8250_device);
+}
+
+device_initcall(octeon_serial_init);
--- a/arch/mips/cavium-octeon/setup.c
+++ b/arch/mips/cavium-octeon/setup.c
@ -0,0 +1,929 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2007 Cavium Networks
+ * Copyright (C) 2008 Wind River Systems
+ */
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/serial.h>
+#include <linux/types.h>
+#include <linux/string.h>	/* for memset */
+#include <linux/serial.h>
+#include <linux/tty.h>
+#include <linux/time.h>
+#include <linux/platform_device.h>
+#include <linux/serial_core.h>
+#include <linux/serial_8250.h>
+#include <linux/string.h>
+
+#include <asm/processor.h>
+#include <asm/reboot.h>
+#include <asm/smp-ops.h>
+#include <asm/system.h>
+#include <asm/irq_cpu.h>
+#include <asm/mipsregs.h>
+#include <asm/bootinfo.h>
+#include <asm/sections.h>
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+extern void cvmx_interrupt_rsl_decode(void);
+extern int __cvmx_interrupt_ecc_report_single_bit_errors;
+extern void cvmx_interrupt_rsl_enable(void);
+#endif
+
+extern struct plat_smp_ops octeon_smp_ops;
+
+#ifdef CONFIG_PCI
+extern void pci_console_init(const char *arg);
+#endif
+
+#ifdef CONFIG_CAVIUM_RESERVE32
+extern uint64_t octeon_reserve32_memory;
+#endif
+static unsigned long long MAX_MEMORY = 512ull << 20;
+
+struct octeon_boot_descriptor *octeon_boot_desc_ptr;
+
+struct cvmx_bootinfo *octeon_bootinfo;
+EXPORT_SYMBOL(octeon_bootinfo);
+
+#ifdef CONFIG_CAVIUM_RESERVE32
+uint64_t octeon_reserve32_memory;
+EXPORT_SYMBOL(octeon_reserve32_memory);
+#endif
+
+static int octeon_uart;
+
+extern asmlinkage void handle_int(void);
+extern asmlinkage void plat_irq_dispatch(void);
+
+/**
+ * Return non zero if we are currently running in the Octeon simulator
+ *
+ * Returns
+ */
+int octeon_is_simulation(void)
+{
+	return octeon_bootinfo->board_type == CVMX_BOARD_TYPE_SIM;
+}
+EXPORT_SYMBOL(octeon_is_simulation);
+
+/**
+ * Return true if Octeon is in PCI Host mode. This means
+ * Linux can control the PCI bus.
+ *
+ * Returns Non zero if Octeon in host mode.
+ */
+int octeon_is_pci_host(void)
+{
+#ifdef CONFIG_PCI
+	return octeon_bootinfo->config_flags & CVMX_BOOTINFO_CFG_FLAG_PCI_HOST;
+#else
+	return 0;
+#endif
+}
+
+/**
+ * Get the clock rate of Octeon
+ *
+ * Returns Clock rate in HZ
+ */
+uint64_t octeon_get_clock_rate(void)
+{
+	if (octeon_is_simulation())
+		octeon_bootinfo->eclock_hz = 6000000;
+	return octeon_bootinfo->eclock_hz;
+}
+EXPORT_SYMBOL(octeon_get_clock_rate);
+
+/**
+ * Write to the LCD display connected to the bootbus. This display
+ * exists on most Cavium evaluation boards. If it doesn't exist, then
+ * this function doesn't do anything.
+ *
+ * @s:      String to write
+ */
+void octeon_write_lcd(const char *s)
+{
+	if (octeon_bootinfo->led_display_base_addr) {
+		void __iomem *lcd_address =
+			ioremap_nocache(octeon_bootinfo->led_display_base_addr,
+					8);
+		int i;
+		for (i = 0; i < 8; i++, s++) {
+			if (*s)
+				iowrite8(*s, lcd_address + i);
+			else
+				iowrite8(' ', lcd_address + i);
+		}
+		iounmap(lcd_address);
+	}
+}
+
+/**
+ * Return the console uart passed by the bootloader
+ *
+ * Returns uart   (0 or 1)
+ */
+int octeon_get_boot_uart(void)
+{
+	int uart;
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+	uart = 1;
+#else
+	uart = (octeon_boot_desc_ptr->flags & OCTEON_BL_FLAG_CONSOLE_UART1) ?
+		1 : 0;
+#endif
+	return uart;
+}
+
+/**
+ * Get the coremask Linux was booted on.
+ *
+ * Returns Core mask
+ */
+int octeon_get_boot_coremask(void)
+{
+	return octeon_boot_desc_ptr->core_mask;
+}
+
+/**
+ * Check the hardware BIST results for a CPU
+ */
+void octeon_check_cpu_bist(void)
+{
+	const int coreid = cvmx_get_core_num();
+	unsigned long long mask;
+	unsigned long long bist_val;
+
+	/* Check BIST results for COP0 registers */
+	mask = 0x1f00000000ull;
+	bist_val = read_octeon_c0_icacheerr();
+	if (bist_val & mask)
+		pr_err("Core%d BIST Failure: CacheErr(icache) = 0x%llx\n",
+		       coreid, bist_val);
+
+	bist_val = read_octeon_c0_dcacheerr();
+	if (bist_val & 1)
+		pr_err("Core%d L1 Dcache parity error: "
+		       "CacheErr(dcache) = 0x%llx\n",
+		       coreid, bist_val);
+
+	mask = 0xfc00000000000000ull;
+	bist_val = read_c0_cvmmemctl();
+	if (bist_val & mask)
+		pr_err("Core%d BIST Failure: COP0_CVM_MEM_CTL = 0x%llx\n",
+		       coreid, bist_val);
+
+	write_octeon_c0_dcacheerr(0);
+}
+
+#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB
+/**
+ * Called on every core to setup the wired tlb entry needed
+ * if CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB is set.
+ *
+ */
+static void octeon_hal_setup_per_cpu_reserved32(void *unused)
+{
+	/*
+	 * The config has selected to wire the reserve32 memory for all
+	 * userspace applications. We need to put a wired TLB entry in for each
+	 * 512MB of reserve32 memory. We only handle double 256MB pages here,
+	 * so reserve32 must be multiple of 512MB.
+	 */
+	uint32_t size = CONFIG_CAVIUM_RESERVE32;
+	uint32_t entrylo0 =
+		0x7 | ((octeon_reserve32_memory & ((1ul << 40) - 1)) >> 6);
+	uint32_t entrylo1 = entrylo0 + (256 << 14);
+	uint32_t entryhi = (0x80000000UL - (CONFIG_CAVIUM_RESERVE32 << 20));
+	while (size >= 512) {
+#if 0
+		pr_info("CPU%d: Adding double wired TLB entry for 0x%lx\n",
+			smp_processor_id(), entryhi);
+#endif
+		add_wired_entry(entrylo0, entrylo1, entryhi, PM_256M);
+		entrylo0 += 512 << 14;
+		entrylo1 += 512 << 14;
+		entryhi += 512 << 20;
+		size -= 512;
+	}
+}
+#endif /* CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB */
+
+/**
+ * Called to release the named block which was used to made sure
+ * that nobody used the memory for something else during
+ * init. Now we'll free it so userspace apps can use this
+ * memory region with bootmem_alloc.
+ *
+ * This function is called only once from prom_free_prom_memory().
+ */
+void octeon_hal_setup_reserved32(void)
+{
+#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB
+	on_each_cpu(octeon_hal_setup_per_cpu_reserved32, NULL, 0, 1);
+#endif
+}
+
+/**
+ * Reboot Octeon
+ *
+ * @command: Command to pass to the bootloader. Currently ignored.
+ */
+static void octeon_restart(char *command)
+{
+	/* Disable all watchdogs before soft reset. They don't get cleared */
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_online_cpu(cpu)
+		cvmx_write_csr(CVMX_CIU_WDOGX(cpu_logical_map(cpu)), 0);
+#else
+	cvmx_write_csr(CVMX_CIU_WDOGX(cvmx_get_core_num()), 0);
+#endif
+
+	mb();
+	while (1)
+		cvmx_write_csr(CVMX_CIU_SOFT_RST, 1);
+}
+
+
+/**
+ * Permanently stop a core.
+ *
+ * @arg: Ignored.
+ */
+static void octeon_kill_core(void *arg)
+{
+	mb();
+	if (octeon_is_simulation()) {
+		/* The simulator needs the watchdog to stop for dead cores */
+		cvmx_write_csr(CVMX_CIU_WDOGX(cvmx_get_core_num()), 0);
+		/* A break instruction causes the simulator stop a core */
+		asm volatile ("sync\nbreak");
+	}
+}
+
+
+/**
+ * Halt the system
+ */
+static void octeon_halt(void)
+{
+	smp_call_function(octeon_kill_core, NULL, 0);
+
+	switch (octeon_bootinfo->board_type) {
+	case CVMX_BOARD_TYPE_NAO38:
+		/* Driving a 1 to GPIO 12 shuts off this board */
+		cvmx_write_csr(CVMX_GPIO_BIT_CFGX(12), 1);
+		cvmx_write_csr(CVMX_GPIO_TX_SET, 0x1000);
+		break;
+	default:
+		octeon_write_lcd("PowerOff");
+		break;
+	}
+
+	octeon_kill_core(NULL);
+}
+
+#if 0
+/**
+ * Platform time init specifics.
+ * Returns
+ */
+void __init plat_time_init(void)
+{
+	/* Nothing special here, but we are required to have one */
+}
+
+#endif
+
+/**
+ * Handle all the error condition interrupts that might occur.
+ *
+ */
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+static irqreturn_t octeon_rlm_interrupt(int cpl, void *dev_id)
+{
+	cvmx_interrupt_rsl_decode();
+	return IRQ_HANDLED;
+}
+#endif
+
+/**
+ * Return a string representing the system type
+ *
+ * Returns
+ */
+const char *octeon_board_type_string(void)
+{
+	static char name[80];
+	sprintf(name, "%s (%s)",
+		cvmx_board_type_to_string(octeon_bootinfo->board_type),
+		octeon_model_get_string(read_c0_prid()));
+	return name;
+}
+
+const char *get_system_type(void)
+	__attribute__ ((alias("octeon_board_type_string")));
+
+void octeon_user_io_init(void)
+{
+	union octeon_cvmemctl cvmmemctl;
+	union cvmx_iob_fau_timeout fau_timeout;
+	union cvmx_pow_nw_tim nm_tim;
+	uint64_t cvmctl;
+
+	/* Get the current settings for CP0_CVMMEMCTL_REG */
+	cvmmemctl.u64 = read_c0_cvmmemctl();
+	/* R/W If set, marked write-buffer entries time out the same
+	 * as as other entries; if clear, marked write-buffer entries
+	 * use the maximum timeout. */
+	cvmmemctl.s.dismarkwblongto = 1;
+	/* R/W If set, a merged store does not clear the write-buffer
+	 * entry timeout state. */
+	cvmmemctl.s.dismrgclrwbto = 0;
+	/* R/W Two bits that are the MSBs of the resultant CVMSEG LM
+	 * word location for an IOBDMA. The other 8 bits come from the
+	 * SCRADDR field of the IOBDMA. */
+	cvmmemctl.s.iobdmascrmsb = 0;
+	/* R/W If set, SYNCWS and SYNCS only order marked stores; if
+	 * clear, SYNCWS and SYNCS only order unmarked
+	 * stores. SYNCWSMARKED has no effect when DISSYNCWS is
+	 * set. */
+	cvmmemctl.s.syncwsmarked = 0;
+	/* R/W If set, SYNCWS acts as SYNCW and SYNCS acts as SYNC. */
+	cvmmemctl.s.dissyncws = 0;
+	/* R/W If set, no stall happens on write buffer full. */
+	if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2))
+		cvmmemctl.s.diswbfst = 1;
+	else
+		cvmmemctl.s.diswbfst = 0;
+	/* R/W If set (and SX set), supervisor-level loads/stores can
+	 * use XKPHYS addresses with <48>==0 */
+	cvmmemctl.s.xkmemenas = 0;
+
+	/* R/W If set (and UX set), user-level loads/stores can use
+	 * XKPHYS addresses with VA<48>==0 */
+	cvmmemctl.s.xkmemenau = 0;
+
+	/* R/W If set (and SX set), supervisor-level loads/stores can
+	 * use XKPHYS addresses with VA<48>==1 */
+	cvmmemctl.s.xkioenas = 0;
+
+	/* R/W If set (and UX set), user-level loads/stores can use
+	 * XKPHYS addresses with VA<48>==1 */
+	cvmmemctl.s.xkioenau = 0;
+
+	/* R/W If set, all stores act as SYNCW (NOMERGE must be set
+	 * when this is set) RW, reset to 0. */
+	cvmmemctl.s.allsyncw = 0;
+
+	/* R/W If set, no stores merge, and all stores reach the
+	 * coherent bus in order. */
+	cvmmemctl.s.nomerge = 0;
+	/* R/W Selects the bit in the counter used for DID time-outs 0
+	 * = 231, 1 = 230, 2 = 229, 3 = 214. Actual time-out is
+	 * between 1x and 2x this interval. For example, with
+	 * DIDTTO=3, expiration interval is between 16K and 32K. */
+	cvmmemctl.s.didtto = 0;
+	/* R/W If set, the (mem) CSR clock never turns off. */
+	cvmmemctl.s.csrckalwys = 0;
+	/* R/W If set, mclk never turns off. */
+	cvmmemctl.s.mclkalwys = 0;
+	/* R/W Selects the bit in the counter used for write buffer
+	 * flush time-outs (WBFLT+11) is the bit position in an
+	 * internal counter used to determine expiration. The write
+	 * buffer expires between 1x and 2x this interval. For
+	 * example, with WBFLT = 0, a write buffer expires between 2K
+	 * and 4K cycles after the write buffer entry is allocated. */
+	cvmmemctl.s.wbfltime = 0;
+	/* R/W If set, do not put Istream in the L2 cache. */
+	cvmmemctl.s.istrnol2 = 0;
+	/* R/W The write buffer threshold. */
+	cvmmemctl.s.wbthresh = 10;
+	/* R/W If set, CVMSEG is available for loads/stores in
+	 * kernel/debug mode. */
+#if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
+	cvmmemctl.s.cvmsegenak = 1;
+#else
+	cvmmemctl.s.cvmsegenak = 0;
+#endif
+	/* R/W If set, CVMSEG is available for loads/stores in
+	 * supervisor mode. */
+	cvmmemctl.s.cvmsegenas = 0;
+	/* R/W If set, CVMSEG is available for loads/stores in user
+	 * mode. */
+	cvmmemctl.s.cvmsegenau = 0;
+	/* R/W Size of local memory in cache blocks, 54 (6912 bytes)
+	 * is max legal value. */
+	cvmmemctl.s.lmemsz = CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE;
+
+
+	if (smp_processor_id() == 0)
+		pr_notice("CVMSEG size: %d cache lines (%d bytes)\n",
+			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE,
+			  CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE * 128);
+
+	write_c0_cvmmemctl(cvmmemctl.u64);
+
+	/* Move the performance counter interrupts to IRQ 6 */
+	cvmctl = read_c0_cvmctl();
+	cvmctl &= ~(7 << 7);
+	cvmctl |= 6 << 7;
+	write_c0_cvmctl(cvmctl);
+
+	/* Set a default for the hardware timeouts */
+	fau_timeout.u64 = 0;
+	fau_timeout.s.tout_val = 0xfff;
+	/* Disable tagwait FAU timeout */
+	fau_timeout.s.tout_enb = 0;
+	cvmx_write_csr(CVMX_IOB_FAU_TIMEOUT, fau_timeout.u64);
+
+	nm_tim.u64 = 0;
+	/* 4096 cycles */
+	nm_tim.s.nw_tim = 3;
+	cvmx_write_csr(CVMX_POW_NW_TIM, nm_tim.u64);
+
+	write_octeon_c0_icacheerr(0);
+	write_c0_derraddr1(0);
+}
+
+/**
+ * Early entry point for arch setup
+ */
+void __init prom_init(void)
+{
+	struct cvmx_sysinfo *sysinfo;
+	const int coreid = cvmx_get_core_num();
+	int i;
+	int argc;
+	struct uart_port octeon_port;
+#ifdef CONFIG_CAVIUM_RESERVE32
+	int64_t addr = -1;
+#endif
+	/*
+	 * The bootloader passes a pointer to the boot descriptor in
+	 * $a3, this is available as fw_arg3.
+	 */
+	octeon_boot_desc_ptr = (struct octeon_boot_descriptor *)fw_arg3;
+	octeon_bootinfo =
+		cvmx_phys_to_ptr(octeon_boot_desc_ptr->cvmx_desc_vaddr);
+	cvmx_bootmem_init(cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr));
+
+	/*
+	 * Only enable the LED controller if we're running on a CN38XX, CN58XX,
+	 * or CN56XX. The CN30XX and CN31XX don't have an LED controller.
+	 */
+	if (!octeon_is_simulation() &&
+	    octeon_has_feature(OCTEON_FEATURE_LED_CONTROLLER)) {
+		cvmx_write_csr(CVMX_LED_EN, 0);
+		cvmx_write_csr(CVMX_LED_PRT, 0);
+		cvmx_write_csr(CVMX_LED_DBG, 0);
+		cvmx_write_csr(CVMX_LED_PRT_FMT, 0);
+		cvmx_write_csr(CVMX_LED_UDD_CNTX(0), 32);
+		cvmx_write_csr(CVMX_LED_UDD_CNTX(1), 32);
+		cvmx_write_csr(CVMX_LED_UDD_DATX(0), 0);
+		cvmx_write_csr(CVMX_LED_UDD_DATX(1), 0);
+		cvmx_write_csr(CVMX_LED_EN, 1);
+	}
+#ifdef CONFIG_CAVIUM_RESERVE32
+	/*
+	 * We need to temporarily allocate all memory in the reserve32
+	 * region. This makes sure the kernel doesn't allocate this
+	 * memory when it is getting memory from the
+	 * bootloader. Later, after the memory allocations are
+	 * complete, the reserve32 will be freed.
+	 */
+#ifdef CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB
+	if (CONFIG_CAVIUM_RESERVE32 & 0x1ff)
+		pr_err("CAVIUM_RESERVE32 isn't a multiple of 512MB. "
+		       "This is required if CAVIUM_RESERVE32_USE_WIRED_TLB "
+		       "is set\n");
+	else
+		addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20,
+							0, 0, 512 << 20,
+							"CAVIUM_RESERVE32", 0);
+#else
+	/*
+	 * Allocate memory for RESERVED32 aligned on 2MB boundary. This
+	 * is in case we later use hugetlb entries with it.
+	 */
+	addr = cvmx_bootmem_phy_named_block_alloc(CONFIG_CAVIUM_RESERVE32 << 20,
+						0, 0, 2 << 20,
+						"CAVIUM_RESERVE32", 0);
+#endif
+	if (addr < 0)
+		pr_err("Failed to allocate CAVIUM_RESERVE32 memory area\n");
+	else
+		octeon_reserve32_memory = addr;
+#endif
+
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2
+	if (cvmx_read_csr(CVMX_L2D_FUS3) & (3ull << 34)) {
+		pr_info("Skipping L2 locking due to reduced L2 cache size\n");
+	} else {
+		uint32_t ebase = read_c0_ebase() & 0x3ffff000;
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_TLB
+		/* TLB refill */
+		cvmx_l2c_lock_mem_region(ebase, 0x100);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_EXCEPTION
+		/* General exception */
+		cvmx_l2c_lock_mem_region(ebase + 0x180, 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_LOW_LEVEL_INTERRUPT
+		/* Interrupt handler */
+		cvmx_l2c_lock_mem_region(ebase + 0x200, 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_INTERRUPT
+		cvmx_l2c_lock_mem_region(__pa_symbol(handle_int), 0x100);
+		cvmx_l2c_lock_mem_region(__pa_symbol(plat_irq_dispatch), 0x80);
+#endif
+#ifdef CONFIG_CAVIUM_OCTEON_LOCK_L2_MEMCPY
+		cvmx_l2c_lock_mem_region(__pa_symbol(memcpy), 0x480);
+#endif
+	}
+#endif
+
+	sysinfo = cvmx_sysinfo_get();
+	memset(sysinfo, 0, sizeof(*sysinfo));
+	sysinfo->system_dram_size = octeon_bootinfo->dram_size << 20;
+	sysinfo->phy_mem_desc_ptr =
+		cvmx_phys_to_ptr(octeon_bootinfo->phy_mem_desc_addr);
+	sysinfo->core_mask = octeon_bootinfo->core_mask;
+	sysinfo->exception_base_addr = octeon_bootinfo->exception_base_addr;
+	sysinfo->cpu_clock_hz = octeon_bootinfo->eclock_hz;
+	sysinfo->dram_data_rate_hz = octeon_bootinfo->dclock_hz * 2;
+	sysinfo->board_type = octeon_bootinfo->board_type;
+	sysinfo->board_rev_major = octeon_bootinfo->board_rev_major;
+	sysinfo->board_rev_minor = octeon_bootinfo->board_rev_minor;
+	memcpy(sysinfo->mac_addr_base, octeon_bootinfo->mac_addr_base,
+	       sizeof(sysinfo->mac_addr_base));
+	sysinfo->mac_addr_count = octeon_bootinfo->mac_addr_count;
+	memcpy(sysinfo->board_serial_number,
+	       octeon_bootinfo->board_serial_number,
+	       sizeof(sysinfo->board_serial_number));
+	sysinfo->compact_flash_common_base_addr =
+		octeon_bootinfo->compact_flash_common_base_addr;
+	sysinfo->compact_flash_attribute_base_addr =
+		octeon_bootinfo->compact_flash_attribute_base_addr;
+	sysinfo->led_display_base_addr = octeon_bootinfo->led_display_base_addr;
+	sysinfo->dfa_ref_clock_hz = octeon_bootinfo->dfa_ref_clock_hz;
+	sysinfo->bootloader_config_flags = octeon_bootinfo->config_flags;
+
+
+	octeon_check_cpu_bist();
+
+	octeon_uart = octeon_get_boot_uart();
+
+	/*
+	 * Disable All CIU Interrupts. The ones we need will be
+	 * enabled later.  Read the SUM register so we know the write
+	 * completed.
+	 */
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2 + 1)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2 + 1)), 0);
+	cvmx_read_csr(CVMX_CIU_INTX_SUM0((coreid * 2)));
+
+#ifdef CONFIG_SMP
+	octeon_write_lcd("LinuxSMP");
+#else
+	octeon_write_lcd("Linux");
+#endif
+
+#ifdef CONFIG_CAVIUM_GDB
+	/*
+	 * When debugging the linux kernel, force the cores to enter
+	 * the debug exception handler to break in.
+	 */
+	if (octeon_get_boot_debug_flag()) {
+		cvmx_write_csr(CVMX_CIU_DINT, 1 << cvmx_get_core_num());
+		cvmx_read_csr(CVMX_CIU_DINT);
+	}
+#endif
+
+	/*
+	 * BIST should always be enabled when doing a soft reset. L2
+	 * Cache locking for instance is not cleared unless BIST is
+	 * enabled.  Unfortunately due to a chip errata G-200 for
+	 * Cn38XX and CN31XX, BIST msut be disabled on these parts.
+	 */
+	if (OCTEON_IS_MODEL(OCTEON_CN38XX_PASS2) ||
+	    OCTEON_IS_MODEL(OCTEON_CN31XX))
+		cvmx_write_csr(CVMX_CIU_SOFT_BIST, 0);
+	else
+		cvmx_write_csr(CVMX_CIU_SOFT_BIST, 1);
+
+	/* Default to 64MB in the simulator to speed things up */
+	if (octeon_is_simulation())
+		MAX_MEMORY = 64ull << 20;
+
+	arcs_cmdline[0] = 0;
+	argc = octeon_boot_desc_ptr->argc;
+	for (i = 0; i < argc; i++) {
+		const char *arg =
+			cvmx_phys_to_ptr(octeon_boot_desc_ptr->argv[i]);
+		if ((strncmp(arg, "MEM=", 4) == 0) ||
+		    (strncmp(arg, "mem=", 4) == 0)) {
+			sscanf(arg + 4, "%llu", &MAX_MEMORY);
+			MAX_MEMORY <<= 20;
+			if (MAX_MEMORY == 0)
+				MAX_MEMORY = 32ull << 30;
+		} else if (strcmp(arg, "ecc_verbose") == 0) {
+#ifdef CONFIG_CAVIUM_REPORT_SINGLE_BIT_ECC
+			__cvmx_interrupt_ecc_report_single_bit_errors = 1;
+			pr_notice("Reporting of single bit ECC errors is "
+				  "turned on\n");
+#endif
+		} else if (strlen(arcs_cmdline) + strlen(arg) + 1 <
+			   sizeof(arcs_cmdline) - 1) {
+			strcat(arcs_cmdline, " ");
+			strcat(arcs_cmdline, arg);
+		}
+	}
+
+	if (strstr(arcs_cmdline, "console=") == NULL) {
+#ifdef CONFIG_GDB_CONSOLE
+		strcat(arcs_cmdline, " console=gdb");
+#else
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+		strcat(arcs_cmdline, " console=ttyS0,115200");
+#else
+		if (octeon_uart == 1)
+			strcat(arcs_cmdline, " console=ttyS1,115200");
+		else
+			strcat(arcs_cmdline, " console=ttyS0,115200");
+#endif
+#endif
+	}
+
+	if (octeon_is_simulation()) {
+		/*
+		 * The simulator uses a mtdram device pre filled with
+		 * the filesystem. Also specify the calibration delay
+		 * to avoid calculating it every time.
+		 */
+		strcat(arcs_cmdline, " rw root=1f00"
+		       " lpj=60176 slram=root,0x40000000,+1073741824");
+	}
+
+	mips_hpt_frequency = octeon_get_clock_rate();
+
+	octeon_init_cvmcount();
+
+	_machine_restart = octeon_restart;
+	_machine_halt = octeon_halt;
+
+	memset(&octeon_port, 0, sizeof(octeon_port));
+	/*
+	 * For early_serial_setup we don't set the port type or
+	 * UPF_FIXED_TYPE.
+	 */
+	octeon_port.flags = ASYNC_SKIP_TEST | UPF_SHARE_IRQ;
+	octeon_port.iotype = UPIO_MEM;
+	/* I/O addresses are every 8 bytes */
+	octeon_port.regshift = 3;
+	/* Clock rate of the chip */
+	octeon_port.uartclk = mips_hpt_frequency;
+	octeon_port.fifosize = 64;
+	octeon_port.mapbase = 0x0001180000000800ull + (1024 * octeon_uart);
+	octeon_port.membase = cvmx_phys_to_ptr(octeon_port.mapbase);
+	octeon_port.serial_in = octeon_serial_in;
+	octeon_port.serial_out = octeon_serial_out;
+#ifdef CONFIG_CAVIUM_OCTEON_2ND_KERNEL
+	octeon_port.line = 0;
+#else
+	octeon_port.line = octeon_uart;
+#endif
+	octeon_port.irq = 42 + octeon_uart;
+	early_serial_setup(&octeon_port);
+
+	octeon_user_io_init();
+	register_smp_ops(&octeon_smp_ops);
+}
+
+void __init plat_mem_setup(void)
+{
+	uint64_t mem_alloc_size;
+	uint64_t total;
+	int64_t memory;
+
+	total = 0;
+
+	/* First add the init memory we will be returning.  */
+	memory = __pa_symbol(&__init_begin) & PAGE_MASK;
+	mem_alloc_size = (__pa_symbol(&__init_end) & PAGE_MASK) - memory;
+	if (mem_alloc_size > 0) {
+		add_memory_region(memory, mem_alloc_size, BOOT_MEM_RAM);
+		total += mem_alloc_size;
+	}
+
+	/*
+	 * The Mips memory init uses the first memory location for
+	 * some memory vectors. When SPARSEMEM is in use, it doesn't
+	 * verify that the size is big enough for the final
+	 * vectors. Making the smallest chuck 4MB seems to be enough
+	 * to consistantly work.
+	 */
+	mem_alloc_size = 4 << 20;
+	if (mem_alloc_size > MAX_MEMORY)
+		mem_alloc_size = MAX_MEMORY;
+
+	/*
+	 * When allocating memory, we want incrementing addresses from
+	 * bootmem_alloc so the code in add_memory_region can merge
+	 * regions next to each other.
+	 */
+	cvmx_bootmem_lock();
+	while ((boot_mem_map.nr_map < BOOT_MEM_MAP_MAX)
+		&& (total < MAX_MEMORY)) {
+#if defined(CONFIG_64BIT) || defined(CONFIG_64BIT_PHYS_ADDR)
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size,
+						__pa_symbol(&__init_end), -1,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#elif defined(CONFIG_HIGHMEM)
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size, 0, 1ull << 31,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#else
+		memory = cvmx_bootmem_phy_alloc(mem_alloc_size, 0, 512 << 20,
+						0x100000,
+						CVMX_BOOTMEM_FLAG_NO_LOCKING);
+#endif
+		if (memory >= 0) {
+			/*
+			 * This function automatically merges address
+			 * regions next to each other if they are
+			 * received in incrementing order.
+			 */
+			add_memory_region(memory, mem_alloc_size, BOOT_MEM_RAM);
+			total += mem_alloc_size;
+		} else {
+			break;
+		}
+	}
+	cvmx_bootmem_unlock();
+
+#ifdef CONFIG_CAVIUM_RESERVE32
+	/*
+	 * Now that we've allocated the kernel memory it is safe to
+	 * free the reserved region. We free it here so that builtin
+	 * drivers can use the memory.
+	 */
+	if (octeon_reserve32_memory)
+		cvmx_bootmem_free_named("CAVIUM_RESERVE32");
+#endif /* CONFIG_CAVIUM_RESERVE32 */
+
+	if (total == 0)
+		panic("Unable to allocate memory from "
+		      "cvmx_bootmem_phy_alloc\n");
+}
+
+
+int prom_putchar(char c)
+{
+	uint64_t lsrval;
+
+	/* Spin until there is room */
+	do {
+		lsrval = cvmx_read_csr(CVMX_MIO_UARTX_LSR(octeon_uart));
+	} while ((lsrval & 0x20) == 0);
+
+	/* Write the byte */
+	cvmx_write_csr(CVMX_MIO_UARTX_THR(octeon_uart), c);
+	return 1;
+}
+
+void prom_free_prom_memory(void)
+{
+#ifdef CONFIG_CAVIUM_DECODE_RSL
+	cvmx_interrupt_rsl_enable();
+
+	/* Add an interrupt handler for general failures. */
+	if (request_irq(OCTEON_IRQ_RML, octeon_rlm_interrupt, IRQF_SHARED,
+			"RML/RSL", octeon_rlm_interrupt)) {
+		panic("Unable to request_irq(OCTEON_IRQ_RML)\n");
+	}
+#endif
+
+	/* This call is here so that it is performed after any TLB
+	   initializations. It needs to be after these in case the
+	   CONFIG_CAVIUM_RESERVE32_USE_WIRED_TLB option is set */
+	octeon_hal_setup_reserved32();
+}
+
+static struct octeon_cf_data octeon_cf_data;
+
+static int __init octeon_cf_device_init(void)
+{
+	union cvmx_mio_boot_reg_cfgx mio_boot_reg_cfg;
+	unsigned long base_ptr, region_base, region_size;
+	struct platform_device *pd;
+	struct resource cf_resources[3];
+	unsigned int num_resources;
+	int i;
+	int ret = 0;
+
+	/* Setup octeon-cf platform device if present. */
+	base_ptr = 0;
+	if (octeon_bootinfo->major_version == 1
+		&& octeon_bootinfo->minor_version >= 1) {
+		if (octeon_bootinfo->compact_flash_common_base_addr)
+			base_ptr =
+				octeon_bootinfo->compact_flash_common_base_addr;
+	} else {
+		base_ptr = 0x1d000800;
+	}
+
+	if (!base_ptr)
+		return ret;
+
+	/* Find CS0 region. */
+	for (i = 0; i < 8; i++) {
+		mio_boot_reg_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(i));
+		region_base = mio_boot_reg_cfg.s.base << 16;
+		region_size = (mio_boot_reg_cfg.s.size + 1) << 16;
+		if (mio_boot_reg_cfg.s.en && base_ptr >= region_base
+		    && base_ptr < region_base + region_size)
+			break;
+	}
+	if (i >= 7) {
+		/* i and i + 1 are CS0 and CS1, both must be less than 8. */
+		goto out;
+	}
+	octeon_cf_data.base_region = i;
+	octeon_cf_data.is16bit = mio_boot_reg_cfg.s.width;
+	octeon_cf_data.base_region_bias = base_ptr - region_base;
+	memset(cf_resources, 0, sizeof(cf_resources));
+	num_resources = 0;
+	cf_resources[num_resources].flags	= IORESOURCE_MEM;
+	cf_resources[num_resources].start	= region_base;
+	cf_resources[num_resources].end	= region_base + region_size - 1;
+	num_resources++;
+
+
+	if (!(base_ptr & 0xfffful)) {
+		/*
+		 * Boot loader signals availability of DMA (true_ide
+		 * mode) by setting low order bits of base_ptr to
+		 * zero.
+		 */
+
+		/* Asume that CS1 immediately follows. */
+		mio_boot_reg_cfg.u64 =
+			cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(i + 1));
+		region_base = mio_boot_reg_cfg.s.base << 16;
+		region_size = (mio_boot_reg_cfg.s.size + 1) << 16;
+		if (!mio_boot_reg_cfg.s.en)
+			goto out;
+
+		cf_resources[num_resources].flags	= IORESOURCE_MEM;
+		cf_resources[num_resources].start	= region_base;
+		cf_resources[num_resources].end	= region_base + region_size - 1;
+		num_resources++;
+
+		octeon_cf_data.dma_engine = 0;
+		cf_resources[num_resources].flags	= IORESOURCE_IRQ;
+		cf_resources[num_resources].start	= OCTEON_IRQ_BOOTDMA;
+		cf_resources[num_resources].end	= OCTEON_IRQ_BOOTDMA;
+		num_resources++;
+	} else {
+		octeon_cf_data.dma_engine = -1;
+	}
+
+	pd = platform_device_alloc("pata_octeon_cf", -1);
+	if (!pd) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	pd->dev.platform_data = &octeon_cf_data;
+
+	ret = platform_device_add_resources(pd, cf_resources, num_resources);
+	if (ret)
+		goto fail;
+
+	ret = platform_device_add(pd);
+	if (ret)
+		goto fail;
+
+	return ret;
+fail:
+	platform_device_put(pd);
+out:
+	return ret;
+}
+device_initcall(octeon_cf_device_init);
--- a/arch/mips/cavium-octeon/smp.c
+++ b/arch/mips/cavium-octeon/smp.c
@ -0,0 +1,211 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Cavium Networks
+ */
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/sched.h>
+#include <linux/module.h>
+
+#include <asm/mmu_context.h>
+#include <asm/system.h>
+#include <asm/time.h>
+
+#include <asm/octeon/octeon.h>
+
+volatile unsigned long octeon_processor_boot = 0xff;
+volatile unsigned long octeon_processor_sp;
+volatile unsigned long octeon_processor_gp;
+
+static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
+{
+	const int coreid = cvmx_get_core_num();
+	uint64_t action;
+
+	/* Load the mailbox register to figure out what we're supposed to do */
+	action = cvmx_read_csr(CVMX_CIU_MBOX_CLRX(coreid));
+
+	/* Clear the mailbox to clear the interrupt */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), action);
+
+	if (action & SMP_CALL_FUNCTION)
+		smp_call_function_interrupt();
+
+	/* Check if we've been told to flush the icache */
+	if (action & SMP_ICACHE_FLUSH)
+		asm volatile ("synci 0($0)\n");
+	return IRQ_HANDLED;
+}
+
+/**
+ * Cause the function described by call_data to be executed on the passed
+ * cpu.  When the function has finished, increment the finished field of
+ * call_data.
+ */
+void octeon_send_ipi_single(int cpu, unsigned int action)
+{
+	int coreid = cpu_logical_map(cpu);
+	/*
+	pr_info("SMP: Mailbox send cpu=%d, coreid=%d, action=%u\n", cpu,
+	       coreid, action);
+	*/
+	cvmx_write_csr(CVMX_CIU_MBOX_SETX(coreid), action);
+}
+
+static inline void octeon_send_ipi_mask(cpumask_t mask, unsigned int action)
+{
+	unsigned int i;
+
+	for_each_cpu_mask(i, mask)
+		octeon_send_ipi_single(i, action);
+}
+
+/**
+ * Detect available CPUs, populate phys_cpu_present_map
+ */
+static void octeon_smp_setup(void)
+{
+	const int coreid = cvmx_get_core_num();
+	int cpus;
+	int id;
+
+	int core_mask = octeon_get_boot_coremask();
+
+	cpus_clear(cpu_possible_map);
+	__cpu_number_map[coreid] = 0;
+	__cpu_logical_map[0] = coreid;
+	cpu_set(0, cpu_possible_map);
+
+	cpus = 1;
+	for (id = 0; id < 16; id++) {
+		if ((id != coreid) && (core_mask & (1 << id))) {
+			cpu_set(cpus, cpu_possible_map);
+			__cpu_number_map[id] = cpus;
+			__cpu_logical_map[cpus] = id;
+			cpus++;
+		}
+	}
+}
+
+/**
+ * Firmware CPU startup hook
+ *
+ */
+static void octeon_boot_secondary(int cpu, struct task_struct *idle)
+{
+	int count;
+
+	pr_info("SMP: Booting CPU%02d (CoreId %2d)...\n", cpu,
+		cpu_logical_map(cpu));
+
+	octeon_processor_sp = __KSTK_TOS(idle);
+	octeon_processor_gp = (unsigned long)(task_thread_info(idle));
+	octeon_processor_boot = cpu_logical_map(cpu);
+	mb();
+
+	count = 10000;
+	while (octeon_processor_sp && count) {
+		/* Waiting for processor to get the SP and GP */
+		udelay(1);
+		count--;
+	}
+	if (count == 0)
+		pr_err("Secondary boot timeout\n");
+}
+
+/**
+ * After we've done initial boot, this function is called to allow the
+ * board code to clean up state, if needed
+ */
+static void octeon_init_secondary(void)
+{
+	const int coreid = cvmx_get_core_num();
+	union cvmx_ciu_intx_sum0 interrupt_enable;
+
+	octeon_check_cpu_bist();
+	octeon_init_cvmcount();
+	/*
+	pr_info("SMP: CPU%d (CoreId %lu) started\n", cpu, coreid);
+	*/
+	/* Enable Mailbox interrupts to this core. These are the only
+	   interrupts allowed on line 3 */
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(coreid), 0xffffffff);
+	interrupt_enable.u64 = 0;
+	interrupt_enable.s.mbox = 0x3;
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2)), interrupt_enable.u64);
+	cvmx_write_csr(CVMX_CIU_INTX_EN0((coreid * 2 + 1)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2)), 0);
+	cvmx_write_csr(CVMX_CIU_INTX_EN1((coreid * 2 + 1)), 0);
+	/* Enable core interrupt processing for 2,3 and 7 */
+	set_c0_status(0x8c01);
+}
+
+/**
+ * Callout to firmware before smp_init
+ *
+ */
+void octeon_prepare_cpus(unsigned int max_cpus)
+{
+	cvmx_write_csr(CVMX_CIU_MBOX_CLRX(cvmx_get_core_num()), 0xffffffff);
+	if (request_irq(OCTEON_IRQ_MBOX0, mailbox_interrupt, IRQF_SHARED,
+			"mailbox0", mailbox_interrupt)) {
+		panic("Cannot request_irq(OCTEON_IRQ_MBOX0)\n");
+	}
+	if (request_irq(OCTEON_IRQ_MBOX1, mailbox_interrupt, IRQF_SHARED,
+			"mailbox1", mailbox_interrupt)) {
+		panic("Cannot request_irq(OCTEON_IRQ_MBOX1)\n");
+	}
+}
+
+/**
+ * Last chance for the board code to finish SMP initialization before
+ * the CPU is "online".
+ */
+static void octeon_smp_finish(void)
+{
+#ifdef CONFIG_CAVIUM_GDB
+	unsigned long tmp;
+	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
+	   to be not masked by this core so we know the signal is received by
+	   someone */
+	asm volatile ("dmfc0 %0, $22\n"
+		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
+#endif
+
+	octeon_user_io_init();
+
+	/* to generate the first CPU timer interrupt */
+	write_c0_compare(read_c0_count() + mips_hpt_frequency / HZ);
+}
+
+/**
+ * Hook for after all CPUs are online
+ */
+static void octeon_cpus_done(void)
+{
+#ifdef CONFIG_CAVIUM_GDB
+	unsigned long tmp;
+	/* Pulse MCD0 signal on Ctrl-C to stop all the cores. Also set the MCD0
+	   to be not masked by this core so we know the signal is received by
+	   someone */
+	asm volatile ("dmfc0 %0, $22\n"
+		      "ori   %0, %0, 0x9100\n" "dmtc0 %0, $22\n" : "=r" (tmp));
+#endif
+}
+
+struct plat_smp_ops octeon_smp_ops = {
+	.send_ipi_single	= octeon_send_ipi_single,
+	.send_ipi_mask		= octeon_send_ipi_mask,
+	.init_secondary		= octeon_init_secondary,
+	.smp_finish		= octeon_smp_finish,
+	.cpus_done		= octeon_cpus_done,
+	.boot_secondary		= octeon_boot_secondary,
+	.smp_setup		= octeon_smp_setup,
+	.prepare_cpus		= octeon_prepare_cpus,
+};
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@ -0,0 +1,78 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004 Cavium Networks
+ */
+#ifndef __ASM_MACH_CAVIUM_OCTEON_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_CAVIUM_OCTEON_CPU_FEATURE_OVERRIDES_H
+
+#include <linux/types.h>
+#include <asm/mipsregs.h>
+
+/*
+ * Cavium Octeons are MIPS64v2 processors
+ */
+#define cpu_dcache_line_size()	128
+#define cpu_icache_line_size()	128
+
+
+#define cpu_has_4kex		1
+#define cpu_has_3k_cache	0
+#define cpu_has_4k_cache	0
+#define cpu_has_tx39_cache	0
+#define cpu_has_fpu		0
+#define cpu_has_counter		1
+#define cpu_has_watch		1
+#define cpu_has_divec		1
+#define cpu_has_vce		0
+#define cpu_has_cache_cdex_p	0
+#define cpu_has_cache_cdex_s	0
+#define cpu_has_prefetch	1
+
+/*
+ * We should disable LL/SC on non SMP systems as it is faster to
+ * disable interrupts for atomic access than a LL/SC.  Unfortunatly we
+ * cannot as this breaks asm/futex.h
+ */
+#define cpu_has_llsc		1
+#define cpu_has_vtag_icache	1
+#define cpu_has_dc_aliases	0
+#define cpu_has_ic_fills_f_dc	0
+#define cpu_has_64bits		1
+#define cpu_has_octeon_cache	1
+#define cpu_has_saa		octeon_has_saa()
+#define cpu_has_mips32r1	0
+#define cpu_has_mips32r2	0
+#define cpu_has_mips64r1	0
+#define cpu_has_mips64r2	1
+#define cpu_has_dsp		0
+#define cpu_has_mipsmt		0
+#define cpu_has_userlocal	0
+#define cpu_has_vint		0
+#define cpu_has_veic		0
+#define ARCH_HAS_READ_CURRENT_TIMER 1
+#define ARCH_HAS_IRQ_PER_CPU	1
+#define ARCH_HAS_SPINLOCK_PREFETCH 1
+#define spin_lock_prefetch(x) prefetch(x)
+#define PREFETCH_STRIDE 128
+
+static inline int read_current_timer(unsigned long *result)
+{
+	asm volatile ("rdhwr %0,$31\n"
+#ifndef CONFIG_64BIT
+		      "\tsll %0, 0"
+#endif
+		      : "=r" (*result));
+	return 0;
+}
+
+static inline int octeon_has_saa(void)
+{
+	int id;
+	asm volatile ("mfc0 %0, $15,0" : "=r" (id));
+	return id >= 0x000d0300;
+}
+
+#endif
--- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h
@ -0,0 +1,64 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006  Ralf Baechle <ralf@linux-mips.org>
+ *
+ *
+ * Similar to mach-generic/dma-coherence.h except
+ * plat_device_is_coherent hard coded to return 1.
+ *
+ */
+#ifndef __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H
+#define __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H
+
+struct device;
+
+dma_addr_t octeon_map_dma_mem(struct device *, void *, size_t);
+void octeon_unmap_dma_mem(struct device *, dma_addr_t);
+
+static inline dma_addr_t plat_map_dma_mem(struct device *dev, void *addr,
+	size_t size)
+{
+	return octeon_map_dma_mem(dev, addr, size);
+}
+
+static inline dma_addr_t plat_map_dma_mem_page(struct device *dev,
+	struct page *page)
+{
+	return octeon_map_dma_mem(dev, page_address(page), PAGE_SIZE);
+}
+
+static inline unsigned long plat_dma_addr_to_phys(dma_addr_t dma_addr)
+{
+	return dma_addr;
+}
+
+static inline void plat_unmap_dma_mem(struct device *dev, dma_addr_t dma_addr)
+{
+	octeon_unmap_dma_mem(dev, dma_addr);
+}
+
+static inline int plat_dma_supported(struct device *dev, u64 mask)
+{
+	return 1;
+}
+
+static inline void plat_extra_sync_for_device(struct device *dev)
+{
+	mb();
+}
+
+static inline int plat_device_is_coherent(struct device *dev)
+{
+	return 1;
+}
+
+static inline int plat_dma_mapping_error(struct device *dev,
+					 dma_addr_t dma_addr)
+{
+	return dma_addr == -1;
+}
+
+#endif /* __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H */
--- a/arch/mips/include/asm/mach-cavium-octeon/irq.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/irq.h
@ -0,0 +1,244 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Cavium Networks
+ */
+#ifndef __OCTEON_IRQ_H__
+#define __OCTEON_IRQ_H__
+
+#define NR_IRQS OCTEON_IRQ_LAST
+#define MIPS_CPU_IRQ_BASE OCTEON_IRQ_SW0
+
+/* 0 - 7 represent the i8259 master */
+#define OCTEON_IRQ_I8259M0	0
+#define OCTEON_IRQ_I8259M1	1
+#define OCTEON_IRQ_I8259M2	2
+#define OCTEON_IRQ_I8259M3	3
+#define OCTEON_IRQ_I8259M4	4
+#define OCTEON_IRQ_I8259M5	5
+#define OCTEON_IRQ_I8259M6	6
+#define OCTEON_IRQ_I8259M7	7
+/* 8 - 15 represent the i8259 slave */
+#define OCTEON_IRQ_I8259S0	8
+#define OCTEON_IRQ_I8259S1	9
+#define OCTEON_IRQ_I8259S2	10
+#define OCTEON_IRQ_I8259S3	11
+#define OCTEON_IRQ_I8259S4	12
+#define OCTEON_IRQ_I8259S5	13
+#define OCTEON_IRQ_I8259S6	14
+#define OCTEON_IRQ_I8259S7	15
+/* 16 - 23 represent the 8 MIPS standard interrupt sources */
+#define OCTEON_IRQ_SW0		16
+#define OCTEON_IRQ_SW1		17
+#define OCTEON_IRQ_CIU0		18
+#define OCTEON_IRQ_CIU1		19
+#define OCTEON_IRQ_CIU4		20
+#define OCTEON_IRQ_5		21
+#define OCTEON_IRQ_PERF		22
+#define OCTEON_IRQ_TIMER	23
+/* 24 - 87 represent the sources in CIU_INTX_EN0 */
+#define OCTEON_IRQ_WORKQ0	24
+#define OCTEON_IRQ_WORKQ1	25
+#define OCTEON_IRQ_WORKQ2	26
+#define OCTEON_IRQ_WORKQ3	27
+#define OCTEON_IRQ_WORKQ4	28
+#define OCTEON_IRQ_WORKQ5	29
+#define OCTEON_IRQ_WORKQ6	30
+#define OCTEON_IRQ_WORKQ7	31
+#define OCTEON_IRQ_WORKQ8	32
+#define OCTEON_IRQ_WORKQ9	33
+#define OCTEON_IRQ_WORKQ10	34
+#define OCTEON_IRQ_WORKQ11	35
+#define OCTEON_IRQ_WORKQ12	36
+#define OCTEON_IRQ_WORKQ13	37
+#define OCTEON_IRQ_WORKQ14	38
+#define OCTEON_IRQ_WORKQ15	39
+#define OCTEON_IRQ_GPIO0	40
+#define OCTEON_IRQ_GPIO1	41
+#define OCTEON_IRQ_GPIO2	42
+#define OCTEON_IRQ_GPIO3	43
+#define OCTEON_IRQ_GPIO4	44
+#define OCTEON_IRQ_GPIO5	45
+#define OCTEON_IRQ_GPIO6	46
+#define OCTEON_IRQ_GPIO7	47
+#define OCTEON_IRQ_GPIO8	48
+#define OCTEON_IRQ_GPIO9	49
+#define OCTEON_IRQ_GPIO10	50
+#define OCTEON_IRQ_GPIO11	51
+#define OCTEON_IRQ_GPIO12	52
+#define OCTEON_IRQ_GPIO13	53
+#define OCTEON_IRQ_GPIO14	54
+#define OCTEON_IRQ_GPIO15	55
+#define OCTEON_IRQ_MBOX0	56
+#define OCTEON_IRQ_MBOX1	57
+#define OCTEON_IRQ_UART0	58
+#define OCTEON_IRQ_UART1	59
+#define OCTEON_IRQ_PCI_INT0	60
+#define OCTEON_IRQ_PCI_INT1	61
+#define OCTEON_IRQ_PCI_INT2	62
+#define OCTEON_IRQ_PCI_INT3	63
+#define OCTEON_IRQ_PCI_MSI0	64
+#define OCTEON_IRQ_PCI_MSI1	65
+#define OCTEON_IRQ_PCI_MSI2	66
+#define OCTEON_IRQ_PCI_MSI3	67
+#define OCTEON_IRQ_RESERVED68	68	/* Summary of CIU_INT_SUM1 */
+#define OCTEON_IRQ_TWSI		69
+#define OCTEON_IRQ_RML		70
+#define OCTEON_IRQ_TRACE	71
+#define OCTEON_IRQ_GMX_DRP0	72
+#define OCTEON_IRQ_GMX_DRP1	73
+#define OCTEON_IRQ_IPD_DRP	74
+#define OCTEON_IRQ_KEY_ZERO	75
+#define OCTEON_IRQ_TIMER0	76
+#define OCTEON_IRQ_TIMER1	77
+#define OCTEON_IRQ_TIMER2	78
+#define OCTEON_IRQ_TIMER3	79
+#define OCTEON_IRQ_USB0		80
+#define OCTEON_IRQ_PCM		81
+#define OCTEON_IRQ_MPI		82
+#define OCTEON_IRQ_TWSI2	83
+#define OCTEON_IRQ_POWIQ	84
+#define OCTEON_IRQ_IPDPPTHR	85
+#define OCTEON_IRQ_MII0		86
+#define OCTEON_IRQ_BOOTDMA	87
+/* 88 - 151 represent the sources in CIU_INTX_EN1 */
+#define OCTEON_IRQ_WDOG0	88
+#define OCTEON_IRQ_WDOG1	89
+#define OCTEON_IRQ_WDOG2	90
+#define OCTEON_IRQ_WDOG3	91
+#define OCTEON_IRQ_WDOG4	92
+#define OCTEON_IRQ_WDOG5	93
+#define OCTEON_IRQ_WDOG6	94
+#define OCTEON_IRQ_WDOG7	95
+#define OCTEON_IRQ_WDOG8	96
+#define OCTEON_IRQ_WDOG9	97
+#define OCTEON_IRQ_WDOG10	98
+#define OCTEON_IRQ_WDOG11	99
+#define OCTEON_IRQ_WDOG12	100
+#define OCTEON_IRQ_WDOG13	101
+#define OCTEON_IRQ_WDOG14	102
+#define OCTEON_IRQ_WDOG15	103
+#define OCTEON_IRQ_UART2	104
+#define OCTEON_IRQ_USB1		105
+#define OCTEON_IRQ_MII1		106
+#define OCTEON_IRQ_RESERVED107	107
+#define OCTEON_IRQ_RESERVED108	108
+#define OCTEON_IRQ_RESERVED109	109
+#define OCTEON_IRQ_RESERVED110	110
+#define OCTEON_IRQ_RESERVED111	111
+#define OCTEON_IRQ_RESERVED112	112
+#define OCTEON_IRQ_RESERVED113	113
+#define OCTEON_IRQ_RESERVED114	114
+#define OCTEON_IRQ_RESERVED115	115
+#define OCTEON_IRQ_RESERVED116	116
+#define OCTEON_IRQ_RESERVED117	117
+#define OCTEON_IRQ_RESERVED118	118
+#define OCTEON_IRQ_RESERVED119	119
+#define OCTEON_IRQ_RESERVED120	120
+#define OCTEON_IRQ_RESERVED121	121
+#define OCTEON_IRQ_RESERVED122	122
+#define OCTEON_IRQ_RESERVED123	123
+#define OCTEON_IRQ_RESERVED124	124
+#define OCTEON_IRQ_RESERVED125	125
+#define OCTEON_IRQ_RESERVED126	126
+#define OCTEON_IRQ_RESERVED127	127
+#define OCTEON_IRQ_RESERVED128	128
+#define OCTEON_IRQ_RESERVED129	129
+#define OCTEON_IRQ_RESERVED130	130
+#define OCTEON_IRQ_RESERVED131	131
+#define OCTEON_IRQ_RESERVED132	132
+#define OCTEON_IRQ_RESERVED133	133
+#define OCTEON_IRQ_RESERVED134	134
+#define OCTEON_IRQ_RESERVED135	135
+#define OCTEON_IRQ_RESERVED136	136
+#define OCTEON_IRQ_RESERVED137	137
+#define OCTEON_IRQ_RESERVED138	138
+#define OCTEON_IRQ_RESERVED139	139
+#define OCTEON_IRQ_RESERVED140	140
+#define OCTEON_IRQ_RESERVED141	141
+#define OCTEON_IRQ_RESERVED142	142
+#define OCTEON_IRQ_RESERVED143	143
+#define OCTEON_IRQ_RESERVED144	144
+#define OCTEON_IRQ_RESERVED145	145
+#define OCTEON_IRQ_RESERVED146	146
+#define OCTEON_IRQ_RESERVED147	147
+#define OCTEON_IRQ_RESERVED148	148
+#define OCTEON_IRQ_RESERVED149	149
+#define OCTEON_IRQ_RESERVED150	150
+#define OCTEON_IRQ_RESERVED151	151
+
+#ifdef CONFIG_PCI_MSI
+/* 152 - 215 represent the MSI interrupts 0-63 */
+#define OCTEON_IRQ_MSI_BIT0	152
+#define OCTEON_IRQ_MSI_BIT1	153
+#define OCTEON_IRQ_MSI_BIT2	154
+#define OCTEON_IRQ_MSI_BIT3	155
+#define OCTEON_IRQ_MSI_BIT4	156
+#define OCTEON_IRQ_MSI_BIT5	157
+#define OCTEON_IRQ_MSI_BIT6	158
+#define OCTEON_IRQ_MSI_BIT7	159
+#define OCTEON_IRQ_MSI_BIT8	160
+#define OCTEON_IRQ_MSI_BIT9	161
+#define OCTEON_IRQ_MSI_BIT10	162
+#define OCTEON_IRQ_MSI_BIT11	163
+#define OCTEON_IRQ_MSI_BIT12	164
+#define OCTEON_IRQ_MSI_BIT13	165
+#define OCTEON_IRQ_MSI_BIT14	166
+#define OCTEON_IRQ_MSI_BIT15	167
+#define OCTEON_IRQ_MSI_BIT16	168
+#define OCTEON_IRQ_MSI_BIT17	169
+#define OCTEON_IRQ_MSI_BIT18	170
+#define OCTEON_IRQ_MSI_BIT19	171
+#define OCTEON_IRQ_MSI_BIT20	172
+#define OCTEON_IRQ_MSI_BIT21	173
+#define OCTEON_IRQ_MSI_BIT22	174
+#define OCTEON_IRQ_MSI_BIT23	175
+#define OCTEON_IRQ_MSI_BIT24	176
+#define OCTEON_IRQ_MSI_BIT25	177
+#define OCTEON_IRQ_MSI_BIT26	178
+#define OCTEON_IRQ_MSI_BIT27	179
+#define OCTEON_IRQ_MSI_BIT28	180
+#define OCTEON_IRQ_MSI_BIT29	181
+#define OCTEON_IRQ_MSI_BIT30	182
+#define OCTEON_IRQ_MSI_BIT31	183
+#define OCTEON_IRQ_MSI_BIT32	184
+#define OCTEON_IRQ_MSI_BIT33	185
+#define OCTEON_IRQ_MSI_BIT34	186
+#define OCTEON_IRQ_MSI_BIT35	187
+#define OCTEON_IRQ_MSI_BIT36	188
+#define OCTEON_IRQ_MSI_BIT37	189
+#define OCTEON_IRQ_MSI_BIT38	190
+#define OCTEON_IRQ_MSI_BIT39	191
+#define OCTEON_IRQ_MSI_BIT40	192
+#define OCTEON_IRQ_MSI_BIT41	193
+#define OCTEON_IRQ_MSI_BIT42	194
+#define OCTEON_IRQ_MSI_BIT43	195
+#define OCTEON_IRQ_MSI_BIT44	196
+#define OCTEON_IRQ_MSI_BIT45	197
+#define OCTEON_IRQ_MSI_BIT46	198
+#define OCTEON_IRQ_MSI_BIT47	199
+#define OCTEON_IRQ_MSI_BIT48	200
+#define OCTEON_IRQ_MSI_BIT49	201
+#define OCTEON_IRQ_MSI_BIT50	202
+#define OCTEON_IRQ_MSI_BIT51	203
+#define OCTEON_IRQ_MSI_BIT52	204
+#define OCTEON_IRQ_MSI_BIT53	205
+#define OCTEON_IRQ_MSI_BIT54	206
+#define OCTEON_IRQ_MSI_BIT55	207
+#define OCTEON_IRQ_MSI_BIT56	208
+#define OCTEON_IRQ_MSI_BIT57	209
+#define OCTEON_IRQ_MSI_BIT58	210
+#define OCTEON_IRQ_MSI_BIT59	211
+#define OCTEON_IRQ_MSI_BIT60	212
+#define OCTEON_IRQ_MSI_BIT61	213
+#define OCTEON_IRQ_MSI_BIT62	214
+#define OCTEON_IRQ_MSI_BIT63	215
+
+#define OCTEON_IRQ_LAST         216
+#else
+#define OCTEON_IRQ_LAST         152
+#endif
+
+#endif
--- a/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/kernel-entry-init.h
@ -0,0 +1,131 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2008 Cavium Networks, Inc
+ */
+#ifndef __ASM_MACH_CAVIUM_OCTEON_KERNEL_ENTRY_H
+#define __ASM_MACH_CAVIUM_OCTEON_KERNEL_ENTRY_H
+
+
+#define CP0_CYCLE_COUNTER $9, 6
+#define CP0_CVMCTL_REG $9, 7
+#define CP0_CVMMEMCTL_REG $11,7
+#define CP0_PRID_REG $15, 0
+#define CP0_PRID_OCTEON_PASS1 0x000d0000
+#define CP0_PRID_OCTEON_CN30XX 0x000d0200
+
+.macro  kernel_entry_setup
+	# Registers set by bootloader:
+	# (only 32 bits set by bootloader, all addresses are physical
+	# addresses, and need to have the appropriate memory region set
+	# by the kernel
+	# a0 = argc
+	# a1 = argv (kseg0 compat addr)
+	# a2 = 1 if init core, zero otherwise
+	# a3 = address of boot descriptor block
+	.set push
+	.set arch=octeon
+	# Read the cavium mem control register
+	dmfc0   v0, CP0_CVMMEMCTL_REG
+	# Clear the lower 6 bits, the CVMSEG size
+	dins    v0, $0, 0, 6
+	ori     v0, CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE
+	dmtc0   v0, CP0_CVMMEMCTL_REG	# Write the cavium mem control register
+	dmfc0   v0, CP0_CVMCTL_REG	# Read the cavium control register
+#ifdef CONFIG_CAVIUM_OCTEON_HW_FIX_UNALIGNED
+	# Disable unaligned load/store support but leave HW fixup enabled
+	or  v0, v0, 0x5001
+	xor v0, v0, 0x1001
+#else
+	# Disable unaligned load/store and HW fixup support
+	or  v0, v0, 0x5001
+	xor v0, v0, 0x5001
+#endif
+	# Read the processor ID register
+	mfc0 v1, CP0_PRID_REG
+	# Disable instruction prefetching (Octeon Pass1 errata)
+	or  v0, v0, 0x2000
+	# Skip reenable of prefetching for Octeon Pass1
+	beq v1, CP0_PRID_OCTEON_PASS1, skip
+	nop
+	# Reenable instruction prefetching, not on Pass1
+	xor v0, v0, 0x2000
+	# Strip off pass number off of processor id
+	srl v1, 8
+	sll v1, 8
+	# CN30XX needs some extra stuff turned off for better performance
+	bne v1, CP0_PRID_OCTEON_CN30XX, skip
+	nop
+	# CN30XX Use random Icache replacement
+	or  v0, v0, 0x400
+	# CN30XX Disable instruction prefetching
+	or  v0, v0, 0x2000
+skip:
+	# Write the cavium control register
+	dmtc0   v0, CP0_CVMCTL_REG
+	sync
+	# Flush dcache after config change
+	cache   9, 0($0)
+	# Get my core id
+	rdhwr   v0, $0
+	# Jump the master to kernel_entry
+	bne     a2, zero, octeon_main_processor
+	nop
+
+#ifdef CONFIG_SMP
+
+	#
+	# All cores other than the master need to wait here for SMP bootstrap
+	# to begin
+	#
+
+	# This is the variable where the next core to boot os stored
+	PTR_LA  t0, octeon_processor_boot
+octeon_spin_wait_boot:
+	# Get the core id of the next to be booted
+	LONG_L  t1, (t0)
+	# Keep looping if it isn't me
+	bne t1, v0, octeon_spin_wait_boot
+	nop
+	# Get my GP from the global variable
+	PTR_LA  t0, octeon_processor_gp
+	LONG_L  gp, (t0)
+	# Get my SP from the global variable
+	PTR_LA  t0, octeon_processor_sp
+	LONG_L  sp, (t0)
+	# Set the SP global variable to zero so the master knows we've started
+	LONG_S  zero, (t0)
+#ifdef __OCTEON__
+	syncw
+	syncw
+#else
+	sync
+#endif
+	# Jump to the normal Linux SMP entry point
+	j   smp_bootstrap
+	nop
+#else /* CONFIG_SMP */
+
+	#
+	# Someone tried to boot SMP with a non SMP kernel. All extra cores
+	# will halt here.
+	#
+octeon_wait_forever:
+	wait
+	b   octeon_wait_forever
+	nop
+
+#endif /* CONFIG_SMP */
+octeon_main_processor:
+	.set pop
+.endm
+
+/*
+ * Do SMP slave processor setup necessary before we can savely execute C code.
+ */
+	.macro  smp_slave_setup
+	.endm
+
+#endif /* __ASM_MACH_CAVIUM_OCTEON_KERNEL_ENTRY_H */
--- a/arch/mips/include/asm/mach-cavium-octeon/war.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/war.h
@ -0,0 +1,26 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org>
+ * Copyright (C) 2008 Cavium Networks <support@caviumnetworks.com>
+ */
+#ifndef __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H
+#define __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H
+
+#define R4600_V1_INDEX_ICACHEOP_WAR	0
+#define R4600_V1_HIT_CACHEOP_WAR	0
+#define R4600_V2_HIT_CACHEOP_WAR	0
+#define R5432_CP0_INTERRUPT_WAR		0
+#define BCM1250_M3_WAR			0
+#define SIBYTE_1956_WAR			0
+#define MIPS4K_ICACHE_REFILL_WAR	0
+#define MIPS_CACHE_SYNC_WAR		0
+#define TX49XX_ICACHE_INDEX_INV_WAR	0
+#define RM9000_CDEX_SMP_WAR		0
+#define ICACHE_REFILLS_WORKAROUND_WAR	0
+#define R10000_LLSC_WAR			0
+#define MIPS34K_MISSED_ITLB_WAR		0
+
+#endif /* __ASM_MIPS_MACH_CAVIUM_OCTEON_WAR_H */
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@ -0,0 +1,248 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2004-2008 Cavium Networks
+ */
+#ifndef __ASM_OCTEON_OCTEON_H
+#define __ASM_OCTEON_OCTEON_H
+
+#include "cvmx.h"
+
+extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
+						uint64_t alignment,
+						uint64_t min_addr,
+						uint64_t max_addr,
+						int do_locking);
+extern void *octeon_bootmem_alloc(uint64_t size, uint64_t alignment,
+				  int do_locking);
+extern void *octeon_bootmem_alloc_range(uint64_t size, uint64_t alignment,
+					uint64_t min_addr, uint64_t max_addr,
+					int do_locking);
+extern void *octeon_bootmem_alloc_named(uint64_t size, uint64_t alignment,
+					char *name);
+extern void *octeon_bootmem_alloc_named_range(uint64_t size, uint64_t min_addr,
+					      uint64_t max_addr, uint64_t align,
+					      char *name);
+extern void *octeon_bootmem_alloc_named_address(uint64_t size, uint64_t address,
+						char *name);
+extern int octeon_bootmem_free_named(char *name);
+extern void octeon_bootmem_lock(void);
+extern void octeon_bootmem_unlock(void);
+
+extern int octeon_is_simulation(void);
+extern int octeon_is_pci_host(void);
+extern int octeon_usb_is_ref_clk(void);
+extern uint64_t octeon_get_clock_rate(void);
+extern const char *octeon_board_type_string(void);
+extern const char *octeon_get_pci_interrupts(void);
+extern int octeon_get_southbridge_interrupt(void);
+extern int octeon_get_boot_coremask(void);
+extern int octeon_get_boot_num_arguments(void);
+extern const char *octeon_get_boot_argument(int arg);
+extern void octeon_hal_setup_reserved32(void);
+extern void octeon_user_io_init(void);
+struct octeon_cop2_state;
+extern unsigned long octeon_crypto_enable(struct octeon_cop2_state *state);
+extern void octeon_crypto_disable(struct octeon_cop2_state *state,
+				  unsigned long flags);
+
+extern void octeon_init_cvmcount(void);
+
+#define OCTEON_ARGV_MAX_ARGS	64
+#define OCTOEN_SERIAL_LEN	20
+
+struct octeon_boot_descriptor {
+	/* Start of block referenced by assembly code - do not change! */
+	uint32_t desc_version;
+	uint32_t desc_size;
+	uint64_t stack_top;
+	uint64_t heap_base;
+	uint64_t heap_end;
+	/* Only used by bootloader */
+	uint64_t entry_point;
+	uint64_t desc_vaddr;
+	/* End of This block referenced by assembly code - do not change! */
+	uint32_t exception_base_addr;
+	uint32_t stack_size;
+	uint32_t heap_size;
+	/* Argc count for application. */
+	uint32_t argc;
+	uint32_t argv[OCTEON_ARGV_MAX_ARGS];
+
+#define  BOOT_FLAG_INIT_CORE		(1 << 0)
+#define  OCTEON_BL_FLAG_DEBUG		(1 << 1)
+#define  OCTEON_BL_FLAG_NO_MAGIC	(1 << 2)
+	/* If set, use uart1 for console */
+#define  OCTEON_BL_FLAG_CONSOLE_UART1	(1 << 3)
+	/* If set, use PCI console */
+#define  OCTEON_BL_FLAG_CONSOLE_PCI	(1 << 4)
+	/* Call exit on break on serial port */
+#define  OCTEON_BL_FLAG_BREAK		(1 << 5)
+
+	uint32_t flags;
+	uint32_t core_mask;
+	/* DRAM size in megabyes. */
+	uint32_t dram_size;
+	/* physical address of free memory descriptor block. */
+	uint32_t phy_mem_desc_addr;
+	/* used to pass flags from app to debugger. */
+	uint32_t debugger_flags_base_addr;
+	/* CPU clock speed, in hz. */
+	uint32_t eclock_hz;
+	/* DRAM clock speed, in hz. */
+	uint32_t dclock_hz;
+	/* SPI4 clock in hz. */
+	uint32_t spi_clock_hz;
+	uint16_t board_type;
+	uint8_t board_rev_major;
+	uint8_t board_rev_minor;
+	uint16_t chip_type;
+	uint8_t chip_rev_major;
+	uint8_t chip_rev_minor;
+	char board_serial_number[OCTOEN_SERIAL_LEN];
+	uint8_t mac_addr_base[6];
+	uint8_t mac_addr_count;
+	uint64_t cvmx_desc_vaddr;
+};
+
+union octeon_cvmemctl {
+	uint64_t u64;
+	struct {
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t tlbbist:1;
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t l1cbist:1;
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t l1dbist:1;
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t dcmbist:1;
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t ptgbist:1;
+		/* RO 1 = BIST fail, 0 = BIST pass */
+		uint64_t wbfbist:1;
+		/* Reserved */
+		uint64_t reserved:22;
+		/* R/W If set, marked write-buffer entries time out
+		 * the same as as other entries; if clear, marked
+		 * write-buffer entries use the maximum timeout. */
+		uint64_t dismarkwblongto:1;
+		/* R/W If set, a merged store does not clear the
+		 * write-buffer entry timeout state. */
+		uint64_t dismrgclrwbto:1;
+		/* R/W Two bits that are the MSBs of the resultant
+		 * CVMSEG LM word location for an IOBDMA. The other 8
+		 * bits come from the SCRADDR field of the IOBDMA. */
+		uint64_t iobdmascrmsb:2;
+		/* R/W If set, SYNCWS and SYNCS only order marked
+		 * stores; if clear, SYNCWS and SYNCS only order
+		 * unmarked stores. SYNCWSMARKED has no effect when
+		 * DISSYNCWS is set. */
+		uint64_t syncwsmarked:1;
+		/* R/W If set, SYNCWS acts as SYNCW and SYNCS acts as
+		 * SYNC. */
+		uint64_t dissyncws:1;
+		/* R/W If set, no stall happens on write buffer
+		 * full. */
+		uint64_t diswbfst:1;
+		/* R/W If set (and SX set), supervisor-level
+		 * loads/stores can use XKPHYS addresses with
+		 * VA<48>==0 */
+		uint64_t xkmemenas:1;
+		/* R/W If set (and UX set), user-level loads/stores
+		 * can use XKPHYS addresses with VA<48>==0 */
+		uint64_t xkmemenau:1;
+		/* R/W If set (and SX set), supervisor-level
+		 * loads/stores can use XKPHYS addresses with
+		 * VA<48>==1 */
+		uint64_t xkioenas:1;
+		/* R/W If set (and UX set), user-level loads/stores
+		 * can use XKPHYS addresses with VA<48>==1 */
+		uint64_t xkioenau:1;
+		/* R/W If set, all stores act as SYNCW (NOMERGE must
+		 * be set when this is set) RW, reset to 0. */
+		uint64_t allsyncw:1;
+		/* R/W If set, no stores merge, and all stores reach
+		 * the coherent bus in order. */
+		uint64_t nomerge:1;
+		/* R/W Selects the bit in the counter used for DID
+		 * time-outs 0 = 231, 1 = 230, 2 = 229, 3 =
+		 * 214. Actual time-out is between 1x and 2x this
+		 * interval. For example, with DIDTTO=3, expiration
+		 * interval is between 16K and 32K. */
+		uint64_t didtto:2;
+		/* R/W If set, the (mem) CSR clock never turns off. */
+		uint64_t csrckalwys:1;
+		/* R/W If set, mclk never turns off. */
+		uint64_t mclkalwys:1;
+		/* R/W Selects the bit in the counter used for write
+		 * buffer flush time-outs (WBFLT+11) is the bit
+		 * position in an internal counter used to determine
+		 * expiration. The write buffer expires between 1x and
+		 * 2x this interval. For example, with WBFLT = 0, a
+		 * write buffer expires between 2K and 4K cycles after
+		 * the write buffer entry is allocated. */
+		uint64_t wbfltime:3;
+		/* R/W If set, do not put Istream in the L2 cache. */
+		uint64_t istrnol2:1;
+		/* R/W The write buffer threshold. */
+		uint64_t wbthresh:4;
+		/* Reserved */
+		uint64_t reserved2:2;
+		/* R/W If set, CVMSEG is available for loads/stores in
+		 * kernel/debug mode. */
+		uint64_t cvmsegenak:1;
+		/* R/W If set, CVMSEG is available for loads/stores in
+		 * supervisor mode. */
+		uint64_t cvmsegenas:1;
+		/* R/W If set, CVMSEG is available for loads/stores in
+		 * user mode. */
+		uint64_t cvmsegenau:1;
+		/* R/W Size of local memory in cache blocks, 54 (6912
+		 * bytes) is max legal value. */
+		uint64_t lmemsz:6;
+	} s;
+};
+
+struct octeon_cf_data {
+	unsigned long	base_region_bias;
+	unsigned int	base_region;	/* The chip select region used by CF */
+	int		is16bit;	/* 0 - 8bit, !0 - 16bit */
+	int		dma_engine;	/* -1 for no DMA */
+};
+
+extern void octeon_write_lcd(const char *s);
+extern void octeon_check_cpu_bist(void);
+extern int octeon_get_boot_debug_flag(void);
+extern int octeon_get_boot_uart(void);
+
+struct uart_port;
+extern unsigned int octeon_serial_in(struct uart_port *, int);
+extern void octeon_serial_out(struct uart_port *, int, int);
+
+/**
+ * Write a 32bit value to the Octeon NPI register space
+ *
+ * @address: Address to write to
+ * @val:     Value to write
+ */
+static inline void octeon_npi_write32(uint64_t address, uint32_t val)
+{
+	cvmx_write64_uint32(address ^ 4, val);
+	cvmx_read64_uint32(address ^ 4);
+}
+
+
+/**
+ * Read a 32bit value from the Octeon NPI register space
+ *
+ * @address: Address to read
+ * Returns The result
+ */
+static inline uint32_t octeon_npi_read32(uint64_t address)
+{
+	return cvmx_read64_uint32(address ^ 4);
+}
+
+#endif /* __ASM_OCTEON_OCTEON_H */
--- a/arch/mips/kernel/octeon_switch.S
+++ b/arch/mips/kernel/octeon_switch.S
@ -0,0 +1,506 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1994, 1995, 1996, 1998, 1999, 2002, 2003 Ralf Baechle
+ * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
+ * Copyright (C) 1994, 1995, 1996, by Andreas Busse
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2000 MIPS Technologies, Inc.
+ *    written by Carsten Langgaard, carstenl@mips.com
+ */
+#include <asm/asm.h>
+#include <asm/cachectl.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/asm-offsets.h>
+#include <asm/page.h>
+#include <asm/pgtable-bits.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+#include <asm/thread_info.h>
+
+#include <asm/asmmacro.h>
+
+/*
+ * Offset to the current process status flags, the first 32 bytes of the
+ * stack are not used.
+ */
+#define ST_OFF (_THREAD_SIZE - 32 - PT_SIZE + PT_STATUS)
+
+/*
+ * task_struct *resume(task_struct *prev, task_struct *next,
+ *                     struct thread_info *next_ti)
+ */
+	.align	7
+	LEAF(resume)
+	.set arch=octeon
+#ifndef CONFIG_CPU_HAS_LLSC
+	sw	zero, ll_bit
+#endif
+	mfc0	t1, CP0_STATUS
+	LONG_S	t1, THREAD_STATUS(a0)
+	cpu_save_nonscratch a0
+	LONG_S	ra, THREAD_REG31(a0)
+
+	/* check if we need to save COP2 registers */
+	PTR_L	t2, TASK_THREAD_INFO(a0)
+	LONG_L	t0, ST_OFF(t2)
+	bbit0	t0, 30, 1f
+
+	/* Disable COP2 in the stored process state */
+	li	t1, ST0_CU2
+	xor	t0, t1
+	LONG_S	t0, ST_OFF(t2)
+
+	/* Enable COP2 so we can save it */
+	mfc0	t0, CP0_STATUS
+	or	t0, t1
+	mtc0	t0, CP0_STATUS
+
+	/* Save COP2 */
+	daddu	a0, THREAD_CP2
+	jal octeon_cop2_save
+	dsubu	a0, THREAD_CP2
+
+	/* Disable COP2 now that we are done */
+	mfc0	t0, CP0_STATUS
+	li	t1, ST0_CU2
+	xor	t0, t1
+	mtc0	t0, CP0_STATUS
+
+1:
+#if CONFIG_CAVIUM_OCTEON_CVMSEG_SIZE > 0
+	/* Check if we need to store CVMSEG state */
+	mfc0	t0, $11,7 	/* CvmMemCtl */
+	bbit0	t0, 6, 3f	/* Is user access enabled? */
+
+	/* Store the CVMSEG state */
+	/* Extract the size of CVMSEG */
+	andi	t0, 0x3f
+	/* Multiply * (cache line size/sizeof(long)/2) */
+	sll	t0, 7-LONGLOG-1
+	li	t1, -32768 	/* Base address of CVMSEG */
+	LONG_ADDI t2, a0, THREAD_CVMSEG	/* Where to store CVMSEG to */
+	synciobdma
+2:
+	.set noreorder
+	LONG_L	t8, 0(t1)	/* Load from CVMSEG */
+	subu	t0, 1		/* Decrement loop var */
+	LONG_L	t9, LONGSIZE(t1)/* Load from CVMSEG */
+	LONG_ADDU t1, LONGSIZE*2 /* Increment loc in CVMSEG */
+	LONG_S	t8, 0(t2)	/* Store CVMSEG to thread storage */
+	LONG_ADDU t2, LONGSIZE*2 /* Increment loc in thread storage */
+	bnez	t0, 2b		/* Loop until we've copied it all */
+	 LONG_S	t9, -LONGSIZE(t2)/* Store CVMSEG to thread storage */
+	.set reorder
+
+	/* Disable access to CVMSEG */
+	mfc0	t0, $11,7 	/* CvmMemCtl */
+	xori	t0, t0, 0x40	/* Bit 6 is CVMSEG user enable */
+	mtc0	t0, $11,7 	/* CvmMemCtl */
+#endif
+3:
+	/*
+	 * The order of restoring the registers takes care of the race
+	 * updating $28, $29 and kernelsp without disabling ints.
+	 */
+	move	$28, a2
+	cpu_restore_nonscratch a1
+
+#if (_THREAD_SIZE - 32) < 0x8000
+	PTR_ADDIU	t0, $28, _THREAD_SIZE - 32
+#else
+	PTR_LI		t0, _THREAD_SIZE - 32
+	PTR_ADDU	t0, $28
+#endif
+	set_saved_sp	t0, t1, t2
+
+	mfc0	t1, CP0_STATUS		/* Do we really need this? */
+	li	a3, 0xff01
+	and	t1, a3
+	LONG_L	a2, THREAD_STATUS(a1)
+	nor	a3, $0, a3
+	and	a2, a3
+	or	a2, t1
+	mtc0	a2, CP0_STATUS
+	move	v0, a0
+	jr	ra
+	END(resume)
+
+/*
+ * void octeon_cop2_save(struct octeon_cop2_state *a0)
+ */
+	.align	7
+	LEAF(octeon_cop2_save)
+
+	dmfc0	t9, $9,7	/* CvmCtl register. */
+
+        /* Save the COP2 CRC state */
+	dmfc2	t0, 0x0201
+	dmfc2	t1, 0x0202
+	dmfc2	t2, 0x0200
+	sd	t0, OCTEON_CP2_CRC_IV(a0)
+	sd	t1, OCTEON_CP2_CRC_LENGTH(a0)
+	sd	t2, OCTEON_CP2_CRC_POLY(a0)
+	/* Skip next instructions if CvmCtl[NODFA_CP2] set */
+	bbit1	t9, 28, 1f
+
+	/* Save the LLM state */
+	dmfc2	t0, 0x0402
+	dmfc2	t1, 0x040A
+	sd	t0, OCTEON_CP2_LLM_DAT(a0)
+	sd	t1, OCTEON_CP2_LLM_DAT+8(a0)
+
+1:      bbit1	t9, 26, 3f	/* done if CvmCtl[NOCRYPTO] set */
+
+	/* Save the COP2 crypto state */
+        /* this part is mostly common to both pass 1 and later revisions */
+	dmfc2 	t0, 0x0084
+	dmfc2 	t1, 0x0080
+	dmfc2 	t2, 0x0081
+	dmfc2 	t3, 0x0082
+	sd	t0, OCTEON_CP2_3DES_IV(a0)
+	dmfc2 	t0, 0x0088
+	sd	t1, OCTEON_CP2_3DES_KEY(a0)
+	dmfc2 	t1, 0x0111                      /* only necessary for pass 1 */
+	sd	t2, OCTEON_CP2_3DES_KEY+8(a0)
+	dmfc2 	t2, 0x0102
+	sd	t3, OCTEON_CP2_3DES_KEY+16(a0)
+	dmfc2 	t3, 0x0103
+	sd	t0, OCTEON_CP2_3DES_RESULT(a0)
+	dmfc2 	t0, 0x0104
+	sd	t1, OCTEON_CP2_AES_INP0(a0)     /* only necessary for pass 1 */
+	dmfc2 	t1, 0x0105
+	sd	t2, OCTEON_CP2_AES_IV(a0)
+	dmfc2	t2, 0x0106
+	sd	t3, OCTEON_CP2_AES_IV+8(a0)
+	dmfc2 	t3, 0x0107
+	sd	t0, OCTEON_CP2_AES_KEY(a0)
+	dmfc2	t0, 0x0110
+	sd	t1, OCTEON_CP2_AES_KEY+8(a0)
+	dmfc2	t1, 0x0100
+	sd	t2, OCTEON_CP2_AES_KEY+16(a0)
+	dmfc2	t2, 0x0101
+	sd	t3, OCTEON_CP2_AES_KEY+24(a0)
+	mfc0	t3, $15,0 	/* Get the processor ID register */
+	sd	t0, OCTEON_CP2_AES_KEYLEN(a0)
+	li	t0, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
+	sd	t1, OCTEON_CP2_AES_RESULT(a0)
+	sd	t2, OCTEON_CP2_AES_RESULT+8(a0)
+	/* Skip to the Pass1 version of the remainder of the COP2 state */
+	beq	t3, t0, 2f
+
+        /* the non-pass1 state when !CvmCtl[NOCRYPTO] */
+	dmfc2	t1, 0x0240
+	dmfc2	t2, 0x0241
+	dmfc2	t3, 0x0242
+	dmfc2	t0, 0x0243
+	sd	t1, OCTEON_CP2_HSH_DATW(a0)
+	dmfc2	t1, 0x0244
+	sd	t2, OCTEON_CP2_HSH_DATW+8(a0)
+	dmfc2	t2, 0x0245
+	sd	t3, OCTEON_CP2_HSH_DATW+16(a0)
+	dmfc2	t3, 0x0246
+	sd	t0, OCTEON_CP2_HSH_DATW+24(a0)
+	dmfc2	t0, 0x0247
+	sd	t1, OCTEON_CP2_HSH_DATW+32(a0)
+	dmfc2	t1, 0x0248
+	sd	t2, OCTEON_CP2_HSH_DATW+40(a0)
+	dmfc2	t2, 0x0249
+	sd	t3, OCTEON_CP2_HSH_DATW+48(a0)
+	dmfc2	t3, 0x024A
+	sd	t0, OCTEON_CP2_HSH_DATW+56(a0)
+	dmfc2	t0, 0x024B
+	sd	t1, OCTEON_CP2_HSH_DATW+64(a0)
+	dmfc2	t1, 0x024C
+	sd	t2, OCTEON_CP2_HSH_DATW+72(a0)
+	dmfc2	t2, 0x024D
+	sd	t3, OCTEON_CP2_HSH_DATW+80(a0)
+	dmfc2 	t3, 0x024E
+	sd	t0, OCTEON_CP2_HSH_DATW+88(a0)
+	dmfc2	t0, 0x0250
+	sd	t1, OCTEON_CP2_HSH_DATW+96(a0)
+	dmfc2	t1, 0x0251
+	sd	t2, OCTEON_CP2_HSH_DATW+104(a0)
+	dmfc2	t2, 0x0252
+	sd	t3, OCTEON_CP2_HSH_DATW+112(a0)
+	dmfc2	t3, 0x0253
+	sd	t0, OCTEON_CP2_HSH_IVW(a0)
+	dmfc2	t0, 0x0254
+	sd	t1, OCTEON_CP2_HSH_IVW+8(a0)
+	dmfc2	t1, 0x0255
+	sd	t2, OCTEON_CP2_HSH_IVW+16(a0)
+	dmfc2	t2, 0x0256
+	sd	t3, OCTEON_CP2_HSH_IVW+24(a0)
+	dmfc2	t3, 0x0257
+	sd	t0, OCTEON_CP2_HSH_IVW+32(a0)
+	dmfc2 	t0, 0x0258
+	sd	t1, OCTEON_CP2_HSH_IVW+40(a0)
+	dmfc2 	t1, 0x0259
+	sd	t2, OCTEON_CP2_HSH_IVW+48(a0)
+	dmfc2	t2, 0x025E
+	sd	t3, OCTEON_CP2_HSH_IVW+56(a0)
+	dmfc2	t3, 0x025A
+	sd	t0, OCTEON_CP2_GFM_MULT(a0)
+	dmfc2	t0, 0x025B
+	sd	t1, OCTEON_CP2_GFM_MULT+8(a0)
+	sd	t2, OCTEON_CP2_GFM_POLY(a0)
+	sd	t3, OCTEON_CP2_GFM_RESULT(a0)
+	sd	t0, OCTEON_CP2_GFM_RESULT+8(a0)
+	jr	ra
+
+2:      /* pass 1 special stuff when !CvmCtl[NOCRYPTO] */
+	dmfc2	t3, 0x0040
+	dmfc2	t0, 0x0041
+	dmfc2	t1, 0x0042
+	dmfc2	t2, 0x0043
+	sd	t3, OCTEON_CP2_HSH_DATW(a0)
+	dmfc2	t3, 0x0044
+	sd	t0, OCTEON_CP2_HSH_DATW+8(a0)
+	dmfc2	t0, 0x0045
+	sd	t1, OCTEON_CP2_HSH_DATW+16(a0)
+	dmfc2	t1, 0x0046
+	sd	t2, OCTEON_CP2_HSH_DATW+24(a0)
+	dmfc2	t2, 0x0048
+	sd	t3, OCTEON_CP2_HSH_DATW+32(a0)
+	dmfc2	t3, 0x0049
+	sd	t0, OCTEON_CP2_HSH_DATW+40(a0)
+	dmfc2	t0, 0x004A
+	sd	t1, OCTEON_CP2_HSH_DATW+48(a0)
+	sd	t2, OCTEON_CP2_HSH_IVW(a0)
+	sd	t3, OCTEON_CP2_HSH_IVW+8(a0)
+	sd	t0, OCTEON_CP2_HSH_IVW+16(a0)
+
+3:      /* pass 1 or CvmCtl[NOCRYPTO] set */
+	jr	ra
+	END(octeon_cop2_save)
+
+/*
+ * void octeon_cop2_restore(struct octeon_cop2_state *a0)
+ */
+	.align	7
+	.set push
+	.set noreorder
+	LEAF(octeon_cop2_restore)
+        /* First cache line was prefetched before the call */
+        pref    4,  128(a0)
+	dmfc0	t9, $9,7	/* CvmCtl register. */
+
+        pref    4,  256(a0)
+	ld	t0, OCTEON_CP2_CRC_IV(a0)
+        pref    4,  384(a0)
+	ld	t1, OCTEON_CP2_CRC_LENGTH(a0)
+	ld	t2, OCTEON_CP2_CRC_POLY(a0)
+
+	/* Restore the COP2 CRC state */
+	dmtc2	t0, 0x0201
+	dmtc2 	t1, 0x1202
+	bbit1	t9, 28, 2f	/* Skip LLM if CvmCtl[NODFA_CP2] is set */
+	 dmtc2	t2, 0x4200
+
+	/* Restore the LLM state */
+	ld	t0, OCTEON_CP2_LLM_DAT(a0)
+	ld	t1, OCTEON_CP2_LLM_DAT+8(a0)
+	dmtc2	t0, 0x0402
+	dmtc2	t1, 0x040A
+
+2:
+	bbit1	t9, 26, done_restore	/* done if CvmCtl[NOCRYPTO] set */
+	 nop
+
+	/* Restore the COP2 crypto state common to pass 1 and pass 2 */
+	ld	t0, OCTEON_CP2_3DES_IV(a0)
+	ld	t1, OCTEON_CP2_3DES_KEY(a0)
+	ld	t2, OCTEON_CP2_3DES_KEY+8(a0)
+	dmtc2 	t0, 0x0084
+	ld	t0, OCTEON_CP2_3DES_KEY+16(a0)
+	dmtc2 	t1, 0x0080
+	ld	t1, OCTEON_CP2_3DES_RESULT(a0)
+	dmtc2 	t2, 0x0081
+	ld	t2, OCTEON_CP2_AES_INP0(a0) /* only really needed for pass 1 */
+	dmtc2	t0, 0x0082
+	ld	t0, OCTEON_CP2_AES_IV(a0)
+	dmtc2 	t1, 0x0098
+	ld	t1, OCTEON_CP2_AES_IV+8(a0)
+	dmtc2 	t2, 0x010A                  /* only really needed for pass 1 */
+	ld	t2, OCTEON_CP2_AES_KEY(a0)
+	dmtc2 	t0, 0x0102
+	ld	t0, OCTEON_CP2_AES_KEY+8(a0)
+	dmtc2	t1, 0x0103
+	ld	t1, OCTEON_CP2_AES_KEY+16(a0)
+	dmtc2	t2, 0x0104
+	ld	t2, OCTEON_CP2_AES_KEY+24(a0)
+	dmtc2	t0, 0x0105
+	ld	t0, OCTEON_CP2_AES_KEYLEN(a0)
+	dmtc2	t1, 0x0106
+	ld	t1, OCTEON_CP2_AES_RESULT(a0)
+	dmtc2	t2, 0x0107
+	ld	t2, OCTEON_CP2_AES_RESULT+8(a0)
+	mfc0	t3, $15,0 	/* Get the processor ID register */
+	dmtc2	t0, 0x0110
+	li	t0, 0x000d0000	/* This is the processor ID of Octeon Pass1 */
+	dmtc2	t1, 0x0100
+	bne	t0, t3, 3f	/* Skip the next stuff for non-pass1 */
+	 dmtc2	t2, 0x0101
+
+        /* this code is specific for pass 1 */
+	ld	t0, OCTEON_CP2_HSH_DATW(a0)
+	ld	t1, OCTEON_CP2_HSH_DATW+8(a0)
+	ld	t2, OCTEON_CP2_HSH_DATW+16(a0)
+	dmtc2	t0, 0x0040
+	ld	t0, OCTEON_CP2_HSH_DATW+24(a0)
+	dmtc2	t1, 0x0041
+	ld	t1, OCTEON_CP2_HSH_DATW+32(a0)
+	dmtc2	t2, 0x0042
+	ld	t2, OCTEON_CP2_HSH_DATW+40(a0)
+	dmtc2	t0, 0x0043
+	ld	t0, OCTEON_CP2_HSH_DATW+48(a0)
+	dmtc2	t1, 0x0044
+	ld	t1, OCTEON_CP2_HSH_IVW(a0)
+	dmtc2	t2, 0x0045
+	ld	t2, OCTEON_CP2_HSH_IVW+8(a0)
+	dmtc2	t0, 0x0046
+	ld	t0, OCTEON_CP2_HSH_IVW+16(a0)
+	dmtc2	t1, 0x0048
+	dmtc2	t2, 0x0049
+        b done_restore   /* unconditional branch */
+	 dmtc2	t0, 0x004A
+
+3:      /* this is post-pass1 code */
+	ld	t2, OCTEON_CP2_HSH_DATW(a0)
+	ld	t0, OCTEON_CP2_HSH_DATW+8(a0)
+	ld	t1, OCTEON_CP2_HSH_DATW+16(a0)
+	dmtc2	t2, 0x0240
+	ld	t2, OCTEON_CP2_HSH_DATW+24(a0)
+	dmtc2	t0, 0x0241
+	ld	t0, OCTEON_CP2_HSH_DATW+32(a0)
+	dmtc2	t1, 0x0242
+	ld	t1, OCTEON_CP2_HSH_DATW+40(a0)
+	dmtc2	t2, 0x0243
+	ld	t2, OCTEON_CP2_HSH_DATW+48(a0)
+	dmtc2	t0, 0x0244
+	ld	t0, OCTEON_CP2_HSH_DATW+56(a0)
+	dmtc2	t1, 0x0245
+	ld	t1, OCTEON_CP2_HSH_DATW+64(a0)
+	dmtc2	t2, 0x0246
+	ld	t2, OCTEON_CP2_HSH_DATW+72(a0)
+	dmtc2	t0, 0x0247
+	ld	t0, OCTEON_CP2_HSH_DATW+80(a0)
+	dmtc2	t1, 0x0248
+	ld	t1, OCTEON_CP2_HSH_DATW+88(a0)
+	dmtc2	t2, 0x0249
+	ld	t2, OCTEON_CP2_HSH_DATW+96(a0)
+	dmtc2	t0, 0x024A
+	ld	t0, OCTEON_CP2_HSH_DATW+104(a0)
+	dmtc2	t1, 0x024B
+	ld	t1, OCTEON_CP2_HSH_DATW+112(a0)
+	dmtc2	t2, 0x024C
+	ld	t2, OCTEON_CP2_HSH_IVW(a0)
+	dmtc2	t0, 0x024D
+	ld	t0, OCTEON_CP2_HSH_IVW+8(a0)
+	dmtc2	t1, 0x024E
+	ld	t1, OCTEON_CP2_HSH_IVW+16(a0)
+	dmtc2	t2, 0x0250
+	ld	t2, OCTEON_CP2_HSH_IVW+24(a0)
+	dmtc2	t0, 0x0251
+	ld	t0, OCTEON_CP2_HSH_IVW+32(a0)
+	dmtc2	t1, 0x0252
+	ld	t1, OCTEON_CP2_HSH_IVW+40(a0)
+	dmtc2	t2, 0x0253
+	ld	t2, OCTEON_CP2_HSH_IVW+48(a0)
+	dmtc2	t0, 0x0254
+	ld	t0, OCTEON_CP2_HSH_IVW+56(a0)
+	dmtc2	t1, 0x0255
+	ld	t1, OCTEON_CP2_GFM_MULT(a0)
+	dmtc2	t2, 0x0256
+	ld	t2, OCTEON_CP2_GFM_MULT+8(a0)
+	dmtc2	t0, 0x0257
+	ld	t0, OCTEON_CP2_GFM_POLY(a0)
+	dmtc2	t1, 0x0258
+	ld	t1, OCTEON_CP2_GFM_RESULT(a0)
+	dmtc2	t2, 0x0259
+	ld	t2, OCTEON_CP2_GFM_RESULT+8(a0)
+	dmtc2	t0, 0x025E
+	dmtc2	t1, 0x025A
+	dmtc2	t2, 0x025B
+
+done_restore:
+	jr	ra
+	 nop
+	END(octeon_cop2_restore)
+	.set pop
+
+/*
+ * void octeon_mult_save()
+ * sp is assumed to point to a struct pt_regs
+ *
+ * NOTE: This is called in SAVE_SOME in stackframe.h. It can only
+ *       safely modify k0 and k1.
+ */
+	.align	7
+	.set push
+	.set noreorder
+	LEAF(octeon_mult_save)
+	dmfc0	k0, $9,7	/* CvmCtl register. */
+	bbit1	k0, 27, 1f	/* Skip CvmCtl[NOMUL] */
+	 nop
+
+	/* Save the multiplier state */
+	v3mulu	k0, $0, $0
+	v3mulu	k1, $0, $0
+	sd	k0, PT_MTP(sp)        /* PT_MTP    has P0 */
+	v3mulu	k0, $0, $0
+	sd	k1, PT_MTP+8(sp)      /* PT_MTP+8  has P1 */
+	ori	k1, $0, 1
+	v3mulu	k1, k1, $0
+	sd	k0, PT_MTP+16(sp)     /* PT_MTP+16 has P2 */
+	v3mulu	k0, $0, $0
+	sd	k1, PT_MPL(sp)        /* PT_MPL    has MPL0 */
+	v3mulu	k1, $0, $0
+	sd	k0, PT_MPL+8(sp)      /* PT_MPL+8  has MPL1 */
+	jr	ra
+	 sd	k1, PT_MPL+16(sp)     /* PT_MPL+16 has MPL2 */
+
+1:	/* Resume here if CvmCtl[NOMUL] */
+	jr	ra
+	END(octeon_mult_save)
+	.set pop
+
+/*
+ * void octeon_mult_restore()
+ * sp is assumed to point to a struct pt_regs
+ *
+ * NOTE: This is called in RESTORE_SOME in stackframe.h.
+ */
+	.align	7
+	.set push
+	.set noreorder
+	LEAF(octeon_mult_restore)
+	dmfc0	k1, $9,7		/* CvmCtl register. */
+	ld	v0, PT_MPL(sp)        	/* MPL0 */
+	ld	v1, PT_MPL+8(sp)      	/* MPL1 */
+	ld	k0, PT_MPL+16(sp)     	/* MPL2 */
+	bbit1	k1, 27, 1f		/* Skip CvmCtl[NOMUL] */
+	/* Normally falls through, so no time wasted here */
+	nop
+
+	/* Restore the multiplier state */
+	ld	k1, PT_MTP+16(sp)     	/* P2 */
+	MTM0	v0			/* MPL0 */
+	ld	v0, PT_MTP+8(sp)	/* P1 */
+	MTM1	v1			/* MPL1 */
+	ld	v1, PT_MTP(sp)   	/* P0 */
+	MTM2	k0			/* MPL2 */
+	MTP2	k1			/* P2 */
+	MTP1	v0			/* P1 */
+	jr	ra
+	 MTP0	v1			/* P0 */
+
+1:	/* Resume here if CvmCtl[NOMUL] */
+	jr	ra
+	 nop
+	END(octeon_mult_restore)
+	.set pop
+
--- a/arch/mips/mm/c-octeon.c
+++ b/arch/mips/mm/c-octeon.c
@ -0,0 +1,307 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2005-2007 Cavium Networks
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+#include <linux/cpu.h>
+#include <linux/io.h>
+
+#include <asm/bcache.h>
+#include <asm/bootinfo.h>
+#include <asm/cacheops.h>
+#include <asm/cpu-features.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/r4kcache.h>
+#include <asm/system.h>
+#include <asm/mmu_context.h>
+#include <asm/war.h>
+
+#include <asm/octeon/octeon.h>
+
+unsigned long long cache_err_dcache[NR_CPUS];
+
+/**
+ * Octeon automatically flushes the dcache on tlb changes, so
+ * from Linux's viewpoint it acts much like a physically
+ * tagged cache. No flushing is needed
+ *
+ */
+static void octeon_flush_data_cache_page(unsigned long addr)
+{
+    /* Nothing to do */
+}
+
+static inline void octeon_local_flush_icache(void)
+{
+	asm volatile ("synci 0($0)");
+}
+
+/*
+ * Flush local I-cache for the specified range.
+ */
+static void local_octeon_flush_icache_range(unsigned long start,
+					    unsigned long end)
+{
+	octeon_local_flush_icache();
+}
+
+/**
+ * Flush caches as necessary for all cores affected by a
+ * vma. If no vma is supplied, all cores are flushed.
+ *
+ * @vma:    VMA to flush or NULL to flush all icaches.
+ */
+static void octeon_flush_icache_all_cores(struct vm_area_struct *vma)
+{
+	extern void octeon_send_ipi_single(int cpu, unsigned int action);
+#ifdef CONFIG_SMP
+	int cpu;
+	cpumask_t mask;
+#endif
+
+	mb();
+	octeon_local_flush_icache();
+#ifdef CONFIG_SMP
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	/*
+	 * If we have a vma structure, we only need to worry about
+	 * cores it has been used on
+	 */
+	if (vma)
+		mask = vma->vm_mm->cpu_vm_mask;
+	else
+		mask = cpu_online_map;
+	cpu_clear(cpu, mask);
+	for_each_cpu_mask(cpu, mask)
+		octeon_send_ipi_single(cpu, SMP_ICACHE_FLUSH);
+
+	preempt_enable();
+#endif
+}
+
+
+/**
+ * Called to flush the icache on all cores
+ */
+static void octeon_flush_icache_all(void)
+{
+	octeon_flush_icache_all_cores(NULL);
+}
+
+
+/**
+ * Called to flush all memory associated with a memory
+ * context.
+ *
+ * @mm:     Memory context to flush
+ */
+static void octeon_flush_cache_mm(struct mm_struct *mm)
+{
+	/*
+	 * According to the R4K version of this file, CPUs without
+	 * dcache aliases don't need to do anything here
+	 */
+}
+
+
+/**
+ * Flush a range of kernel addresses out of the icache
+ *
+ */
+static void octeon_flush_icache_range(unsigned long start, unsigned long end)
+{
+	octeon_flush_icache_all_cores(NULL);
+}
+
+
+/**
+ * Flush the icache for a trampoline. These are used for interrupt
+ * and exception hooking.
+ *
+ * @addr:   Address to flush
+ */
+static void octeon_flush_cache_sigtramp(unsigned long addr)
+{
+	struct vm_area_struct *vma;
+
+	vma = find_vma(current->mm, addr);
+	octeon_flush_icache_all_cores(vma);
+}
+
+
+/**
+ * Flush a range out of a vma
+ *
+ * @vma:    VMA to flush
+ * @start:
+ * @end:
+ */
+static void octeon_flush_cache_range(struct vm_area_struct *vma,
+				     unsigned long start, unsigned long end)
+{
+	if (vma->vm_flags & VM_EXEC)
+		octeon_flush_icache_all_cores(vma);
+}
+
+
+/**
+ * Flush a specific page of a vma
+ *
+ * @vma:    VMA to flush page for
+ * @page:   Page to flush
+ * @pfn:
+ */
+static void octeon_flush_cache_page(struct vm_area_struct *vma,
+				    unsigned long page, unsigned long pfn)
+{
+	if (vma->vm_flags & VM_EXEC)
+		octeon_flush_icache_all_cores(vma);
+}
+
+
+/**
+ * Probe Octeon's caches
+ *
+ */
+static void __devinit probe_octeon(void)
+{
+	unsigned long icache_size;
+	unsigned long dcache_size;
+	unsigned int config1;
+	struct cpuinfo_mips *c = &current_cpu_data;
+
+	switch (c->cputype) {
+	case CPU_CAVIUM_OCTEON:
+		config1 = read_c0_config1();
+		c->icache.linesz = 2 << ((config1 >> 19) & 7);
+		c->icache.sets = 64 << ((config1 >> 22) & 7);
+		c->icache.ways = 1 + ((config1 >> 16) & 7);
+		c->icache.flags |= MIPS_CACHE_VTAG;
+		icache_size =
+			c->icache.sets * c->icache.ways * c->icache.linesz;
+		c->icache.waybit = ffs(icache_size / c->icache.ways) - 1;
+		c->dcache.linesz = 128;
+		if (OCTEON_IS_MODEL(OCTEON_CN3XXX))
+			c->dcache.sets = 1; /* CN3XXX has one Dcache set */
+		else
+			c->dcache.sets = 2; /* CN5XXX has two Dcache sets */
+		c->dcache.ways = 64;
+		dcache_size =
+			c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+		c->dcache.waybit = ffs(dcache_size / c->dcache.ways) - 1;
+		c->options |= MIPS_CPU_PREFETCH;
+		break;
+
+	default:
+		panic("Unsupported Cavium Networks CPU type\n");
+		break;
+	}
+
+	/* compute a couple of other cache variables */
+	c->icache.waysize = icache_size / c->icache.ways;
+	c->dcache.waysize = dcache_size / c->dcache.ways;
+
+	c->icache.sets = icache_size / (c->icache.linesz * c->icache.ways);
+	c->dcache.sets = dcache_size / (c->dcache.linesz * c->dcache.ways);
+
+	if (smp_processor_id() == 0) {
+		pr_notice("Primary instruction cache %ldkB, %s, %d way, "
+			  "%d sets, linesize %d bytes.\n",
+			  icache_size >> 10,
+			  cpu_has_vtag_icache ?
+				"virtually tagged" : "physically tagged",
+			  c->icache.ways, c->icache.sets, c->icache.linesz);
+
+		pr_notice("Primary data cache %ldkB, %d-way, %d sets, "
+			  "linesize %d bytes.\n",
+			  dcache_size >> 10, c->dcache.ways,
+			  c->dcache.sets, c->dcache.linesz);
+	}
+}
+
+
+/**
+ * Setup the Octeon cache flush routines
+ *
+ */
+void __devinit octeon_cache_init(void)
+{
+	extern unsigned long ebase;
+	extern char except_vec2_octeon;
+
+	memcpy((void *)(ebase + 0x100), &except_vec2_octeon, 0x80);
+	octeon_flush_cache_sigtramp(ebase + 0x100);
+
+	probe_octeon();
+
+	shm_align_mask = PAGE_SIZE - 1;
+
+	flush_cache_all			= octeon_flush_icache_all;
+	__flush_cache_all		= octeon_flush_icache_all;
+	flush_cache_mm			= octeon_flush_cache_mm;
+	flush_cache_page		= octeon_flush_cache_page;
+	flush_cache_range		= octeon_flush_cache_range;
+	flush_cache_sigtramp		= octeon_flush_cache_sigtramp;
+	flush_icache_all		= octeon_flush_icache_all;
+	flush_data_cache_page		= octeon_flush_data_cache_page;
+	flush_icache_range		= octeon_flush_icache_range;
+	local_flush_icache_range	= local_octeon_flush_icache_range;
+
+	build_clear_page();
+	build_copy_page();
+}
+
+/**
+ * Handle a cache error exception
+ */
+
+static void  cache_parity_error_octeon(int non_recoverable)
+{
+	unsigned long coreid = cvmx_get_core_num();
+	uint64_t icache_err = read_octeon_c0_icacheerr();
+
+	pr_err("Cache error exception:\n");
+	pr_err("cp0_errorepc == %lx\n", read_c0_errorepc());
+	if (icache_err & 1) {
+		pr_err("CacheErr (Icache) == %llx\n",
+		       (unsigned long long)icache_err);
+		write_octeon_c0_icacheerr(0);
+	}
+	if (cache_err_dcache[coreid] & 1) {
+		pr_err("CacheErr (Dcache) == %llx\n",
+		       (unsigned long long)cache_err_dcache[coreid]);
+		cache_err_dcache[coreid] = 0;
+	}
+
+	if (non_recoverable)
+		panic("Can't handle cache error: nested exception");
+}
+
+/**
+ * Called when the the exception is not recoverable
+ */
+
+asmlinkage void cache_parity_error_octeon_recoverable(void)
+{
+	cache_parity_error_octeon(0);
+}
+
+/**
+ * Called when the the exception is recoverable
+ */
+
+asmlinkage void cache_parity_error_octeon_non_recoverable(void)
+{
+	cache_parity_error_octeon(1);
+}
+
--- a/arch/mips/mm/cex-oct.S
+++ b/arch/mips/mm/cex-oct.S
@ -0,0 +1,70 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2006 Cavium Networks
+ * Cache error handler
+ */
+
+#include <asm/asm.h>
+#include <asm/regdef.h>
+#include <asm/mipsregs.h>
+#include <asm/stackframe.h>
+
+/*
+ * Handle cache error. Indicate to the second level handler whether
+ * the exception is recoverable.
+ */
+	LEAF(except_vec2_octeon)
+
+	.set    push
+	.set	mips64r2
+	.set	noreorder
+	.set	noat
+
+
+	/* due to an errata we need to read the COP0 CacheErr (Dcache)
+	 * before any cache/DRAM access	 */
+
+	rdhwr   k0, $0        /* get core_id */
+	PTR_LA  k1, cache_err_dcache
+	sll     k0, k0, 3
+	PTR_ADDU k1, k0, k1    /* k1 = &cache_err_dcache[core_id] */
+
+	dmfc0   k0, CP0_CACHEERR, 1
+	sd      k0, (k1)
+	dmtc0   $0, CP0_CACHEERR, 1
+
+        /* check whether this is a nested exception */
+	mfc0    k1, CP0_STATUS
+	andi    k1, k1, ST0_EXL
+	beqz    k1, 1f
+	 nop
+	j	cache_parity_error_octeon_non_recoverable
+	 nop
+
+	/* exception is recoverable */
+1:	j	handle_cache_err
+	 nop
+
+	.set    pop
+	END(except_vec2_octeon)
+
+ /* We need to jump to handle_cache_err so that the previous handler
+  * can fit within 0x80 bytes. We also move from 0xFFFFFFFFAXXXXXXX
+  * space (uncached) to the 0xFFFFFFFF8XXXXXXX space (cached).  */
+	LEAF(handle_cache_err)
+	.set    push
+        .set    noreorder
+        .set    noat
+
+	SAVE_ALL
+	KMODE
+	jal     cache_parity_error_octeon_recoverable
+	nop
+	j       ret_from_exception
+	nop
+
+	.set pop
+	END(handle_cache_err)