IA64 Kexec-Kdump kernel patch

From: Zou Nan hai <nanhai.zou_at_intel.com>
Date: 2006-08-14 16:47:54
This patch is the kexec-kdump patch re-based to 2.6.18-rc4 kernel.

Changes since last patch include:

1. re-base the patch to 2.6.18-rc4
2. per-cpu register dumping, so that user space tool can unwind kernel stack.
   This should really done by INIT event. But unfortunately it seems that the 
   firmware on the machine I am using does not handle INIT event correctly...
   So current I dump per-cpu registers by simple IPI. 
	
3. fix on_line_cpu issue, 
   previous patch stop APs by smp_send_stop. AP will clear his bit in 
   cpu_online_map, then crash tools will see a cpu_online_map which only 
   contains 1 cpu in vmcore file.

4. reserve efi memmap area and ia64 boot param area for the first kernel.

To test crash dump.

Compile a kernel with CONFIG_KEXEC CONFIG_CRASH_DUMP 
CONFIG_PROC_VMCORE enabled.

using this kernel for both host kernel and crash dumping kernel.

Boot this kernel with kernel parameter "crashkernel=XXX@YYY".
XXX should be a size big enough for crash kernel to run, YYY should 
be a physical address which contains more than XXX memory and aligned to 64M.
you can check it in /proc/iomem.
Be careful of choosing the size and address to reserve.
The second kernel may hang silently if there is too few memory to use.

After the first kernel boots, the "Crash kernel" region will show in /proc/iomem.
then load the same kernel with command.

kexec -p vmlinux.gz --initrd=initrd --append="root=... maxcpus=1"

trigger a crash with echo c > /proc/sysrq-trigger

after the crash kernel boots, 
copy /proc/vmcore to disk

gdb vmlinux-of-first-kernel vmcore
then you can check the stack and variables of first kernel.

kexec -l + kexec -e may not work.
That is because of a recent change in mpt-fusion driver 
shutdown code. 
There was some delay in mptscsih_remove path. 
the code was removed in 2.6.17, that cause MPT fusion driver unable to reinitialize.
add a delay back to mptscsih_remove can solve this problem.


Signed-off-by:
 Zou Nan hai <nanhai.zou@intel.com>


diff -Nraup a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
--- a/arch/ia64/hp/common/sba_iommu.c	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/hp/common/sba_iommu.c	2006-08-15 09:50:03.000000000 +0800
@@ -1623,6 +1623,28 @@ ioc_iova_init(struct ioc *ioc)
 	READ_REG(ioc->ioc_hpa + IOC_IBASE);
 }
 
+#ifdef CONFIG_KEXEC
+void
+ioc_iova_disable(void)
+{
+	struct ioc *ioc;
+
+	ioc = ioc_list;
+
+	while (ioc != NULL) {
+		/* Disable IOVA translation */
+		WRITE_REG(ioc->ibase & 0xfffffffffffffffe, ioc->ioc_hpa + IOC_IBASE);
+		READ_REG(ioc->ioc_hpa + IOC_IBASE);
+
+		/* Clear I/O TLB of any possible entries */
+		WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
+		READ_REG(ioc->ioc_hpa + IOC_PCOM);
+
+		ioc = ioc->next;
+	}
+}
+#endif
+
 static void __init
 ioc_resource_init(struct ioc *ioc)
 {
diff -Nraup a/arch/ia64/Kconfig b/arch/ia64/Kconfig
--- a/arch/ia64/Kconfig	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/Kconfig	2006-08-15 09:50:03.000000000 +0800
@@ -427,6 +427,29 @@ config SGI_SN
 
 source "drivers/sn/Kconfig"
 
+config KEXEC
+	bool "kexec system call (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  kexec is a system call that implements the ability to shutdown your
+	  current kernel, and to start another kernel.  It is like a reboot
+	  but it is indepedent of the system firmware.   And like a reboot
+	  you can start any kernel with it, not just Linux.
+
+	  The name comes from the similiarity to the exec system call.
+
+	  It is an ongoing process to be certain the hardware in a machine
+	  is properly shutdown, so do not be surprised if this code does not
+	  initially work for you.  It may help to enable device hotplugging
+	  support.  As of this writing the exact hardware interface is
+	  strongly in flux, so no good recommendation can be made.
+
+config CRASH_DUMP
+	  bool "kernel crash dumps (EXPERIMENTAL)"
+	  depends on EXPERIMENTAL
+	  help
+	    Generate crash dump after being started by kexec.
+
 source "drivers/firmware/Kconfig"
 
 source "fs/Kconfig.binfmt"
diff -Nraup a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
--- a/arch/ia64/kernel/crash.c	1970-01-01 08:00:00.000000000 +0800
+++ b/arch/ia64/kernel/crash.c	2006-08-15 09:50:03.000000000 +0800
@@ -0,0 +1,129 @@
+/*
+ * arch/ia64/kernel/crash.c
+ *
+ * Architecture specific (ia64) functions for kexec based crash dumps.
+ *
+ * Created by: Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
+ *
+ */
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/irq.h>
+#include <linux/pci.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/irq.h>
+#include <linux/delay.h>
+#include <linux/elf.h>
+#include <linux/elfcore.h>
+#include <linux/device.h>
+#include <asm/uaccess.h>
+
+size_t copy_oldmem_page(unsigned long pfn, char *buf,
+                               size_t csize, unsigned long offset, int userbuf)
+{
+        void  *vaddr;
+
+        if (!csize)
+                return 0;
+        vaddr = page_address(pfn_to_page(pfn));
+
+        if (userbuf) {
+                if (copy_to_user(buf, (vaddr + offset), csize)) {
+                        return -EFAULT;
+                }
+        } else
+                memcpy(buf, (vaddr + offset), csize);
+        return csize;
+}
+
+static void device_shootdown(void)
+{
+	irq_desc_t *idesc;
+	int irq;
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		idesc = irq_desc + irq;
+		if (!idesc || !idesc->action)
+			continue;
+		disable_irq_nosync(irq);
+		idesc->chip->end(irq);
+		idesc->chip->shutdown(irq);
+	}
+
+#ifdef CONFIG_IA64_HP_ZX1
+	ioc_iova_disable();
+#endif
+}
+
+static inline Elf64_Word
+*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
+		size_t data_len)
+{
+	struct elf_note *note = (struct elf_note *)buf;
+	note->n_namesz = strlen(name) + 1;
+	note->n_descsz = data_len;
+	note->n_type   = type;
+	buf += (sizeof(*note) + 3)/4;
+	memcpy(buf, name, note->n_namesz);
+	buf += (note->n_namesz + 3)/4;
+	memcpy(buf, data, data_len);
+	buf += (data_len + 3)/4;
+	return buf;
+}
+
+static void
+final_note(void *buf)
+{
+	memset(buf, 0, sizeof(struct elf_note));
+}
+
+extern void ia64_dump_cpu_regs(void *);
+
+void
+crash_save_this_cpu()
+{
+	void *buf;
+	struct elf_prstatus prstatus;
+	int cpu = smp_processor_id();
+	unsigned long cfm, sof, sol;
+	elf_greg_t *dst = (elf_greg_t *)&prstatus.pr_reg;
+	memset(&prstatus, 0, sizeof(prstatus));
+	prstatus.pr_pid = current->pid;
+
+	ia64_dump_cpu_regs(dst);
+        cfm = dst[43];
+        sol = (cfm >> 7) & 0x7f;
+        sof = cfm & 0x7f;
+        dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
+                        sof - sol);
+
+        buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
+	if (!buf)
+		return;
+	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus,
+		sizeof(prstatus));
+	final_note(buf);
+}
+
+void
+machine_crash_shutdown(struct pt_regs *pt)
+{
+	/* This function is only called after the system
+	 * has paniced or is otherwise in a critical state.
+	 * The minimum amount of code to allow a kexec'd kernel
+	 * to run successfully needs to happen here.
+	 *
+	 * In practice this means shooting down the other cpus in
+	 * an SMP system.
+	 */
+	if (in_interrupt())
+		ia64_eoi();
+	device_shootdown();
+#ifdef CONFIG_SMP
+	kdump_smp_send_stop();
+#endif
+}
diff -Nraup a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
--- a/arch/ia64/kernel/efi.c	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/kernel/efi.c	2006-08-15 09:50:03.000000000 +0800
@@ -26,6 +26,7 @@
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/efi.h>
+#include <linux/kexec.h>
 
 #include <asm/io.h>
 #include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void 
 struct efi efi;
 EXPORT_SYMBOL(efi);
 static efi_runtime_services_t *runtime;
-static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
+static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;
 
 #define efi_call_virt(f, args...)	(*(f))(args)
 
@@ -421,6 +422,8 @@ efi_init (void)
 			mem_limit = memparse(cp + 4, &cp);
 		} else if (memcmp(cp, "max_addr=", 9) == 0) {
 			max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
+		} else if (memcmp(cp, "min_addr=", 9) == 0) {
+			min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
 		} else {
 			while (*cp != ' ' && *cp)
 				++cp;
@@ -428,6 +431,8 @@ efi_init (void)
 				++cp;
 		}
 	}
+	if (min_addr != 0UL)
+		printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
 	if (max_addr != ~0UL)
 		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
 
@@ -894,7 +899,8 @@ find_memmap_space (void)
 		as = max(contig_low, md->phys_addr);
 		ae = min(contig_high, efi_md_end(md));
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsign
 		} else
 			ae = efi_md_end(md);
 
-		/* keep within max_addr= command line arg */
+		/* keep within max_addr= and min_addr= command line arg */
+		as = max(as, min_addr);
 		ae = min(ae, max_addr);
 		if (ae <= as)
 			continue;
@@ -1116,6 +1123,12 @@ efi_initialize_iomem_resources(struct re
 			 */
 			insert_resource(res, code_resource);
 			insert_resource(res, data_resource);
+#ifdef CONFIG_KEXEC
+                        insert_resource(res, &efi_memmap_res);
+                        insert_resource(res, &boot_param_res);
+			if (crashk_res.end > crashk_res.start)
+				insert_resource(res, &crashk_res);
+#endif
 		}
 	}
 }
diff -Nraup a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
--- a/arch/ia64/kernel/entry.S	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/kernel/entry.S	2006-08-15 09:50:03.000000000 +0800
@@ -1575,7 +1575,7 @@ sys_call_table:
 	data8 sys_mq_timedreceive		// 1265
 	data8 sys_mq_notify
 	data8 sys_mq_getsetattr
-	data8 sys_ni_syscall			// reserved for kexec_load
+	data8 sys_kexec_load
 	data8 sys_ni_syscall			// reserved for vserver
 	data8 sys_waitid			// 1270
 	data8 sys_add_key
diff -Nraup a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c
--- a/arch/ia64/kernel/machine_kexec.c	1970-01-01 08:00:00.000000000 +0800
+++ b/arch/ia64/kernel/machine_kexec.c	2006-08-15 10:12:00.000000000 +0800
@@ -0,0 +1,131 @@
+/*
+ * arch/ia64/kernel/machine_kexec.c
+ *
+ * Handle transition of Linux booting another kernel
+ * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P.
+ * Copyright (C) 2005 Khalid Aziz <khalid.aziz@hp.com>
+ * Copyright (C) 2006 Intel Corp, Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kexec.h>
+#include <linux/pci.h>
+#include <linux/cpu.h>
+#include <asm/mmu_context.h>
+#include <asm/setup.h>
+#include <asm/mca.h>
+#include <asm/page.h>
+#include <asm/bitops.h>
+#include <asm/tlbflush.h>
+#include <asm/delay.h>
+#include <asm/meminit.h>
+
+typedef void (*relocate_new_kernel_t)(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+static struct kimage *ia64_kimage;
+struct resource efi_memmap_res = {
+        .name  = "EFI Memory Map",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+struct resource boot_param_res = {
+        .name  = "Boot parameter",
+        .start = 0,
+        .end   = 0,
+        .flags = IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
+
+/*
+ * Do what every setup is needed on image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+	void *control_code_buffer;
+	const unsigned long *func;
+
+	func = (unsigned long *)&relocate_new_kernel;
+	/* Pre-load control code buffer to minimize work in kexec path */
+	control_code_buffer = page_address(image->control_code_page);
+	memcpy((void *)control_code_buffer, (const void *)func[0],
+			relocate_new_kernel_size);
+	flush_icache_range((unsigned long)control_code_buffer,
+			(unsigned long)control_code_buffer + relocate_new_kernel_size);
+	ia64_kimage = image;
+
+	return 0;
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void machine_shutdown(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+	{
+		int cpu;
+
+		for_each_online_cpu(cpu) {
+			if (cpu != smp_processor_id())
+				cpu_down(cpu);
+		}
+	}
+#elif defined(CONFIG_SMP)
+	smp_call_function(kexec_stop_this_cpu, (void *)ia64_kimage->start, 0, 0);
+#endif
+#ifdef CONFIG_PCI
+	{
+		struct pci_dev *dev = NULL;
+		irq_desc_t *idesc;
+		/* Disable all PCI devices */
+		while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+			if (!(dev->is_enabled))
+				continue;
+			idesc = irq_desc + dev->irq;
+			if (!idesc||!idesc->chip)
+				continue;
+			disable_irq_nosync(dev->irq);
+			idesc->chip->end(dev->irq);
+			idesc->action = NULL;
+			pci_disable_device(dev);
+		}
+	}
+#endif
+
+
+#ifdef CONFIG_IA64_HP_ZX1
+	ioc_iova_disable();
+#endif
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+extern void *efi_get_pal_addr(void);
+void machine_kexec(struct kimage *image)
+{
+	relocate_new_kernel_t rnk;
+	void *pal_addr = efi_get_pal_addr();
+	unsigned long code_addr = (unsigned long)page_address(image->control_code_page);
+	if (image->type == KEXEC_TYPE_CRASH)
+		crash_save_this_cpu();
+	/* Interrupts aren't acceptable while we reboot */
+	ia64_set_itv(1<<16);
+	local_irq_disable();
+	rnk = (relocate_new_kernel_t)&code_addr;
+	(*rnk)(image->head, image->start, ia64_boot_param,
+		     GRANULEROUNDDOWN((unsigned long) pal_addr));
+	BUG();
+	for (;;);
+}
diff -Nraup a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
--- a/arch/ia64/kernel/Makefile	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/kernel/Makefile	2006-08-15 09:50:03.000000000 +0800
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE)	+= cyclone.o
 obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
+obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
 obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
 obj-$(CONFIG_AUDIT)		+= audit.o
 mca_recovery-y			+= mca_drv.o mca_drv_asm.o
diff -Nraup a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S
--- a/arch/ia64/kernel/relocate_kernel.S	1970-01-01 08:00:00.000000000 +0800
+++ b/arch/ia64/kernel/relocate_kernel.S	2006-08-15 09:50:03.000000000 +0800
@@ -0,0 +1,490 @@
+/*
+ * arch/ia64/kernel/relocate_kernel.S
+ *
+ * Relocate kexec'able kernel and start it
+ *
+ * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
+ * Copyright (C) 2005 Khalid Aziz  <khalid.aziz@hp.com>
+ * Copyright (C) 2005 Intel Corp,  Zou Nan hai <nanhai.zou@intel.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.  See the file COPYING for more details.
+ */
+#include <linux/config.h>
+#include <asm/asmmacro.h>
+#include <asm/kregs.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mca_asm.h>
+
+       /* Must be relocatable PIC code callable as a C function
+        */
+GLOBAL_ENTRY(relocate_new_kernel)
+	.prologue
+	alloc r31=ar.pfs,4,0,0,0
+        .body
+.reloc_entry:
+{
+	rsm psr.i| psr.ic
+	mov r2=ip
+}
+	;;
+{
+        flushrs                         // must be first insn in group
+        srlz.i
+}
+	;;
+	dep r2=0,r2,61,3		//to physical address
+	;;
+	//first switch to physical mode
+	add r3=1f-.reloc_entry, r2
+	movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC
+	mov ar.rsc=0	          	// put RSE in enforced lazy mode
+	;;
+	add sp=(memory_stack_end - 16 - .reloc_entry),r2
+	add r8=(register_stack - .reloc_entry),r2
+	;;
+	mov r18=ar.rnat
+	mov ar.bspstore=r8
+	;;
+        mov cr.ipsr=r16
+        mov cr.iip=r3
+        mov cr.ifs=r0
+	srlz.i
+	;;
+	mov ar.rnat=r18
+	rfi
+	;;
+1:
+	//physical mode code begin
+	mov b6=in1
+	dep r28=0,in2,61,3	//to physical address
+
+	// purge all TC entries
+#define O(member)       IA64_CPUINFO_##member##_OFFSET
+        GET_THIS_PADDR(r2, cpu_info)    // load phys addr of cpu_info into r2
+        ;;
+        addl r17=O(PTCE_STRIDE),r2
+        addl r2=O(PTCE_BASE),r2
+        ;;
+        ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));;    	// r18=ptce_base
+        ld4 r19=[r2],4                                  // r19=ptce_count[0]
+        ld4 r21=[r17],4                                 // r21=ptce_stride[0]
+        ;;
+        ld4 r20=[r2]                                    // r20=ptce_count[1]
+        ld4 r22=[r17]                                   // r22=ptce_stride[1]
+        mov r24=r0
+        ;;
+        adds r20=-1,r20
+        ;;
+#undef O
+2:
+        cmp.ltu p6,p7=r24,r19
+(p7)    br.cond.dpnt.few 4f
+        mov ar.lc=r20
+3:
+        ptc.e r18
+        ;;
+        add r18=r22,r18
+        br.cloop.sptk.few 3b
+        ;;
+        add r18=r21,r18
+        add r24=1,r24
+        ;;
+        br.sptk.few 2b
+4:
+        srlz.i
+        ;;
+	//purge TR entry for kernel text and data
+        movl r16=KERNEL_START
+        mov r18=KERNEL_TR_PAGE_SHIFT<<2
+        ;;
+        ptr.i r16, r18
+        ptr.d r16, r18
+        ;;
+        srlz.i
+        ;;
+
+	// purge TR entry for percpu data
+        movl r16=PERCPU_ADDR
+        mov r18=PERCPU_PAGE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.d
+	;;
+
+        // purge TR entry for pal code
+        mov r16=in3
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.i r16,r18
+        ;;
+        srlz.i
+	;;
+
+        // purge TR entry for stack
+        mov r16=IA64_KR(CURRENT_STACK)
+        ;;
+        shl r16=r16,IA64_GRANULE_SHIFT
+        movl r19=PAGE_OFFSET
+        ;;
+        add r16=r19,r16
+        mov r18=IA64_GRANULE_SHIFT<<2
+        ;;
+        ptr.d r16,r18
+        ;;
+        srlz.i
+	;;
+
+	//copy segments
+	movl r16=PAGE_MASK
+        mov  r30=in0                    // in0 is page_list
+        br.sptk.few .dest_page
+	;;
+.loop:
+	ld8  r30=[in0], 8;;
+.dest_page:
+	tbit.z p0, p6=r30, 0;;    	// 0x1 dest page
+(p6)	and r17=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 1;;		// 0x2 indirect page
+(p6)	and in0=r30, r16
+(p6)	br.cond.sptk.few .loop;;
+
+	tbit.z p0, p6=r30, 2;;		// 0x4 end flag
+(p6)	br.cond.sptk.few .end_loop;;
+
+	tbit.z p6, p0=r30, 3;;		// 0x8 source page
+(p6)	br.cond.sptk.few .loop
+
+	and r18=r30, r16
+
+	// simple copy page, may optimize later
+	movl r14=PAGE_SIZE/8 - 1;;
+	mov ar.lc=r14;;
+1:
+	ld8 r14=[r18], 8;;
+	st8 [r17]=r14, 8;;
+	fc.i r17
+	br.ctop.sptk.few 1b
+	br.sptk.few .loop
+	;;
+
+.end_loop:
+	sync.i			// for fc.i
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	br.call.sptk.many b0=b6;;
+
+.align  32
+memory_stack:
+	.fill           8192, 1, 0
+memory_stack_end:
+register_stack:
+	.fill           8192, 1, 0
+register_stack_end:
+relocate_new_kernel_end:
+END(relocate_new_kernel)
+
+GLOBAL_ENTRY(kexec_fake_sal_rendez)
+	.prologue
+	alloc r31=ar.pfs,3,0,0,0
+	.body
+.rendez_entry:
+	rsm	psr.i | psr.ic
+	mov r25=ip
+	;;
+	{
+		flushrs
+		srlz.i
+	}
+	;;
+       /* See where I am running, and compute gp */
+	{
+		mov     ar.rsc = 0      /* Put RSE in enforce lacy, LE mode */
+		mov     gp = ip         /* gp == relocate_new_kernel */
+	}
+
+	movl r8=0x00000100000000
+	;;
+	mov cr.iva=r8
+	/* Transition from virtual to physical mode */
+	srlz.i
+	;;
+	add	r17=5f-.rendez_entry, r25
+	movl	r16=(IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_IC | IA64_PSR_MFL)
+	;;
+	tpa	r17=r17
+	mov	cr.ipsr=r16
+	;;
+	mov	cr.iip=r17
+	mov	cr.ifs=r0
+	;;
+	rfi
+	;;
+5:
+	mov     b6=in0			/* _start addr */
+	mov	r8=in1			/* ap_wakeup_vector */
+	mov	r26=in2			/* PAL addr */
+	;;
+	/* Purge kernel TRs */
+	movl	r16=KERNEL_START
+	mov	r18=KERNEL_TR_PAGE_SHIFT<<2
+	;;
+	ptr.i	r16,r18
+	ptr.d	r16,r18
+	;;
+	srlz.i
+	;;
+	srlz.d
+	;;
+	/* Purge percpu TR */
+	movl	r16=PERCPU_ADDR
+	mov	r18=PERCPU_PAGE_SHIFT<<2
+	;;
+	ptr.d	r16,r18
+	;;
+	srlz.d
+	;;
+	/* Purge PAL TR */
+	mov	r18=IA64_GRANULE_SHIFT<<2
+	;;
+	ptr.i	r26,r18
+	;;
+	srlz.i
+	;;
+	/* Purge stack TR */
+	mov	r16=IA64_KR(CURRENT_STACK)
+	;;
+	shl	r16=r16,IA64_GRANULE_SHIFT
+	movl	r19=PAGE_OFFSET
+	;;
+	add	r16=r19,r16
+	mov	r18=IA64_GRANULE_SHIFT<<2
+	;;
+	ptr.d	r16,r18
+	;;
+	srlz.i
+	;;
+
+	/* Ensure we can read and clear external interrupts */
+	mov	cr.tpr=r0
+	srlz.d
+
+	shr.u	r9=r8,6			/* which irr */
+	;;
+	and	r8=63,r8		/* bit offset into irr */
+	;;
+	mov	r10=1;;
+	;;
+	shl	r10=r10,r8		/* bit mask off irr we want */
+	cmp.eq	p6,p0=0,r9
+	;;
+(p6)	br.cond.sptk.few        check_irr0
+	cmp.eq	p7,p0=1,r9
+	;;
+(p7)	br.cond.sptk.few        check_irr1
+	cmp.eq	p8,p0=2,r9
+	;;
+(p8)	br.cond.sptk.few        check_irr2
+	cmp.eq	p9,p0=3,r9
+	;;
+(p9)	br.cond.sptk.few        check_irr3
+
+check_irr0:
+	mov	r8=cr.irr0
+	;;
+	and	r8=r8,r10
+	;;
+	cmp.eq	p6,p0=0,r8
+(p6)	br.cond.sptk.few	check_irr0
+	br.few	call_start
+
+check_irr1:
+	mov	r8=cr.irr1
+	;;
+	and	r8=r8,r10
+	;;
+	cmp.eq	p6,p0=0,r8
+(p6)	br.cond.sptk.few	check_irr1
+	br.few	call_start
+
+check_irr2:
+	mov	r8=cr.irr2
+	;;
+	and	r8=r8,r10
+	;;
+	cmp.eq	p6,p0=0,r8
+(p6)	br.cond.sptk.few	check_irr2
+	br.few	call_start
+
+check_irr3:
+	mov	r8=cr.irr3
+	;;
+	and	r8=r8,r10
+	;;
+	cmp.eq	p6,p0=0,r8
+(p6)	br.cond.sptk.few	check_irr3
+	br.few	call_start
+
+call_start:
+	mov	cr.eoi=r0
+	;;
+	srlz.d
+	;;
+	mov	r8=cr.ivr
+	;;
+	srlz.d
+	;;
+	cmp.eq	p0,p6=15,r8
+(p6)	br.cond.sptk.few	call_start
+	br.sptk.few		b6
+kexec_fake_sal_rendez_end:
+END(kexec_fake_sal_rendez)
+
+	.global relocate_new_kernel_size
+relocate_new_kernel_size:
+	data8	kexec_fake_sal_rendez_end - relocate_new_kernel
+
+GLOBAL_ENTRY(ia64_dump_cpu_regs)
+        .prologue
+        alloc loc0=ar.pfs,1,2,0,0
+        .body
+        mov     ar.rsc=0                // put RSE in enforced lazy mode
+        add     loc1=4*8, in0           // save r4 and r5 first
+        ;;
+{
+        flushrs                         // flush dirty regs to backing store
+        srlz.i
+}
+        st8 [loc1]=r4, 8
+        ;;
+        st8 [loc1]=r5, 8
+        ;;
+        add loc1=32*8, in0
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r0, 8			// r0
+        st8 [loc1]=r4, 8		// rnat
+        mov r5=pr
+        ;;
+        st8 [in0]=r1, 8			// r1
+        st8 [loc1]=r5, 8		// pr
+        mov r4=b0
+        ;;
+        st8 [in0]=r2, 8			// r2
+        st8 [loc1]=r4, 8		// b0
+        mov r5=b1;
+        ;;
+        st8 [in0]=r3, 24		// r3
+        st8 [loc1]=r5, 8		// b1
+        mov r4=b2
+        ;;
+        st8 [in0]=r6, 8			// r6
+        st8 [loc1]=r4, 8		// b2
+	mov r5=b3
+        ;;
+        st8 [in0]=r7, 8			// r7
+        st8 [loc1]=r5, 8		// b3
+        mov r4=b4
+        ;;
+        st8 [in0]=r8, 8			// r8
+        st8 [loc1]=r4, 8		// b4
+        mov r5=b5
+        ;;
+        st8 [in0]=r9, 8			// r9
+        st8 [loc1]=r5, 8		// b5
+        mov r4=b6
+        ;;
+        st8 [in0]=r10, 8		// r10
+        st8 [loc1]=r5, 8		// b6
+        mov r5=b7
+        ;;
+        st8 [in0]=r11, 8		// r11
+        st8 [loc1]=r5, 8		// b7
+        mov r4=b0
+        ;;
+        st8 [in0]=r12, 8		// r12
+        st8 [loc1]=r4, 8		// ip
+        mov r5=loc0
+	;;
+        st8 [in0]=r13, 8		// r13
+        extr.u r5=r5, 0, 38		// ar.pfs.pfm
+	mov r4=r0			// user mask
+        ;;
+        st8 [in0]=r14, 8		// r14
+        st8 [loc1]=r5, 8		// cfm
+        ;;
+        st8 [in0]=r15, 8		// r15
+        st8 [loc1]=r4, 8        	// user mask
+	mov r5=ar.rsc
+        ;;
+        st8 [in0]=r16, 8		// r16
+        st8 [loc1]=r5, 8        	// ar.rsc
+        mov r4=ar.bsp
+        ;;
+        st8 [in0]=r17, 8		// r17
+        st8 [loc1]=r4, 8        	// ar.bsp
+        mov r5=ar.bspstore
+        ;;
+        st8 [in0]=r18, 8		// r18
+        st8 [loc1]=r5, 8        	// ar.bspstore
+        mov r4=ar.rnat
+        ;;
+        st8 [in0]=r19, 8		// r19
+        st8 [loc1]=r4, 8        	// ar.rnat
+        mov r5=ar.ccv
+        ;;
+        st8 [in0]=r20, 8		// r20
+	st8 [loc1]=r5, 8        	// ar.ccv
+        mov r4=ar.unat
+        ;;
+        st8 [in0]=r21, 8		// r21
+        st8 [loc1]=r4, 8        	// ar.unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r22, 8		// r22
+        st8 [loc1]=r5, 8        	// ar.fpsr
+        mov r4 = ar.unat
+        ;;
+        st8 [in0]=r23, 8		// r23
+        st8 [loc1]=r4, 8        	// unat
+        mov r5 = ar.fpsr
+        ;;
+        st8 [in0]=r24, 8		// r24
+        st8 [loc1]=r5, 8        	// fpsr
+        mov r4 = ar.pfs
+        ;;
+        st8 [in0]=r25, 8		// r25
+        st8 [loc1]=r4, 8        	// ar.pfs
+        mov r5 = ar.lc
+        ;;
+        st8 [in0]=r26, 8		// r26
+        st8 [loc1]=r5, 8        	// ar.lc
+        mov r4 = ar.ec
+        ;;
+        st8 [in0]=r27, 8		// r27
+        st8 [loc1]=r4, 8        	// ar.ec
+        mov r5 = ar.csd
+        ;;
+        st8 [in0]=r28, 8		// r28
+        st8 [loc1]=r5, 8        	// ar.csd
+        mov r4 = ar.ssd
+        ;;
+        st8 [in0]=r29, 8		// r29
+        st8 [loc1]=r4, 8        	// ar.ssd
+        ;;
+        st8 [in0]=r30, 8		// r30
+        ;;
+	st8 [in0]=r31, 8		// r31
+        mov ar.pfs=loc0
+        ;;
+        br.ret.sptk.many rp
+END(ia64_dump_cpu_regs)
+
+
diff -Nraup a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
--- a/arch/ia64/kernel/setup.c	2006-08-15 05:52:14.000000000 +0800
+++ b/arch/ia64/kernel/setup.c	2006-08-15 09:50:03.000000000 +0800
@@ -43,6 +43,8 @@
 #include <linux/initrd.h>
 #include <linux/pm.h>
 #include <linux/cpufreq.h>
+#include <linux/kexec.h>
+#include <linux/crash_dump.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -250,6 +252,41 @@ reserve_memory (void)
 	}
 #endif
 
+#ifdef CONFIG_KEXEC
+	/* crashkernel=size@addr specifies the location to reserve for
+	 * a crash kernel.  By reserving this memory we guarantee
+	 * that linux never set's it up as a DMA target.
+	 * Useful for holding code to do something appropriate
+	 * after a kernel panic.
+	 */
+	{
+		char *from = strstr(saved_command_line, "crashkernel=");
+		if (from) {
+			unsigned long size, base;
+			size = memparse(from + 12, &from);
+			if (*from == '@') {
+				base = memparse(from + 1, &from);
+				rsvd_region[n].start =
+					(unsigned long)__va(base);
+				rsvd_region[n].end =
+					(unsigned long)__va(base + size);
+				crashk_res.start = base;
+				crashk_res.end = base + size - 1;
+				n++;
+			}
+		}
+		efi_memmap_res.start = ia64_boot_param->efi_memmap;
+                efi_memmap_res.end = efi_memmap_res.start +
+                        ia64_boot_param->efi_memmap_size;
+                printk("efi_memmap start %lx %lx\n",
+                        efi_memmap_res.start,
+                        efi_memmap_res.end);
+                boot_param_res.start = __pa(ia64_boot_param);
+                boot_param_res.end = boot_param_res.start +
+                        sizeof(*ia64_boot_param);
+	}
+#endif
+
 	efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end);
 	n++;
 
@@ -484,6 +521,16 @@ setup_arch (char **cmdline_p)
 	if (!nomca)
 		ia64_mca_init();
 
+#ifdef CONFIG_CRASH_DUMP
+	{
+		char *from = strstr(saved_command_line, "elfcorehdr=");
+
+		if (from)
+			elfcorehdr_addr = memparse(from+11, &from);
+		saved_max_pfn = (unsigned long) -1;
+	}
+#endif
+
 	platform_setup(cmdline_p);
 	paging_init();
 }
diff -Nraup a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
--- a/arch/ia64/kernel/smp.c	2006-06-18 09:49:35.000000000 +0800
+++ b/arch/ia64/kernel/smp.c	2006-08-15 10:12:17.000000000 +0800
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/efi.h>
 #include <linux/bitops.h>
+#include <linux/kexec.h>
 
 #include <asm/atomic.h>
 #include <asm/current.h>
@@ -66,6 +67,7 @@ static volatile struct call_data_struct 
 
 #define IPI_CALL_FUNC		0
 #define IPI_CPU_STOP		1
+#define IPI_KDUMP_CPU_STOP	3
 
 /* This needs to be cacheline aligned because it is written to by *other* CPUs.  */
 static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
@@ -84,6 +86,34 @@ unlock_ipi_calllock(void)
 	spin_unlock_irq(&call_lock);
 }
 
+#ifdef CONFIG_KEXEC
+/*
+ * Stop the CPU and put it in fake SAL rendezvous. This allows CPU to wake
+ * up with IPI from boot processor
+ */
+void
+kexec_stop_this_cpu (void *func)
+{
+	unsigned long pta, impl_va_bits, pal_base;
+
+	/*
+	 * Remove this CPU by putting it into fake SAL rendezvous
+	 */
+	cpu_clear(smp_processor_id(), cpu_online_map);
+	max_xtp();
+	ia64_eoi();
+
+	/* Disable VHPT */
+	impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
+	pta = POW2(61) - POW2(vmlpt_bits);
+	ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | 0);
+
+	local_irq_disable();
+	pal_base = __get_cpu_var(ia64_mca_pal_base);
+	kexec_fake_sal_rendez(func, ap_wakeup_vector, pal_base);
+}
+#endif
+
 static void
 stop_this_cpu (void)
 {
@@ -155,7 +185,15 @@ handle_IPI (int irq, void *dev_id, struc
 			      case IPI_CPU_STOP:
 				stop_this_cpu();
 				break;
-
+#ifdef CONFIG_CRASH_DUMP
+			      case IPI_KDUMP_CPU_STOP:
+				{
+					local_irq_disable();
+					crash_save_this_cpu();
+					cpu_halt();
+				}
+				break;
+#endif
 			      default:
 				printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
 				break;
@@ -371,6 +409,13 @@ smp_send_stop (void)
 {
 	send_IPI_allbutself(IPI_CPU_STOP);
 }
+#ifdef CONFIG_CRASH_DUMP
+void
+kdump_smp_send_stop()
+{
+ 	send_IPI_allbutself(IPI_KDUMP_CPU_STOP);
+}
+#endif
 
 int __init
 setup_profiling_timer (unsigned int multiplier)
diff -Nraup a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h
--- a/include/asm-ia64/kexec.h	1970-01-01 08:00:00.000000000 +0800
+++ b/include/asm-ia64/kexec.h	2006-08-15 09:50:03.000000000 +0800
@@ -0,0 +1,38 @@
+#ifndef _ASM_IA64_KEXEC_H
+#define _ASM_IA64_KEXEC_H
+
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE
+
+#define KEXEC_CONTROL_CODE_SIZE (8192 + 8192 + 4096)
+
+/* The native architecture */
+#define KEXEC_ARCH KEXEC_ARCH_IA_64
+
+#define MAX_NOTE_BYTES 1024
+
+#define pte_bits	3
+#define vmlpt_bits	(impl_va_bits - PAGE_SHIFT + pte_bits)
+#define POW2(n)		(1ULL << (n))
+
+DECLARE_PER_CPU(u64, ia64_mca_pal_base);
+const extern unsigned int relocate_new_kernel_size;
+volatile extern long kexec_rendez;
+extern void relocate_new_kernel(unsigned long, unsigned long,
+		struct ia64_boot_param *, unsigned long);
+extern void kexec_fake_sal_rendez(void *start, unsigned long wake_up,
+		unsigned long pal_base);
+static inline void
+crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs)
+{
+}
+extern struct resource efi_memmap_res;
+extern struct resource boot_param_res;
+extern void kdump_smp_send_stop(void);
+extern void crash_save_this_cpu(void);
+#endif /* _ASM_IA64_KEXEC_H */
diff -Nraup a/include/asm-ia64/machvec_hpzx1.h b/include/asm-ia64/machvec_hpzx1.h
--- a/include/asm-ia64/machvec_hpzx1.h	2006-06-18 09:49:35.000000000 +0800
+++ b/include/asm-ia64/machvec_hpzx1.h	2006-08-15 09:50:03.000000000 +0800
@@ -34,4 +34,6 @@ extern ia64_mv_dma_mapping_error	sba_dma
 #define platform_dma_supported			sba_dma_supported
 #define platform_dma_mapping_error		sba_dma_mapping_error
 
+extern void ioc_iova_disable(void);
+
 #endif /* _ASM_IA64_MACHVEC_HPZX1_h */
diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h
--- a/include/asm-ia64/meminit.h	2006-08-15 05:52:24.000000000 +0800
+++ b/include/asm-ia64/meminit.h	2006-08-15 09:50:03.000000000 +0800
@@ -15,11 +15,12 @@
  * 	- initrd (optional)
  * 	- command line string
  * 	- kernel code & data
+ * 	- crash dumping code reserved region
  * 	- Kernel memory map built from EFI memory map
  *
  * More could be added if necessary
  */
-#define IA64_MAX_RSVD_REGIONS 6
+#define IA64_MAX_RSVD_REGIONS 7
 
 struct rsvd_region {
 	unsigned long start;	/* virtual address of beginning of element */
diff -Nraup a/include/asm-ia64/smp.h b/include/asm-ia64/smp.h
--- a/include/asm-ia64/smp.h	2006-08-15 05:52:24.000000000 +0800
+++ b/include/asm-ia64/smp.h	2006-08-15 09:50:03.000000000 +0800
@@ -128,6 +128,9 @@ extern void smp_send_reschedule (int cpu
 extern void lock_ipi_calllock(void);
 extern void unlock_ipi_calllock(void);
 extern void identify_siblings (struct cpuinfo_ia64 *);
+#ifdef CONFIG_KEXEC
+extern void kexec_stop_this_cpu(void *);
+#endif
 
 #else
 
diff -Nraup a/kernel/irq/manage.c b/kernel/irq/manage.c
--- a/kernel/irq/manage.c	2006-08-15 05:52:24.000000000 +0800
+++ b/kernel/irq/manage.c	2006-08-15 09:50:03.000000000 +0800
@@ -475,4 +475,3 @@ int request_irq(unsigned int irq,
 	return retval;
 }
 EXPORT_SYMBOL(request_irq);
-









-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Mon Aug 14 18:36:27 2006

This archive was generated by hypermail 2.1.8 : 2006-08-14 18:37:01 EST