diff -Nraup a/arch/ia64/Kconfig b/arch/ia64/Kconfig --- a/arch/ia64/Kconfig 2006-03-01 07:57:10.000000000 +0800 +++ b/arch/ia64/Kconfig 2006-03-01 08:45:15.000000000 +0800 @@ -374,6 +374,23 @@ config IA64_PALINFO To use this option, you have to ensure that the "/proc file system support" (CONFIG_PROC_FS) is enabled, too. +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on EXPERIMENTAL && (!SMP || HOTPLUG_CPU) + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + +config CRASH_DUMP + bool "kernel crash dumps (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Generate crash dump after being started by kexec. + source "drivers/firmware/Kconfig" source "fs/Kconfig.binfmt" diff -Nraup a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c --- a/arch/ia64/kernel/crash.c 1970-01-01 08:00:00.000000000 +0800 +++ b/arch/ia64/kernel/crash.c 2006-03-01 08:44:30.000000000 +0800 @@ -0,0 +1,104 @@ +/* + * arch/ia64/kernel/crash.c + * + * Copyright (C) 2005 Intel Corp + * Zou Nan hai + */ +#include +#include +#include +#include +#include + +note_buf_t crash_notes[NR_CPUS]; + +static void device_shootdown(void) +{ + struct pci_dev *dev; + irq_desc_t *desc; + u16 pci_command; + + list_for_each_entry(dev, &pci_devices, global_list) { + desc = irq_descp(dev->irq); + if (!desc->action) + continue; + pci_read_config_word(dev, PCI_COMMAND, &pci_command); + if (pci_command & PCI_COMMAND_MASTER) { + pci_command &= ~PCI_COMMAND_MASTER; + pci_write_config_word(dev, PCI_COMMAND, pci_command); + } + disable_irq_nosync(dev->irq); + desc->handler->end(dev->irq); + } +} + +static Elf64_Word *append_elf_note(Elf64_Word *buf, char *name, + unsigned type, void *data, + size_t data_len) +{ + struct elf_note *note = 
(struct elf_note *)buf; + note->n_namesz = strlen(name) + 1; + note->n_descsz = data_len; + note->n_type = type; + buf += (sizeof(*note) + 3)/4; + memcpy(buf, name, note->n_namesz); + buf += (note->n_namesz + 3)/4; + memcpy(buf, data, data_len); + buf += (data_len + 3)/4; + return buf; +} + +static void final_note(void *buf) +{ + memset(buf, 0, sizeof(struct elf_note)); +} + +static void crash_save_this_cpu(void) +{ + void *buf; + struct elf_prstatus prstatus; + int cpu = smp_processor_id(); + elf_greg_t *dst = (elf_greg_t *)&prstatus.pr_reg; + + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; + + dst[1] = ia64_getreg(_IA64_REG_GP); + dst[12] = ia64_getreg(_IA64_REG_SP); + dst[13] = ia64_getreg(_IA64_REG_TP); + + dst[42] = ia64_getreg(_IA64_REG_IP); + dst[45] = ia64_getreg(_IA64_REG_AR_RSC); + + ia64_setreg(_IA64_REG_AR_RSC, 0); /* put RSE in enforced lazy mode */ + ia64_srlz_i(); + + dst[46] = ia64_getreg(_IA64_REG_AR_BSP); + dst[47] = ia64_getreg(_IA64_REG_AR_BSPSTORE); + + dst[48] = ia64_getreg(_IA64_REG_AR_RNAT); + dst[49] = ia64_getreg(_IA64_REG_AR_CCV); + dst[50] = ia64_getreg(_IA64_REG_AR_UNAT); + + dst[51] = ia64_getreg(_IA64_REG_AR_FPSR); + dst[52] = ia64_getreg(_IA64_REG_AR_PFS); + dst[53] = ia64_getreg(_IA64_REG_AR_LC); + + dst[54] = ia64_getreg(_IA64_REG_AR_EC); /* slot 54 is ar.ec in the ia64 elf_gregset_t layout (ar.lc is slot 53) */ + dst[55] = ia64_getreg(_IA64_REG_AR_CSD); + dst[56] = ia64_getreg(_IA64_REG_AR_SSD); + + buf = &crash_notes[cpu][0]; + buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, + sizeof(prstatus)); + final_note(buf); +} + +void machine_crash_shutdown(void) +{ + crash_save_this_cpu(); + device_shootdown(); +#ifdef CONFIG_SMP + smp_send_stop(); +#endif +} diff -Nraup a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c --- a/arch/ia64/kernel/efi.c 2006-03-01 07:57:10.000000000 +0800 +++ b/arch/ia64/kernel/efi.c 2006-03-01 08:44:30.000000000 +0800 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -40,7 +41,7 @@ extern efi_status_t efi_call_phys (void struct efi efi; 
EXPORT_SYMBOL(efi); -static unsigned long mem_limit = ~0UL, max_addr = ~0UL; +static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; #define efi_call_virt(f, args...) (*(f))(args) @@ -384,24 +385,18 @@ efi_init (void) efi_config_table_t *config_tables; efi_char16_t *c16; u64 efi_desc_size; - char *cp, *end, vendor[100] = "unknown"; + char *cp, vendor[100] = "unknown"; extern char saved_command_line[]; int i; /* it's too early to be able to use the standard kernel command line support... */ for (cp = saved_command_line; *cp; ) { if (memcmp(cp, "mem=", 4) == 0) { - cp += 4; - mem_limit = memparse(cp, &end); - if (end != cp) - break; - cp = end; + mem_limit = memparse(cp + 4, &cp); } else if (memcmp(cp, "max_addr=", 9) == 0) { - cp += 9; - max_addr = GRANULEROUNDDOWN(memparse(cp, &end)); - if (end != cp) - break; - cp = end; + max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); } else { while (*cp != ' ' && *cp) ++cp; @@ -409,9 +404,11 @@ efi_init (void) ++cp; } } + + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20); if (max_addr != ~0UL) printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); - efi.systab = __va(ia64_boot_param->efi_systab); /* @@ -785,7 +782,8 @@ find_memmap_space (void) as = max(contig_low, md->phys_addr); ae = min(contig_high, efi_md_end(md)); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -895,7 +893,8 @@ efi_memmap_init(unsigned long *s, unsign } else ae = efi_md_end(md); - /* keep within max_addr= command line arg */ + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); ae = min(ae, max_addr); if (ae <= as) continue; @@ -1007,6 +1006,11 @@ 
efi_initialize_iomem_resources(struct re */ insert_resource(res, code_resource); insert_resource(res, data_resource); +#ifdef CONFIG_KEXEC + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif + } } } diff -Nraup a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S --- a/arch/ia64/kernel/entry.S 2006-03-01 07:57:10.000000000 +0800 +++ b/arch/ia64/kernel/entry.S 2006-03-01 08:44:30.000000000 +0800 @@ -1588,7 +1588,7 @@ sys_call_table: data8 sys_mq_timedreceive // 1265 data8 sys_mq_notify data8 sys_mq_getsetattr - data8 sys_ni_syscall // reserved for kexec_load + data8 sys_kexec_load data8 sys_ni_syscall // reserved for vserver data8 sys_waitid // 1270 data8 sys_add_key diff -Nraup a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c --- a/arch/ia64/kernel/machine_kexec.c 1970-01-01 08:00:00.000000000 +0800 +++ b/arch/ia64/kernel/machine_kexec.c 2006-03-01 08:44:30.000000000 +0800 @@ -0,0 +1,67 @@ +/* + * arch/ia64/kernel/machine_kexec.c + * + * Copyright (C) 2005 Intel Corp + * Zou Nan hai + */ + +#include +#include +#include +#include +#include +#include + +int +machine_kexec_prepare(struct kimage * image) +{ + return 0; +} + +void +machine_kexec_cleanup(struct kimage *image) +{ +} + +void +machine_shutdown(void) +{ +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) { + if (cpu != smp_processor_id()) + cpu_down(cpu); + } +#endif + printk(KERN_INFO "kexec: machine_shutdown called\n"); +} + +const extern unsigned char relocate_new_kernel[]; +const extern unsigned long relocate_new_kernel_size; +typedef void (*relocate_kernel_t) (unsigned long, kimage_entry_t, void *, + unsigned long); + +extern void *efi_get_pal_addr(void); + +NORET_TYPE void +machine_kexec(struct kimage *image) +{ + relocate_kernel_t relocator; + void *pal_addr = efi_get_pal_addr(); + unsigned long + code_addr = (unsigned long)page_address(image->control_code_page); + + ia64_set_itv(1<<16); /* NOTE(review): presumably sets the mask bit of the interval timer vector — confirm */ + local_irq_disable(); + relocator = 
(relocate_kernel_t)&code_addr; /* NOTE(review): calls through &code_addr, not code_addr — presumably the local serves as an ia64 function descriptor whose first word is the entry address; confirm */ + memcpy((void *)code_addr, relocate_new_kernel, + relocate_new_kernel_size); + flush_icache_range(code_addr, code_addr + relocate_new_kernel_size); + + (*relocator)(image->start, image->head, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); + + for(;;); + +} diff -Nraup a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile --- a/arch/ia64/kernel/Makefile 2006-03-01 07:57:10.000000000 +0800 +++ b/arch/ia64/kernel/Makefile 2006-03-01 08:44:30.000000000 +0800 @@ -23,6 +23,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o +obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o relocate_kernel.o obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o mca_recovery-y += mca_drv.o mca_drv_asm.o diff -Nraup a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S --- a/arch/ia64/kernel/relocate_kernel.S 1970-01-01 08:00:00.000000000 +0800 +++ b/arch/ia64/kernel/relocate_kernel.S 2006-03-01 08:44:30.000000000 +0800 @@ -0,0 +1,188 @@ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Copyright (C) 2005 Intel Corp + * Zou Nan hai + */ +#include +#include +#include +#include + +/* relocate new kernel + * => switch to physical mode + * => purge all TC and TR entries + * => go through kimage page_list to copy segments + * => clear system state + * => call to entry in physical mode + */ + +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.here: +{ + rsm psr.i| psr.ic + mov r15=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + + //first switch to physical mode + add r3=1f-.here, r15 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add r2=__reloc_stack-.here, r15 + ;; + add sp=8192-16, r2 + ;; + tpa sp=sp + tpa r3=r3 + ;; + mov r18=ar.rnat + mov ar.bspstore=r2 + ;; + mov cr.ipsr=r16 + mov 
cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi + ;; +1: + //physical mode code begin + mov b6=in0 + tpa r28=in2 // tpa must be done before the TLB purge + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + //purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for percpu data + movl r16=PERCPU_ADDR + mov r18=PERCPU_PAGE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.d + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // copy segments + movl r16=PAGE_MASK + mov r30=in1 // in1 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in1], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in1=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, 
r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 // flush the word just stored, before r17 is advanced + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + br.call.sptk.many b0=b6;; +__reloc_stack: +.skip 8192 +relocate_new_kernel_end: +END(relocate_new_kernel) + .global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel diff -Nraup a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c --- a/arch/ia64/kernel/setup.c 2006-03-01 07:57:10.000000000 +0800 +++ b/arch/ia64/kernel/setup.c 2006-03-01 08:44:30.000000000 +0800 @@ -43,6 +43,8 @@ #include #include #include +#include +#include #include #include @@ -247,6 +249,32 @@ reserve_memory (void) } #endif +#ifdef CONFIG_KEXEC + /* crashkernel=size@addr specifies the location to reserve for + * a crash kernel. By reserving this memory we guarantee + * that linux never sets it up as a DMA target. + * Useful for holding code to do something appropriate + * after a kernel panic. 
+ */ + { + char *from = strstr(saved_command_line, "crashkernel="); + if (from) { + unsigned long size, base; + size = memparse(from + 12, &from); + if (*from == '@') { /* nothing is reserved unless an explicit @addr follows the size */ + base = memparse(from + 1, &from); + rsvd_region[n].start = + (unsigned long)__va(base); + rsvd_region[n].end = + (unsigned long)__va(base + size); + crashk_res.start = base; + crashk_res.end = base + size - 1; /* resource end is inclusive */ + n++; + } + } + } +#endif + efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); n++; @@ -489,6 +517,16 @@ setup_arch (char **cmdline_p) if (!strstr(saved_command_line, "nomca")) ia64_mca_init(); +#ifdef CONFIG_CRASH_DUMP + { + char *from = strstr(saved_command_line, "elfcorehdr="); + if (from) + elfcorehdr_addr = memparse(from+11, &from); + + saved_max_pfn = (unsigned long) -1; + } +#endif + platform_setup(cmdline_p); paging_init(); } diff -Nraup a/include/asm/kexec.h b/include/asm/kexec.h --- a/include/asm/kexec.h 1970-01-01 08:00:00.000000000 +0800 +++ b/include/asm/kexec.h 2006-03-01 08:44:30.000000000 +0800 @@ -0,0 +1,29 @@ +#ifndef _IA64_KEXEC_H +#define _IA64_KEXEC_H + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct + * calculation for the amount of memory directly mappable into the + * kernel memory space. 
+ */ + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_CODE_SIZE (1UL << 14) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; /* one note buffer of MAX_NOTE_BYTES, held as 32-bit words */ +extern note_buf_t crash_notes[]; +#endif /* _IA64_KEXEC_H */ diff -Nraup a/include/asm/meminit.h b/include/asm/meminit.h --- a/include/asm/meminit.h 2006-03-01 07:57:10.000000000 +0800 +++ b/include/asm/meminit.h 2006-03-01 08:44:30.000000000 +0800 @@ -16,11 +16,12 @@ * - initrd (optional) * - command line string * - kernel code & data + * - crash dumping code reserved region * - Kernel memory map built from EFI memory map * * More could be added if necessary */ -#define IA64_MAX_RSVD_REGIONS 6 +#define IA64_MAX_RSVD_REGIONS 7 struct rsvd_region { unsigned long start; /* virtual address of beginning of element */ diff -Nraup a/include/asm-ia64/kexec.h b/include/asm-ia64/kexec.h --- a/include/asm-ia64/kexec.h 1970-01-01 08:00:00.000000000 +0800 +++ b/include/asm-ia64/kexec.h 2006-03-01 08:44:30.000000000 +0800 @@ -0,0 +1,29 @@ +#ifndef _IA64_KEXEC_H +#define _IA64_KEXEC_H + +/* + * KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return. + * I.e. Maximum page that is mapped directly into kernel memory, + * and kmap is not required. + * + * Someone correct me if FIXADDR_START - PAGEOFFSET is not the correct + * calculation for the amount of memory directly mappable into the + * kernel memory space. 
+ */ + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_CODE_SIZE (1UL << 14) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define MAX_NOTE_BYTES 1024 +typedef u32 note_buf_t[MAX_NOTE_BYTES/4]; /* one note buffer of MAX_NOTE_BYTES, held as 32-bit words */ +extern note_buf_t crash_notes[]; +#endif /* _IA64_KEXEC_H */ diff -Nraup a/include/asm-ia64/meminit.h b/include/asm-ia64/meminit.h --- a/include/asm-ia64/meminit.h 2006-03-01 07:57:10.000000000 +0800 +++ b/include/asm-ia64/meminit.h 2006-03-01 08:44:30.000000000 +0800 @@ -16,11 +16,12 @@ * - initrd (optional) * - command line string * - kernel code & data + * - crash dumping code reserved region * - Kernel memory map built from EFI memory map * * More could be added if necessary */ -#define IA64_MAX_RSVD_REGIONS 6 +#define IA64_MAX_RSVD_REGIONS 7 struct rsvd_region { unsigned long start; /* virtual address of beginning of element */ diff -Nraup a/include/linux/kexec.h b/include/linux/kexec.h --- a/include/linux/kexec.h 2006-03-01 07:57:10.000000000 +0800 +++ b/include/linux/kexec.h 2006-03-01 08:44:30.000000000 +0800 @@ -41,7 +41,7 @@ typedef unsigned long kimage_entry_t; #define IND_DONE 0x4 #define IND_SOURCE 0x8 -#define KEXEC_SEGMENT_MAX 8 +#define KEXEC_SEGMENT_MAX 16 struct kexec_segment { void __user *buf; size_t bufsz;