Patch 7/7 Cheers, ashok raj - Linux Core Software Group Name: migrate_irq.patch Author: Ashok Raj (Intel Corporation) D: This patch adds the interrupt migration necessary for supporting CPU removal on IA64. D: Devices don't stop generating interrupts, and some special handling is required to D: ensure the kernel does not lose interrupt events in the process of D: migrating interrupt destinations to different target CPUs. For proper D: functioning, we need to disable platform-level interrupt redirection. D: The rest is in the code for review. To test, I manually migrated network and disk D: interrupts to cpu3 and removed it later; interrupts should then start arriving D: on the first online CPU (i.e., the boot CPU). At the time of release this worked fine for D: 24+ hrs without any panics or hangs. --- linux-2.6.5-lhcs-root/arch/ia64/kernel/iosapic.c | 28 +++++ linux-2.6.5-lhcs-root/arch/ia64/kernel/irq.c | 114 ++++++++++++++++++---- linux-2.6.5-lhcs-root/arch/ia64/kernel/irq_ia64.c | 60 ++++++++++- linux-2.6.5-lhcs-root/arch/ia64/kernel/sal.c | 13 ++ 4 files changed, 190 insertions(+), 25 deletions(-) diff -puN arch/ia64/kernel/irq.c~migrate_irq arch/ia64/kernel/irq.c --- linux-2.6.5-lhcs/arch/ia64/kernel/irq.c~migrate_irq 2004-04-23 23:11:22.976199926 -0700 +++ linux-2.6.5-lhcs-root/arch/ia64/kernel/irq.c 2004-04-23 23:11:22.982059304 -0700 @@ -8,6 +8,12 @@ * instead of just grabbing them. Thus setups with different IRQ numbers * shouldn't result in any weird surprises, and installing new handlers * should be easier. + * + * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004 + * + * 4/14/2004: Added code to handle cpu migration and do safe irq + * migration without lossing interrupts for iosapic + * architecture. */ /* @@ -49,8 +55,9 @@ #include <asm/irq.h> #include <asm/tlbflush.h> #include <asm/cpu.h> +#include <asm/bug.h> - +extern cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; /* * Linux has a controller-independent x86 interrupt architecture. 
@@ -943,10 +950,14 @@ void set_irq_affinity_info (unsigned int static int irq_affinity_read_proc (char *page, char **start, off_t off, int count, int *eof, void *data) { - int len = cpumask_scnprintf(page, count, irq_affinity[(long)data]); + extern void print_rte(unsigned int irq); + int len = sprintf(page, "%s", irq_redir[(long)data] ? "r " : ""); + + len += cpumask_scnprintf(page+len, count, irq_affinity[(long)data]); if (count - len < 2) return -EINVAL; len += sprintf(page + len, "\n"); + return len; } @@ -961,6 +972,7 @@ static int irq_affinity_write_proc (stru int rlen; int prelen; irq_desc_t *desc = irq_descp(irq); + unsigned long flags; if (!desc->handler->set_affinity) return -EIO; @@ -999,40 +1011,106 @@ static int irq_affinity_write_proc (stru if (cpus_empty(tmp)) return -EINVAL; - desc->handler->set_affinity(irq, new_value); + spin_lock_irqsave(&desc->lock, flags); + pending_irq_cpumask[irq] = new_value; + spin_unlock_irqrestore(&desc->lock, flags); + return full_count; } #endif /* CONFIG_SMP */ #ifdef CONFIG_HOTPLUG_CPU -void fixup_irqs(void) +unsigned int vectors_in_migration[NR_IRQS]; + +/* + * Since cpu_online_map is already updated, we just need to check for + * affinity that has zeros + */ +static void migrate_irqs(void) { cpumask_t mask; - unsigned int irq, redir; irq_desc_t *desc; - static int warned; + int irq, new_cpu; + + for (irq=0; irq < NR_IRQS; irq++) { + desc = irq_descp(irq); + + /* + * No handling for now. + * TBD: Implement a disable function so we can now + * tell CPU not to respond to these local intr sources. + * such as ITV,CPEI,MCA etc. 
+ */ + if (desc->status == IRQ_PER_CPU) + continue; - for (irq = 0; irq < NR_IRQS; irq++) { cpus_and(mask, irq_affinity[irq], cpu_online_map); if (any_online_cpu(mask) == NR_CPUS) { - printk("Breaking affinity for irq %ui\n", irq); - mask = any_online_cpu(cpu_online_map); + /* + * Save it for phase 2 processing + */ + vectors_in_migration[irq] = irq; + + new_cpu = any_online_cpu(cpu_online_map); + mask = cpumask_of_cpu(new_cpu); + + /* + * Al three are essential, currently WARN_ON.. maybe panic? + */ + if (desc->handler && desc->handler->disable && + desc->handler->enable && desc->handler->set_affinity) { + desc->handler->disable(irq); + desc->handler->set_affinity(irq, mask); + desc->handler->enable(irq); + } else { + WARN_ON((!(desc->handler) || !(desc->handler->disable) || + !(desc->handler->enable) || + !(desc->handler->set_affinity))); + } } - desc = irq_descp(irq); - if (desc->handler->set_affinity) { - redir = irq_redir[irq]; - desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), - mask); + } +} + +void fixup_irqs(void) +{ + unsigned int irq; + extern void ia64_process_pending_intr(void); + + ia64_set_itv(1<<16); + /* + * Phase 1: Locate irq's bound to this cpu and + * relocate them for cpu removal. + */ + migrate_irqs(); + + /* + * Phase 2: Perform interrupt processing for all entries reported in + * local APIC. + */ + ia64_process_pending_intr(); + + /* + * Phase 3: Now handle any interrupts not captured in local APIC. + * This is to account for cases that device interrupted during the time the + * rte was being disabled and re-programmed. + */ + for (irq=0; irq < NR_IRQS; irq++) { + if (vectors_in_migration[irq]) { + vectors_in_migration[irq]=0; + do_IRQ(irq, NULL); } - else if (desc->action && !(warned++)) - printk("Cannot set affinity for irq %i\n", irq); } + + /* + * Now let processor die. We do irq disable and max_xtp() to + * ensure there is no more interrupts routed to this processor. 
+ * But the local timer interrupt can have 1 pending which we + * take care in timer_interrupt(). + */ max_xtp(); local_irq_disable(); - __get_cpu_var(cpu_state) = CPU_DEAD; } - #endif static int prof_cpu_mask_read_proc (char *page, char **start, off_t off, diff -puN arch/ia64/kernel/irq_ia64.c~migrate_irq arch/ia64/kernel/irq_ia64.c --- linux-2.6.5-lhcs/arch/ia64/kernel/irq_ia64.c~migrate_irq 2004-04-23 23:11:22.977176489 -0700 +++ linux-2.6.5-lhcs-root/arch/ia64/kernel/irq_ia64.c 2004-04-23 23:11:22.983035867 -0700 @@ -10,6 +10,8 @@ * * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector * PCI to vector allocation routine. + * 04/14/2004 Ashok Raj <ashok.raj@intel.com> + * Added CPU Hotplug handling for IPF. */ #include <linux/config.h> @@ -85,6 +87,11 @@ ia64_alloc_vector (void) extern unsigned int do_IRQ(unsigned long irq, struct pt_regs *regs); +#ifdef CONFIG_SMP +# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +#else +# define IS_RESCHEDULE(vec) (0) +#endif /* * That's where the IVT branches when we get an external * interrupt. This branches to the correct hardware IRQ handler via @@ -94,11 +101,6 @@ void ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) { unsigned long saved_tpr; -#ifdef CONFIG_SMP -# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) -#else -# define IS_RESCHEDULE(vec) (0) -#endif #if IRQ_DEBUG { @@ -162,6 +164,54 @@ ia64_handle_irq (ia64_vector vector, str irq_exit(); } +#ifdef CONFIG_HOTPLUG_CPU +/* + * This function emulates a interrupt processing when a cpu is about to be + * brought down. 
+ */ +void ia64_process_pending_intr(void) +{ + ia64_vector vector; + unsigned long saved_tpr; + extern unsigned int vectors_in_migration[NR_IRQS]; + + vector = ia64_get_ivr(); + + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + + /* + * Perform normal interrupt style processing + */ + while (vector != IA64_SPURIOUS_INT_VECTOR) { + if (!IS_RESCHEDULE(vector)) { + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + /* + * Now try calling normal ia64_handle_irq as it would have got called + * from a real intr handler. Try passing null for pt_regs, hopefully + * it will work. I hope it works!. + * Probably could shared code. + */ + vectors_in_migration[local_vector_to_irq(vector)]=0; + do_IRQ(local_vector_to_irq(vector), NULL); + + /* + * Disable interrupts and send EOI + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + irq_exit(); +} +#endif + + #ifdef CONFIG_SMP extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs); diff -puN arch/ia64/kernel/iosapic.c~migrate_irq arch/ia64/kernel/iosapic.c --- linux-2.6.5-lhcs/arch/ia64/kernel/iosapic.c~migrate_irq 2004-04-23 23:11:22.979129615 -0700 +++ linux-2.6.5-lhcs-root/arch/ia64/kernel/iosapic.c 2004-04-23 23:11:22.983035867 -0700 @@ -32,6 +32,8 @@ * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. * Remove iosapic_address & gsi_base from external interfaces. * Rationalize __init/__devinit attributes. + * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 + * Updated to work with irq migration necessary for CPU Hotplug */ /* * Here is what the interrupt logic between a PCI device and the kernel looks like: @@ -99,6 +101,8 @@ static spinlock_t iosapic_lock = SPIN_LOCK_UNLOCKED; +cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS]; + /* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. 
*/ static struct iosapic_intr_info { @@ -188,8 +192,10 @@ set_rte (unsigned int vector, unsigned i pol = iosapic_intr_info[vector].polarity; trigger = iosapic_intr_info[vector].trigger; dmode = iosapic_intr_info[vector].dmode; + vector &= (~IA64_IRQ_REDIRECTED); redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; + #ifdef CONFIG_SMP { unsigned int irq; @@ -307,9 +313,8 @@ iosapic_set_affinity (unsigned int irq, spin_lock_irqsave(&iosapic_lock, flags); { - /* get current delivery mode by reading the low32 */ - writel(IOSAPIC_RTE_LOW(rte_index), addr + IOSAPIC_REG_SELECT); low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + if (redir) /* change delivery mode to lowest priority */ low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); @@ -327,6 +332,21 @@ iosapic_set_affinity (unsigned int irq, #endif } +static inline void move_irq(int irq) +{ + /* note - we hold desc->lock */ + cpumask_t tmp; + irq_desc_t *desc = irq_descp(irq); + + if (!cpus_empty(pending_irq_cpumask[irq])) { + cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); + if (unlikely(!cpus_empty(tmp))) { + desc->handler->set_affinity(irq, pending_irq_cpumask[irq]); + } + cpus_clear(pending_irq_cpumask[irq]); + } +} + /* * Handlers for level-triggered interrupts. */ @@ -343,6 +363,8 @@ iosapic_end_level_irq (unsigned int irq) { ia64_vector vec = irq_to_vector(irq); + move_irq(irq); + writel(vec, iosapic_intr_info[vec].addr + IOSAPIC_EOI); } @@ -382,6 +404,8 @@ static void iosapic_ack_edge_irq (unsigned int irq) { irq_desc_t *idesc = irq_descp(irq); + + move_irq(irq); /* * Once we have recorded IRQ_PENDING already, we can mask the * interrupt for real. 
This prevents IRQ storms from unhandled diff -puN arch/ia64/kernel/sal.c~migrate_irq arch/ia64/kernel/sal.c --- linux-2.6.5-lhcs/arch/ia64/kernel/sal.c~migrate_irq 2004-04-23 23:11:22.980106178 -0700 +++ linux-2.6.5-lhcs-root/arch/ia64/kernel/sal.c 2004-04-23 23:11:22.984012430 -0700 @@ -122,10 +122,23 @@ sal_desc_entry_point (void *p) static void __init set_smp_redirect (int flag) { +#ifndef CONFIG_HOTPLUG_CPU if (no_int_routing) smp_int_redirect &= ~flag; else smp_int_redirect |= flag; +#else + /* + * For CPU Hotplug we dont want to do any chipset supported + * interrupt redirection. The reason is this would require that + * All interrupts be stopped and hard bind the irq to a cpu. + * Later when the interrupt is fired we need to set the redir hint + * on again in the vector. This is combersome for something that the + * user mode irq balancer will solve anyways. + */ + no_int_routing=1; + smp_int_redirect &= ~flag; +#endif } #else #define set_smp_redirect(flag) do { } while (0) _ - To unsubscribe from this list: send the line "unsubscribe linux-ia64" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Received on Sun Apr 25 03:00:40 2004
This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:25 EST