[PATCH/RFC]: Clean up of sn_irq_info list

From: Prarit Bhargava <prarit_at_sgi.com>
Date: 2005-03-04 06:27:31
Hello,

This patch solves a coding problem that was noticed during hotplug 
driver testing on SGI systems. 

When a hotpluggable PCI device is introduced to the system, it is linked 
to a sn_irq_info struct.  These sn_irq_info tructs are linked together 
via a unprotected unidirectional list, which implies FIFO list 
behaviour.  If a PCI device was taken out of service the sn_irq_info 
struct must be removed.

Due to the FIFO nature of the list, the element remove function became 
cumbersome to code. Since the list was unprotected by locks a race was 
quickly noticed where sn_irq_info structs were accessed as they were 
being removed.

The fix is to implement a list using the standard list.h structs and to 
lock using an RCU lock implementation (the list can be accessed in an 
interrupt context).

The attached patch is against latest 2.6 bk pull.

P.


===== Makefile 1.564 vs edited =====
--- 1.564/Makefile	2005-02-12 21:58:24 -05:00
+++ edited/Makefile	2005-03-02 13:52:54 -05:00
@@ -1,7 +1,7 @@
 VERSION = 2
 PATCHLEVEL = 6
 SUBLEVEL = 11
-EXTRAVERSION =-rc4
+EXTRAVERSION =-bk
 NAME=Woozy Numbat
 
 # *DOCUMENTATION*
===== arch/ia64/sn/kernel/io_init.c 1.9 vs edited =====
--- 1.9/arch/ia64/sn/kernel/io_init.c	2005-01-11 19:22:08 -05:00
+++ edited/arch/ia64/sn/kernel/io_init.c	2005-03-02 13:52:37 -05:00
@@ -343,10 +343,6 @@
 	 */
 	ia64_max_iommu_merge_mask = ~PAGE_MASK;
 	sn_fixup_ionodes();
-	sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL);
-	if (sn_irq <= 0)
-		BUG();		/* Canno afford to run out of memory. */
-	memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS);
 
 	sn_init_cpei_timer();
 
===== arch/ia64/sn/kernel/irq.c 1.31 vs edited =====
--- 1.31/arch/ia64/sn/kernel/irq.c	2005-01-22 18:54:50 -05:00
+++ edited/arch/ia64/sn/kernel/irq.c	2005-03-02 13:54:52 -05:00
@@ -9,6 +9,7 @@
  */
 
 #include <linux/irq.h>
+#include <linux/spinlock.h>
 #include <asm/sn/intr.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
@@ -22,10 +23,11 @@
 static void force_interrupt(int irq);
 static void register_intr_pda(struct sn_irq_info *sn_irq_info);
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
+static spinlock_t sn_irq_info_lock = SPIN_LOCK_UNLOCKED; /* non-IRQ lock */
 
 extern int sn_force_interrupt_flag;
 extern int sn_ioif_inited;
-struct sn_irq_info **sn_irq;
+static LIST_HEAD(sn_irq_info_list);
 
 static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
 				     u64 sn_irq_info,
@@ -128,69 +130,72 @@
 
 static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
 {
-	struct sn_irq_info *sn_irq_info = sn_irq[irq];
-	struct sn_irq_info *tmp_sn_irq_info;
+	struct sn_irq_info *sn_irq_info;
 	int cpuid, cpuphys;
 	nasid_t t_nasid;	/* nasid to target */
 	int t_slice;		/* slice to target */
-
-	/* allocate a temp sn_irq_info struct to get new target info */
-	tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL);
-	if (!tmp_sn_irq_info)
-		return;
+	int status;
+	int local_widget;
 
 	cpuid = first_cpu(mask);
 	cpuphys = cpu_physical_id(cpuid);
 	t_nasid = cpuid_to_nasid(cpuid);
 	t_slice = cpuid_to_slice(cpuid);
 
-	while (sn_irq_info) {
-		int status;
-		int local_widget;
-		uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
-		nasid_t local_nasid = NASID_GET(bridge);
+	list_for_each_entry(sn_irq_info, &sn_irq_info_list, list) {
+		uint64_t bridge;
+		nasid_t local_nasid;
+		struct sn_irq_info *tmp_irq_info;
+
+		tmp_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_ATOMIC);
+		if (tmp_irq_info == NULL)
+			break;
+		memcpy(tmp_irq_info, sn_irq_info, sizeof(struct sn_irq_info));
 
+		bridge = (uint64_t) tmp_irq_info->irq_bridge;
 		if (!bridge)
-			break;	/* irq is not a device interrupt */
+			break; /* irq is not a device interrupt */
+
+		local_nasid = NASID_GET(bridge);
 
 		if (local_nasid & 1)
 			local_widget = TIO_SWIN_WIDGETNUM(bridge);
 		else
 			local_widget = SWIN_WIDGETNUM(bridge);
 
-		/* Free the old PROM sn_irq_info structure */
-		sn_intr_free(local_nasid, local_widget, sn_irq_info);
+		/* Free the old PROM tmp_irq_info structure */
+		sn_intr_free(local_nasid, local_widget, tmp_irq_info);
+		/* Update kernels tmp_irq_info with new target info */
+		unregister_intr_pda(tmp_irq_info);
 
-		/* allocate a new PROM sn_irq_info struct */
+		/* allocate a new PROM tmp_irq_info struct */
 		status = sn_intr_alloc(local_nasid, local_widget,
-				       __pa(tmp_sn_irq_info), irq, t_nasid,
+				       __pa(tmp_irq_info), irq, t_nasid,
 				       t_slice);
 
-		if (status == 0) {
-			/* Update kernels sn_irq_info with new target info */
-			unregister_intr_pda(sn_irq_info);
-			sn_irq_info->irq_cpuid = cpuid;
-			sn_irq_info->irq_nasid = t_nasid;
-			sn_irq_info->irq_slice = t_slice;
-			sn_irq_info->irq_xtalkaddr =
-			    tmp_sn_irq_info->irq_xtalkaddr;
-			sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie;
-			register_intr_pda(sn_irq_info);
+		/* SAL call failed */	
+		if (status)
+			break;
+
+		tmp_irq_info->irq_cpuid = cpuid;
+		tmp_irq_info->irq_nasid = t_nasid;
+		tmp_irq_info->irq_slice = t_slice;
+		register_intr_pda(tmp_irq_info);
 
-			if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) {
-				pcibr_change_devices_irq(sn_irq_info);
-			}
+		if (IS_PCI_BRIDGE_ASIC(tmp_irq_info->irq_bridge_type)) {
+			pcibr_change_devices_irq(tmp_irq_info);
+		}
 
-			sn_irq_info = sn_irq_info->irq_next;
+		spin_lock(&sn_irq_info_lock);
+		list_add_rcu(&tmp_irq_info->list, &sn_irq_info->list);
+		list_del_rcu(&sn_irq_info->list);
+		spin_unlock(&sn_irq_info_lock);
+		call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
 
 #ifdef CONFIG_SMP
-			set_irq_affinity_info((irq & 0xff), cpuphys, 0);
+		set_irq_affinity_info((irq & 0xff), cpuphys, 0);
 #endif
-		} else {
-			break;	/* snp_affinity failed the intr_alloc */
-		}
 	}
-	kfree(tmp_sn_irq_info);
 }
 
 struct hw_interrupt_type irq_type_sn = {
@@ -221,62 +226,49 @@
 	}
 }
 
+// PRARIT: these strange functions appear to keep track of 
+// sn_last_irq and sn_first_irq.  The only reason to do this
+// appears to be to keep track of pdacpu(cpu)->sn_first_irq
+// and pdacpu(cpu)->sn_last_irq
+
 static void register_intr_pda(struct sn_irq_info *sn_irq_info)
 {
-	int irq = sn_irq_info->irq_irq;
 	int cpu = sn_irq_info->irq_cpuid;
 
-	if (pdacpu(cpu)->sn_last_irq < irq) {
-		pdacpu(cpu)->sn_last_irq = irq;
-	}
+	pdacpu(cpu)->sn_last_irq = max(sn_irq_info->irq_irq, 
+					pdacpu(cpu)->sn_last_irq);
 
-	if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) {
-		pdacpu(cpu)->sn_first_irq = irq;
-	}
+	pdacpu(cpu)->sn_first_irq = min(sn_irq_info->irq_irq, 
+					pdacpu(cpu)->sn_last_irq);
 }
 
 static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
 {
-	int irq = sn_irq_info->irq_irq;
-	int cpu = sn_irq_info->irq_cpuid;
 	struct sn_irq_info *tmp_irq_info;
-	int i, foundmatch;
+	int cpu = sn_irq_info->irq_cpuid;
+	int irq = sn_irq_info->irq_irq;
 
-	if (pdacpu(cpu)->sn_last_irq == irq) {
-		foundmatch = 0;
-		for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) {
-			tmp_irq_info = sn_irq[i];
-			while (tmp_irq_info) {
-				if (tmp_irq_info->irq_cpuid == cpu) {
-					foundmatch++;
-					break;
-				}
-				tmp_irq_info = tmp_irq_info->irq_next;
-			}
-			if (foundmatch) {
-				break;
-			}
-		}
-		pdacpu(cpu)->sn_last_irq = i;
-	}
+	rcu_read_lock();
+	list_for_each_entry_rcu(tmp_irq_info, &sn_irq_info_list, list) {
 
-	if (pdacpu(cpu)->sn_first_irq == irq) {
-		foundmatch = 0;
-		for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) {
-			tmp_irq_info = sn_irq[i];
-			while (tmp_irq_info) {
-				if (tmp_irq_info->irq_cpuid == cpu) {
-					foundmatch++;
-					break;
-				}
-				tmp_irq_info = tmp_irq_info->irq_next;
-			}
-			if (foundmatch) {
-				break;
-			}
+		spin_lock(&tmp_irq_info->lock);
+
+		if (tmp_irq_info->irq_cpuid == cpu) {
+		// I'm only interested in irq's on cpus
+		// that correspond to sn_irq_info
+			if (tmp_irq_info->irq_irq < irq)
+				pdacpu(cpu)->sn_last_irq = 
+					max(tmp_irq_info->irq_irq, 
+					pdacpu(cpu)->sn_last_irq);
+
+			if (tmp_irq_info->irq_irq >= irq)
+				pdacpu(cpu)->sn_first_irq = 
+					min(tmp_irq_info->irq_irq, 
+					pdacpu(cpu)->sn_first_irq);
 		}
-		pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
+		spin_unlock(&tmp_irq_info->lock);
 	}
+	rcu_read_unlock();
 }
 
 struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq,
@@ -285,11 +277,11 @@
 	struct sn_irq_info *sn_irq_info;
 	int status;
 
-	sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL);
+	sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
 	if (sn_irq_info == NULL)
 		return NULL;
 
-	memset(sn_irq_info, 0x0, sizeof(*sn_irq_info));
+	memset(sn_irq_info, 0x0, sizeof(struct sn_irq_info));
 
 	status =
 	    sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq,
@@ -303,6 +295,13 @@
 	}
 }
 
+static void sn_irq_info_free(struct rcu_head *head)
+{
+	struct sn_irq_info *sn_irq_info = container_of(head, 
+						struct sn_irq_info, rcu);
+	kfree(sn_irq_info);
+}
+
 void sn_irq_free(struct sn_irq_info *sn_irq_info)
 {
 	uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
@@ -328,26 +327,49 @@
 	sn_irq_info->irq_cpuid = cpu;
 	sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
 
-	/* link it into the sn_irq[irq] list */
-	sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq];
-	sn_irq[sn_irq_info->irq_irq] = sn_irq_info;
-
+	/* link it into sn_irq_info_list */
+	spin_lock(&sn_irq_info_lock);
+	spin_lock(&sn_irq_info->lock);
+	list_add_rcu(&sn_irq_info->list, &sn_irq_info_list);
+	spin_unlock(&sn_irq_info->lock);
+	spin_unlock(&sn_irq_info_lock);
 	(void)register_intr_pda(sn_irq_info);
 }
 
+void sn_irq_unfixup(struct pci_dev *pci_dev)
+{
+	struct sn_irq_info *sn_irq_info;
+
+	/* Only cleanup IRQ stuff if this device has a host bus context */
+	if (!SN_PCIDEV_BUSSOFT(pci_dev))
+		return;
+
+	sn_irq_info = SN_PCIDEV_INFO(pci_dev)->pdi_sn_irq_info;
+
+	unregister_intr_pda(sn_irq_info);
+	spin_lock(&sn_irq_info_lock);
+	spin_lock(&sn_irq_info->lock);
+	list_del_rcu(&sn_irq_info->list);
+	spin_unlock(&sn_irq_info->lock);
+	spin_unlock(&sn_irq_info_lock);
+	call_rcu(&sn_irq_info->rcu, sn_irq_info_free);
+}
+
 static void force_interrupt(int irq)
 {
 	struct sn_irq_info *sn_irq_info;
 
 	if (!sn_ioif_inited)
 		return;
-	sn_irq_info = sn_irq[irq];
-	while (sn_irq_info) {
-		if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
-		    (sn_irq_info->irq_bridge != NULL)) {
-			pcibr_force_interrupt(sn_irq_info);
-		}
-		sn_irq_info = sn_irq_info->irq_next;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sn_irq_info, &sn_irq_info_list, list) {
+
+		if ((sn_irq_info->irq_irq >= irq) &&
+			(IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) &&
+			(sn_irq_info->irq_bridge != NULL))
+				pcibr_force_interrupt(sn_irq_info);
+
 	}
 }
 
@@ -360,7 +382,7 @@
  * the interrupt is in flight, so we may generate a spurious interrupt,
  * but we should never miss a real lost interrupt.
  */
-static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
+static void sn_check_intr(struct sn_irq_info *sn_irq_info)
 {
 	uint64_t regval;
 	int irr_reg_num;
@@ -368,6 +390,7 @@
 	uint64_t irr_reg;
 	struct pcidev_info *pcidev_info;
 	struct pcibus_info *pcibus_info;
+	int irq = sn_irq_info->irq_irq;
 
 	pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
 	if (!pcidev_info)
@@ -413,19 +436,19 @@
 
 void sn_lb_int_war_check(void)
 {
-	int i;
+	struct sn_irq_info *sn_irq_info;
 
 	if (!sn_ioif_inited || pda->sn_first_irq == 0)
 		return;
-	for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
-		struct sn_irq_info *sn_irq_info = sn_irq[i];
-		while (sn_irq_info) {
-			/* Only call for PCI bridges that are fully initialized. */
-			if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
-			    (sn_irq_info->irq_bridge != NULL)) {
-				sn_check_intr(i, sn_irq_info);
-			}
-			sn_irq_info = sn_irq_info->irq_next;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(sn_irq_info, &sn_irq_info_list, list) {
+
+		/* Only call for PCI bridges that are fully initialized. */
+		if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
+		    (sn_irq_info->irq_bridge != NULL)) {
+			sn_check_intr(sn_irq_info);
 		}
 	}
+	rcu_read_unlock();
 }
===== include/asm-ia64/sn/intr.h 1.12 vs edited =====
--- 1.12/include/asm-ia64/sn/intr.h	2004-11-19 02:03:12 -05:00
+++ edited/include/asm-ia64/sn/intr.h	2005-03-02 13:53:46 -05:00
@@ -9,6 +9,8 @@
 #ifndef _ASM_IA64_SN_INTR_H
 #define _ASM_IA64_SN_INTR_H
 
+#include <linux/rcupdate.h>
+
 #define SGI_UART_VECTOR		(0xe9)
 #define SGI_PCIBR_ERROR		(0x33)
 
@@ -47,6 +49,9 @@
 	int		irq_cookie;	/* unique cookie 	     */
 	int		irq_flags;	/* flags */
 	int		irq_share_cnt;	/* num devices sharing IRQ   */
+	spinlock_t	lock;		/* lock for access */
+	struct list_head list; 		/* sharing irq list */
+	struct rcu_head rcu;		/* rcu callback list */
 };
 
 extern void sn_send_IPI_phys(int, long, int, int);

-
To unsubscribe from this list: send the line "unsubscribe linux-ia64" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Thu Mar 3 14:29:57 2005

This archive was generated by hypermail 2.1.8 : 2005-08-02 09:20:36 EST