--- //depot/projects/smpng/sys/amd64/amd64/intr_machdep.c 2009/06/25 21:20:32 +++ //depot/user/jhb/msi/amd64/amd64/intr_machdep.c 2009/06/26 16:06:16 @@ -163,8 +163,8 @@ } int -intr_add_handler(const char *name, int vector, driver_filter_t filter, - driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) +intr_add_handler(const char *name, int vector, driver_filter_t filter, + driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep) { struct intsrc *isrc; int error; @@ -302,7 +302,8 @@ intr_assign_cpu(void *arg, u_char cpu) { #ifdef SMP - struct intsrc *isrc; + struct intsrc *isrc; + int error; /* * Don't do anything during early boot. We will pick up the @@ -311,10 +312,11 @@ if (assign_cpu && cpu != NOCPU) { isrc = arg; mtx_lock(&intr_table_lock); - isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); + error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); mtx_unlock(&intr_table_lock); - } - return (0); + } else + error = 0; + return (error); #else return (EOPNOTSUPP); #endif @@ -371,7 +373,7 @@ intrcnt_setname("???", 0); intrcnt_index = 1; STAILQ_INIT(&pics); - mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF | MTX_RECURSE); + mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); @@ -441,14 +443,14 @@ if (!assign_cpu) return (cpu_apic_ids[0]); - mtx_lock(&intr_table_lock); + mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!(intr_cpus & (1 << current_cpu))); - mtx_unlock(&intr_table_lock); + mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -489,7 +491,6 @@ intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; - u_int apic_id; int i; /* Don't bother on UP. 
*/ @@ -505,13 +506,17 @@ /* * If this event is already bound to a CPU, * then assign the source to that CPU instead - * of picking one via round-robin. + * of picking one via round-robin. Note that + * this is careful to only advance the + * round-robin if the CPU assignment succeeds. */ if (isrc->is_event->ie_cpu != NOCPU) - apic_id = isrc->is_event->ie_cpu; - else - apic_id = intr_next_cpu(); - isrc->is_pic->pic_assign_cpu(isrc, apic_id); + (void)isrc->is_pic->pic_assign_cpu(isrc, + isrc->is_event->ie_cpu); + else if (isrc->is_pic->pic_assign_cpu(isrc, + cpu_apic_ids[current_cpu]) == 0) + (void)intr_next_cpu(); + } } mtx_unlock(&intr_table_lock); --- //depot/projects/smpng/sys/amd64/amd64/io_apic.c 2009/06/25 20:49:23 +++ //depot/user/jhb/msi/amd64/amd64/io_apic.c 2009/06/26 16:06:16 @@ -120,7 +120,7 @@ static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic); -static void ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); @@ -322,7 +322,7 @@ mtx_unlock_spin(&icu_lock); } -static void +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; @@ -342,7 +342,7 @@ old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) - return; + return (0); /* * Allocate an APIC vector for this interrupt pin. 
Once @@ -350,6 +350,9 @@ */ intpin->io_cpu = apic_id; intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq); + if (intpin->io_vector == 0) + return (ENOSPC); + if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); @@ -364,6 +367,7 @@ */ if (old_vector) apic_free_vector(old_id, old_vector, intpin->io_irq); + return (0); } static void @@ -372,7 +376,9 @@ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) - ioapic_assign_cpu(isrc, intr_next_cpu()); + if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0) + panic("Couldn't find an APIC vector for IRQ %d", + intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } @@ -496,7 +502,7 @@ io->io_pic = ioapic_template; mtx_lock_spin(&icu_lock); io->io_id = next_id++; - io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; + io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); @@ -730,7 +736,7 @@ if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); if (io->io_pins[pin].io_irq >= NUM_IO_INTS) - return (EINVAL); + return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) return (0); --- //depot/projects/smpng/sys/amd64/amd64/local_apic.c 2009/06/25 15:02:49 +++ //depot/user/jhb/msi/amd64/amd64/local_apic.c 2009/06/26 16:06:16 @@ -139,7 +139,7 @@ }; -static u_int32_t lapic_timer_divisors[] = { +static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; @@ -797,7 +797,7 @@ return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); - panic("Couldn't find an APIC vector for IRQ %u", irq); + return (0); } /* @@ -1058,7 +1058,7 @@ static SLIST_HEAD(, apic_enumerator) enumerators = 
SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; - + void apic_register_enumerator(struct apic_enumerator *enumerator) { --- //depot/projects/smpng/sys/amd64/amd64/msi.c 2009/06/25 20:49:23 +++ //depot/user/jhb/msi/amd64/amd64/msi.c 2009/06/26 21:12:31 @@ -113,6 +113,8 @@ u_int msi_vector:8; /* IDT vector. */ u_int msi_cpu:8; /* Local APIC ID. (g) */ u_int msi_count:8; /* Messages in this group. (g) */ + u_int msi_maxcount:8; /* Alignment for this group. (g) */ + int *msi_irqs; /* Group's IRQ list. (g) */ }; static void msi_create_source(void); @@ -125,7 +127,7 @@ static int msi_source_pending(struct intsrc *isrc); static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id); struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, msi_enable_intr, msi_disable_intr, msi_vector, @@ -195,32 +197,52 @@ return (ENODEV); } -static void +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id) { - struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc; int old_vector; u_int old_id; - int vector; + int i, vector; + + /* + * Only allow CPUs to be assigned to the first message for an + * MSI group. + */ + if (msi->msi_first != msi) + return (EINVAL); /* Store information to free existing irq. */ old_vector = msi->msi_vector; old_id = msi->msi_cpu; if (old_id == apic_id) - return; - if (!msi->msi_msix && msi->msi_first->msi_count > 1) - return; + return (0); - /* Allocate IDT vector on this cpu. */ - vector = apic_alloc_vector(apic_id, msi->msi_irq); + /* Allocate IDT vectors on this cpu. 
*/ + if (msi->msi_count > 1) { + KASSERT(msi->msi_msix == 0, ("MSI-X message group")); + vector = apic_alloc_vectors(apic_id, msi->msi_irqs, + msi->msi_count, msi->msi_maxcount); + } else + vector = apic_alloc_vector(apic_id, msi->msi_irq); if (vector == 0) - return; /* XXX alloc_vector panics on failure. */ + return (ENOSPC); + msi->msi_cpu = apic_id; msi->msi_vector = vector; if (bootverbose) printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n", msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq, msi->msi_cpu, msi->msi_vector); + for (i = 1; i < msi->msi_count; i++) { + sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]); + sib->msi_cpu = apic_id; + sib->msi_vector = vector + i; + if (bootverbose) + printf( + "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n", + sib->msi_irq, sib->msi_cpu, sib->msi_vector); + } pci_remap_msi_irq(msi->msi_dev, msi->msi_irq); /* @@ -228,6 +250,9 @@ * to prevent races where we could miss an interrupt. */ apic_free_vector(old_id, old_vector, msi->msi_irq); + for (i = 1; i < msi->msi_count; i++) + apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]); + return (0); } void @@ -268,7 +293,7 @@ msi_last_irq++; mtx_unlock(&msi_lock); - msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); + msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); msi->msi_intsrc.is_pic = &msi_pic; msi->msi_irq = irq; intr_register_source(&msi->msi_intsrc); @@ -276,21 +301,22 @@ } /* - * Try to allocate 'count' interrupt sources with contiguous IDT values. If - * we allocate any new sources, then their IRQ values will be at the end of - * the irqs[] array, with *newirq being the index of the first new IRQ value - * and *newcount being the number of new IRQ values added. + * Try to allocate 'count' interrupt sources with contiguous IDT values. 
*/ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; u_int cpu; - int cnt, i, vector; + int cnt, i, *mirqs, vector; if (!msi_enabled) return (ENXIO); + if (count > 1) + mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK); + else + mirqs = NULL; again: mtx_lock(&msi_lock); @@ -317,6 +343,7 @@ /* If we would exceed the max, give up. */ if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENXIO); } mtx_unlock(&msi_lock); @@ -337,6 +364,7 @@ vector = apic_alloc_vectors(cpu, irqs, count, maxcount); if (vector == 0) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENOSPC); } @@ -356,6 +384,10 @@ ("dead MSI has handlers")); } fsrc->msi_count = count; + fsrc->msi_maxcount = maxcount; + if (count > 1) + bcopy(irqs, mirqs, count * sizeof(*mirqs)); + fsrc->msi_irqs = mirqs; mtx_unlock(&msi_lock); return (0); @@ -413,6 +445,9 @@ apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq); first->msi_vector = 0; first->msi_count = 0; + first->msi_maxcount = 0; + free(first->msi_irqs, M_MSI); + first->msi_irqs = NULL; mtx_unlock(&msi_lock); return (0); @@ -498,15 +536,23 @@ /* Allocate an IDT vector. */ cpu = intr_next_cpu(); vector = apic_alloc_vector(cpu, i); + if (vector == 0) { + mtx_unlock(&msi_lock); + return (ENOSPC); + } if (bootverbose) printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n", msi->msi_irq, cpu, vector); - + /* Setup source. */ msi->msi_cpu = cpu; msi->msi_dev = dev; + msi->msi_first = msi; msi->msi_vector = vector; msi->msi_msix = 1; + msi->msi_count = 1; + msi->msi_maxcount = 1; + msi->msi_irqs = NULL; KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers")); mtx_unlock(&msi_lock); @@ -536,10 +582,13 @@ KASSERT(msi->msi_dev != NULL, ("unowned message")); /* Clear out the message. 
*/ + msi->msi_first = NULL; msi->msi_dev = NULL; apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq); msi->msi_vector = 0; msi->msi_msix = 0; + msi->msi_count = 0; + msi->msi_maxcount = 0; mtx_unlock(&msi_lock); return (0); --- //depot/projects/smpng/sys/amd64/include/intr_machdep.h 2009/06/25 21:20:32 +++ //depot/user/jhb/msi/amd64/include/intr_machdep.h 2009/06/25 21:40:15 @@ -93,7 +93,7 @@ void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); - void (*pic_assign_cpu)(struct intsrc *, u_int apic_id); + int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); STAILQ_ENTRY(pic) pics; }; --- //depot/projects/smpng/sys/amd64/isa/atpic.c 2008/03/24 19:59:34 +++ //depot/user/jhb/msi/amd64/isa/atpic.c 2008/03/24 21:46:53 @@ -143,7 +143,7 @@ static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { @@ -357,7 +357,7 @@ return (0); } -static void +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { --- //depot/projects/smpng/sys/i386/i386/intr_machdep.c 2009/06/25 21:20:32 +++ //depot/user/jhb/msi/i386/i386/intr_machdep.c 2009/06/26 16:06:16 @@ -290,7 +290,8 @@ intr_assign_cpu(void *arg, u_char cpu) { #ifdef SMP - struct intsrc *isrc; + struct intsrc *isrc; + int error; /* * Don't do anything during early boot. 
We will pick up the @@ -299,10 +300,11 @@ if (assign_cpu && cpu != NOCPU) { isrc = arg; mtx_lock(&intr_table_lock); - isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); + error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); mtx_unlock(&intr_table_lock); - } - return (0); + } else + error = 0; + return (error); #else return (EOPNOTSUPP); #endif @@ -359,7 +361,7 @@ intrcnt_setname("???", 0); intrcnt_index = 1; STAILQ_INIT(&pics); - mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF | MTX_RECURSE); + mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); @@ -407,14 +409,14 @@ if (!assign_cpu) return (cpu_apic_ids[0]); - mtx_lock(&intr_table_lock); + mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!(intr_cpus & (1 << current_cpu))); - mtx_unlock(&intr_table_lock); + mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -455,7 +457,6 @@ intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; - u_int apic_id; int i; #ifdef XEN @@ -463,8 +464,8 @@ * Doesn't work yet */ return; -#endif - +#endif + /* Don't bother on UP. */ if (mp_ncpus == 1) return; @@ -478,13 +479,17 @@ /* * If this event is already bound to a CPU, * then assign the source to that CPU instead - * of picking one via round-robin. + * of picking one via round-robin. Note that + * this is careful to only advance the + * round-robin if the CPU assignment succeeds. 
*/ if (isrc->is_event->ie_cpu != NOCPU) - apic_id = isrc->is_event->ie_cpu; - else - apic_id = intr_next_cpu(); - isrc->is_pic->pic_assign_cpu(isrc, apic_id); + (void)isrc->is_pic->pic_assign_cpu(isrc, + isrc->is_event->ie_cpu); + else if (isrc->is_pic->pic_assign_cpu(isrc, + cpu_apic_ids[current_cpu]) == 0) + (void)intr_next_cpu(); + } } mtx_unlock(&intr_table_lock); --- //depot/projects/smpng/sys/i386/i386/io_apic.c 2009/06/25 20:49:23 +++ //depot/user/jhb/msi/i386/i386/io_apic.c 2009/06/26 16:06:16 @@ -120,7 +120,7 @@ static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic); -static void ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); @@ -322,7 +322,7 @@ mtx_unlock_spin(&icu_lock); } -static void +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; @@ -342,7 +342,7 @@ old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) - return; + return (0); /* * Allocate an APIC vector for this interrupt pin. 
Once @@ -350,6 +350,9 @@ */ intpin->io_cpu = apic_id; intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq); + if (intpin->io_vector == 0) + return (ENOSPC); + if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); @@ -364,6 +367,7 @@ */ if (old_vector) apic_free_vector(old_id, old_vector, intpin->io_irq); + return (0); } static void @@ -372,7 +376,9 @@ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) - ioapic_assign_cpu(isrc, intr_next_cpu()); + if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0) + panic("Couldn't find an APIC vector for IRQ %d", + intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } @@ -496,7 +502,7 @@ io->io_pic = ioapic_template; mtx_lock_spin(&icu_lock); io->io_id = next_id++; - io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; + io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); --- //depot/projects/smpng/sys/i386/i386/local_apic.c 2009/06/25 15:02:49 +++ //depot/user/jhb/msi/i386/i386/local_apic.c 2009/06/26 16:06:16 @@ -139,7 +139,7 @@ }; -static u_int32_t lapic_timer_divisors[] = { +static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; @@ -799,7 +799,7 @@ return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); - panic("Couldn't find an APIC vector for IRQ %u", irq); + return (0); } /* @@ -1062,7 +1062,7 @@ static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; - + void apic_register_enumerator(struct apic_enumerator *enumerator) { --- //depot/projects/smpng/sys/i386/i386/msi.c 2009/06/25 20:49:23 +++ //depot/user/jhb/msi/i386/i386/msi.c 2009/06/26 21:12:31 @@ -113,6 +113,8 @@ u_int msi_vector:8; /* 
IDT vector. */ u_int msi_cpu:8; /* Local APIC ID. (g) */ u_int msi_count:8; /* Messages in this group. (g) */ + u_int msi_maxcount:8; /* Alignment for this group. (g) */ + int *msi_irqs; /* Group's IRQ list. (g) */ }; static void msi_create_source(void); @@ -125,7 +127,7 @@ static int msi_source_pending(struct intsrc *isrc); static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id); struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, msi_enable_intr, msi_disable_intr, msi_vector, @@ -195,32 +197,52 @@ return (ENODEV); } -static void +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id) { - struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc; int old_vector; u_int old_id; - int vector; + int i, vector; + + /* + * Only allow CPUs to be assigned to the first message for an + * MSI group. + */ + if (msi->msi_first != msi) + return (EINVAL); /* Store information to free existing irq. */ old_vector = msi->msi_vector; old_id = msi->msi_cpu; if (old_id == apic_id) - return; - if (!msi->msi_msix && msi->msi_first->msi_count > 1) - return; + return (0); - /* Allocate IDT vector on this cpu. */ - vector = apic_alloc_vector(apic_id, msi->msi_irq); + /* Allocate IDT vectors on this cpu. */ + if (msi->msi_count > 1) { + KASSERT(msi->msi_msix == 0, ("MSI-X message group")); + vector = apic_alloc_vectors(apic_id, msi->msi_irqs, + msi->msi_count, msi->msi_maxcount); + } else + vector = apic_alloc_vector(apic_id, msi->msi_irq); if (vector == 0) - return; /* XXX alloc_vector panics on failure. */ + return (ENOSPC); + msi->msi_cpu = apic_id; msi->msi_vector = vector; if (bootverbose) printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n", msi->msi_msix ? 
"MSI-X" : "MSI", msi->msi_irq, msi->msi_cpu, msi->msi_vector); + for (i = 1; i < msi->msi_count; i++) { + sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]); + sib->msi_cpu = apic_id; + sib->msi_vector = vector + i; + if (bootverbose) + printf( + "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n", + sib->msi_irq, sib->msi_cpu, sib->msi_vector); + } pci_remap_msi_irq(msi->msi_dev, msi->msi_irq); /* @@ -228,6 +250,9 @@ * to prevent races where we could miss an interrupt. */ apic_free_vector(old_id, old_vector, msi->msi_irq); + for (i = 1; i < msi->msi_count; i++) + apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]); + return (0); } void @@ -268,7 +293,7 @@ msi_last_irq++; mtx_unlock(&msi_lock); - msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); + msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); msi->msi_intsrc.is_pic = &msi_pic; msi->msi_irq = irq; intr_register_source(&msi->msi_intsrc); @@ -276,21 +301,22 @@ } /* - * Try to allocate 'count' interrupt sources with contiguous IDT values. If - * we allocate any new sources, then their IRQ values will be at the end of - * the irqs[] array, with *newirq being the index of the first new IRQ value - * and *newcount being the number of new IRQ values added. + * Try to allocate 'count' interrupt sources with contiguous IDT values. */ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; u_int cpu; - int cnt, i, vector; + int cnt, i, *mirqs, vector; if (!msi_enabled) return (ENXIO); + if (count > 1) + mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK); + else + mirqs = NULL; again: mtx_lock(&msi_lock); @@ -317,6 +343,7 @@ /* If we would exceed the max, give up. 
*/ if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENXIO); } mtx_unlock(&msi_lock); @@ -337,6 +364,7 @@ vector = apic_alloc_vectors(cpu, irqs, count, maxcount); if (vector == 0) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENOSPC); } @@ -356,6 +384,10 @@ ("dead MSI has handlers")); } fsrc->msi_count = count; + fsrc->msi_maxcount = maxcount; + if (count > 1) + bcopy(irqs, mirqs, count * sizeof(*mirqs)); + fsrc->msi_irqs = mirqs; mtx_unlock(&msi_lock); return (0); @@ -413,6 +445,9 @@ apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq); first->msi_vector = 0; first->msi_count = 0; + first->msi_maxcount = 0; + free(first->msi_irqs, M_MSI); + first->msi_irqs = NULL; mtx_unlock(&msi_lock); return (0); @@ -498,15 +533,23 @@ /* Allocate an IDT vector. */ cpu = intr_next_cpu(); vector = apic_alloc_vector(cpu, i); + if (vector == 0) { + mtx_unlock(&msi_lock); + return (ENOSPC); + } if (bootverbose) printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n", msi->msi_irq, cpu, vector); - + /* Setup source. */ msi->msi_cpu = cpu; msi->msi_dev = dev; + msi->msi_first = msi; msi->msi_vector = vector; msi->msi_msix = 1; + msi->msi_count = 1; + msi->msi_maxcount = 1; + msi->msi_irqs = NULL; KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers")); mtx_unlock(&msi_lock); @@ -536,10 +579,13 @@ KASSERT(msi->msi_dev != NULL, ("unowned message")); /* Clear out the message. 
*/ + msi->msi_first = NULL; msi->msi_dev = NULL; apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq); msi->msi_vector = 0; msi->msi_msix = 0; + msi->msi_count = 0; + msi->msi_maxcount = 0; mtx_unlock(&msi_lock); return (0); --- //depot/projects/smpng/sys/i386/include/intr_machdep.h 2009/06/25 21:20:32 +++ //depot/user/jhb/msi/i386/include/intr_machdep.h 2009/06/25 21:40:15 @@ -93,7 +93,7 @@ void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); - void (*pic_assign_cpu)(struct intsrc *, u_int apic_id); + int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); STAILQ_ENTRY(pic) pics; }; --- //depot/projects/smpng/sys/i386/isa/atpic.c 2008/03/24 19:59:34 +++ //depot/user/jhb/msi/i386/isa/atpic.c 2008/03/24 21:46:53 @@ -161,7 +161,7 @@ static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { @@ -389,7 +389,7 @@ #endif /* PC98 */ } -static void +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { --- //depot/projects/smpng/sys/kern/kern_intr.c 2009/06/25 20:49:23 +++ //depot/user/jhb/msi/kern/kern_intr.c 2009/06/26 17:05:58 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -304,9 +305,14 @@ if (ie->ie_assign_cpu == NULL) return (EOPNOTSUPP); + + error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR); + if (error) + return (error); + /* - * If we have any ithreads try to set their mask first since this - * can fail. + * If we have any ithreads try to set their mask first to verify + * permissions, etc. 
*/ mtx_lock(&ie->ie_lock); if (ie->ie_thread != NULL) { @@ -323,8 +329,28 @@ } else mtx_unlock(&ie->ie_lock); error = ie->ie_assign_cpu(ie->ie_source, cpu); - if (error) + if (error) { + /* + * The interrupt source could not be moved to the new CPU. + * ie_cpu still holds the previous binding at this point, so + * put the ithread back there (or on the root set if it was + * unbound) rather than on the CPU that was just refused. + */ + mtx_lock(&ie->ie_lock); + if (ie->ie_thread != NULL) { + CPU_ZERO(&mask); + if (ie->ie_cpu == NOCPU) + CPU_COPY(cpuset_root, &mask); + else + CPU_SET(ie->ie_cpu, &mask); + id = ie->ie_thread->it_thread->td_tid; + mtx_unlock(&ie->ie_lock); + (void)cpuset_setthread(id, &mask); + } else + mtx_unlock(&ie->ie_lock); return (error); + } + mtx_lock(&ie->ie_lock); ie->ie_cpu = cpu; mtx_unlock(&ie->ie_lock); --- //depot/projects/smpng/sys/sys/priv.h 2009/06/25 15:02:49 +++ //depot/user/jhb/msi/sys/priv.h 2009/06/26 17:05:58 @@ -185,6 +185,7 @@ #define PRIV_SCHED_SET 204 /* Can set thread scheduler. */ #define PRIV_SCHED_SETPARAM 205 /* Can set thread scheduler params. */ #define PRIV_SCHED_CPUSET 206 /* Can manipulate cpusets. */ +#define PRIV_SCHED_CPUSET_INTR 207 /* Can adjust IRQ to CPU binding. */ /* * POSIX semaphore privileges.