--- //depot/projects/smpng/sys/dev/e1000/if_igb.c 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/dev/e1000/if_igb.c 2010-07-08 19:19:45.000000000 0000 @@ -46,6 +46,9 @@ #endif #include #include +#if __FreeBSD_version >= 900000 +#include +#endif #include #include #include @@ -155,13 +158,15 @@ static int igb_shutdown(device_t); static int igb_suspend(device_t); static int igb_resume(device_t); -static void igb_start(struct ifnet *); -static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #if __FreeBSD_version >= 800000 static int igb_mq_start(struct ifnet *, struct mbuf *); static int igb_mq_start_locked(struct ifnet *, struct tx_ring *, struct mbuf *); static void igb_qflush(struct ifnet *); +static void igb_deferred_mq_start(void *, int); +#else +static void igb_start(struct ifnet *); +static void igb_start_locked(struct tx_ring *, struct ifnet *ifp); #endif static int igb_ioctl(struct ifnet *, u_long, caddr_t); static void igb_init(void *); @@ -238,8 +243,13 @@ static int igb_irq_fast(void *); static void igb_add_rx_process_limit(struct adapter *, const char *, const char *, int *, int); +#if __FreeBSD_version < 900000 static void igb_handle_que(void *context, int pending); static void igb_handle_link(void *context, int pending); +#else +static void igb_handle_que(void *); +static void igb_handle_link(void *); +#endif /* These are MSIX only irq handlers */ static void igb_msix_que(void *); @@ -623,6 +633,8 @@ return (EBUSY); } + ether_ifdetach(adapter->ifp); + if (adapter->led_dev != NULL) led_destroy(adapter->led_dev); @@ -654,8 +666,6 @@ if (adapter->vlan_detach != NULL) EVENTHANDLER_DEREGISTER(vlan_unconfig, adapter->vlan_detach); - ether_ifdetach(adapter->ifp); - callout_drain(&adapter->timer); igb_free_pci_resources(adapter); @@ -713,14 +723,27 @@ { struct adapter *adapter = device_get_softc(dev); struct ifnet *ifp = adapter->ifp; +#if __FreeBSD_version >= 800000 + struct tx_ring *txr = adapter->tx_rings; +#endif IGB_CORE_LOCK(adapter); igb_init_locked(adapter); igb_init_manageability(adapter); if ((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) + (ifp->if_drv_flags & IFF_DRV_RUNNING)) { +#if __FreeBSD_version < 800000 igb_start(ifp); +#else + for (int i = 0; i < adapter->num_queues; i++, txr++) { + IGB_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr, NULL); + IGB_TX_UNLOCK(txr); + } +#endif + } IGB_CORE_UNLOCK(adapter); @@ -728,6 +751,7 @@ } +#if __FreeBSD_version < 800000 /********************************************************************* * Transmit entry point * @@ -804,7 +828,7 @@ return; } -#if __FreeBSD_version >= 800000 +#else /* __FreeBSD_version >= 800000 */ /* ** Multiqueue Transmit driver ** @@ -829,7 +853,7 @@ IGB_TX_UNLOCK(txr); } else { err = drbr_enqueue(ifp, txr->br, m); - taskqueue_enqueue(que->tq, &que->que_task); + taskqueue_enqueue(que->tq, &txr->txq_task); } return (err); @@ -892,6 +916,22 @@ } /* + * Called from a taskqueue to drain queued transmit packets. 
+ */ +static void +igb_deferred_mq_start(void *arg, int pending) +{ + struct tx_ring *txr = arg; + struct adapter *adapter = txr->adapter; + struct ifnet *ifp = adapter->ifp; + + IGB_TX_LOCK(txr); + if (!drbr_empty(ifp, txr->br)) + igb_mq_start_locked(ifp, txr, NULL); + IGB_TX_UNLOCK(txr); +} + +/* ** Flush all ring buffers */ static void @@ -909,7 +949,7 @@ } if_qflush(ifp); } -#endif /* __FreeBSD_version >= 800000 */ +#endif /* __FreeBSD_version < 800000 */ /********************************************************************* * Ioctl entry point @@ -1221,9 +1261,13 @@ IGB_CORE_UNLOCK(adapter); } - +#if __FreeBSD_version < 900000 static void igb_handle_que(void *context, int pending) +#else +static void +igb_handle_que(void *context) +#endif { struct igb_queue *que = context; struct adapter *adapter = que->adapter; @@ -1247,7 +1291,11 @@ #endif IGB_TX_UNLOCK(txr); if (more) { +#if __FreeBSD_version < 900000 taskqueue_enqueue(que->tq, &que->que_task); +#else + hwi_sched(que->tag); +#endif return; } } @@ -1264,8 +1312,13 @@ } /* Deal with link in a sleepable context */ +#if __FreeBSD_version < 900000 static void igb_handle_link(void *context, int pending) +#else +static void +igb_handle_link(void *context) +#endif { struct adapter *adapter = context; @@ -1283,7 +1336,9 @@ igb_irq_fast(void *arg) { struct adapter *adapter = arg; +#if __FreeBSD_version < 900000 struct igb_queue *que = adapter->queues; +#endif u32 reg_icr; @@ -1306,15 +1361,25 @@ * MSI message reordering errata on certain systems. */ igb_disable_intr(adapter); +#if __FreeBSD_version < 900000 taskqueue_enqueue(que->tq, &que->que_task); +#endif /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) +#if __FreeBSD_version < 900000 taskqueue_enqueue(que->tq, &adapter->link_task); +#else + hwi_sched(adapter->link_tag); +#endif if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; +#if __FreeBSD_version < 900000 return FILTER_HANDLED; +#else + return FILTER_HANDLED | FILTER_SCHEDULE_THREAD; +#endif } #ifdef DEVICE_POLLING @@ -1350,7 +1415,11 @@ reg_icr = E1000_READ_REG(&adapter->hw, E1000_ICR); /* Link status change */ if (reg_icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) +#if __FreeBSD_version < 900000 igb_handle_link(adapter, 0); +#else + igb_handle_link(adapter); +#endif if (reg_icr & E1000_ICR_RXO) adapter->rx_overruns++; @@ -1452,8 +1521,12 @@ no_calc: /* Schedule a clean task if needed*/ - if (more_tx || more_rx) + if (more_tx || more_rx) +#if __FreeBSD_version < 900000 taskqueue_enqueue(que->tq, &que->que_task); +#else + hwi_sched(que->tag); +#endif else /* Reenable this interrupt */ E1000_WRITE_REG(&adapter->hw, E1000_EIMS, que->eims); @@ -1477,7 +1550,11 @@ icr = E1000_READ_REG(&adapter->hw, E1000_ICR); if (!(icr & E1000_ICR_LSC)) goto spurious; +#if __FreeBSD_version < 900000 igb_handle_link(adapter, 0); +#else + igb_handle_link(adapter); +#endif spurious: /* Rearm */ @@ -2087,6 +2164,7 @@ { device_t dev = adapter->dev; struct igb_queue *que = adapter->queues; + struct tx_ring *txr = adapter->tx_rings; int error, rid = 0; /* Turn off all interrupts */ @@ -2105,6 +2183,9 @@ return (ENXIO); } + TASK_INIT(&txr->txq_task, 0, igb_deferred_mq_start, txr); + +#if __FreeBSD_version < 900000 /* * Try allocating a fast interrupt and the associated deferred * processing contexts. @@ -2125,6 +2206,28 @@ que->tq = NULL; return (error); } +#else + /* Create a taskqueue for deferred transmit queue starts. 
*/ + que->tq = taskqueue_create("igb_taskq", M_NOWAIT, + taskqueue_thread_enqueue, &que->tq); + taskqueue_start_threads(&que->tq, 1, PI_NET, "%s taskq", + device_get_nameunit(adapter->dev)); + + error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE, + igb_irq_fast, igb_handle_que, adapter, &adapter->tag); + if (error) { + device_printf(dev, "Failed to register que interrupt " + "handler: %d\n", error); + return (error); + } + error = bus_setup_intr(dev, adapter->res, INTR_TYPE_NET | INTR_MPSAFE | + INTR_MANUAL, NULL, igb_handle_link, adapter, &adapter->link_tag); + if (error) { + device_printf(dev, "Failed to register link interrupt " + "handler: %d\n", error); + return (error); + } +#endif return (0); } @@ -2175,9 +2278,13 @@ */ if (adapter->num_queues > 1) bus_bind_intr(dev, que->res, i); + TASK_INIT(&que->txr->txq_task, 0, igb_deferred_mq_start, + que->txr); +#if __FreeBSD_version < 900000 /* Make tasklet for deferred handling */ TASK_INIT(&que->que_task, 0, igb_handle_que, que); - que->tq = taskqueue_create_fast("igb_que", M_NOWAIT, +#endif + que->tq = taskqueue_create("igb_que", M_NOWAIT, taskqueue_thread_enqueue, &que->tq); taskqueue_start_threads(&que->tq, 1, PI_NET, "%s que", device_get_nameunit(adapter->dev)); @@ -2382,13 +2489,34 @@ else (adapter->msix != 0) ? (rid = 1):(rid = 0); + que = adapter->queues; if (adapter->tag != NULL) { +#if __FreeBSD_version < 900000 + taskqueue_drain(que->tq, &adapter->link_task); +#endif bus_teardown_intr(dev, adapter->res, adapter->tag); adapter->tag = NULL; } +#if __FreeBSD_version >= 900000 + if (adapter->link_tag != NULL) { + bus_teardown_intr(dev, adapter->res, adapter->link_tag); + adapter->link_tag = NULL; + } +#endif if (adapter->res != NULL) bus_release_resource(dev, SYS_RES_IRQ, rid, adapter->res); + for (int i = 0; i < adapter->num_queues; i++, que++) { + if (que->tq != NULL) { +#if __FreeBSD_version >= 800000 + taskqueue_drain(que->tq, &que->txr->txq_task); +#endif +#if __FreeBSD_version < 900000 + taskqueue_drain(que->tq, &que->que_task); +#endif + taskqueue_free(que->tq); + } + } mem: if (adapter->msix) pci_release_msi(dev); @@ -2637,10 +2765,11 @@ ifp->if_softc = adapter; ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; ifp->if_ioctl = igb_ioctl; - ifp->if_start = igb_start; #if __FreeBSD_version >= 800000 ifp->if_transmit = igb_mq_start; ifp->if_qflush = igb_qflush; +#else + ifp->if_start = igb_start; #endif IFQ_SET_MAXLEN(&ifp->if_snd, adapter->num_tx_desc - 1); ifp->if_snd.ifq_drv_maxlen = adapter->num_tx_desc - 1; --- //depot/projects/smpng/sys/dev/e1000/if_igb.h 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/dev/e1000/if_igb.h 2010-07-08 19:19:45.000000000 0000 @@ -286,7 +286,9 @@ void *tag; struct tx_ring *txr; struct rx_ring *rxr; +#if __FreeBSD_version < 900000 struct task que_task; +#endif struct taskqueue *tq; u64 irqs; }; @@ -309,6 +311,7 @@ struct buf_ring *br; #endif bus_dma_tag_t txtag; + struct task txq_task; u32 bytes; u32 packets; @@ -377,7 +380,11 @@ int linkvec; int link_mask; +#if __FreeBSD_version < 900000 struct task link_task; +#else + void *link_tag; +#endif int link_irq; struct ifmedia media; --- //depot/projects/smpng/sys/kern/kern_intr.c 2010-06-10 20:54:07.000000000 0000 +++ //depot/user/jhb/intr/kern/kern_intr.c 2010-06-11 13:47:32.000000000 0000 @@ -63,62 +63,92 @@ #endif /* - * Describe an interrupt thread. There is one of these per interrupt event. + * Describe an interrupt thread. Software interrupt events have + * dedicated threads. 
Hardware interrupt events share a pool of + * threads. */ struct intr_thread { - struct intr_event *it_event; struct thread *it_thread; /* Kernel thread. */ - int it_flags; /* (j) IT_* flags. */ - int it_need; /* Needs service. */ + int it_flags; /* IT_* flags. */ + TAILQ_ENTRY(intr_thread) it_list; /* List of free hwi threads. */ +#ifdef INVARIANTS + struct intr_handler *it_current; /* Current handler for hwi. */ +#endif }; +TAILQ_HEAD(ithread_queue, intr_thread); /* Interrupt thread flags kept in it_flags */ #define IT_DEAD 0x000001 /* Thread is waiting to exit. */ +#define IT_SOFT 0x000002 /* Thread is for a software interrupt. */ struct intr_entropy { struct thread *td; uintptr_t event; }; +/* Return values for intr_handler_execute(). */ +enum { + FINISHED, + DYING, + REQUEUE, +}; + struct intr_event *clk_intr_event; struct intr_event *tty_intr_event; void *vm_ih; -struct proc *intrproc; +struct proc *hwintr; /* Pool of hardware interrupt threads. */ +struct proc *swintr; /* Container for software interrupt threads. */ static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads"); +SYSCTL_NODE(_kern, OID_AUTO, intr, CTLFLAG_RD, 0, "Interrupt parameters"); + +#ifdef notyet static int intr_storm_threshold = 1000; -TUNABLE_INT("hw.intr_storm_threshold", &intr_storm_threshold); -SYSCTL_INT(_hw, OID_AUTO, intr_storm_threshold, CTLFLAG_RW, +TUNABLE_INT("kern.intr.storm_threshold", &intr_storm_threshold); +SYSCTL_INT(_kern_intr, OID_AUTO, storm_threshold, CTLFLAG_RW, &intr_storm_threshold, 0, "Number of consecutive interrupts before storm protection is enabled"); -static TAILQ_HEAD(, intr_event) event_list = - TAILQ_HEAD_INITIALIZER(event_list); +#endif + +static TAILQ_HEAD(, intr_hardware) hwi_event_list = + TAILQ_HEAD_INITIALIZER(hwi_event_list); +static TAILQ_HEAD(, intr_software) swi_event_list = + TAILQ_HEAD_INITIALIZER(swi_event_list); static struct mtx event_lock; -MTX_SYSINIT(intr_event_list, &event_lock, "intr event list", MTX_DEF); +MTX_SYSINIT(intr_event_list, &event_lock, "intr event lists", MTX_DEF); + +static struct ithread_queue hwi_threads = + TAILQ_HEAD_INITIALIZER(hwi_threads); +static struct mtx hwi_thread_lock; +MTX_SYSINIT(hwi_thread_pool, &hwi_thread_lock, "hwi threads", MTX_SPIN | + MTX_RECURSE); +static int hwi_check_cpus, hwi_nhandlers, hwi_thread_count, hwi_thread_warn; +static void hwi_alloc_thread(void); +static void hwi_create_thread(void); +static int hwi_destroy_thread(void); +static void hwi_loop(void *); +static int hwi_max_threads(void); +static int hwi_min_threads(void); +static int hwi_pending_cpu(struct intr_thread *it); +static int hwi_thread_sysctl(SYSCTL_HANDLER_ARGS); +static void intr_event_init(struct intr_event *ie, int flags, + const char *fmt, va_list ap); +static void intr_event_run_lock(struct intr_event *ie); +static void intr_event_run_unlock(struct intr_event *ie); static void intr_event_update(struct intr_event *ie); -#ifdef INTR_FILTER -static int intr_event_schedule_thread(struct intr_event *ie, - struct intr_thread *ithd); -static int intr_filter_loop(struct intr_event *ie, - struct trapframe *frame, struct intr_thread **ithd); -static struct intr_thread *ithread_create(const char *name, - struct intr_handler *ih); -#else -static int intr_event_schedule_thread(struct intr_event *ie); -static struct intr_thread *ithread_create(const char *name); -#endif +static void intr_handler_ack_dying(struct intr_event *ie, + struct intr_handler *ih); +static int intr_handler_execute(struct intr_handler *ih, + struct intr_event *ie, struct thread 
*td); +static struct intr_hardware *intr_lookup(int irq); +static struct intr_thread *ithread_create(struct proc **pp, void *func, + void *arg, const char *name, int pri); static void ithread_destroy(struct intr_thread *ithread); -static void ithread_execute_handlers(struct proc *p, - struct intr_event *ie); -#ifdef INTR_FILTER -static void priv_ithread_execute_handler(struct proc *p, - struct intr_handler *ih); -#endif -static void ithread_loop(void *); -static void ithread_update(struct intr_thread *ithd); +static void ithread_update(struct intr_event *ie); static void start_softintr(void *); +static void swi_loop(void *); /* Map an interrupt type to an ithread priority. */ u_char @@ -163,17 +193,18 @@ } /* - * Update an ithread based on the associated intr_event. + * Update the ithread for a software interrupt based on the associated + * intr_event. */ static void -ithread_update(struct intr_thread *ithd) +ithread_update(struct intr_event *ie) { - struct intr_event *ie; + struct intr_software *isw; struct thread *td; u_char pri; - ie = ithd->it_event; - td = ithd->it_thread; + isw = (struct intr_software *)ie; + td = isw->isw_thread->it_thread; /* Determine the overall priority of this event. */ if (TAILQ_EMPTY(&ie->ie_handlers)) @@ -239,62 +270,67 @@ } /* - * If this event has an ithread, update it's priority and - * name. + * If this is a software interrupt event, update the priority + * and name of the associated thread. */ - if (ie->ie_thread != NULL) - ithread_update(ie->ie_thread); + if (ie->ie_flags & IE_SOFT) + ithread_update(ie); CTR2(KTR_INTR, "%s: updated %s", __func__, ie->ie_fullname); } -int -intr_event_create(struct intr_event **event, void *source, int flags, int irq, - void (*pre_ithread)(void *), void (*post_ithread)(void *), - void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), - const char *fmt, ...) +static void +intr_event_init(struct intr_event *ie, int flags, const char *fmt, va_list ap) { - struct intr_event *ie; - va_list ap; - /* The only valid flag during creation is IE_SOFT. */ - if ((flags & ~IE_SOFT) != 0) - return (EINVAL); - ie = malloc(sizeof(struct intr_event), M_ITHREAD, M_WAITOK | M_ZERO); - ie->ie_source = source; - ie->ie_pre_ithread = pre_ithread; - ie->ie_post_ithread = post_ithread; - ie->ie_post_filter = post_filter; - ie->ie_assign_cpu = assign_cpu; ie->ie_flags = flags; - ie->ie_irq = irq; ie->ie_cpu = NOCPU; TAILQ_INIT(&ie->ie_handlers); mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); + vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap); + strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname)); + CTR1(KTR_INTR, "intr_event_init: created %s", ie->ie_name); +} + +void +hwi_create(struct intr_event **event, void *source, int irq, + void (*pre_ithread)(void *), void (*post_ithread)(void *), + void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), + const char *fmt, ...) 
+{
+        struct intr_hardware *ihw;
+        va_list ap;
+
+        ihw = malloc(sizeof(struct intr_hardware), M_ITHREAD,
+            M_WAITOK | M_ZERO);
         va_start(ap, fmt);
-        vsnprintf(ie->ie_name, sizeof(ie->ie_name), fmt, ap);
+        intr_event_init(&ihw->ihw_event, 0, fmt, ap);
         va_end(ap);
-        strlcpy(ie->ie_fullname, ie->ie_name, sizeof(ie->ie_fullname));
+        ihw->ihw_source = source;
+        ihw->ihw_pre_ithread = pre_ithread;
+        ihw->ihw_post_ithread = post_ithread;
+        ihw->ihw_post_filter = post_filter;
+        ihw->ihw_assign_cpu = assign_cpu;
+        ihw->ihw_irq = irq;
+        TAILQ_INIT(&ihw->ihw_manual);
         mtx_lock(&event_lock);
-        TAILQ_INSERT_TAIL(&event_list, ie, ie_list);
+        TAILQ_INSERT_TAIL(&hwi_event_list, ihw, ihw_list);
         mtx_unlock(&event_lock);
         if (event != NULL)
-                *event = ie;
-        CTR2(KTR_INTR, "%s: created %s", __func__, ie->ie_name);
-        return (0);
+                *event = &ihw->ihw_event;
 }
 
 /*
  * Bind an interrupt event to the specified CPU.  Note that not all
  * platforms support binding an interrupt to a CPU.  For those
- * platforms this request will fail.  For supported platforms, any
- * associated ithreads as well as the primary interrupt context will
- * be bound to the specificed CPU.  Using a cpu id of NOCPU unbinds
+ * platforms this request will fail.  Using a cpu id of NOCPU unbinds
  * the interrupt event.
  */
 int
 intr_event_bind(struct intr_event *ie, u_char cpu)
 {
+        struct intr_hardware *ihw;
+        struct intr_software *isw;
         cpuset_t mask;
         lwpid_t id;
         int error;
@@ -303,46 +339,33 @@
 
         if (cpu != NOCPU && CPU_ABSENT(cpu))
                 return (EINVAL);
 
-        if (ie->ie_assign_cpu == NULL)
-                return (EOPNOTSUPP);
-
         error = priv_check(curthread, PRIV_SCHED_CPUSET_INTR);
         if (error)
                 return (error);
 
-        /*
-         * If we have any ithreads try to set their mask first to verify
-         * permissions, etc.
-         */
-        mtx_lock(&ie->ie_lock);
-        if (ie->ie_thread != NULL) {
+        if (ie->ie_flags & IE_SOFT) {
+                /* For software interrupts, apply the cpuset to the associated swi thread.
*/ + isw = (struct intr_software *)ie; + mtx_lock(&ie->ie_lock); CPU_ZERO(&mask); if (cpu == NOCPU) CPU_COPY(cpuset_root, &mask); else CPU_SET(cpu, &mask); - id = ie->ie_thread->it_thread->td_tid; + id = isw->isw_thread->it_thread->td_tid; mtx_unlock(&ie->ie_lock); error = cpuset_setthread(id, &mask); if (error) return (error); - } else - mtx_unlock(&ie->ie_lock); - error = ie->ie_assign_cpu(ie->ie_source, cpu); - if (error) { - mtx_lock(&ie->ie_lock); - if (ie->ie_thread != NULL) { - CPU_ZERO(&mask); - if (ie->ie_cpu == NOCPU) - CPU_COPY(cpuset_root, &mask); - else - CPU_SET(cpu, &mask); - id = ie->ie_thread->it_thread->td_tid; - mtx_unlock(&ie->ie_lock); - (void)cpuset_setthread(id, &mask); - } else - mtx_unlock(&ie->ie_lock); - return (error); + } else { + ihw = (struct intr_hardware *)ie; + + if (ihw->ihw_assign_cpu == NULL) + return (EOPNOTSUPP); + + error = ihw->ihw_assign_cpu(ihw->ihw_source, cpu); + if (error) + return (error); } mtx_lock(&ie->ie_lock); @@ -352,25 +375,24 @@ return (error); } -static struct intr_event * +static struct intr_hardware * intr_lookup(int irq) { - struct intr_event *ie; + struct intr_hardware *ihw; mtx_lock(&event_lock); - TAILQ_FOREACH(ie, &event_list, ie_list) - if (ie->ie_irq == irq && - (ie->ie_flags & IE_SOFT) == 0 && - TAILQ_FIRST(&ie->ie_handlers) != NULL) + TAILQ_FOREACH(ihw, &hwi_event_list, ihw_list) + if (ihw->ihw_irq == irq && + TAILQ_FIRST(&ihw->ihw_event.ie_handlers) != NULL) break; mtx_unlock(&event_lock); - return (ie); + return (ihw); } int intr_setaffinity(int irq, void *m) { - struct intr_event *ie; + struct intr_hardware *ihw; cpuset_t *mask; u_char cpu; int n; @@ -390,50 +412,49 @@ cpu = (u_char)n; } } - ie = intr_lookup(irq); - if (ie == NULL) + ihw = intr_lookup(irq); + if (ihw == NULL) return (ESRCH); - return (intr_event_bind(ie, cpu)); + return (intr_event_bind(&ihw->ihw_event, cpu)); } int intr_getaffinity(int irq, void *m) { - struct intr_event *ie; + struct intr_hardware *ihw; cpuset_t *mask; mask = m; - ie = intr_lookup(irq); - if (ie == NULL) + ihw = intr_lookup(irq); + if (ihw == NULL) return (ESRCH); CPU_ZERO(mask); - mtx_lock(&ie->ie_lock); - if (ie->ie_cpu == NOCPU) + mtx_lock(&ihw->ihw_event.ie_lock); + if (ihw->ihw_event.ie_cpu == NOCPU) CPU_COPY(cpuset_root, mask); else - CPU_SET(ie->ie_cpu, mask); - mtx_unlock(&ie->ie_lock); + CPU_SET(ihw->ihw_event.ie_cpu, mask); + mtx_unlock(&ihw->ihw_event.ie_lock); return (0); } int -intr_event_destroy(struct intr_event *ie) +hwi_destroy(struct intr_event *ie) { + struct intr_hardware *ihw; + if (ie->ie_flags & IE_SOFT) + return (EINVAL); + ihw = (struct intr_hardware *)ie; + mtx_lock(&event_lock); mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { + if (!TAILQ_EMPTY(&ie->ie_handlers) || !TAILQ_EMPTY(&ihw->ihw_manual)) { mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); return (EBUSY); } - TAILQ_REMOVE(&event_list, ie, ie_list); -#ifndef notyet - if (ie->ie_thread != NULL) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; - } -#endif + TAILQ_REMOVE(&hwi_event_list, ihw, ihw_list); mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); mtx_destroy(&ie->ie_lock); @@ -441,63 +462,41 @@ return (0); } -#ifndef INTR_FILTER +/* Create an interrupt thread. 
*/ static struct intr_thread * -ithread_create(const char *name) +ithread_create(struct proc **pp, void *func, void *arg, const char *name, + int pri) { struct intr_thread *ithd; struct thread *td; int error; - ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); - - error = kproc_kthread_add(ithread_loop, ithd, &intrproc, - &td, RFSTOPPED | RFHIGHPID, - 0, "intr", "%s", name); + ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | + M_ZERO); + error = kproc_kthread_add(func, arg, pp, &td, RFSTOPPED | RFHIGHPID, 0, + name, name); if (error) - panic("kproc_create() failed with %d", error); + panic("failed to create interrupt thread with %d", error); thread_lock(td); sched_class(td, PRI_ITHD); + sched_prio(td, pri); TD_SET_IWAIT(td); thread_unlock(td); td->td_pflags |= TDP_ITHREAD; + td->td_ithread = ithd; ithd->it_thread = td; - CTR2(KTR_INTR, "%s: created %s", __func__, name); + CTR2(KTR_INTR, "ithread_create: created tid %d(%s)", td->td_tid, name); return (ithd); } -#else -static struct intr_thread * -ithread_create(const char *name, struct intr_handler *ih) -{ - struct intr_thread *ithd; - struct thread *td; - int error; - - ithd = malloc(sizeof(struct intr_thread), M_ITHREAD, M_WAITOK | M_ZERO); - error = kproc_kthread_add(ithread_loop, ih, &intrproc, - &td, RFSTOPPED | RFHIGHPID, - 0, "intr", "%s", name); - if (error) - panic("kproc_create() failed with %d", error); - thread_lock(td); - sched_class(td, PRI_ITHD); - TD_SET_IWAIT(td); - thread_unlock(td); - td->td_pflags |= TDP_ITHREAD; - ithd->it_thread = td; - CTR2(KTR_INTR, "%s: created %s", __func__, name); - return (ithd); -} -#endif - static void ithread_destroy(struct intr_thread *ithread) { struct thread *td; - CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); td = ithread->it_thread; + CTR2(KTR_INTR, "ithread_destroy: killing tid %d(%s)", td->td_tid, + td->td_name); thread_lock(td); ithread->it_flags |= IT_DEAD; if (TD_AWAITING_INTR(td)) { @@ -507,106 +506,236 @@ thread_unlock(td); } -#ifndef INTR_FILTER -int -intr_event_add_handler(struct intr_event *ie, const char *name, - driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, - enum intr_type flags, void **cookiep) +/* + * Look for a CPU that has queued interrupts but no active interrupt + * thread to donate the specified thread to. Returns ID of a CPU that + * has queued handlers or NOCPU if no such CPU was found. If no CPU + * was found, hwi_check_cpus is reset. + * + * This is the recovery mechanism used for the case where an interrupt + * handler was scheduled on a CPU's active queue but an interrupt + * thread was not available. + */ +static int +hwi_pending_cpu(struct intr_thread *it) +{ + struct pcpu *pc; + + mtx_assert(&hwi_thread_lock, MA_OWNED); + SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { + if (!STAILQ_EMPTY(&pc->pc_hwi_active) && + pc->pc_hwi_thread == NULL) { + CTR3(KTR_INTR, + "hwi_pending_cpu: tid %d (%s) assigned to CPU %d", + it->it_thread->td_tid, it->it_thread->td_name, + pc->pc_cpuid); + pc->pc_hwi_thread = it->it_thread; + return (pc->pc_cpuid); + } + } + hwi_check_cpus = 0; + return (NOCPU); +} + +/* Create a hardware interrupt thread. 
*/ +static void +hwi_create_thread(void) { - struct intr_handler *ih, *temp_ih; struct intr_thread *it; + struct thread *td; + int cpuid; - if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) - return (EINVAL); + mtx_assert(&hwi_thread_lock, MA_OWNED); + hwi_thread_count++; + mtx_unlock_spin(&hwi_thread_lock); + it = ithread_create(&hwintr, hwi_loop, NULL, "intr", PRI_MIN_ITHD); + td = it->it_thread; + thread_lock(td); + mtx_lock_spin(&hwi_thread_lock); + hwi_thread_warn = 0; + thread_lock_set(td, &hwi_thread_lock); - /* Allocate and populate an interrupt handler structure. */ - ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); - ih->ih_filter = filter; - ih->ih_handler = handler; - ih->ih_argument = arg; - strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); - ih->ih_event = ie; - ih->ih_pri = pri; - if (flags & INTR_EXCL) - ih->ih_flags = IH_EXCLUSIVE; - if (flags & INTR_MPSAFE) - ih->ih_flags |= IH_MPSAFE; - if (flags & INTR_ENTROPY) - ih->ih_flags |= IH_ENTROPY; - - /* We can only have one exclusive handler in a event. */ - mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { - if ((flags & INTR_EXCL) || - (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { - mtx_unlock(&ie->ie_lock); - free(ih, M_ITHREAD); - return (EINVAL); + /* Schedule this thread immediately if a CPU needs a thread. */ + if (hwi_check_cpus) { + cpuid = hwi_pending_cpu(it); + if (cpuid != NOCPU) { + CTR3(KTR_INTR, + "hwi_create_thread: schedule tid %d (%s) for CPU %d", + td->td_tid, td->td_name, cpuid); + sched_bind_ithd(td, cpuid); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + thread_unlock(td); + mtx_lock_spin(&hwi_thread_lock); + return; } } - /* Add the new handler to the event in priority order. */ - TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { - if (temp_ih->ih_pri > ih->ih_pri) + TAILQ_INSERT_TAIL(&hwi_threads, it, it_list); +} + +/* + * Destroy a hardware interrupt thread if a free one is available. If + * all threads are busy, this will return false instead. + */ +static int +hwi_destroy_thread(void) +{ + struct intr_thread *it; + + mtx_assert(&hwi_thread_lock, MA_OWNED); + KASSERT(hwi_thread_count > 0, ("no hwi threads to destroy")); + if (TAILQ_EMPTY(&hwi_threads)) + return (0); + hwi_thread_count--; + it = TAILQ_LAST(&hwi_threads, ithread_queue); + TAILQ_REMOVE(&hwi_threads, it, it_list); + mtx_unlock_spin(&hwi_thread_lock); + ithread_destroy(it); + mtx_lock_spin(&hwi_thread_lock); + return (1); +} + +/* Minimum number of hardware interrupt threads. */ +static __inline int +hwi_min_threads(void) +{ + + /* + * XXX: Capping the minimum at 2 threads per CPU is completely + * arbitrary. + */ + if (hwi_nhandlers < mp_ncpus * 2) + return (hwi_nhandlers); + else + return (mp_ncpus * 2); +} + +/* Maximum number of hardware interrupt threads. */ +static __inline int +hwi_max_threads(void) +{ + + /* No reason to have more threads than handlers. 
*/ + return (hwi_nhandlers); +} + +static int +hwi_thread_sysctl(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = hwi_thread_count; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + + if (value < hwi_min_threads() || value > hwi_max_threads()) + return (EINVAL); + + mtx_lock_spin(&hwi_thread_lock); + while (hwi_thread_count < value && + hwi_thread_count < hwi_max_threads()) + hwi_create_thread(); + while (hwi_thread_count > value && + hwi_thread_count > hwi_min_threads()) + if (!hwi_destroy_thread()) break; + mtx_unlock_spin(&hwi_thread_lock); + return (0); +} +SYSCTL_PROC(_kern_intr, OID_AUTO, thread_count, CTLFLAG_RW | CTLTYPE_INT, + NULL, 0, hwi_thread_sysctl, "I", "Number of hardware interrupt threads"); + +/* + * Acquire the interrupt event "run" lock in a non-interrupt context. + */ +static __inline void +intr_event_run_lock(struct intr_event *ie) +{ + + /* + * We disable interrupts while we hold the interrupt event run + * lock to avoid a priority inversion deadlock if this + * interrupt fires. + */ + spinlock_enter(); + while (!atomic_cmpset_acq_int(&ie->ie_running, 0, 1)) { + spinlock_exit(); + + while (ie->ie_running) + cpu_spinwait(); + + spinlock_enter(); } - if (temp_ih == NULL) - TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); - else - TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); - intr_event_update(ie); +} - /* Create a thread if we need one. */ - while (ie->ie_thread == NULL && handler != NULL) { - if (ie->ie_flags & IE_ADDING_THREAD) - msleep(ie, &ie->ie_lock, 0, "ithread", 0); - else { - ie->ie_flags |= IE_ADDING_THREAD; - mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn"); - mtx_lock(&ie->ie_lock); - ie->ie_flags &= ~IE_ADDING_THREAD; - ie->ie_thread = it; - it->it_event = ie; - ithread_update(it); - wakeup(ie); - } - } - CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, - ie->ie_name); - mtx_unlock(&ie->ie_lock); +/* + * Release the interrupt event "run" lock in a non-interrupt context. + */ +static __inline void +intr_event_run_unlock(struct intr_event *ie) +{ - if (cookiep != NULL) - *cookiep = ih; - return (0); + atomic_store_rel_int(&ie->ie_running, 0); + spinlock_exit(); } -#else + int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep) { struct intr_handler *ih, *temp_ih; - struct intr_thread *it; + struct intr_hardware *ihw; - if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) + if (ie == NULL || name == NULL || + (handler == NULL && filter == NULL) || + ((flags & INTR_MANUAL) && filter != NULL) || + ((flags & INTR_MANUAL) && (ie->ie_flags & IE_SOFT)) || + ((ie->ie_flags & IE_SOFT) && filter != NULL)) return (EINVAL); /* Allocate and populate an interrupt handler structure. */ - ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | M_ZERO); + ih = malloc(sizeof(struct intr_handler), M_ITHREAD, M_WAITOK | + M_ZERO); ih->ih_filter = filter; ih->ih_handler = handler; ih->ih_argument = arg; strlcpy(ih->ih_name, name, sizeof(ih->ih_name)); ih->ih_event = ie; ih->ih_pri = pri; + if (flags & INTR_MANUAL) + ih->ih_flags |= IH_MANUAL; if (flags & INTR_EXCL) - ih->ih_flags = IH_EXCLUSIVE; + ih->ih_flags |= IH_EXCLUSIVE; if (flags & INTR_MPSAFE) ih->ih_flags |= IH_MPSAFE; if (flags & INTR_ENTROPY) ih->ih_flags |= IH_ENTROPY; + /* + * Manually scheduled interrupt handlers are not part of the + * normal list of handlers. 
However, they require that at + * least one non-manual handler is already active for this + * event. In general they should be handled by an existing + * filter or handler. + * + * XXX: They should perhaps just be swi handlers instead. + */ + if (flags & INTR_MANUAL) { + mtx_lock(&ie->ie_lock); + if (TAILQ_EMPTY(&ie->ie_handlers)) { + mtx_unlock(&ie->ie_lock); + free(ih, M_ITHREAD); + return (EINVAL); + } + ihw = (struct intr_hardware *)ie; + TAILQ_INSERT_TAIL(&ihw->ihw_manual, ih, ih_next); + mtx_unlock(&ie->ie_lock); + goto finish; + } + /* We can only have one exclusive handler in a event. */ mtx_lock(&ie->ie_lock); if (!TAILQ_EMPTY(&ie->ie_handlers)) { @@ -623,46 +752,32 @@ if (temp_ih->ih_pri > ih->ih_pri) break; } + intr_event_run_lock(ie); if (temp_ih == NULL) TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); else TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); + intr_event_run_unlock(ie); intr_event_update(ie); - /* For filtered handlers, create a private ithread to run on. */ - if (filter != NULL && handler != NULL) { - mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn", ih); - mtx_lock(&ie->ie_lock); - it->it_event = ie; - ih->ih_thread = it; - ithread_update(it); // XXX - do we really need this?!?!? - } else { /* Create the global per-event thread if we need one. */ - while (ie->ie_thread == NULL && handler != NULL) { - if (ie->ie_flags & IE_ADDING_THREAD) - msleep(ie, &ie->ie_lock, 0, "ithread", 0); - else { - ie->ie_flags |= IE_ADDING_THREAD; - mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn", ih); - mtx_lock(&ie->ie_lock); - ie->ie_flags &= ~IE_ADDING_THREAD; - ie->ie_thread = it; - it->it_event = ie; - ithread_update(it); - wakeup(ie); - } - } - } CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); + /* Expand the hardware interrupt thread pool if needed. */ + if (!(ie->ie_flags & IE_SOFT) && ih->ih_handler != NULL) { + mtx_lock_spin(&hwi_thread_lock); + hwi_nhandlers++; + while (hwi_thread_count < hwi_min_threads()) + hwi_create_thread(); + mtx_unlock_spin(&hwi_thread_lock); + } + +finish: if (cookiep != NULL) *cookiep = ih; return (0); } -#endif /* * Append a description preceded by a ':' to the name of the specified @@ -672,6 +787,9 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie, const char *descr) { +#ifdef INVARIANTS + struct intr_hardware *ihw; +#endif struct intr_handler *ih; size_t space; char *start; @@ -682,6 +800,13 @@ if (ih == cookie) break; } + if (ih == NULL && !(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + TAILQ_FOREACH(ih, &ihw->ihw_manual, ih_next) { + if (ih == cookie) + break; + } + } if (ih == NULL) { mtx_unlock(&ie->ie_lock); panic("handler %p not found in interrupt event %p", cookie, ie); @@ -720,12 +845,13 @@ } /* - * Return the ie_source field from the intr_event an intr_handler is - * associated with. + * Return the source cookie for a hardware interrupt that a hardware + * interrupt handler is associated with. 
*/ void * -intr_handler_source(void *cookie) +hwi_handler_source(void *cookie) { + struct intr_hardware *ihw; struct intr_handler *ih; struct intr_event *ie; @@ -736,168 +862,37 @@ KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", ih->ih_name)); - return (ie->ie_source); + KASSERT(!(ie->ie_flags & IE_SOFT), + ("intr_handler_source: swi handler")); + ihw = (struct intr_hardware *)ie; + return (ihw->ihw_source); } -#ifndef INTR_FILTER -int -intr_event_remove_handler(void *cookie) +/* + * Called from an interrupt thread loop when it encounters a dying + * interrupt handler. This marks the handler as dead and awakens the + * sleeping thread that is removing the handler. + */ +static void +intr_handler_ack_dying(struct intr_event *ie, struct intr_handler *ih) { - struct intr_handler *handler = (struct intr_handler *)cookie; - struct intr_event *ie; -#ifdef INVARIANTS - struct intr_handler *ih; -#endif -#ifdef notyet - int dead; -#endif - if (handler == NULL) - return (EINVAL); - ie = handler->ih_event; - KASSERT(ie != NULL, - ("interrupt handler \"%s\" has a NULL interrupt event", - handler->ih_name)); mtx_lock(&ie->ie_lock); - CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, - ie->ie_name); -#ifdef INVARIANTS - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) - if (ih == handler) - goto ok; + ih->ih_state = IS_DEAD; + wakeup(ih); mtx_unlock(&ie->ie_lock); - panic("interrupt handler \"%s\" not found in interrupt event \"%s\"", - ih->ih_name, ie->ie_name); -ok: -#endif - /* - * If there is no ithread, then just remove the handler and return. - * XXX: Note that an INTR_FAST handler might be running on another - * CPU! - */ - if (ie->ie_thread == NULL) { - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); - } - - /* - * If the interrupt thread is already running, then just mark this - * handler as being dead and let the ithread do the actual removal. - * - * During a cold boot while cold is set, msleep() does not sleep, - * so we have to remove the handler here rather than letting the - * thread do it. - */ - thread_lock(ie->ie_thread->it_thread); - if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { - handler->ih_flags |= IH_DEAD; - - /* - * Ensure that the thread will process the handler list - * again and remove this handler if it has already passed - * it on the list. - */ - ie->ie_thread->it_need = 1; - } else - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - thread_unlock(ie->ie_thread->it_thread); - while (handler->ih_flags & IH_DEAD) - msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); - intr_event_update(ie); -#ifdef notyet - /* - * XXX: This could be bad in the case of ppbus(8). Also, I think - * this could lead to races of stale data when servicing an - * interrupt. - */ - dead = 1; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (!(ih->ih_flags & IH_FAST)) { - dead = 0; - break; - } - } - if (dead) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; - } -#endif - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); } -static int -intr_event_schedule_thread(struct intr_event *ie) -{ - struct intr_entropy entropy; - struct intr_thread *it; - struct thread *td; - struct thread *ctd; - struct proc *p; - - /* - * If no ithread or no handlers, then we have a stray interrupt. 
- */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || - ie->ie_thread == NULL) - return (EINVAL); - - ctd = curthread; - it = ie->ie_thread; - td = it->it_thread; - p = td->td_proc; - - /* - * If any of the handlers for this ithread claim to be good - * sources of entropy, then gather some. - */ - if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { - CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__, - p->p_pid, td->td_name); - entropy.event = (uintptr_t)ie; - entropy.td = ctd; - random_harvest(&entropy, sizeof(entropy), 2, 0, - RANDOM_INTERRUPT); - } - - KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); - - /* - * Set it_need to tell the thread to keep running if it is already - * running. Then, lock the thread and see if we actually need to - * put it on the runqueue. - */ - it->it_need = 1; - thread_lock(td); - if (TD_AWAITING_INTR(td)) { - CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, - td->td_name); - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } else { - CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", - __func__, p->p_pid, td->td_name, it->it_need, td->td_state); - } - thread_unlock(td); - - return (0); -} -#else int intr_event_remove_handler(void *cookie) { struct intr_handler *handler = (struct intr_handler *)cookie; struct intr_event *ie; - struct intr_thread *it; #ifdef INVARIANTS + struct intr_hardware *ihw; struct intr_handler *ih; #endif -#ifdef notyet - int dead; -#endif + int state; if (handler == NULL) return (EINVAL); @@ -909,6 +904,13 @@ CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, ie->ie_name); #ifdef INVARIANTS + if (!(ie->ie_flags & IE_SOFT)) { + ihw = (struct intr_hardware *)ie; + TAILQ_FOREACH(ih, &ihw->ihw_manual, ih_next) { + if (ih == handler) + goto ok; + } + } TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) if (ih == handler) goto ok; @@ -917,139 +919,120 @@ ih->ih_name, ie->ie_name); ok: #endif - /* - * If there are no ithreads (per event and per handler), then - * just remove the handler and return. - * XXX: Note that an INTR_FAST handler might be running on another CPU! - */ - if (ie->ie_thread == NULL && handler->ih_thread == NULL) { - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock(&ie->ie_lock); - free(handler, M_ITHREAD); - return (0); - } - /* Private or global ithread? */ - it = (handler->ih_thread) ? handler->ih_thread : ie->ie_thread; /* - * If the interrupt thread is already running, then just mark this - * handler as being dead and let the ithread do the actual removal. - * - * During a cold boot while cold is set, msleep() does not sleep, - * so we have to remove the handler here rather than letting the - * thread do it. + * Manual interrupt handlers are on a separate list in the + * interrupt event. */ - thread_lock(it->it_thread); - if (!TD_AWAITING_INTR(it->it_thread) && !cold) { - handler->ih_flags |= IH_DEAD; - + if (handler->ih_flags & IH_MANUAL) { + ihw = (struct intr_hardware *)ie; + TAILQ_REMOVE(&ihw->ihw_manual, handler, ih_next); + } else { /* - * Ensure that the thread will process the handler list - * again and remove this handler if it has already passed - * it on the list. + * First, wait for the interrupt event to go idle so + * we can remove the handler from the event's list. 
*/ - it->it_need = 1; - } else + intr_event_run_lock(ie); TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - thread_unlock(it->it_thread); - while (handler->ih_flags & IH_DEAD) - msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); - /* - * At this point, the handler has been disconnected from the event, - * so we can kill the private ithread if any. - */ - if (handler->ih_thread) { - ithread_destroy(handler->ih_thread); - handler->ih_thread = NULL; + intr_event_run_unlock(ie); + intr_event_update(ie); } - intr_event_update(ie); -#ifdef notyet + mtx_unlock(&ie->ie_lock); + /* - * XXX: This could be bad in the case of ppbus(8). Also, I think - * this could lead to races of stale data when servicing an - * interrupt. + * Next, wait for the interrupt handler to go idle. If it is + * already idle, just mark it as dead. If it has been queued + * or is executing, attempt to mark it as dying and then wait + * for an interrupt thread to drain it from an active list. */ - dead = 1; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (handler != NULL) { - dead = 0; + for (state = handler->ih_state; state != IS_DEAD; + state = handler->ih_state) { + switch (state) { + case IS_IDLE: + /* + * If the interrupt handler is idle, try to mark + * it as dead. + */ + atomic_cmpset_int(&handler->ih_state, IS_IDLE, IS_DEAD); + break; + default: + /* + * If the interrupt handler is busy, mark it + * as dying and wait. + */ + if (atomic_cmpset_int(&handler->ih_state, state, + IS_DYING)) { + mtx_lock(&ie->ie_lock); + while (handler->ih_state != IS_DEAD) + mtx_sleep(handler, &ie->ie_lock, 0, + "iev_rmh", 0); + mtx_unlock(&ie->ie_lock); + } break; } } - if (dead) { - ithread_destroy(ie->ie_thread); - ie->ie_thread = NULL; + + /* Shrink the hardware interrupt thread pool if needed. */ + if (!(ie->ie_flags & IE_SOFT) && handler->ih_handler != NULL && + !(handler->ih_flags & IH_MANUAL)) { + mtx_lock_spin(&hwi_thread_lock); + hwi_nhandlers--; + while (hwi_thread_count > hwi_max_threads()) + if (!hwi_destroy_thread()) + break; + mtx_unlock_spin(&hwi_thread_lock); } -#endif - mtx_unlock(&ie->ie_lock); + + /* The handler is now unreferenced, so can finally free it. */ free(handler, M_ITHREAD); return (0); } -static int -intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) +/* Create a software interrupt event and thread. */ +int +swi_create(struct intr_event **event, const char *fmt, ...) { - struct intr_entropy entropy; - struct thread *td; - struct thread *ctd; - struct proc *p; + struct intr_software *isw; + va_list ap; - /* - * If no ithread or no handlers, then we have a stray interrupt. - */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || it == NULL) - return (EINVAL); - - ctd = curthread; - td = it->it_thread; - p = td->td_proc; - - /* - * If any of the handlers for this ithread claim to be good - * sources of entropy, then gather some. - */ - if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { - CTR3(KTR_INTR, "%s: pid %d (%s) gathering entropy", __func__, - p->p_pid, td->td_name); - entropy.event = (uintptr_t)ie; - entropy.td = ctd; - random_harvest(&entropy, sizeof(entropy), 2, 0, - RANDOM_INTERRUPT); - } - - KASSERT(p != NULL, ("ithread %s has no process", ie->ie_name)); - - /* - * Set it_need to tell the thread to keep running if it is already - * running. Then, lock the thread and see if we actually need to - * put it on the runqueue. 
- */ - it->it_need = 1; - thread_lock(td); - if (TD_AWAITING_INTR(td)) { - CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, - td->td_name); - TD_CLR_IWAIT(td); - sched_add(td, SRQ_INTR); - } else { - CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", - __func__, p->p_pid, td->td_name, it->it_need, td->td_state); - } - thread_unlock(td); - + isw = malloc(sizeof(struct intr_software), M_ITHREAD, + M_WAITOK | M_ZERO); + va_start(ap, fmt); + intr_event_init(&isw->isw_event, IE_SOFT, fmt, ap); + va_end(ap); + isw->isw_thread = ithread_create(&swintr, swi_loop, isw, "swi", + PRI_MAX_ITHD); + isw->isw_thread->it_flags |= IT_SOFT; + STAILQ_INIT(&isw->isw_active); + mtx_lock(&event_lock); + TAILQ_INSERT_TAIL(&swi_event_list, isw, isw_list); + mtx_unlock(&event_lock); + if (event != NULL) + *event = &isw->isw_event; return (0); } -#endif -/* - * Allow interrupt event binding for software interrupt handlers -- a no-op, - * since interrupts are generated in software rather than being directed by - * a PIC. - */ -static int -swi_assign_cpu(void *arg, u_char cpu) +/* Tear down a software interrupt event and thread. */ +int +swi_destroy(struct intr_event *ie) { + struct intr_software *isw; + if (!(ie->ie_flags & IE_SOFT)) + return (EINVAL); + isw = (struct intr_software *)ie; + + mtx_lock(&event_lock); + mtx_lock(&ie->ie_lock); + if (!TAILQ_EMPTY(&ie->ie_handlers)) { + mtx_unlock(&ie->ie_lock); + mtx_unlock(&event_lock); + return (EBUSY); + } + TAILQ_REMOVE(&swi_event_list, isw, isw_list); + mtx_unlock(&ie->ie_lock); + mtx_unlock(&event_lock); + ithread_destroy(isw->isw_thread); return (0); } @@ -1063,6 +1046,7 @@ { struct thread *td; struct intr_event *ie; + struct intr_software *isw; int error; if (flags & INTR_ENTROPY) @@ -1074,8 +1058,7 @@ if (!(ie->ie_flags & IE_SOFT)) return (EINVAL); } else { - error = intr_event_create(&ie, NULL, IE_SOFT, 0, - NULL, NULL, NULL, swi_assign_cpu, "swi%d:", pri); + error = swi_create(&ie, "swi%d:", pri); if (error) return (error); if (eventp != NULL) @@ -1086,7 +1069,8 @@ if (error) return (error); if (pri == SWI_CLOCK) { - td = ie->ie_thread->it_thread; + isw = (struct intr_software *)ie; + td = isw->isw_thread->it_thread; thread_lock(td); td->td_flags |= TDF_NOLOAD; thread_unlock(td); @@ -1095,547 +1079,799 @@ } /* - * Schedule a software interrupt thread. + * Schedule a software interrupt handler. */ void swi_sched(void *cookie, int flags) { - struct intr_handler *ih = (struct intr_handler *)cookie; - struct intr_event *ie = ih->ih_event; - int error; + struct intr_software *isw; + struct intr_handler *ih; + struct intr_event *ie; + struct thread *td; + int state; + + ih = cookie; + ie = ih->ih_event; + KASSERT(ie->ie_flags & IE_SOFT, + ("swi_sched: hardware interrupt event")); + isw = (struct intr_software *)ie; + td = isw->isw_thread->it_thread; + CTR3(KTR_INTR, "swi_sched: %s %s state=%d", ie->ie_name, ih->ih_name, + ih->ih_state); - CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name, - ih->ih_need); + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* + * Try to change state to queued. If that fails, + * try the loop again. + */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; - /* - * Set ih_need for this handler so that if the ithread is already - * running it will execute this handler on the next pass. Otherwise, - * it will execute it the next time it runs. - */ - atomic_store_rel_int(&ih->ih_need, 1); + /* Queue the handler. 
*/ + thread_lock(td); + STAILQ_INSERT_TAIL(&isw->isw_active, ih, ih_queued); + PCPU_INC(cnt.v_soft); - if (!(flags & SWI_DELAY)) { - PCPU_INC(cnt.v_soft); -#ifdef INTR_FILTER - error = intr_event_schedule_thread(ie, ie->ie_thread); -#else - error = intr_event_schedule_thread(ie); -#endif - KASSERT(error == 0, ("stray software interrupt")); + /* Schedule the thread if needed. */ + if (!(flags & SWI_DELAY)) { + if (TD_AWAITING_INTR(td)) { + CTR2(KTR_INTR, + "swi_sched: schedule tid %d (%s)", + td->td_tid, td->td_name); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); + } else { + CTR3(KTR_INTR, + "swi_sched: tid %d (%s): state %d", + td->td_tid, td->td_name, + td->td_state); + } + } + thread_unlock(td); + return; + case IS_QUEUED: + case IS_REQUEUE: + /* + * Do an atomic op to ensure it is in one of the + * queued states. If so, nothing else to do. + */ + if (atomic_cmpset_int(&ih->ih_state, state, + state)) { + PCPU_INC(cnt.v_soft); + return; + } + break; + case IS_RUNNING: + /* + * Try to change the state to requeue so that + * the interrupt thread will requeue the + * handler when it is finished executing. + */ + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_REQUEUE)) + return; + break; + case IS_DEAD: + case IS_DYING: + /* + * If this happens, it is probably a bug in + * the calling code, but just ignore it. + */ + return; + } } } /* * Remove a software interrupt handler. Currently this code does not - * remove the associated interrupt event if it becomes empty. Calling code - * may do so manually via intr_event_destroy(), but that's not really - * an optimal interface. + * remove the associated interrupt event if it becomes empty. */ int swi_remove(void *cookie) { +#ifdef INVARIANTS + struct intr_handler *ih; + ih = cookie; + KASSERT(ih->ih_event->ie_flags & IE_SOFT, + ("swi_remove: hardware interrupt event")); +#endif return (intr_event_remove_handler(cookie)); } -#ifdef INTR_FILTER -static void -priv_ithread_execute_handler(struct proc *p, struct intr_handler *ih) +/* + * Executes a threaded interrupt handler. Returns true if the handler + * has been marked as dying. + */ +static __inline int +intr_handler_execute(struct intr_handler *ih, struct intr_event *ie, + struct thread *td) { - struct intr_event *ie; + int state; - ie = ih->ih_event; - /* - * If this handler is marked for death, remove it from - * the list of handlers and wake up the sleeper. - */ - if (ih->ih_flags & IH_DEAD) { - mtx_lock(&ie->ie_lock); - TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next); - ih->ih_flags &= ~IH_DEAD; - wakeup(ih); - mtx_unlock(&ie->ie_lock); - return; + /* Transition state from queued to running. */ + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_DYING: + return (DYING); + case IS_QUEUED: + /* Mark the handler as running. */ + if (atomic_cmpset_int(&ih->ih_state, IS_QUEUED, + IS_RUNNING)) { + /* XXXTEST */ + CTR1(KTR_INTR, "%s: IS_QUEUED -> IS_RUNNING", + ih->ih_name); + goto run; + } + break; +#ifdef INVARIANTS + default: + panic("bad pre-exec intr handler state %d", state); +#endif + } } - - /* Execute this handler. 
*/ - CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x", - __func__, p->p_pid, (void *)ih->ih_handler, ih->ih_argument, - ih->ih_name, ih->ih_flags); - + +run: + CTR5(KTR_INTR, "intr_exec: tid %d exec %p(%p) for %s flg=%x", + td->td_tid, (void *)ih->ih_handler, ih->ih_argument, ih->ih_name, + ih->ih_flags); if (!(ih->ih_flags & IH_MPSAFE)) mtx_lock(&Giant); ih->ih_handler(ih->ih_argument); if (!(ih->ih_flags & IH_MPSAFE)) mtx_unlock(&Giant); -} + + /* Transition state from running back to idle. */ + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_DYING: + return (1); + case IS_REQUEUE: + /* + * Try to set the state to queued. If that + * succeeds, requeue the handler. The return + * value tells the interrupt thread to requeue + * the handler. For hardware interrupts, + * there is no need to schedule a thread as + * this thread will reclaim the current CPU if + * there is not another associated thread + * already. + */ + if (atomic_cmpset_int(&ih->ih_state, IS_REQUEUE, + IS_QUEUED)) { + /* XXXTEST */ + CTR1(KTR_INTR, "%s: IS_REQUEUE -> IS_QUEUED", + ih->ih_name); + return (REQUEUE); + } + break; + case IS_RUNNING: + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_IDLE)) { + /* XXXTEST */ + CTR1(KTR_INTR, "%s: IS_RUNNING -> IS_IDLE", + ih->ih_name); + return (FINISHED); + } + break; +#ifdef INVARIANTS + default: + panic("bad post-exec intr handler state %d", state); #endif - -/* - * This is a public function for use by drivers that mux interrupt - * handlers for child devices from their interrupt handler. - */ -void -intr_event_execute_handlers(struct proc *p, struct intr_event *ie) -{ - struct intr_handler *ih, *ihn; - - TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) { - /* - * If this handler is marked for death, remove it from - * the list of handlers and wake up the sleeper. - */ - if (ih->ih_flags & IH_DEAD) { - mtx_lock(&ie->ie_lock); - TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next); - ih->ih_flags &= ~IH_DEAD; - wakeup(ih); - mtx_unlock(&ie->ie_lock); - continue; } - - /* Skip filter only handlers */ - if (ih->ih_handler == NULL) - continue; - - /* - * For software interrupt threads, we only execute - * handlers that have their need flag set. Hardware - * interrupt threads always invoke all of their handlers. - */ - if (ie->ie_flags & IE_SOFT) { - if (!ih->ih_need) - continue; - else - atomic_store_rel_int(&ih->ih_need, 0); - } - - /* Execute this handler. */ - CTR6(KTR_INTR, "%s: pid %d exec %p(%p) for %s flg=%x", - __func__, p->p_pid, (void *)ih->ih_handler, - ih->ih_argument, ih->ih_name, ih->ih_flags); - - if (!(ih->ih_flags & IH_MPSAFE)) - mtx_lock(&Giant); - ih->ih_handler(ih->ih_argument); - if (!(ih->ih_flags & IH_MPSAFE)) - mtx_unlock(&Giant); } } +/* + * Main loop for software interrupt threads. Each software interrupt + * thread is bound to a specific software interrupt event and only + * executes handlers for that event. + */ static void -ithread_execute_handlers(struct proc *p, struct intr_event *ie) +swi_loop(void *arg) { + struct intr_software *isw; + struct intr_handler *ih; + struct intr_thread *it; + struct thread *td; + int state; - /* Interrupt handlers should not sleep. 
*/ - if (!(ie->ie_flags & IE_SOFT)) - THREAD_NO_SLEEPING(); - intr_event_execute_handlers(p, ie); - if (!(ie->ie_flags & IE_SOFT)) - THREAD_SLEEPING_OK(); + td = curthread; + isw = arg; + it = td->td_ithread; + KASSERT(it->it_thread == td, ("swi_loop: ithread linkage out of sync")); + KASSERT(it == isw->isw_thread, + ("swi_loop: intr_sofware linkage out of sync")); /* - * Interrupt storm handling: - * - * If this interrupt source is currently storming, then throttle - * it to only fire the handler once per clock tick. - * - * If this interrupt source is not currently storming, but the - * number of back to back interrupts exceeds the storm threshold, - * then enter storming mode. + * Execute handlers queued on the active list. If there are + * no handlers, block waiting for more handlers. */ - if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold && - !(ie->ie_flags & IE_SOFT)) { - /* Report the message only once every second. */ - if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) { - printf( - "interrupt storm detected on \"%s\"; throttling interrupt source\n", - ie->ie_name); + thread_lock(td); + while (!(it->it_flags & IT_DEAD)) { + /* Remove active handlers and execute them. */ + while ((ih = STAILQ_FIRST(&isw->isw_active)) != NULL) { + STAILQ_REMOVE_HEAD(&isw->isw_active, ih_queued); + thread_unlock(td); + state = intr_handler_execute(ih, &isw->isw_event, td); + if (state == DYING) + intr_handler_ack_dying(&isw->isw_event, ih); + + WITNESS_WARN(WARN_PANIC, NULL, "finished swi"); + mtx_assert(&Giant, MA_NOTOWNED); + thread_lock(td); + if (state == REQUEUE) + STAILQ_INSERT_TAIL(&isw->isw_active, ih, + ih_queued); + } + + /* Block waiting for more work. */ + if (!(it->it_flags & IT_DEAD)) { + TD_SET_IWAIT(td); + mi_switch(SW_VOL | SWT_IWAIT, NULL); } - pause("istorm", 1); - } else - ie->ie_count++; + } + thread_unlock(td); - /* - * Now that all the handlers have had a chance to run, reenable - * the interrupt source. - */ - if (ie->ie_post_ithread != NULL) - ie->ie_post_ithread(ie->ie_source); + CTR2(KTR_INTR, "swi_loop: tid %d (%s) exiting", td->td_tid, + td->td_name); + free(it, M_ITHREAD); + mtx_destroy(&isw->isw_event.ie_lock); + free(isw, M_ITHREAD); + kthread_exit(); } -#ifndef INTR_FILTER /* - * This is the main code for interrupt threads. + * Main loop for hardware interrupt threads. Each thread is pinned to + * a specific CPU when it executes and drains handlers for that CPU + * until there are no active handlers left. */ static void -ithread_loop(void *arg) +hwi_loop(void *arg) { - struct intr_thread *ithd; + struct intr_hardware *ihw; + struct intr_handler *ih; + struct intr_thread *it; struct intr_event *ie; struct thread *td; - struct proc *p; + int cpuid, state; + + td = curthread; + it = td->td_ithread; + KASSERT(it->it_thread == td, ("hwi_loop: ithread linkage out of sync")); - td = curthread; - p = td->td_proc; - ithd = (struct intr_thread *)arg; - KASSERT(ithd->it_thread == td, - ("%s: ithread and proc linkage out of sync", __func__)); - ie = ithd->it_event; - ie->ie_count = 0; + /* Hardware interrupt handlers should not sleep. */ + THREAD_NO_SLEEPING(); /* - * As long as we have interrupts outstanding, go through the - * list of handlers, giving each one a go at it. + * Execute handlers queued on this CPU's active list. If there are + * no handlers, block waiting for more handlers. */ - for (;;) { - /* - * If we are an orphaned thread, then just die. 
- */ - if (ithd->it_flags & IT_DEAD) { - CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, - p->p_pid, td->td_name); - free(ithd, M_ITHREAD); - kthread_exit(); - } + thread_lock(td); + while (!(it->it_flags & IT_DEAD)) { + spinlock_enter(); + thread_unlock(td); + + /* Remove active handlers and execute them. */ + while ((ih = STAILQ_FIRST(PCPU_PTR(hwi_active))) != NULL) { + STAILQ_REMOVE_HEAD(PCPU_PTR(hwi_active), ih_queued); + spinlock_exit(); + ie = ih->ih_event; + ihw = (struct intr_hardware *)ie; +#ifdef INVARIANTS + it->it_current = ih; +#endif + state = intr_handler_execute(ih, ie, td); +#ifdef INVARIANTS + it->it_current = NULL; +#endif + + /* + * See if we need to invoke the 'post_ithread' + * hook for this event. Skip this if requeueing + * the handler. + * + * XXX: Need to detect and handle interrupt + * storms here somehow. + * + * XXX: If a filter returns just + * FILTER_SCHEDULE_THREAD, then this will + * break. + */ + if (state != REQUEUE && ih->ih_filter == NULL && + ihw->ihw_post_ithread != NULL && + atomic_fetchadd_int(&ihw->ihw_queued, -1) == 1) { + CTR1(KTR_INTR, "hwi_loop: post_ithread for %s", + ie->ie_name); + ihw->ihw_post_ithread(ihw->ihw_source); + } + + if (state == DYING) + intr_handler_ack_dying(ie, ih); + + WITNESS_WARN(WARN_PANIC, NULL, "finished hwi"); + mtx_assert(&Giant, MA_NOTOWNED); + spinlock_enter(); + + if (state == REQUEUE) + STAILQ_INSERT_TAIL(PCPU_PTR(hwi_active), ih, + ih_queued); - /* - * Service interrupts. If another interrupt arrives while - * we are running, it will set it_need to note that we - * should make another pass. - */ - while (ithd->it_need) { /* - * This might need a full read and write barrier - * to make sure that this write posts before any - * of the memory or device accesses in the - * handlers. + * If the handler blocked on a lock, then this + * thread is no longer tied to this CPU. If + * this CPU does not have an active interrupt, + * then reclaim this CPU. Otherwise, fall out + * of the loop and let the other active thread + * for this CPU process any queued handlers. */ - atomic_store_rel_int(&ithd->it_need, 0); - ithread_execute_handlers(p, ie); + if (PCPU_GET(hwi_thread) != td) { + if (PCPU_GET(hwi_thread) == NULL) { + CTR3(KTR_INTR, + "hwi_loop: tid %d (%s) reclaiming CPU %d", + td->td_tid, td->td_name, + PCPU_GET(cpuid)); + PCPU_SET(hwi_thread, td); + } else + break; + } } - WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); - mtx_assert(&Giant, MA_NOTOWNED); + spinlock_exit(); - /* - * Processed all our interrupts. Now get the sched - * lock. This may take a while and it_need may get - * set again, so we have to check it again. - */ + /* Block waiting for more work. */ thread_lock(td); - if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { + if (!(it->it_flags & IT_DEAD) && (PCPU_GET(hwi_thread) != td || + STAILQ_EMPTY(PCPU_PTR(hwi_active)))) { + + /* Disassociate from the current CPU. */ + sched_unbind(td); + mtx_lock_spin(&hwi_thread_lock); + if (PCPU_GET(hwi_thread) == td) { + CTR3(KTR_INTR, + "hwi_loop: tid %d (%s) freeing CPU %d", + td->td_tid, td->td_name, PCPU_GET(cpuid)); + PCPU_SET(hwi_thread, NULL); + } + + /* Handle queued handlers for another CPU if needed. */ + if (hwi_check_cpus) { + cpuid = hwi_pending_cpu(it); + if (cpuid != NOCPU) { + mtx_unlock_spin(&hwi_thread_lock); + sched_bind(td, cpuid); + continue; + } + } + + /* Put this thread on the idle list and block. 
*/ + thread_lock_set(td, &hwi_thread_lock); + TAILQ_INSERT_HEAD(&hwi_threads, it, it_list); TD_SET_IWAIT(td); - ie->ie_count = 0; mi_switch(SW_VOL | SWT_IWAIT, NULL); } + } + thread_unlock(td); + + CTR2(KTR_INTR, "hwi_loop: tid %d (%s) exiting", td->td_tid, + td->td_name); + THREAD_SLEEPING_OK(); + free(it, M_ITHREAD); + kthread_exit(); +} + +/* + * Allocate a new hwi thread for the current CPU. Must be called with + * the hwi_thread_lock held and will return with it dropped. + */ +static void +hwi_alloc_thread(void) +{ + struct intr_thread *it; + struct thread *td; + + mtx_assert(&hwi_thread_lock, MA_OWNED); + + /* Try to grab a free thread. */ + it = TAILQ_FIRST(&hwi_threads); + if (it != NULL) { + /* + * Claim this thread. Bind it to this CPU while it + * drains interrupt handlers. Even though this + * thread's per-thread lock should be hwi_thread_lock + * and thus already held, grab it again via + * thread_lock() to force this code to wait if another + * CPU is switching away from this thread and thus + * td_lock is actually the blocked lock. + */ + TAILQ_REMOVE(&hwi_threads, it, it_list); + td = it->it_thread; + PCPU_SET(hwi_thread, td); + thread_lock(td); + THREAD_LOCKPTR_ASSERT(td, &hwi_thread_lock); + mtx_unlock_spin(&hwi_thread_lock); + sched_bind_ithd(td, PCPU_GET(cpuid)); + KASSERT(TD_AWAITING_INTR(td), ("free hwi thread not idle")); + CTR3(KTR_INTR, + "hwi_alloc_thread: schedule tid %d (%s) for CPU %d", + td->td_tid, td->td_name, PCPU_GET(cpuid)); + TD_CLR_IWAIT(td); + sched_add(td, SRQ_INTR); thread_unlock(td); + } else { + hwi_check_cpus = 1; + if (!hwi_thread_warn) { + hwi_thread_warn = 1; + mtx_unlock(&hwi_thread_lock); + printf("Exhausted hardware interrupt thread pool, " + "increase kern.intr.thread_count\n"); + } else + mtx_unlock(&hwi_thread_lock); } } /* - * Main interrupt handling body. - * - * Input: - * o ie: the event connected to this interrupt. - * o frame: some archs (i.e. i386) pass a frame to some. - * handlers as their main argument. - * Return value: - * o 0: everything ok. - * o EINVAL: stray interrupt. + * Entry point for MD code to call to handle a hardware interrupt. + * The trapframe is passed as the argument to any filter handlers that + * specify NULL as their argument. */ int -intr_event_handle(struct intr_event *ie, struct trapframe *frame) +hwi_handle(struct intr_event *ie, struct trapframe *frame) { + struct intr_entropy entropy; + struct intr_handler_list *active; + struct intr_hardware *ihw; struct intr_handler *ih; struct trapframe *oldframe; - struct thread *td; - int error, ret, thread; + struct thread *td, *ctd; + void *arg; + int nonfilter, ret, state, thread; - td = curthread; - /* An interrupt with no event or handlers is a stray interrupt. */ if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) return (EINVAL); + td = curthread; + td->td_intr_nesting_level++; + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; + critical_enter(); + + /* + * Lock the 'run' lock on the interrupt event while we run filters + * and queue threaded handlers. Unlike top-half code, we do not + * need to disable interrupts, so just spin on the lock token. + */ + while (!atomic_cmpset_acq_int(&ie->ie_running, 0, 1)) { + while (ie->ie_running) + cpu_spinwait(); + } + /* - * Execute fast interrupt handlers directly. + * Execute filter interrupt handlers directly. * To support clock handlers, if a handler registers * with a NULL argument, then we pass it a pointer to * a trapframe as its argument. 
*/ - td->td_intr_nesting_level++; thread = 0; - ret = 0; - critical_enter(); - oldframe = td->td_intr_frame; - td->td_intr_frame = frame; + nonfilter = 0; + active = PCPU_PTR(hwi_active); + ihw = (struct intr_hardware *)ie; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (ih->ih_filter == NULL) { - thread = 1; + /* If we have a filter, run it first. */ + if (ih->ih_filter != NULL) { + arg = ih->ih_argument; + if (arg == NULL) + arg = frame; + CTR3(KTR_INTR, "hwi_handle: exec %p(%p) for %s", + ih->ih_filter, arg, ih->ih_name); + ret = ih->ih_filter(arg); + CTR1(KTR_INTR, "hwi_handle: filter returned %#x", ret); + KASSERT(ret == FILTER_STRAY || ret == FILTER_HANDLED || + ret == (FILTER_HANDLED | FILTER_SCHEDULE_THREAD) || + ret == FILTER_SCHEDULE_THREAD, + ("incorrect filter return value %#x from %s", ret, + ih->ih_name)); + } else + ret = FILTER_SCHEDULE_THREAD; + + /* + * If no need to schedule threaded handler, nothing + * left to do for this handler. + */ + if (!(ret & FILTER_SCHEDULE_THREAD)) continue; - } - CTR4(KTR_INTR, "%s: exec %p(%p) for %s", __func__, - ih->ih_filter, ih->ih_argument == NULL ? frame : - ih->ih_argument, ih->ih_name); - if (ih->ih_argument == NULL) - ret = ih->ih_filter(frame); - else - ret = ih->ih_filter(ih->ih_argument); - KASSERT(ret == FILTER_STRAY || - ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && - (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), - ("%s: incorrect return value %#x from %s", __func__, ret, - ih->ih_name)); + + /* Place this interrupt handler on this CPU's queue. */ + for (;;) { + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* Try to change the state to queued. */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; + + /* XXXTEST */ + CTR1(KTR_INTR, "%s: IS_IDLE -> IS_QUEUED", + ih->ih_name); - /* - * Wrapper handler special handling: - * - * in some particular cases (like pccard and pccbb), - * the _real_ device handler is wrapped in a couple of - * functions - a filter wrapper and an ithread wrapper. - * In this case (and just in this case), the filter wrapper - * could ask the system to schedule the ithread and mask - * the interrupt source if the wrapped handler is composed - * of just an ithread handler. - * - * TODO: write a generic wrapper to avoid people rolling - * their own - */ - if (!thread) { - if (ret == FILTER_SCHEDULE_THREAD) + /* + * Queue the handler. If this + * handler's filter did not handle the + * interrupt, note that we need to use + * the 'pre_ithread' and + * 'post_ithread' hooks. + */ + if (!(ret & FILTER_HANDLED)) { + nonfilter = 1; + atomic_add_int(&ihw->ihw_queued, 1); + } thread = 1; + CTR2(KTR_INTR, + "hwi_handle: scheduled %s for %s", + ih->ih_name, ie->ie_name); + STAILQ_INSERT_TAIL(active, ih, ih_queued); + goto next; + case IS_QUEUED: + case IS_REQUEUE: + /* Ensure it is truly still queued. */ + if (atomic_cmpset_int(&ih->ih_state, state, + state)) { + /* XXXTEST */ + CTR3(KTR_INTR, "%s: %s -> %s", + ih->ih_name, state == IS_QUEUED ? + "IS_QUEUED" : "IS_REQUEUE", + state == IS_QUEUED ? "IS_QUEUED" : + "IS_REQUEUE"); + goto next; + } + break; + case IS_RUNNING: + /* Try to change the state to requeue. */ + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_REQUEUE)) { + /* XXXTEST */ + CTR1(KTR_INTR, + "%s: IS_RUNNING -> IS_REQUEUE", + ih->ih_name); + goto next; + } + break; +#ifdef INVARIANTS + default: + /* + * The dying/dead states should not + * happen. 
They are only set while + * holding the run lock and once they + * are set the event is removed from + * the interrupt event's handler + * list. + */ + panic("hwi_handle: bad state %d", state); +#endif + } } + next:; } - td->td_intr_frame = oldframe; + + /* Drop the 'run' lock. */ + atomic_store_rel_int(&ie->ie_running, 0); - if (thread) { - if (ie->ie_pre_ithread != NULL) - ie->ie_pre_ithread(ie->ie_source); + /* + * If handlers without filters were queued, invoke the + * 'pre_ithread' hook, otherwise invoke the 'post_filter' + * hook. + */ + if (nonfilter) { + if (ihw->ihw_pre_ithread != NULL) { + CTR1(KTR_INTR, "hwi_handle: pre_ithread for %s", + ie->ie_name); + ihw->ihw_pre_ithread(ihw->ihw_source); + } } else { - if (ie->ie_post_filter != NULL) - ie->ie_post_filter(ie->ie_source); + if (ihw->ihw_post_filter != NULL) { + CTR1(KTR_INTR, "hwi_handle: post_filter for %s", + ie->ie_name); + ihw->ihw_post_filter(ihw->ihw_source); + } + } + + /* + * If any of the handlers for this event claim to be good + * sources of entropy, then gather some. + */ + if (harvest.interrupt && ie->ie_flags & IE_ENTROPY) { + ctd = curthread; + CTR2(KTR_INTR, "hwi_handle: tid %d (%s) gathering entropy", + ctd->td_tid, ctd->td_name); + entropy.event = (uintptr_t)ie; + entropy.td = ctd; + random_harvest(&entropy, sizeof(entropy), 2, 0, + RANDOM_INTERRUPT); } - - /* Schedule the ithread if needed. */ - if (thread) { - error = intr_event_schedule_thread(ie); -#ifndef XEN - KASSERT(error == 0, ("bad stray interrupt")); -#else - if (error != 0) - log(LOG_WARNING, "bad stray interrupt"); -#endif + + /* + * If this CPU doesn't have an active interrupt thread, + * schedule a new one. + */ + if (thread && PCPU_GET(hwi_thread) == NULL) { + mtx_lock_spin(&hwi_thread_lock); + hwi_alloc_thread(); } critical_exit(); + td->td_intr_frame = oldframe; td->td_intr_nesting_level--; return (0); } -#else + /* - * This is the main code for interrupt threads. + * Allow a hardware interrupt handler to be manually scheduled on the + * current CPU's queue. This can be used either to schedule manual + * interrupt handlers from a filter or handler or to reschedule the + * currently executing handler. Much of the logic is copied from + * hwi_handle(). */ -static void -ithread_loop(void *arg) +void +hwi_sched(void *cookie) { - struct intr_thread *ithd; + struct intr_hardware *ihw; struct intr_handler *ih; struct intr_event *ie; - struct thread *td; - struct proc *p; - int priv; + int state; - td = curthread; - p = td->td_proc; - ih = (struct intr_handler *)arg; - priv = (ih->ih_thread != NULL) ? 1 : 0; - ithd = (priv) ? ih->ih_thread : ih->ih_event->ie_thread; - KASSERT(ithd->it_thread == td, - ("%s: ithread and proc linkage out of sync", __func__)); - ie = ithd->it_event; - ie->ie_count = 0; + ih = cookie; + ie = ih->ih_event; + ihw = (struct intr_hardware *)ie; + KASSERT((curthread->td_pflags & TDP_ITHREAD) || + curthread->td_intr_nesting_level > 0, + ("hwi_sched: invalid calling thread context")); + KASSERT((ih->ih_flags & IH_MANUAL) || + ((curthread->td_pflags & TDP_ITHREAD) && + curthread->td_ithread->it_current == ih), + ("hwi_sched: attempt to schedule invalid handler")); + KASSERT(!(ie->ie_flags & IE_SOFT), ("hwi_sched: swi event")); - /* - * As long as we have interrupts outstanding, go through the - * list of handlers, giving each one a go at it. - */ + /* Place this interrupt handler on this CPU's queue. */ + spinlock_enter(); for (;;) { - /* - * If we are an orphaned thread, then just die. 
- */ - if (ithd->it_flags & IT_DEAD) { - CTR3(KTR_INTR, "%s: pid %d (%s) exiting", __func__, - p->p_pid, td->td_name); - free(ithd, M_ITHREAD); - kthread_exit(); - } + state = ih->ih_state; + switch (state) { + case IS_IDLE: + /* Try to change the state to queued. */ + if (!atomic_cmpset_int(&ih->ih_state, IS_IDLE, + IS_QUEUED)) + break; + + /* + * If requeueing the currently executing + * handler and it does not have a filter, bump + * the queued count to defer the + * 'post_ithread' hook. + */ + if (!(ih->ih_flags & IH_MANUAL) && + ih->ih_filter != NULL) + atomic_add_int(&ihw->ihw_queued, 1); - /* - * Service interrupts. If another interrupt arrives while - * we are running, it will set it_need to note that we - * should make another pass. - */ - while (ithd->it_need) { + /* Queue the handler. */ + CTR2(KTR_INTR, "hwi_sched: scheduled %s for %s", + ih->ih_name, ie->ie_name); + STAILQ_INSERT_TAIL(PCPU_PTR(hwi_active), ih, ih_queued); + goto queued; + case IS_QUEUED: + case IS_REQUEUE: + /* Ensure it is truly still queued. */ + if (atomic_cmpset_int(&ih->ih_state, state, state)) + goto queued; + break; + case IS_RUNNING: + /* Try to change the state to requeue. */ + if (atomic_cmpset_int(&ih->ih_state, IS_RUNNING, + IS_REQUEUE)) { + spinlock_exit(); + return; + } + break; + case IS_DYING: + /* + * This can happen if the currently executing + * handler is being removed by another thread. + * In that case, just ignore the reschedule + * attempt. The main loop of the hwi thread + * will ack the dying request once this + * handler finishes. + */ + KASSERT(curthread->td_ithread != NULL && + curthread->td_ithread->it_current == ih, + ("hwi_sched: dying handler is not current")); + return; +#ifdef INVARIANTS + default: /* - * This might need a full read and write barrier - * to make sure that this write posts before any - * of the memory or device accesses in the - * handlers. + * The dead state should not happen. The + * currently executing handler cannot be dead, + * only dying, and the owner of a manual + * handler is responsible for destroying any + * filters or handlers that can schedule that + * event before destroying the manual handler. */ - atomic_store_rel_int(&ithd->it_need, 0); - if (priv) - priv_ithread_execute_handler(p, ih); - else - ithread_execute_handlers(p, ie); + panic("hwi_sched: bad state %d", state); +#endif } - WITNESS_WARN(WARN_PANIC, NULL, "suspending ithread"); - mtx_assert(&Giant, MA_NOTOWNED); + } - /* - * Processed all our interrupts. Now get the sched - * lock. This may take a while and it_need may get - * set again, so we have to check it again. - */ - thread_lock(td); - if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { - TD_SET_IWAIT(td); - ie->ie_count = 0; - mi_switch(SW_VOL | SWT_IWAIT, NULL); - } - thread_unlock(td); +queued: + /* + * If this CPU doesn't have an active interrupt thread, + * schedule a new one. + */ + if (PCPU_GET(hwi_thread) == NULL) { + mtx_lock_spin(&hwi_thread_lock); + hwi_alloc_thread(); } + spinlock_exit(); } -/* - * Main loop for interrupt filter. - * - * Some architectures (i386, amd64 and arm) require the optional frame - * parameter, and use it as the main argument for fast handler execution - * when ih_argument == NULL. - * - * Return value: - * o FILTER_STRAY: No filter recognized the event, and no - * filter-less handler is registered on this - * line. - * o FILTER_HANDLED: A filter claimed the event and served it. 
- * o FILTER_SCHEDULE_THREAD: No filter claimed the event, but there's at - * least one filter-less handler on this line. - * o FILTER_HANDLED | - * FILTER_SCHEDULE_THREAD: A filter claimed the event, and asked for - * scheduling the per-handler ithread. - * - * In case an ithread has to be scheduled, in *ithd there will be a - * pointer to a struct intr_thread containing the thread to be - * scheduled. - */ - -static int -intr_filter_loop(struct intr_event *ie, struct trapframe *frame, - struct intr_thread **ithd) +/* Called when an interrupt thread blocks on a turnstile waiting for a lock. */ +void +intr_thread_block(struct thread *td) { - struct intr_handler *ih; - void *arg; - int ret, thread_only; - ret = 0; - thread_only = 0; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - /* - * Execute fast interrupt handlers directly. - * To support clock handlers, if a handler registers - * with a NULL argument, then we pass it a pointer to - * a trapframe as its argument. - */ - arg = ((ih->ih_argument == NULL) ? frame : ih->ih_argument); - - CTR5(KTR_INTR, "%s: exec %p/%p(%p) for %s", __func__, - ih->ih_filter, ih->ih_handler, arg, ih->ih_name); + /* + * Only allocate a new thread for this CPU if the currently + * active hwi thread blocks. + */ + if (PCPU_GET(hwi_thread) != td) + return; - if (ih->ih_filter != NULL) - ret = ih->ih_filter(arg); - else { - thread_only = 1; - continue; - } - KASSERT(ret == FILTER_STRAY || - ((ret & (FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) != 0 && - (ret & ~(FILTER_SCHEDULE_THREAD | FILTER_HANDLED)) == 0), - ("%s: incorrect return value %#x from %s", __func__, ret, - ih->ih_name)); - if (ret & FILTER_STRAY) - continue; - else { - *ithd = ih->ih_thread; - return (ret); - } - } + /* Mark this CPU as not having an active hwi thread. */ + mtx_lock_spin(&hwi_thread_lock); + PCPU_SET(hwi_thread, NULL); + CTR3(KTR_INTR, "intr_thread_block: tid %d (%s) freeing CPU %d", + td->td_tid, td->td_name, PCPU_GET(cpuid)); /* - * No filters handled the interrupt and we have at least - * one handler without a filter. In this case, we schedule - * all of the filter-less handlers to run in the ithread. - */ - if (thread_only) { - *ithd = ie->ie_thread; - return (FILTER_SCHEDULE_THREAD); - } - return (FILTER_STRAY); + * If there are any queued handlers, allocate a new hwi thread + * for this CPU. + */ + if (!STAILQ_EMPTY(PCPU_PTR(hwi_active))) + hwi_alloc_thread(); + else + mtx_unlock_spin(&hwi_thread_lock); } -/* - * Main interrupt handling body. - * - * Input: - * o ie: the event connected to this interrupt. - * o frame: some archs (i.e. i386) pass a frame to some. - * handlers as their main argument. - * Return value: - * o 0: everything ok. - * o EINVAL: stray interrupt. 
- */ -int -intr_event_handle(struct intr_event *ie, struct trapframe *frame) +#ifdef old +static void +ithread_execute_handlers(struct proc *p, struct intr_event *ie) { - struct intr_thread *ithd; - struct trapframe *oldframe; - struct thread *td; - int thread; - ithd = NULL; - td = curthread; - - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) - return (EINVAL); - - td->td_intr_nesting_level++; - thread = 0; - critical_enter(); - oldframe = td->td_intr_frame; - td->td_intr_frame = frame; - thread = intr_filter_loop(ie, frame, &ithd); - if (thread & FILTER_HANDLED) { - if (ie->ie_post_filter != NULL) - ie->ie_post_filter(ie->ie_source); - } else { - if (ie->ie_pre_ithread != NULL) - ie->ie_pre_ithread(ie->ie_source); - } - td->td_intr_frame = oldframe; - critical_exit(); - - /* Interrupt storm logic */ - if (thread & FILTER_STRAY) { + /* + * Interrupt storm handling: + * + * If this interrupt source is currently storming, then throttle + * it to only fire the handler once per clock tick. + * + * If this interrupt source is not currently storming, but the + * number of back to back interrupts exceeds the storm threshold, + * then enter storming mode. + */ + if (intr_storm_threshold != 0 && ie->ie_count >= intr_storm_threshold && + !(ie->ie_flags & IE_SOFT)) { + /* Report the message only once every second. */ + if (ppsratecheck(&ie->ie_warntm, &ie->ie_warncnt, 1)) { + printf( + "interrupt storm detected on \"%s\"; throttling interrupt source\n", + ie->ie_name); + } + pause("istorm", 1); + } else ie->ie_count++; - if (ie->ie_count < intr_storm_threshold) - printf("Interrupt stray detection not present\n"); - } - /* Schedule an ithread if needed. */ - if (thread & FILTER_SCHEDULE_THREAD) { - if (intr_event_schedule_thread(ie, ithd) != 0) - panic("%s: impossible stray interrupt", __func__); - } - td->td_intr_nesting_level--; - return (0); } #endif @@ -1643,8 +1879,8 @@ /* * Dump details about an interrupt handler */ -static void -db_dump_intrhand(struct intr_handler *ih) +void +db_dump_intrhand(struct intr_handler *ih, int display_event) { int comma; @@ -1681,11 +1917,15 @@ break; } db_printf(" "); + if (display_event) + db_printf("(%s) ", ih->ih_event->ie_name); + if (ih->ih_filter != NULL) { + db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC); + db_printf(","); + } db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC); db_printf("(%p)", ih->ih_argument); - if (ih->ih_need || - (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD | - IH_MPSAFE)) != 0) { + if ((ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_MPSAFE)) != 0) { db_printf(" {"); comma = 0; if (ih->ih_flags & IH_EXCLUSIVE) { @@ -1700,25 +1940,37 @@ db_printf("ENTROPY"); comma = 1; } - if (ih->ih_flags & IH_DEAD) { - if (comma) - db_printf(", "); - db_printf("DEAD"); - comma = 1; - } if (ih->ih_flags & IH_MPSAFE) { if (comma) db_printf(", "); db_printf("MPSAFE"); - comma = 1; } - if (ih->ih_need) { - if (comma) - db_printf(", "); - db_printf("NEED"); - } db_printf("}"); } + db_printf(" "); + switch (ih->ih_state) { + case IS_IDLE: + db_printf("IDLE"); + break; + case IS_QUEUED: + db_printf("QUEUED"); + break; + case IS_RUNNING: + db_printf("RUNNING"); + break; + case IS_REQUEUE: + db_printf("REQUEUE"); + break; + case IS_DYING: + db_printf("DYING"); + break; + case IS_DEAD: + db_printf("DEAD"); + break; + default: + db_printf("0x%x", ih->ih_state); + break; + } db_printf("\n"); } @@ -1728,18 +1980,20 @@ void db_dump_intr_event(struct intr_event *ie, int handlers) { + struct intr_software *isw; + struct intr_hardware *ihw; struct 
intr_handler *ih; - struct intr_thread *it; int comma; db_printf("%s ", ie->ie_fullname); - it = ie->ie_thread; - if (it != NULL) - db_printf("(pid %d)", it->it_thread->td_proc->p_pid); - else - db_printf("(no thread)"); - if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY | IE_ADDING_THREAD)) != 0 || - (it != NULL && it->it_need)) { + if (ie->ie_flags & IE_SOFT) { + isw = (struct intr_software *)ie; + db_printf("(tid %d)", isw->isw_thread->it_thread->td_tid); + } else { + ihw = (struct intr_hardware *)ie; + db_printf("IRQ %d queued %d", ihw->ihw_irq, ihw->ihw_queued); + } + if ((ie->ie_flags & (IE_SOFT | IE_ENTROPY)) != 0) { db_printf(" {"); comma = 0; if (ie->ie_flags & IE_SOFT) { @@ -1750,26 +2004,15 @@ if (comma) db_printf(", "); db_printf("ENTROPY"); - comma = 1; } - if (ie->ie_flags & IE_ADDING_THREAD) { - if (comma) - db_printf(", "); - db_printf("ADDING_THREAD"); - comma = 1; - } - if (it != NULL && it->it_need) { - if (comma) - db_printf(", "); - db_printf("NEED"); - } db_printf("}"); } db_printf("\n"); if (handlers) - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) - db_dump_intrhand(ih); + TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { + db_dump_intrhand(ih, 0); + } } /* @@ -1777,15 +2020,23 @@ */ DB_SHOW_COMMAND(intr, db_show_intr) { - struct intr_event *ie; + struct intr_software *isw; + struct intr_hardware *ihw; int all, verbose; verbose = index(modif, 'v') != NULL; all = index(modif, 'a') != NULL; - TAILQ_FOREACH(ie, &event_list, ie_list) { - if (!all && TAILQ_EMPTY(&ie->ie_handlers)) + TAILQ_FOREACH(ihw, &hwi_event_list, ihw_list) { + if (!all && TAILQ_EMPTY(&ihw->ihw_event.ie_handlers)) + continue; + db_dump_intr_event(&ihw->ihw_event, verbose); + if (db_pager_quit) + break; + } + TAILQ_FOREACH(isw, &swi_event_list, isw_list) { + if (!all && TAILQ_EMPTY(&isw->isw_event.ie_handlers)) continue; - db_dump_intr_event(ie, verbose); + db_dump_intr_event(&isw->isw_event, verbose); if (db_pager_quit) break; } @@ -1793,7 +2044,7 @@ #endif /* DDB */ /* - * Start standard software interrupt threads + * Start standard software interrupt threads. */ static void start_softintr(void *dummy) --- //depot/projects/smpng/sys/kern/sched_4bsd.c 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/kern/sched_4bsd.c 2010-06-25 21:44:42.000000000 0000 @@ -1476,6 +1476,21 @@ } void +sched_bind_ithd(struct thread *td, int cpu) +{ + struct td_sched *ts; + + THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED); + KASSERT(TD_AWAITING_INTR(td), ("sched_bind_ithd: td is not waiting")); + ts = td->td_sched; + KASSERT(!(td->td_flags & TDF_BOUND), ("sched_bind_ithd: td is bound")); + td->td_flags |= TDF_BOUND; +#ifdef SMP + ts->ts_runq = &runq_pcpu[cpu]; +#endif +} + +void sched_unbind(struct thread* td) { THREAD_LOCK_ASSERT(td, MA_OWNED); --- //depot/projects/smpng/sys/kern/sched_ule.c 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/kern/sched_ule.c 2010-06-25 21:44:42.000000000 0000 @@ -2438,6 +2438,23 @@ } /* + * Bind a waiting interrupt thread to a target cpu. + */ +void +sched_bind_ithd(struct thread *td, int cpu) +{ + struct td_sched *ts; + + THREAD_LOCK_ASSERT(td, MA_OWNED|MA_NOTRECURSED); + KASSERT(TD_AWAITING_INTR(td), ("sched_bind_ithd: td is not waiting")); + ts = td->td_sched; + KASSERT(!(ts->ts_flags & TSF_BOUND), ("sched_bind_ithd: td is bound")); + ts->ts_flags |= TSF_BOUND; + td->td_pinned = 1; + ts->ts_cpu = cpu; +} + +/* * Release a bound thread. 
*/ void --- //depot/projects/smpng/sys/kern/subr_pcpu.c 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/kern/subr_pcpu.c 2010-06-25 21:44:42.000000000 0000 @@ -52,7 +52,8 @@ #include #include -#include +#include +#include #include #include #include @@ -60,6 +61,7 @@ #include #include #include +#include #include MALLOC_DEFINE(M_PCPU, "Per-cpu", "Per-cpu resource accouting."); @@ -92,6 +94,7 @@ cpuid_to_pcpu[cpuid] = pcpu; SLIST_INSERT_HEAD(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); + STAILQ_INIT(&pcpu->pc_hwi_active); pcpu->pc_rm_queue.rmq_next = &pcpu->pc_rm_queue; pcpu->pc_rm_queue.rmq_prev = &pcpu->pc_rm_queue; #ifdef KTR @@ -327,6 +330,7 @@ static void show_pcpu(struct pcpu *pc) { + struct intr_handler *ih; struct thread *td; db_printf("cpuid = %d\n", pc->pc_cpuid); @@ -352,12 +356,23 @@ db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name); else db_printf("none\n"); + db_printf("hwi_thread = "); + td = pc->pc_hwi_thread; + if (td != NULL) + db_printf("%p: tid %d \"%s\"\n", td, td->td_tid, td->td_name); + else + db_printf("none\n"); db_show_mdpcpu(pc); #ifdef VIMAGE db_printf("curvnet = %p\n", pc->pc_curthread->td_vnet); #endif + db_printf("queued interrupt handlers:\n"); + STAILQ_FOREACH(ih, &pc->pc_hwi_active, ih_queued) { + db_dump_intrhand(ih, 1); + } + #ifdef WITNESS db_printf("spin locks held:\n"); witness_list_locks(&pc->pc_spinlocks, db_printf); --- //depot/projects/smpng/sys/kern/subr_turnstile.c 2010-03-10 22:33:24.000000000 0000 +++ //depot/user/jhb/intr/kern/subr_turnstile.c 2010-05-20 15:49:03.000000000 0000 @@ -65,6 +65,8 @@ #include #include +#include +#include #include #include #include @@ -679,6 +681,10 @@ MPASS(owner->td_proc->p_magic == P_MAGIC); MPASS(queue == TS_SHARED_QUEUE || queue == TS_EXCLUSIVE_QUEUE); + /* Handle special case when an interrupt thread blocks. */ + if (td->td_pflags & TDP_ITHREAD) + intr_thread_block(td); + /* * If the lock does not already have a turnstile, use this thread's * turnstile. Otherwise insert the current thread into the --- //depot/projects/smpng/sys/sys/bus.h 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/sys/bus.h 2010-06-25 21:44:42.000000000 0000 @@ -194,7 +194,8 @@ INTR_FAST = 128, INTR_EXCL = 256, /* exclusive interrupt */ INTR_MPSAFE = 512, /* this interrupt is SMP safe */ - INTR_ENTROPY = 1024 /* this interrupt provides entropy */ + INTR_ENTROPY = 1024, /* this interrupt provides entropy */ + INTR_MANUAL = 2048 /* only scheduled via hwi_sched() */ }; enum intr_trigger { --- //depot/projects/smpng/sys/sys/interrupt.h 2009-12-16 22:22:28.000000000 0000 +++ //depot/user/jhb/intr/sys/interrupt.h 2010-06-07 21:31:16.000000000 0000 @@ -37,50 +37,123 @@ struct trapframe; /* + * Interrupt handlers are scheduled for execution across a pool of + * interrupt threads. Each CPU maintains a per-CPU list of scheduled + * interrupt handlers. When an interrupt occurs, its handler is added + * to the per-CPU list of handlers. If the list is empty, then an + * interrupt thread is scheduled. This thread is pinned to a specific + * CPU and will continue to execute until it has drained all of the + * handlers for its assigned CPU. If an interrupt thread blocks on a + * lock and other interrupt handlers are queued for the assigned CPU, + * then a new interrupt thread is scheduled to execute those handlers + * if a thread is available. + * + * Pinning all of the work to a per-CPU list with pinned threads + * allows the list of handlers to be maintained with minimal locking + * overhead. 
The simplest implementation is for the code executing in + * an interrupt thread context to disable interrupts when examining + * the per-CPU list. + * + * An additional wrinkle is needed to handle shared interrupt handlers + * that do not use filters. For this case the interrupt event needs + * to not be enabled until all of the handlers for that event have + * executed. To handle this, each hardware interrupt holds a count of + * non-filter events queued for execution. Once these handlers are + * drained, the 'post_ithread' hook is invoked. + * + * XXX: Not sure how to handle interrupt storms in this mode. + * + * Software interrupt events are still assigned a dedicated interrupt + * thread. A list of scheduled handlers is maintained in the software + * event itself. + */ + +/* * Describe a hardware interrupt handler. * * Multiple interrupt handlers for a specific event can be chained * together. */ struct intr_handler { - driver_filter_t *ih_filter; /* Filter handler function. */ + driver_filter_t *ih_filter; /* Filter handler function. */ driver_intr_t *ih_handler; /* Threaded handler function. */ void *ih_argument; /* Argument to pass to handlers. */ int ih_flags; + volatile int ih_state; /* IS_* state. */ char ih_name[MAXCOMLEN + 1]; /* Name of handler. */ struct intr_event *ih_event; /* Event we are connected to. */ - int ih_need; /* Needs service. */ TAILQ_ENTRY(intr_handler) ih_next; /* Next handler for this event. */ u_char ih_pri; /* Priority of this handler. */ - struct intr_thread *ih_thread; /* Ithread for filtered handler. */ + STAILQ_ENTRY(intr_handler) ih_queued; /* Links for active list. */ }; /* Interrupt handle flags kept in ih_flags */ +#define IH_MANUAL 0x00000001 /* Manually scheduled via hwi_sched(). */ #define IH_EXCLUSIVE 0x00000002 /* Exclusive interrupt. */ #define IH_ENTROPY 0x00000004 /* Device is a good entropy source. */ -#define IH_DEAD 0x00000008 /* Handler should be removed. */ #define IH_MPSAFE 0x80000000 /* Handler does not need Giant. */ /* + * Interrupt handle states. + * + * Initially an interrupt handler is idle. An idle handler can move + * either into the dead state (when it is being removed) or queued + * state (when it is queued to an interrup thread). + * + * A queued handler can move either into the dying state (when it is + * being removed), the queued state (an attempt to queue an + * already-queued handler), or the running state (when an interrupt + * thread executes the handler). + * + * An interrupt handler is placed into the running state by an + * interrupt thread while it is being executed. A running handler can + * move either into the dying state (when it is being removed), the + * requeue state (an attempt to queue an executing handler), or the + * idle state. + * + * If an interrupt handler is rescheduled while it is executing, it is + * placed into the requeue state. A requeued handler can move either + * into the dying state (when it is being removed) or the queued state + * (when the interrupt thread requeues it after execution finishes). + * + * When an interrupt handler is removed, it is placed into the dying + * state if it is not currently idle. The removing thread then sleeps + * until an interrupt thread dequeues the handler or finishes + * executing the handler. The interrupt thread then acks the dying + * request by moving the handler into the dead state. + */ +#define IS_IDLE 0 +#define IS_QUEUED 1 +#define IS_RUNNING 2 +#define IS_REQUEUE 3 +#define IS_DYING 4 +#define IS_DEAD 5 + +/* * Describe an interrupt event. 
An event holds a list of handlers. + * Events are split into two classes: hardware interrupt events and + * software interrupt events. + * * The 'pre_ithread', 'post_ithread', 'post_filter', and 'assign_cpu' - * hooks are used to invoke MD code for certain operations. + * hooks are used to invoke MD code for certain operations for + * hardware interrupt events. * - * The 'pre_ithread' hook is called when an interrupt thread for - * handlers without filters is scheduled. It is responsible for - * ensuring that 1) the system won't be swamped with an interrupt - * storm from the associated source while the ithread runs and 2) the - * current CPU is able to receive interrupts from other interrupt - * sources. The first is usually accomplished by disabling - * level-triggered interrupts until the ithread completes. The second - * is accomplished on some platforms by acknowledging the interrupt - * via an EOI. + * The 'pre_ithread' hook is called when interrupt handlers without + * filters are scheduled. It is responsible for ensuring that 1) the + * system won't be swamped with an interrupt storm from the associated + * source while the ithread runs and 2) the current CPU is able to + * receive interrupts from other interrupt sources. The first is + * usually accomplished by disabling level-triggered interrupts until + * all of the handlers for this event have completed. The second is + * accomplished on some platforms by acknowledging the interrupt via + * an EOI. * - * The 'post_ithread' hook is invoked when an ithread finishes. It is - * responsible for ensuring that the associated interrupt source will - * trigger an interrupt when it is asserted in the future. Usually - * this is implemented by enabling a level-triggered interrupt that - * was previously disabled via the 'pre_ithread' hook. + * The 'post_ithread' hook is invoked when all of the interrupt + * handlers without filters for an event finish. It is responsible + * for ensuring that the associated interrupt source will trigger an + * interrupt when it is asserted in the future. Usually this is + * implemented by enabling a level-triggered interrupt that was + * previously disabled via the 'pre_ithread' hook. * * The 'post_filter' hook is invoked when a filter handles an * interrupt. It is responsible for ensuring that the current CPU is @@ -91,40 +164,52 @@ * specific CPU. If the interrupt cannot be bound, this function may * return an error. * - * Note that device drivers may also use interrupt events to manage - * multiplexing interrupt interrupt handler into handlers for child - * devices. In that case, the above hooks are not used. The device - * can create an event for its interrupt resource and register child - * event handlers with that event. It can then use - * intr_event_execute_handlers() to execute non-filter handlers. - * Currently filter handlers are not supported by this, but that can - * be added by splitting out the filter loop from intr_event_handle() - * if desired. + * The list of handlers in an interrupt event are protected by two + * locks. First, there is a regular mutex that can be used alone for + * read-only access in top-half code. Second, there is a very simple + * 0/1 spinlock stored in "ie_running". This lightweight lock is held + * in the low-level interrupt code while walking the list of interrupt + * handlers. It must also be held in top-half code that adds or + * removes handlers to or from the list. 
*/ struct intr_event { - TAILQ_ENTRY(intr_event) ie_list; TAILQ_HEAD(, intr_handler) ie_handlers; /* Interrupt handlers. */ char ie_name[MAXCOMLEN + 1]; /* Individual event name. */ char ie_fullname[MAXCOMLEN + 1]; struct mtx ie_lock; - void *ie_source; /* Cookie used by MD code. */ - struct intr_thread *ie_thread; /* Thread we are connected to. */ - void (*ie_pre_ithread)(void *); - void (*ie_post_ithread)(void *); - void (*ie_post_filter)(void *); - int (*ie_assign_cpu)(void *, u_char); + volatile int ie_running; int ie_flags; - int ie_count; /* Loop counter. */ - int ie_warncnt; /* Rate-check interrupt storm warns. */ - struct timeval ie_warntm; - int ie_irq; /* Physical irq number if !SOFT. */ - u_char ie_cpu; /* CPU this event is bound to. */ + int ie_cpu; /* CPU this event is bound to. */ +}; + +struct intr_hardware { + struct intr_event ihw_event; + TAILQ_ENTRY(intr_hardware) ihw_list; + void *ihw_source; /* Cookie used by MD code. */ + void (*ihw_pre_ithread)(void *); + void (*ihw_post_ithread)(void *); + void (*ihw_post_filter)(void *); + int (*ihw_assign_cpu)(void *, u_char); + int ihw_queued; /* Number of queued non-filter handlers. */ + int ihw_irq; /* Physical irq number. */ +#ifdef notyet + int ihw_count; /* Loop counter. */ + int ihw_warncnt; /* Rate-check interrupt storm warns. */ + struct timeval ihw_warntm; +#endif + TAILQ_HEAD(, intr_handler) ihw_manual; /* Manual interrupt handlers. */ +}; + +struct intr_software { + struct intr_event isw_event; + TAILQ_ENTRY(intr_software) isw_list; + struct intr_thread *isw_thread; /* Dedicated thread. */ + STAILQ_HEAD(, intr_handler) isw_active; }; /* Interrupt event flags kept in ie_flags. */ #define IE_SOFT 0x000001 /* Software interrupt. */ #define IE_ENTROPY 0x000002 /* Interrupt is an entropy source. */ -#define IE_ADDING_THREAD 0x000004 /* Currently building an ithread. */ /* Flags to pass to sched_swi. */ #define SWI_DELAY 0x2 @@ -143,6 +228,7 @@ #define SWI_TQ_GIANT 6 struct proc; +struct thread; extern struct intr_event *tty_intr_event; extern struct intr_event *clk_intr_event; @@ -157,30 +243,45 @@ #ifdef DDB void db_dump_intr_event(struct intr_event *ie, int handlers); +void db_dump_intrhand(struct intr_handler *ih, int display_event); #endif +void hwi_create(struct intr_event **event, void *source, int irq, + void (*pre_ithread)(void *), void (*post_ithread)(void *), + void (*post_filter)(void *), int (*assign_cpu)(void *, u_char), + const char *fmt, ...) __printflike(8, 9); +int hwi_destroy(struct intr_event *ie); +int hwi_handle(struct intr_event *ie, struct trapframe *frame); +void *hwi_handler_source(void *cookie); +void hwi_sched(void *cookie); u_char intr_priority(enum intr_type flags); int intr_event_add_handler(struct intr_event *ie, const char *name, driver_filter_t filter, driver_intr_t handler, void *arg, u_char pri, enum intr_type flags, void **cookiep); int intr_event_bind(struct intr_event *ie, u_char cpu); -int intr_event_create(struct intr_event **event, void *source, - int flags, int irq, void (*pre_ithread)(void *), - void (*post_ithread)(void *), void (*post_filter)(void *), - int (*assign_cpu)(void *, u_char), const char *fmt, ...) 
- __printflike(9, 10); int intr_event_describe_handler(struct intr_event *ie, void *cookie, const char *descr); -int intr_event_destroy(struct intr_event *ie); -void intr_event_execute_handlers(struct proc *p, struct intr_event *ie); -int intr_event_handle(struct intr_event *ie, struct trapframe *frame); int intr_event_remove_handler(void *cookie); int intr_getaffinity(int irq, void *mask); -void *intr_handler_source(void *cookie); int intr_setaffinity(int irq, void *mask); +void intr_thread_block(struct thread *td); int swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep); +int swi_create(struct intr_event **event, const char *fmt, ...); +int swi_destroy(struct intr_event *ie); +int swi_remove(void *cookie); void swi_sched(void *cookie, int flags); -int swi_remove(void *cookie); + +/* XXX: Compat shims */ +#define intr_event_create(ev, src, f, irq, prei, posti, postf, ac, ...) \ + (hwi_create(ev, src, irq, prei, posti, postf, ac, __VA_ARGS__), 0) +#define intr_event_destroy(ev) \ + hwi_destroy(ev) +#define intr_event_handle(ev, frame) \ + hwi_handle(ev, frame) +#define intr_handler_source(cookie) \ + hwi_handler_source(cookie) + +/* XXX: Should we have hwi_add() and hwi_remove()? */ #endif --- //depot/projects/smpng/sys/sys/pcpu.h 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/sys/pcpu.h 2010-06-25 21:44:42.000000000 0000 @@ -121,6 +121,9 @@ sum; \ }) +struct intr_handler; +STAILQ_HEAD(intr_handler_list, intr_handler); + /* * XXXUPS remove as soon as we have per cpu variable * linker sets and can define rm_queue in _rm_lock.h @@ -159,6 +162,8 @@ struct device *pc_device; void *pc_netisr; /* netisr SWI cookie */ int pc_dnweight; /* vm_page_dontneed() */ + struct intr_handler_list pc_hwi_active; /* Queued HWI handlers */ + struct thread *pc_hwi_thread; /* Active per-CPU HWI thread */ /* * Stuff for read mostly lock --- //depot/projects/smpng/sys/sys/proc.h 2010-06-25 20:21:33.000000000 0000 +++ //depot/user/jhb/intr/sys/proc.h 2010-06-25 21:44:42.000000000 0000 @@ -157,6 +157,7 @@ * either lock is sufficient for read access, but both locks must be held * for write access. */ +struct intr_thread; struct kaudit_record; struct td_sched; struct nlminfo; @@ -301,7 +302,8 @@ int td_errno; /* Error returned by last syscall. */ struct vnet *td_vnet; /* (k) Effective vnet. */ const char *td_vnet_lpush; /* (k) Debugging vnet push / pop. */ - struct trapframe *td_intr_frame;/* (k) Frame of the current irq */ + struct trapframe *td_intr_frame;/* (k) Frame of the current irq. */ + struct intr_thread *td_ithread; /* (b) Interrupt thread state. */ }; struct mtx *thread_lock_block(struct thread *); --- //depot/projects/smpng/sys/sys/sched.h 2009-06-25 15:02:49.000000000 0000 +++ //depot/user/jhb/intr/sys/sched.h 2010-05-20 15:10:26.000000000 0000 @@ -121,6 +121,7 @@ * hold a thread on a particular CPU. */ void sched_bind(struct thread *td, int cpu); +void sched_bind_ithd(struct thread *td, int cpu); static __inline void sched_pin(void); void sched_unbind(struct thread *td); static __inline void sched_unpin(void);
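
An illustrative sketch of how a driver might consume the INTR_MANUAL /
hwi_sched() interface added above.  All foo_* names are hypothetical, and it
assumes that an INTR_MANUAL handler can be registered through the ordinary
bus_setup_intr(9) path and that the returned cookie is the intr_handler
pointer hwi_sched() expects; the patch leaves that registration question open
(see the hwi_add()/hwi_remove() XXX), so treat this only as a picture of the
intended flow: a filter acknowledges and masks the source, then queues the
deferred work on the current CPU's active handler list.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/rman.h>

/*
 * Hypothetical driver fragment.  The foo_* hardware helpers below are
 * stand-ins for device-specific code, not real KPIs.
 */
struct foo_softc {
	device_t	 dev;
	struct resource	*irq_res;
	void		*irq_cookie;	/* cookie for the filter handler */
	void		*task_cookie;	/* cookie for the INTR_MANUAL handler */
};

static int	foo_intr_pending(struct foo_softc *);
static void	foo_disable_intr(struct foo_softc *);
static void	foo_enable_intr(struct foo_softc *);
static void	foo_process_events(struct foo_softc *);

/* Threaded work; only ever runs when queued via hwi_sched(). */
static void
foo_intr_task(void *arg)
{
	struct foo_softc *sc = arg;

	foo_process_events(sc);
	foo_enable_intr(sc);
}

/* Filter: ack/mask the hardware, defer the rest to the manual handler. */
static int
foo_intr_filter(void *arg)
{
	struct foo_softc *sc = arg;

	if (!foo_intr_pending(sc))
		return (FILTER_STRAY);
	foo_disable_intr(sc);
	hwi_sched(sc->task_cookie);	/* queue on this CPU's active list */
	return (FILTER_HANDLED);
}

static int
foo_setup_intr(struct foo_softc *sc)
{
	int error;

	/* Manual handler: assumed never queued by hwi_handle() itself. */
	error = bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE | INTR_MANUAL, NULL, foo_intr_task,
	    sc, &sc->task_cookie);
	if (error != 0)
		return (error);

	/* Real interrupt: filter only; it schedules the manual handler. */
	return (bus_setup_intr(sc->dev, sc->irq_res,
	    INTR_TYPE_NET | INTR_MPSAFE, foo_intr_filter, NULL, sc,
	    &sc->irq_cookie));
}

Compared to the pre-patch model, the filter here does not depend on a
per-event ithread: the deferred handler is placed on the interrupting CPU's
pc_hwi_active list and drained by whichever hwi thread currently owns that
CPU, as described in the sys/interrupt.h comment above.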