Index: conf/files.i386 =================================================================== --- conf/files.i386 (.../stable/7/sys) (revision 189397) +++ conf/files.i386 (.../user/jhb/mce/sys) (revision 189397) @@ -291,6 +291,7 @@ i386/i386/local_apic.c optional apic i386/i386/locore.s standard no-obj i386/i386/longrun.c optional cpu_enable_longrun i386/i386/machdep.c standard +i386/i386/mca.c standard i386/i386/mem.c optional mem i386/i386/minidump_machdep.c standard i386/i386/mp_clock.c optional smp Index: conf/files.amd64 =================================================================== --- conf/files.amd64 (.../stable/7/sys) (revision 189397) +++ conf/files.amd64 (.../user/jhb/mce/sys) (revision 189397) @@ -109,6 +109,7 @@ amd64/amd64/legacy.c standard amd64/amd64/local_apic.c standard amd64/amd64/locore.S standard no-obj amd64/amd64/machdep.c standard +amd64/amd64/mca.c standard amd64/amd64/mem.c optional mem amd64/amd64/minidump_machdep.c standard amd64/amd64/mp_machdep.c optional smp Property changes on: dev/cxgb ___________________________________________________________________ Modified: svn:mergeinfo Merged /stable/7/sys/dev/cxgb:r187413-189391 Property changes on: dev/ath/ath_hal ___________________________________________________________________ Modified: svn:mergeinfo Merged /stable/7/sys/dev/ath/ath_hal:r187413-189391 Index: i386/include/mca.h =================================================================== --- i386/include/mca.h (.../stable/7/sys) (revision 0) +++ i386/include/mca.h (.../user/jhb/mce/sys) (revision 189397) @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2009 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __MACHINE_MCA_H__ +#define __MACHINE_MCA_H__ + /* One machine check event, as captured from a bank's MSRs. */ +struct mca_record { + uint64_t mr_status; /* Bank status MSR contents. */ + uint64_t mr_addr; /* Bank address MSR; zero unless ADDRV is set. */ + uint64_t mr_misc; /* Bank misc MSR; zero unless MISCV is set. */ + uint64_t mr_tsc; /* rdtsc() value when the record was saved. */ + int mr_apic_id; /* APIC ID of the CPU that saved the record. */ +}; + +#ifdef _KERNEL + +void mca_init(void); /* Must be executed on each CPU. */ +int mca_intr(void); /* Returns non-zero if execution may resume. */ + +#endif /* _KERNEL */ + +#endif /* !__MACHINE_MCA_H__ */ Index: i386/include/specialreg.h =================================================================== --- i386/include/specialreg.h (.../stable/7/sys) (revision 189397) +++ i386/include/specialreg.h (.../user/jhb/mce/sys) (revision 189397) @@ -392,6 +392,34 @@ #define DIR1 0xff /* + * Machine Check register constants. 
+ */ +#define MCG_CAP_COUNT 0x000000ff /* Number of MC banks. */ +#define MCG_CAP_CTL_P 0x00000100 /* MCG_CTL MSR is written if set. */ +#define MCG_CAP_EXT_P 0x00000200 +#define MCG_CAP_TES_P 0x00000800 +#define MCG_CAP_EXT_CNT 0x00ff0000 +#define MCG_STATUS_RIPV 0x00000001 /* Clear => not restartable. */ +#define MCG_STATUS_EIPV 0x00000002 +#define MCG_STATUS_MCIP 0x00000004 /* Cleared by mca_intr(). */ +#define MCG_CTL_ENABLE 0xffffffffffffffffUL +#define MCG_CTL_DISABLE 0x0000000000000000UL +#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4) /* Banks are 4 MSRs apart. */ +#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) +#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) +#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) +#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL /* Decoded by mca_log(). */ +#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL +#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL +#define MC_STATUS_PCC 0x0200000000000000UL /* Printed as "PCC". */ +#define MC_STATUS_ADDRV 0x0400000000000000UL /* Bank ADDR MSR is read. */ +#define MC_STATUS_MISCV 0x0800000000000000UL /* Bank MISC MSR is read. */ +#define MC_STATUS_EN 0x1000000000000000UL +#define MC_STATUS_UC 0x2000000000000000UL /* Printed as "UNCOR". */ +#define MC_STATUS_OVER 0x4000000000000000UL /* Printed as "OVER". */ +#define MC_STATUS_VAL 0x8000000000000000UL /* Bank holds an event. */ + +/* * The following four 3-byte registers control the non-cacheable regions. * These registers must be written as three separate bytes. 
* Index: i386/i386/mp_machdep.c =================================================================== --- i386/i386/mp_machdep.c (.../stable/7/sys) (revision 189397) +++ i386/i386/mp_machdep.c (.../user/jhb/mce/sys) (revision 189397) @@ -72,6 +72,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -602,6 +603,8 @@ init_secondary(void) KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); + mca_init(); + mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ Index: i386/i386/mca.c =================================================================== --- i386/i386/mca.c (.../stable/7/sys) (revision 0) +++ i386/i386/mca.c (.../user/jhb/mce/sys) (revision 189397) @@ -0,0 +1,519 @@ +/*- + * Copyright (c) 2009 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for x86 machine check architecture. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + /* List entry wrapping one saved machine check record. */ +struct mca_internal { + struct mca_record rec; /* Saved MSR contents. */ + int logged; /* Set once mca_scan_cpus() has printed it. */ + STAILQ_ENTRY(mca_internal) link; /* Linkage on mca_records. */ +}; + +static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); + +SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, 0, "MCA container"); + +static int mca_count; /* Number of records stored. */ + +SYSCTL_INT(_hw_mca, OID_AUTO, count, CTLFLAG_RD, &mca_count, 0, + "Record count"); + +static STAILQ_HEAD(, mca_internal) mca_records; +static struct callout mca_timer; +static int mca_ticks = 3600; /* Check hourly by default. 
*/ +static struct task mca_task; /* Runs mca_scan_cpus(). */ +static struct mtx mca_lock; /* Protects mca_records and mca_count. */ + /* Sysctl handler for hw.mca.interval; rejects values <= 0. */ +static int +sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = mca_ticks; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + if (value <= 0) + return (EINVAL); + mca_ticks = value; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, interval, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_MPSAFE, &mca_ticks, 0, sysctl_mca_ticks, "I", + "Periodic interval in seconds to scan for machine checks"); + /* Sysctl handler that copies out the name[0]'th stored record. */ +static int +sysctl_mca_records(SYSCTL_HANDLER_ARGS) +{ + int *name = (int *)arg1; + u_int namelen = arg2; + struct mca_record record; + struct mca_internal *rec; + int i; + + if (namelen != 1) + return (EINVAL); + /* Cheap unlocked range check; repeated below under mca_lock. */ + if (name[0] < 0 || name[0] >= mca_count) + return (EINVAL); + + mtx_lock_spin(&mca_lock); + if (name[0] >= mca_count) { + mtx_unlock_spin(&mca_lock); + return (EINVAL); + } + i = 0; + STAILQ_FOREACH(rec, &mca_records, link) { + if (i == name[0]) { + record = rec->rec; /* Copy out of the list under the lock. */ + break; + } + i++; + } + mtx_unlock_spin(&mca_lock); + return (SYSCTL_OUT(req, &record, sizeof(record))); +} +SYSCTL_NODE(_hw_mca, OID_AUTO, records, CTLFLAG_RD, sysctl_mca_records, + "Machine check records"); + /* Record a valid event from 'bank'; NULL if none or malloc fails. */ +static struct mca_record * +mca_record_entry(int bank) +{ + struct mca_internal *rec; + uint64_t status; + u_int p[4]; + + status = rdmsr(MSR_MC_STATUS(bank)); + if (!(status & MC_STATUS_VAL)) + return (NULL); + + rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); + if (rec == NULL) { + printf("MCA: Unable to allocate space for an event.\n"); + return (NULL); + } + + /* Save exception information. */ + rec->rec.mr_status = status; + if (status & MC_STATUS_ADDRV) + rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); + if (status & MC_STATUS_MISCV) + rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); + rec->rec.mr_tsc = rdtsc(); + rec->rec.mr_apic_id = PCPU_GET(apic_id); + + /* + * Clear machine check. 
Don't do this for uncorrectable + * errors so that the BIOS can see them. + */ + if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { + wrmsr(MSR_MC_STATUS(bank), 0); + do_cpuid(0, p); /* NOTE(review): presumably serializes; confirm. */ + } + + mtx_lock_spin(&mca_lock); + STAILQ_INSERT_TAIL(&mca_records, rec, link); + mca_count++; + mtx_unlock_spin(&mca_lock); + return (&rec->rec); +} + /* Map bits 3:2 of an error code to the prefix mca_log() prints. */ +static const char * +mca_error_ttype(uint16_t mca_error) +{ + + switch ((mca_error & 0x000c) >> 2) { + case 0: + return ("I"); + case 1: + return ("D"); + case 2: + return ("G"); + } + return ("?"); +} + /* Map bits 1:0 of an error code to the level name mca_log() prints. */ +static const char * +mca_error_level(uint16_t mca_error) +{ + + switch (mca_error & 0x0003) { + case 0: + return ("L0"); + case 1: + return ("L1"); + case 2: + return ("L2"); + case 3: + return ("LG"); + } + return ("L?"); +} + /* Map bits 7:4 of an error code to the request name mca_log() prints. */ +static const char * +mca_error_request(uint16_t mca_error) +{ + + switch ((mca_error & 0x00f0) >> 4) { + case 0x0: + return ("ERR"); + case 0x1: + return ("RD"); + case 0x2: + return ("WR"); + case 0x3: + return ("DRD"); + case 0x4: + return ("DWR"); + case 0x5: + return ("IRD"); + case 0x6: + return ("PREFETCH"); + case 0x7: + return ("EVICT"); + case 0x8: + return ("SNOOP"); + } + return ("???"); +} + +/* Dump details about a single machine check. */ +static void +mca_log(struct mca_record *rec) +{ + uint16_t mca_error; + + printf("MCA: CPU %d ", rec->mr_apic_id); /* This is the APIC ID. */ + if (rec->mr_status & MC_STATUS_UC) + printf("UNCOR "); + else + printf("COR "); + if (rec->mr_status & MC_STATUS_PCC) + printf("PCC "); + if (rec->mr_status & MC_STATUS_OVER) + printf("OVER "); + mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; + switch (mca_error) { + /* Simple error codes. 
*/ + case 0x0000: + printf("no error"); + break; + case 0x0001: + printf("unclassified error"); + break; + case 0x0002: + printf("ucode ROM parity error"); + break; + case 0x0003: + printf("external error"); + break; + case 0x0004: + printf("FRC error"); + break; + case 0x0400: + printf("internal timer error"); + break; + default: + if ((mca_error & 0xfc00) == 0x0400) { + printf("internal error %x", mca_error & 0x03ff); + break; + } + + /* Compound error codes; the masks below ignore bit 12. */ + + /* Memory hierarchy error. */ + if ((mca_error & 0xeffc) == 0x000c) { + printf("%s memory error", mca_error_level(mca_error)); + break; + } + + /* TLB error. */ + if ((mca_error & 0xeff0) == 0x0010) { + printf("%sTLB %s error", mca_error_ttype(mca_error), + mca_error_level(mca_error)); + break; + } + + /* Cache error. */ + if ((mca_error & 0xef00) == 0x0100) { + printf("%sCACHE %s %s error", + mca_error_ttype(mca_error), + mca_error_level(mca_error), + mca_error_request(mca_error)); + break; + } + + /* Bus and/or Interconnect error. */ + if ((mca_error & 0xe800) == 0x0800) { + printf("BUS%s ", mca_error_level(mca_error)); + switch ((mca_error & 0x0600) >> 9) { /* Bits 10:9. */ + case 0: + printf("Source"); + break; + case 1: + printf("Responder"); + break; + case 2: + printf("Observer"); + break; + default: + printf("???"); + break; + } + printf(" %s ", mca_error_request(mca_error)); + switch ((mca_error & 0x000c) >> 2) { /* Bits 3:2. */ + case 0: + printf("Memory"); + break; + case 2: + printf("I/O"); + break; + case 3: + printf("Other"); + break; + default: + printf("???"); + break; + } + if (mca_error & 0x0100) + printf(" timed out"); + break; + } + + printf("unknown error %x", mca_error); + break; + } + printf("\n"); + if (rec->mr_status & MC_STATUS_ADDRV) + printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); +} + +/* + * This scans all the machine check banks of the current CPU to see if + * there are any machine checks. Any non-recoverable errors are + * reported immediately via mca_log(). 
The current thread must be + * pinned when this is called. The 'mcip' parameter indicates if we + * are being called from the MC exception handler. In that case this + * function returns true if the system is restartable. Otherwise, it + * returns a count of the number of valid MC records found. + */ +static int +mca_scan(int mcip) +{ + struct mca_record *rec; + uint64_t mcg_cap, ucmask; + int count, i, recoverable; + + count = 0; + recoverable = 1; + ucmask = MC_STATUS_UC | MC_STATUS_PCC; /* Always non-recoverable. */ + + /* In a MCE# handler, also treat the OVER flag as non-restartable. */ + if (mcip) + ucmask |= MC_STATUS_OVER; + mcg_cap = rdmsr(MSR_MCG_CAP); + for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { + rec = mca_record_entry(i); + if (rec != NULL) { + count++; + if (rec->mr_status & ucmask) { + recoverable = 0; + mca_log(rec); + } + } + } + return (mcip ? recoverable : count); +} + +/* + * Scan the machine check banks on all CPUs by binding to each CPU in + * turn. If any of the CPUs contained new machine check records, log + * them to the console. 
+ */ +static void +mca_scan_cpus(void *context, int pending) +{ + struct mca_internal *mca; + struct thread *td; + int count, cpu; + + td = curthread; + count = 0; + thread_lock(td); + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + sched_bind(td, cpu); /* mca_scan() reads this CPU's banks. */ + thread_unlock(td); + count += mca_scan(0); + thread_lock(td); + sched_unbind(td); + } + thread_unlock(td); + if (count != 0) { + mtx_lock_spin(&mca_lock); + STAILQ_FOREACH(mca, &mca_records, link) { + if (!mca->logged) { + mca->logged = 1; + mtx_unlock_spin(&mca_lock); /* Dropped across mca_log(). */ + mca_log(&mca->rec); + mtx_lock_spin(&mca_lock); + } + } + mtx_unlock_spin(&mca_lock); + } +} + /* Callout handler: queue a scan task and re-arm the timer. */ +static void +mca_periodic_scan(void *arg) +{ + + taskqueue_enqueue(taskqueue_thread, &mca_task); + callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); +} + /* SYSINIT hook: arm the periodic scan timer for the first time. */ +static void +mca_start_timer(void *dummy) +{ + + callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); +} +SYSINIT(mca_start_timer, SI_SUB_SMP, SI_ORDER_ANY, mca_start_timer, NULL); + /* Sysctl handler: writing a non-zero value queues an immediate scan. */ +static int +sysctl_mca_scan(SYSCTL_HANDLER_ARGS) +{ + int error, i; + + i = 0; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error) + return (error); + if (i) + taskqueue_enqueue(taskqueue_thread, &mca_task); + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, force_scan, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_MPSAFE, NULL, 0, sysctl_mca_scan, "I", + "Force an immediate scan for machine checks"); + +/* Must be executed on each CPU. */ +void +mca_init(void) +{ + uint64_t mcg_cap; + int i; + + /* MCE is required. */ + if (!(cpu_feature & CPUID_MCE)) + return; + + if (PCPU_GET(cpuid) == 0) { /* Set up the shared state only once. */ + mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); + STAILQ_INIT(&mca_records); + TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); + callout_init(&mca_timer, CALLOUT_MPSAFE); + } + + if (cpu_feature & CPUID_MCA) { + sched_pin(); + mcg_cap = rdmsr(MSR_MCG_CAP); + if (mcg_cap & MCG_CAP_CTL_P) + /* Enable MCA features. 
*/ + wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); + + for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { + /* + * Enable logging of all errors. For P6 + * processors, MC0_CTL is always enabled. + * + * XXX: Better CPU test needed here? + */ + if (!(i == 0 && (cpu_id & 0xf00) == 0x600)) + wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL); + + /* XXX: Better CPU test needed here. */ + if ((cpu_id & 0xf00) == 0xf00) + mca_record_entry(i); + + /* Clear all errors. */ + wrmsr(MSR_MC_STATUS(i), 0); + } + sched_unpin(); + } + + load_cr4(rcr4() | CR4_MCE); +} + +/* Called when a machine check exception fires. */ +int +mca_intr(void) +{ + uint64_t mcg_status; + int recoverable; + + if (!(cpu_feature & CPUID_MCA)) { + /* + * Just print the values of the old Pentium registers + * and return 0 so that the caller treats this as fatal. + */ + printf("MC Type: 0x%llx Address: 0x%llx\n", + rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); + return (0); + } + + /* Scan the banks and check for any non-recoverable errors. */ + recoverable = mca_scan(1); + mcg_status = rdmsr(MSR_MCG_STATUS); + if (!(mcg_status & MCG_STATUS_RIPV)) + recoverable = 0; + + /* Clear MCIP. */ + wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); + return (recoverable); +} + Index: i386/i386/machdep.c =================================================================== --- i386/i386/machdep.c (.../stable/7/sys) (revision 189397) +++ i386/i386/machdep.c (.../user/jhb/mce/sys) (revision 189397) @@ -114,6 +114,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -2328,6 +2329,7 @@ init386(first) setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ + mca_init(); /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ Index: i386/i386/trap.c =================================================================== --- i386/i386/trap.c (.../stable/7/sys) (revision 189397) +++ i386/i386/trap.c (.../user/jhb/mce/sys) (revision 189397) @@ -90,6 +90,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #ifdef SMP @@ -239,6 +240,12 @@ trap(struct trapframe *frame) goto out; #endif + if (type == T_MCHK) { + if (!mca_intr()) + trap_fatal(frame, 0); + goto out; + } + #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before Property changes on: contrib/pf ___________________________________________________________________ Modified: svn:mergeinfo Merged /stable/7/sys/contrib/pf:r187413-189391 Index: amd64/include/mca.h =================================================================== --- amd64/include/mca.h (.../stable/7/sys) (revision 0) +++ amd64/include/mca.h (.../user/jhb/mce/sys) (revision 189397) @@ -0,0 +1,48 @@ +/*- + * Copyright (c) 2009 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef __MACHINE_MCA_H__ +#define __MACHINE_MCA_H__ + /* One machine check event, as captured from a bank's MSRs. */ +struct mca_record { + uint64_t mr_status; /* Bank status MSR contents. */ + uint64_t mr_addr; /* Bank address MSR; zero unless ADDRV is set. */ + uint64_t mr_misc; /* Bank misc MSR; zero unless MISCV is set. */ + uint64_t mr_tsc; /* rdtsc() value when the record was saved. */ + int mr_apic_id; /* APIC ID of the CPU that saved the record. */ +}; + +#ifdef _KERNEL + +void mca_init(void); /* Must be executed on each CPU. */ +int mca_intr(void); /* Returns non-zero if execution may resume. */ + +#endif /* _KERNEL */ + +#endif /* !__MACHINE_MCA_H__ */ Index: amd64/include/specialreg.h =================================================================== --- amd64/include/specialreg.h (.../stable/7/sys) (revision 189397) +++ amd64/include/specialreg.h (.../user/jhb/mce/sys) (revision 189397) @@ -325,6 +325,34 @@ #define DIR1 0xff /* + * Machine Check register constants. 
+ */ +#define MCG_CAP_COUNT 0x000000ff /* Number of MC banks. */ +#define MCG_CAP_CTL_P 0x00000100 /* MCG_CTL MSR is written if set. */ +#define MCG_CAP_EXT_P 0x00000200 +#define MCG_CAP_TES_P 0x00000800 +#define MCG_CAP_EXT_CNT 0x00ff0000 +#define MCG_STATUS_RIPV 0x00000001 /* Clear => not restartable. */ +#define MCG_STATUS_EIPV 0x00000002 +#define MCG_STATUS_MCIP 0x00000004 /* Cleared by mca_intr(). */ +#define MCG_CTL_ENABLE 0xffffffffffffffffUL +#define MCG_CTL_DISABLE 0x0000000000000000UL +#define MSR_MC_CTL(x) (MSR_MC0_CTL + (x) * 4) /* Banks are 4 MSRs apart. */ +#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4) +#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4) +#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4) +#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL /* Decoded by mca_log(). */ +#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL +#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL +#define MC_STATUS_PCC 0x0200000000000000UL /* Printed as "PCC". */ +#define MC_STATUS_ADDRV 0x0400000000000000UL /* Bank ADDR MSR is read. */ +#define MC_STATUS_MISCV 0x0800000000000000UL /* Bank MISC MSR is read. */ +#define MC_STATUS_EN 0x1000000000000000UL +#define MC_STATUS_UC 0x2000000000000000UL /* Printed as "UNCOR". */ +#define MC_STATUS_OVER 0x4000000000000000UL /* Printed as "OVER". */ +#define MC_STATUS_VAL 0x8000000000000000UL /* Bank holds an event. */ + +/* * The following four 3-byte registers control the non-cacheable regions. * These registers must be written as three separate bytes. 
* Index: amd64/amd64/mp_machdep.c =================================================================== --- amd64/amd64/mp_machdep.c (.../stable/7/sys) (revision 189397) +++ amd64/amd64/mp_machdep.c (.../user/jhb/mce/sys) (revision 189397) @@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -552,6 +553,8 @@ init_secondary(void) KASSERT(PCPU_GET(idlethread) != NULL, ("no idle thread")); PCPU_SET(curthread, PCPU_GET(idlethread)); + mca_init(); + mtx_lock_spin(&ap_boot_mtx); /* Init local apic for irq's */ Index: amd64/amd64/mca.c =================================================================== --- amd64/amd64/mca.c (.../stable/7/sys) (revision 0) +++ amd64/amd64/mca.c (.../user/jhb/mce/sys) (revision 189397) @@ -0,0 +1,519 @@ +/*- + * Copyright (c) 2009 Advanced Computing Technologies LLC + * Written by: John H. Baldwin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for x86 machine check architecture. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + /* List entry wrapping one saved machine check record. */ +struct mca_internal { + struct mca_record rec; /* Saved MSR contents. */ + int logged; /* Set once mca_scan_cpus() has printed it. */ + STAILQ_ENTRY(mca_internal) link; /* Linkage on mca_records. */ +}; + +static MALLOC_DEFINE(M_MCA, "MCA", "Machine Check Architecture"); + +SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RW, 0, "MCA container"); + +static int mca_count; /* Number of records stored. */ + +SYSCTL_INT(_hw_mca, OID_AUTO, count, CTLFLAG_RD, &mca_count, 0, + "Record count"); + +static STAILQ_HEAD(, mca_internal) mca_records; +static struct callout mca_timer; +static int mca_ticks = 3600; /* Check hourly by default. 
*/ +static struct task mca_task; /* Runs mca_scan_cpus(). */ +static struct mtx mca_lock; /* Protects mca_records and mca_count. */ + /* Sysctl handler for hw.mca.interval; rejects values <= 0. */ +static int +sysctl_mca_ticks(SYSCTL_HANDLER_ARGS) +{ + int error, value; + + value = mca_ticks; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error || req->newptr == NULL) + return (error); + if (value <= 0) + return (EINVAL); + mca_ticks = value; + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, interval, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_MPSAFE, &mca_ticks, 0, sysctl_mca_ticks, "I", + "Periodic interval in seconds to scan for machine checks"); + /* Sysctl handler that copies out the name[0]'th stored record. */ +static int +sysctl_mca_records(SYSCTL_HANDLER_ARGS) +{ + int *name = (int *)arg1; + u_int namelen = arg2; + struct mca_record record; + struct mca_internal *rec; + int i; + + if (namelen != 1) + return (EINVAL); + /* Cheap unlocked range check; repeated below under mca_lock. */ + if (name[0] < 0 || name[0] >= mca_count) + return (EINVAL); + + mtx_lock_spin(&mca_lock); + if (name[0] >= mca_count) { + mtx_unlock_spin(&mca_lock); + return (EINVAL); + } + i = 0; + STAILQ_FOREACH(rec, &mca_records, link) { + if (i == name[0]) { + record = rec->rec; /* Copy out of the list under the lock. */ + break; + } + i++; + } + mtx_unlock_spin(&mca_lock); + return (SYSCTL_OUT(req, &record, sizeof(record))); +} +SYSCTL_NODE(_hw_mca, OID_AUTO, records, CTLFLAG_RD, sysctl_mca_records, + "Machine check records"); + /* Record a valid event from 'bank'; NULL if none or malloc fails. */ +static struct mca_record * +mca_record_entry(int bank) +{ + struct mca_internal *rec; + uint64_t status; + u_int p[4]; + + status = rdmsr(MSR_MC_STATUS(bank)); + if (!(status & MC_STATUS_VAL)) + return (NULL); + + rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO); + if (rec == NULL) { + printf("MCA: Unable to allocate space for an event.\n"); + return (NULL); + } + + /* Save exception information. */ + rec->rec.mr_status = status; + if (status & MC_STATUS_ADDRV) + rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank)); + if (status & MC_STATUS_MISCV) + rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank)); + rec->rec.mr_tsc = rdtsc(); + rec->rec.mr_apic_id = PCPU_GET(apic_id); + + /* + * Clear machine check. 
Don't do this for uncorrectable + * errors so that the BIOS can see them. + */ + if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) { + wrmsr(MSR_MC_STATUS(bank), 0); + do_cpuid(0, p); /* NOTE(review): presumably serializes; confirm. */ + } + + mtx_lock_spin(&mca_lock); + STAILQ_INSERT_TAIL(&mca_records, rec, link); + mca_count++; + mtx_unlock_spin(&mca_lock); + return (&rec->rec); +} + /* Map bits 3:2 of an error code to the prefix mca_log() prints. */ +static const char * +mca_error_ttype(uint16_t mca_error) +{ + + switch ((mca_error & 0x000c) >> 2) { + case 0: + return ("I"); + case 1: + return ("D"); + case 2: + return ("G"); + } + return ("?"); +} + /* Map bits 1:0 of an error code to the level name mca_log() prints. */ +static const char * +mca_error_level(uint16_t mca_error) +{ + + switch (mca_error & 0x0003) { + case 0: + return ("L0"); + case 1: + return ("L1"); + case 2: + return ("L2"); + case 3: + return ("LG"); + } + return ("L?"); +} + /* Map bits 7:4 of an error code to the request name mca_log() prints. */ +static const char * +mca_error_request(uint16_t mca_error) +{ + + switch ((mca_error & 0x00f0) >> 4) { + case 0x0: + return ("ERR"); + case 0x1: + return ("RD"); + case 0x2: + return ("WR"); + case 0x3: + return ("DRD"); + case 0x4: + return ("DWR"); + case 0x5: + return ("IRD"); + case 0x6: + return ("PREFETCH"); + case 0x7: + return ("EVICT"); + case 0x8: + return ("SNOOP"); + } + return ("???"); +} + +/* Dump details about a single machine check. */ +static void +mca_log(struct mca_record *rec) +{ + uint16_t mca_error; + + printf("MCA: CPU %d ", rec->mr_apic_id); /* This is the APIC ID. */ + if (rec->mr_status & MC_STATUS_UC) + printf("UNCOR "); + else + printf("COR "); + if (rec->mr_status & MC_STATUS_PCC) + printf("PCC "); + if (rec->mr_status & MC_STATUS_OVER) + printf("OVER "); + mca_error = rec->mr_status & MC_STATUS_MCA_ERROR; + switch (mca_error) { + /* Simple error codes. 
*/ + case 0x0000: + printf("no error"); + break; + case 0x0001: + printf("unclassified error"); + break; + case 0x0002: + printf("ucode ROM parity error"); + break; + case 0x0003: + printf("external error"); + break; + case 0x0004: + printf("FRC error"); + break; + case 0x0400: + printf("internal timer error"); + break; + default: + if ((mca_error & 0xfc00) == 0x0400) { + printf("internal error %x", mca_error & 0x03ff); + break; + } + + /* Compound error codes; the masks below ignore bit 12. */ + + /* Memory hierarchy error. */ + if ((mca_error & 0xeffc) == 0x000c) { + printf("%s memory error", mca_error_level(mca_error)); + break; + } + + /* TLB error. */ + if ((mca_error & 0xeff0) == 0x0010) { + printf("%sTLB %s error", mca_error_ttype(mca_error), + mca_error_level(mca_error)); + break; + } + + /* Cache error. */ + if ((mca_error & 0xef00) == 0x0100) { + printf("%sCACHE %s %s error", + mca_error_ttype(mca_error), + mca_error_level(mca_error), + mca_error_request(mca_error)); + break; + } + + /* Bus and/or Interconnect error. */ + if ((mca_error & 0xe800) == 0x0800) { + printf("BUS%s ", mca_error_level(mca_error)); + switch ((mca_error & 0x0600) >> 9) { /* Bits 10:9. */ + case 0: + printf("Source"); + break; + case 1: + printf("Responder"); + break; + case 2: + printf("Observer"); + break; + default: + printf("???"); + break; + } + printf(" %s ", mca_error_request(mca_error)); + switch ((mca_error & 0x000c) >> 2) { /* Bits 3:2. */ + case 0: + printf("Memory"); + break; + case 2: + printf("I/O"); + break; + case 3: + printf("Other"); + break; + default: + printf("???"); + break; + } + if (mca_error & 0x0100) + printf(" timed out"); + break; + } + + printf("unknown error %x", mca_error); + break; + } + printf("\n"); + if (rec->mr_status & MC_STATUS_ADDRV) + printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr); +} + +/* + * This scans all the machine check banks of the current CPU to see if + * there are any machine checks. Any non-recoverable errors are + * reported immediately via mca_log(). 
The current thread must be + * pinned when this is called. The 'mcip' parameter indicates if we + * are being called from the MC exception handler. In that case this + * function returns true if the system is restartable. Otherwise, it + * returns a count of the number of valid MC records found. + */ +static int +mca_scan(int mcip) +{ + struct mca_record *rec; + uint64_t mcg_cap, ucmask; + int count, i, recoverable; + + count = 0; + recoverable = 1; + ucmask = MC_STATUS_UC | MC_STATUS_PCC; /* Always non-recoverable. */ + + /* In a MCE# handler, also treat the OVER flag as non-restartable. */ + if (mcip) + ucmask |= MC_STATUS_OVER; + mcg_cap = rdmsr(MSR_MCG_CAP); + for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { + rec = mca_record_entry(i); + if (rec != NULL) { + count++; + if (rec->mr_status & ucmask) { + recoverable = 0; + mca_log(rec); + } + } + } + return (mcip ? recoverable : count); +} + +/* + * Scan the machine check banks on all CPUs by binding to each CPU in + * turn. If any of the CPUs contained new machine check records, log + * them to the console. 
+ */ +static void +mca_scan_cpus(void *context, int pending) +{ + struct mca_internal *mca; + struct thread *td; + int count, cpu; + + td = curthread; + count = 0; + thread_lock(td); + for (cpu = 0; cpu <= mp_maxid; cpu++) { + if (CPU_ABSENT(cpu)) + continue; + sched_bind(td, cpu); /* mca_scan() reads this CPU's banks. */ + thread_unlock(td); + count += mca_scan(0); + thread_lock(td); + sched_unbind(td); + } + thread_unlock(td); + if (count != 0) { + mtx_lock_spin(&mca_lock); + STAILQ_FOREACH(mca, &mca_records, link) { + if (!mca->logged) { + mca->logged = 1; + mtx_unlock_spin(&mca_lock); /* Dropped across mca_log(). */ + mca_log(&mca->rec); + mtx_lock_spin(&mca_lock); + } + } + mtx_unlock_spin(&mca_lock); + } +} + /* Callout handler: queue a scan task and re-arm the timer. */ +static void +mca_periodic_scan(void *arg) +{ + + taskqueue_enqueue(taskqueue_thread, &mca_task); + callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); +} + /* SYSINIT hook: arm the periodic scan timer for the first time. */ +static void +mca_start_timer(void *dummy) +{ + + callout_reset(&mca_timer, mca_ticks * hz, mca_periodic_scan, NULL); +} +SYSINIT(mca_start_timer, SI_SUB_SMP, SI_ORDER_ANY, mca_start_timer, NULL); + /* Sysctl handler: writing a non-zero value queues an immediate scan. */ +static int +sysctl_mca_scan(SYSCTL_HANDLER_ARGS) +{ + int error, i; + + i = 0; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error) + return (error); + if (i) + taskqueue_enqueue(taskqueue_thread, &mca_task); + return (0); +} +SYSCTL_PROC(_hw_mca, OID_AUTO, force_scan, CTLTYPE_INT | CTLFLAG_RW | + CTLFLAG_MPSAFE, NULL, 0, sysctl_mca_scan, "I", + "Force an immediate scan for machine checks"); + +/* Must be executed on each CPU. */ +void +mca_init(void) +{ + uint64_t mcg_cap; + int i; + + /* MCE is required. */ + if (!(cpu_feature & CPUID_MCE)) + return; + + if (PCPU_GET(cpuid) == 0) { /* Set up the shared state only once. */ + mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); + STAILQ_INIT(&mca_records); + TASK_INIT(&mca_task, 0x8000, mca_scan_cpus, NULL); + callout_init(&mca_timer, CALLOUT_MPSAFE); + } + + if (cpu_feature & CPUID_MCA) { + sched_pin(); + mcg_cap = rdmsr(MSR_MCG_CAP); + if (mcg_cap & MCG_CAP_CTL_P) + /* Enable MCA features. 
*/ + wrmsr(MSR_MCG_CTL, MCG_CTL_ENABLE); + + for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) { + /* + * Enable logging of all errors. For P6 + * processors, MC0_CTL is always enabled. + * + * XXX: Better CPU test needed here? + */ + if (!(i == 0 && (cpu_id & 0xf00) == 0x600)) + wrmsr(MSR_MC_CTL(i), 0xffffffffffffffffUL); + + /* XXX: Better CPU test needed here. */ + if ((cpu_id & 0xf00) == 0xf00) + mca_record_entry(i); + + /* Clear all errors. */ + wrmsr(MSR_MC_STATUS(i), 0); + } + sched_unpin(); + } + + load_cr4(rcr4() | CR4_MCE); +} + +/* Called when a machine check exception fires. */ +int +mca_intr(void) +{ + uint64_t mcg_status; + int recoverable; + + if (!(cpu_feature & CPUID_MCA)) { + /* + * Just print the values of the old Pentium registers + * and return 0 so that the caller treats this as fatal. + */ + printf("MC Type: 0x%lx Address: 0x%lx\n", + rdmsr(MSR_P5_MC_TYPE), rdmsr(MSR_P5_MC_ADDR)); + return (0); + } + + /* Scan the banks and check for any non-recoverable errors. */ + recoverable = mca_scan(1); + mcg_status = rdmsr(MSR_MCG_STATUS); + if (!(mcg_status & MCG_STATUS_RIPV)) + recoverable = 0; + + /* Clear MCIP. */ + wrmsr(MSR_MCG_STATUS, mcg_status & ~MCG_STATUS_MCIP); + return (recoverable); +} + Index: amd64/amd64/machdep.c =================================================================== --- amd64/amd64/machdep.c (.../stable/7/sys) (revision 189397) +++ amd64/amd64/machdep.c (.../user/jhb/mce/sys) (revision 189397) @@ -109,6 +109,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1289,6 +1290,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree) identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ + mca_init(); /* make an initial tss so cpu can get interrupt stack on syscall! 
*/ common_tss[0].tss_rsp0 = thread0.td_kstack + \ Index: amd64/amd64/trap.c =================================================================== --- amd64/amd64/trap.c (.../stable/7/sys) (revision 189397) +++ amd64/amd64/trap.c (.../user/jhb/mce/sys) (revision 189397) @@ -88,6 +88,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #ifdef SMP @@ -220,6 +221,12 @@ trap(struct trapframe *frame) goto out; #endif + if (type == T_MCHK) { + if (!mca_intr()) + trap_fatal(frame, 0); + goto out; + } + #ifdef KDTRACE_HOOKS /* * A trap can occur while DTrace executes a probe. Before Property changes on: . ___________________________________________________________________ Modified: svn:mergeinfo Merged /stable/7/sys:r187413-189391