From 0d6f8549b01d05194f3d22034a11f45b77f2cf93 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Mon, 18 Mar 2024 18:11:16 +0000 Subject: [PATCH 1/3] acpi(4): New iattr `apeibus' for attaching an APEI driver. APEI is the ACPI Platform Error Interface, a standard (if very complicated) interface for reporting hardware errors to the OS. Firmware support for APEI is presented through the ACPI tables BERT (Boot Error Record Table), ERST (Error Record Serialization Table), EINJ (Error Injection Table), and HEST (Hardware Error Source Table), rather than through nodes in the ACPI device tree, so it can't just attach through the existing acpinodebus iattr and instead requires a special pseudo-bus like acpiwdrt(4). No driver yet -- this is just the hook to attach one in a module. The new member sc_apei of struct acpi_softc is placed at the end of the structure so that this change can be safely pulled up to release branches without risk to ABI compatibility in existing modules such as acpiverbose.kmod which may rely on the layout (but not size) of struct acpi_softc. 
PR kern/58046 --- sys/dev/acpi/acpi.c | 8 ++++++++ sys/dev/acpi/acpivar.h | 7 +++++++ sys/dev/acpi/files.acpi | 1 + 3 files changed, 16 insertions(+) diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c index 4ade9c47efa3..24e023373e0b 100644 --- a/sys/dev/acpi/acpi.c +++ b/sys/dev/acpi/acpi.c @@ -636,6 +636,9 @@ acpi_childdet(device_t self, device_t child) if (sc->sc_wdrt == child) sc->sc_wdrt = NULL; + if (sc->sc_apei == child) + sc->sc_apei = NULL; + SIMPLEQ_FOREACH(ad, &sc->sc_head, ad_list) { if (ad->ad_device == child) @@ -923,6 +926,11 @@ acpi_rescan(device_t self, const char *ifattr, const int *locators) CFARGS(.iattr = "acpiwdrtbus")); } + if (ifattr_match(ifattr, "apeibus") && sc->sc_apei == NULL) { + sc->sc_apei = config_found(sc->sc_dev, NULL, NULL, + CFARGS(.iattr = "apeibus")); + } + return 0; } diff --git a/sys/dev/acpi/acpivar.h b/sys/dev/acpi/acpivar.h index 9dafef7f7ac5..ddce8cdd9c18 100644 --- a/sys/dev/acpi/acpivar.h +++ b/sys/dev/acpi/acpivar.h @@ -177,6 +177,13 @@ struct acpi_softc { struct sysmon_pswitch sc_smpsw_sleep; SIMPLEQ_HEAD(, acpi_devnode) sc_head; + + /* + * Move this section to the other pseudo-bus child pointers + * after pullup -- putting it here avoids potential ABI + * compatibility issues with kernel modules. 
+ */ + device_t sc_apei; /* apei(4) pseudo-bus */ }; /* diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index ead9bab1e9bd..383b347f38d8 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -14,6 +14,7 @@ define acpiwdrtbus { } define acpisdtbus { } define acpigtdtbus { } define acpimadtbus { } +define apeibus { } device acpi: acpica, acpiapmbus, acpinodebus, acpiecdtbus, acpisdtbus, acpigtdtbus, acpimadtbus, acpihpetbus, acpiwdrtbus, sysmon_power, sysmon_taskq attach acpi at acpibus From 83d42541124ba98572b5a3f52d709ce05d0fcef2 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Sun, 17 Mar 2024 10:34:13 +0000 Subject: [PATCH 2/3] apei(4): New driver for ACPI Platform Error Interfaces. PR kern/58046 --- share/man/man4/apei.4 | 91 + sys/dev/acpi/apei.c | 3070 ++++++++++++++++++++++++++++++++++ sys/dev/acpi/apei_hed.h | 34 + sys/dev/acpi/apei_interp.c | 326 ++++ sys/dev/acpi/apei_interp.h | 55 + sys/dev/acpi/cper.h | 234 +++ sys/dev/acpi/files.acpi | 6 + sys/modules/Makefile | 7 + sys/modules/apei/Makefile | 13 + sys/modules/apei/apei.ioconf | 11 + 10 files changed, 3847 insertions(+) create mode 100644 share/man/man4/apei.4 create mode 100644 sys/dev/acpi/apei.c create mode 100644 sys/dev/acpi/apei_hed.h create mode 100644 sys/dev/acpi/apei_interp.c create mode 100644 sys/dev/acpi/apei_interp.h create mode 100644 sys/dev/acpi/cper.h create mode 100644 sys/modules/apei/Makefile create mode 100644 sys/modules/apei/apei.ioconf diff --git a/share/man/man4/apei.4 b/share/man/man4/apei.4 new file mode 100644 index 000000000000..975a6bcb8a7c --- /dev/null +++ b/share/man/man4/apei.4 @@ -0,0 +1,91 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2024 +.Dt APEI 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm apei +.Nd ACPI Platform Error Interfaces +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "apei* at apeibus?" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +.Nm +reports hardware errors discovered through +.Tn APEI , +the +.Tn ACPI +Platform Error Interfaces. +.Pp +.Nm +also supports injecting errors, and reading/writing/clearing error +records in a persistent firmware store. 
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DIAGNOSTICS +When the hardware detects an error and reports it to +.Nm , +it will print information about the error to the console: +.Bd -literal +apei0: hardware error source 1 reported error: severity=corrected status=0x12 nentries=1 +apei0: hardware error source 1 entry 0: severity=corrected +apei0: hardware error source 1 entry 0: type={0xa5bc1114,0x6f64,0x4ede,0xb8b8,{0x3e,0x83,0xed,0x7c,0x83,0xb1}} (memory error) +apei0: hardware error source 1 entry 0: memory_error_type=8 (PARITY_ERROR) +.Ed +.Pp +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh BUGS +No sysctl interface to read BERT. +.Pp +No simple sysctl interface to inject errors with EINJ. +.Pp +Nothing reads, writes, or clears ERST. +.Pp +Many hardware error source types in the HEST are missing. +.Pp +No formal log format or sysctl/device interface that programs can +reliably act on. diff --git a/sys/dev/acpi/apei.c b/sys/dev/acpi/apei.c new file mode 100644 index 000000000000..f23dc9d43ddc --- /dev/null +++ b/sys/dev/acpi/apei.c @@ -0,0 +1,3070 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * APEI: ACPI Platform Error Interface + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html + * + * XXX map instruction registers in advance so this is safe in nasty + * contexts? + * + * XXX dtrace probes + * + * XXX uncorrectable error NMI comes in on all CPUs at once, what to do? + * + * XXX AMD MCA + * + * XXX IA32 machine check stuff + * + * XXX switch-to-polling for GHES notifications + * + * XXX error threshold for GHES notifications + * + * XXX sort out interrupt notification types, e.g. do we ever need to + * do acpi_intr_establish? 
+ */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#if defined(__i386__) || defined(__x86_64__) +#include +#endif + +#include "cper.h" /* XXX find a proper home for this */ + +#include "ioconf.h" + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("apei") + +/* + * struct apei_tab + * + * Collection of pointers to APEI-related ACPI tables. Used + * inside struct apei_softc, and by apei_match without an + * apei_softc. + */ +struct apei_tab { + ACPI_TABLE_BERT *bert; /* Boot Error Record Table */ + ACPI_TABLE_EINJ *einj; /* Error Injection Table */ + ACPI_TABLE_ERST *erst; /* Error Record Serialization Table */ + ACPI_TABLE_HEST *hest; /* Hardware Error Source Table */ +}; + +/* + * struct apei_bert_softc + * + * Software state for access to the BERT, Boot Error Record Table. + */ +struct apei_bert_softc { + ACPI_HEST_GENERIC_STATUS *bsc_gesb; +}; + +/* + * struct apei_erst_softc + * + * Software state for error serialization actions described in the + * ERST, Error Record Serialization Table. + */ +struct apei_erst_softc { + struct apei_interp *ssc_interp; +}; + +/* + * struct apei_einj_softc + * + * Software state for error injection actions described in the + * EINJ, Error Injection Table. + */ +struct apei_einj_softc { + struct apei_interp *jsc_interp; +}; + +/* + * struct apei_source + * + * Software state for a hardware error source from the HEST, + * Hardware Error Source Table, to process error notifications. 
+ */ +struct apei_source { + struct apei_softc *as_sc; + ACPI_HEST_HEADER *as_header; + union { + struct { + ACPI_HEST_GENERIC_STATUS *gesb; + } as_ghes; + }; + union { + struct callout as_ch; +#if defined(__i386__) || defined(__x86_64__) + struct nmi_handler *as_nmi; +#endif + SIMPLEQ_ENTRY(apei_source) as_entry; + }; +}; + +/* + * struct apei_hest_softc + * + * Software state for processing hardware error reports during + * operation, from the HEST, Hardware Error Source table. + */ +struct apei_hest_softc { + struct apei_source *hsc_source; + SIMPLEQ_HEAD(, apei_source) hsc_hed_list; +}; + +/* + * struct apei_softc + * + * All software state for APEI. + */ +struct apei_softc { + device_t sc_dev; + struct apei_tab sc_tab; + + struct sysctllog *sc_sysctllog; + const struct sysctlnode *sc_sysctlroot; + + struct apei_bert_softc sc_bert; + struct apei_einj_softc sc_einj; + struct apei_erst_softc sc_erst; + struct apei_hest_softc sc_hest; +}; + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + */ + +static const char *const cper_memory_error_type[] = { +#define F(LN, SN, V) [LN] = #SN, + CPER_MEMORY_ERROR_TYPES(F) +#undef F +}; + +/* + * Symbolic names of the APEI EINJ (Error Injection) logical actions + * are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-injection-actions + */ +static const char *const apei_einj_action[] = { + [ACPI_EINJ_BEGIN_OPERATION] = "begin_injection_operation", + [ACPI_EINJ_GET_TRIGGER_TABLE] = "get_trigger_error_action_table", + [ACPI_EINJ_SET_ERROR_TYPE] = "set_error_type", + [ACPI_EINJ_GET_ERROR_TYPE] = "get_error_type", + [ACPI_EINJ_END_OPERATION] = "end_operation", + [ACPI_EINJ_EXECUTE_OPERATION] = "execute_operation", + [ACPI_EINJ_CHECK_BUSY_STATUS] = "check_busy_status", + [ACPI_EINJ_GET_COMMAND_STATUS] = "get_command_status", + [ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS] = "set_error_type_with_address", + 
[ACPI_EINJ_GET_EXECUTE_TIMINGS] = "get_execute_operation_timings", +}; + +/* + * Symbolic names of the APEI EINJ (Error Injection) instructions to + * implement logical actions are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#injection-instructions-table + */ + +static const char *const apei_einj_instruction[] = { + [ACPI_EINJ_READ_REGISTER] = "read_register", + [ACPI_EINJ_READ_REGISTER_VALUE] = "read_register", + [ACPI_EINJ_WRITE_REGISTER] = "write_register", + [ACPI_EINJ_WRITE_REGISTER_VALUE] = "write_register_value", + [ACPI_EINJ_NOOP] = "noop", +}; + +/* + * Symbolic names of the APEI ERST (Error Record Serialization Table) + * logical actions are taken (and downcased) from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-record-serialization-actions-table + */ + +static const char *const apei_erst_action[] = { + [ACPI_ERST_BEGIN_WRITE] = "begin_write_operation", + [ACPI_ERST_BEGIN_READ] = "begin_read_operation", + [ACPI_ERST_BEGIN_CLEAR] = "begin_clear_operation", + [ACPI_ERST_END] = "end_operation", + [ACPI_ERST_SET_RECORD_OFFSET] = "set_record_offset", + [ACPI_ERST_EXECUTE_OPERATION] = "execute_operation", + [ACPI_ERST_CHECK_BUSY_STATUS] = "check_busy_status", + [ACPI_ERST_GET_COMMAND_STATUS] = "get_command_status", + [ACPI_ERST_GET_RECORD_ID] = "get_record_identifier", + [ACPI_ERST_SET_RECORD_ID] = "set_record_identifier", + [ACPI_ERST_GET_RECORD_COUNT] = "get_record_count", + [ACPI_ERST_BEGIN_DUMMY_WRIITE] = "begin_dummy_write_operation", + [ACPI_ERST_NOT_USED] = "reserved", + [ACPI_ERST_GET_ERROR_RANGE] = "get_error_log_address_range", + [ACPI_ERST_GET_ERROR_LENGTH] = "get_error_log_address_range_length", + [ACPI_ERST_GET_ERROR_ATTRIBUTES] = + "get_error_log_address_range_attributes", + [ACPI_ERST_EXECUTE_TIMINGS] = "get_execute_operations_timings", +}; + +/* + * Symbolic names of the APEI ERST (Error Record Serialization Table) + * instructions to implement logical 
actions are taken (and downcased) + * from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions + */ + +static const char *apei_erst_instruction[] = { + [ACPI_ERST_READ_REGISTER] = "read_register", + [ACPI_ERST_READ_REGISTER_VALUE] = "read_register_value", + [ACPI_ERST_WRITE_REGISTER] = "write_register", + [ACPI_ERST_WRITE_REGISTER_VALUE] = "write_register_value", + [ACPI_ERST_NOOP] = "noop", + [ACPI_ERST_LOAD_VAR1] = "load_var1", + [ACPI_ERST_LOAD_VAR2] = "load_var2", + [ACPI_ERST_STORE_VAR1] = "store_var1", + [ACPI_ERST_ADD] = "add", + [ACPI_ERST_SUBTRACT] = "subtract", + [ACPI_ERST_ADD_VALUE] = "add_value", + [ACPI_ERST_SUBTRACT_VALUE] = "subtract_value", + [ACPI_ERST_STALL] = "stall", + [ACPI_ERST_STALL_WHILE_TRUE] = "stall_while_true", + [ACPI_ERST_SKIP_NEXT_IF_TRUE] = "skip_next_instruction_if_true", + [ACPI_ERST_GOTO] = "goto", + [ACPI_ERST_SET_SRC_ADDRESS_BASE] = "set_src_address_base", + [ACPI_ERST_SET_DST_ADDRESS_BASE] = "set_dst_address_base", + [ACPI_ERST_MOVE_DATA] = "move_data", +}; + +/* + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-status-block + * + * The acpica names ACPI_HEST_GEN_ERROR_* appear to coincide with this + * but are designated as being intended for Generic Error Data Entries + * rather than Generic Error Status Blocks. 
+ */ +static const char *const apei_gesb_severity[] = { + [0] = "recoverable", + [1] = "fatal", + [2] = "corrected", + [3] = "none", +}; + +/* + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#generic-error-data-entry + */ +static const char *const apei_gede_severity[] = { + [ACPI_HEST_GEN_ERROR_RECOVERABLE] = "recoverable", + [ACPI_HEST_GEN_ERROR_FATAL] = "fatal", + [ACPI_HEST_GEN_ERROR_CORRECTED] = "corrected", + [ACPI_HEST_GEN_ERROR_NONE] = "none", +}; + +/* + * Symbolic names of the APEI Hardware Error Notification Structure + * types are taken from: + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#hardware-error-notification-structure + */ + +static const char *const apei_hens_type[] = { + [ACPI_HEST_NOTIFY_POLLED] = "Polled", + [ACPI_HEST_NOTIFY_EXTERNAL] = "External Interrupt", + [ACPI_HEST_NOTIFY_LOCAL] = "Local Interrupt", + [ACPI_HEST_NOTIFY_SCI] = "SCI", + [ACPI_HEST_NOTIFY_NMI] = "NMI", + [ACPI_HEST_NOTIFY_CMCI] = "CMCI", + [ACPI_HEST_NOTIFY_MCE] = "MCE", + [ACPI_HEST_NOTIFY_GPIO] = "GPIO-Signal", + [ACPI_HEST_NOTIFY_SEA] = "ARMv8 SEA", + [ACPI_HEST_NOTIFY_SEI] = "ARMv8 SEI", + [ACPI_HEST_NOTIFY_GSIV] = "External Interrupt - GSIV", + [ACPI_HEST_NOTIFY_SOFTWARE_DELEGATED] = "Software Delegated Exception", +}; + +/* + * XXX rename these, remove the debug goo, and dtrace them + */ + +static ACPI_STATUS +FakeAcpiRead(uint64_t *p, ACPI_GENERIC_ADDRESS *reg) +{ + + aprint_debug("AcpiRead" + "[SpaceId=%"PRIu8" BitWidth=%"PRIu8" BitOffset=%"PRIu8 + " AccessWidth=%"PRIu8" Address=0x%"PRIx64"]\n", + reg->SpaceId, reg->BitWidth, reg->BitOffset, + reg->AccessWidth, reg->Address); +#if 0 + *p = 12345; + return AE_OK; +#else + return AcpiRead(p, reg); +#endif +} + +static ACPI_STATUS +FakeAcpiWrite(uint64_t v, ACPI_GENERIC_ADDRESS *reg) +{ + + aprint_debug("AcpiWrite" + "[SpaceId=%"PRIu8" BitWidth=%"PRIu8" BitOffset=%"PRIu8 + " AccessWidth=%"PRIu8" Address=0x%"PRIx64"] := 0x%"PRIx64"\n", + reg->SpaceId, reg->BitWidth, 
reg->BitOffset, + reg->AccessWidth, reg->Address, v); +#if 0 + return AE_OK; +#else + return AcpiWrite(v, reg); +#endif +} + +static void +FakeAcpiMove(uint64_t pdst, uint64_t psrc, uint64_t nbytes) +{ + + aprint_debug("AcpiMove" + " %"PRIu64" bytes from 0x%"PRIx64" to 0x%"PRIx64"\n", + nbytes, psrc, pdst); + +#if 0 +#else + char *vdst, *vsrc; /* XXX iomem */ + + if (pdst < psrc && psrc < pdst + nbytes) { + vdst = AcpiOsMapMemory(pdst, nbytes + (psrc - pdst)); + vsrc = vdst + (psrc - pdst); + memmove(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vdst, nbytes + (psrc - pdst)); + } else if (psrc < pdst && pdst < psrc + nbytes) { + vsrc = AcpiOsMapMemory(psrc, nbytes + (pdst - psrc)); + vdst = vsrc + (pdst - psrc); + memmove(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vsrc, nbytes + (pdst - psrc)); + } else { + vdst = AcpiOsMapMemory(pdst, nbytes); + vsrc = AcpiOsMapMemory(psrc, nbytes); + memcpy(vdst, vsrc, nbytes); + AcpiOsUnmapMemory(vsrc, nbytes); + AcpiOsUnmapMemory(vdst, nbytes); + } +#endif +} + +/* + * apei_read_register(Register, Mask, &X) + * + * Read from Register, shifted out of position and then masked + * with Mask, and store the result in X. + * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#read-register + * + * (I'm guessing this applies to both ERST and EINJ, even though + * that section is under the ERST part.) + */ +static ACPI_STATUS +apei_read_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, uint64_t *p) +{ + const uint8_t BitOffset = Register->BitOffset; + uint64_t X; + ACPI_STATUS rv; + + rv = FakeAcpiRead(&X, Register); + if (ACPI_FAILURE(rv)) { + *p = 0; /* XXX */ + return rv; + } + X >>= BitOffset; + X &= Mask; + + *p = X; + return AE_OK; +} + +/* + * apei_write_register(Register, Mask, preserve_register, X) + * + * Write X, masked with Mask and shifted into position, to + * Register, preserving other bits if preserve_register is true. 
+ * + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#write-register + * + * Note: The Preserve Register semantics is based on the clearer + * indentation at + * https://uefi.org/sites/default/files/resources/ACPI_5_1release.pdf#page=714 + * which has been lost in more recent versions of the spec. + */ +static ACPI_STATUS +apei_write_register(ACPI_GENERIC_ADDRESS *Register, uint64_t Mask, + bool preserve_register, uint64_t X) +{ + const uint8_t BitOffset = Register->BitOffset; + ACPI_STATUS rv; + + X &= Mask; + X <<= BitOffset; + if (preserve_register) { + uint64_t Y; + + rv = FakeAcpiRead(&Y, Register); + if (ACPI_FAILURE(rv)) + return rv; + Y &= ~(Mask << BitOffset); + X |= Y; + } + return FakeAcpiWrite(X, Register); +} + +/* + * apei_get_tables(tab) + * + * Get references to whichever APEI-related tables -- BERT, EINJ, + * ERST, HEST -- are available in the system. + */ +static void +apei_get_tables(struct apei_tab *tab) +{ + ACPI_STATUS rv; + + /* + * Probe the BERT -- Boot Error Record Table. + */ + rv = AcpiGetTable(ACPI_SIG_BERT, 0, (ACPI_TABLE_HEADER **)&tab->bert); + if (ACPI_FAILURE(rv)) + tab->bert = NULL; + + /* + * Probe the EINJ -- Error Injection Table. + */ + rv = AcpiGetTable(ACPI_SIG_EINJ, 0, (ACPI_TABLE_HEADER **)&tab->einj); + if (ACPI_FAILURE(rv)) + tab->einj = NULL; + + /* + * Probe the ERST -- Error Record Serialization Table. + */ + rv = AcpiGetTable(ACPI_SIG_ERST, 0, (ACPI_TABLE_HEADER **)&tab->erst); + if (ACPI_FAILURE(rv)) + tab->erst = NULL; + + /* + * Probe the HEST -- Hardware Error Source Table. + */ + rv = AcpiGetTable(ACPI_SIG_HEST, 0, (ACPI_TABLE_HEADER **)&tab->hest); + if (ACPI_FAILURE(rv)) + tab->hest = NULL; +} + +/* + * apei_put_tables(tab) + * + * Release the tables acquired by apei_get_tables. 
+ */ +static void +apei_put_tables(struct apei_tab *tab) +{ + + if (tab->bert != NULL) { + AcpiPutTable(&tab->bert->Header); + tab->bert = NULL; + } + if (tab->einj != NULL) { + AcpiPutTable(&tab->einj->Header); + tab->einj = NULL; + } + if (tab->erst != NULL) { + AcpiPutTable(&tab->erst->Header); + tab->erst = NULL; + } + if (tab->hest != NULL) { + AcpiPutTable(&tab->hest->Header); + tab->hest = NULL; + } +} + +static int apei_match(device_t, cfdata_t, void *); +static void apei_attach(device_t, device_t, void *); +static int apei_detach(device_t, int); + +static void apei_identify(struct apei_softc *, const char *, + const ACPI_TABLE_HEADER *); +static uint32_t apei_gesb_report(struct apei_softc *, + const ACPI_HEST_GENERIC_STATUS *, size_t, const char *); + +static void apei_bert_attach(struct apei_softc *); +static void apei_bert_detach(struct apei_softc *); + +static void apei_einj_attach(struct apei_softc *); +static void apei_einj_detach(struct apei_softc *); +static void apei_einj_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *, + uint32_t); +static uint64_t apei_einj_act(struct apei_softc *, enum AcpiEinjActions, + uint64_t); +static uint64_t apei_einj_trigger(struct apei_softc *, uint64_t); +static int apei_einj_action_sysctl(SYSCTLFN_ARGS); +static int apei_einj_trigger_sysctl(SYSCTLFN_ARGS); +static int apei_einj_types_sysctl(SYSCTLFN_ARGS); + +static void apei_erst_attach(struct apei_softc *); +static void apei_erst_detach(struct apei_softc *); +static bool apei_erst_instvalid(ACPI_WHEA_HEADER *, uint32_t, uint32_t); +static void apei_erst_instfunc(ACPI_WHEA_HEADER *, void *, uint32_t *, + uint32_t); +static uint64_t apei_erst_act(struct apei_softc *, enum AcpiErstActions, + uint64_t); + +static void apei_hest_attach(struct apei_softc *); +static void apei_hest_detach(struct apei_softc *); + +/* + * autoconf goo + */ + +CFATTACH_DECL_NEW(apei, sizeof(struct apei_softc), + apei_match, apei_attach, apei_detach, NULL); + +static int +apei_match(device_t 
parent, cfdata_t match, void *aux) +{ + struct apei_tab tab; + int prio = 0; + + /* + * If we have any of the APEI tables, match. + */ + apei_get_tables(&tab); + if (tab.bert || tab.einj || tab.erst || tab.hest) + prio = 1; + apei_put_tables(&tab); + + return prio; +} + +static void +apei_attach(device_t parent, device_t self, void *aux) +{ + struct apei_softc *sc = device_private(self); + const struct sysctlnode *sysctl_hw_acpi; + int error; + + aprint_naive("\n"); + aprint_normal(": ACPI Platform Error Interface\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + apei_get_tables(&sc->sc_tab); + + /* + * Get the sysctl hw.acpi node. This should already be created + * but I don't see an easy way to get at it. If this fails, + * something is seriously wrong, so let's stop here. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + NULL, &sysctl_hw_acpi, 0, + CTLTYPE_NODE, "acpi", NULL, NULL, 0, NULL, 0, + CTL_HW, CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi: %d\n", error); + return; + } + + /* + * Create sysctl hw.acpi.apei. + */ + error = sysctl_createv(&sc->sc_sysctllog, 0, + &sysctl_hw_acpi, &sc->sc_sysctlroot, 0, + CTLTYPE_NODE, "apei", + SYSCTL_DESCR("ACPI Platform Error Interface"), + NULL, 0, NULL, 0, + CTL_CREATE, CTL_EOL); + if (error) { + aprint_error_dev(sc->sc_dev, + "failed to create sysctl hw.acpi.apei: %d\n", error); + return; + } + + /* + * Set up BERT, EINJ, ERST, and HEST. 
+ */ + if (sc->sc_tab.bert) { + apei_identify(sc, "BERT", &sc->sc_tab.bert->Header); + apei_bert_attach(sc); + } + if (sc->sc_tab.einj) { + apei_identify(sc, "EINJ", &sc->sc_tab.einj->Header); + apei_einj_attach(sc); + } + if (sc->sc_tab.erst) { + apei_identify(sc, "ERST", &sc->sc_tab.erst->Header); + apei_erst_attach(sc); + } + if (sc->sc_tab.hest) { + apei_identify(sc, "HEST", &sc->sc_tab.hest->Header); + apei_hest_attach(sc); + } +} + +static int +apei_detach(device_t self, int flags) +{ + struct apei_softc *sc = device_private(self); + int error; + + /* + * Detach children. We don't currently have any but this is + * harmless without children and mandatory if we ever sprouted + * them, so let's just leave it here for good measure. + * + * After this point, we are committed to detaching; failure is + * forbidden. + */ + error = config_detach_children(self, flags); + if (error) + return error; + + /* + * Tear down all the sysctl nodes first, before the software + * state backing them goes away. + */ + sysctl_teardown(&sc->sc_sysctllog); + sc->sc_sysctlroot = NULL; + + /* + * Detach the software state for the APEI tables. + */ + if (sc->sc_tab.hest) + apei_hest_detach(sc); + if (sc->sc_tab.erst) + apei_erst_detach(sc); + if (sc->sc_tab.einj) + apei_einj_detach(sc); + if (sc->sc_tab.bert) + apei_bert_detach(sc); + + /* + * Release the APEI tables and we're done. + */ + apei_put_tables(&sc->sc_tab); + pmf_device_deregister(self); + return 0; +} + +/* + * apei_identify(sc, name, header) + * + * Identify the APEI-related table header for dmesg. 
+ */ +static void +apei_identify(struct apei_softc *sc, const char *name, + const ACPI_TABLE_HEADER *h) +{ + + aprint_normal_dev(sc->sc_dev, "%s:" + " OemId <%6.6s,%8.8s,%08x>" + " AslId <%4.4s,%08x>\n", + name, + h->OemId, h->OemTableId, h->OemRevision, + h->AslCompilerId, h->AslCompilerRevision); +} + +/* + * apei_cper_guid_dec(buf, uuid) + * + * Decode a Common Platform Error Record UUID/GUID from an ACPI + * table at buf into a sys/uuid.h struct uuid. + */ +static void +apei_cper_guid_dec(const uint8_t buf[static 16], struct uuid *uuid) +{ + + uuid_dec_le(buf, uuid); +} + +/* + * apei_format_guid(uuid, s) + * + * Format a UUID as a string. This uses C initializer notation, + * not UUID notation, in order to match what the text in the UEFI + * specification. + */ +static void +apei_format_guid(const struct uuid *uuid, char guidstr[static 69]) +{ + + snprintf(guidstr, 69, "{0x%08x,0x%04x,0x%04x," + "0x%02x%02x," + "{0x%02x,0x%02x,0x%02x,0x%02x,0x%02x,0x%02x}}", + uuid->time_low, uuid->time_mid, uuid->time_hi_and_version, + uuid->clock_seq_hi_and_reserved, uuid->clock_seq_hi_and_reserved, + uuid->node[0], uuid->node[1], uuid->node[2], + uuid->node[3], uuid->node[4], uuid->node[5]); +} + +/* + * apei_gede_report_header(sc, gede, ctx) + * + * Report the header of the ith Generic Error Data Entry in the + * given context, for revisions prior to 3.0 (no timestamp). + */ +static void +apei_gede_report_header(struct apei_softc *sc, + const ACPI_HEST_GENERIC_DATA *gede, const char *ctx) +{ + struct uuid sectype; + char guidstr[69]; + + device_printf(sc->sc_dev, "%s: severity=%s\n", ctx, + (gede->ErrorSeverity < __arraycount(apei_gede_severity) + ? 
apei_gede_severity[gede->ErrorSeverity] + : "unknown")); + + apei_cper_guid_dec(gede->SectionType, §ype); + apei_format_guid(§ype, guidstr); + aprint_debug_dev(sc->sc_dev, "%s: SectionType=%s\n", ctx, guidstr); + aprint_debug_dev(sc->sc_dev, "%s:" + " ErrorSeverity=%"PRIu32"\n", ctx, gede->ErrorSeverity); + aprint_debug_dev(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", + ctx, gede->Revision); + char buf[64]; + snprintb(buf, sizeof(buf), "\020" + "\001" "FRU_ID" + "\002" "FRU_STRING" + "\003" "TIMESTAMP" + "\0", gede->ValidationBits); + aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", ctx, buf); + snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags); + aprint_debug_dev(sc->sc_dev, "%s: Flags=%s\n", ctx, buf); + aprint_debug_dev(sc->sc_dev, "%s: ErrorDataLength=0x%"PRIu32"\n", + ctx, gede->ErrorDataLength); + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) { + struct uuid fruid; + + apei_cper_guid_dec(gede->FruId, &fruid); + apei_format_guid(&fruid, guidstr); + aprint_debug_dev(sc->sc_dev, "%s: FruId=%s\n", ctx, guidstr); + } + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) { + aprint_debug_dev(sc->sc_dev, "%s: FruText=%16.16s\n", + ctx, gede->FruText); + } +} + +/* + * apei_gede_report_header_v3(sc, gede, ctx, i) + * + * Report the header of the ith Generic Error Data Entry in the + * given context, for revisions 3.x (timestamp). + */ +static void +apei_gede_report_header_v3(struct apei_softc *sc, + const ACPI_HEST_GENERIC_DATA_V300 *gede, const char *ctx) +{ + struct uuid sectype; + char guidstr[69]; + + device_printf(sc->sc_dev, "%s: severity=%s\n", ctx, + (gede->ErrorSeverity < __arraycount(apei_gede_severity) + ? 
apei_gede_severity[gede->ErrorSeverity] + : "unknown")); + + apei_cper_guid_dec(gede->SectionType, §ype); + apei_format_guid(§ype, guidstr); + aprint_debug_dev(sc->sc_dev, "%s: SectionType=%s\n", + ctx, guidstr); + aprint_debug_dev(sc->sc_dev, "%s:" + " ErrorSeverity=%"PRIu32"\n", ctx, gede->ErrorSeverity); + device_printf(sc->sc_dev, "%s: Revision=0x%"PRIx16"\n", + ctx, gede->Revision); + char buf[64]; + snprintb(buf, sizeof(buf), "\020" + "\001" "FRU_ID" + "\002" "FRU_STRING" + "\003" "TIMESTAMP" + "\0", gede->ValidationBits); + aprint_debug_dev(sc->sc_dev, "%s: ValidationBits=%s\n", + ctx, buf); + snprintb(buf, sizeof(buf), CPER_SECTION_FLAGS_FMT, gede->Flags); + aprint_debug_dev(sc->sc_dev, "%s: Flags=%s\n", + ctx, buf); + aprint_debug_dev(sc->sc_dev, "%s:" + " ErrorDataLength=0x%"PRIu32"\n", + ctx, gede->ErrorDataLength); + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_ID) { + struct uuid fruid; + + apei_cper_guid_dec(gede->FruId, &fruid); + apei_format_guid(&fruid, guidstr); + aprint_debug_dev(sc->sc_dev, "%s: FruId=%s\n", + ctx, guidstr); + } + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_FRU_STRING) { + aprint_debug_dev(sc->sc_dev, "%s:" + " FruText=%16.16s\n", + ctx, gede->FruText); + } + if (gede->ValidationBits & ACPI_HEST_GEN_VALID_TIMESTAMP) { + aprint_debug_dev(sc->sc_dev, "%s:" + " TimeStamp=0x%"PRIx64"\n", + ctx, gede->TimeStamp); + } +} + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + */ +static const struct uuid CPER_MEMORY_ERROR_SECTION = + {0xa5bc1114,0x6f64,0x4ede,0xb8,0x63,{0x3e,0x83,0xed,0x7c,0x83,0xb1}}; + +static void +apei_cper_memory_error_report(struct apei_softc *sc, const void *buf, + size_t len, const char *ctx) +{ + const struct cper_memory_error *ME = buf; + char validbits[1024]; + + snprintb(validbits, sizeof(validbits), + CPER_MEMORY_ERROR_VALIDATION_BITS_FMT, ME->validation_bits); + aprint_debug_dev(sc->sc_dev, "%s: validation_bits=%s\n", ctx, + validbits); + if 
(ME->validation_bits & CPER_MEMORY_ERROR_ERROR_STATUS_VALID) { + device_printf(sc->sc_dev, "%s: error_status=0x%"PRIx64"\n", + ctx, ME->error_status); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_PHYSICAL_ADDRESS_VALID) { + device_printf(sc->sc_dev, "%s: physical_address=0x%"PRIx64"\n", + ctx, ME->physical_address); + } + if (ME->validation_bits & + CPER_MEMORY_ERROR_PHYSICAL_ADDRESS_MASK_VALID) { + device_printf(sc->sc_dev, "%s: physical_address_mask=0x%"PRIx64 + "\n", ctx, ME->physical_address_mask); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_NODE_VALID) { + device_printf(sc->sc_dev, "%s: node=0x%"PRIx16"\n", ctx, + ME->node); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_CARD_VALID) { + device_printf(sc->sc_dev, "%s: card=0x%"PRIx16"\n", ctx, + ME->card); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_MODULE_VALID) { + device_printf(sc->sc_dev, "%s: module=0x%"PRIx16"\n", ctx, + ME->module); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_BANK_VALID) { + device_printf(sc->sc_dev, "%s: bank=0x%"PRIx16"\n", ctx, + ME->bank); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_DEVICE_VALID) { + device_printf(sc->sc_dev, "%s: device=0x%"PRIx16"\n", ctx, + ME->device); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_ROW_VALID) { + device_printf(sc->sc_dev, "%s: row=0x%"PRIx16"\n", ctx, + ME->row); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_COLUMN_VALID) { + device_printf(sc->sc_dev, "%s: column=0x%"PRIx16"\n", ctx, + ME->column); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_BIT_POSITION_VALID) { + device_printf(sc->sc_dev, "%s: bit_position=0x%"PRIx16"\n", + ctx, ME->bit_position); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_REQUESTOR_ID_VALID) { + device_printf(sc->sc_dev, "%s: requestor_id=0x%"PRIx64"\n", + ctx, ME->requestor_id); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_RESPONDER_ID_VALID) { + device_printf(sc->sc_dev, "%s: responder_id=0x%"PRIx64"\n", + ctx, ME->responder_id); + } + if (ME->validation_bits & 
CPER_MEMORY_ERROR_TARGET_ID_VALID) { + device_printf(sc->sc_dev, "%s: target_id=0x%"PRIx64"\n", + ctx, ME->target_id); + } + if (ME->validation_bits & CPER_MEMORY_ERROR_MEMORY_ERROR_TYPE_VALID) { + const uint8_t t = ME->memory_error_type; + const char *n = t < __arraycount(cper_memory_error_type) + ? cper_memory_error_type[t] : NULL; + + if (n) { + device_printf(sc->sc_dev, "%s: memory_error_type=%d" + " (%s)\n", ctx, t, n); + } else { + device_printf(sc->sc_dev, "%s: memory_error_type=%d\n", + ctx, t); + } + } +} + +static const struct { + const char *name; + const struct uuid *type; + size_t minlength; + void (*func)(struct apei_softc *, const void *, size_t, const char *); +} apei_cper_reports[] = { + { "memory", &CPER_MEMORY_ERROR_SECTION, + sizeof(struct cper_memory_error), + apei_cper_memory_error_report }, +}; + +/* + * apei_cper_report(sc, sectype, buf, len, ctx) + * + * Report errors in a Common Platform Error Record of section type + * sectype in the len-byte buffer at buf. + * + * The section types and corresponding section layouts are listed + * at: + * + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html + */ +static void +apei_cper_report(struct apei_softc *sc, const uint8_t sectypebuf[static 16], + const void *buf, size_t len, const char *ctx) +{ + struct uuid sectype; + char sectypestr[69]; + size_t i; + + apei_cper_guid_dec(sectypebuf, §ype); + apei_format_guid(§ype, sectypestr); + for (i = 0; i < __arraycount(apei_cper_reports); i++) { + if (memcmp(§ype, apei_cper_reports[i].type, + sizeof(sectype)) != 0) + continue; + const bool trunc = len < apei_cper_reports[i].minlength; + device_printf(sc->sc_dev, "%s: type=%s (%s error)%s\n", ctx, + sectypestr, + apei_cper_reports[i].name, + trunc ? 
" [truncated]" : ""); + if (!trunc) + (*apei_cper_reports[i].func)(sc, buf, len, ctx); + return; + } + device_printf(sc->sc_dev, "%s: unknown CPER type: %s\n", ctx, + sectypestr); +} + +/* + * apei_gesb_report(sc, gesb, size, ctx) + * + * Check a Generic Error Status Block, of at most the specified + * size in bytes, and report any errors in it. Return the 32-bit + * Block Status in case the caller needs it to acknowledge the + * report to firmware. + */ +static uint32_t +apei_gesb_report(struct apei_softc *sc, const ACPI_HEST_GENERIC_STATUS *gesb, + size_t size, const char *ctx) +{ + uint32_t status, unknownstatus, severity, nentries, i; + uint32_t datalen, rawdatalen; + const ACPI_HEST_GENERIC_DATA *gede0, *gede; + const unsigned char *rawdata; + char statusbuf[128]; + + /* + * Verify the buffer is large enough for a Generic Error Status + * Block before we try to touch anything in it. + */ + if (size < sizeof(*gesb)) { + device_printf(sc->sc_dev, "%s: truncated GESB, %zu < %zu\n", + ctx, size, sizeof(*gesb)); + return 0; + } + size -= sizeof(*gesb); + + /* + * Load the status. Access ordering rules are unclear in the + * ACPI specification; I'm guessing that load-acquire of the + * block status is a good idea before any other access to the + * GESB. + */ + status = atomic_load_acquire(&gesb->BlockStatus); + + /* + * If there are no status bits set, the rest of the GESB is + * garbage, so stop here. 
+ */ + if (status == 0) { + /* XXX dtrace */ + /* XXX DPRINTF */ + goto out; + } + + /* XXX define this format somewhere */ + snprintb(statusbuf, sizeof(statusbuf), "\177\020" + "b\000" "UE\0" + "b\001" "CE\0" + "b\002" "MULTI_UE\0" + "b\003" "MULTI_CE\0" + "f\004\010" "GEDE_COUNT\0" + "\0", status); + + /* XXX dtrace */ + /* XXX DPRINTF */ + aprint_debug_dev(sc->sc_dev, "%s: Generic Error Status Block\n", ctx); + aprint_debug_dev(sc->sc_dev, " BlockStatus=0x%s\n", statusbuf); + aprint_debug_dev(sc->sc_dev, " RawDataOffset=0x%x\n", + gesb->RawDataOffset); + aprint_debug_dev(sc->sc_dev, " RawDataLength=0x%x\n", + gesb->RawDataLength); + aprint_debug_dev(sc->sc_dev, " DataLength=0x%x\n", + gesb->DataLength); + aprint_debug_dev(sc->sc_dev, " ErrorSeverity=0x%x\n", + gesb->ErrorSeverity); + + /* + * Print a message to the console and dmesg about the severity + * of the error. + */ + severity = gesb->ErrorSeverity; + nentries = __SHIFTOUT(status, ACPI_HEST_ERROR_ENTRY_COUNT); + if (severity < __arraycount(apei_gesb_severity)) { + device_printf(sc->sc_dev, "%s reported error:" + " severity=%s status=%s nentries=%u\n", + ctx, apei_gesb_severity[severity], statusbuf, nentries); + } else { + device_printf(sc->sc_dev, "%s reported error:" + " severity=%"PRIu32" status=%s nentries=%u\n", + ctx, severity, statusbuf, nentries); + } + + /* + * Clear the bits we know about to warn if there's anything + * left we don't understand. + */ + unknownstatus = status; + unknownstatus &= ~ACPI_HEST_UNCORRECTABLE; + unknownstatus &= ~ACPI_HEST_MULTIPLE_UNCORRECTABLE; + unknownstatus &= ~ACPI_HEST_CORRECTABLE; + unknownstatus &= ~ACPI_HEST_MULTIPLE_CORRECTABLE; + unknownstatus &= ~ACPI_HEST_ERROR_ENTRY_COUNT; + if (unknownstatus != 0) { + /* XXX dtrace */ + /* XXX rate-limit? 
*/ + device_printf(sc->sc_dev, "%s: unknown BlockStatus bits:" + " 0x%"PRIx32"\n", ctx, unknownstatus); + } + + /* + * Advance past the Generic Error Status Block (GESB) header to + * the Generic Error Data Entries (GEDEs). + */ + gede0 = gede = (const ACPI_HEST_GENERIC_DATA *)(gesb + 1); + + /* + * Verify that the data length (GEDEs) fits within the size. + * If not, truncate the GEDEs. + */ + datalen = gesb->DataLength; + if (size < datalen) { + device_printf(sc->sc_dev, "%s:" + " GESB DataLength exceeds bounds: %zu < %"PRIu32"\n", + ctx, size, datalen); + datalen = size; + } + size -= datalen; + + /* + * Report each of the Generic Error Data Entries. + */ + for (i = 0; i < nentries; i++) { + const ACPI_HEST_GENERIC_DATA_V300 *gede_v3; + size_t headerlen; + char subctx[128]; + + snprintf(subctx, sizeof(subctx), "%s entry %"PRIu32, ctx, i); + + if (datalen < sizeof(*gede)) { + device_printf(sc->sc_dev, "%s:" + " truncated GEDE: %"PRIu32" < %zu bytes\n", + subctx, datalen, sizeof(*gede)); + break; + } + + /* + * Handle different revisions. The Revision field is a + * binary-coded decimal major.minor version. Starting + * with 3.0, the GEDE has an extra Timestamp field. We + * don't know what will come with 4.0, so out of an + * abundance of caution, don't touch that. + */ + if (gede->Revision < 0x0300) { + headerlen = sizeof(*gede); + apei_gede_report_header(sc, gede, subctx); + } else if (gede->Revision < 0x0400) { + gede_v3 = (const ACPI_HEST_GENERIC_DATA_V300 *)gede; + headerlen = sizeof(*gede_v3); + apei_gede_report_header_v3(sc, gede_v3, subctx); + } else { + device_printf(sc->sc_dev, "%s:" + " unknown revision: 0x%02"PRIx16"\n", + subctx, gede->Revision); + break; + } + + /* + * Stop here if what we mapped is too small for the + * error data length. 
+ */ + datalen -= headerlen; + if (datalen < gede->ErrorDataLength) { + device_printf(sc->sc_dev, "%s: truncated GEDE payload:" + " %"PRIu32" < %"PRIu32" bytes\n", + subctx, datalen, gede->ErrorDataLength); + break; + } + + /* + * Report the Common Platform Error Record appendix to + * this Generic Error Data Entry. + */ + apei_cper_report(sc, gede->SectionType, + (const unsigned char *)gede + headerlen, + gede->ErrorDataLength, subctx); + + /* + * Advance past the GEDE header and CPER data to the + * next GEDE. + */ + gede = (const ACPI_HEST_GENERIC_DATA *)((const char *)gede + + + headerlen + gede->ErrorDataLength); + } + + /* + * Advance past the Generic Error Data Entries (GEDEs) to the + * raw error data. + * + * XXX Provide maximum raw data length as a parameter. + */ + rawdata = (const unsigned char *)gede0 + datalen; + + /* + * Verify that the raw data length fits within the size. If + * not, truncate the raw data. + */ + rawdatalen = gesb->RawDataLength; + if (size < rawdatalen) { + device_printf(sc->sc_dev, "%s:" + " GESB RawDataLength exceeds bounds: %zu < %"PRIu32"\n", + ctx, size, rawdatalen); + rawdatalen = size; + } + size -= rawdatalen; + + /* + * Hexdump the raw data, if any. + */ + if (rawdatalen > 0) { + char devctx[128]; + + snprintf(devctx, sizeof(devctx), "%s: %s: raw data", + device_xname(sc->sc_dev), ctx); + hexdump(printf, devctx, rawdata, rawdatalen); + } + + /* + * If there's anything left after the raw data, warn. + */ + if (size > 0) { + device_printf(sc->sc_dev, "%s: excess data: %zu bytes\n", + ctx, size); + } + + /* + * Return the status so the caller can ack it. + */ +out: return status; +} + +/* + * BERT -- Boot Error Record Table + */ + +/* + * apei_bert_attach(sc) + * + * Scan the Boot Error Record Table for hardware errors that + * happened early at boot or on the previous boot. 
+ */
+static void
+apei_bert_attach(struct apei_softc *sc)
+{
+	const ACPI_TABLE_BERT *bert = sc->sc_tab.bert;
+	struct apei_bert_softc *bsc = &sc->sc_bert;
+
+	aprint_debug_dev(sc->sc_dev, "BERT: 0x%x bytes at 0x%"PRIx64"\n",
+	    bert->RegionLength, bert->Address);
+
+	/*
+	 * Verify the length is enough for a Generic Error Status Block
+	 * header, at least.
+	 */
+	if (bert->RegionLength < sizeof(*bsc->bsc_gesb)) {
+		aprint_error_dev(sc->sc_dev,
+		    "BERT: truncated boot error region, expected >=%zu bytes\n",
+		    sizeof(*bsc->bsc_gesb));
+		return;
+	}
+
+	/*
+	 * Map the GESB and process it, but don't acknowledge it --
+	 * this is a one-time polled source; it won't (or at least,
+	 * shouldn't) change after boot.
+	 *
+	 * If the mapping fails, leave bsc_gesb null so that
+	 * apei_bert_detach has nothing to unmap, and don't hand a
+	 * null pointer to apei_gesb_report, which dereferences it.
+	 */
+	bsc->bsc_gesb = AcpiOsMapMemory(bert->Address, bert->RegionLength);
+	if (bsc->bsc_gesb == NULL) {
+		aprint_error_dev(sc->sc_dev,
+		    "BERT: failed to map boot error region\n");
+		return;
+	}
+	const uint32_t status = apei_gesb_report(sc, bsc->bsc_gesb,
+	    bert->RegionLength, "boot error record");
+	if (status == 0) {
+		aprint_verbose_dev(sc->sc_dev,
+		    "BERT: no boot errors recorded\n");
+	}
+	/* XXX expose content via sysctl? */
+}
+
+/*
+ * apei_bert_detach(sc)
+ *
+ *	Free any software resources associated with the Boot Error
+ *	Record Table.  Unmaps the boot error region if
+ *	apei_bert_attach mapped it; otherwise does nothing.
+ */
+static void
+apei_bert_detach(struct apei_softc *sc)
+{
+	const ACPI_TABLE_BERT *bert = sc->sc_tab.bert;
+	struct apei_bert_softc *bsc = &sc->sc_bert;
+
+	if (bsc->bsc_gesb) {
+		AcpiOsUnmapMemory(bsc->bsc_gesb, bert->RegionLength);
+		bsc->bsc_gesb = NULL;
+	}
+}
+
+/*
+ * EINJ -- Error Injection Table
+ */
+
+/*
+ * apei_einj_attach(sc)
+ *
+ *	Scan the Error Injection table to ascertain what error
+ *	injection actions the firmware supports and how to perform
+ *	them.  Create sysctl nodes for triggering error injection.
+ */
+static void
+apei_einj_attach(struct apei_softc *sc)
+{
+	ACPI_TABLE_EINJ *einj = sc->sc_tab.einj;
+	struct apei_einj_softc *jsc = &sc->sc_einj;
+	ACPI_EINJ_ENTRY *entry;
+	const struct sysctlnode *sysctl_einj;
+	const struct sysctlnode *sysctl_einj_action;
+	uint32_t i;
+	unsigned action;
+	int error;
+
+	aprint_debug_dev(sc->sc_dev, "EINJ: HeaderLength=%"PRIu32"\n",
+	    einj->HeaderLength);
+	aprint_debug_dev(sc->sc_dev, "EINJ: Flags=0x%"PRIx8"\n",
+	    einj->Flags);
+	aprint_debug_dev(sc->sc_dev, "EINJ: Reserved=[%02"PRIx8" %02"PRIx8
+	    " %02"PRIx8"]\n",
+	    einj->Reserved[0], einj->Reserved[1], einj->Reserved[2]);
+	aprint_debug_dev(sc->sc_dev, "EINJ: Entries=%"PRIu32"\n",
+	    einj->Entries);
+
+	error = sysctl_createv(&sc->sc_sysctllog, 0,
+	    &sc->sc_sysctlroot, &sysctl_einj, 0,
+	    CTLTYPE_NODE, "einj",
+	    SYSCTL_DESCR("Error injection"),
+	    NULL, 0, NULL, 0,
+	    CTL_CREATE, CTL_EOL);
+	if (error) {
+		aprint_error_dev(sc->sc_dev, "failed to create"
+		    " hw.acpi.apei.einj: %d\n", error);
+		sysctl_einj = NULL;
+	}
+
+	/*
+	 * Only create the action node if its parent exists.  Passing
+	 * a null parent to sysctl_createv would not fail; it would
+	 * create the node relative to the sysctl root instead.
+	 */
+	error = sysctl_einj == NULL ? ENOENT :
+	    sysctl_createv(&sc->sc_sysctllog, 0,
+		&sysctl_einj, &sysctl_einj_action, 0,
+		CTLTYPE_NODE, "action",
+		SYSCTL_DESCR("EINJ actions"),
+		NULL, 0, NULL, 0,
+		CTL_CREATE, CTL_EOL);
+	if (error) {
+		aprint_error_dev(sc->sc_dev, "failed to create"
+		    " hw.acpi.apei.einj.action: %d\n", error);
+		sysctl_einj_action = NULL;
+	}
+
+	jsc->jsc_interp = apei_interp_create("EINJ",
+	    apei_einj_action, __arraycount(apei_einj_action),
+	    apei_einj_instruction, __arraycount(apei_einj_instruction),
+	    /*instvalid*/NULL, apei_einj_instfunc);
+
+	/*
+	 * Compile the interpreter.
+	 */
+	entry = (ACPI_EINJ_ENTRY *)(einj + 1);
+	for (i = 0; i < einj->Entries; i++, entry++)
+		apei_interp_pass1_load(jsc->jsc_interp, i, &entry->WheaHeader);
+	entry = (ACPI_EINJ_ENTRY *)(einj + 1);
+	for (i = 0; i < einj->Entries; i++, entry++) {
+		apei_interp_pass2_verify(jsc->jsc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass3_alloc(jsc->jsc_interp);
+	entry = (ACPI_EINJ_ENTRY *)(einj + 1);
+	for (i = 0; i < einj->Entries; i++, entry++) {
+		apei_interp_pass4_assemble(jsc->jsc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass5_verify(jsc->jsc_interp);
+
+	/*
+	 * Create sysctl nodes for each action we know about.
+	 *
+	 * XXX Omit those that are not handled by the firmware.
+	 */
+	for (action = 0; action < __arraycount(apei_einj_action); action++) {
+		if (apei_einj_action[action] == NULL)
+			continue;
+
+		/*
+		 * Check to see if there are any instructions for this
+		 * action.
+		 *
+		 * XXX Maybe add this to the apei_interp.h abstraction.
+		 */
+		entry = (ACPI_EINJ_ENTRY *)(einj + 1);
+		for (i = 0; i < einj->Entries; i++, entry++) {
+			ACPI_WHEA_HEADER *const header = &entry->WheaHeader;
+
+			if (action == header->Action)
+				break;
+		}
+		if (i == einj->Entries) {
+			/*
+			 * No instructions for this action, so assume
+			 * it's not supported.
+			 */
+			continue;
+		}
+
+		/*
+		 * Create a sysctl knob to perform the action.  The
+		 * action number doubles as the sysctl node number,
+		 * which apei_einj_action_sysctl reads back.
+		 */
+		error = sysctl_einj_action == NULL ? ENOENT :
+		    sysctl_createv(&sc->sc_sysctllog, 0,
+			&sysctl_einj_action, NULL, CTLFLAG_READWRITE,
+			CTLTYPE_QUAD, apei_einj_action[action],
+			NULL, /* description */
+			&apei_einj_action_sysctl, 0, NULL, 0,
+			action, CTL_EOL);
+		if (error) {
+			aprint_error_dev(sc->sc_dev, "failed to create"
+			    " sysctl hw.acpi.apei.einj.action.%s: %d\n",
+			    apei_einj_action[action], error);
+		}
+	}
+
+	/*
+	 * Create a sysctl knob to trigger error.
+	 */
+	error = sysctl_einj == NULL ? ENOENT :
+	    sysctl_createv(&sc->sc_sysctllog, 0,
+		&sysctl_einj, NULL, CTLFLAG_READWRITE,
+		CTLTYPE_QUAD, "trigger",
+		NULL, /* description */
+		&apei_einj_trigger_sysctl, 0, NULL, 0,
+		CTL_CREATE, CTL_EOL);
+	if (error) {
+		aprint_error_dev(sc->sc_dev, "failed to create"
+		    " sysctl hw.acpi.apei.einj.trigger: %d\n",
+		    error);
+	}
+
+	/*
+	 * Query the available types of error to inject and print it to
+	 * dmesg.
+	 *
+	 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#error-types
+	 */
+	uint64_t types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0);
+	char typesbuf[1024], *typesp;
+	snprintb_m(typesbuf, sizeof(typesbuf), "\177\020"
+	    "b\000"	"PROC_CORRECTABLE\0"
+	    "b\001"	"PROC_UNCORRECTABLE\0"
+	    "b\002"	"PROC_FATAL\0"
+	    "b\003"	"MEM_CORRECTABLE\0"
+	    "b\004"	"MEM_UNCORRECTABLE\0"
+	    "b\005"	"MEM_FATAL\0"
+	    "b\006"	"PCIE_CORRECTABLE\0"
+	    "b\007"	"PCIE_UNCORRECTABLE\0"
+	    "b\010"	"PCIE_FATAL\0"
+	    "b\011"	"PLAT_CORRECTABLE\0"
+	    "b\012"	"PLAT_UNCORRECTABLE\0"
+	    "b\013"	"PLAT_FATAL\0"
+	    "b\014"	"CXLCACHE_CORRECTABLE\0"
+	    "b\015"	"CXLCACHE_UNCORRECTABLE\0"
+	    "b\016"	"CXLCACHE_FATAL\0"
+	    "b\017"	"CXLMEM_CORRECTABLE\0"
+	    "b\020"	"CXLMEM_UNCORRECTABLE\0"
+	    "b\021"	"CXLMEM_FATAL\0"
+//	    "f\022\014"	"reserved\0"
+	    "b\036"	"EINJv2\0"
+	    "b\037"	"VENDOR\0"
+	    "\0", types, 32);
+	/* snprintb_m emits a sequence of NUL-terminated lines. */
+	for (typesp = typesbuf; strlen(typesp); typesp += strlen(typesp) + 1) {
+		aprint_normal_dev(sc->sc_dev, "EINJ: injectable errors:"
+		    " %s\n", typesp);
+	}
+
+	/*
+	 * Create a sysctl knob to query the available types of error
+	 * to inject.  In principle this could change dynamically, so
+	 * we'll make it dynamic.
+	 */
+	error = sysctl_einj == NULL ? ENOENT :
+	    sysctl_createv(&sc->sc_sysctllog, 0,
+		&sysctl_einj, NULL, 0,
+		CTLTYPE_QUAD, "types",
+		SYSCTL_DESCR("Types of errors that can be injected"),
+		&apei_einj_types_sysctl, 0, NULL, 0,
+		CTL_CREATE, CTL_EOL);
+	if (error) {
+		aprint_error_dev(sc->sc_dev, "failed to create"
+		    " sysctl hw.acpi.apei.einj.types: %d\n",
+		    error);
+	}
+}
+
+/*
+ * apei_einj_detach(sc)
+ *
+ *	Free any software resources associated with the Error Injection
+ *	table.
+ */
+static void
+apei_einj_detach(struct apei_softc *sc)
+{
+	struct apei_einj_softc *jsc = &sc->sc_einj;
+
+	if (jsc->jsc_interp) {
+		apei_interp_destroy(jsc->jsc_interp);
+		jsc->jsc_interp = NULL;
+	}
+}
+
+/*
+ * struct apei_einj_machine
+ *
+ *	Machine state for executing EINJ instructions.
+ */
+struct apei_einj_machine {
+	struct apei_softc	*sc;
+	uint64_t		x;	/* in */
+	uint64_t		y;	/* out */
+};
+
+/*
+ * apei_einj_instfunc(header, cookie, &ip, maxip)
+ *
+ *	Run a single instruction in the service of performing an EINJ
+ *	action.  Updates the EINJ machine at cookie in place.
+ *
+ *	This doesn't read or write ip.  The TRIGGER_ERROR logic relies
+ *	on this; if you change the fact, you must update that logic
+ *	too.
+ */
+static void
+apei_einj_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp,
+    uint32_t maxip)
+{
+	struct apei_einj_machine *M = cookie;
+	ACPI_STATUS rv = AE_OK;
+
+	/*
+	 * Abbreviate some of the intermediate quantities to make the
+	 * instruction logic conciser and more legible.
+	 */
+	const uint8_t BitOffset = header->RegisterRegion.BitOffset;
+	const uint64_t Mask = header->Mask;
+	const uint64_t Value = header->Value;
+	ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion;
+	const bool preserve_register = header->Flags & ACPI_EINJ_PRESERVE;
+
+	aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8
+	    " (%s)"
+	    " Address=0x%"PRIx64
+	    " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64
+	    " Flags=0x%"PRIx8"\n",
+	    __func__, header->Instruction,
+	    (header->Instruction < __arraycount(apei_einj_instruction)
+		? apei_einj_instruction[header->Instruction]
+		: "unknown"),
+	    reg->Address,
+	    BitOffset, Mask, Value,
+	    header->Flags);
+
+	/*
+	 * Zero-initialize the output by default.
+	 */
+	M->y = 0;
+
+	switch (header->Instruction) {
+	case ACPI_EINJ_READ_REGISTER:
+		rv = apei_read_register(reg, Mask, &M->y);
+		break;
+	case ACPI_EINJ_READ_REGISTER_VALUE: {
+		uint64_t v;
+
+		rv = apei_read_register(reg, Mask, &v);
+		if (ACPI_FAILURE(rv))
+			break;
+		M->y = (v == Value ? 1 : 0);
+		break;
+	}
+	case ACPI_EINJ_WRITE_REGISTER:
+		rv = apei_write_register(reg, Mask, preserve_register, M->x);
+		break;
+	case ACPI_EINJ_WRITE_REGISTER_VALUE:
+		rv = apei_write_register(reg, Mask, preserve_register, Value);
+		break;
+	case ACPI_EINJ_NOOP:
+		break;
+	default:		/* unrecognized instruction */
+		rv = AE_ERROR;
+		break;
+	}
+
+	if (ACPI_FAILURE(rv)) {
+		aprint_debug_dev(M->sc->sc_dev, "failed: %s\n",
+		    AcpiFormatException(rv));
+	}
+}
+
+/*
+ * apei_einj_act(sc, action, x)
+ *
+ *	Perform the named EINJ action with input x, by executing the
+ *	instruction defined for the action by the EINJ, and return the
+ *	output.
+ */
+static uint64_t
+apei_einj_act(struct apei_softc *sc, enum AcpiEinjActions action,
+    uint64_t x)
+{
+	struct apei_einj_softc *const jsc = &sc->sc_einj;
+	struct apei_einj_machine einj_machine, *const M = &einj_machine;
+
+	aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n",
+	    __func__,
+	    action,
+	    (action < __arraycount(apei_einj_action)
+		? apei_einj_action[action]
+		: "unknown"),
+	    x);
+
+	/*
+	 * Initialize the machine to execute the action's instructions.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+
+	/*
+	 * Run the interpreter.
+	 */
+	apei_interpret(jsc->jsc_interp, action, M);
+
+	/*
+	 * Return the result.
+	 */
+	aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__,
+	    M->y);
+	return M->y;
+}
+
+/*
+ * apei_einj_trigger(sc, x)
+ *
+ *	Obtain the TRIGGER_ERROR action table and, if there is anything
+ *	to be done with it, execute it with input x and return the
+ *	output.  If nothing is to be done -- no entries, a table we
+ *	can't map, or a table whose header we don't understand --
+ *	return 0.
+ */
+static uint64_t
+apei_einj_trigger(struct apei_softc *sc, uint64_t x)
+{
+	uint64_t teatab_pa;
+	ACPI_EINJ_TRIGGER *teatab = NULL;
+	size_t mapsize = 0, tabsize, maxentries;
+	ACPI_EINJ_ENTRY *entry;
+	struct apei_einj_machine einj_machine, *const M = &einj_machine;
+	uint32_t i, nentries;
+
+	/*
+	 * Initialize the machine to execute the TRIGGER_ERROR action's
+	 * instructions.  Do this before anything that can bail out so
+	 * that every exit path returns a well-defined output.
+	 */
+	memset(M, 0, sizeof(*M));
+	M->sc = sc;
+	M->x = x;		/* input */
+	M->y = 0;		/* output */
+
+	/*
+	 * Get the TRIGGER_ERROR action table's physical address.
+	 */
+	teatab_pa = apei_einj_act(sc, ACPI_EINJ_GET_TRIGGER_TABLE, 0);
+
+	/*
+	 * Map just the header.  We don't know how large the table is
+	 * because we get that from the header.
+	 */
+	mapsize = sizeof(*teatab);
+	teatab = AcpiOsMapMemory(teatab_pa, mapsize);
+	if (teatab == NULL) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " failed to map header\n");
+		goto out;
+	}
+
+	/*
+	 * If there's no entries, stop here -- nothing to do separately
+	 * to trigger an error report.
+	 */
+	nentries = teatab->EntryCount;
+	if (nentries == 0)
+		goto out;
+
+	/*
+	 * If the header size or the table size is nonsense, bail.
+	 */
+	if (teatab->HeaderSize < sizeof(*teatab) ||
+	    teatab->TableSize < teatab->HeaderSize) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " invalid sizes:"
+		    " HeaderSize=%"PRIu32" TableSize=%"PRIu32"\n",
+		    teatab->HeaderSize, teatab->TableSize);
+		goto out;
+	}
+
+	/*
+	 * If the revision is nonzero, we don't know what to do.  I've
+	 * only seen revision zero so far, and the spec doesn't say
+	 * anything about revisions that I've found.
+	 */
+	if (teatab->Revision != 0) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " unknown revision: %"PRIx32"\n", teatab->Revision);
+		goto out;
+	}
+
+	/*
+	 * Truncate the table to the number of entries requested and
+	 * ignore trailing garbage if the table is long, or round the
+	 * number of entries down to what fits in the table if the
+	 * table is short.
+	 *
+	 * The entries are read starting at teatab + 1 below, so the
+	 * space available for them is the table size less the header
+	 * structure.  The size checks above guarantee the subtraction
+	 * cannot underflow.
+	 */
+	tabsize = teatab->TableSize;
+	maxentries = (tabsize - sizeof(*teatab))/sizeof(ACPI_EINJ_ENTRY);
+	if (nentries < maxentries) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " %zu bytes of trailing garbage\n",
+		    tabsize - sizeof(*teatab) -
+			nentries*sizeof(ACPI_EINJ_ENTRY));
+	} else if (nentries > maxentries) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " truncated to %zu entries\n",
+		    maxentries);
+		nentries = maxentries;
+	}
+	tabsize = sizeof(*teatab) + nentries*sizeof(ACPI_EINJ_ENTRY);
+
+	/*
+	 * Unmap the header and map the header plus the entries we are
+	 * going to execute instead.
+	 */
+	AcpiOsUnmapMemory(teatab, mapsize);
+	mapsize = tabsize;
+	teatab = AcpiOsMapMemory(teatab_pa, mapsize);
+	if (teatab == NULL) {
+		device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+		    " failed to map %zu bytes\n", mapsize);
+		goto out;
+	}
+
+	/*
+	 * Now iterate over the EINJ-type entries and execute the
+	 * trigger error action instructions -- but skip if they're not
+	 * for the TRIGGER_ERROR action, and stop if they're truncated.
+	 *
+	 * Entries are fixed-size, so we can just index them.
+	 */
+	entry = (ACPI_EINJ_ENTRY *)(teatab + 1);
+	for (i = 0; i < nentries; i++) {
+		ACPI_WHEA_HEADER *const header = &entry[i].WheaHeader;
+
+		/*
+		 * Verify the action is TRIGGER_ERROR.  If not, skip.
+		 */
+		if (header->Action != ACPI_EINJ_TRIGGER_ERROR) {
+			device_printf(sc->sc_dev, "TRIGGER_ERROR action table:"
+			    " other action: %"PRIu32" (%s)\n",
+			    header->Action,
+			    (header->Action < __arraycount(apei_einj_action)
+				? apei_einj_action[header->Action]
+				: "unknown"));
+			continue;
+		}
+
+		/*
+		 * Execute the instruction.  Since there's only one
+		 * action, we don't bother with the apei_interp
+		 * machinery to collate instruction tables for each
+		 * action.  EINJ instructions don't change ip.
+		 */
+		uint32_t ip = i + 1;
+		apei_einj_instfunc(header, M, &ip, nentries);
+		KASSERT(ip == i + 1);
+	}
+
+out:	if (teatab) {
+		AcpiOsUnmapMemory(teatab, mapsize);
+		teatab = NULL;
+		mapsize = 0;
+	}
+	return M->y;
+}
+
+/*
+ * apei_einj_action_sysctl:
+ *
+ *	Handle sysctl queries under hw.acpi.apei.einj.action.*.
+ */
+static int
+apei_einj_action_sysctl(SYSCTLFN_ARGS)
+{
+	device_t apei0 = NULL;
+	struct apei_softc *sc;
+	enum AcpiEinjActions action;
+	struct sysctlnode node = *rnode;
+	uint64_t v;
+	int error;
+
+	/*
+	 * As a defence against mistakes, require the user to specify a
+	 * write.
+	 */
+	if (newp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Take a reference to the apei0 device so it doesn't go away
+	 * while we're working, and get the softc.
+	 */
+	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_private(apei0);
+
+	/*
+	 * Fail if there's no EINJ.
+	 */
+	if (sc->sc_tab.einj == NULL) {
+		error = ENODEV;
+		goto out;
+	}
+
+	/*
+	 * Identify the requested action.  If we don't recognize it,
+	 * fail with ENOENT.
+	 */
+	switch (node.sysctl_num) {
+	case ACPI_EINJ_BEGIN_OPERATION:
+	case ACPI_EINJ_GET_TRIGGER_TABLE:
+	case ACPI_EINJ_SET_ERROR_TYPE:
+	case ACPI_EINJ_GET_ERROR_TYPE:
+	case ACPI_EINJ_END_OPERATION:
+	case ACPI_EINJ_EXECUTE_OPERATION:
+	case ACPI_EINJ_CHECK_BUSY_STATUS:
+	case ACPI_EINJ_GET_COMMAND_STATUS:
+	case ACPI_EINJ_SET_ERROR_TYPE_WITH_ADDRESS:
+	case ACPI_EINJ_GET_EXECUTE_TIMINGS:
+		action = node.sysctl_num;
+		break;
+	default:
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Kludge: Copy the `new value' for the sysctl in as an input
+	 * to the injection action.
+	 */
+	error = sysctl_copyin(curlwp, newp, &v, sizeof(v));
+	if (error)
+		goto out;
+
+	/*
+	 * Perform the EINJ action by following the table's
+	 * instructions.
+	 */
+	v = apei_einj_act(sc, action, v);
+
+	/*
+	 * Return the output of the operation as the `old value' of the
+	 * sysctl.  This also updates v with what was written to the
+	 * sysctl, but we don't care because we already read that in
+	 * and acted on it.
+	 */
+	node.sysctl_data = &v;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+out:	if (apei0) {
+		device_release(apei0);
+		apei0 = NULL;
+	}
+	return error;
+}
+
+/*
+ * apei_einj_trigger_sysctl
+ *
+ *	Handle sysctl hw.acpi.apei.einj.trigger.
+ */
+static int
+apei_einj_trigger_sysctl(SYSCTLFN_ARGS)
+{
+	device_t apei0 = NULL;
+	struct apei_softc *sc;
+	struct sysctlnode node = *rnode;
+	uint64_t v;
+	int error;
+
+	/*
+	 * As a defence against mistakes, require the user to specify a
+	 * write.
+	 */
+	if (newp == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/*
+	 * Take a reference to the apei0 device so it doesn't go away
+	 * while we're working, and get the softc.
+	 */
+	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_private(apei0);
+
+	/*
+	 * Fail if there's no EINJ.
+	 */
+	if (sc->sc_tab.einj == NULL) {
+		error = ENODEV;
+		goto out;
+	}
+
+	/*
+	 * Kludge: Copy the `new value' for the sysctl in as an input
+	 * to the trigger action.
+	 */
+	error = sysctl_copyin(curlwp, newp, &v, sizeof(v));
+	if (error)
+		goto out;
+
+	/*
+	 * Perform the TRIGGER_ERROR action.
+	 */
+	v = apei_einj_trigger(sc, v);
+
+	/*
+	 * Return the output of the operation as the `old value' of the
+	 * sysctl.  This also updates v with what was written to the
+	 * sysctl, but we don't care because we already read that in
+	 * and acted on it.
+	 */
+	node.sysctl_data = &v;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+out:	if (apei0) {
+		device_release(apei0);
+		apei0 = NULL;
+	}
+	return error;
+}
+
+/*
+ * apei_einj_types_sysctl
+ *
+ *	Handle sysctl hw.acpi.apei.einj.types.
+ */
+static int
+apei_einj_types_sysctl(SYSCTLFN_ARGS)
+{
+	device_t apei0 = NULL;
+	struct apei_softc *sc;
+	struct sysctlnode node = *rnode;
+	uint64_t types;
+	int error;
+
+	/*
+	 * Take a reference to the apei0 device so it doesn't go away
+	 * while we're working, and get the softc.
+	 *
+	 * XXX Is this necessary?  Shouldn't sysctl_teardown take care
+	 * of preventing new sysctl calls and waiting until all pending
+	 * sysctl calls are done?
+	 */
+	if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+	sc = device_private(apei0);
+
+	/*
+	 * Fail if there's no EINJ.
+	 */
+	if (sc->sc_tab.einj == NULL) {
+		error = ENODEV;
+		goto out;
+	}
+
+	/*
+	 * Perform the GET_ERROR_TYPE action and return the value to
+	 * sysctl.
+	 */
+	types = apei_einj_act(sc, ACPI_EINJ_GET_ERROR_TYPE, 0);
+	node.sysctl_data = &types;
+	error = sysctl_lookup(SYSCTLFN_CALL(&node));
+
+out:	if (apei0) {
+		device_release(apei0);
+		apei0 = NULL;
+	}
+	return error;
+}
+
+/*
+ * ERST -- Error Record Serialization Table
+ */
+
+/*
+ * apei_erst_dumplog(logaddr, logbytes)
+ *
+ *	Map the logbytes-byte error log at physical address logaddr
+ *	and hexdump it if any byte is nonzero; otherwise just note
+ *	that it is all zero.
+ *
+ *	XXX expose via sysctl
+ */
+static void
+apei_erst_dumplog(uint64_t logaddr, uint64_t logbytes)
+{
+	unsigned char *const errorlog = AcpiOsMapMemory(logaddr, logbytes);
+	size_t k;
+
+	if (errorlog == NULL) {
+		printf("Failed to map serialized error records\n");
+		return;
+	}
+
+	for (k = 0; k < logbytes; k++) {
+		if (errorlog[k] != 0) {
+			hexdump(printf, "Serialized error records", errorlog,
+			    logbytes);
+			break;
+		}
+	}
+	if (k == logbytes)
+		printf("All-zero serialized error records\n");
+	AcpiOsUnmapMemory(errorlog, logbytes);
+}
+
+/*
+ * apei_erst_attach(sc)
+ *
+ *	Scan the Error Record Serialization Table to collate the
+ *	instructions for each ERST action.
+ */
+static void
+apei_erst_attach(struct apei_softc *sc)
+{
+	ACPI_TABLE_ERST *erst = sc->sc_tab.erst;
+	struct apei_erst_softc *ssc = &sc->sc_erst;
+	ACPI_ERST_ENTRY *entry;
+	uint32_t i;
+
+	aprint_debug_dev(sc->sc_dev, "ERST: HeaderLength=%"PRIu32"\n",
+	    erst->HeaderLength);
+	aprint_debug_dev(sc->sc_dev, "ERST: Reserved=0x%08"PRIx32"\n",
+	    erst->Reserved);
+	aprint_debug_dev(sc->sc_dev, "ERST: Entries=%"PRIu32"\n",
+	    erst->Entries);
+
+	ssc->ssc_interp = apei_interp_create("ERST",
+	    apei_erst_action, __arraycount(apei_erst_action),
+	    apei_erst_instruction, __arraycount(apei_erst_instruction),
+	    apei_erst_instvalid, apei_erst_instfunc);
+
+	/*
+	 * Compile the interpreter.
+	 */
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < erst->Entries; i++, entry++)
+		apei_interp_pass1_load(ssc->ssc_interp, i, &entry->WheaHeader);
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < erst->Entries; i++, entry++) {
+		apei_interp_pass2_verify(ssc->ssc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass3_alloc(ssc->ssc_interp);
+	entry = (ACPI_ERST_ENTRY *)(erst + 1);
+	for (i = 0; i < erst->Entries; i++, entry++) {
+		apei_interp_pass4_assemble(ssc->ssc_interp, i,
+		    &entry->WheaHeader);
+	}
+	apei_interp_pass5_verify(ssc->ssc_interp);
+
+	/*
+	 * XXX Print what log records we can.
+	 */
+	const uint64_t logaddr = apei_erst_act(sc,
+	    ACPI_ERST_GET_ERROR_RANGE, 0);
+	const uint64_t logbytes = apei_erst_act(sc,
+	    ACPI_ERST_GET_ERROR_LENGTH, 0);
+	const uint64_t logattr = apei_erst_act(sc,
+	    ACPI_ERST_GET_ERROR_ATTRIBUTES, 0);
+	const uint64_t nrecords = apei_erst_act(sc,
+	    ACPI_ERST_GET_RECORD_COUNT, 0);
+
+	aprint_debug_dev(sc->sc_dev, "log %"PRIu64" bytes @ 0x%"PRIx64
+	    " attr 0x%"PRIx64" nrecords %"PRIu64"\n",
+	    logbytes, logaddr, logattr, nrecords);
+
+	if (logattr & 2) {
+		apei_erst_dumplog(logaddr, logbytes);
+	} else {
+		/*
+		 * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#reading
+		 *
+		 * This is the read sequence, so it must open with
+		 * BEGIN_READ; opening with BEGIN_WRITE would ask the
+		 * firmware to serialize a record during attach.
+		 */
+		(void)apei_erst_act(sc, ACPI_ERST_BEGIN_READ, 0);
+		(void)apei_erst_act(sc, ACPI_ERST_SET_RECORD_OFFSET, 0);
+		(void)apei_erst_act(sc, ACPI_ERST_SET_RECORD_ID, 0);
+		(void)apei_erst_act(sc, ACPI_ERST_EXECUTE_OPERATION, 0);
+		/* Poll for completion: up to 10 tries, 1ms apart. */
+		unsigned timo = 10;
+		while (apei_erst_act(sc, ACPI_ERST_CHECK_BUSY_STATUS, 0) &&
+		    timo-- > 0)
+			DELAY(1000);
+		switch (apei_erst_act(sc, ACPI_ERST_GET_COMMAND_STATUS, 0)) {
+		case ACPI_ERST_SUCCESS:
+			/* XXX How many bytes?? */
+			apei_erst_dumplog(logaddr, 32);
+			/* XXX continue with the next id? */
+			(void)apei_erst_act(sc, ACPI_ERST_GET_RECORD_ID, 1);
+			break;
+		case ACPI_ERST_NOT_FOUND:
+			/* XXX try again with the first id? */
+			(void)apei_erst_act(sc, ACPI_ERST_GET_RECORD_ID, 0);
+			break;
+		case ACPI_ERST_NO_SPACE:
+		case ACPI_ERST_NOT_AVAILABLE:
+		case ACPI_ERST_FAILURE:
+		default:
+			break;
+		}
+		(void)apei_erst_act(sc, ACPI_ERST_END, 0);
+	}
+}
+
+/*
+ * apei_erst_detach(sc)
+ *
+ *	Free software resource allocated for ERST handling.
+ */
+static void
+apei_erst_detach(struct apei_softc *sc)
+{
+	struct apei_erst_softc *ssc = &sc->sc_erst;
+
+	if (ssc->ssc_interp) {
+		apei_interp_destroy(ssc->ssc_interp);
+		ssc->ssc_interp = NULL;
+	}
+}
+
+/*
+ * apei_erst_instvalid(header, ninst, i)
+ *
+ *	Routine to validate the ith entry, for an action with ninst
+ *	instructions.  Return true if the entry is acceptable, false
+ *	if not.  Only GOTO is checked: its target must not exceed
+ *	ninst.
+ */
+static bool
+apei_erst_instvalid(ACPI_WHEA_HEADER *header, uint32_t ninst, uint32_t i)
+{
+
+	switch (header->Instruction) {
+	case ACPI_ERST_GOTO:
+		if (header->Value > ninst) {
+			/*
+			 * The action number comes from firmware, so
+			 * don't trust it as an index into our table of
+			 * names.
+			 */
+			aprint_error("ERST[%"PRIu32"]:"
+			    " GOTO(%"PRIu64") out of bounds,"
+			    " disabling action %"PRIu32" (%s)\n", i,
+			    header->Value,
+			    header->Action,
+			    (header->Action < __arraycount(apei_erst_action)
+				? apei_erst_action[header->Action]
+				: "unknown"));
+			return false;
+		}
+		break;
+	default:
+		break;
+	}
+	return true;
+}
+
+/*
+ * struct apei_erst_machine
+ *
+ *	Machine state for executing ERST instructions.
+ */
+struct apei_erst_machine {
+	struct apei_softc	*sc;
+	uint64_t		x;	/* in */
+	uint64_t		y;	/* out */
+	uint64_t		var1;
+	uint64_t		var2;
+	uint64_t		src_base;
+	uint64_t		dst_base;
+};
+
+/*
+ * apei_erst_instfunc(header, cookie, &ip, maxip)
+ *
+ *	Run a single instruction in the service of performing an ERST
+ *	action.  Updates the ERST machine at cookie, and the ip if
+ *	necessary, in place.
+ *
+ *	On entry, ip points to the next instruction after this one
+ *	sequentially; on exit, ip points to the next instruction to
+ *	execute.
+ */ +static void +apei_erst_instfunc(ACPI_WHEA_HEADER *header, void *cookie, uint32_t *ipp, + uint32_t maxip) +{ + struct apei_erst_machine *const M = cookie; + ACPI_STATUS rv = AE_OK; + + /* + * Abbreviate some of the intermediate quantities to make the + * instruction logic conciser and more legible. + */ + const uint8_t BitOffset = header->RegisterRegion.BitOffset; + const uint64_t Mask = header->Mask; + const uint64_t Value = header->Value; + ACPI_GENERIC_ADDRESS *const reg = &header->RegisterRegion; + const bool preserve_register = header->Flags & ACPI_ERST_PRESERVE; + + aprint_debug_dev(M->sc->sc_dev, "%s: instr=0x%02"PRIx8 + " (%s)" + " Address=0x%"PRIx64 + " BitOffset=%"PRIu8" Mask=0x%"PRIx64" Value=0x%"PRIx64 + " Flags=0x%"PRIx8"\n", + __func__, header->Instruction, + (header->Instruction < __arraycount(apei_erst_instruction) + ? apei_erst_instruction[header->Instruction] + : "unknown"), + reg->Address, + BitOffset, Mask, Value, + header->Flags); + + /* + * Zero-initialize the output by default. + */ + M->y = 0; + + switch (header->Instruction) { + case ACPI_ERST_READ_REGISTER: + rv = apei_read_register(reg, Mask, &M->y); + break; + case ACPI_ERST_READ_REGISTER_VALUE: { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + M->y = (v == Value ? 
1 : 0); + break; + } + case ACPI_ERST_WRITE_REGISTER: + rv = apei_write_register(reg, Mask, preserve_register, M->x); + break; + case ACPI_ERST_WRITE_REGISTER_VALUE: + rv = apei_write_register(reg, Mask, preserve_register, Value); + break; + case ACPI_ERST_NOOP: + break; + case ACPI_ERST_LOAD_VAR1: + rv = apei_read_register(reg, Mask, &M->var1); + break; + case ACPI_ERST_LOAD_VAR2: + rv = apei_read_register(reg, Mask, &M->var2); + break; + case ACPI_ERST_STORE_VAR1: + rv = apei_write_register(reg, Mask, preserve_register, + M->var1); + break; + case ACPI_ERST_ADD: + M->var1 += M->var2; + break; + case ACPI_ERST_SUBTRACT: + /* + * The specification at + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#serialization-instructions + * says: + * + * 0x09 SUBTRACT Subtracts VAR1 from VAR2 + * and stores the result in + * VAR1. + * + * So, according to the spec, this is _not_ simply + * + * M->var1 -= M->var2; + */ + M->var1 = M->var2 - M->var1; + break; + case ACPI_ERST_ADD_VALUE: { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + + v += Value; + + rv = apei_write_register(reg, Mask, preserve_register, v); + break; + } + case ACPI_ERST_SUBTRACT_VALUE: { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + + v -= Value; + + rv = apei_write_register(reg, Mask, preserve_register, v); + break; + } + case ACPI_ERST_STALL: + DELAY(Value); /* XXX avoid excessive delays */ + break; + case ACPI_ERST_STALL_WHILE_TRUE: + for (;;) { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + if (v != Value) + break; + DELAY(M->var1); + } + M->y = 0; + break; + case ACPI_ERST_SKIP_NEXT_IF_TRUE: { + uint64_t v; + + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + + /* + * If reading the register yields Value, skip the next + * instruction -- unless that would run past the end of + * the instruction buffer. 
+ */ + if (v == Value) { + if (*ipp < maxip) + (*ipp)++; + } + break; + } + case ACPI_ERST_GOTO: + if (Value >= maxip) /* paranoia */ + *ipp = maxip; + else + *ipp = Value; + break; + case ACPI_ERST_SET_SRC_ADDRESS_BASE: { + uint64_t v; + + /* Base added to the offset on the MOVE_DATA source side. */ + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + M->src_base = v; + break; + } + case ACPI_ERST_SET_DST_ADDRESS_BASE: { + uint64_t v; + + /* + * Base added to the offset on the MOVE_DATA destination + * side; kept separate from src_base. + */ + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + M->dst_base = v; + break; + } + case ACPI_ERST_MOVE_DATA: { + uint64_t v; + + /* The register holds the offset applied to both bases. */ + rv = apei_read_register(reg, Mask, &v); + if (ACPI_FAILURE(rv)) + break; + FakeAcpiMove(M->dst_base + v, M->src_base + v, M->var2); + break; + } + default: + break; + } + + if (ACPI_FAILURE(rv)) { + aprint_debug_dev(M->sc->sc_dev, "failed: %s\n", + AcpiFormatException(rv)); + } +} + +/* + * apei_erst_act(sc, action, x) + * + * Perform the named ERST action with input x, by stepping through + * all the instructions defined for the action by the ERST, and + * return the output. + */ +static uint64_t +apei_erst_act(struct apei_softc *sc, enum AcpiErstActions action, uint64_t x) +{ + struct apei_erst_softc *const ssc = &sc->sc_erst; + struct apei_erst_machine erst_machine, *const M = &erst_machine; + + aprint_debug_dev(sc->sc_dev, "%s: action=%d (%s) input=0x%"PRIx64"\n", + __func__, + action, + (action < __arraycount(apei_erst_action) + ? apei_erst_action[action] + : "unknown"), + x); + + /* + * Initialize the machine to execute the action's instructions. + */ + memset(M, 0, sizeof(*M)); + M->sc = sc; + M->x = x; /* input */ + M->y = 0; /* output */ + M->var1 = 0; + M->var2 = 0; + M->src_base = 0; + M->dst_base = 0; + + /* + * Run the interpreter. + */ + apei_interpret(ssc->ssc_interp, action, M); + + /* + * Return the result. 
+ */ + aprint_debug_dev(sc->sc_dev, "%s: output=0x%"PRIx64"\n", __func__, + M->y); + return M->y; +} + +/* + * HEST -- Hardware Error Source Table + */ + +/* + * apei_hest_ghes_handle(sc, src) + * + * Check for, report, and acknowledge any error from a Generic + * Hardware Error Source (GHES, not GHESv2). Return true if there + * was any error to report, false if not. + */ +static bool +apei_hest_ghes_handle(struct apei_softc *sc, struct apei_source *src) +{ + ACPI_HEST_GENERIC *ghes = container_of(src->as_header, + ACPI_HEST_GENERIC, Header); + ACPI_HEST_GENERIC_STATUS *gesb = src->as_ghes.gesb; + char ctx[sizeof("hardware error source 4294967295")]; + uint32_t status; + + /* + * Process and report any error. + */ + snprintf(ctx, sizeof(ctx), "hardware error source %"PRIu32, + ghes->Header.SourceId); + status = apei_gesb_report(sc, src->as_ghes.gesb, + ghes->ErrorBlockLength, ctx); + + /* + * Acknowledge the error by clearing the block status. To + * avoid races, we probably have to avoid further access to the + * GESB until we get another notification. + * + * As a precaution, we zero this with atomic compare-and-swap + * so at least we can see if the status changed while we were + * working on it. + * + * It is tempting to clear bits with atomic and-complement, but + * the BlockStatus is not just a bit mask -- bits [13:4] are a + * count of Generic Error Data Entries, and who knows what bits + * [31:14] might be used for in the future. + * + * XXX The GHES(v1) protocol is unclear from the specification + * here. The GHESv2 protocol has a separate register write to + * acknowledge, which is a bit clearer. 
+ */ + membar_release(); + const uint32_t status1 = atomic_cas_32(&gesb->BlockStatus, status, 0); + if (status1 != status) { + device_printf(sc->sc_dev, "%s: status changed from" + " 0x%"PRIx32" to 0x%"PRIx32"\n", + ctx, status, status1); + } + + return status != 0; +} + +/* + * apei_hest_ghes_poll(cookie) + * + * Callout handler for periodic polling of a Generic Hardware + * Error Source, using Notification Type `0 - Polled'. + * + * cookie is the struct apei_source pointer for a single source; + * if there are multiple sources there will be multiple callouts. + */ +static void +apei_hest_ghes_poll(void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + ACPI_HEST_GENERIC *ghes = container_of(src->as_header, + ACPI_HEST_GENERIC, Header); + + /* + * Process and acknowledge any error. + */ + (void)apei_hest_ghes_handle(sc, src); + + /* + * Schedule polling again after the firmware-suggested + * interval. + */ + callout_schedule(&src->as_ch, + MAX(1, mstohz(ghes->Notify.PollInterval))); +} + +/* + * apei_hest_ghes_nmi(tf, cookie) + * + * Nonmaskable interrupt handler for Generic Hardware Error + * Sources with Notification Type `4 - NMI'. + */ +#if defined(__i386__) || defined(__x86_64__) +static int +apei_hest_ghes_nmi(const struct trapframe *tf, void *cookie) +{ + struct apei_source *src = cookie; + struct apei_softc *sc = src->as_sc; + + /* + * The NMI is (sometimes?) delivered to all CPUs at once. To + * reduce confusion, let's try to have only one CPU process it + * at a time. + */ + KERNEL_LOCK(1, NULL); + const bool mine = apei_hest_ghes_handle(sc, src); + KERNEL_UNLOCK_ONE(NULL); + + /* + * Tell the NMI subsystem whether this interrupt could have + * been for us or not. + */ + return mine; +} +#endif + +/* + * apei_hest_attach_ghes(sc, ghes, i) + * + * Attach a Generic Hardware Error Source (GHES, not GHESv2) as + * the ith source in the Hardware Error Source Table. 
+ * + * After this point, the system will check for and handle errors + * when notified by this source. + */ +static void +apei_hest_attach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + uint64_t addr; + ACPI_STATUS rv; + + /* + * Limited to aprint_debug because some machines have thousands + * of these. + * + * XXX Maybe skip this, redundant with acpidump? + */ + aprint_debug_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " Generic Hardware Error Source\n", i); + aprint_debug_dev(sc->sc_dev, " SourceId=0x%04"PRIx16"\n", + ghes->Header.SourceId); + aprint_debug_dev(sc->sc_dev, " RelatedSourceId=0x%04"PRIx16"\n", + ghes->RelatedSourceId); + aprint_debug_dev(sc->sc_dev, " Reserved=0x%"PRIx8"\n", + ghes->Reserved); + aprint_debug_dev(sc->sc_dev, " Enabled=0x%"PRIx8"\n", + ghes->Enabled); + aprint_debug_dev(sc->sc_dev, " RecordsToPreallocate=%"PRIu32"\n", + ghes->RecordsToPreallocate); + aprint_debug_dev(sc->sc_dev, " MaxSectionsPerRecord=%"PRIu32"\n", + ghes->MaxSectionsPerRecord); + aprint_debug_dev(sc->sc_dev, " MaxRawDataLength=%"PRIu32"\n", + ghes->MaxRawDataLength); + aprint_debug_dev(sc->sc_dev, " ErrorStatusAddress=" + "[SpaceId=%"PRIu8 + " BitWidth=%"PRIu8" BitOffset=%"PRIu8" AccessWidth=%"PRIu8 + " Address=0x%"PRIx64"]\n", + ghes->ErrorStatusAddress.SpaceId, + ghes->ErrorStatusAddress.BitWidth, + ghes->ErrorStatusAddress.BitOffset, + ghes->ErrorStatusAddress.AccessWidth, + ghes->ErrorStatusAddress.Address); + aprint_debug_dev(sc->sc_dev, " ErrorBlockLength=%"PRIu32"\n", + ghes->ErrorBlockLength); + + /* + * Verify the source is enabled before proceeding. The Enabled + * field is 8 bits with 256 possibilities, but only two of the + * possibilities, 0 andb 1, have semantics defined in the spec, + * so out of an abundance of caution let's tread carefully in + * case anything changes and noisily reject any values other + * than 1. 
+ */ + switch (ghes->Enabled) { + case 1: + break; + case 0: + aprint_debug_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " disabled\n", i); + return; + default: + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unknown GHES Enabled state: 0x%"PRIx8"\n", i, + ghes->Enabled); + return; + } + + /* + * Verify the Error Status Address bit width is at most 64 bits + * before proceeding with this source. When we get 128-bit + * addressing, this code will have to be updated. + */ + if (ghes->ErrorStatusAddress.BitWidth > 64) { + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " excessive address bits: %"PRIu8"\n", i, + ghes->ErrorStatusAddress.BitWidth); + return; + } + + /* + * Read the GHES Error Status Addresss. This is the physical + * address of a GESB, Generic Error Status Block. Why the + * physical address is exposed via this indirection, and not + * simply stored directly in the GHES, is unclear to me. + * Hoping it's not because the address can change dynamically, + * because the error handling path shouldn't involve mapping + * anything. + */ + rv = AcpiRead(&addr, &ghes->ErrorStatusAddress); + if (ACPI_FAILURE(rv)) { + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " failed to read error status address: %s", i, + AcpiFormatException(rv)); + return; + } + aprint_debug_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " error status @ 0x%"PRIx64"\n", i, addr); + + /* + * Initialize the source and map the GESB so we can get at it + * in the error handling path. + */ + src->as_sc = sc; + src->as_header = &ghes->Header; + src->as_ghes.gesb = AcpiOsMapMemory(addr, ghes->ErrorBlockLength); + + /* + * Print the notification structure. 
+ */ + ACPI_HEST_NOTIFY *notify = &ghes->Notify; + if (notify->Type < __arraycount(apei_hens_type) && + apei_hens_type[notify->Type] != NULL) { + aprint_debug_dev(sc->sc_dev, "HEST[%"PRIu32"]" + " notification: %s\n", i, + apei_hens_type[notify->Type]); + } + aprint_debug_dev(sc->sc_dev, " Type=%"PRIu8"\n", notify->Type); + aprint_debug_dev(sc->sc_dev, " Length=%"PRIu8"\n", notify->Length); + char cwe[140]; + snprintb(cwe, sizeof(cwe), "\020" /* hex */ + "\001""Type" + "\002""PollInterval" + "\003""SwitchToPollingThresholdValue" + "\004""SwitchToPollingThresholdWindow" + "\005""ErrorThresholdValue" + "\006""ErrorThresholdWindow" + "\0", notify->ConfigWriteEnable); + aprint_debug_dev(sc->sc_dev, " ConfigWriteEnable=%s\n", cwe); + aprint_debug_dev(sc->sc_dev, " PollInterval=%"PRIu32" ms\n", + notify->PollInterval); + aprint_debug_dev(sc->sc_dev, " Vector=%"PRIu32"\n", notify->Vector); + aprint_debug_dev(sc->sc_dev, " PollingThresholdValue=%"PRIu32"\n", + notify->PollingThresholdValue); + aprint_debug_dev(sc->sc_dev, " PollingThresholdWindow=%"PRIu32 + " ms\n", notify->PollingThresholdWindow); + aprint_debug_dev(sc->sc_dev, " ErrorThresholdValue=%"PRIu32"\n", + notify->ErrorThresholdValue); + aprint_debug_dev(sc->sc_dev, " ErrorThresholdWindow=%"PRIu32" ms\n", + notify->ErrorThresholdWindow); + + /* + * Arrange to receive notifications. + */ + switch (notify->Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_init(&src->as_ch, CALLOUT_MPSAFE); + callout_setfunc(&src->as_ch, &apei_hest_ghes_poll, src); + callout_schedule(&src->as_ch, 0); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * SCI and GPIO notifications are delivered through + * Hardware Error Device (PNP0C33) events. + * + * XXX Where is this spelled out? The text at + * https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html#event-notification-for-generic-error-sources + * is vague. 
+ */ + SIMPLEQ_INSERT_TAIL(&hsc->hsc_hed_list, src, as_entry); + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + src->as_nmi = nmi_establish(&apei_hest_ghes_nmi, src); + break; +#endif + } + + /* + * Now that we have notification set up, process and + * acknowledge the initial GESB report if any. + */ + apei_hest_ghes_handle(sc, src); +} + +/* + * apei_hest_detach_ghes(sc, ghes, i) + * + * Detach the ith source, which is a Generic Hardware Error Source + * (GHES, not GHESv2). + * + * After this point, the system will ignore notifications from + * this source. + */ +static void +apei_hest_detach_ghes(struct apei_softc *sc, ACPI_HEST_GENERIC *ghes, + uint32_t i) +{ + struct apei_hest_softc *hsc = &sc->sc_hest; + struct apei_source *src = &hsc->hsc_source[i]; + ACPI_HEST_NOTIFY *notify = &ghes->Notify; + + /* + * Arrange to stop receiving notifications. + */ + switch (notify->Type) { + case ACPI_HEST_NOTIFY_POLLED: + callout_halt(&src->as_ch, NULL); + callout_destroy(&src->as_ch); + break; + case ACPI_HEST_NOTIFY_SCI: + case ACPI_HEST_NOTIFY_GPIO: + /* + * No need to spend time removing the entry; no further + * calls via apei_hed_notify are possible at this + * point, now that detach has begun. + */ + break; +#if defined(__i386__) || defined(__x86_64__) + case ACPI_HEST_NOTIFY_NMI: + nmi_disestablish(src->as_nmi); + src->as_nmi = NULL; + break; +#endif + } + + /* + * No more notifications. Unmap the GESB and destroy the + * interrupt source now that it will no longer be used in + * error handling path. + */ + AcpiOsUnmapMemory(src->as_ghes.gesb, ghes->ErrorBlockLength); + src->as_ghes.gesb = NULL; + src->as_header = NULL; + src->as_sc = NULL; +} + +/* + * apei_hest_attach_source(sc, header, i) + * + * Attach the ith source in the Hardware Error Source Table given + * its header, and return a pointer to the header of the next + * source in the table. 
+ */ +static ACPI_HEST_HEADER * +apei_hest_attach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header, + uint32_t i) +{ + + switch (header->Type) { + case ACPI_HEST_TYPE_IA32_CHECK: { + ACPI_HEST_IA_MACHINE_CHECK *const imc = container_of(header, + ACPI_HEST_IA_MACHINE_CHECK, Header); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imc + 1); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(bank + imc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: { + ACPI_HEST_IA_CORRECTED *const imcc = container_of(header, + ACPI_HEST_IA_CORRECTED, Header); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imcc + 1); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(bank + imcc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_IA32_NMI: { + ACPI_HEST_IA_NMI *const ianmi = container_of(header, + ACPI_HEST_IA_NMI, Header); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(ianmi + 1); + } + case ACPI_HEST_TYPE_AER_ROOT_PORT: { + ACPI_HEST_AER_ROOT *const aerroot = container_of(header, + ACPI_HEST_AER_ROOT, Header); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(aerroot + 1); + } + case ACPI_HEST_TYPE_AER_ENDPOINT: { + ACPI_HEST_AER *const aer = container_of(header, + ACPI_HEST_AER, Header); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(aer + 1); + } + case ACPI_HEST_TYPE_AER_BRIDGE: { + ACPI_HEST_AER_BRIDGE *const aerbridge = container_of(header, + ACPI_HEST_AER_BRIDGE, Header); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return 
(ACPI_HEST_HEADER *)(aerbridge + 1); + } + case ACPI_HEST_TYPE_GENERIC_ERROR: { + ACPI_HEST_GENERIC *const ghes = container_of(header, + ACPI_HEST_GENERIC, Header); + + apei_hest_attach_ghes(sc, ghes, i); + return (ACPI_HEST_HEADER *)(ghes + 1); + } + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: { + ACPI_HEST_GENERIC_V2 *const ghesv2 = container_of(header, + ACPI_HEST_GENERIC_V2, Header); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(ghesv2 + 1); + } + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: { + ACPI_HEST_IA_DEFERRED_CHECK *const imdc = container_of(header, + ACPI_HEST_IA_DEFERRED_CHECK, Header); + ACPI_HEST_IA_ERROR_BANK *const bank = (void *)(imdc + 1); + + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " unimplemented type: 0x%04"PRIx16"\n", i, header->Type); + return (ACPI_HEST_HEADER *)(bank + imdc->NumHardwareBanks); + } + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + default: + aprint_error_dev(sc->sc_dev, "HEST[%"PRIu32"]: unknown type:" + " 0x%04"PRIx16"\n", i, header->Type); + if (header->Type >= 12) { + /* + * `Beginning with error source type 12 and + * onward, each Error Source Structure must + * use the standard Error Source Structure + * Header as defined below.' + * + * Not yet in acpica, though, so we copy this + * down manually. + */ + struct { + UINT16 Type; + UINT16 Length; + } *const essh = (void *)header; + + return (ACPI_HEST_HEADER *)((char *)header + + essh->Length); + } + return NULL; + } +} + +/* + * apei_hest_detach_source(sc, header, i) + * + * Detach the ith source in the Hardware Error Status Table. + * Caller is assumed to have stored where each source's header is, + * so no need to return the pointer to the header of the next + * source in the table. 
+ */ +static void +apei_hest_detach_source(struct apei_softc *sc, ACPI_HEST_HEADER *header, + uint32_t i) +{ + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: { + ACPI_HEST_GENERIC *ghes = container_of(header, + ACPI_HEST_GENERIC, Header); + + apei_hest_detach_ghes(sc, ghes, i); + break; + } + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + break; + } +} + +/* + * apei_hest_attach(sc) + * + * Scan the Hardware Error Source Table and attach sources + * enumerated in it so we can receive and process hardware errors + * during operation. + */ +static void +apei_hest_attach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + ACPI_HEST_HEADER *header; + uint32_t i, n; + + n = hest->ErrorSourceCount; + aprint_normal_dev(sc->sc_dev, "HEST: %"PRIu32 + " hardware error source%s\n", n, n == 1 ? "" : "s"); + + /* + * This could be SIZE_MAX but let's put a smaller arbitrary + * limit on it; if you have gigabytes of HEST something is + * probably wrong. 
+ */ + if (n > INT32_MAX/sizeof(hsc->hsc_source[0])) { + aprint_error_dev(sc->sc_dev, "HEST: too many error sources\n"); + return; + } + hsc->hsc_source = kmem_zalloc(n * sizeof(hsc->hsc_source[0]), + KM_SLEEP); + + header = (ACPI_HEST_HEADER *)(hest + 1); + for (i = 0; i < n; i++) { + aprint_debug_dev(sc->sc_dev, "HEST[%"PRIu32"]:" + " Type=0x%04"PRIx16" SourceId=0x%04"PRIx16"\n", + i, header->Type, header->SourceId); + header = apei_hest_attach_source(sc, header, i); + if (header == NULL) + break; + } + if (i < n) + aprint_error_dev(sc->sc_dev, "HEST truncated\n"); +} + +/* + * apei_hest_detach(sc) + * + * Stop receiving and processing hardware error notifications and + * free resources set up from the Hardware Error Source Table. + */ +static void +apei_hest_detach(struct apei_softc *sc) +{ + ACPI_TABLE_HEST *hest = sc->sc_tab.hest; + struct apei_hest_softc *hsc = &sc->sc_hest; + uint32_t i, n; + + if (hsc->hsc_source) { + n = hest->ErrorSourceCount; + for (i = 0; i < n; i++) { + struct apei_source *src = &hsc->hsc_source[i]; + ACPI_HEST_HEADER *header = src->as_header; + + if (src->as_header == NULL) + continue; + apei_hest_detach_source(sc, header, i); + } + kmem_free(hsc->hsc_source, n * sizeof(hsc->hsc_source[0])); + hsc->hsc_source = NULL; + } +} + +void +apei_hed_notify(void) +{ + device_t apei0; + struct apei_softc *sc; + struct apei_hest_softc *hsc; + struct apei_source *src; + + /* + * Take a reference to the apei0 device so it doesn't go away + * while we're working. + */ + if ((apei0 = device_lookup_acquire(&apei_cd, 0)) == NULL) + goto out; + sc = device_private(apei0); + + /* + * If there's no HEST, nothing to do. + */ + if (sc->sc_tab.hest == NULL) + goto out; + hsc = &sc->sc_hest; + + /* + * Walk through the HED-notified hardware error sources and + * check them. The list is stable until we release apei0. 
+ */ + SIMPLEQ_FOREACH(src, &hsc->hsc_hed_list, as_entry) { + ACPI_HEST_HEADER *const header = src->as_header; + + switch (header->Type) { + case ACPI_HEST_TYPE_GENERIC_ERROR: + apei_hest_ghes_handle(sc, src); + break; + case ACPI_HEST_TYPE_IA32_CHECK: + case ACPI_HEST_TYPE_IA32_CORRECTED_CHECK: + case ACPI_HEST_TYPE_IA32_NMI: + case ACPI_HEST_TYPE_NOT_USED3: + case ACPI_HEST_TYPE_NOT_USED4: + case ACPI_HEST_TYPE_NOT_USED5: + case ACPI_HEST_TYPE_AER_ROOT_PORT: + case ACPI_HEST_TYPE_AER_ENDPOINT: + case ACPI_HEST_TYPE_AER_BRIDGE: +// case ACPI_HEST_TYPE_GENERIC_ERROR: + case ACPI_HEST_TYPE_GENERIC_ERROR_V2: + case ACPI_HEST_TYPE_IA32_DEFERRED_CHECK: + default: + /* shouldn't happen */ + break; + } + } + +out: if (apei0) { + device_release(apei0); + apei0 = NULL; + } +} + +MODULE(MODULE_CLASS_DRIVER, apei, NULL); + +#ifdef _MODULE +#include "ioconf.c" +#endif + +static int +apei_modcmd(modcmd_t cmd, void *opaque) +{ + int error = 0; + + switch (cmd) { + case MODULE_CMD_INIT: +#ifdef _MODULE + error = config_init_component(cfdriver_ioconf_apei, + cfattach_ioconf_apei, cfdata_ioconf_apei); +#endif + return error; + case MODULE_CMD_FINI: +#ifdef _MODULE + error = config_fini_component(cfdriver_ioconf_apei, + cfattach_ioconf_apei, cfdata_ioconf_apei); +#endif + return error; + default: + return ENOTTY; + } +} diff --git a/sys/dev/acpi/apei_hed.h b/sys/dev/acpi/apei_hed.h new file mode 100644 index 000000000000..988be5292c0a --- /dev/null +++ b/sys/dev/acpi/apei_hed.h @@ -0,0 +1,34 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _SYS_DEV_ACPI_APEI_HED_H_ +#define _SYS_DEV_ACPI_APEI_HED_H_ + +void apei_hed_notify(void); + +#endif /* _SYS_DEV_ACPI_APEI_HED_H_ */ diff --git a/sys/dev/acpi/apei_interp.c b/sys/dev/acpi/apei_interp.c new file mode 100644 index 000000000000..ef6fe4c625df --- /dev/null +++ b/sys/dev/acpi/apei_interp.c @@ -0,0 +1,326 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__KERNEL_RCSID(0, "$NetBSD$"); + +#include + +#include +#include + +#include +#include + +/* + * struct apei_actinst + * + * Sequence of instructions to execute for an action. + */ +struct apei_actinst { + uint32_t ninst; + uint32_t ip; + struct acpi_whea_header **inst; +}; + +/* + * struct apei_interp + * + * Table of instructions to interpret APEI actions. 
+*/ +struct apei_interp { + const char *name; + const char *const *actname; + unsigned nact; + const char *const *instname; + unsigned ninst; + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, + uint32_t); + void (*instfunc)(ACPI_WHEA_HEADER *, void *, + uint32_t *, uint32_t); + struct apei_actinst actinst[]; +}; + +/* + * apei_interp_create(name, actname, nact, instname, ninst, + * instvalid, instfunc) + * + * Allocate a zeroed interpreter with room for nact actions. The + * name tables and callbacks are stored by pointer, not copied. + */ +struct apei_interp * +apei_interp_create(const char *name, + const char *const *actname, unsigned nact, + const char *const *instname, unsigned ninst, + bool (*instvalid)(ACPI_WHEA_HEADER *, uint32_t, uint32_t), + void (*instfunc)(ACPI_WHEA_HEADER *, void *, uint32_t *, uint32_t)) +{ + struct apei_interp *I; + + I = kmem_zalloc(offsetof(struct apei_interp, actinst[nact]), KM_SLEEP); + I->name = name; + I->actname = actname; + I->nact = nact; + I->instname = instname; + I->ninst = ninst; + I->instvalid = instvalid; + I->instfunc = instfunc; + + return I; +} + +/* + * apei_interp_destroy(I) + * + * Free the interpreter I, along with any per-action instruction + * arrays allocated by apei_interp_pass3_alloc. + */ +void +apei_interp_destroy(struct apei_interp *I) +{ + unsigned action, nact = I->nact; + + for (action = 0; action < nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + + /* + * Only actions with a real instruction count -- neither + * empty nor disabled with UINT32_MAX -- had an array + * allocated, and its size is ninst pointers. + */ + if (A->ninst == 0 || A->ninst == UINT32_MAX || + A->inst == NULL) + continue; + kmem_free(A->inst, A->ninst * sizeof(A->inst[0])); + A->inst = NULL; + } + + kmem_free(I, offsetof(struct apei_interp, actinst[nact])); +} + +/* + * apei_interp_pass1_load(I, i, E) + * + * Load the ith table entry E into the interpreter I. To be + * called for each entry in the table sequentially. + * + * This first pass counts the number of instructions for each + * action, so we can allocate an array of instructions for + * indexing each action. + */ +void +apei_interp_pass1_load(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + aprint_debug("%s[%"PRIu32"]: Action=0x%"PRIx8" (%s)\n", I->name, i, + E->Action, + (E->Action < I->nact && I->actname[E->Action] + ? 
I->instname[E->Instruction] : "unknown")); + aprint_debug(" Flags=0x%02"PRIx8"\n", E->Flags); + aprint_debug(" Reserved=[%02"PRIx8"]\n", E->Reserved); + aprint_debug(" RegisterRegion=" + "[SpaceId=%"PRIu8 + " BitWidth=%"PRIu8" BitOffset=%"PRIu8" AccessWidth=%"PRIu8 + " Address=0x%"PRIx64"]\n", + E->RegisterRegion.SpaceId, + E->RegisterRegion.BitWidth, + E->RegisterRegion.BitOffset, + E->RegisterRegion.AccessWidth, + E->RegisterRegion.Address); + aprint_debug(" Value=0x%016"PRIx64"\n", E->Value); + aprint_debug(" Mask=0x%016"PRIx64"\n", E->Mask); + + /* + * If we don't recognize this action, ignore it and move on. + */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown action: 0x%"PRIx8"\n", + I->name, i, E->Action); + return; + } + struct apei_actinst *const A = &I->actinst[E->Action]; + + /* + * If we can't interpret this instruction for this action, or + * if we couldn't interpret a previous instruction for this + * action, ignore _all_ instructions for this action -- by + * marking the action as having UINT32_MAX instructions -- and + * move on. + */ + if (E->Instruction >= I->ninst || + I->instname[E->Instruction] == NULL) { + aprint_error("%s[%"PRIu32"]: unknown instruction: 0x%02"PRIx8 + "\n", I->name, i, E->Instruction); + A->ninst = UINT32_MAX; + return; + } + if (A->ninst == UINT32_MAX) + return; + + /* + * Count another instruction. We will make a pointer + * to it in a later pass. + */ + A->ninst++; + + /* + * If it overflows a reasonable size, bail on this whole + * action by marking it with UINT32_MAX like the other + * failure cases above. + */ + if (A->ninst >= 256) { + aprint_error("%s[%"PRIu32"]:" + " too many instructions for action %"PRIu32" (%s)\n", + I->name, i, + E->Action, I->actname[E->Action]); + A->ninst = UINT32_MAX; + return; + } +} + +/* + * apei_interp_pass2_verify(I, i, E) + * + * Verify the ith entry's instruction, using the caller's + * instvalid function, now that all the instructions have been + * counted. 
To be called for each entry in the table + * sequentially. + * + * This second pass checks that GOTO instructions in particular + * don't jump out of bounds. + */ +void +apei_interp_pass2_verify(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If there's no instruction validation function, skip this + * pass. + */ + if (I->instvalid == NULL) + return; + + /* + * If we skipped it in earlier passes, skip it now. + */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) + return; + + /* + * If the instruction is invalid, disable the whole action. + */ + struct apei_actinst *const A = &I->actinst[E->Action]; + if (!(*I->instvalid)(E, A->ninst, i)) + A->ninst = UINT32_MAX; +} + +/* + * apei_interp_pass3_alloc(I) + * + * Allocate an array of instructions for each action that we + * didn't decide to bail on, marked with UINT32_MAX. + */ +void +apei_interp_pass3_alloc(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + if (A->ninst == 0 || A->ninst == UINT32_MAX) + continue; + A->inst = kmem_zalloc(A->ninst * sizeof(A->inst[0]), KM_SLEEP); + } +} + +/* + * apei_interp_pass4_assemble(I, i, E) + * + * Put the instruction for the ith entry E into the instruction + * array for its action. To be called for each entry in the table + * sequentially. + */ +void +apei_interp_pass4_assemble(struct apei_interp *I, uint32_t i, + ACPI_WHEA_HEADER *E) +{ + + /* + * If we skipped it in earlier passes, skip it now.
+ */ + if (E->Action >= I->nact || I->actname[E->Action] == NULL) + return; + + struct apei_actinst *const A = &I->actinst[E->Action]; + if (A->ninst == UINT32_MAX) + return; + + KASSERT(A->ip < A->ninst); + A->inst[A->ip++] = E; +} + +/* + * apei_interp_pass5_verify(I) + * + * Paranoia: Verify we got all the instructions for each action, + * verify the actions point to their own instructions, and dump + * the instructions for each action, collated, with aprint_debug. + */ +void +apei_interp_pass5_verify(struct apei_interp *I) +{ + unsigned action; + + for (action = 0; action < I->nact; action++) { + struct apei_actinst *const A = &I->actinst[action]; + unsigned j; + + /* + * If the action is disabled, it's all set. + */ + if (A->ninst == UINT32_MAX) + continue; + KASSERTMSG(A->ip == A->ninst, + "action %s ip=%"PRIu32" ninstruction=%"PRIu32, + I->actname[action], A->ip, A->ninst); + + /* + * XXX Dump the complete instruction table. + */ + for (j = 0; j < A->ninst; j++) { + ACPI_WHEA_HEADER *const E = A->inst[j]; + + KASSERT(E->Action == action); + aprint_debug("%s: %s[%"PRIu32"]: %s\n", + I->name, I->actname[action], j, + I->instname[E->Instruction]); + } + } +} + +/* + * apei_interpret(I, action, cookie) + * + * Run the instructions associated with the given action by + * calling the interpreter's instfunc for each one. + * + * Halt when the instruction pointer runs past the end of the + * array, or after 1000 cycles, whichever comes first. 
+ */ +void +apei_interpret(struct apei_interp *I, unsigned action, void *cookie) +{ + unsigned juice = 1000; + uint32_t ip = 0; + + /* + * Ignore actions we don't know about. + */ + if (action >= I->nact || I->actname[action] == NULL) + return; + struct apei_actinst *const A = &I->actinst[action]; + + /* + * Ignore actions disabled during loading: they are marked + * with UINT32_MAX instructions and have no instruction array. + */ + if (A->ninst == UINT32_MAX) + return; + + while (ip < A->ninst && juice --> 0) { + ACPI_WHEA_HEADER *const E = A->inst[ip++]; + + (*I->instfunc)(E, cookie, &ip, A->ninst); + } +} diff --git a/sys/dev/acpi/apei_interp.h b/sys/dev/acpi/apei_interp.h new file mode 100644 index 000000000000..8b3570d83b89 --- /dev/null +++ b/sys/dev/acpi/apei_interp.h @@ -0,0 +1,55 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef _SYS_DEV_ACPI_APEI_INTERP_H_ +#define _SYS_DEV_ACPI_APEI_INTERP_H_ + +#include + +struct acpi_whea_header; +struct apei_interp; + +struct apei_interp *apei_interp_create(const char *, + const char *const *, unsigned, + const char *const *, unsigned, + bool (*)(struct acpi_whea_header *, uint32_t, uint32_t), + void (*)(struct acpi_whea_header *, void *, uint32_t *, uint32_t)); +void apei_interp_destroy(struct apei_interp *); + +void apei_interp_pass1_load(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass2_verify(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass3_alloc(struct apei_interp *); +void apei_interp_pass4_assemble(struct apei_interp *, uint32_t, + struct acpi_whea_header *); +void apei_interp_pass5_verify(struct apei_interp *); + +void apei_interpret(struct apei_interp *, unsigned, void *); + +#endif /* _SYS_DEV_ACPI_APEI_INTERP_H_ */ diff --git a/sys/dev/acpi/cper.h b/sys/dev/acpi/cper.h new file mode 100644 index 000000000000..6f0f424732e4 --- /dev/null +++ b/sys/dev/acpi/cper.h @@ -0,0 +1,234 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Common Platform Error Record + * + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html + */ + +#ifndef _SYS_SYS_CPER_H_ +#define _SYS_SYS_CPER_H_ + +#include + +#include + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#record-header + */ +struct cper_header { + char signature_start[4]; /* "CPER" */ + uint16_t revision; + uint32_t signature_end; /* 0xffffffff */ + uint16_t section_count; + uint32_t error_severity; + uint32_t validation_bits; + uint32_t record_length; + uint64_t timestamp; + uint8_t platform_id[16]; + uint8_t partition_id[16]; + uint8_t creator_id[16]; + uint8_t notification_type[16]; + uint64_t record_id; + uint32_t flags; + uint64_t persistence_info; + uint8_t reserved[12]; +} __packed; +__CTASSERT(sizeof(struct cper_header) == 128); + +enum { /* struct cper_header::error_severity */ + CPER_ERROR_SEVERITY_RECOVERABLE = 0, + CPER_ERROR_SEVERITY_FATAL = 1, + CPER_ERROR_SEVERITY_CORRECTED = 2, + CPER_ERROR_SEVERITY_INFORMATIONAL = 3, +}; + +enum { /* struct cper_header::validation_bits */ + CPER_PLATFORM_ID_VALID = __BIT(0), + CPER_TIMESTAMP_VALID = __BIT(1), + CPER_PARTITION_ID_VALID = __BIT(2), +}; + +enum { /* struct cper_header::flags */ + CPER_HW_ERROR_FLAG_RECOVERED = __BIT(0), + CPER_HW_ERROR_FLAG_PREVERR = __BIT(1), + CPER_HW_ERROR_FLAG_SIMULATED = __BIT(2), +}; + +#if 0 +struct cper_section_descriptor { + ...
+}; +#endif + +enum { + CPER_SECTION_FLAG_PRIMARY = __BIT(0), + CPER_SECTION_FLAG_CONTAINMENT_WARNING = __BIT(1), + CPER_SECTION_FLAG_RESET = __BIT(2), + CPER_SECTION_FLAG_ERROR_THRESHOLD_EXCEEDED = __BIT(3), + CPER_SECTION_FLAG_RESOURCE_NOT_ACCESSIBLE = __BIT(4), + CPER_SECTION_FLAG_LATENT_ERROR = __BIT(5), + CPER_SECTION_FLAG_PROPAGATED = __BIT(6), + CPER_SECTION_FLAG_OVERFLOW = __BIT(7), +}; + +#define CPER_SECTION_FLAGS_FMT "\020" \ + "\001""PRIMARY" \ + "\002""CONTAINMENT_WARNING" \ + "\003""RESET" \ + "\004""ERROR_THRESHOLD_EXCEEDED" \ + "\005""RESOURCE_NOT_ACCESSIBLE" \ + "\006""LATENT_ERROR" \ + "\007""PROPAGATED" \ + "\010""OVERFLOW" \ + "\0" + +/* + * https://uefi.org/specs/UEFI/2.10/Apx_N_Common_Platform_Error_Record.html#memory-error-section + * + * Type: {0xa5bc1114,0x6f64,0x4ede,{0xb8,0x63,0x3e,0x83,0xed,0x7c,0x83,0xb1}} + */ + +struct cper_memory_error { + uint64_t validation_bits; + uint64_t error_status; + uint64_t physical_address; + uint64_t physical_address_mask; + uint16_t node; + uint16_t card; + uint16_t module; + uint16_t bank; + uint16_t device; + uint16_t row; + uint16_t column; + uint16_t bit_position; + uint64_t requestor_id; + uint64_t responder_id; + uint64_t target_id; + uint8_t memory_error_type; +} __packed; +__CTASSERT(sizeof(struct cper_memory_error) == 73); + +struct cper_memory_error_ext { + struct cper_memory_error base; + uint8_t extended; + uint16_t rank_number; + uint16_t card_handle; + uint16_t module_handle; +} __packed; +__CTASSERT(sizeof(struct cper_memory_error_ext) == 80); + +enum { /* struct cper_memory_error::validation_bits */ + CPER_MEMORY_ERROR_ERROR_STATUS_VALID = __BIT(0), + CPER_MEMORY_ERROR_PHYSICAL_ADDRESS_VALID = __BIT(1), + CPER_MEMORY_ERROR_PHYSICAL_ADDRESS_MASK_VALID = __BIT(2), + CPER_MEMORY_ERROR_NODE_VALID = __BIT(3), + CPER_MEMORY_ERROR_CARD_VALID = __BIT(4), + CPER_MEMORY_ERROR_MODULE_VALID = __BIT(5), + CPER_MEMORY_ERROR_BANK_VALID = __BIT(6), + CPER_MEMORY_ERROR_DEVICE_VALID = __BIT(7), +
CPER_MEMORY_ERROR_ROW_VALID = __BIT(8), + CPER_MEMORY_ERROR_COLUMN_VALID = __BIT(9), + CPER_MEMORY_ERROR_BIT_POSITION_VALID = __BIT(10), + CPER_MEMORY_ERROR_REQUESTOR_ID_VALID = __BIT(11), + CPER_MEMORY_ERROR_RESPONDER_ID_VALID = __BIT(12), + CPER_MEMORY_ERROR_TARGET_ID_VALID = __BIT(13), + CPER_MEMORY_ERROR_MEMORY_ERROR_TYPE_VALID = __BIT(14), + CPER_MEMORY_ERROR_RANK_NUMBER_VALID = __BIT(15), + CPER_MEMORY_ERROR_CARD_HANDLE_VALID = __BIT(16), + CPER_MEMORY_ERROR_MODULE_HANDLE_VALID = __BIT(17), + CPER_MEMORY_ERROR_EXTENDED_ROW_VALID = __BIT(18), + CPER_MEMORY_ERROR_BANK_GROUP_VALID = __BIT(19), + CPER_MEMORY_ERROR_BANK_ADDRESS_VALID = __BIT(20), + CPER_MEMORY_ERROR_CHIP_ID_VALID = __BIT(21), +}; + +#define CPER_MEMORY_ERROR_VALIDATION_BITS_FMT "\020" \ + "\001""ERROR_STATUS" \ + "\002""PHYSICAL_ADDRESS" \ + "\003""PHYSICAL_ADDRESS_MASK" \ + "\004""NODE" \ + "\005""CARD" \ + "\006""MODULE" \ + "\007""BANK" \ + "\010""DEVICE" \ + "\011""ROW" \ + "\012""COLUMN" \ + "\013""BIT_POSITION" \ + "\014""REQUESTOR_ID" \ + "\015""RESPONDER_ID" \ + "\016""TARGET_ID" \ + "\017""MEMORY_ERROR_TYPE" \ + "\020""RANK_NUMBER" \ + "\021""CARD_HANDLE" \ + "\022""MODULE_HANDLE" \ + "\023""EXTENDED_ROW" \ + "\024""BANK_GROUP" \ + "\025""BANK_ADDRESS" \ + "\026""CHIP_ID" \ + "\0" + +enum { /* struct cper_memory_error::bank */ + CPER_MEMORY_ERROR_BANK_ADDRESS = __BITS(7,0), + CPER_MEMORY_ERROR_BANK_GROUP = __BITS(15,8), +}; + +#define CPER_MEMORY_ERROR_TYPES(F) \ + F(CPER_MEMORY_ERROR_UNKNOWN, UNKNOWN, 0) \ + F(CPER_MEMORY_ERROR_NO_ERROR, NO_ERROR, 1) \ + F(CPER_MEMORY_ERROR_SINGLEBIT_ECC, SINGLEBIT_ECC, 2) \ + F(CPER_MEMORY_ERROR_MULTIBIT_ECC, MULTIBIT_ECC, 3) \ + F(CPER_MEMORY_ERROR_SINGLESYM_CHIPKILL_ECC, SINGLESYM_CHIPKILL_ECC, 4)\ + F(CPER_MEMORY_ERROR_MULTISYM_CHIPKILL_ECC, MULTISYM_CHIPKILL_ECC, 5) \ + F(CPER_MEMORY_ERROR_MASTER_ABORT, MASTER_ABORT, 6) \ + F(CPER_MEMORY_ERROR_TARGET_ABORT, TARGET_ABORT, 7) \ + F(CPER_MEMORY_ERROR_PARITY_ERROR, PARITY_ERROR, 8) \ +
F(CPER_MEMORY_ERROR_WATCHDOG_TIMEOUT, WATCHDOG_TIMEOUT, 9) \ + F(CPER_MEMORY_ERROR_INVALID_ADDRESS, INVALID_ADDRESS, 10) \ + F(CPER_MEMORY_ERROR_MIRROR_BROKEN, MIRROR_BROKEN, 11) \ + F(CPER_MEMORY_ERROR_MEMORY_SPARING, MEMORY_SPARING, 12) \ + F(CPER_MEMORY_ERROR_SCRUB_CORRECTED_ERROR, SCRUB_CORRECTED_ERROR, 13) \ + F(CPER_MEMORY_ERROR_SCRUB_UNCORRECTED_ERROR, SCRUB_UNCORRECTED_ERROR, \ + 14) \ + F(CPER_MEMORY_ERROR_PHYSMEM_MAPOUT_EVENT, PHYSMEM_MAPOUT_EVENT, 15) \ + /* end of CPER_MEMORY_ERROR_TYPE */ + +enum cper_memory_error_type { /* struct cper_memory_error::memory_error_type */ +#define F(LN, SN, V) LN = V, + CPER_MEMORY_ERROR_TYPES(F) +#undef F +}; + +enum { /* struct cper_memory_error_ext::extended */ + CPER_MEMORY_ERROR_EXTENDED_ROWBIT16 = __BIT(0), + CPER_MEMORY_ERROR_EXTENDED_ROWBIT17 = __BIT(1), + CPER_MEMORY_ERROR_EXTENDED_CHIPID = __BITS(7,5), +}; + +#endif /* _SYS_SYS_CPER_H_ */ diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index 383b347f38d8..a573f95a9e3c 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -310,4 +310,10 @@ file dev/acpi/igpio_acpi.c igpio_acpi attach dwcmmc at acpinodebus with dwcmmc_acpi file dev/acpi/dwcmmc_acpi.c dwcmmc_acpi +# ACPI Platform Error Interface +device apei +attach apei at apeibus +file dev/acpi/apei.c apei +file dev/acpi/apei_interp.c apei + include "dev/acpi/wmi/files.wmi" diff --git a/sys/modules/Makefile b/sys/modules/Makefile index d8b64a2134cf..2c3f55affd05 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -277,6 +277,13 @@ SUBDIR+= sljit SUBDIR+= acpiverbose .endif +.if ${MACHINE_ARCH} == "i386" || \ + ${MACHINE_ARCH} == "x86_64" || \ + ${MACHINE_CPU} == "arm" || \ + ${MACHINE_CPU} == "aarch64" +SUBDIR+= apei +.endif + .if ${MACHINE_ARCH} == "i386" || \ ${MACHINE_ARCH} == "x86_64" SUBDIR+= acpiacad diff --git a/sys/modules/apei/Makefile b/sys/modules/apei/Makefile new file mode 100644 index 000000000000..26f63d0e8739 --- /dev/null +++ b/sys/modules/apei/Makefile 
@@ -0,0 +1,13 @@ +# $NetBSD$ +# + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= apei +IOCONF= apei.ioconf +SRCS+= apei.c +SRCS+= apei_interp.c + +.include diff --git a/sys/modules/apei/apei.ioconf b/sys/modules/apei/apei.ioconf new file mode 100644 index 000000000000..d2099452d391 --- /dev/null +++ b/sys/modules/apei/apei.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf apei + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root apeibus* + +apei* at apeibus? From 6105608f94ce968a661be3a8f641fa1120e07f85 Mon Sep 17 00:00:00 2001 From: Taylor R Campbell Date: Tue, 19 Mar 2024 02:20:11 +0000 Subject: [PATCH 3/3] acpihed(4): New driver for PNP0C33 to notify apei(4). PR kern/58046 --- share/man/man4/acpihed.4 | 67 ++++++++++++++++++ sys/dev/acpi/acpi_hed.c | 108 +++++++++++++++++++++++++++++ sys/dev/acpi/files.acpi | 5 ++ sys/modules/acpihed/Makefile | 11 +++ sys/modules/acpihed/acpihed.ioconf | 11 +++ 5 files changed, 202 insertions(+) create mode 100644 share/man/man4/acpihed.4 create mode 100644 sys/dev/acpi/acpi_hed.c create mode 100644 sys/modules/acpihed/Makefile create mode 100644 sys/modules/acpihed/acpihed.ioconf diff --git a/share/man/man4/acpihed.4 b/share/man/man4/acpihed.4 new file mode 100644 index 000000000000..264cc492ee6e --- /dev/null +++ b/share/man/man4/acpihed.4 @@ -0,0 +1,67 @@ +.\" $NetBSD$ +.\" +.\" Copyright (c) 2024 The NetBSD Foundation, Inc. +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS +.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS +.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +.\" POSSIBILITY OF SUCH DAMAGE. +.\" +.Dd March 18, 2024 +.Dt ACPIHED 4 +.Os +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh NAME +.Nm acpihed +.Nd ACPI Hardware Error Device +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SYNOPSIS +.Cd "acpihed* at acpi?" +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh DESCRIPTION +Certain hardware error sources that can be queried by +.Xr apei 4 +notify an ACPI node with PNP ID +.Sq Li PNP0C33 +when an error occurs. +The +.Nm +driver listens for these notifications and passes them on to +.Xr apei 4 +so it can report the error. +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh SEE ALSO +.Rs +.%B ACPI Specification 6.5 +.%O Chapter 18: ACPI Platform Error Interfaces (APEI) +.%U https://uefi.org/specs/ACPI/6.5/18_Platform_Error_Interfaces.html +.Re +.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh HISTORY +The +.Nm +driver first appeared in +.Nx 11.0 .
+.\""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +.Sh AUTHORS +The +.Nm +driver was written by +.An Taylor R Campbell Aq Mt riastradh@NetBSD.org . diff --git a/sys/dev/acpi/acpi_hed.c b/sys/dev/acpi/acpi_hed.c new file mode 100644 index 000000000000..48ae4e3984d8 --- /dev/null +++ b/sys/dev/acpi/acpi_hed.c @@ -0,0 +1,108 @@ +/* $NetBSD$ */ + +/*- + * Copyright (c) 2024 The NetBSD Foundation, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * HED: Hardware Error Device, PNP0C33. + * + * This device serves only to receive notifications about hardware + * errors, which we then dispatch to apei(4). 
+ */ + +#include +#include +#include + +#define _COMPONENT ACPI_RESOURCE_COMPONENT +ACPI_MODULE_NAME ("acpi_hed") + +struct acpihed_softc { + device_t sc_dev; + struct acpi_devnode *sc_node; +}; + +static const struct device_compatible_entry compat_data[] = { + { .compat = "PNP0C33" }, + DEVICE_COMPAT_EOL +}; + +static int acpihed_match(device_t, cfdata_t, void *); +static void acpihed_attach(device_t, device_t, void *); +static int acpihed_detach(device_t, int); +static void acpihed_notify(ACPI_HANDLE, uint32_t, void *); + +CFATTACH_DECL_NEW(acpihed, sizeof(struct acpihed_softc), + acpihed_match, acpihed_attach, acpihed_detach, NULL); + +static int +acpihed_match(device_t parent, cfdata_t match, void *aux) +{ + struct acpi_attach_args *aa = aux; + + return acpi_compatible_match(aa, compat_data); +} + +static void +acpihed_attach(device_t parent, device_t self, void *aux) +{ + struct acpihed_softc *sc = device_private(self); + struct acpi_attach_args *aa = aux; + + aprint_naive("\n"); + aprint_normal(": ACPI Hardware Error Device\n"); + + pmf_device_register(self, NULL, NULL); + + sc->sc_dev = self; + sc->sc_node = aa->aa_node; + + acpi_register_notify(sc->sc_node, acpihed_notify); +} + +static int +acpihed_detach(device_t self, int flags) +{ + struct acpihed_softc *sc = device_private(self); + int error; + + error = config_detach_children(self, flags); + if (error) + return error; + + acpi_deregister_notify(sc->sc_node); + + pmf_device_deregister(self); + + return 0; +} + +static void +acpihed_notify(ACPI_HANDLE handle, uint32_t event, void *cookie) +{ + + apei_hed_notify(); +} diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi index a573f95a9e3c..0af432eb30d5 100644 --- a/sys/dev/acpi/files.acpi +++ b/sys/dev/acpi/files.acpi @@ -316,4 +316,9 @@ attach apei at apeibus file dev/acpi/apei.c apei file dev/acpi/apei_interp.c apei +# ACPI Hardware Error Device +device acpihed: apei +attach acpihed at acpinodebus +file dev/acpi/acpi_hed.c acpihed + include
"dev/acpi/wmi/files.wmi" diff --git a/sys/modules/acpihed/Makefile b/sys/modules/acpihed/Makefile new file mode 100644 index 000000000000..7f23c86050a6 --- /dev/null +++ b/sys/modules/acpihed/Makefile @@ -0,0 +1,11 @@ +# $NetBSD$ + +.include "../Makefile.inc" + +.PATH: $S/dev/acpi + +KMOD= acpihed +IOCONF= acpihed.ioconf +SRCS= acpi_hed.c + +.include diff --git a/sys/modules/acpihed/acpihed.ioconf b/sys/modules/acpihed/acpihed.ioconf new file mode 100644 index 000000000000..8edb2783bc6b --- /dev/null +++ b/sys/modules/acpihed/acpihed.ioconf @@ -0,0 +1,11 @@ +# $NetBSD$ +# + +ioconf acpihed + +include "conf/files" +include "dev/acpi/files.acpi" + +pseudo-root acpi* + +acpihed* at acpi?