From dd6dad4288cb93e79bd7abfa6c6a338c47454d1a Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Fri, 18 Oct 2013 14:29:25 -0700 Subject: DMI: Parse memory device (type 17) in SMBIOS This patch adds a new interface to decode memory device (type 17) to help error reporting on DIMMs. Original-author: Tony Luck Signed-off-by: Chen, Gong Acked-by: Naveen N. Rao Acked-by: Borislav Petkov Reviewed-by: Mauro Carvalho Chehab Signed-off-by: Tony Luck --- drivers/firmware/dmi_scan.c | 60 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) (limited to 'drivers/firmware') diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index fa0affb699b..59579a744d5 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -25,6 +25,13 @@ static int dmi_initialized; /* DMI system identification string used during boot */ static char dmi_ids_string[128] __initdata; +static struct dmi_memdev_info { + const char *device; + const char *bank; + u16 handle; +} *dmi_memdev; +static int dmi_memdev_nr; + static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s) { const u8 *bp = ((u8 *) dm) + dm->length; @@ -322,6 +329,42 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm) dmi_save_one_device(*d & 0x7f, dmi_string_nosave(dm, *(d - 1))); } +static void __init count_mem_devices(const struct dmi_header *dm, void *v) +{ + if (dm->type != DMI_ENTRY_MEM_DEVICE) + return; + dmi_memdev_nr++; +} + +static void __init save_mem_devices(const struct dmi_header *dm, void *v) +{ + const char *d = (const char *)dm; + static int nr; + + if (dm->type != DMI_ENTRY_MEM_DEVICE) + return; + if (nr >= dmi_memdev_nr) { + pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n"); + return; + } + dmi_memdev[nr].handle = dm->handle; + dmi_memdev[nr].device = dmi_string(dm, d[0x10]); + dmi_memdev[nr].bank = dmi_string(dm, d[0x11]); + nr++; +} + +void __init dmi_memdev_walk(void) +{ + if (!dmi_available) + return; + + if (dmi_walk_early(count_mem_devices) == 0 && dmi_memdev_nr) { + dmi_memdev = dmi_alloc(sizeof(*dmi_memdev) * dmi_memdev_nr); + if (dmi_memdev) + dmi_walk_early(save_mem_devices); + } +} + /* * Process a DMI table entry. Right now all we care about are the BIOS * and machine entries. For 2.5 we should pull the smbus controller info @@ -815,3 +858,20 @@ bool dmi_match(enum dmi_field f, const char *str) return !strcmp(info, str); } EXPORT_SYMBOL_GPL(dmi_match); + +void dmi_memdev_name(u16 handle, const char **bank, const char **device) +{ + int n; + + if (dmi_memdev == NULL) + return; + + for (n = 0; n < dmi_memdev_nr; n++) { + if (handle == dmi_memdev[n].handle) { + *bank = dmi_memdev[n].bank; + *device = dmi_memdev[n].device; + break; + } + } +} +EXPORT_SYMBOL_GPL(dmi_memdev_name); -- cgit v1.2.3 From 7ea6c6c15ee16f4c7f5eeaa62f77e696f85ae0d1 Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Mon, 28 Oct 2013 14:06:55 -0700 Subject: Move cper.c from drivers/acpi/apei to drivers/firmware/efi cper.c contains code to decode and print "Common Platform Error Records". Originally added under drivers/acpi/apei because the only user was in that same directory - but now we have another consumer, and we shouldn't have to force CONFIG_ACPI_APEI get access to this code. Since CPER is defined in the UEFI specification - the logical home for this code is under drivers/firmware/efi/ Acked-by: Matt Fleming Acked-by: Ingo Molnar Signed-off-by: Tony Luck --- drivers/firmware/efi/Kconfig | 3 + drivers/firmware/efi/Makefile | 1 + drivers/firmware/efi/cper.c | 410 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 414 insertions(+) create mode 100644 drivers/firmware/efi/cper.c (limited to 'drivers/firmware') diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index b0fc7c79dfb..3150aa4874e 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -36,4 +36,7 @@ config EFI_VARS_PSTORE_DEFAULT_DISABLE backend for pstore by default. This setting can be overridden using the efivars module's pstore_disable parameter. +config UEFI_CPER + def_bool n + endmenu diff --git a/drivers/firmware/efi/Makefile b/drivers/firmware/efi/Makefile index 99245ab5a79..9ba156d3c77 100644 --- a/drivers/firmware/efi/Makefile +++ b/drivers/firmware/efi/Makefile @@ -4,3 +4,4 @@ obj-y += efi.o vars.o obj-$(CONFIG_EFI_VARS) += efivars.o obj-$(CONFIG_EFI_VARS_PSTORE) += efi-pstore.o +obj-$(CONFIG_UEFI_CPER) += cper.o diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c new file mode 100644 index 00000000000..1491dd4f08f --- /dev/null +++ b/drivers/firmware/efi/cper.c @@ -0,0 +1,410 @@ +/* + * UEFI Common Platform Error Record (CPER) support + * + * Copyright (C) 2010, Intel Corp. + * Author: Huang Ying + * + * CPER is the format used to describe platform hardware error by + * various tables, such as ERST, BERT and HEST etc. + * + * For more information about CPER, please refer to Appendix N of UEFI + * Specification version 2.4. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define INDENT_SP " " +/* + * CPER record ID need to be unique even after reboot, because record + * ID is used as index for ERST storage, while CPER records from + * multiple boot may co-exist in ERST. + */ +u64 cper_next_record_id(void) +{ + static atomic64_t seq; + + if (!atomic64_read(&seq)) + atomic64_set(&seq, ((u64)get_seconds()) << 32); + + return atomic64_inc_return(&seq); +} +EXPORT_SYMBOL_GPL(cper_next_record_id); + +static const char *cper_severity_strs[] = { + "recoverable", + "fatal", + "corrected", + "info", +}; + +static const char *cper_severity_str(unsigned int severity) +{ + return severity < ARRAY_SIZE(cper_severity_strs) ? + cper_severity_strs[severity] : "unknown"; +} + +/* + * cper_print_bits - print strings for set bits + * @pfx: prefix for each line, including log level and prefix string + * @bits: bit mask + * @strs: string array, indexed by bit position + * @strs_size: size of the string array: @strs + * + * For each set bit in @bits, print the corresponding string in @strs. + * If the output length is longer than 80, multiple line will be + * printed, with @pfx is printed at the beginning of each line. + */ +void cper_print_bits(const char *pfx, unsigned int bits, + const char * const strs[], unsigned int strs_size) +{ + int i, len = 0; + const char *str; + char buf[84]; + + for (i = 0; i < strs_size; i++) { + if (!(bits & (1U << i))) + continue; + str = strs[i]; + if (!str) + continue; + if (len && len + strlen(str) + 2 > 80) { + printk("%s\n", buf); + len = 0; + } + if (!len) + len = snprintf(buf, sizeof(buf), "%s%s", pfx, str); + else + len += snprintf(buf+len, sizeof(buf)-len, ", %s", str); + } + if (len) + printk("%s\n", buf); +} + +static const char * const cper_proc_type_strs[] = { + "IA32/X64", + "IA64", +}; + +static const char * const cper_proc_isa_strs[] = { + "IA32", + "IA64", + "X64", +}; + +static const char * const cper_proc_error_type_strs[] = { + "cache error", + "TLB error", + "bus error", + "micro-architectural error", +}; + +static const char * const cper_proc_op_strs[] = { + "unknown or generic", + "data read", + "data write", + "instruction execution", +}; + +static const char * const cper_proc_flag_strs[] = { + "restartable", + "precise IP", + "overflow", + "corrected", +}; + +static void cper_print_proc_generic(const char *pfx, + const struct cper_sec_proc_generic *proc) +{ + if (proc->validation_bits & CPER_PROC_VALID_TYPE) + printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, + proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? + cper_proc_type_strs[proc->proc_type] : "unknown"); + if (proc->validation_bits & CPER_PROC_VALID_ISA) + printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, + proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? + cper_proc_isa_strs[proc->proc_isa] : "unknown"); + if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { + printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); + cper_print_bits(pfx, proc->proc_error_type, + cper_proc_error_type_strs, + ARRAY_SIZE(cper_proc_error_type_strs)); + } + if (proc->validation_bits & CPER_PROC_VALID_OPERATION) + printk("%s""operation: %d, %s\n", pfx, proc->operation, + proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? + cper_proc_op_strs[proc->operation] : "unknown"); + if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { + printk("%s""flags: 0x%02x\n", pfx, proc->flags); + cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, + ARRAY_SIZE(cper_proc_flag_strs)); + } + if (proc->validation_bits & CPER_PROC_VALID_LEVEL) + printk("%s""level: %d\n", pfx, proc->level); + if (proc->validation_bits & CPER_PROC_VALID_VERSION) + printk("%s""version_info: 0x%016llx\n", pfx, proc->cpu_version); + if (proc->validation_bits & CPER_PROC_VALID_ID) + printk("%s""processor_id: 0x%016llx\n", pfx, proc->proc_id); + if (proc->validation_bits & CPER_PROC_VALID_TARGET_ADDRESS) + printk("%s""target_address: 0x%016llx\n", + pfx, proc->target_addr); + if (proc->validation_bits & CPER_PROC_VALID_REQUESTOR_ID) + printk("%s""requestor_id: 0x%016llx\n", + pfx, proc->requestor_id); + if (proc->validation_bits & CPER_PROC_VALID_RESPONDER_ID) + printk("%s""responder_id: 0x%016llx\n", + pfx, proc->responder_id); + if (proc->validation_bits & CPER_PROC_VALID_IP) + printk("%s""IP: 0x%016llx\n", pfx, proc->ip); +} + +static const char *cper_mem_err_type_strs[] = { + "unknown", + "no error", + "single-bit ECC", + "multi-bit ECC", + "single-symbol chipkill ECC", + "multi-symbol chipkill ECC", + "master abort", + "target abort", + "parity error", + "watchdog timeout", + "invalid address", + "mirror Broken", + "memory sparing", + "scrub corrected error", + "scrub uncorrected error", + "physical memory map-out event", +}; + +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +{ + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) + printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); + if (mem->validation_bits & CPER_MEM_VALID_PA) + printk("%s""physical_address: 0x%016llx\n", + pfx, mem->physical_addr); + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + printk("%s""physical_address_mask: 0x%016llx\n", + pfx, mem->physical_addr_mask); + if (mem->validation_bits & CPER_MEM_VALID_NODE) + pr_debug("node: %d\n", mem->node); + if (mem->validation_bits & CPER_MEM_VALID_CARD) + pr_debug("card: %d\n", mem->card); + if (mem->validation_bits & CPER_MEM_VALID_MODULE) + pr_debug("module: %d\n", mem->module); + if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) + pr_debug("rank: %d\n", mem->rank); + if (mem->validation_bits & CPER_MEM_VALID_BANK) + pr_debug("bank: %d\n", mem->bank); + if (mem->validation_bits & CPER_MEM_VALID_DEVICE) + pr_debug("device: %d\n", mem->device); + if (mem->validation_bits & CPER_MEM_VALID_ROW) + pr_debug("row: %d\n", mem->row); + if (mem->validation_bits & CPER_MEM_VALID_COLUMN) + pr_debug("column: %d\n", mem->column); + if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) + pr_debug("bit_position: %d\n", mem->bit_pos); + if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) + pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); + if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) + pr_debug("responder_id: 0x%016llx\n", mem->responder_id); + if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) + pr_debug("target_id: 0x%016llx\n", mem->target_id); + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { + u8 etype = mem->error_type; + printk("%s""error_type: %d, %s\n", pfx, etype, + etype < ARRAY_SIZE(cper_mem_err_type_strs) ? + cper_mem_err_type_strs[etype] : "unknown"); + } + if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { + const char *bank = NULL, *device = NULL; + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank != NULL && device != NULL) + printk("%s""DIMM location: %s %s", pfx, bank, device); + else + printk("%s""DIMM DMI handle: 0x%.4x", + pfx, mem->mem_dev_handle); + } +} + +static const char *cper_pcie_port_type_strs[] = { + "PCIe end point", + "legacy PCI end point", + "unknown", + "unknown", + "root port", + "upstream switch port", + "downstream switch port", + "PCIe to PCI/PCI-X bridge", + "PCI/PCI-X to PCIe bridge", + "root complex integrated endpoint device", + "root complex event collector", +}; + +static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, + const struct acpi_generic_data *gdata) +{ + if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) + printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, + pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? + cper_pcie_port_type_strs[pcie->port_type] : "unknown"); + if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) + printk("%s""version: %d.%d\n", pfx, + pcie->version.major, pcie->version.minor); + if (pcie->validation_bits & CPER_PCIE_VALID_COMMAND_STATUS) + printk("%s""command: 0x%04x, status: 0x%04x\n", pfx, + pcie->command, pcie->status); + if (pcie->validation_bits & CPER_PCIE_VALID_DEVICE_ID) { + const __u8 *p; + printk("%s""device_id: %04x:%02x:%02x.%x\n", pfx, + pcie->device_id.segment, pcie->device_id.bus, + pcie->device_id.device, pcie->device_id.function); + printk("%s""slot: %d\n", pfx, + pcie->device_id.slot >> CPER_PCIE_SLOT_SHIFT); + printk("%s""secondary_bus: 0x%02x\n", pfx, + pcie->device_id.secondary_bus); + printk("%s""vendor_id: 0x%04x, device_id: 0x%04x\n", pfx, + pcie->device_id.vendor_id, pcie->device_id.device_id); + p = pcie->device_id.class_code; + printk("%s""class_code: %02x%02x%02x\n", pfx, p[0], p[1], p[2]); + } + if (pcie->validation_bits & CPER_PCIE_VALID_SERIAL_NUMBER) + printk("%s""serial number: 0x%04x, 0x%04x\n", pfx, + pcie->serial_number.lower, pcie->serial_number.upper); + if (pcie->validation_bits & CPER_PCIE_VALID_BRIDGE_CONTROL_STATUS) + printk( + "%s""bridge: secondary_status: 0x%04x, control: 0x%04x\n", + pfx, pcie->bridge.secondary_status, pcie->bridge.control); +} + +static void cper_estatus_print_section( + const char *pfx, const struct acpi_generic_data *gdata, int sec_no) +{ + uuid_le *sec_type = (uuid_le *)gdata->section_type; + __u16 severity; + char newpfx[64]; + + severity = gdata->error_severity; + printk("%s""Error %d, type: %s\n", pfx, sec_no, + cper_severity_str(severity)); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + printk("%s""fru_id: %pUl\n", pfx, (uuid_le *)gdata->fru_id); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + printk("%s""fru_text: %.20s\n", pfx, gdata->fru_text); + + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); + if (!uuid_le_cmp(*sec_type, CPER_SEC_PROC_GENERIC)) { + struct cper_sec_proc_generic *proc_err = (void *)(gdata + 1); + printk("%s""section_type: general processor error\n", newpfx); + if (gdata->error_data_length >= sizeof(*proc_err)) + cper_print_proc_generic(newpfx, proc_err); + else + goto err_section_too_small; + } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem_err = (void *)(gdata + 1); + printk("%s""section_type: memory error\n", newpfx); + if (gdata->error_data_length >= sizeof(*mem_err)) + cper_print_mem(newpfx, mem_err); + else + goto err_section_too_small; + } else if (!uuid_le_cmp(*sec_type, CPER_SEC_PCIE)) { + struct cper_sec_pcie *pcie = (void *)(gdata + 1); + printk("%s""section_type: PCIe error\n", newpfx); + if (gdata->error_data_length >= sizeof(*pcie)) + cper_print_pcie(newpfx, pcie, gdata); + else + goto err_section_too_small; + } else + printk("%s""section type: unknown, %pUl\n", newpfx, sec_type); + + return; + +err_section_too_small: + pr_err(FW_WARN "error section length is too small\n"); +} + +void cper_estatus_print(const char *pfx, + const struct acpi_generic_status *estatus) +{ + struct acpi_generic_data *gdata; + unsigned int data_len, gedata_len; + int sec_no = 0; + char newpfx[64]; + __u16 severity; + + severity = estatus->error_severity; + if (severity == CPER_SEV_CORRECTED) + printk("%s%s\n", pfx, + "It has been corrected by h/w " + "and requires no further action"); + printk("%s""event severity: %s\n", pfx, cper_severity_str(severity)); + data_len = estatus->data_length; + gdata = (struct acpi_generic_data *)(estatus + 1); + snprintf(newpfx, sizeof(newpfx), "%s%s", pfx, INDENT_SP); + while (data_len >= sizeof(*gdata)) { + gedata_len = gdata->error_data_length; + cper_estatus_print_section(newpfx, gdata, sec_no); + data_len -= gedata_len + sizeof(*gdata); + gdata = (void *)(gdata + 1) + gedata_len; + sec_no++; + } +} +EXPORT_SYMBOL_GPL(cper_estatus_print); + +int cper_estatus_check_header(const struct acpi_generic_status *estatus) +{ + if (estatus->data_length && + estatus->data_length < sizeof(struct acpi_generic_data)) + return -EINVAL; + if (estatus->raw_data_length && + estatus->raw_data_offset < sizeof(*estatus) + estatus->data_length) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(cper_estatus_check_header); + +int cper_estatus_check(const struct acpi_generic_status *estatus) +{ + struct acpi_generic_data *gdata; + unsigned int data_len, gedata_len; + int rc; + + rc = cper_estatus_check_header(estatus); + if (rc) + return rc; + data_len = estatus->data_length; + gdata = (struct acpi_generic_data *)(estatus + 1); + while (data_len >= sizeof(*gdata)) { + gedata_len = gdata->error_data_length; + if (gedata_len > data_len - sizeof(*gdata)) + return -EINVAL; + data_len -= gedata_len + sizeof(*gdata); + gdata = (void *)(gdata + 1) + gedata_len; + } + if (data_len) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(cper_estatus_check); -- cgit v1.2.3 From 0841c04d65937ad2808f59c43cb54a92473c8f0e Mon Sep 17 00:00:00 2001 From: "Luck, Tony" Date: Fri, 1 Nov 2013 13:59:52 -0700 Subject: dmi: Avoid unaligned memory access in save_mem_devices() Firmware is not required to maintain alignment of SMBIOS entries, so we should take care accessing fields within these structures. Use "get_unaligned()" to avoid problems. [ Found on ia64 (which grumbles about unaligned access) ] Signed-off-by: Tony Luck Cc: Chen Gong Link: http://lkml.kernel.org/r/27d82dbff5be1025bf18ab88498632d36c2fcf3c.1383331440.git.tony.luck@intel.com Signed-off-by: Ingo Molnar --- drivers/firmware/dmi_scan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/firmware') diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index 59579a744d5..c7e81ff8f3e 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -8,6 +8,7 @@ #include #include #include +#include /* * DMI stands for "Desktop Management Interface". It is part @@ -347,7 +348,7 @@ static void __init save_mem_devices(const struct dmi_header *dm, void *v) pr_warn(FW_BUG "Too many DIMM entries in SMBIOS table\n"); return; } - dmi_memdev[nr].handle = dm->handle; + dmi_memdev[nr].handle = get_unaligned(&dm->handle); dmi_memdev[nr].device = dmi_string(dm, d[0x10]); dmi_memdev[nr].bank = dmi_string(dm, d[0x11]); nr++; -- cgit v1.2.3