Jetpack/kernel/nvidia/drivers/platform/tegra/denver_mca.c

415 lines
11 KiB
C

/*
* Copyright (c) 2016-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*/
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/smp_plat.h>
#include <asm/traps.h>
#include <linux/debugfs.h>
#include <linux/cpu.h>
#include <linux/cpu_pm.h>
#include <linux/module.h>
#include <linux/platform/tegra/denver_mca.h>
#include <linux/tegra-mce.h>
#include <linux/platform/tegra/ari_mca.h>
#include <linux/platform/tegra/tegra18_cpu_map.h>
#include <soc/tegra/chip-id.h>
#include <linux/version.h>
/*
 * Mask of Denver CPUs. mca_trip() hands this mask to smp_call_function_any()
 * so that do_mca_trip() runs on one of the CPUs set here; bits are set in
 * denver_serr_init().
 */
static struct cpumask denver_cpumask;
/*
 * do_mca_trip() - cross-call target that writes a value to SERR1_STATUS
 * @data: pointer to the u64 value to be written to s3_0_c15_c4_7
 *
 * Logs the saved interrupt flags, SERR_CTL (s3_0_c15_c3_2) and SERR1_CTRL
 * (s3_0_c15_c4_6) before performing the write. The function gives up early,
 * after logging, when bit 2 of the saved flags is set or when bit 0 of
 * SERR_CTL is clear.
 *
 * NOTE(review): arch_local_save_flags() on arm64 presumably returns the raw
 * DAIF register, where the "A" mask would sit at bit 8 rather than bit 2, and
 * the message text reads inverted relative to the test - confirm the intended
 * mask and wording with the hardware documentation.
 */
static void do_mca_trip(void *data)
{
	u64 *trip_val = data;
	unsigned long daif = arch_local_save_flags();
	unsigned long sysreg;

	/* Print some debug information */
	pr_crit("%s: DAIF = 0x%lx\n", __func__, daif);
	if (daif & 0x4) {
		pr_crit("%s: \"A\" not set", __func__);
		return;
	}

	/* SERR_CTL: bit 0 must be set for the trip to be attempted */
	asm volatile("mrs %0, s3_0_c15_c3_2" : "=r" (sysreg));
	pr_crit("%s:SERR_CTL = s3_0_c15_c3_2 = 0x%lx\n", __func__, sysreg);
	if ((sysreg & 1) == 0) {
		pr_crit("%s: s3_0_c15_c3_2,not set", __func__);
		return;
	}

	/* SERR1 Bank - JSR:MTS (logged only, not checked) */
	asm volatile("mrs %0, s3_0_c15_c4_6" : "=r" (sysreg));
	pr_crit("%s:SERR1_CTRL: s3_0_c15_c4_6 = 0x%lx\n", __func__, sysreg);

	/* Do the actual MCA trip */
	pr_crit("Write to SERR1_STATUS: msr s3_0_c15_c4_7, 0x%llx\n",
		*trip_val);
	asm volatile("msr s3_0_c15_c4_7, %0" : : "r" (*trip_val));
}
/*
 * mca_trip() - debugfs "set" callback for the mca_trip node
 * @data: unused attribute cookie
 * @val:  value to be written to SERR1_STATUS on a Denver core
 *
 * Runs do_mca_trip() synchronously (wait = 1, so passing the address of the
 * on-stack @val is safe) on one CPU from denver_cpumask.
 *
 * Return: the result of smp_call_function_any(), so that a write to the
 * debugfs node fails instead of silently succeeding when the cross-call
 * could not be issued.
 */
static int mca_trip(void *data, u64 val)
{
	return smp_call_function_any(&denver_cpumask, do_mca_trip, &val, 1);
}
/*
 * Debugfs "get" callback for the mca_trip node.
 *
 * Hands back the special MCA value that can be written back to the node to
 * trip an MCA error for debug purposes. Always returns 0.
 */
static int mca_trip_null_get(void *data, u64 *val)
{
	const u64 trip_pattern = 0xb400000000000404UL;

	*val = trip_pattern;
	return 0;
}
/*
 * mca_trip_open() - open handler for the mca_trip debugfs node
 *
 * Binds mca_trip_null_get()/mca_trip() as the simple-attribute get/set pair.
 * The simple attribute helpers format the value as an unsigned long long, so
 * the format string uses the "ll" length modifier.
 *
 * Return: the result of simple_attr_open().
 */
static int mca_trip_open(struct inode *inode, struct file *file)
{
	return simple_attr_open(inode, file, mca_trip_null_get, mca_trip,
				"0x%08llx");
}
static const struct file_operations fops_mca_trip = {
.read = simple_attr_read,
.write = simple_attr_write,
.open = mca_trip_open,
.llseek = noop_llseek,
};
/* MCA bank handling functions */
/*
 * read_denver_bank_status() - read a bank's status word through ARI
 * @bank:     bank descriptor; bank->bank selects the SERR index
 * @core_num: logical CPU number, translated to the physical instance
 * @data:     receives the value read
 *
 * Return: 0 on success, -EINVAL if the ARI call reported a failure.
 */
static int read_denver_bank_status(struct denver_mca_bank *bank, u8 core_num,
				   u64 *data)
{
	u32 ari_error;
	mca_cmd_t request = {.cmd = MCA_ARI_CMD_RD_SERR,
			     .idx = bank->bank + MCA_ARI_SERR_IDX_OFF,
			     .subidx = MCA_ARI_RW_SUBIDX_STAT,
			     .inst = tegra18_logical_to_physical_cpu(core_num)};

	if (tegra_mce_read_uncore_mca(request, data, &ari_error) == 0)
		return 0;

	pr_err("%s: ARI call failed\n", __func__);
	return -EINVAL;
}
/*
 * read_denver_bank_addr() - read a bank's address word through ARI
 * @bank:     bank descriptor; bank->bank selects the SERR index
 * @core_num: logical CPU number, translated to the physical instance
 * @data:     receives the value read
 *
 * Return: 0 on success, -EINVAL if the ARI call reported a failure.
 */
static int read_denver_bank_addr(struct denver_mca_bank *bank, u8 core_num,
				 u64 *data)
{
	u32 ari_error;
	mca_cmd_t request = {.cmd = MCA_ARI_CMD_RD_SERR,
			     .idx = bank->bank + MCA_ARI_SERR_IDX_OFF,
			     .subidx = MCA_ARI_RW_SUBIDX_ADDR,
			     .inst = tegra18_logical_to_physical_cpu(core_num)};

	if (tegra_mce_read_uncore_mca(request, data, &ari_error) == 0)
		return 0;

	pr_err("%s: ARI call failed\n", __func__);
	return -EINVAL;
}
/*
 * print_bank() - dump a decoded view of one MCA bank to the kernel log
 * @mca_bank: bank descriptor (name, known-error table, register accessors)
 * @status:   status word already read from the bank
 * @core_num: logical CPU passed on to the ARI address read; only used when
 *            mca_bank->bank == 1
 *
 * Prints the raw status, the matching entry of the bank's error table (if
 * any), the OVF/UC/EN flags, and - when the MV/AV flags say they are valid -
 * the MSC1/MSC2 and ADDR registers. For bank 1 the address is fetched with
 * read_denver_bank_addr(); if that read fails the address is reported as
 * unavailable rather than printing an uninitialised value.
 */
static void print_bank(struct denver_mca_bank *mca_bank, u64 status,
		       u8 core_num)
{
	struct denver_mca_error *errors;
	u64 msc1, msc2;
	u64 addr = 0;
	u16 error;
	u64 i;
	int found = 0;
	int addr_valid = 1;

	pr_crit("**************************************");
	pr_crit("Machine check error in %s:\n", mca_bank->name);
	pr_crit("\tStatus = 0x%llx\n", status);

	/* Find the name of known errors */
	error = get_mca_status_error_code(status);
	errors = mca_bank->errors;
	if (errors) {
		/* The table is terminated by an entry with a NULL name */
		for (i = 0; errors[i].name; i++) {
			if (errors[i].error_code == error) {
				pr_crit("\t%s: 0x%x\n", errors[i].name, error);
				found = 1;
				break;
			}
		}
		if (!found)
			pr_crit("\tUnknown error: 0x%x\n", error);
	} else {
		pr_crit("\tBank does not have any known errors\n");
	}

	if (status & SERRi_STATUS_OVF)
		pr_crit("\tOverflow (there may be more errors)\n");

	if (status & SERRi_STATUS_UC)
		pr_crit("\tUncorrected (this is fatal)\n");
	else
		pr_crit("\tCorrectable (but, not corrected)\n");

	if (status & SERRi_STATUS_EN)
		pr_crit("\tError reporting enabled when error arrived\n");
	else
		pr_crit("\tError reporting not enabled when error arrived\n");

	if (status & SERRi_STATUS_MV) {
		msc1 = mca_bank->msc1();
		msc2 = mca_bank->msc2();
		pr_crit("\tMSC1 = 0x%llx\n", msc1);
		pr_crit("\tMSC2 = 0x%llx\n", msc2);
	}

	if (status & SERRi_STATUS_AV) {
		if (mca_bank->bank == 1) {
			/* Bank 1 is read through ARI and that call can fail */
			if (read_denver_bank_addr(mca_bank, core_num,
						  &addr) != 0)
				addr_valid = 0;
		} else {
			addr = mca_bank->addr();
		}

		if (addr_valid)
			pr_crit("\tADDR = 0x%llx\n", addr);
		else
			pr_crit("\tADDR = <unavailable>\n");
	}

	pr_crit("**************************************");
}
/*
 * Registered MCA banks. Entries are added and removed by
 * register_denver_mca_bank()/unregister_denver_mca_bank() and walked by the
 * MCA handlers; every access in this file is made with denver_mca_lock held
 * and interrupts disabled.
 */
static LIST_HEAD(denver_mca_list);
static DEFINE_RAW_SPINLOCK(denver_mca_lock);
/*
 * register_denver_mca_bank() - make a bank visible to the MCA handlers
 * @bank: bank descriptor; linked into denver_mca_list through bank->node
 *
 * The list is modified under denver_mca_lock with interrupts disabled.
 */
void register_denver_mca_bank(struct denver_mca_bank *bank)
{
	unsigned long irq_state;

	raw_spin_lock_irqsave(&denver_mca_lock, irq_state);
	list_add(&bank->node, &denver_mca_list);
	raw_spin_unlock_irqrestore(&denver_mca_lock, irq_state);
}
EXPORT_SYMBOL(register_denver_mca_bank);
/*
 * unregister_denver_mca_bank() - remove a bank from the MCA handlers' list
 * @bank: bank descriptor whose bank->node is unlinked from denver_mca_list
 *
 * The list is modified under denver_mca_lock with interrupts disabled.
 */
void unregister_denver_mca_bank(struct denver_mca_bank *bank)
{
	unsigned long irq_state;

	raw_spin_lock_irqsave(&denver_mca_lock, irq_state);
	list_del(&bank->node);
	raw_spin_unlock_irqrestore(&denver_mca_lock, irq_state);
}
EXPORT_SYMBOL(unregister_denver_mca_bank);
/*
 * denver_mca_handler() - report errors latched in the locally readable banks
 *
 * Walks the registered banks under denver_mca_lock, skipping bank 1 and any
 * bank whose index is above the count read from s3_0_c15_c3_0, and prints
 * every bank whose status has SERRi_STATUS_VAL set. If at least one bank was
 * printed, ari_clear_serr() is called before the lock is dropped.
 *
 * Return: always 1.
 */
static int denver_mca_handler(void)
{
	struct denver_mca_bank *entry;
	unsigned long irq_state;
	u64 nr_banks;
	u64 bank_status;
	int need_clear = 0;

	/* Ask the hardware how many banks exist (low byte of the register) */
	asm volatile("mrs %0, s3_0_c15_c3_0" : "=r" (nr_banks) : );
	nr_banks &= 0xff;

	/* Iterate through the banks looking for one with an error */
	raw_spin_lock_irqsave(&denver_mca_lock, irq_state);
	list_for_each_entry(entry, &denver_mca_list, node) {
		if (entry->bank > nr_banks || entry->bank == 1)
			continue;

		bank_status = entry->stat();
		if (!(bank_status & SERRi_STATUS_VAL))
			continue;

		print_bank(entry, bank_status, -1);
		need_clear = 1;
	}

	if (need_clear)
		ari_clear_serr();
	raw_spin_unlock_irqrestore(&denver_mca_lock, irq_state);

	return 1;
}
/*
 * MCA assert register dump.
 *
 * For every online Denver CPU, reads the status of each registered bank 1
 * through ARI and prints the banks that have SERRi_STATUS_VAL set. Banks
 * whose ARI read fails are skipped. Once any bank has been printed,
 * ari_clear_serr() is called at the end of that CPU's pass and of every
 * later Denver CPU's pass (the flag is not reset between CPUs).
 *
 * Return: always 1.
 */
static int denver_assert_mca_handler(void)
{
	struct denver_mca_bank *entry;
	unsigned long irq_state;
	u64 bank_status;
	int need_clear = 0;
	int cpu;

	/* Find the other Denver cores */
	for_each_online_cpu(cpu) {
		if (!tegra18_is_cpu_denver(cpu))
			continue;

		raw_spin_lock_irqsave(&denver_mca_lock, irq_state);
		list_for_each_entry(entry, &denver_mca_list, node) {
			if (entry->bank != 1)
				continue;
			if (read_denver_bank_status(entry, cpu,
						    &bank_status) != 0)
				continue;
			if (!(bank_status & SERRi_STATUS_VAL))
				continue;

			print_bank(entry, bank_status, cpu);
			need_clear = 1;
		}

		if (need_clear)
			ari_clear_serr();
		raw_spin_unlock_irqrestore(&denver_mca_lock, irq_state);
	}

	return 1;
}
/*
 * Handle SError for Denver cores.
 *
 * Does nothing (returns 1) when the executing CPU's implementer is not
 * NVIDIA. Otherwise reads s3_0_c15_c3_1 and, if bit 2 (mask 0x4) is set,
 * runs denver_mca_handler() and then writes 0 back to that register.
 *
 * Return: the value of denver_mca_handler() when it ran, otherwise 1.
 */
static int denver_serr_hook(struct pt_regs *regs, int reason,
			    unsigned int esr, void *priv)
{
	u64 serr_status;
	int handled;

	/* Check that this is a Denver CPU */
	if (read_cpuid_implementor() != ARM_CPU_IMP_NVIDIA)
		return 1;

	asm volatile("mrs %0, s3_0_c15_c3_1" : "=r" (serr_status));
	if (!(serr_status & 4))
		return 1;

	handled = denver_mca_handler();

	/* Write zero back to the register once the banks have been dumped */
	serr_status = 0;
	asm volatile("msr s3_0_c15_c3_1, %0" : : "r" (serr_status));

	return handled;
}
/*
 * Handle SError for Denver cores caused by JSR:MTS MCA (SERR1 Bank).
 *
 * If one Denver core asserts, the other Denver core deadlocks. So when a
 * Denver core has asserted we have to assume both Denvers are gone, and the
 * Denver MCA banks must be read from an A57 core using ARI.
 *
 * Return: 1 when called on an NVIDIA-implemented CPU (nothing is done there),
 * otherwise the value of denver_assert_mca_handler().
 */
static int denver_assert_serr_hook(struct pt_regs *regs, int reason,
				   unsigned int esr, void *priv)
{
	/* Run the denver_assert_mca_handler() only on A57 */
	if (read_cpuid_implementor() == ARM_CPU_IMP_NVIDIA)
		return 1;

	return denver_assert_mca_handler();
}
/*
 * SError hook whose callback is denver_serr_hook(); passed to
 * register_serr_hook() in denver_serr_init().
 */
static struct serr_hook hook = {
.fn = denver_serr_hook
};
/*
 * Hook for handling JSR:MTS MCA; its callback is denver_assert_serr_hook().
 * Also passed to register_serr_hook() in denver_serr_init().
 */
static struct serr_hook assert_hook = {
.fn = denver_assert_serr_hook
};
/*
 * Enable Denver MCA on the executing CPU by writing 1 to s3_0_c15_c3_2.
 *
 * Used as a cross-call target every time a Denver core comes online and for
 * the Denver cores already online at init. @info is unused.
 */
static void denver_setup_mca(void *info)
{
	unsigned long enable_bit = 1;

	asm volatile("msr s3_0_c15_c3_2, %0" : : "r" (enable_bit));
}
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
/*
 * denver_mca_online() - CPU hotplug "online" callback
 * @cpu: CPU that is coming online
 *
 * When the recorded MIDR of @cpu names NVIDIA as implementer, runs
 * denver_setup_mca() on that CPU and waits for it to finish. Other CPUs are
 * left alone.
 *
 * Return: always 0.
 */
static int denver_mca_online(unsigned int cpu)
{
	struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);

	if (MIDR_IMPLEMENTOR(info->reg_midr) != ARM_CPU_IMP_NVIDIA)
		return 0;

	smp_call_function_single(cpu, denver_setup_mca, NULL, 1);
	return 0;
}
#else
/*
 * denver_mca_setup_callback() - legacy CPU hotplug notifier callback
 * @nfb:    notifier block (unused)
 * @action: hotplug event
 * @hcpu:   CPU number encoded in a pointer
 *
 * On CPU_ONLINE / CPU_ONLINE_FROZEN for a CPU whose recorded MIDR names
 * NVIDIA as implementer, runs denver_setup_mca() on that CPU and waits for
 * it to finish. All other events and CPUs are ignored.
 *
 * Return: always 0.
 */
static int denver_mca_setup_callback(struct notifier_block *nfb,
				     unsigned long action, void *hcpu)
{
	int cpu = (int)(uintptr_t) hcpu;
	struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);

	if (action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
		return 0;
	if (MIDR_IMPLEMENTOR(info->reg_midr) != ARM_CPU_IMP_NVIDIA)
		return 0;

	smp_call_function_single(cpu, denver_setup_mca, NULL, 1);
	return 0;
}
/*
 * Hotplug notifier used on kernels older than 4.14; its callback is
 * denver_mca_setup_callback().
 */
static struct notifier_block denver_mca_notifier = {
.notifier_call = denver_mca_setup_callback
};
#endif
/* debugfs directory "denver_mca" and the "mca_trip" file inside it */
static struct dentry *debugfs_dir;
static struct dentry *debugfs_node;

/*
 * Non-zero once denver_serr_init() has registered the SError hooks, so that
 * denver_serr_exit() only undoes work that was actually done (init returns
 * early, without registering anything, on chips other than TEGRA186).
 */
static int denver_serr_active;

#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
/*
 * Hotplug state number handed out for CPUHP_AP_ONLINE_DYN, or -1 if none was
 * obtained. Dynamic states must be removed using the returned number, not
 * the CPUHP_AP_ONLINE_DYN placeholder.
 */
static int denver_mca_hp_state = -1;
#endif

/*
 * denver_serr_init() - module init
 *
 * On TEGRA186: registers both SError hooks, installs the CPU hotplug
 * callback, enables MCA on every Denver CPU that is already online (adding
 * it to denver_cpumask), and creates the debugfs test node.
 *
 * Failure to install the hotplug callback or to create the debugfs entries
 * is logged but is not fatal: the SError hooks stay registered and keep
 * working, and nothing is left half-registered behind a failed init.
 *
 * Return: always 0.
 */
static int __init denver_serr_init(void)
{
	int cpu;
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
	int hp_state;
#endif

	if (tegra_get_chip_id() != TEGRA186)
		return 0;

	/* Register the SError hook so that this driver is called on SError */
	register_serr_hook(&hook);
	register_serr_hook(&assert_hook);
	denver_serr_active = 1;

	/* Ensure that any CPU brought online sets up MCA */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
	hp_state = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
					     "denver:online",
					     denver_mca_online, NULL);
	if (hp_state < 0)
		pr_err("Error %d installing denver:online hotplug callback.\n",
		       hp_state);
	else
		denver_mca_hp_state = hp_state;
#else
	register_hotcpu_notifier(&denver_mca_notifier);
#endif

	/* Enable MCA on all online CPUs */
	for_each_online_cpu(cpu) {
		/* Skip non-Denver CPUs */
		if (!tegra18_is_cpu_denver(cpu))
			continue;
		cpumask_set_cpu(cpu, &denver_cpumask);
		smp_call_function_single(cpu, denver_setup_mca, NULL, 1);
	}

	/* Install debugfs nodes to test the MCA behavior */
	debugfs_dir = debugfs_create_dir("denver_mca", NULL);
	if (!debugfs_dir) {
		pr_err("Error creating tegra_mca debugfs dir.\n");
		return 0;
	}

	debugfs_node = debugfs_create_file("mca_trip", 0600, debugfs_dir, NULL,
					   &fops_mca_trip);
	if (!debugfs_node)
		pr_err("Error creating mca_trip debugfs node.\n");

	return 0;
}
module_init(denver_serr_init);

/*
 * denver_serr_exit() - module exit
 *
 * Undoes denver_serr_init(): removes the debugfs entries, unregisters both
 * SError hooks and removes the hotplug callback. Does nothing if init bailed
 * out before registering anything.
 */
static void __exit denver_serr_exit(void)
{
	if (!denver_serr_active)
		return;

	debugfs_remove_recursive(debugfs_dir);
	unregister_serr_hook(&hook);
	unregister_serr_hook(&assert_hook);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
	if (denver_mca_hp_state >= 0)
		cpuhp_remove_state_nocalls(denver_mca_hp_state);
#else
	unregister_hotcpu_notifier(&denver_mca_notifier);
#endif
}
module_exit(denver_serr_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Denver Machine Check / SError handler");