forked from rrcarlosr/Jetpack
859 lines
18 KiB
C
859 lines
18 KiB
C
/*
|
|
* drivers/misc/tegra-profiler/main.c
|
|
*
|
|
* Copyright (c) 2013-2020, NVIDIA CORPORATION. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/module.h>
|
|
#include <linux/err.h>
|
|
#include <linux/sched.h>
|
|
|
|
#include <linux/tegra_profiler.h>
|
|
|
|
#include "quadd.h"
|
|
#include "arm_pmu.h"
|
|
#include "hrt.h"
|
|
#include "comm.h"
|
|
#include "mmap.h"
|
|
#include "debug.h"
|
|
#include "tegra.h"
|
|
#include "power_clk.h"
|
|
#include "auth.h"
|
|
#include "version.h"
|
|
#include "quadd_proc.h"
|
|
#include "eh_unwind.h"
|
|
#include "uncore_events.h"
|
|
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
#include "carmel_pmu.h"
|
|
#endif
|
|
|
|
#ifdef CONFIG_ARM64
|
|
#include "armv8_pmu.h"
|
|
#else
|
|
#include "armv7_pmu.h"
|
|
#endif
|
|
|
|
static struct quadd_ctx ctx;
|
|
static DEFINE_PER_CPU(struct source_info, ctx_pmu_info);
|
|
static DEFINE_PER_CPU(struct quadd_comm_cap_for_cpu, per_cpu_caps);
|
|
|
|
static struct source_info *get_pmu_info_for_current_cpu(void)
|
|
{
|
|
return this_cpu_ptr(&ctx_pmu_info);
|
|
}
|
|
|
|
static struct quadd_comm_cap_for_cpu *get_capabilities_for_cpu_int(int cpuid)
|
|
{
|
|
return &per_cpu(per_cpu_caps, cpuid);
|
|
}
|
|
|
|
int tegra_profiler_try_lock(void)
|
|
{
|
|
return atomic_cmpxchg(&ctx.tegra_profiler_lock, 0, 1);
|
|
}
|
|
EXPORT_SYMBOL_GPL(tegra_profiler_try_lock);
|
|
|
|
void tegra_profiler_unlock(void)
|
|
{
|
|
atomic_set(&ctx.tegra_profiler_lock, 0);
|
|
}
|
|
EXPORT_SYMBOL_GPL(tegra_profiler_unlock);
|
|
|
|
static int start(void)
|
|
{
|
|
int err;
|
|
|
|
if (tegra_profiler_try_lock()) {
|
|
pr_err("Error: tegra_profiler lock\n");
|
|
return -EBUSY;
|
|
}
|
|
|
|
if (!atomic_cmpxchg(&ctx.started, 0, 1)) {
|
|
if (quadd_mode_is_sampling(&ctx)) {
|
|
if (ctx.pmu) {
|
|
err = ctx.pmu->enable();
|
|
if (err) {
|
|
pr_err("error: pmu enable\n");
|
|
goto out_err;
|
|
}
|
|
}
|
|
}
|
|
|
|
ctx.comm->reset();
|
|
|
|
err = quadd_hrt_start();
|
|
if (err) {
|
|
pr_err("error: hrt start\n");
|
|
goto out_err;
|
|
}
|
|
|
|
err = quadd_uncore_start();
|
|
if (err) {
|
|
pr_err("error: uncore start\n");
|
|
goto out_err_hrt;
|
|
}
|
|
|
|
err = quadd_power_clk_start();
|
|
if (err < 0) {
|
|
pr_err("error: power_clk start\n");
|
|
goto out_err_uncore;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_err_uncore:
|
|
quadd_uncore_stop();
|
|
out_err_hrt:
|
|
quadd_hrt_stop();
|
|
out_err:
|
|
atomic_set(&ctx.started, 0);
|
|
tegra_profiler_unlock();
|
|
|
|
return err;
|
|
}
|
|
|
|
static void stop(void)
|
|
{
|
|
int cpu;
|
|
|
|
if (atomic_cmpxchg(&ctx.started, 1, 0)) {
|
|
quadd_hrt_stop();
|
|
quadd_uncore_stop();
|
|
quadd_power_clk_stop();
|
|
|
|
ctx.comm->reset();
|
|
quadd_unwind_stop();
|
|
|
|
if (ctx.pmu) {
|
|
ctx.pmu->disable();
|
|
for_each_possible_cpu(cpu)
|
|
per_cpu(ctx_pmu_info, cpu).active = 0;
|
|
}
|
|
|
|
if (ctx.carmel_pmu)
|
|
ctx.carmel_pmu_info.active = 0;
|
|
|
|
tegra_profiler_unlock();
|
|
}
|
|
}
|
|
|
|
static inline int
|
|
is_event_supported(struct source_info *si, const struct quadd_event *event)
|
|
{
|
|
unsigned int type, id;
|
|
int i, nr = si->nr_supp_events;
|
|
struct quadd_event *events = si->supp_events;
|
|
|
|
type = event->type;
|
|
id = event->id;
|
|
|
|
if (type == QUADD_EVENT_TYPE_RAW ||
|
|
type == QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE)
|
|
return (id & ~si->raw_event_mask) == 0;
|
|
|
|
if (type == QUADD_EVENT_TYPE_HARDWARE) {
|
|
for (i = 0; i < nr; i++) {
|
|
if (id == events[i].id)
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline bool
|
|
validate_freq(unsigned int freq)
|
|
{
|
|
return freq >= 100 && freq <= 100000;
|
|
}
|
|
|
|
static int
|
|
set_parameters_for_cpu(struct quadd_pmu_setup_for_cpu *params)
|
|
{
|
|
int i, err, nr_pmu = 0;
|
|
int cpuid = params->cpuid;
|
|
|
|
struct source_info *pmu_info = &per_cpu(ctx_pmu_info, cpuid);
|
|
struct quadd_event pmu_events[QUADD_MAX_COUNTERS];
|
|
|
|
if (!ctx.mode_is_sampling)
|
|
return -EINVAL;
|
|
|
|
if (!pmu_info->is_present)
|
|
return -ENODEV;
|
|
|
|
if (pmu_info->nr_supp_events == 0)
|
|
return -ENODEV;
|
|
|
|
if (params->nr_events > QUADD_MAX_COUNTERS)
|
|
return -EINVAL;
|
|
|
|
for (i = 0; i < params->nr_events; i++) {
|
|
struct quadd_event *event = ¶ms->events[i];
|
|
|
|
if (is_event_supported(pmu_info, event)) {
|
|
pmu_events[nr_pmu++] = *event;
|
|
pr_debug("[%d] PMU active event: %#x (%s)\n",
|
|
cpuid, event->id,
|
|
event->type == QUADD_EVENT_TYPE_RAW ?
|
|
"raw" : "hw");
|
|
} else {
|
|
pr_err("[%d] Bad event: %#x (%s)\n", cpuid, event->id,
|
|
event->type == QUADD_EVENT_TYPE_RAW ?
|
|
"raw" : "hw");
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
err = ctx.pmu->set_events(cpuid, pmu_events, nr_pmu);
|
|
if (err) {
|
|
pr_err("PMU set parameters: error\n");
|
|
per_cpu(ctx_pmu_info, cpuid).active = 0;
|
|
return err;
|
|
}
|
|
per_cpu(ctx_pmu_info, cpuid).active = 1;
|
|
|
|
return err;
|
|
}
|
|
|
|
static int verify_app(struct quadd_parameters *p, uid_t task_uid)
|
|
{
|
|
int err;
|
|
uid_t uid = 0;
|
|
|
|
err = quadd_auth_is_debuggable((char *)p->package_name, &uid);
|
|
if (err < 0) {
|
|
pr_err("error: app either non-debuggable or not found: %s\n",
|
|
p->package_name);
|
|
return err;
|
|
}
|
|
|
|
pr_info("app \"%s\" is debuggable, uid: %u\n",
|
|
p->package_name, (unsigned int)uid);
|
|
|
|
if (task_uid != uid) {
|
|
pr_err("error: uids are not matched: %u, %u\n",
|
|
(unsigned int)task_uid, (unsigned int)uid);
|
|
return -EACCES;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static inline bool
|
|
is_carmel_events(const struct quadd_event *events, int nr)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < nr; i++) {
|
|
if (events[i].type == QUADD_EVENT_TYPE_RAW_CARMEL_UNCORE)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static int
|
|
set_parameters(struct quadd_parameters *p)
|
|
{
|
|
int err = 0;
|
|
uid_t task_uid, current_uid;
|
|
struct task_struct *task = NULL;
|
|
u64 *low_addr_p;
|
|
u32 extra;
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
int nr;
|
|
#endif
|
|
|
|
extra = p->reserved[QUADD_PARAM_IDX_EXTRA];
|
|
|
|
ctx.mode_is_sampling =
|
|
extra & QUADD_PARAM_EXTRA_SAMPLING ? 1 : 0;
|
|
ctx.mode_is_tracing =
|
|
extra & QUADD_PARAM_EXTRA_TRACING ? 1 : 0;
|
|
ctx.mode_is_sample_all =
|
|
extra & QUADD_PARAM_EXTRA_SAMPLE_ALL_TASKS ? 1 : 0;
|
|
ctx.mode_is_trace_all = p->trace_all_tasks;
|
|
ctx.mode_is_sample_tree =
|
|
extra & QUADD_PARAM_EXTRA_SAMPLE_TREE ? 1 : 0;
|
|
ctx.mode_is_trace_tree =
|
|
extra & QUADD_PARAM_EXTRA_TRACE_TREE ? 1 : 0;
|
|
|
|
ctx.mode_is_sampling_timer =
|
|
extra & QUADD_PARAM_EXTRA_SAMPLING_TIMER ? 1 : 0;
|
|
ctx.mode_is_sampling_sched =
|
|
extra & QUADD_PARAM_EXTRA_SAMPLING_SCHED_OUT ? 1 : 0;
|
|
|
|
if (!ctx.mode_is_sampling_timer && !ctx.mode_is_sampling_sched)
|
|
ctx.mode_is_sampling = 0;
|
|
|
|
if (ctx.mode_is_sample_all)
|
|
ctx.mode_is_sample_tree = 0;
|
|
if (ctx.mode_is_trace_all)
|
|
ctx.mode_is_trace_tree = 0;
|
|
|
|
pr_info("flags: s/t/sa/ta/st/tt: %u/%u/%u/%u/%u/%u, st/ss: %u/%u\n",
|
|
ctx.mode_is_sampling,
|
|
ctx.mode_is_tracing,
|
|
ctx.mode_is_sample_all,
|
|
ctx.mode_is_trace_all,
|
|
ctx.mode_is_sample_tree,
|
|
ctx.mode_is_trace_tree,
|
|
ctx.mode_is_sampling_timer,
|
|
ctx.mode_is_sampling_sched);
|
|
|
|
if ((ctx.mode_is_trace_all || ctx.mode_is_sample_all) &&
|
|
!capable(CAP_SYS_ADMIN)) {
|
|
pr_err("error: \"all tasks\" modes are allowed only for root\n");
|
|
return -EACCES;
|
|
}
|
|
|
|
p->package_name[sizeof(p->package_name) - 1] = '\0';
|
|
ctx.param = *p;
|
|
|
|
current_uid = from_kuid(&init_user_ns, current_fsuid());
|
|
pr_info("owner uid: %u\n", current_uid);
|
|
|
|
if ((ctx.mode_is_tracing && !ctx.mode_is_trace_all) ||
|
|
(ctx.mode_is_sampling && !ctx.mode_is_sample_all)) {
|
|
if (ctx.mode_is_sampling && !validate_freq(p->freq)) {
|
|
pr_err("error: incorrect frequency: %u\n", p->freq);
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Currently only first process */
|
|
if (p->nr_pids != 1 || p->pids[0] == 0)
|
|
return -EINVAL;
|
|
|
|
rcu_read_lock();
|
|
task = get_pid_task(find_vpid(p->pids[0]), PIDTYPE_PID);
|
|
rcu_read_unlock();
|
|
if (!task) {
|
|
pr_err("error: process not found: %u\n", p->pids[0]);
|
|
return -ESRCH;
|
|
}
|
|
|
|
task_uid = from_kuid(&init_user_ns, task_uid(task));
|
|
pr_info("task uid: %u\n", task_uid);
|
|
|
|
if (!capable(CAP_SYS_ADMIN)) {
|
|
if (current_uid != task_uid) {
|
|
err = verify_app(p, task_uid);
|
|
if (err < 0)
|
|
goto out_put_task;
|
|
}
|
|
ctx.collect_kernel_ips = 0;
|
|
} else {
|
|
ctx.collect_kernel_ips = 1;
|
|
}
|
|
|
|
low_addr_p =
|
|
(u64 *)&p->reserved[QUADD_PARAM_IDX_BT_LOWER_BOUND];
|
|
ctx.hrt->low_addr = (unsigned long)*low_addr_p;
|
|
|
|
err = quadd_unwind_start(task);
|
|
if (err)
|
|
goto out_put_task;
|
|
}
|
|
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
nr = p->nr_events;
|
|
|
|
if (nr > QUADD_MAX_COUNTERS) {
|
|
err = -EINVAL;
|
|
goto out_put_task;
|
|
}
|
|
|
|
if (ctx.carmel_pmu && is_carmel_events(p->events, nr)) {
|
|
u32 freq = p->reserved[QUADD_PARAM_IDX_UNCORE_FREQ];
|
|
|
|
if (!capable(CAP_SYS_ADMIN)) {
|
|
pr_err("error: Carmel PMU: allowed only for root\n");
|
|
err = -EACCES;
|
|
goto out_put_task;
|
|
}
|
|
|
|
if (!validate_freq(freq)) {
|
|
pr_err("error: incorrect uncore freq: %u\n", freq);
|
|
err = -EINVAL;
|
|
goto out_put_task;
|
|
}
|
|
|
|
err = ctx.carmel_pmu->set_events(-1, p->events, nr);
|
|
if (err) {
|
|
pr_err("Carmel Uncore PMU set parameters: error\n");
|
|
ctx.carmel_pmu_info.active = 0;
|
|
goto out_put_task;
|
|
}
|
|
ctx.carmel_pmu_info.active = 1;
|
|
}
|
|
#endif
|
|
|
|
pr_info("New parameters have been applied\n");
|
|
|
|
out_put_task:
|
|
if (task)
|
|
put_task_struct(task);
|
|
|
|
return err;
|
|
}
|
|
|
|
static void
|
|
get_capabilities_for_cpu(int cpuid, struct quadd_comm_cap_for_cpu *cap)
|
|
{
|
|
int i, id;
|
|
struct quadd_events_cap *events_cap;
|
|
struct source_info *s = &per_cpu(ctx_pmu_info, cpuid);
|
|
|
|
if (!s->is_present)
|
|
return;
|
|
|
|
cap->cpuid = cpuid;
|
|
cap->l2_cache = 0;
|
|
cap->l2_multiple_events = 0;
|
|
|
|
events_cap = &cap->events_cap;
|
|
|
|
events_cap->raw_event_mask = s->raw_event_mask;
|
|
|
|
events_cap->cpu_cycles = 0;
|
|
events_cap->l1_dcache_read_misses = 0;
|
|
events_cap->l1_dcache_write_misses = 0;
|
|
events_cap->l1_icache_misses = 0;
|
|
|
|
events_cap->instructions = 0;
|
|
events_cap->branch_instructions = 0;
|
|
events_cap->branch_misses = 0;
|
|
events_cap->bus_cycles = 0;
|
|
|
|
events_cap->l2_dcache_read_misses = 0;
|
|
events_cap->l2_dcache_write_misses = 0;
|
|
events_cap->l2_icache_misses = 0;
|
|
|
|
for (i = 0; i < s->nr_supp_events; i++) {
|
|
struct quadd_event *event = &s->supp_events[i];
|
|
|
|
id = event->id;
|
|
|
|
if (id == QUADD_EVENT_HW_L2_DCACHE_READ_MISSES ||
|
|
id == QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES ||
|
|
id == QUADD_EVENT_HW_L2_ICACHE_MISSES) {
|
|
cap->l2_cache = 1;
|
|
cap->l2_multiple_events = 1;
|
|
}
|
|
|
|
switch (id) {
|
|
case QUADD_EVENT_HW_CPU_CYCLES:
|
|
events_cap->cpu_cycles = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_INSTRUCTIONS:
|
|
events_cap->instructions = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_BRANCH_INSTRUCTIONS:
|
|
events_cap->branch_instructions = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_BRANCH_MISSES:
|
|
events_cap->branch_misses = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_BUS_CYCLES:
|
|
events_cap->bus_cycles = 1;
|
|
break;
|
|
|
|
case QUADD_EVENT_HW_L1_DCACHE_READ_MISSES:
|
|
events_cap->l1_dcache_read_misses = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_L1_DCACHE_WRITE_MISSES:
|
|
events_cap->l1_dcache_write_misses = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_L1_ICACHE_MISSES:
|
|
events_cap->l1_icache_misses = 1;
|
|
break;
|
|
|
|
case QUADD_EVENT_HW_L2_DCACHE_READ_MISSES:
|
|
events_cap->l2_dcache_read_misses = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_L2_DCACHE_WRITE_MISSES:
|
|
events_cap->l2_dcache_write_misses = 1;
|
|
break;
|
|
case QUADD_EVENT_HW_L2_ICACHE_MISSES:
|
|
events_cap->l2_icache_misses = 1;
|
|
break;
|
|
|
|
default:
|
|
pr_err_once("%s: error: invalid event\n",
|
|
__func__);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
static u32 get_possible_cpu(void)
|
|
{
|
|
int cpu;
|
|
u32 mask = 0;
|
|
struct source_info *s;
|
|
|
|
if (ctx.pmu) {
|
|
for_each_possible_cpu(cpu) {
|
|
/* since we don't support more than 32 CPUs */
|
|
if (cpu >= BITS_PER_BYTE * sizeof(mask))
|
|
break;
|
|
|
|
s = &per_cpu(ctx_pmu_info, cpu);
|
|
if (s->is_present)
|
|
mask |= (1U << cpu);
|
|
}
|
|
}
|
|
|
|
return mask;
|
|
}
|
|
|
|
static void
|
|
get_capabilities(struct quadd_comm_cap *cap)
|
|
{
|
|
unsigned int extra = 0;
|
|
struct quadd_events_cap *events_cap = &cap->events_cap;
|
|
|
|
cap->pmu = ctx.pmu ? 1 : 0;
|
|
|
|
cap->l2_cache = 0;
|
|
|
|
events_cap->cpu_cycles = 0;
|
|
events_cap->l1_dcache_read_misses = 0;
|
|
events_cap->l1_dcache_write_misses = 0;
|
|
events_cap->l1_icache_misses = 0;
|
|
|
|
events_cap->instructions = 0;
|
|
events_cap->branch_instructions = 0;
|
|
events_cap->branch_misses = 0;
|
|
events_cap->bus_cycles = 0;
|
|
|
|
events_cap->l2_dcache_read_misses = 0;
|
|
events_cap->l2_dcache_write_misses = 0;
|
|
events_cap->l2_icache_misses = 0;
|
|
|
|
cap->tegra_lp_cluster = quadd_is_cpu_with_lp_cluster();
|
|
cap->power_rate = 1;
|
|
cap->blocked_read = 1;
|
|
|
|
extra |= QUADD_COMM_CAP_EXTRA_BT_KERNEL_CTX;
|
|
extra |= QUADD_COMM_CAP_EXTRA_GET_MMAP;
|
|
extra |= QUADD_COMM_CAP_EXTRA_GROUP_SAMPLES;
|
|
extra |= QUADD_COMM_CAP_EXTRA_BT_UNWIND_TABLES;
|
|
extra |= QUADD_COMM_CAP_EXTRA_SUPPORT_AARCH64;
|
|
extra |= QUADD_COMM_CAP_EXTRA_SPECIAL_ARCH_MMAP;
|
|
extra |= QUADD_COMM_CAP_EXTRA_UNWIND_MIXED;
|
|
extra |= QUADD_COMM_CAP_EXTRA_UNW_ENTRY_TYPE;
|
|
extra |= QUADD_COMM_CAP_EXTRA_RB_MMAP_OP;
|
|
extra |= QUADD_COMM_CAP_EXTRA_CPU_MASK;
|
|
|
|
if (ctx.hrt->tc) {
|
|
extra |= QUADD_COMM_CAP_EXTRA_ARCH_TIMER;
|
|
if (ctx.hrt->arch_timer_user_access)
|
|
extra |= QUADD_COMM_CAP_EXTRA_ARCH_TIMER_USR;
|
|
}
|
|
|
|
if (ctx.pclk_cpufreq)
|
|
extra |= QUADD_COMM_CAP_EXTRA_CPUFREQ;
|
|
|
|
cap->reserved[QUADD_COMM_CAP_IDX_EXTRA] = extra;
|
|
cap->reserved[QUADD_COMM_CAP_IDX_CPU_MASK] = get_possible_cpu();
|
|
}
|
|
|
|
void quadd_get_state(struct quadd_module_state *state)
|
|
{
|
|
unsigned int status = 0;
|
|
|
|
quadd_hrt_get_state(state);
|
|
|
|
if (ctx.comm->is_active())
|
|
status |= QUADD_MOD_STATE_STATUS_IS_ACTIVE;
|
|
|
|
if (quadd_auth_is_auth_open())
|
|
status |= QUADD_MOD_STATE_STATUS_IS_AUTH_OPEN;
|
|
|
|
state->reserved[QUADD_MOD_STATE_IDX_STATUS] = status;
|
|
}
|
|
|
|
static int
|
|
set_extab(struct quadd_sections *extabs,
|
|
struct quadd_mmap_area *mmap)
|
|
{
|
|
return quadd_unwind_set_extab(extabs, mmap);
|
|
}
|
|
|
|
static void
|
|
delete_mmap(struct quadd_mmap_area *mmap)
|
|
{
|
|
quadd_unwind_clean_mmap(mmap);
|
|
}
|
|
|
|
static int
|
|
is_cpu_present(int cpuid)
|
|
{
|
|
struct source_info *s = &per_cpu(ctx_pmu_info, cpuid);
|
|
|
|
return s->is_present;
|
|
}
|
|
|
|
static struct quadd_comm_control_interface control = {
|
|
.start = start,
|
|
.stop = stop,
|
|
.set_parameters = set_parameters,
|
|
.set_parameters_for_cpu = set_parameters_for_cpu,
|
|
.get_capabilities = get_capabilities,
|
|
.get_capabilities_for_cpu = get_capabilities_for_cpu,
|
|
.get_state = quadd_get_state,
|
|
.set_extab = set_extab,
|
|
.delete_mmap = delete_mmap,
|
|
.is_cpu_present = is_cpu_present,
|
|
};
|
|
|
|
static inline
|
|
struct quadd_event_source *pmu_init(void)
|
|
{
|
|
#ifdef CONFIG_ARM64
|
|
return quadd_armv8_pmu_init();
|
|
#else
|
|
return quadd_armv7_pmu_init();
|
|
#endif
|
|
}
|
|
|
|
static inline void pmu_deinit(void)
|
|
{
|
|
#ifdef CONFIG_ARM64
|
|
quadd_armv8_pmu_deinit();
|
|
#else
|
|
quadd_armv7_pmu_deinit();
|
|
#endif
|
|
}
|
|
|
|
int quadd_late_init(void)
|
|
{
|
|
int i, nr_events, err;
|
|
unsigned int raw_event_mask;
|
|
struct quadd_event *events;
|
|
struct source_info *pmu_info;
|
|
int cpuid;
|
|
|
|
if (unlikely(!ctx.early_initialized))
|
|
return -ENODEV;
|
|
|
|
if (likely(ctx.initialized))
|
|
return 0;
|
|
|
|
ctx.pmu = pmu_init();
|
|
if (IS_ERR(ctx.pmu)) {
|
|
pr_err("PMU init failed\n");
|
|
err = PTR_ERR(ctx.pmu);
|
|
goto out_err;
|
|
}
|
|
|
|
for_each_possible_cpu(cpuid) {
|
|
const struct quadd_arch_info *arch;
|
|
|
|
arch = ctx.pmu->get_arch(cpuid);
|
|
if (!arch)
|
|
continue;
|
|
|
|
pmu_info = &per_cpu(ctx_pmu_info, cpuid);
|
|
pmu_info->is_present = 1;
|
|
|
|
events = pmu_info->supp_events;
|
|
nr_events =
|
|
ctx.pmu->supported_events(cpuid, events,
|
|
QUADD_MAX_COUNTERS,
|
|
&raw_event_mask);
|
|
|
|
pmu_info->nr_supp_events = nr_events;
|
|
pmu_info->raw_event_mask = raw_event_mask;
|
|
|
|
pr_debug("CPU: %d PMU: amount of events: %d, raw mask: %#x\n",
|
|
cpuid, nr_events, raw_event_mask);
|
|
|
|
for (i = 0; i < nr_events; i++)
|
|
pr_debug("CPU: %d PMU event: %s\n", cpuid,
|
|
quadd_get_hw_event_str(events[i].id));
|
|
}
|
|
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
ctx.carmel_pmu = quadd_carmel_uncore_pmu_init();
|
|
if (IS_ERR(ctx.carmel_pmu)) {
|
|
pr_err("Carmel Uncore PMU init failed\n");
|
|
err = PTR_ERR(ctx.carmel_pmu);
|
|
goto out_err_pmu;
|
|
}
|
|
|
|
pmu_info = &ctx.carmel_pmu_info;
|
|
events = pmu_info->supp_events;
|
|
|
|
nr_events = ctx.carmel_pmu->supported_events(0, events,
|
|
QUADD_MAX_COUNTERS,
|
|
&raw_event_mask);
|
|
|
|
pmu_info->is_present = 1;
|
|
pmu_info->nr_supp_events = nr_events;
|
|
pmu_info->raw_event_mask = raw_event_mask;
|
|
#endif
|
|
|
|
ctx.hrt = quadd_hrt_init(&ctx);
|
|
if (IS_ERR(ctx.hrt)) {
|
|
pr_err("error: HRT init failed\n");
|
|
err = PTR_ERR(ctx.hrt);
|
|
goto out_err_carmel_pmu;
|
|
}
|
|
|
|
err = quadd_uncore_init(&ctx);
|
|
if (err < 0) {
|
|
pr_err("error: uncore events init failed\n");
|
|
goto out_err_hrt;
|
|
}
|
|
|
|
err = quadd_power_clk_init(&ctx);
|
|
if (err < 0) {
|
|
pr_err("error: POWER CLK init failed\n");
|
|
goto out_err_uncore;
|
|
}
|
|
|
|
err = quadd_unwind_init(&ctx);
|
|
if (err < 0) {
|
|
pr_err("error: EH unwinding init failed\n");
|
|
goto out_err_power_clk;
|
|
}
|
|
|
|
get_capabilities(&ctx.cap);
|
|
|
|
for_each_possible_cpu(cpuid)
|
|
get_capabilities_for_cpu(cpuid, &per_cpu(per_cpu_caps, cpuid));
|
|
|
|
ctx.initialized = 1;
|
|
|
|
return 0;
|
|
|
|
out_err_power_clk:
|
|
quadd_power_clk_deinit();
|
|
out_err_uncore:
|
|
quadd_uncore_deinit();
|
|
out_err_hrt:
|
|
quadd_hrt_deinit();
|
|
out_err_carmel_pmu:
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
quadd_carmel_uncore_pmu_deinit();
|
|
out_err_pmu:
|
|
#endif
|
|
pmu_deinit();
|
|
|
|
out_err:
|
|
return err;
|
|
}
|
|
|
|
static int __init quadd_early_init(void)
|
|
{
|
|
int cpuid, err;
|
|
|
|
pr_info("version: %s, samples/io: %d/%d\n",
|
|
QUADD_MODULE_VERSION,
|
|
QUADD_SAMPLES_VERSION,
|
|
QUADD_IO_VERSION);
|
|
|
|
atomic_set(&ctx.started, 0);
|
|
|
|
ctx.early_initialized = 0;
|
|
ctx.initialized = 0;
|
|
|
|
#ifndef MODULE
|
|
atomic_set(&ctx.tegra_profiler_lock, 0);
|
|
#endif
|
|
|
|
ctx.get_capabilities_for_cpu = get_capabilities_for_cpu_int;
|
|
ctx.get_pmu_info = get_pmu_info_for_current_cpu;
|
|
|
|
ctx.pmu = NULL;
|
|
for_each_possible_cpu(cpuid) {
|
|
struct source_info *pmu_info = &per_cpu(ctx_pmu_info, cpuid);
|
|
|
|
pmu_info->active = 0;
|
|
pmu_info->is_present = 0;
|
|
}
|
|
|
|
ctx.carmel_pmu = NULL;
|
|
ctx.carmel_pmu_info.active = 0;
|
|
|
|
ctx.comm = quadd_comm_init(&ctx, &control);
|
|
if (IS_ERR(ctx.comm)) {
|
|
err = PTR_ERR(ctx.comm);
|
|
goto out_err;
|
|
}
|
|
|
|
err = quadd_auth_init(&ctx);
|
|
if (err < 0)
|
|
goto out_err_comm;
|
|
|
|
quadd_proc_init(&ctx);
|
|
ctx.early_initialized = 1;
|
|
|
|
return 0;
|
|
|
|
out_err_comm:
|
|
quadd_comm_exit();
|
|
|
|
out_err:
|
|
return err;
|
|
}
|
|
|
|
static void deinit(void)
|
|
{
|
|
if (ctx.initialized) {
|
|
quadd_unwind_deinit();
|
|
quadd_power_clk_deinit();
|
|
quadd_uncore_deinit();
|
|
quadd_hrt_deinit();
|
|
#ifdef CONFIG_ARCH_TEGRA_19x_SOC
|
|
quadd_carmel_uncore_pmu_deinit();
|
|
#endif
|
|
pmu_deinit();
|
|
|
|
ctx.initialized = 0;
|
|
}
|
|
|
|
if (ctx.early_initialized) {
|
|
quadd_proc_deinit();
|
|
quadd_auth_deinit();
|
|
quadd_comm_exit();
|
|
|
|
ctx.early_initialized = 0;
|
|
}
|
|
}
|
|
|
|
static int __init quadd_module_init(void)
|
|
{
|
|
return quadd_early_init();
|
|
}
|
|
|
|
static void __exit quadd_module_exit(void)
|
|
{
|
|
deinit();
|
|
}
|
|
|
|
module_init(quadd_module_init);
|
|
module_exit(quadd_module_exit);
|
|
|
|
MODULE_LICENSE("GPL");
|
|
|
|
MODULE_AUTHOR("Nvidia Ltd");
|
|
MODULE_DESCRIPTION("Tegra profiler");
|