Jetpack/kernel/nvidia/drivers/video/tegra/host/scale_emc.c
dchvs 31faf4d851 cti_kernel: Add CTI sources
Elroy L4T r32.4.4 – JetPack 4.4.1
2021-03-15 20:15:11 -06:00

309 lines
8.0 KiB
C

/*
* drivers/video/tegra/host/scale_emc.c
*
* Tegra Graphics Host 3D clock scaling
*
* Copyright (c) 2010-2015, NVIDIA Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* 3d clock scaling mechanism
*
* module3d_notify_busy() is called upon submit, module3d_notify_idle() is
* called when all outstanding submits are completed. Both functions notify
* the governor about changed state.
*
* 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
* bezier-like factor added to pull 3d.emc rate a bit lower.
*/
#include <linux/types.h>
#include <linux/clk.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/ftrace.h>
#include <soc/tegra/chip-id.h>
#include "chip_support.h"
#include "dev.h"
#include "scale_emc.h"
#include "nvhost_acm.h"
#include "nvhost_scale.h"
/* Square of x. The argument is expanded (and therefore evaluated) twice. */
#define POW2(x) ((x) * (x))
/*
 * 20.12 fixed point arithmetic
 *
 * FXFRAC is the number of fractional bits; FX_HALF is 0.5 in that format
 * (2048) and is added before FX_TO_INT() to round to nearest.
 */
static const int FXFRAC = 12;
static const int FX_HALF = (1 << 12) / 2;
/*
 * Integer <-> fixed-point conversion by shifting FXFRAC bits. These are
 * plain shifts on whatever type the argument has; FX_TO_INT() drops the
 * fractional bits.
 */
#define INT_TO_FX(x) ((x) << FXFRAC)
#define FX_TO_INT(x) ((x) >> FXFRAC)
/* Fixed-point multiply and divide; both are defined at the end of this file. */
static int FXMUL(int x, int y);
static int FXDIV(int x, int y);
/*
 * Unit conversion between Hz and MHz. HZ_TO_MHZ() is an integer division,
 * so any sub-MHz remainder is discarded.
 */
#define MHZ_TO_HZ(x) ((x) * 1000000)
#define HZ_TO_MHZ(x) ((x) / 1000000)
/*
 * Per-device scaling state. An instance is allocated in
 * nvhost_scale_emc_init() and stored in the device profile's private_data.
 */
struct nvhost_gr3d_params {
/* coefficients filled in by nvhost_scale_emc_calibrate_emc() */
struct nvhost_emc_params emc_params;
/* index of the 3d clock in the device data's clk[] array */
int clk_3d;
/* NOTE(review): not referenced anywhere in the visible part of this file */
int clk_3d2;
/* index of the 3d.emc clock in the device data's clk[] array */
int clk_3d_emc;
};
/*
 * Translate a clock index into the struct clk pointer stored at that
 * position of the clk[] array in the driver data attached to the
 * profile's platform device. No bounds check is performed on @index.
 */
static inline struct clk *clk(struct nvhost_device_profile *profile, int index)
{
	struct nvhost_device_data *dev_data;

	dev_data = platform_get_drvdata(profile->pdev);

	return dev_data->clk[index];
}
/*
 * nvhost_scale_emc_get_emc_rate()
 *
 * Evaluate the calibrated 3d -> 3d.emc mapping for one 3d clock rate.
 *
 * @emc_params: coefficients (slope, offset and, unless ->linear is set,
 *              the quadratic dip terms)
 * @freq:       3d clock rate in Hz; truncated to whole MHz before use
 *
 * All arithmetic is done in 20.12 fixed-point MHz:
 *
 *   rate = slope * f + offset - (dip_slope * (f - xmid)^2 + dip_offset)
 *
 * where the parenthesised dip is skipped for linear scaling.
 *
 * Returns the EMC rate in Hz, rounded to the nearest MHz and never
 * negative.
 */
long nvhost_scale_emc_get_emc_rate(struct nvhost_emc_params *emc_params,
				    long freq)
{
	long rate;
	int from_mid;

	/* Hz -> fixed-point MHz */
	freq = INT_TO_FX(HZ_TO_MHZ(freq));

	/* linear part */
	rate = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;

	/* quadratic dip centred on emc_xmid */
	if (!emc_params->linear) {
		from_mid = freq - emc_params->emc_xmid;
		rate -= FXMUL(emc_params->emc_dip_slope,
			      FXMUL(from_mid, from_mid)) +
			emc_params->emc_dip_offset;
	}

	/* adding one half before dropping the fraction rounds to nearest */
	rate = MHZ_TO_HZ(FX_TO_INT(rate + FX_HALF));
	if (rate < 0)
		return 0;

	return rate;
}
/*
 * nvhost_scale_emc_callback()
 *
 * Re-derive the 3d.emc rate from the 3d clock and request it through
 * nvhost_module_set_rate(), using the gr3d parameters stored in
 * profile->private_data.
 *
 * @freq is not used: the 3d rate is read back from the clock itself with
 * clk_get_rate().
 */
void nvhost_scale_emc_callback(struct nvhost_device_profile *profile,
			       unsigned long freq)
{
	struct nvhost_gr3d_params *params = profile->private_data;
	long rate_3d;
	long emc_hz;

	/* current 3d rate as reported by the clock */
	rate_3d = (long) clk_get_rate(clk(profile, params->clk_3d));

	/* matching EMC rate, then hand it to the module clock code */
	emc_hz = nvhost_scale_emc_get_emc_rate(&params->emc_params, rate_3d);
	nvhost_module_set_rate(profile->pdev, params, emc_hz,
			       params->clk_3d_emc, NVHOST_CLOCK);
}
/*
 * Pass @target to clk_round_rate() for @c and convert the rate it returns
 * from Hz to whole MHz in 20.12 fixed point.
 */
static unsigned long emc_rounded_rate_fx(struct clk *c, unsigned long target)
{
	unsigned long hz = clk_round_rate(c, target);

	return INT_TO_FX(HZ_TO_MHZ(hz));
}

/*
 * nvhost_scale_emc_calibrate_emc()
 *
 * Compute the emc scaling parameters used by
 * nvhost_scale_emc_get_emc_rate():
 *
 *   Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
 *
 *   Remc - 3d.emc rate
 *   R3d  - 3d.cbus rate
 *   Rm   - 3d.cbus 'middle' rate = (max + min) / 2   (emc_xmid)
 *   S    - emc_slope
 *   O    - emc_offset
 *   Sd   - emc_dip_slope
 *   Od   - emc_dip_offset
 *
 * i.e. a quadratic dip centred on the middle 3d frequency is superposed
 * on a linear correlation of 3d.emc to 3d clock rates.
 *
 * S and O are chosen so that the maximum 3d rate produces exactly the
 * maximum 3d.emc rate and the minimum 3d rate produces at least the
 * minimum 3d.emc rate.
 *
 * Sd and Od are chosen to give the largest dip that still keeps the
 * 3d.emc rate monotonically non-decreasing in the 3d rate. For that the
 * first derivative of Remc with respect to R3d must be zero at the
 * minimal 3d rate:
 *
 *   R'emc          = S - 2 * Sd * (R3d - Rm)
 *   R'emc(R3d-min) = 0
 *   S              = 2 * Sd * (R3d-min - Rm)
 *                  = 2 * Sd * (R3d-min - R3d-max) / 2
 *
 *   +------------------------------+
 *   | Sd = S / (R3d-min - R3d-max) |
 *   +------------------------------+
 *
 *   dip = Sd * (R3d - Rm)^2 + Od
 *
 * Requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
 *
 *   Sd * (R3d-min - Rm)^2 + Od = 0
 *   Od = -Sd * ((R3d-min - R3d-max) / 2)^2
 *      = -Sd * ((R3d-min - R3d-max)^2) / 4
 *
 *   +------------------------------+
 *   | Od = (emc-max - emc-min) / 4 |
 *   +------------------------------+
 *
 * @emc_params: receives the computed coefficients
 * @clk_3d:     3d clock, queried for its rounded rates at 0 and UINT_MAX
 * @clk_3d_emc: 3d.emc clock, queried the same way
 * @linear_emc: when true only S and O are computed and the dip terms are
 *              left untouched
 */
void nvhost_scale_emc_calibrate_emc(struct nvhost_emc_params *emc_params,
				    struct clk *clk_3d, struct clk *clk_3d_emc,
				    bool linear_emc)
{
	unsigned long emc_hi;
	unsigned long emc_lo;
	unsigned long rate_hi;
	unsigned long rate_lo;
	long residual;

	/* rate limits of both clocks, in fixed-point MHz */
	emc_hi = emc_rounded_rate_fx(clk_3d_emc, UINT_MAX);
	emc_lo = emc_rounded_rate_fx(clk_3d_emc, 0);
	rate_hi = emc_rounded_rate_fx(clk_3d, UINT_MAX);
	rate_lo = emc_rounded_rate_fx(clk_3d, 0);

	/* straight line through (rate_hi, emc_hi) with slope S */
	emc_params->emc_slope = FXDIV((emc_hi - emc_lo), (rate_hi - rate_lo));
	emc_params->emc_offset = emc_hi -
		FXMUL(emc_params->emc_slope, rate_hi);

	/*
	 * Guarantee max 3d rate maps to max emc rate: add whatever is still
	 * missing between emc_hi and the line evaluated at rate_hi.
	 */
	emc_params->emc_offset += emc_hi -
		(FXMUL(emc_params->emc_slope, rate_hi) +
		 emc_params->emc_offset);

	emc_params->linear = linear_emc;

	if (!linear_emc) {
		emc_params->emc_dip_offset = (emc_hi - emc_lo) / 4;
		emc_params->emc_dip_slope =
			-FXDIV(emc_params->emc_slope, rate_hi - rate_lo);
		emc_params->emc_xmid = (rate_hi + rate_lo) / 2;

		/*
		 * Value of the dip at the maximum 3d rate as computed in
		 * fixed point; subtracting it from Od makes the dip vanish
		 * there.
		 */
		residual =
			emc_params->emc_dip_offset +
			FXMUL(emc_params->emc_dip_slope,
			      FXMUL(rate_hi - emc_params->emc_xmid,
				    rate_hi - emc_params->emc_xmid));
		emc_params->emc_dip_offset -= residual;
	}
}
/*
 * nvhost_scale_emc_init(dev)
 *
 * Initialise 3d clock scaling for the given device.
 *
 * Calls nvhost_scale_init() first (per the original description this is
 * what installs the pod_scaling governor) and does nothing more if no
 * power profile exists afterwards. Otherwise it allocates the gr3d
 * parameters, records the clock indices (3d = 0, 3d.emc = 1), publishes
 * them as the profile's private data, calibrates the EMC mapping and
 * registers the parameters as an acm client.
 *
 * If the allocation fails the generic scaling set-up is torn down again
 * with nvhost_scale_deinit(). A failure to register as an acm client is
 * only logged.
 */
void nvhost_scale_emc_init(struct platform_device *pdev)
{
	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
	struct nvhost_device_profile *profile;
	struct nvhost_gr3d_params *params;
	struct clk *clk_3d;
	struct clk *clk_3d_emc;

	nvhost_scale_init(pdev);

	profile = pdata->power_profile;
	if (!profile)
		return;

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (!params) {
		/* undo nvhost_scale_init() */
		nvhost_scale_deinit(pdev);
		return;
	}

	/* positions of the two clocks in pdata->clk[] */
	params->clk_3d = 0;
	params->clk_3d_emc = 1;

	profile->private_data = params;

	clk_3d = clk(profile, params->clk_3d);
	clk_3d_emc = clk(profile, params->clk_3d_emc);
	nvhost_scale_emc_calibrate_emc(&params->emc_params, clk_3d, clk_3d_emc,
				       pdata->linear_emc);

	if (nvhost_module_add_client(pdev, params))
		nvhost_err(&pdev->dev, "failed to register as a acm client");
}
/*
 * nvhost_scale_emc_deinit(dev)
 *
 * Stop 3d scaling for the given device: unregister the gr3d parameters
 * from the acm client list, free them, clear the profile's private data
 * pointer and finally tear down the generic scaling state. Does nothing
 * when the device has no power profile.
 */
void nvhost_scale_emc_deinit(struct platform_device *pdev)
{
	struct nvhost_device_data *pdata = platform_get_drvdata(pdev);

	if (pdata->power_profile) {
		/* Remove devfreq from acm client list */
		nvhost_module_remove_client(pdev,
				pdata->power_profile->private_data);

		kfree(pdata->power_profile->private_data);
		pdata->power_profile->private_data = NULL;

		nvhost_scale_deinit(pdev);
	}
}
/*
 * 20.12 fixed point arithmetic
 *
 * int FXMUL(int x, int y)
 * int FXDIV(int x, int y)
 */

/*
 * Multiply two 20.12 fixed-point values. The product is formed in 64 bits
 * so the intermediate cannot overflow, shifted right by FXFRAC to get
 * back to 20.12, and then narrowed to int on return.
 */
static int FXMUL(int x, int y)
{
	long long product = (long long) x * (long long) y;

	return product >> FXFRAC;
}
/*
 * Divide two 20.12 fixed-point values using 32-bit arithmetic only.
 *
 * long long div operation not supported, must shift manually. This
 * would have been
 *
 *   return (((long long) x) << FXFRAC) / (long long) y;
 *
 * Instead the numerator is shifted left as far as it can go without
 * overflowing (at most FXFRAC bits) and the denominator is shifted right
 * by the remaining bits, which costs precision in y for large x.
 *
 * Returns 0 for x == 0. When the reduced denominator becomes 0 the result
 * saturates to +/-0x7FFFFFFF (MAX_FIXED) with the sign of x; results for
 * x > 0 are unchanged from the previous implementation.
 */
static int FXDIV(int x, int y)
{
	unsigned int magnitude;
	int pos;

	if (x == 0)
		return 0;

	/*
	 * |x| in unsigned arithmetic: negating INT_MIN as an int would
	 * overflow.
	 */
	magnitude = x < 0 ? 0U - (unsigned int) x : (unsigned int) x;

	/* find largest allowable left shift of numerator, limit to FXFRAC */
	pos = 31 - fls(magnitude);
	if (pos < 0)
		pos = 0;	/* fls() is 32 only for x == INT_MIN */
	if (pos > FXFRAC)
		pos = FXFRAC;

	y >>= FXFRAC - pos;
	if (y == 0)	/* overflow, saturate with the sign of x */
		return x < 0 ? -0x7FFFFFFF : 0x7FFFFFFF;

	/* INT_MIN / -1 is not representable; saturate instead */
	if (y == -1 && magnitude > 0x7FFFFFFFU)
		return 0x7FFFFFFF;

	/*
	 * Multiply rather than shift: left-shifting a negative int is
	 * undefined in ISO C. |x| < 2^(31 - pos), so the product fits.
	 */
	return (x * (1 << pos)) / y;
}