forked from rrcarlosr/Jetpack
309 lines
8.0 KiB
C
309 lines
8.0 KiB
C
/*
|
|
* drivers/video/tegra/host/gr3d/scale_emc.c
|
|
*
|
|
* Tegra Graphics Host 3D clock scaling
|
|
*
|
|
* Copyright (c) 2010-2015, NVIDIA Corporation. All rights reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms and conditions of the GNU General Public License,
|
|
* version 2, as published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
* more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/*
|
|
* 3d clock scaling mechanism
|
|
*
|
|
* module3d_notify_busy() is called upon submit, module3d_notify_idle() is
|
|
* called when all outstanding submits are completed. Both functions notify
|
|
* the governor about changed state.
|
|
*
|
|
* 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
|
|
* bezier-like factor added to pull 3d.emc rate a bit lower.
|
|
*/
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/clk.h>
|
|
#include <linux/export.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/ftrace.h>
|
|
#include <soc/tegra/chip-id.h>
|
|
|
|
#include "chip_support.h"
|
|
#include "dev.h"
|
|
#include "scale_emc.h"
|
|
#include "nvhost_acm.h"
|
|
#include "nvhost_scale.h"
|
|
|
|
#define POW2(x) ((x) * (x))
|
|
|
|
/*
|
|
* 20.12 fixed point arithmetic
|
|
*/
|
|
|
|
static const int FXFRAC = 12;
|
|
static const int FX_HALF = (1 << 12) / 2;
|
|
|
|
#define INT_TO_FX(x) ((x) << FXFRAC)
|
|
#define FX_TO_INT(x) ((x) >> FXFRAC)
|
|
|
|
static int FXMUL(int x, int y);
|
|
static int FXDIV(int x, int y);
|
|
|
|
|
|
#define MHZ_TO_HZ(x) ((x) * 1000000)
|
|
#define HZ_TO_MHZ(x) ((x) / 1000000)
|
|
|
|
struct nvhost_gr3d_params {
|
|
struct nvhost_emc_params emc_params;
|
|
int clk_3d;
|
|
int clk_3d2;
|
|
int clk_3d_emc;
|
|
};
|
|
|
|
/* Convert clk index to struct clk * */
|
|
static inline struct clk *clk(struct nvhost_device_profile *profile, int index)
|
|
{
|
|
struct nvhost_device_data *pdata =
|
|
platform_get_drvdata(profile->pdev);
|
|
return pdata->clk[index];
|
|
}
|
|
|
|
long nvhost_scale_emc_get_emc_rate(struct nvhost_emc_params *emc_params,
|
|
long freq)
|
|
{
|
|
long hz;
|
|
|
|
freq = INT_TO_FX(HZ_TO_MHZ(freq));
|
|
hz = FXMUL(freq, emc_params->emc_slope) + emc_params->emc_offset;
|
|
|
|
if (!emc_params->linear)
|
|
hz -= FXMUL(emc_params->emc_dip_slope,
|
|
FXMUL(freq - emc_params->emc_xmid,
|
|
freq - emc_params->emc_xmid)) +
|
|
emc_params->emc_dip_offset;
|
|
|
|
hz = MHZ_TO_HZ(FX_TO_INT(hz + FX_HALF)); /* round to nearest */
|
|
hz = (hz < 0) ? 0 : hz;
|
|
|
|
return hz;
|
|
}
|
|
|
|
void nvhost_scale_emc_callback(struct nvhost_device_profile *profile,
|
|
unsigned long freq)
|
|
{
|
|
struct nvhost_gr3d_params *gr3d_params = profile->private_data;
|
|
struct nvhost_emc_params *emc_params = &gr3d_params->emc_params;
|
|
long hz;
|
|
long after;
|
|
|
|
/* Set EMC clockrate */
|
|
after = (long) clk_get_rate(clk(profile, gr3d_params->clk_3d));
|
|
hz = nvhost_scale_emc_get_emc_rate(emc_params, after);
|
|
nvhost_module_set_rate(profile->pdev, gr3d_params, hz,
|
|
gr3d_params->clk_3d_emc, NVHOST_CLOCK);
|
|
}
|
|
|
|
/*
|
|
* nvhost_scale_emc_calibrate_emc()
|
|
*
|
|
* Compute emc scaling parameters
|
|
*
|
|
* Remc = S * R3d + O - (Sd * (R3d - Rm)^2 + Od)
|
|
*
|
|
* Remc - 3d.emc rate
|
|
* R3d - 3d.cbus rate
|
|
* Rm - 3d.cbus 'middle' rate = (max + min)/2
|
|
* S - emc_slope
|
|
* O - emc_offset
|
|
* Sd - emc_dip_slope
|
|
* Od - emc_dip_offset
|
|
*
|
|
* this superposes a quadratic dip centered around the middle 3d
|
|
* frequency over a linear correlation of 3d.emc to 3d clock
|
|
* rates.
|
|
*
|
|
* S, O are chosen so that the maximum 3d rate produces the
|
|
* maximum 3d.emc rate exactly, and the minimum 3d rate produces
|
|
* at least the minimum 3d.emc rate.
|
|
*
|
|
* Sd and Od are chosen to produce the largest dip that will
|
|
* keep 3d.emc frequencies monotonously decreasing with 3d
|
|
* frequencies. To achieve this, the first derivative of Remc
|
|
* with respect to R3d should be zero for the minimal 3d rate:
|
|
*
|
|
* R'emc = S - 2 * Sd * (R3d - Rm)
|
|
* R'emc(R3d-min) = 0
|
|
* S = 2 * Sd * (R3d-min - Rm)
|
|
* = 2 * Sd * (R3d-min - R3d-max) / 2
|
|
*
|
|
* +------------------------------+
|
|
* | Sd = S / (R3d-min - R3d-max) |
|
|
* +------------------------------+
|
|
*
|
|
* dip = Sd * (R3d - Rm)^2 + Od
|
|
*
|
|
* requiring dip(R3d-min) = 0 and dip(R3d-max) = 0 gives
|
|
*
|
|
* Sd * (R3d-min - Rm)^2 + Od = 0
|
|
* Od = -Sd * ((R3d-min - R3d-max) / 2)^2
|
|
* = -Sd * ((R3d-min - R3d-max)^2) / 4
|
|
*
|
|
* +------------------------------+
|
|
* | Od = (emc-max - emc-min) / 4 |
|
|
* +------------------------------+
|
|
*
|
|
*/
|
|
|
|
void nvhost_scale_emc_calibrate_emc(struct nvhost_emc_params *emc_params,
|
|
struct clk *clk_3d, struct clk *clk_3d_emc,
|
|
bool linear_emc)
|
|
{
|
|
long correction;
|
|
unsigned long max_emc;
|
|
unsigned long min_emc;
|
|
unsigned long min_rate_3d;
|
|
unsigned long max_rate_3d;
|
|
|
|
max_emc = clk_round_rate(clk_3d_emc, UINT_MAX);
|
|
max_emc = INT_TO_FX(HZ_TO_MHZ(max_emc));
|
|
|
|
min_emc = clk_round_rate(clk_3d_emc, 0);
|
|
min_emc = INT_TO_FX(HZ_TO_MHZ(min_emc));
|
|
|
|
max_rate_3d = clk_round_rate(clk_3d, UINT_MAX);
|
|
max_rate_3d = INT_TO_FX(HZ_TO_MHZ(max_rate_3d));
|
|
|
|
min_rate_3d = clk_round_rate(clk_3d, 0);
|
|
min_rate_3d = INT_TO_FX(HZ_TO_MHZ(min_rate_3d));
|
|
|
|
emc_params->emc_slope =
|
|
FXDIV((max_emc - min_emc), (max_rate_3d - min_rate_3d));
|
|
emc_params->emc_offset = max_emc -
|
|
FXMUL(emc_params->emc_slope, max_rate_3d);
|
|
/* Guarantee max 3d rate maps to max emc rate */
|
|
emc_params->emc_offset += max_emc -
|
|
(FXMUL(emc_params->emc_slope, max_rate_3d) +
|
|
emc_params->emc_offset);
|
|
|
|
emc_params->linear = linear_emc;
|
|
if (linear_emc)
|
|
return;
|
|
|
|
emc_params->emc_dip_offset = (max_emc - min_emc) / 4;
|
|
emc_params->emc_dip_slope =
|
|
-FXDIV(emc_params->emc_slope, max_rate_3d - min_rate_3d);
|
|
emc_params->emc_xmid = (max_rate_3d + min_rate_3d) / 2;
|
|
correction =
|
|
emc_params->emc_dip_offset +
|
|
FXMUL(emc_params->emc_dip_slope,
|
|
FXMUL(max_rate_3d - emc_params->emc_xmid,
|
|
max_rate_3d - emc_params->emc_xmid));
|
|
emc_params->emc_dip_offset -= correction;
|
|
}
|
|
|
|
/*
|
|
* nvhost_scale_emc_init(dev)
|
|
*
|
|
* Initialise 3d clock scaling for the given device. This function installs
|
|
* pod_scaling governor to handle the clock scaling.
|
|
*/
|
|
|
|
void nvhost_scale_emc_init(struct platform_device *pdev)
|
|
{
|
|
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
|
|
struct nvhost_device_profile *profile;
|
|
struct nvhost_gr3d_params *gr3d_params;
|
|
|
|
nvhost_scale_init(pdev);
|
|
profile = pdata->power_profile;
|
|
if (!profile)
|
|
return;
|
|
|
|
gr3d_params = kzalloc(sizeof(*gr3d_params), GFP_KERNEL);
|
|
if (!gr3d_params)
|
|
goto err_allocate_gr3d_params;
|
|
|
|
gr3d_params->clk_3d = 0;
|
|
gr3d_params->clk_3d_emc = 1;
|
|
|
|
profile->private_data = gr3d_params;
|
|
|
|
nvhost_scale_emc_calibrate_emc(&gr3d_params->emc_params,
|
|
clk(profile, gr3d_params->clk_3d),
|
|
clk(profile, gr3d_params->clk_3d_emc),
|
|
pdata->linear_emc);
|
|
|
|
if (nvhost_module_add_client(pdev, gr3d_params))
|
|
nvhost_err(&pdev->dev, "failed to register as a acm client");
|
|
|
|
return;
|
|
|
|
err_allocate_gr3d_params:
|
|
nvhost_scale_deinit(pdev);
|
|
}
|
|
|
|
/*
|
|
* nvhost_scale_emc_deinit(dev)
|
|
*
|
|
* Stop 3d scaling for the given device.
|
|
*/
|
|
|
|
void nvhost_scale_emc_deinit(struct platform_device *pdev)
|
|
{
|
|
struct nvhost_device_data *pdata = platform_get_drvdata(pdev);
|
|
if (!pdata->power_profile)
|
|
return;
|
|
|
|
/* Remove devfreq from acm client list */
|
|
nvhost_module_remove_client(pdev, pdata->power_profile->private_data);
|
|
|
|
kfree(pdata->power_profile->private_data);
|
|
pdata->power_profile->private_data = NULL;
|
|
|
|
nvhost_scale_deinit(pdev);
|
|
}
|
|
|
|
/*
|
|
* 20.12 fixed point arithmetic
|
|
*
|
|
* int FXMUL(int x, int y)
|
|
* int FXDIV(int x, int y)
|
|
*/
|
|
|
|
static int FXMUL(int x, int y)
|
|
{
|
|
return ((long long) x * (long long) y) >> FXFRAC;
|
|
}
|
|
|
|
static int FXDIV(int x, int y)
|
|
{
|
|
/* long long div operation not supported, must shift manually. This
|
|
* would have been
|
|
*
|
|
* return (((long long) x) << FXFRAC) / (long long) y;
|
|
*/
|
|
int pos, t;
|
|
if (x == 0)
|
|
return 0;
|
|
|
|
/* find largest allowable right shift to numerator, limit to FXFRAC */
|
|
t = x < 0 ? -x : x;
|
|
pos = 31 - fls(t); /* fls can't be 32 if x != 0 */
|
|
if (pos > FXFRAC)
|
|
pos = FXFRAC;
|
|
|
|
y >>= FXFRAC - pos;
|
|
if (y == 0)
|
|
return 0x7FFFFFFF; /* overflow, return MAX_FIXED */
|
|
|
|
return (x << pos) / y;
|
|
}
|