Jetpack/kernel/nvidia/drivers/video/tegra/host/nvhost_job.c

592 lines
15 KiB
C

/*
* drivers/video/tegra/host/nvhost_job.c
*
* Tegra Graphics Host Job
*
* Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms and conditions of the GNU General Public License,
* version 2, as published by the Free Software Foundation.
*
* This program is distributed in the hope it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
* more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/slab.h>
#include <linux/kref.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/sort.h>
#include <linux/scatterlist.h>
#include <trace/events/nvhost.h>
#include "nvhost_channel.h"
#include "nvhost_vm.h"
#include "nvhost_job.h"
#include "nvhost_syncpt.h"
#include "dev.h"
#include "chip_support.h"
/*
 * Magic to use to fill freed handle slots.
 * NOTE(review): no user of this macro is visible in this part of the file;
 * confirm it is still referenced before relying on it.
 */
#define BAD_MAGIC 0xdeadbeef
/*
 * Compute the number of bytes needed for a struct nvhost_job followed by
 * all of its trailing arrays.
 *
 * The size of every element type is rounded up to a multiple of 8 bytes
 * before it is multiplied by its element count. The unpin, physical
 * address and pin id arrays each hold one entry per cmdbuf plus one entry
 * per reloc.
 *
 * The sum is accumulated in 64 bits. Returns 0 when the total is larger
 * than UINT_MAX, otherwise the total size in bytes.
 */
static size_t job_size(u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks,
			u32 num_syncpts)
{
	const u64 cmdbufs = num_cmdbufs;
	const u64 relocs = num_relocs;
	const u64 waitchks = num_waitchks;
	const u64 syncpts = num_syncpts;
	const u64 unpins = cmdbufs + relocs;
	u64 bytes = ALIGN(sizeof(struct nvhost_job), 8);

	/* per-reloc arrays */
	bytes += relocs * ALIGN(sizeof(struct nvhost_reloc), 8);
	bytes += relocs * ALIGN(sizeof(struct nvhost_reloc_shift), 8);
	bytes += relocs * ALIGN(sizeof(struct nvhost_reloc_type), 8);

	bytes += unpins * ALIGN(sizeof(struct nvhost_job_unpin), 8);
	bytes += waitchks * ALIGN(sizeof(struct nvhost_waitchk), 8);
	bytes += cmdbufs * ALIGN(sizeof(struct nvhost_job_gather), 8);

	/* per-pin arrays (relocs + cmdbufs) */
	bytes += unpins * ALIGN(sizeof(dma_addr_t), 8);
	bytes += unpins * ALIGN(sizeof(struct nvhost_pinid), 8);

	bytes += syncpts * ALIGN(sizeof(struct nvhost_job_syncpt), 8);

	return bytes > UINT_MAX ? 0 : (size_t)bytes;
}
/*
 * Point the array members of @job at their slices of the single
 * allocation that starts at @job.
 *
 * The slices follow the job structure in this order: relocs, reloc
 * shifts, reloc types, unpins, waitchks, gathers, physical addresses,
 * pin ids, syncpts. Each element size is rounded up to 8 bytes, matching
 * job_size(). An array with zero elements gets a NULL pointer.
 *
 * No range checking is done here; the counts are expected to have been
 * accepted by job_size() before the memory was allocated.
 */
static void init_fields(struct nvhost_job *job,
		u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks,
		u32 num_syncpts)
{
	int num_unpins = num_cmdbufs + num_relocs;
	char *cursor = (char *)job;

	/* skip the job structure itself */
	cursor += ALIGN(sizeof(struct nvhost_job), 8);

	job->relocarray = num_relocs ? (void *)cursor : NULL;
	cursor += num_relocs * ALIGN(sizeof(struct nvhost_reloc), 8);

	job->relocshiftarray = num_relocs ? (void *)cursor : NULL;
	cursor += num_relocs * ALIGN(sizeof(struct nvhost_reloc_shift), 8);

	job->reloctypearray = num_relocs ? (void *)cursor : NULL;
	cursor += num_relocs * ALIGN(sizeof(struct nvhost_reloc_type), 8);

	job->unpins = num_unpins ? (void *)cursor : NULL;
	cursor += num_unpins * ALIGN(sizeof(struct nvhost_job_unpin), 8);

	job->waitchk = num_waitchks ? (void *)cursor : NULL;
	cursor += num_waitchks * ALIGN(sizeof(struct nvhost_waitchk), 8);

	job->gathers = num_cmdbufs ? (void *)cursor : NULL;
	cursor += num_cmdbufs * ALIGN(sizeof(struct nvhost_job_gather), 8);

	job->addr_phys = num_unpins ? (void *)cursor : NULL;
	cursor += num_unpins * ALIGN(sizeof(dma_addr_t), 8);

	job->pin_ids = num_unpins ? (void *)cursor : NULL;
	cursor += num_unpins * ALIGN(sizeof(struct nvhost_pinid), 8);

	job->sp = num_syncpts ? (void *)cursor : NULL;

	/* reloc addresses come first in addr_phys, gather addresses after */
	job->reloc_addr_phys = job->addr_phys;
	job->gather_addr_phys = &job->addr_phys[num_relocs];
}
/**
 * nvhost_job_alloc() - allocate a job and lay out its trailing arrays
 * @ch:           channel the job belongs to; stored in job->ch
 * @num_cmdbufs:  number of gather entries to reserve
 * @num_relocs:   number of relocation entries to reserve
 * @num_waitchks: number of wait check entries to reserve
 * @num_syncpts:  number of syncpoint entries to reserve
 *
 * The job and all of its arrays live in one zeroed allocation whose size
 * comes from job_size(). Allocations of up to two pages are tried with
 * kzalloc() first and fall back to vzalloc(); larger ones go straight to
 * vzalloc(). When the device has enable_timestamps set, a coherent DMA
 * buffer for two u64 timestamps is allocated as well.
 *
 * The returned job holds one reference (kref_init()).
 *
 * Return: the new job, or NULL when job_size() rejects the counts or an
 * allocation fails.
 */
struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch,
		int num_cmdbufs, int num_relocs, int num_waitchks,
		int num_syncpts)
{
	struct nvhost_job *job = NULL;
	size_t size =
		job_size(num_cmdbufs, num_relocs, num_waitchks, num_syncpts);
	struct nvhost_device_data *pdata = nvhost_get_devdata(ch->dev);

	if (!size) {
		nvhost_err(&pdata->pdev->dev, "empty job requested");
		return NULL;
	}

	if (size <= PAGE_SIZE * 2) {
		job = kzalloc(size, GFP_KERNEL);
		if (!job)
			job = vzalloc(size);
	} else {
		/* size is a size_t, so print it with %zu */
		nvhost_warn(&pdata->pdev->dev,
			    "job is very large (%zu), expect performance loss\n",
			    size);
		job = vzalloc(size);
	}

	if (!job) {
		nvhost_err(&pdata->pdev->dev, "failed to allocate job");
		return NULL;
	}

	kref_init(&job->ref);
	job->ch = ch;
	job->size = size;

	init_fields(job, num_cmdbufs, num_relocs, num_waitchks, num_syncpts);

	if (pdata->enable_timestamps) {
		job->engine_timestamps.ptr =
			dma_zalloc_coherent(&ch->vm->pdev->dev, sizeof(u64) * 2,
				&job->engine_timestamps.dma, GFP_KERNEL);
		if (!job->engine_timestamps.ptr) {
			nvhost_err(&pdata->pdev->dev,
				"failed to allocate engine timestamps");
			/*
			 * Nothing else references the job yet; release it
			 * here so the failure does not leak the allocation.
			 */
			if (is_vmalloc_addr(job))
				vfree(job);
			else
				kfree(job);
			return NULL;
		}
	}

	return job;
}
EXPORT_SYMBOL(nvhost_job_alloc);
/**
 * nvhost_job_get() - take an additional reference on @job
 * @job: job whose reference count is incremented
 */
void nvhost_job_get(struct nvhost_job *job)
{
	struct kref *refcount = &job->ref;

	kref_get(refcount);
}
/*
 * kref release callback: tear down a job once its last reference is gone.
 *
 * If an engine timestamp buffer exists and its first word is non-zero,
 * the task is reported through nvhost_eventlib_log_task() before the
 * buffer is freed. The reference on the error notifier dma-buf, if any,
 * is dropped, and the job memory is returned with kfree() or vfree()
 * depending on how it was allocated.
 */
static void job_free(struct kref *ref)
{
	struct nvhost_job *job = container_of(ref, struct nvhost_job, ref);
	struct nvhost_channel *ch = job->ch;
	struct nvhost_device_data *pdata = platform_get_drvdata(ch->dev);

	if (job->engine_timestamps.ptr) {
		/*
		 * job->sp is NULL for a job allocated without syncpoints,
		 * so only log when there is a syncpoint to report.
		 */
		if (job->sp && job->engine_timestamps.ptr[0] != 0) {
			/*
			 * With push_work_done the last increment was added
			 * by the kernel for op_done; do not log that one.
			 */
			nvhost_eventlib_log_task(ch->dev, job->sp->id,
				pdata->push_work_done ? (job->sp->fence - 1) :
							job->sp->fence,
				job->engine_timestamps.ptr[0] >> 5,
				job->engine_timestamps.ptr[1] >> 5);
		}

		dma_free_coherent(&ch->vm->pdev->dev, sizeof(u64) * 2,
				  job->engine_timestamps.ptr,
				  job->engine_timestamps.dma);
	}

	if (job->error_notifier_ref)
		dma_buf_put(job->error_notifier_ref);

	if (!is_vmalloc_addr(job))
		kfree(job);
	else
		vfree(job);
}
/**
 * nvhost_job_put() - drop a reference on @job
 * @job: job whose reference count is decremented
 *
 * job_free() runs when the count reaches zero.
 */
void nvhost_job_put(struct nvhost_job *job)
{
	struct kref *refcount = &job->ref;

	kref_put(refcount, job_free);
}
EXPORT_SYMBOL(nvhost_job_put);
/**
 * nvhost_job_add_client_gather_address() - append a gather by DMA address
 * @job:            job to extend
 * @num_words:      number of words in the gather
 * @class_id:       class for the gather; 0 is replaced by the device
 *                  class in nvhost_job_add_gather()
 * @gather_address: address stored as the gather's mem_base
 *
 * The gather is added with mem_id 0, offset 0 and pre_fence 0, and its
 * mem_base is then set to @gather_address.
 *
 * Return: always 0.
 */
int nvhost_job_add_client_gather_address(struct nvhost_job *job,
		u32 num_words, u32 class_id, dma_addr_t gather_address)
{
	struct nvhost_job_gather *added;

	nvhost_job_add_gather(job, 0, num_words, 0, class_id, 0);

	/* the entry just appended is the last one */
	added = &job->gathers[job->num_gathers - 1];
	added->mem_base = gather_address;

	return 0;
}
EXPORT_SYMBOL(nvhost_job_add_client_gather_address);
/**
 * nvhost_job_add_gather() - append a gather entry to @job
 * @job:       job to extend
 * @mem_id:    memory id of the command buffer
 * @words:     number of words in the gather
 * @offset:    offset of the gather inside the buffer
 * @class_id:  class for the gather; 0 selects the device's class
 * @pre_fence: stored in the gather's pre_fence field
 *
 * Fills the slot at index job->num_gathers and then increments
 * job->num_gathers. No bounds check is made against the number of
 * gathers the job was allocated for.
 */
void nvhost_job_add_gather(struct nvhost_job *job,
		u32 mem_id, u32 words, u32 offset, u32 class_id, int pre_fence)
{
	struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev);
	struct nvhost_job_gather *slot = &job->gathers[job->num_gathers];

	slot->words = words;
	slot->mem_id = mem_id;
	slot->offset = offset;

	if (class_id)
		slot->class_id = class_id;
	else
		slot->class_id = pdata->class;

	slot->pre_fence = pre_fence;

	job->num_gathers += 1;
}
/**
 * nvhost_job_set_notifier() - write an error code into the job's notifier
 * @job:   job whose error notifier buffer is updated
 * @error: value stored in the notifier's info32 field
 *
 * Does nothing when the job has no error notifier buffer. Otherwise the
 * buffer is mapped, the current time is stored as a 64-bit nanosecond
 * count split across two 32-bit words, info32 is set to @error and status
 * to 0xffff, and the buffer is unmapped again.
 *
 * If the buffer cannot be mapped, the job's reference on it is dropped
 * and the pointer is cleared.
 */
void nvhost_job_set_notifier(struct nvhost_job *job, u32 error)
{
	struct nvhost_notification *error_notifier;
	struct timespec time_data;
	void *va;
	u64 nsec;

	if (!job->error_notifier_ref)
		return;

	/* map the notifier buffer */
	va = dma_buf_vmap(job->error_notifier_ref);
	if (!va) {
		dev_err(&job->ch->dev->dev, "Cannot map notifier handle\n");
		/*
		 * Clear the pointer after dropping the reference;
		 * job_free() would otherwise put the same buffer again.
		 */
		dma_buf_put(job->error_notifier_ref);
		job->error_notifier_ref = NULL;
		return;
	}

	error_notifier = va + job->error_notifier_offset;

	getnstimeofday(&time_data);
	nsec = ((u64)time_data.tv_sec) * 1000000000u +
		(u64)time_data.tv_nsec;
	error_notifier->time_stamp.nanoseconds[0] = (u32)nsec;
	error_notifier->time_stamp.nanoseconds[1] = (u32)(nsec >> 32);

	/* only log when the stored value actually changes */
	if (error_notifier->info32 != error)
		dev_err(&job->ch->dev->dev, "error notifier set to %d\n", error);

	error_notifier->info32 = error;
	error_notifier->status = 0xffff;

	dma_buf_vunmap(job->error_notifier_ref, va);
}
static int id_cmp(const void *_id1, const void *_id2)
{
u32 id1 = ((struct nvhost_pinid *)_id1)->id;
u32 id2 = ((struct nvhost_pinid *)_id2)->id;
if (id1 < id2)
return -1;
if (id1 > id2)
return 1;
return 0;
}
/*
 * Pin every distinct buffer id in @ids for @dev and record its address.
 *
 * @dev:        device the buffers are attached to
 * @ids:        @count entries; each entry's index field is overwritten
 *              with its original position and the array is then sorted
 *              by id
 * @phys_addr:  output, indexed by original position; receives the DMA
 *              address of the first scatterlist entry of each buffer
 * @count:      number of entries in @ids and @phys_addr
 * @unpin_data: output; one record per buffer actually pinned, holding
 *              what is needed to undo the pin
 *
 * Because the array is sorted, an entry with the same id as the last
 * pinned entry reuses that entry's address without pinning again. The
 * "previous" id starts out as 0 with address 0, so entries with id 0 are
 * never pinned and get address 0.
 *
 * Returns the number of records written to @unpin_data, or a negative
 * errno. On failure every buffer pinned by this call is released again.
 */
static int pin_array_ids(struct platform_device *dev,
		struct nvhost_pinid *ids,
		dma_addr_t *phys_addr,
		u32 count,
		struct nvhost_job_unpin *unpin_data)
{
	struct dma_buf_attachment *attach;
	struct dma_buf *buf;
	struct sg_table *sgt;
	dma_addr_t prev_addr = 0;
	u32 prev_id = 0;
	int pin_count = 0;
	int err = 0;
	int i, j;

	/* remember where each id came from before sorting moves it */
	for (i = 0; i < count; i++)
		ids[i].index = i;

	sort(ids, count, sizeof(*ids), id_cmp, NULL);

	for (i = 0; i < count; i++) {
		struct nvhost_pinid *cur = &ids[i];
		struct nvhost_job_unpin *record;
		dma_addr_t addr;

		/* duplicate of the previous id: share its address */
		if (cur->id == prev_id) {
			phys_addr[cur->index] = prev_addr;
			continue;
		}

		buf = dma_buf_get(cur->id);
		if (IS_ERR(buf)) {
			err = -EINVAL;
			nvhost_err(&dev->dev, "could not get buf err=%d", err);
			goto err_unwind;
		}

		attach = dma_buf_attach(buf, &dev->dev);
		if (IS_ERR(attach)) {
			err = PTR_ERR(attach);
			nvhost_err(&dev->dev, "could not attach buf err=%d",
				   err);
			goto err_put;
		}

		sgt = dma_buf_map_attachment(attach, cur->direction);
		if (IS_ERR(sgt)) {
			err = PTR_ERR(sgt);
			nvhost_err(&dev->dev, "could not map attachment err=%d",
				   err);
			goto err_detach;
		}

		if (!device_is_iommuable(&dev->dev) && sgt->nents > 1) {
			dev_err(&dev->dev, "Cannot use non-contiguous buffer w/ IOMMU disabled\n");
			err = -EINVAL;
			goto err_unmap;
		}

		/* fall back to the physical address when none was set */
		if (!sg_dma_address(sgt->sgl))
			sg_dma_address(sgt->sgl) = sg_phys(sgt->sgl);

		addr = sg_dma_address(sgt->sgl);
		phys_addr[cur->index] = addr;

		record = &unpin_data[pin_count++];
		record->buf = buf;
		record->attach = attach;
		record->direction = cur->direction;
		record->sgt = sgt;

		prev_id = cur->id;
		prev_addr = addr;
	}

	return pin_count;

err_unmap:
	dma_buf_unmap_attachment(attach, sgt, ids[i].direction);
err_detach:
	dma_buf_detach(buf, attach);
err_put:
	dma_buf_put(buf);
err_unwind:
	/* release everything pinned earlier in this call */
	for (j = 0; j < pin_count; j++) {
		dma_buf_unmap_attachment(unpin_data[j].attach,
			unpin_data[j].sgt, unpin_data[j].direction);
		dma_buf_detach(unpin_data[j].buf, unpin_data[j].attach);
		dma_buf_put(unpin_data[j].buf);
	}

	return err;
}
static int pin_job_mem(struct nvhost_job *job)
{
int i;
int count = 0;
int result;
struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev);
for (i = 0; i < job->num_relocs; i++) {
struct nvhost_reloc *reloc = &job->relocarray[i];
struct nvhost_reloc_type *type = &job->reloctypearray[i];
enum dma_data_direction direction = DMA_BIDIRECTIONAL;
if (pdata->get_dma_direction)
direction = pdata->get_dma_direction(type->reloc_type);
job->pin_ids[count].id = reloc->target;
job->pin_ids[count].direction = direction;
count++;
}
/* validate array and pin unique ids, get refs for reloc unpinning */
result = pin_array_ids(job->ch->vm->pdev,
job->pin_ids, job->addr_phys,
job->num_relocs,
job->unpins);
if (result < 0)
return result;
job->num_unpins = result;
for (i = 0; i < job->num_gathers; i++) {
struct nvhost_job_gather *g = &job->gathers[i];
job->pin_ids[count].id = g->mem_id;
job->pin_ids[count].direction = DMA_BIDIRECTIONAL;
count++;
}
/* validate array and pin unique ids, get refs for gather unpinning */
result = pin_array_ids(nvhost_get_host(job->ch->dev)->dev,
&job->pin_ids[job->num_relocs],
&job->addr_phys[job->num_relocs],
job->num_gathers,
&job->unpins[job->num_unpins]);
if (result < 0) {
nvhost_job_unpin(job);
return result;
}
job->num_unpins += result;
return result;
}
/*
 * Patch every reloc of @job that targets the command buffer @cmdbuf_mem.
 *
 * @job:        job whose reloc arrays are walked
 * @cmdbuf_mem: memory id of the command buffer being patched
 * @buf:        dma-buf backing that command buffer
 *
 * For each matching reloc the page containing cmdbuf_offset is mapped
 * (one page at a time, remapping only when the page changes) and the
 * word at that offset is overwritten with
 * (address + target_offset) >> shift, where the address comes from
 * job->reloc_addr_phys[], optionally translated by
 * pdata->get_reloc_phys_addr().
 *
 * A patched reloc is removed by copying the last reloc over it and
 * shrinking job->num_relocs. When the patched reloc is itself the last
 * one, the walk stops and the count is left unchanged.
 *
 * Returns 0 on success, -EINVAL for an unaligned or out-of-range
 * cmdbuf_offset, -ENOMEM when a page cannot be mapped, or the error from
 * dma_buf_begin_cpu_access(). Any page still mapped is released on every
 * exit path.
 */
static int do_relocs(struct nvhost_job *job,
		u32 cmdbuf_mem, struct dma_buf *buf)
{
	struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev);
	int i = 0;
	int last_page = -1;
	size_t last_offset = 0;
	void *cmdbuf_page_addr = NULL;
	dma_addr_t phys_addr;
	int err = 0;

	/* pin & patch the relocs for one gather */
	while (i < job->num_relocs) {
		struct nvhost_reloc *reloc = &job->relocarray[i];
		struct nvhost_reloc_shift *shift = &job->relocshiftarray[i];
		struct nvhost_reloc_type *type = &job->reloctypearray[i];

		/* skip all other gathers */
		if (cmdbuf_mem != reloc->cmdbuf_mem) {
			i++;
			continue;
		}

		if (reloc->cmdbuf_offset & 3 ||
		    reloc->cmdbuf_offset >= buf->size) {
			nvhost_err(&pdata->pdev->dev,
				   "invalid cmdbuf_offset=0x%x",
				   reloc->cmdbuf_offset);
			/* a page from an earlier reloc may still be mapped */
			err = -EINVAL;
			break;
		}

		if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) {
			if (cmdbuf_page_addr) {
				dma_buf_kunmap(buf, last_page,
						cmdbuf_page_addr);
				dma_buf_end_cpu_access(buf, last_offset,
						PAGE_SIZE, DMA_TO_DEVICE);
			}

			cmdbuf_page_addr = dma_buf_kmap(buf,
					reloc->cmdbuf_offset >> PAGE_SHIFT);
			last_page = reloc->cmdbuf_offset >> PAGE_SHIFT;
			last_offset = reloc->cmdbuf_offset & PAGE_MASK;

			if (unlikely(!cmdbuf_page_addr)) {
				pr_err("Couldn't map cmdbuf for relocation\n");
				err = -ENOMEM;
				break;
			}

			err = dma_buf_begin_cpu_access(buf, last_offset,
					PAGE_SIZE, DMA_TO_DEVICE);
			if (err) {
				nvhost_err(&pdata->pdev->dev,
					   "begin_cpu_access() failed for patching reloc %d",
					   err);
				/*
				 * CPU access never started, so only undo the
				 * mapping and skip end_cpu_access below.
				 */
				dma_buf_kunmap(buf, last_page,
						cmdbuf_page_addr);
				cmdbuf_page_addr = NULL;
				break;
			}
		}

		if (pdata->get_reloc_phys_addr)
			phys_addr = pdata->get_reloc_phys_addr(
						job->reloc_addr_phys[i],
						type->reloc_type);
		else
			phys_addr = job->reloc_addr_phys[i];

		__raw_writel(
			(phys_addr +
				reloc->target_offset) >> shift->shift,
			(void __iomem *)(cmdbuf_page_addr +
				(reloc->cmdbuf_offset & ~PAGE_MASK)));

		/* the last reloc has nothing to be replaced with: done */
		if (i == job->num_relocs - 1)
			break;

		/*
		 * Remove the completed reloc by moving the last one into
		 * its slot; i is not advanced so the moved entry is
		 * examined on the next iteration.
		 */
		{
			struct nvhost_reloc *reloc_last =
				&job->relocarray[job->num_relocs - 1];
			struct nvhost_reloc_shift *shift_last =
				&job->relocshiftarray[job->num_relocs - 1];
			struct nvhost_reloc_type *type_last =
				&job->reloctypearray[job->num_relocs - 1];

			reloc->cmdbuf_mem	= reloc_last->cmdbuf_mem;
			reloc->cmdbuf_offset	= reloc_last->cmdbuf_offset;
			reloc->target		= reloc_last->target;
			reloc->target_offset	= reloc_last->target_offset;
			shift->shift		= shift_last->shift;
			type->reloc_type	= type_last->reloc_type;
			job->reloc_addr_phys[i] =
				job->reloc_addr_phys[job->num_relocs - 1];
			job->num_relocs--;
		}
	}

	/* single release point for success and error paths alike */
	if (cmdbuf_page_addr) {
		dma_buf_kunmap(buf, last_page, cmdbuf_page_addr);
		dma_buf_end_cpu_access(buf, last_offset,
				PAGE_SIZE, DMA_TO_DEVICE);
	}

	return err;
}
/**
 * nvhost_job_pin() - pin a job's buffers and patch its relocations
 * @job: job to pin
 * @sp:  not used by this function
 *
 * Pins all reloc and gather buffers through pin_job_mem(), then visits
 * each gather whose buf pointer is still NULL. For such a gather the
 * buffer is looked up, the gather's byte range (offset + words * 4) is
 * checked against the buffer size, mem_base is taken from
 * job->gather_addr_phys[], and both buf and mem_base are copied to every
 * other unprocessed gather with the same mem_id. The relocs targeting
 * that buffer are then applied with do_relocs() and the lookup reference
 * is dropped.
 *
 * Return: the result of pin_job_mem() when it is zero or negative;
 * otherwise 0 on success or a negative errno from the first failing
 * gather.
 */
int nvhost_job_pin(struct nvhost_job *job, struct nvhost_syncpt *sp)
{
	int err;
	int i, j;

	/* pin memory */
	err = pin_job_mem(job);
	if (err <= 0)
		return err;

	/* patch gathers */
	for (i = 0; i < job->num_gathers; i++) {
		struct nvhost_job_gather *g = &job->gathers[i];
		u64 end_offset;

		/* process each gather mem only once */
		if (g->buf)
			continue;

		g->buf = dma_buf_get(g->mem_id);
		if (IS_ERR(g->buf)) {
			err = PTR_ERR(g->buf);
			g->buf = NULL;
			break;
		}

		/* the gather must lie entirely inside its buffer */
		end_offset = (u64)g->offset + (u64)g->words * 4;
		if (end_offset > g->buf->size) {
			dma_buf_put(g->buf);
			g->buf = NULL;
			err = -EINVAL;
			break;
		}

		g->mem_base = job->gather_addr_phys[i];

		/* share the result with other gathers on the same buffer */
		for (j = 0; j < job->num_gathers; j++) {
			struct nvhost_job_gather *other = &job->gathers[j];

			if (other->buf || other->mem_id != g->mem_id)
				continue;

			other->buf = g->buf;
			other->mem_base = g->mem_base;
		}

		err = do_relocs(job, g->mem_id, g->buf);
		dma_buf_put(g->buf);
		if (err)
			break;
	}

	return err;
}
/**
 * nvhost_job_unpin() - release every buffer recorded in job->unpins
 * @job: job whose pins are released
 *
 * Each record is unmapped, detached and its dma-buf reference dropped,
 * in array order. job->num_unpins is reset to 0 afterwards.
 */
void nvhost_job_unpin(struct nvhost_job *job)
{
	int idx = 0;

	while (idx < job->num_unpins) {
		struct nvhost_job_unpin *record = &job->unpins[idx];

		dma_buf_unmap_attachment(record->attach, record->sgt,
					 record->direction);
		dma_buf_detach(record->buf, record->attach);
		dma_buf_put(record->buf);

		idx++;
	}

	job->num_unpins = 0;
}
/**
 * nvhost_job_dump() - debug helper that logs a summary of @job
 * @dev: device used for the dev_info() output
 * @job: job to describe
 *
 * Prints the id and fence of the job's first syncpoint entry, followed
 * by first_get, timeout, num_slots and num_unpins. job->sp is
 * dereferenced without a NULL check.
 */
void nvhost_job_dump(struct device *dev, struct nvhost_job *job)
{
	/* syncpoint the job is tracked with */
	dev_info(dev, " SYNCPT_ID %d\n", job->sp->id);
	dev_info(dev, " SYNCPT_VAL %d\n", job->sp->fence);

	/* submission bookkeeping */
	dev_info(dev, " FIRST_GET 0x%x\n", job->first_get);
	dev_info(dev, " TIMEOUT %d\n", job->timeout);
	dev_info(dev, " NUM_SLOTS %d\n", job->num_slots);
	dev_info(dev, " NUM_HANDLES %d\n", job->num_unpins);
}