/* * drivers/video/tegra/host/nvhost_job.c * * Tegra Graphics Host Job * * Copyright (c) 2010-2018, NVIDIA CORPORATION. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, * version 2, as published by the Free Software Foundation. * * This program is distributed in the hope it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include #include "nvhost_channel.h" #include "nvhost_vm.h" #include "nvhost_job.h" #include "nvhost_syncpt.h" #include "dev.h" #include "chip_support.h" /* Magic to use to fill freed handle slots */ #define BAD_MAGIC 0xdeadbeef static size_t job_size(u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks, u32 num_syncpts) { u64 num_unpins = (u64)num_cmdbufs + (u64)num_relocs; u64 total; total = ALIGN(sizeof(struct nvhost_job), 8) + (u64)num_relocs * ALIGN(sizeof(struct nvhost_reloc), 8) + (u64)num_relocs * ALIGN(sizeof(struct nvhost_reloc_shift), 8) + (u64)num_relocs * ALIGN(sizeof(struct nvhost_reloc_type), 8) + num_unpins * ALIGN(sizeof(struct nvhost_job_unpin), 8) + (u64)num_waitchks * ALIGN(sizeof(struct nvhost_waitchk), 8) + (u64)num_cmdbufs * ALIGN(sizeof(struct nvhost_job_gather), 8) + num_unpins * ALIGN(sizeof(dma_addr_t), 8) + num_unpins * ALIGN(sizeof(struct nvhost_pinid), 8) + (u64)num_syncpts * ALIGN(sizeof(struct nvhost_job_syncpt), 8); if (total > UINT_MAX) return 0; return (size_t)total; } static void init_fields(struct nvhost_job *job, u32 num_cmdbufs, u32 num_relocs, u32 num_waitchks, u32 num_syncpts) { int num_unpins = num_cmdbufs + num_relocs; void *mem = job; /* First init state to zero */ /* * Redistribute memory to the structs. * Overflows and negative conditions have * already been checked in job_alloc(). */ mem += ALIGN(sizeof(struct nvhost_job), 8); job->relocarray = num_relocs ? mem : NULL; mem += num_relocs * ALIGN(sizeof(struct nvhost_reloc), 8); job->relocshiftarray = num_relocs ? mem : NULL; mem += num_relocs * ALIGN(sizeof(struct nvhost_reloc_shift), 8); job->reloctypearray = num_relocs ? mem : NULL; mem += num_relocs * ALIGN(sizeof(struct nvhost_reloc_type), 8); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * ALIGN(sizeof(struct nvhost_job_unpin), 8); job->waitchk = num_waitchks ? mem : NULL; mem += num_waitchks * ALIGN(sizeof(struct nvhost_waitchk), 8); job->gathers = num_cmdbufs ? mem : NULL; mem += num_cmdbufs * ALIGN(sizeof(struct nvhost_job_gather), 8); job->addr_phys = num_unpins ? mem : NULL; mem += num_unpins * ALIGN(sizeof(dma_addr_t), 8); job->pin_ids = num_unpins ? mem : NULL; mem += num_unpins * ALIGN(sizeof(struct nvhost_pinid), 8); job->sp = num_syncpts ? mem : NULL; job->reloc_addr_phys = job->addr_phys; job->gather_addr_phys = &job->addr_phys[num_relocs]; } struct nvhost_job *nvhost_job_alloc(struct nvhost_channel *ch, int num_cmdbufs, int num_relocs, int num_waitchks, int num_syncpts) { struct nvhost_job *job = NULL; size_t size = job_size(num_cmdbufs, num_relocs, num_waitchks, num_syncpts); struct nvhost_device_data *pdata = nvhost_get_devdata(ch->dev); if (!size) { nvhost_err(&pdata->pdev->dev, "empty job requested"); return NULL; } if (size <= PAGE_SIZE * 2) { job = kzalloc(size, GFP_KERNEL); if (!job) { job = vzalloc(size); } } else { nvhost_warn(&pdata->pdev->dev, "job is very large (%lu), expect performance loss\n", size); job = vzalloc(size); } if (!job) { nvhost_err(&pdata->pdev->dev, "failed to allocate job"); return NULL; } kref_init(&job->ref); job->ch = ch; job->size = size; init_fields(job, num_cmdbufs, num_relocs, num_waitchks, num_syncpts); if (pdata->enable_timestamps) { job->engine_timestamps.ptr = dma_zalloc_coherent(&ch->vm->pdev->dev, sizeof(u64) * 2, &job->engine_timestamps.dma, GFP_KERNEL); if (!job->engine_timestamps.ptr) { nvhost_err(&pdata->pdev->dev, "failed to allocate engine timestamps"); return NULL; } } return job; } EXPORT_SYMBOL(nvhost_job_alloc); void nvhost_job_get(struct nvhost_job *job) { kref_get(&job->ref); } static void job_free(struct kref *ref) { struct nvhost_job *job = container_of(ref, struct nvhost_job, ref); struct nvhost_channel *ch = job->ch; struct nvhost_device_data *pdata = platform_get_drvdata(ch->dev); if (job->engine_timestamps.ptr) { if (job->engine_timestamps.ptr[0] != 0) { /* don't log kernel added syncpt added for op_done */ nvhost_eventlib_log_task(job->ch->dev, job->sp->id, pdata->push_work_done ? (job->sp->fence - 1) : job->sp->fence, job->engine_timestamps.ptr[0] >> 5, job->engine_timestamps.ptr[1] >> 5); } dma_free_coherent(&job->ch->vm->pdev->dev, sizeof(u64) * 2, job->engine_timestamps.ptr, job->engine_timestamps.dma); } if (job->error_notifier_ref) dma_buf_put(job->error_notifier_ref); if (!is_vmalloc_addr(job)) kfree(job); else vfree(job); } void nvhost_job_put(struct nvhost_job *job) { kref_put(&job->ref, job_free); } EXPORT_SYMBOL(nvhost_job_put); int nvhost_job_add_client_gather_address(struct nvhost_job *job, u32 num_words, u32 class_id, dma_addr_t gather_address) { nvhost_job_add_gather(job, 0, num_words, 0, class_id, 0); job->gathers[job->num_gathers - 1].mem_base = gather_address; return 0; } EXPORT_SYMBOL(nvhost_job_add_client_gather_address); void nvhost_job_add_gather(struct nvhost_job *job, u32 mem_id, u32 words, u32 offset, u32 class_id, int pre_fence) { struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev); struct nvhost_job_gather *cur_gather = &job->gathers[job->num_gathers]; cur_gather->words = words; cur_gather->mem_id = mem_id; cur_gather->offset = offset; cur_gather->class_id = class_id ? class_id : pdata->class; cur_gather->pre_fence = pre_fence; job->num_gathers += 1; } void nvhost_job_set_notifier(struct nvhost_job *job, u32 error) { struct nvhost_notification *error_notifier; struct timespec time_data; void *va; u64 nsec; if (!job->error_notifier_ref) return; /* map handle and clear error notifier struct */ va = dma_buf_vmap(job->error_notifier_ref); if (!va) { dma_buf_put(job->error_notifier_ref); dev_err(&job->ch->dev->dev, "Cannot map notifier handle\n"); return; } error_notifier = va + job->error_notifier_offset; getnstimeofday(&time_data); nsec = ((u64)time_data.tv_sec) * 1000000000u + (u64)time_data.tv_nsec; error_notifier->time_stamp.nanoseconds[0] = (u32)nsec; error_notifier->time_stamp.nanoseconds[1] = (u32)(nsec >> 32); if (error_notifier->info32 != error) dev_err(&job->ch->dev->dev, "error notifier set to %d\n", error); error_notifier->info32 = error; error_notifier->status = 0xffff; dma_buf_vunmap(job->error_notifier_ref, va); } static int id_cmp(const void *_id1, const void *_id2) { u32 id1 = ((struct nvhost_pinid *)_id1)->id; u32 id2 = ((struct nvhost_pinid *)_id2)->id; if (id1 < id2) return -1; if (id1 > id2) return 1; return 0; } static int pin_array_ids(struct platform_device *dev, struct nvhost_pinid *ids, dma_addr_t *phys_addr, u32 count, struct nvhost_job_unpin *unpin_data) { int i, pin_count = 0; struct sg_table *sgt; struct dma_buf *buf; struct dma_buf_attachment *attach; u32 prev_id = 0; dma_addr_t prev_addr = 0; int err = 0; for (i = 0; i < count; i++) ids[i].index = i; sort(ids, count, sizeof(*ids), id_cmp, NULL); for (i = 0; i < count; i++) { if (ids[i].id == prev_id) { phys_addr[ids[i].index] = prev_addr; continue; } buf = dma_buf_get(ids[i].id); if (IS_ERR(buf)) { err = -EINVAL; nvhost_err(&dev->dev, "could not get buf err=%d", err); goto clean_up; } attach = dma_buf_attach(buf, &dev->dev); if (IS_ERR(attach)) { err = PTR_ERR(attach); nvhost_err(&dev->dev, "could not attach buf err=%d", err); goto clean_up_attach; } sgt = dma_buf_map_attachment(attach, ids[i].direction); if (IS_ERR(sgt)) { err = PTR_ERR(sgt); nvhost_err(&dev->dev, "could not map attachment err=%d", err); goto clean_up_map; } if (!device_is_iommuable(&dev->dev) && sgt->nents > 1) { dev_err(&dev->dev, "Cannot use non-contiguous buffer w/ IOMMU disabled\n"); err = -EINVAL; goto clean_up_iommu; } if (!sg_dma_address(sgt->sgl)) sg_dma_address(sgt->sgl) = sg_phys(sgt->sgl); phys_addr[ids[i].index] = sg_dma_address(sgt->sgl); unpin_data[pin_count].buf = buf; unpin_data[pin_count].attach = attach; unpin_data[pin_count].direction = ids[i].direction; unpin_data[pin_count++].sgt = sgt; prev_id = ids[i].id; prev_addr = phys_addr[ids[i].index]; } return pin_count; clean_up_iommu: dma_buf_unmap_attachment(attach, sgt, ids[i].direction); clean_up_map: dma_buf_detach(buf, attach); clean_up_attach: dma_buf_put(buf); clean_up: for (i = 0; i < pin_count; i++) { dma_buf_unmap_attachment(unpin_data[i].attach, unpin_data[i].sgt, unpin_data[i].direction); dma_buf_detach(unpin_data[i].buf, unpin_data[i].attach); dma_buf_put(unpin_data[i].buf); } return err; } static int pin_job_mem(struct nvhost_job *job) { int i; int count = 0; int result; struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev); for (i = 0; i < job->num_relocs; i++) { struct nvhost_reloc *reloc = &job->relocarray[i]; struct nvhost_reloc_type *type = &job->reloctypearray[i]; enum dma_data_direction direction = DMA_BIDIRECTIONAL; if (pdata->get_dma_direction) direction = pdata->get_dma_direction(type->reloc_type); job->pin_ids[count].id = reloc->target; job->pin_ids[count].direction = direction; count++; } /* validate array and pin unique ids, get refs for reloc unpinning */ result = pin_array_ids(job->ch->vm->pdev, job->pin_ids, job->addr_phys, job->num_relocs, job->unpins); if (result < 0) return result; job->num_unpins = result; for (i = 0; i < job->num_gathers; i++) { struct nvhost_job_gather *g = &job->gathers[i]; job->pin_ids[count].id = g->mem_id; job->pin_ids[count].direction = DMA_BIDIRECTIONAL; count++; } /* validate array and pin unique ids, get refs for gather unpinning */ result = pin_array_ids(nvhost_get_host(job->ch->dev)->dev, &job->pin_ids[job->num_relocs], &job->addr_phys[job->num_relocs], job->num_gathers, &job->unpins[job->num_unpins]); if (result < 0) { nvhost_job_unpin(job); return result; } job->num_unpins += result; return result; } static int do_relocs(struct nvhost_job *job, u32 cmdbuf_mem, struct dma_buf *buf) { struct nvhost_device_data *pdata = platform_get_drvdata(job->ch->dev); int i = 0; int last_page = -1; size_t last_offset; void *cmdbuf_page_addr = NULL; dma_addr_t phys_addr; int err; /* pin & patch the relocs for one gather */ while (i < job->num_relocs) { struct nvhost_reloc *reloc = &job->relocarray[i]; struct nvhost_reloc_shift *shift = &job->relocshiftarray[i]; struct nvhost_reloc_type *type = &job->reloctypearray[i]; /* skip all other gathers */ if (cmdbuf_mem != reloc->cmdbuf_mem) { i++; continue; } if (reloc->cmdbuf_offset & 3 || reloc->cmdbuf_offset >= buf->size) { nvhost_err(&pdata->pdev->dev, "invalid cmdbuf_offset=0x%x", reloc->cmdbuf_offset); return -EINVAL; } if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) { if (cmdbuf_page_addr) { dma_buf_kunmap(buf, last_page, cmdbuf_page_addr); dma_buf_end_cpu_access(buf, last_offset, PAGE_SIZE, DMA_TO_DEVICE); } cmdbuf_page_addr = dma_buf_kmap(buf, reloc->cmdbuf_offset >> PAGE_SHIFT); last_page = reloc->cmdbuf_offset >> PAGE_SHIFT; last_offset = reloc->cmdbuf_offset & PAGE_MASK; if (unlikely(!cmdbuf_page_addr)) { pr_err("Couldn't map cmdbuf for relocation\n"); return -ENOMEM; } err = dma_buf_begin_cpu_access(buf, last_offset, PAGE_SIZE, DMA_TO_DEVICE); if (err) { nvhost_err(&pdata->pdev->dev, "begin_cpu_access() failed for patching reloc %d", err); return err; } } if (pdata->get_reloc_phys_addr) phys_addr = pdata->get_reloc_phys_addr( job->reloc_addr_phys[i], type->reloc_type); else phys_addr = job->reloc_addr_phys[i]; __raw_writel( (phys_addr + reloc->target_offset) >> shift->shift, (void __iomem *)(cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK))); /* remove completed reloc from the job */ if (i != job->num_relocs - 1) { struct nvhost_reloc *reloc_last = &job->relocarray[job->num_relocs - 1]; struct nvhost_reloc_shift *shift_last = &job->relocshiftarray[job->num_relocs - 1]; struct nvhost_reloc_type *type_last = &job->reloctypearray[job->num_relocs - 1]; reloc->cmdbuf_mem = reloc_last->cmdbuf_mem; reloc->cmdbuf_offset = reloc_last->cmdbuf_offset; reloc->target = reloc_last->target; reloc->target_offset = reloc_last->target_offset; shift->shift = shift_last->shift; type->reloc_type = type_last->reloc_type; job->reloc_addr_phys[i] = job->reloc_addr_phys[job->num_relocs - 1]; job->num_relocs--; } else { break; } } if (cmdbuf_page_addr) { dma_buf_kunmap(buf, last_page, cmdbuf_page_addr); dma_buf_end_cpu_access(buf, last_offset, PAGE_SIZE, DMA_TO_DEVICE); } return 0; } int nvhost_job_pin(struct nvhost_job *job, struct nvhost_syncpt *sp) { int err = 0, i = 0, j = 0; /* pin memory */ err = pin_job_mem(job); if (err <= 0) goto fail; /* patch gathers */ for (i = 0; i < job->num_gathers; i++) { struct nvhost_job_gather *g = &job->gathers[i]; /* process each gather mem only once */ if (!g->buf) { u64 end_offset; g->buf = dma_buf_get(g->mem_id); if (IS_ERR(g->buf)) { err = PTR_ERR(g->buf); g->buf = NULL; break; } end_offset = (u64)g->offset + (u64)g->words * 4; if (end_offset > g->buf->size) { dma_buf_put(g->buf); g->buf = NULL; err = -EINVAL; break; } g->mem_base = job->gather_addr_phys[i]; for (j = 0; j < job->num_gathers; j++) { struct nvhost_job_gather *tmp = &job->gathers[j]; if (!tmp->buf && tmp->mem_id == g->mem_id) { tmp->buf = g->buf; tmp->mem_base = g->mem_base; } } err = do_relocs(job, g->mem_id, g->buf); dma_buf_put(g->buf); if (err) break; } } fail: return err; } void nvhost_job_unpin(struct nvhost_job *job) { int i; for (i = 0; i < job->num_unpins; i++) { struct nvhost_job_unpin *unpin = &job->unpins[i]; dma_buf_unmap_attachment(unpin->attach, unpin->sgt, unpin->direction); dma_buf_detach(unpin->buf, unpin->attach); dma_buf_put(unpin->buf); } job->num_unpins = 0; } /** * Debug routine used to dump job entries */ void nvhost_job_dump(struct device *dev, struct nvhost_job *job) { dev_info(dev, " SYNCPT_ID %d\n", job->sp->id); dev_info(dev, " SYNCPT_VAL %d\n", job->sp->fence); dev_info(dev, " FIRST_GET 0x%x\n", job->first_get); dev_info(dev, " TIMEOUT %d\n", job->timeout); dev_info(dev, " NUM_SLOTS %d\n", job->num_slots); dev_info(dev, " NUM_HANDLES %d\n", job->num_unpins); }