Jetpack/kernel/nvidia/drivers/char/hmm_dmirror.c

1299 lines
30 KiB
C
Raw Permalink Normal View History

/*
* Copyright 2013 Red Hat Inc.
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* Authors: Jérôme Glisse <jglisse@redhat.com>
*/
/*
* This is a dummy driver to exercise the HMM (heterogeneous memory management)
* mirror API of the kernel. Userspace programs register with the dummy device
* to mirror their own address space and can use the device to read/write to
* any valid virtual address.
*
* In some ways it can also serve as an example driver for people wanting to use
* HMM inside their own device driver.
*/
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/delay.h>
#include <linux/pagemap.h>
#include <linux/hmm.h>
#include <linux/vmalloc.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/platform_device.h>
#include <linux/version.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 14, 0)
#include <linux/sched/mm.h>
#else
#include <linux/sched.h>
#endif
#include <uapi/linux/hmm_dmirror.h>
/* Forward declaration; struct dmirror refers to it before it is defined. */
struct dmirror_device;
/*
 * Kernel-side staging ("bounce") buffer used to move data between a userspace
 * buffer and the pages reachable through the mirror page table.
 */
struct dummy_bounce {
/* Backing storage, allocated with vmalloc() in dummy_bounce_init(). */
void *ptr;
/* Size of the backing storage in bytes. */
unsigned long size;
/* Mirrored virtual address that offset 0 of the buffer corresponds to. */
unsigned long addr;
/* Count of pages copied through the buffer by the read/write walkers. */
unsigned long cpages;
};
/*
 * Encoding this driver asks HMM to use when filling the pfns[] array:
 * dummy_fault() stores these in the flags[] / values[] tables it hands to
 * HMM, indexed by HMM_PFN_VALID, HMM_PFN_WRITE and HMM_PFN_NONE.
 */
#define FLAG_HMM_PFN_VALID (1 << 0)
#define FLAG_HMM_PFN_WRITE (1 << 1)
#define VALUE_HMM_PFN_NONE (1 << 4)
/* Passed to HMM as hmm_range.pfn_shift by dummy_fault(). */
#define HPFN_SHIFT 7
/*
 * Layout of an entry in the driver's own (mirror) page table: the pfn is
 * stored shifted left by DPT_SHIFT, the low bits carry the DPT_* flags.
 */
#define DPT_SHIFT PAGE_SHIFT
/* Entry holds a page (set by dmirror_pt_from_page()). */
#define DPT_VALID (1 << 0)
/* Entry may be written through (checked by dummy_do_write()). */
#define DPT_WRITE (1 << 1)
/* NOTE(review): not referenced anywhere in this file. */
#define DPT_DPAGE (1 << 2)
/*
 * Root of the driver's software page table. Lower levels are individual
 * pages allocated on demand by dummy_pt_walk().
 */
struct dmirror_pt {
/* Top-level table; each entry uses the DPT_* encoding. */
unsigned long pgd[PTRS_PER_PGD];
/* Taken around every walk of the table by the callers of dummy_pt_walk(). */
struct rw_semaphore lock;
};
/*
 * Per-open-file state: one mirror of the opening process' address space.
 * Created by dmirror_new() and destroyed by dmirror_del().
 */
struct dmirror {
/* Device this mirror was opened on. */
struct dmirror_device *mdevice;
/* File the mirror is attached to (stored in filp->private_data). */
struct file *filp;
/* HMM mirror object registered with hmm_mirror_register(). */
struct hmm_mirror mirror;
/* Address space being mirrored. */
struct mm_struct *mm;
/* Software page table mirroring @mm. */
struct dmirror_pt pt;
};
/* Global state of the single dummy device instance. */
struct dmirror_device {
/* Character device number obtained from alloc_chrdev_region(). */
dev_t dev;
/* Character device registered with dmirror_fops. */
struct cdev cdevice;
/* Device class created in dmirror_probe(). */
struct class *cl;
/* Fake device memory returned by hmm_devmem_add(). */
struct hmm_devmem *devmem;
/* Platform device this state is attached to as driver data. */
struct platform_device *pdevice;
/* HMM device returned by hmm_device_new(). */
struct hmm_device *hmm_device;
/* Head of the free list of device pages, chained through page->s_mem. */
struct page *frees;
/* Protects @frees and the two counters below. */
spinlock_t lock;
/* Number of device pages handed out by dummy_device_alloc_page(). */
unsigned long calloc;
/* Number of device pages returned through dummy_devmem_free(). */
unsigned long cfree;
};
/* Index of @addr inside the top-level (PGD) table of the mirror page table. */
static inline unsigned long dmirror_pt_pgd(unsigned long addr)
{
	const unsigned long index_mask = PTRS_PER_PGD - 1;

	return (addr >> PGDIR_SHIFT) & index_mask;
}
/* Index of @addr inside a PUD-level page of the mirror page table. */
static inline unsigned long dmirror_pt_pud(unsigned long addr)
{
	const unsigned long index_mask = PTRS_PER_PUD - 1;

	return (addr >> PUD_SHIFT) & index_mask;
}
/* Index of @addr inside a PMD-level page of the mirror page table. */
static inline unsigned long dmirror_pt_pmd(unsigned long addr)
{
	const unsigned long index_mask = PTRS_PER_PMD - 1;

	return (addr >> PMD_SHIFT) & index_mask;
}
/* Index of @addr inside a PTE-level page of the mirror page table. */
static inline unsigned long dmirror_pt_pte(unsigned long addr)
{
	const unsigned long index_mask = PTRS_PER_PTE - 1;

	return (addr >> PAGE_SHIFT) & index_mask;
}
/*
 * Decode a mirror page table entry.
 *
 * Returns the struct page stored in @dpte, or NULL when the entry does not
 * have DPT_VALID set.
 */
static inline struct page *dmirror_pt_page(unsigned long dpte)
{
	return (dpte & DPT_VALID) ? pfn_to_page(dpte >> DPT_SHIFT) : NULL;
}
/*
 * Encode @page as a mirror page table entry.
 *
 * Returns the page's pfn shifted by DPT_SHIFT with DPT_VALID set, or 0 (an
 * invalid entry) when @page is NULL.
 */
static inline unsigned long dmirror_pt_from_page(struct page *page)
{
	unsigned long entry = 0;

	if (page)
		entry = (page_to_pfn(page) << DPT_SHIFT) | DPT_VALID;
	return entry;
}
/* First address past the range covered by the PUD page that maps @addr. */
static inline unsigned long dmirror_pt_pud_end(unsigned long addr)
{
	unsigned long base = addr & PGDIR_MASK;
	unsigned long span = (long)PTRS_PER_PUD << PUD_SHIFT;

	return base + span;
}
/* First address past the range covered by the PMD page that maps @addr. */
static inline unsigned long dmirror_pt_pmd_end(unsigned long addr)
{
	unsigned long base = addr & PUD_MASK;
	unsigned long span = (long)PTRS_PER_PMD << PMD_SHIFT;

	return base + span;
}
/* First address past the range covered by the PTE page that maps @addr. */
static inline unsigned long dmirror_pt_pte_end(unsigned long addr)
{
	unsigned long base = addr & PMD_MASK;
	unsigned long span = (long)PTRS_PER_PTE << PAGE_SHIFT;

	return base + span;
}
/*
 * Callback invoked by dummy_pt_walk() for each span of addresses that falls
 * inside one PTE-level page.
 *
 * @dmirror: mirror being walked
 * @range:   copy of the caller's range with start/end narrowed to the span
 * @dpte:    pointer to the mirror entry for range->start; following entries
 *           are for the following pages
 * @private: opaque pointer passed through from the dummy_pt_walk() caller
 *
 * A non-zero return value aborts the walk and is returned to its caller.
 */
typedef int (*dmirror_walk_cb_t)(struct dmirror *dmirror,
struct hmm_range *range,
unsigned long *dpte,
void *private);
/*
 * dummy_pt_walk() - walk the mirror page table over [range->start, range->end)
 *
 * @dmirror:  mirror whose software page table is walked
 * @cb:       callback run once per PTE-level page intersecting the range
 * @range:    address range to walk; range->start must differ from range->end
 * @private:  opaque pointer forwarded to @cb
 * @populate: when true, missing PUD/PMD/PTE pages are allocated (zeroed) and
 *            linked in; when false, address spans whose intermediate page is
 *            missing are skipped without calling @cb
 *
 * Returns 0 on success, -ENOMEM if a table page could not be allocated, or
 * the first non-zero value returned by @cb.
 *
 * NOTE(review): every caller in this file holds dmirror->pt.lock around the
 * walk; the function itself does no locking.
 */
static int dummy_pt_walk(struct dmirror *dmirror,
dmirror_walk_cb_t cb,
struct hmm_range *range,
void *private,
bool populate)
{
unsigned long start = range->start;
unsigned long *dpgd = &dmirror->pt.pgd[dmirror_pt_pgd(start)];
unsigned long addr = start & PAGE_MASK;
unsigned long end = range->end;
/* An empty range would make the addr != end loops below never terminate. */
BUG_ON(start == end);
/* Top level: one iteration per PGD entry touched by the range. */
for (; addr != end; dpgd++) {
unsigned long pud_end, *dpud;
struct page *pud_page;
/* Clamp this iteration to the end of the range. */
pud_end = min(end, dmirror_pt_pud_end(addr));
pud_page = dmirror_pt_page(*dpgd);
if (!pud_page) {
if (!populate) {
/* Nothing mapped below this entry: skip the whole span. */
addr = pud_end;
continue;
}
pud_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
if (!pud_page) {
return -ENOMEM;
}
*dpgd = dmirror_pt_from_page(pud_page);
}
/* Table pages may be highmem, so map them while they are accessed. */
dpud = kmap(pud_page);
dpud = &dpud[dmirror_pt_pud(addr)];
for (; addr != pud_end; dpud++) {
unsigned long pmd_end, *dpmd;
struct page *pmd_page;
pmd_end = min(end, dmirror_pt_pmd_end(addr));
pmd_page = dmirror_pt_page(*dpud);
if (!pmd_page) {
if (!populate) {
addr = pmd_end;
continue;
}
pmd_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
if (!pmd_page) {
/* Unwind the mapping taken at the level above before bailing out. */
kunmap(pud_page);
return -ENOMEM;
}
*dpud = dmirror_pt_from_page(pmd_page);
}
dpmd = kmap(pmd_page);
dpmd = &dpmd[dmirror_pt_pmd(addr)];
for (; addr != pmd_end; dpmd++) {
unsigned long *dpte, pte_end;
struct hmm_range pte_range;
struct page *pte_page;
int ret;
/* Give the callback a private copy narrowed to this PTE page. */
memcpy(&pte_range, range,
sizeof(struct hmm_range));
pte_range.flags = range->flags;
pte_range.values = range->values;
pte_end = min(end, dmirror_pt_pte_end(addr));
pte_range.start = addr;
pte_range.end = pte_end;
pte_page = dmirror_pt_page(*dpmd);
if (!pte_page) {
if (!populate) {
addr = pte_end;
continue;
}
pte_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
if (!pte_page) {
kunmap(pmd_page);
kunmap(pud_page);
return -ENOMEM;
}
*dpmd = dmirror_pt_from_page(pte_page);
}
dpte = kmap(pte_page);
dpte = &dpte[dmirror_pt_pte(addr)];
/* The callback handles every entry in [addr, pte_end). */
ret = cb(dmirror, &pte_range, dpte, private);
kunmap(pte_page);
addr = pte_end;
if (ret) {
/* Callback asked to stop: drop the upper-level mappings and propagate. */
kunmap(pmd_page);
kunmap(pud_page);
return ret;
}
}
kunmap(pmd_page);
addr = pmd_end;
}
kunmap(pud_page);
addr = pud_end;
}
return 0;
}
/*
 * dummy_bounce_init() - set up a bounce buffer
 *
 * @bounce: buffer descriptor to initialise
 * @size:   number of bytes of backing storage to allocate
 * @addr:   mirrored virtual address the start of the buffer corresponds to
 *
 * Records @addr and @size and allocates the storage with vmalloc(). The
 * cpages counter is left untouched. Returns 0 on success or -ENOMEM.
 */
int dummy_bounce_init(struct dummy_bounce *bounce,
		      unsigned long size,
		      unsigned long addr)
{
	bounce->addr = addr;
	bounce->size = size;
	bounce->ptr = vmalloc(size);

	return bounce->ptr ? 0 : -ENOMEM;
}
int dummy_bounce_copy_from(struct dummy_bounce *bounce, unsigned long addr)
{
unsigned long end = (addr & PAGE_MASK) + bounce->size;
char __user *uptr = (void __user *)(addr & PAGE_MASK);
void *ptr = bounce->ptr;
for (; addr < end; addr += PAGE_SIZE, ptr += PAGE_SIZE, uptr += PAGE_SIZE) {
int ret;
ret = copy_from_user(ptr, uptr, PAGE_SIZE);
if (ret)
return ret;
}
return 0;
}
int dummy_bounce_copy_to(struct dummy_bounce *bounce, unsigned long addr)
{
unsigned long end = (addr & PAGE_MASK) + bounce->size;
char __user *uptr = (void __user *)(addr & PAGE_MASK);
void *ptr = bounce->ptr;
for (; addr < end; addr += PAGE_SIZE, ptr += PAGE_SIZE, uptr += PAGE_SIZE) {
int ret;
ret = copy_to_user(uptr, ptr, PAGE_SIZE);
if (ret)
return ret;
}
return 0;
}
/* Release the storage allocated by dummy_bounce_init(). */
void dummy_bounce_fini(struct dummy_bounce *bounce)
{
	void *storage = bounce->ptr;

	vfree(storage);
}
/*
 * Walk callback used for invalidation: zero every mirror entry covering
 * [range->start, range->end). @dmirror and @private are unused. Always
 * returns 0.
 */
static int dummy_do_update(struct dmirror *dmirror,
			   struct hmm_range *range,
			   unsigned long *dpte,
			   void *private)
{
	unsigned long cur;

	for (cur = range->start; cur < range->end; cur += PAGE_SIZE)
		*dpte++ = 0;

	return 0;
}
static void dummy_update(struct hmm_mirror *mirror,
enum hmm_update_type update,
unsigned long start,
unsigned long end)
{
struct dmirror *dmirror = container_of(mirror, struct dmirror, mirror);
struct hmm_range range;
range.start = start;
range.end = end;
down_write(&dmirror->pt.lock);
dummy_pt_walk(dmirror, dummy_do_update, &range, NULL, false);
up_write(&dmirror->pt.lock);
}
/* Mirror callbacks installed on every struct dmirror by dmirror_new(). */
static const struct hmm_mirror_ops dmirror_ops = {
.sync_cpu_device_pagetables = &dummy_update,
};
static int dmirror_pt_init(struct dmirror *dmirror)
{
init_rwsem(&dmirror->pt.lock);
return 0;
}
/* dmirror_new() - allocate and initialize dummy mirror struct.
 *
 * @mdevice: The dummy device this mirror is associated with.
 * @filp: The active device file descriptor this mirror is associated with.
 *
 * Registers an HMM mirror for the address space of the calling task.
 * Returns the new mirror, or NULL if the task has no mm, memory could not be
 * allocated, or the HMM registration failed.
 *
 * The mm reference taken with get_task_mm() is only needed for the duration
 * of the registration and is dropped on every exit path.
 */
static struct dmirror *dmirror_new(struct dmirror_device *mdevice,
				   struct file *filp)
{
	struct mm_struct *mm = get_task_mm(current);
	struct dmirror *dmirror;
	int r;

	if (!mm)
		return NULL;

	/* Mirror this process address space */
	dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
	if (dmirror == NULL)
		goto out_mmput;

	dmirror->mdevice = mdevice;
	dmirror->filp = filp;
	if (dmirror_pt_init(dmirror))
		goto out_free;

	dmirror->mm = mm;
	dmirror->mirror.ops = &dmirror_ops;

	down_write(&mm->mmap_sem);
	r = hmm_mirror_register(&dmirror->mirror, mm);
	up_write(&mm->mmap_sem);
	if (r)
		goto out_free;

	mmput(mm);
	return dmirror;

out_free:
	kfree(dmirror);
	dmirror = NULL;
out_mmput:
	mmput(mm);
	return dmirror;
}
static void dmirror_del(struct dmirror *dmirror)
{
hmm_mirror_unregister(&dmirror->mirror);
kfree(dmirror);
}
/*
* Below are the file operations for the dummy device file. Only ioctl matters.
*
* Note this is highly specific to the dummy device driver and should not be
* construed as an example of how to design the API a real device driver would
* expose to userspace.
*/
/* read() is not supported on the dummy device; always fails with -EINVAL. */
static ssize_t dummy_fops_read(struct file *filp,
			       char __user *buf,
			       size_t count,
			       loff_t *ppos)
{
	const ssize_t unsupported = -EINVAL;

	return unsupported;
}
/* write() is not supported on the dummy device; always fails with -EINVAL. */
static ssize_t dummy_fops_write(struct file *filp,
				const char __user *buf,
				size_t count,
				loff_t *ppos)
{
	const ssize_t unsupported = -EINVAL;

	return unsupported;
}
/* mmap() of the dummy device file is forbidden; always fails with -EINVAL. */
static int dummy_fops_mmap(struct file *filp, struct vm_area_struct *vma)
{
	const int unsupported = -EINVAL;

	return unsupported;
}
/*
 * open() handler: create a mirror of the caller's address space and attach
 * it to the file.
 *
 * Returns -EINVAL for O_EXCL opens, -ENOMEM when the mirror could not be
 * created (private_data is then left NULL), 0 otherwise.
 */
static int dummy_fops_open(struct inode *inode, struct file *filp)
{
	struct dmirror_device *mdevice;
	struct dmirror *dmirror;

	/* No exclusive opens. */
	if (filp->f_flags & O_EXCL)
		return -EINVAL;

	mdevice = container_of(inode->i_cdev, struct dmirror_device, cdevice);
	dmirror = dmirror_new(mdevice, filp);
	filp->private_data = dmirror;
	if (!dmirror)
		return -ENOMEM;

	return 0;
}
/*
 * release() handler: log the device page allocation counters and destroy
 * the mirror attached to the file, if any. Always returns 0.
 */
static int dummy_fops_release(struct inode *inode, struct file *filp)
{
	struct dmirror *dmirror = filp->private_data;
	struct dmirror_device *mdevice;

	if (dmirror) {
		mdevice = dmirror->mdevice;
		printk(KERN_INFO "DEVICE PAGE %ld %ld (%ld)\n", mdevice->calloc, mdevice->cfree, mdevice->calloc - mdevice->cfree);
		dmirror_del(dmirror);
		filp->private_data = NULL;
	}

	return 0;
}
/* State shared between dummy_fault() and its walk callback dummy_do_fault(). */
struct dummy_fault {
/* pfn array filled in by HMM for the range being processed. */
uint64_t *pfns;
/* Address that pfns[0] corresponds to. */
unsigned long start;
/* Number of pages found not valid, or not writable on a write fault. */
unsigned long missing;
/* True when the fault is for write access. */
bool write;
/* Set when a special or error pfn was found in the range. */
bool invalid;
};
/*
 * dummy_do_fault() - walk callback copying HMM results into the mirror
 *
 * @dmirror: mirror being populated (unused)
 * @range:   span handled by this call; also carries the flags[]/values[]
 *           tables describing the pfn encoding
 * @dpte:    mirror entry for range->start
 * @private: struct dummy_fault with the pfn array and result counters
 *
 * For every page of the span the matching pfns[] slot is inspected: special
 * and error entries set dfault->invalid, entries that are not valid bump
 * dfault->missing, and valid ones are installed in the mirror. A valid but
 * read-only entry is still installed, and counted as missing when the fault
 * was for write. Always returns 0; the caller looks at the counters.
 */
static int dummy_do_fault(struct dmirror *dmirror,
			  struct hmm_range *range,
			  unsigned long *dpte,
			  void *private)
{
	unsigned long addr = range->start;
	struct dummy_fault *dfault = private;
	unsigned long idx = (addr - dfault->start) >> PAGE_SHIFT;
	unsigned long end = range->end;
	uint64_t *pfns = dfault->pfns;

	for (; addr < end; addr += PAGE_SIZE, ++dpte, ++idx) {
		struct page *page;

		/*
		 * Special pfn are device memory ie page inserted inside the
		 * CPU page table with either vm_insert_pfn or vm_insert_page
		 * in both case we assume that device can not access this
		 * memory safely.
		 *
		 * The HMM_PFN_ERROR is if it is accessing invalid memory
		 * either because of memory error (hardware detected memory
		 * corruption) or more likely because of truncate on mmap
		 * file.
		 */
		if ((pfns[idx] & (range->values[HMM_PFN_SPECIAL] |
				  range->values[HMM_PFN_ERROR]))) {
			dfault->invalid = true;
			continue;
		}
		if (!(pfns[idx] & range->flags[HMM_PFN_VALID])) {
			dfault->missing++;
			continue;
		}
		page = hmm_pfn_to_page(range, pfns[idx]);
		*dpte = dmirror_pt_from_page(page);
		/*
		 * HMM_PFN_WRITE is an index into range->flags[], not a bit
		 * mask: the write bit to test is the one this driver
		 * registered in that slot. Masking with the index itself
		 * tested an unrelated bit and marked read-only pages writable.
		 */
		if (pfns[idx] & range->flags[HMM_PFN_WRITE]) {
			*dpte |= DPT_WRITE;
		} else if (dfault->write) {
			dfault->missing++;
		}
	}
	return 0;
}
/*
 * dummy_fault() - fault a range of the mirrored address space into the mirror
 *
 * @dmirror: mirror to populate
 * @start:   first address to fault (inclusive)
 * @end:     end of the range (exclusive)
 * @write:   true to require write access
 *
 * The range is processed in chunks of at most 64 pages that never cross a
 * vma boundary. For each chunk HMM is asked to fault the pages, and the
 * result is copied into the mirror page table by dummy_do_fault().
 *
 * Returns 0 on success, -EFAULT when no suitable vma covers the range or
 * some page could not be made available with the requested access, or the
 * error returned by HMM or by the page table walk.
 */
static int dummy_fault(struct dmirror *dmirror,
		       unsigned long start,
		       unsigned long end,
		       bool write)
{
	struct mm_struct *mm = dmirror->mm;
	unsigned long addr = start;
	uint64_t pfns[64];
	uint64_t flags[64];
	uint64_t values[64];

	memset(pfns, 0, sizeof(pfns));
	memset(flags, 0, sizeof(flags));
	memset(values, 0, sizeof(values));
	/* Tell HMM which bits/values to use when encoding pfns[]. */
	flags[HMM_PFN_VALID] = FLAG_HMM_PFN_VALID;
	flags[HMM_PFN_WRITE] = FLAG_HMM_PFN_WRITE;
	values[HMM_PFN_NONE] = VALUE_HMM_PFN_NONE;

	do {
		struct vm_area_struct *vma;
		struct dummy_fault dfault;
		struct hmm_range range;
		unsigned long next;
		int ret;

		down_read(&mm->mmap_sem);
		vma = find_vma_intersection(mm, addr, end);
		if (!vma) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		if (!(vma->vm_flags & VM_READ)) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		if (write && !(vma->vm_flags & VM_WRITE)) {
			up_read(&mm->mmap_sem);
			return -EFAULT;
		}
		/* Clamp the chunk to 64 pages, the request and the vma. */
		addr = max(vma->vm_start, addr);
		next = min(min(addr + (64 << PAGE_SHIFT), end), vma->vm_end);
		range.vma = vma;
		range.start = addr;
		range.end = next;
		range.pfns = pfns;
		range.flags = flags;
		range.values = values;
		range.pfn_shift = HPFN_SHIFT;
		ret = hmm_vma_fault(&range, false);
		switch (ret) {
		case 0:
			break;
		case -EAGAIN:
			/*
			 * Retry the chunk from scratch.
			 * NOTE(review): no up_read() here; this relies on
			 * hmm_vma_fault() having released mmap_sem when it
			 * returns -EAGAIN. Confirm against the HMM version
			 * in use.
			 */
			continue;
		default:
			up_read(&mm->mmap_sem);
			return ret;
		}
		/*
		 * The walk below allocates missing table pages and writes
		 * mirror entries, so the page table lock must be held for
		 * write; a read lock would let concurrent faults, reads and
		 * writes on the same mirror race on those updates.
		 */
		down_write(&dmirror->pt.lock);
		if (!hmm_vma_range_done(&range)) {
			/* Snapshot was invalidated meanwhile: redo the chunk. */
			up_write(&dmirror->pt.lock);
			up_read(&mm->mmap_sem);
			continue;
		}
		dfault.invalid = false;
		dfault.write = write;
		dfault.missing = 0;
		dfault.start = addr;
		dfault.pfns = pfns;
		ret = dummy_pt_walk(dmirror, dummy_do_fault,
				    &range, &dfault, true);
		up_write(&dmirror->pt.lock);
		up_read(&mm->mmap_sem);
		if (ret)
			return ret;
		if (dfault.invalid)
			return -EFAULT;
		if (!dfault.missing) {
			addr = next;
		} else {
			return -EFAULT;
		}
	} while (addr != end);

	return 0;
}
/*
 * Tell whether @page is a ZONE_DEVICE page whose pgmap data points at the
 * device memory of @mdevice.
 */
static bool dummy_device_is_mine(struct dmirror_device *mdevice,
				 struct page *page)
{
	return is_zone_device_page(page) &&
	       page->pgmap->data == mdevice->devmem;
}
/*
 * dummy_do_read() - walk callback copying mirrored pages into a bounce buffer
 *
 * @dmirror: mirror being read
 * @range:   span handled by this call
 * @dpte:    mirror entry for range->start
 * @private: struct dummy_bounce receiving the data
 *
 * Each page of the span is copied to the matching offset of the bounce
 * buffer and counted in bounce->cpages. For a device page belonging to this
 * driver, the data is read from the page stored as its driver data.
 *
 * Returns 0, or -ENOENT as soon as an entry is not valid or refers to a
 * device page that is not ours.
 */
static int dummy_do_read(struct dmirror *dmirror,
			 struct hmm_range *range,
			 unsigned long *dpte,
			 void *private)
{
	struct dummy_bounce *bounce = private;
	unsigned long cur = range->start;
	void *dst = bounce->ptr + ((cur - bounce->addr) & PAGE_MASK);

	while (cur < range->end) {
		struct page *page = dmirror_pt_page(*dpte);
		void *src;

		if (!page)
			return -ENOENT;

		if (is_zone_device_page(page)) {
			if (!dummy_device_is_mine(dmirror->mdevice, page))
				return -ENOENT;
			/* Device pages are backed by a regular page kept as drvdata. */
			page = (void *)hmm_devmem_page_get_drvdata(page);
		}

		src = kmap(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap(page);

		dst += PAGE_SIZE;
		bounce->cpages++;
		cur += PAGE_SIZE;
		++dpte;
	}

	return 0;
}
static int dummy_read(struct dmirror *dmirror,
struct hmm_dmirror_read *dread)
{
struct dummy_bounce bounce;
struct hmm_range range;
unsigned long start, end;
unsigned long size;
int ret;
if ((dread->ptr & (~PAGE_MASK)) || (dread->addr & (~PAGE_MASK)))
return -EINVAL;
if (dread->addr >= (dread->addr + (dread->npages << PAGE_SHIFT)))
return -EINVAL;
start = dread->addr & PAGE_MASK;
size = (dread->npages << PAGE_SHIFT);
end = start + (dread->npages << PAGE_SHIFT);
ret = dummy_bounce_init(&bounce, size, start);
if (ret)
return ret;
again:
dread->dpages = 0;
bounce.cpages = 0;
range.start = start;
range.end = end;
down_read(&dmirror->pt.lock);
ret = dummy_pt_walk(dmirror, dummy_do_read,
&range, &bounce, true);
up_read(&dmirror->pt.lock);
if (ret == -ENOENT) {
ret = dummy_fault(dmirror, start, end, false);
if (ret) {
dummy_bounce_fini(&bounce);
return ret;
}
goto again;
}
ret = dummy_bounce_copy_to(&bounce, dread->ptr);
dread->cpages = bounce.cpages;
dummy_bounce_fini(&bounce);
return ret;
}
/*
 * dummy_do_write() - walk callback copying a bounce buffer into mirrored pages
 *
 * @dmirror: mirror being written
 * @range:   span handled by this call
 * @dpte:    mirror entry for range->start
 * @private: struct dummy_bounce providing the data
 *
 * Each page of the span is overwritten from the matching offset of the
 * bounce buffer and counted in bounce->cpages. For a device page belonging
 * to this driver, the data goes to the page stored as its driver data.
 *
 * Returns 0, or -ENOENT as soon as an entry is not valid, lacks DPT_WRITE,
 * or refers to a device page that is not ours.
 */
static int dummy_do_write(struct dmirror *dmirror,
			  struct hmm_range *range,
			  unsigned long *dpte,
			  void *private)
{
	struct dummy_bounce *bounce = private;
	unsigned long cur = range->start;
	void *src = bounce->ptr + ((cur - bounce->addr) & PAGE_MASK);

	while (cur < range->end) {
		struct page *page = dmirror_pt_page(*dpte);
		void *dst;

		if (!page)
			return -ENOENT;
		if (!(*dpte & DPT_WRITE))
			return -ENOENT;

		if (is_zone_device_page(page)) {
			if (!dummy_device_is_mine(dmirror->mdevice, page))
				return -ENOENT;
			/* Device pages are backed by a regular page kept as drvdata. */
			page = (void *)hmm_devmem_page_get_drvdata(page);
		}

		dst = kmap(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap(page);

		src += PAGE_SIZE;
		bounce->cpages++;
		cur += PAGE_SIZE;
		++dpte;
	}

	return 0;
}
static int dummy_write(struct dmirror *dmirror,
struct hmm_dmirror_write *dwrite)
{
struct dummy_bounce bounce;
struct hmm_range range;
unsigned long start, end;
unsigned long size;
int ret;
if ((dwrite->ptr & (~PAGE_MASK)) || (dwrite->addr & (~PAGE_MASK)))
return -EINVAL;
if (dwrite->addr >= (dwrite->addr + (dwrite->npages << PAGE_SHIFT)))
return -EINVAL;
start = (unsigned long)dwrite->addr & PAGE_MASK;
size = (unsigned long)(dwrite->npages << PAGE_SHIFT);
end = (unsigned long)(start + (dwrite->npages << PAGE_SHIFT));
ret = dummy_bounce_init(&bounce, size, dwrite->addr & PAGE_MASK);
if (ret)
return ret;
ret = dummy_bounce_copy_from(&bounce, dwrite->ptr);
if (ret)
return ret;
again:
bounce.cpages = 0;
dwrite->dpages = 0;
range.start = start;
range.end = end;
down_read(&dmirror->pt.lock);
ret = dummy_pt_walk(dmirror, dummy_do_write,
&range, &bounce, true);
up_read(&dmirror->pt.lock);
if (ret == -ENOENT) {
ret = dummy_fault(dmirror, start, end, true);
if (ret) {
dummy_bounce_fini(&bounce);
return ret;
}
goto again;
}
dwrite->cpages = bounce.cpages;
dummy_bounce_fini(&bounce);
return 0;
}
/*
 * dummy_device_alloc_page() - take a device page off the free list
 *
 * A regular system page is allocated to act as the storage behind the fake
 * device page and is recorded as the device page's driver data. The device
 * page is returned locked and with an extra reference.
 *
 * Returns NULL when no system page could be allocated, the free list is
 * empty, or the device page at the head of the list could not be locked (it
 * is then put back on the list).
 */
static struct page *dummy_device_alloc_page(struct dmirror_device *mdevice)
{
	struct page *dpage;
	struct page *rpage;

	/*
	 * This is a fake device so we alloc real system memory to fake
	 * our device memory
	 */
	rpage = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
	if (!rpage)
		return NULL;

	spin_lock(&mdevice->lock);
	dpage = mdevice->frees;
	if (!dpage)
		goto fail_unlock;

	/* Pop the head of the list ... */
	mdevice->frees = dpage->s_mem;
	if (!trylock_page(dpage)) {
		/* ... and push it back if it cannot be locked. */
		dpage->s_mem = mdevice->frees;
		mdevice->frees = dpage;
		goto fail_unlock;
	}
	mdevice->calloc++;
	spin_unlock(&mdevice->lock);

	hmm_devmem_page_set_drvdata(dpage, (unsigned long)rpage);
	get_page(dpage);
	return dpage;

fail_unlock:
	spin_unlock(&mdevice->lock);
	__free_page(rpage);
	return NULL;
}
/* Private data handed to the dmirror_migrate_ops callbacks. */
struct dummy_migrate {
/* Device whose memory pages are migrated to. */
struct dmirror_device *mdevice;
/* Userspace request; its npages field counts the pages migrated. */
struct hmm_dmirror_migrate *dmigrate;
};
/*
 * dummy_migrate_alloc_and_copy() - migrate_vma callback for HMM_DMIRROR_MIGRATE
 *
 * @vma:      vma being migrated (unused)
 * @src_pfns: source entries, one per page of [start, end)
 * @dst_pfns: destination entries to fill in
 * @start:    first address of the range
 * @end:      end of the range (exclusive)
 * @private:  struct dummy_migrate; nothing is done when NULL
 *
 * For every source entry flagged MIGRATE_PFN_MIGRATE a device page is
 * allocated, the source data (if there is a source page) is copied into its
 * backing page, and the destination entry is set to the locked device page.
 * All other destination entries are left at 0, as are those for which no
 * device page could be allocated or whose source is a device page owned by
 * someone else.
 */
static void dummy_migrate_alloc_and_copy(struct vm_area_struct *vma,
					 const unsigned long *src_pfns,
					 unsigned long *dst_pfns,
					 unsigned long start,
					 unsigned long end,
					 void *private)
{
	struct dummy_migrate *dmigrate = private;
	struct dmirror_device *mdevice;
	unsigned long addr;
	unsigned long i;

	if (!dmigrate)
		return;
	mdevice = dmigrate->mdevice;

	for (i = 0, addr = start; addr < end; addr += PAGE_SIZE, i++) {
		struct page *spage = migrate_pfn_to_page(src_pfns[i]);
		struct page *dpage, *rpage;

		dst_pfns[i] = 0;

		/* Only entries selected for migration are handled. */
		if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;

		if (spage && (src_pfns[i] & MIGRATE_PFN_DEVICE)) {
			if (!dummy_device_is_mine(mdevice, spage))
				continue;
			/* Read from the system page backing our device page. */
			spage = (void *)hmm_devmem_page_get_drvdata(spage);
		}

		dpage = dummy_device_alloc_page(mdevice);
		if (!dpage) {
			dst_pfns[i] = 0;
			continue;
		}

		rpage = (void *)hmm_devmem_page_get_drvdata(dpage);
		if (spage)
			copy_highpage(rpage, spage);

		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
			      MIGRATE_PFN_DEVICE |
			      MIGRATE_PFN_LOCKED;
	}
}
/*
 * dummy_migrate_finalize_and_map() - count the pages that were migrated
 *
 * @vma:      vma being migrated (unused)
 * @src_pfns: source entries, one per page of [start, end)
 * @dst_pfns: destination entries, one per page of [start, end)
 * @start:    first address of the range
 * @end:      end of the range (exclusive)
 * @private:  struct dummy_migrate; nothing is done when it, or the request
 *            it points to, is NULL
 *
 * Increments the request's npages for every page whose destination is one
 * of our device pages and whose source entry still has MIGRATE_PFN_MIGRATE
 * set.
 */
static void dummy_migrate_finalize_and_map(struct vm_area_struct *vma,
					   const unsigned long *src_pfns,
					   const unsigned long *dst_pfns,
					   unsigned long start,
					   unsigned long end,
					   void *private)
{
	struct dummy_migrate *dmigrate = private;
	unsigned long addr;
	unsigned long i;

	if (!dmigrate || !dmigrate->dmigrate)
		return;

	for (i = 0, addr = start; addr < end; addr += PAGE_SIZE, i++) {
		struct page *page = migrate_pfn_to_page(dst_pfns[i]);

		if (page &&
		    (src_pfns[i] & MIGRATE_PFN_MIGRATE) &&
		    dummy_device_is_mine(dmigrate->mdevice, page))
			dmigrate->dmigrate->npages++;
	}
}
/* Callbacks passed to migrate_vma() by dummy_migrate(). */
static const struct migrate_vma_ops dmirror_migrate_ops = {
.alloc_and_copy = dummy_migrate_alloc_and_copy,
.finalize_and_map = dummy_migrate_finalize_and_map,
};
/*
 * dummy_migrate() - HMM_DMIRROR_MIGRATE: migrate a range to device memory
 *
 * @dmirror:  mirror whose address space is affected
 * @dmigrate: request; on input addr/npages describe the range, on output
 *            npages is the number of pages actually migrated
 *
 * The whole range must lie inside a single vma. It is migrated in chunks of
 * at most 64 pages with migrate_vma().
 *
 * Returns 0 on success (including an empty range), -EINVAL when the range
 * wraps around or is not covered by one vma, or the error from
 * migrate_vma().
 */
static int dummy_migrate(struct dmirror *dmirror,
			 struct hmm_dmirror_migrate *dmigrate)
{
	unsigned long addr = dmigrate->addr, end;
	struct mm_struct *mm = dmirror->mm;
	struct vm_area_struct *vma;
	struct dummy_migrate tmp;
	/* Must start at 0: the loop below may legitimately run zero times. */
	int ret = 0;

	tmp.mdevice = dmirror->mdevice;
	tmp.dmigrate = dmigrate;

	end = addr + (dmigrate->npages << PAGE_SHIFT);
	/* A user supplied page count must not wrap the end of the range. */
	if (end < addr)
		return -EINVAL;

	down_read(&mm->mmap_sem);
	vma = find_vma_intersection(mm, addr, end);
	if (!vma || vma->vm_start > addr || vma->vm_end < end) {
		ret = -EINVAL;
		goto out;
	}

	/* npages is reused as the result counter by the finalize callback. */
	for (dmigrate->npages = 0; addr < end;) {
		unsigned long src_pfns[64];
		unsigned long dst_pfns[64];
		unsigned long next;

		next = min(end, addr + (64 << PAGE_SHIFT));
		ret = migrate_vma(&dmirror_migrate_ops, vma, addr,
				  next, src_pfns, dst_pfns, &tmp);
		if (ret)
			goto out;
		addr = next;
	}
out:
	up_read(&mm->mmap_sem);
	return ret;
}
static long dummy_fops_unlocked_ioctl(struct file *filp,
unsigned int command,
unsigned long arg)
{
void __user *uarg = (void __user *)arg;
struct hmm_dmirror_migrate dmigrate;
struct hmm_dmirror_write dwrite;
struct hmm_dmirror_read dread;
struct dmirror *dmirror;
int ret;
dmirror = filp->private_data;
if (!dmirror)
return -EINVAL;
switch (command) {
case HMM_DMIRROR_READ:
ret = copy_from_user(&dread, uarg, sizeof(dread));
if (ret)
return ret;
ret = dummy_read(dmirror, &dread);
if (ret)
return ret;
return copy_to_user(uarg, &dread, sizeof(dread));
case HMM_DMIRROR_WRITE:
ret = copy_from_user(&dwrite, uarg, sizeof(dwrite));
if (ret)
return ret;
ret = dummy_write(dmirror, &dwrite);
if (ret)
return ret;
return copy_to_user(uarg, &dwrite, sizeof(dwrite));
case HMM_DMIRROR_MIGRATE:
ret = copy_from_user(&dmigrate, uarg, sizeof(dmigrate));
if (ret)
return ret;
ret = dummy_migrate(dmirror, &dmigrate);
if (ret)
return ret;
return copy_to_user(uarg, &dmigrate, sizeof(dmigrate));
default:
ret = -EINVAL;
break;
}
return ret;
}
/*
 * File operations of the dummy character device. read, write and mmap are
 * stubs that fail with -EINVAL; the real interface is the ioctl.
 */
static const struct file_operations dmirror_fops = {
.read = dummy_fops_read,
.write = dummy_fops_write,
.mmap = dummy_fops_mmap,
.open = dummy_fops_open,
.release = dummy_fops_release,
.unlocked_ioctl = dummy_fops_unlocked_ioctl,
.llseek = default_llseek,
.owner = THIS_MODULE,
};
/*
 * dummy_devmem_free() - hmm_devmem_ops.free callback
 *
 * @devmem: device memory the page belongs to
 * @page:   device page being released
 *
 * Frees the system page that backed the device page, clears the device
 * page's driver data and puts the device page back on the free list.
 */
static void dummy_devmem_free(struct hmm_devmem *devmem,
			      struct page *page)
{
	struct page *backing = (struct page *)hmm_devmem_page_get_drvdata(page);
	struct dmirror_device *mdevice = dev_get_drvdata(devmem->device);

	hmm_devmem_page_set_drvdata(page, 0);
	__free_page(backing);

	spin_lock(&mdevice->lock);
	mdevice->cfree++;
	/* Push the device page onto the head of the free list. */
	page->s_mem = mdevice->frees;
	mdevice->frees = page;
	spin_unlock(&mdevice->lock);
}
/* Private data handed to the dummy_devmem_migrate callbacks. */
struct dummy_devmem_fault {
/* Device whose memory is being migrated back to system memory. */
struct dmirror_device *mdevice;
};
/*
 * dummy_devmem_fault_alloc_and_copy() - migrate device pages back to RAM
 *
 * @vma:      vma being migrated
 * @src_pfns: source entries, one per page of [start, end)
 * @dst_pfns: destination entries to fill in
 * @start:    first address of the range
 * @end:      end of the range (exclusive)
 * @private:  struct dummy_devmem_fault naming the device
 *
 * For every source entry that is flagged MIGRATE_PFN_MIGRATE and is one of
 * our device pages, a locked system page is allocated, the data is copied
 * from the page backing the device page, and the destination entry is set
 * to the new page. Every other destination entry, and any for which the
 * allocation failed, is set to MIGRATE_PFN_ERROR.
 */
static void dummy_devmem_fault_alloc_and_copy(struct vm_area_struct *vma,
					      const unsigned long *src_pfns,
					      unsigned long *dst_pfns,
					      unsigned long start,
					      unsigned long end,
					      void *private)
{
	struct dummy_devmem_fault *fault = private;
	unsigned long addr;
	unsigned long i;

	for (i = 0, addr = start; addr < end; addr += PAGE_SIZE, i++) {
		struct page *dpage, *spage;

		/* Assume failure until a destination page is in place. */
		dst_pfns[i] = MIGRATE_PFN_ERROR;

		spage = migrate_pfn_to_page(src_pfns[i]);
		if (!spage)
			continue;
		if (!(src_pfns[i] & MIGRATE_PFN_MIGRATE))
			continue;
		if (!dummy_device_is_mine(fault->mdevice, spage))
			continue;

		/* The data lives in the system page kept as drvdata. */
		spage = (void *)hmm_devmem_page_get_drvdata(spage);

		dpage = hmm_vma_alloc_locked_page(vma, addr);
		if (!dpage) {
			dst_pfns[i] = MIGRATE_PFN_ERROR;
			continue;
		}

		copy_highpage(dpage, spage);
		dst_pfns[i] = migrate_pfn(page_to_pfn(dpage)) |
			      MIGRATE_PFN_LOCKED;
	}
}
/*
 * finalize_and_map callback for migrating back to system memory.
 * Intentionally empty: this driver does nothing at this step.
 * NOTE(review): has external linkage although it is only referenced from
 * dummy_devmem_migrate in this file.
 */
void dummy_devmem_fault_finalize_and_map(struct vm_area_struct *vma,
const unsigned long *src_pfns,
const unsigned long *dst_pfns,
unsigned long start,
unsigned long end,
void *private)
{
}
/* Callbacks used by dummy_devmem_fault() to migrate a page back to RAM. */
static const struct migrate_vma_ops dummy_devmem_migrate = {
.alloc_and_copy = dummy_devmem_fault_alloc_and_copy,
.finalize_and_map = dummy_devmem_fault_finalize_and_map,
};
/*
 * hmm_devmem_fault_range() - migrate back a virtual range of memory
 *
 * @devmem: hmm_devmem struct used to track and manage the ZONE_DEVICE memory
 * @vma: virtual memory area containing the range to be migrated
 * @ops: migration callbacks for allocating destination memory and copying
 * @src: array of unsigned long containing source pfns
 * @dst: array of unsigned long containing destination pfns
 * @start: start address of the range to migrate (inclusive)
 * @addr: fault address (must be inside the range)
 * @end: end address of the range to migrate (exclusive)
 * @private: pointer passed back to each of the callbacks
 * Returns: 0 on success, VM_FAULT_SIGBUS on error
 *
 * This is a wrapper around migrate_vma() which checks the migration status
 * for a given fault address and returns the corresponding page fault handler
 * status. That will be 0 on success, or VM_FAULT_SIGBUS if migration failed
 * for the faulting address.
 *
 * This is a helper intended to be used by the ZONE_DEVICE fault handler.
 */
int hmm_devmem_fault_range(struct hmm_devmem *devmem,
			   struct vm_area_struct *vma,
			   const struct migrate_vma_ops *ops,
			   unsigned long *src,
			   unsigned long *dst,
			   unsigned long start,
			   unsigned long addr,
			   unsigned long end,
			   void *private)
{
	/* Slot of the faulting page inside the src/dst arrays. */
	const unsigned long fault_idx = (addr - start) >> PAGE_SHIFT;

	if (migrate_vma(ops, vma, start, end, src, dst, private) != 0)
		return VM_FAULT_SIGBUS;

	return (dst[fault_idx] & MIGRATE_PFN_ERROR) ? VM_FAULT_SIGBUS : 0;
}
EXPORT_SYMBOL(hmm_devmem_fault_range);
/*
 * dummy_devmem_fault() - hmm_devmem_ops.fault callback
 *
 * @devmem: device memory the faulting page belongs to
 * @vma:    vma in which the fault happened
 * @addr:   faulting address
 * @page:   faulting device page (unused)
 * @flags:  fault flags (unused)
 * @pmdp:   pmd of the fault (unused)
 *
 * Migrates the single page at @addr back to system memory. Returns the
 * result of hmm_devmem_fault_range().
 */
static int dummy_devmem_fault(struct hmm_devmem *devmem,
			      struct vm_area_struct *vma,
			      unsigned long addr,
			      const struct page *page,
			      unsigned flags,
			      pmd_t *pmdp)
{
	struct dummy_devmem_fault fault;
	unsigned long src_pfns, dst_pfns = 0;
	/* FIXME demonstrate how we can adjust migrate range */
	const unsigned long range_start = addr;
	const unsigned long range_end = addr + PAGE_SIZE;

	fault.mdevice = dev_get_drvdata(devmem->device);

	return hmm_devmem_fault_range(devmem, vma, &dummy_devmem_migrate,
				      &src_pfns, &dst_pfns, range_start,
				      addr, range_end, &fault);
}
/* Device memory callbacks registered with hmm_devmem_add() in dmirror_probe(). */
static const struct hmm_devmem_ops dmirror_devmem_ops = {
.free = dummy_devmem_free,
.fault = dummy_devmem_fault,
};
static int dmirror_probe(struct platform_device *pdev)
{
struct dmirror_device *mdevice = platform_get_drvdata(pdev);
struct device *dev;
unsigned long pfn;
int ret;
mdevice->hmm_device = hmm_device_new(mdevice);
if (IS_ERR(mdevice->hmm_device))
return PTR_ERR(mdevice->hmm_device);
mdevice->devmem = hmm_devmem_add(&dmirror_devmem_ops,
&mdevice->hmm_device->device,
64 << 20);
if (IS_ERR(mdevice->devmem)) {
hmm_device_put(mdevice->hmm_device);
return PTR_ERR(mdevice->devmem);
}
ret = alloc_chrdev_region(&mdevice->dev, 0, 1, "HMM_DMIRROR");
if (ret < 0) {
hmm_devmem_remove(mdevice->devmem);
hmm_device_put(mdevice->hmm_device);
return ret;
}
mdevice->cl = class_create(THIS_MODULE, "chardrv");
if (IS_ERR_OR_NULL(mdevice->cl)) {
unregister_chrdev_region(mdevice->dev, 1);
hmm_devmem_remove(mdevice->devmem);
hmm_device_put(mdevice->hmm_device);
return PTR_ERR(mdevice->cl);
}
dev = device_create(mdevice->cl, NULL, mdevice->dev, NULL,
"hmm_dummy_device");
if (IS_ERR_OR_NULL(dev)) {
class_destroy(mdevice->cl);
unregister_chrdev_region(mdevice->dev, 1);
hmm_devmem_remove(mdevice->devmem);
hmm_device_put(mdevice->hmm_device);
return PTR_ERR(dev);
}
cdev_init(&mdevice->cdevice, &dmirror_fops);
ret = cdev_add(&mdevice->cdevice, mdevice->dev, 1);
if (ret) {
device_destroy(mdevice->cl, mdevice->dev);
class_destroy(mdevice->cl);
unregister_chrdev_region(mdevice->dev, 1);
hmm_devmem_remove(mdevice->devmem);
hmm_device_put(mdevice->hmm_device);
return ret;
}
/* Build list of free struct page */
spin_lock_init(&mdevice->lock);
spin_lock(&mdevice->lock);
mdevice->frees = NULL;
for (pfn = mdevice->devmem->pfn_first; pfn < mdevice->devmem->pfn_last; pfn++) {
struct page *page = pfn_to_page(pfn);
page->s_mem = mdevice->frees;
mdevice->frees = page;
}
mdevice->calloc = 0;
mdevice->cfree = 0;
spin_unlock(&mdevice->lock);
return 0;
}
static int dmirror_remove(struct platform_device *pdev)
{
struct dmirror_device *mdevice = platform_get_drvdata(pdev);
cdev_del(&mdevice->cdevice);
device_destroy(mdevice->cl, mdevice->dev);
class_destroy(mdevice->cl);
unregister_chrdev_region(mdevice->dev, 1);
hmm_devmem_remove(mdevice->devmem);
hmm_device_put(mdevice->hmm_device);
return 0;
}
/* The single platform device instance, allocated in hmm_dmirror_init(). */
static struct platform_device *dmirror_platform_device;
/*
 * Platform driver; its name is the same string hmm_dmirror_init() passes to
 * platform_device_alloc().
 */
static struct platform_driver dmirror_device_driver = {
.probe = dmirror_probe,
.remove = dmirror_remove,
.driver = {
.name = "HMM_DMIRROR",
},
};
/*
 * hmm_dmirror_init() - module entry point
 *
 * Allocates the device state, creates and adds the platform device carrying
 * it as driver data, and registers the platform driver.
 *
 * Returns 0 on success or a negative errno. The device state is freed on
 * every failure path; it is owned by this module, not by the platform
 * device, so dropping the platform device does not release it.
 */
static int __init hmm_dmirror_init(void)
{
	struct dmirror_device *mdevice;
	int ret;

	mdevice = kzalloc(sizeof(*mdevice), GFP_KERNEL);
	if (!mdevice)
		return -ENOMEM;

	dmirror_platform_device = platform_device_alloc("HMM_DMIRROR", -1);
	if (!dmirror_platform_device) {
		ret = -ENOMEM;
		goto err_free;
	}
	platform_set_drvdata(dmirror_platform_device, mdevice);
	mdevice->pdevice = dmirror_platform_device;

	ret = platform_device_add(dmirror_platform_device);
	if (ret < 0) {
		/* A device that was never added is released with put. */
		platform_device_put(dmirror_platform_device);
		goto err_free;
	}

	ret = platform_driver_register(&dmirror_device_driver);
	if (ret < 0) {
		platform_device_unregister(dmirror_platform_device);
		goto err_free;
	}

	pr_debug("hmm_dmirror loaded THIS IS A DANGEROUS MODULE !!!\n");
	return 0;

err_free:
	kfree(mdevice);
	return ret;
}
/*
 * hmm_dmirror_exit() - module exit point
 *
 * Unregisters the platform driver and device, then frees the device state.
 * The state pointer is fetched first because it is stored as driver data of
 * the platform device that is about to go away.
 */
static void __exit hmm_dmirror_exit(void)
{
	struct dmirror_device *mdevice =
		platform_get_drvdata(dmirror_platform_device);

	platform_driver_unregister(&dmirror_device_driver);
	platform_device_unregister(dmirror_platform_device);
	kfree(mdevice);
}
/* Module entry/exit hooks and license declaration. */
module_init(hmm_dmirror_init);
module_exit(hmm_dmirror_exit);
MODULE_LICENSE("GPL");