mirror of https://github.com/armbian/build.git
499 lines
15 KiB
Diff
499 lines
15 KiB
Diff
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
|
From: Iouri Tarassov <iourit@linux.microsoft.com>
|
|
Date: Wed, 19 Jan 2022 13:58:28 -0800
|
|
Subject: drivers: hv: dxgkrnl: Map (unmap) CPU address to device allocation
|
|
|
|
Implement ioctls to map/unmap CPU virtual addresses to compute device
|
|
allocations - LX_DXLOCK2 and LX_DXUNLOCK2.
|
|
|
|
The LX_DXLOCK2 ioctl maps a CPU virtual address to a compute device
|
|
allocation. The allocation could be located in system memory or local
|
|
device memory on the host. When the device allocation is created
|
|
from the guest system memory (existing sysmem allocation), the
|
|
allocation CPU address is known and is returned to the caller.
|
|
For other CPU visible allocations the code flow is the following:
|
|
1. A VM bus message is sent to the host to map the allocation
|
|
2. The host allocates a portion of the guest IO space and maps it
|
|
to the allocation backing store. The IO space address of the
|
|
allocation is returned back to the guest.
|
|
3. The guest allocates a CPU virtual address and maps it to the IO
|
|
space (see the dxg_map_iospace function).
|
|
4. The CPU VA is returned back to the caller
|
|
cpu_address_mapped and cpu_address_refcount are used to track how
|
|
many times an allocation was mapped.
|
|
|
|
The LX_DXUNLOCK2 ioctl unmaps a CPU virtual address from a compute
|
|
device allocation.
|
|
|
|
Signed-off-by: Iouri Tarassov <iourit@linux.microsoft.com>
|
|
[kms: Forward port to v6.1]
|
|
Signed-off-by: Kelsey Steele <kelseysteele@microsoft.com>
|
|
---
|
|
drivers/hv/dxgkrnl/dxgadapter.c | 11 +
|
|
drivers/hv/dxgkrnl/dxgkrnl.h | 14 +
|
|
drivers/hv/dxgkrnl/dxgvmbus.c | 107 +++++++
|
|
drivers/hv/dxgkrnl/dxgvmbus.h | 19 ++
|
|
drivers/hv/dxgkrnl/ioctl.c | 160 +++++++++-
|
|
include/uapi/misc/d3dkmthk.h | 30 ++
|
|
6 files changed, 339 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c
|
|
index 111111111111..222222222222 100644
|
|
--- a/drivers/hv/dxgkrnl/dxgadapter.c
|
|
+++ b/drivers/hv/dxgkrnl/dxgadapter.c
|
|
@@ -885,6 +885,15 @@ void dxgallocation_stop(struct dxgallocation *alloc)
|
|
vfree(alloc->pages);
|
|
alloc->pages = NULL;
|
|
}
|
|
+ dxgprocess_ht_lock_exclusive_down(alloc->process);
|
|
+ if (alloc->cpu_address_mapped) {
|
|
+ dxg_unmap_iospace(alloc->cpu_address,
|
|
+ alloc->num_pages << PAGE_SHIFT);
|
|
+ alloc->cpu_address_mapped = false;
|
|
+ alloc->cpu_address = NULL;
|
|
+ alloc->cpu_address_refcount = 0;
|
|
+ }
|
|
+ dxgprocess_ht_lock_exclusive_up(alloc->process);
|
|
}
|
|
|
|
void dxgallocation_free_handle(struct dxgallocation *alloc)
|
|
@@ -932,6 +941,8 @@ else
|
|
#endif
|
|
if (alloc->priv_drv_data)
|
|
vfree(alloc->priv_drv_data);
|
|
+ if (alloc->cpu_address_mapped)
|
|
+ pr_err("Alloc IO space is mapped: %p", alloc);
|
|
kfree(alloc);
|
|
}
|
|
|
|
diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h
|
|
index 111111111111..222222222222 100644
|
|
--- a/drivers/hv/dxgkrnl/dxgkrnl.h
|
|
+++ b/drivers/hv/dxgkrnl/dxgkrnl.h
|
|
@@ -708,6 +708,8 @@ struct dxgallocation {
|
|
struct d3dkmthandle alloc_handle;
|
|
/* Set to 1 when allocation belongs to resource. */
|
|
u32 resource_owner:1;
|
|
+ /* Set to 1 when 'cpu_address' is mapped to the IO space. */
|
|
+ u32 cpu_address_mapped:1;
|
|
/* Set to 1 when the allocatio is mapped as cached */
|
|
u32 cached:1;
|
|
u32 handle_valid:1;
|
|
@@ -719,6 +721,11 @@ struct dxgallocation {
|
|
#endif
|
|
/* Number of pages in the 'pages' array */
|
|
u32 num_pages;
|
|
+ /*
|
|
+	 * How many times dxgk_lock2 is called for an allocation, which is mapped
|
|
+ * to IO space.
|
|
+ */
|
|
+ u32 cpu_address_refcount;
|
|
/*
|
|
* CPU address from the existing sysmem allocation, or
|
|
* mapped to the CPU visible backing store in the IO space
|
|
@@ -837,6 +844,13 @@ int dxgvmb_send_wait_sync_object_cpu(struct dxgprocess *process,
|
|
d3dkmt_waitforsynchronizationobjectfromcpu
|
|
*args,
|
|
u64 cpu_event);
|
|
+int dxgvmb_send_lock2(struct dxgprocess *process,
|
|
+ struct dxgadapter *adapter,
|
|
+ struct d3dkmt_lock2 *args,
|
|
+ struct d3dkmt_lock2 *__user outargs);
|
|
+int dxgvmb_send_unlock2(struct dxgprocess *process,
|
|
+ struct dxgadapter *adapter,
|
|
+ struct d3dkmt_unlock2 *args);
|
|
int dxgvmb_send_create_hwqueue(struct dxgprocess *process,
|
|
struct dxgadapter *adapter,
|
|
struct d3dkmt_createhwqueue *args,
|
|
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c
|
|
index 111111111111..222222222222 100644
|
|
--- a/drivers/hv/dxgkrnl/dxgvmbus.c
|
|
+++ b/drivers/hv/dxgkrnl/dxgvmbus.c
|
|
@@ -2354,6 +2354,113 @@ int dxgvmb_send_wait_sync_object_gpu(struct dxgprocess *process,
|
|
return ret;
|
|
}
|
|
|
|
+int dxgvmb_send_lock2(struct dxgprocess *process,
|
|
+ struct dxgadapter *adapter,
|
|
+ struct d3dkmt_lock2 *args,
|
|
+ struct d3dkmt_lock2 *__user outargs)
|
|
+{
|
|
+ int ret;
|
|
+ struct dxgkvmb_command_lock2 *command;
|
|
+ struct dxgkvmb_command_lock2_return result = { };
|
|
+ struct dxgallocation *alloc = NULL;
|
|
+ struct dxgvmbusmsg msg = {.hdr = NULL};
|
|
+
|
|
+ ret = init_message(&msg, adapter, process, sizeof(*command));
|
|
+ if (ret)
|
|
+ goto cleanup;
|
|
+ command = (void *)msg.msg;
|
|
+
|
|
+ command_vgpu_to_host_init2(&command->hdr,
|
|
+ DXGK_VMBCOMMAND_LOCK2, process->host_handle);
|
|
+ command->args = *args;
|
|
+
|
|
+ ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size,
|
|
+ &result, sizeof(result));
|
|
+ if (ret < 0)
|
|
+ goto cleanup;
|
|
+
|
|
+ ret = ntstatus2int(result.status);
|
|
+ if (ret < 0)
|
|
+ goto cleanup;
|
|
+
|
|
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
|
|
+ alloc = hmgrtable_get_object_by_type(&process->handle_table,
|
|
+ HMGRENTRY_TYPE_DXGALLOCATION,
|
|
+ args->allocation);
|
|
+ if (alloc == NULL) {
|
|
+ DXG_ERR("invalid alloc");
|
|
+ ret = -EINVAL;
|
|
+ } else {
|
|
+ if (alloc->cpu_address) {
|
|
+ args->data = alloc->cpu_address;
|
|
+ if (alloc->cpu_address_mapped)
|
|
+ alloc->cpu_address_refcount++;
|
|
+ } else {
|
|
+ u64 offset = (u64)result.cpu_visible_buffer_offset;
|
|
+
|
|
+ args->data = dxg_map_iospace(offset,
|
|
+ alloc->num_pages << PAGE_SHIFT,
|
|
+ PROT_READ | PROT_WRITE, alloc->cached);
|
|
+ if (args->data) {
|
|
+ alloc->cpu_address_refcount = 1;
|
|
+ alloc->cpu_address_mapped = true;
|
|
+ alloc->cpu_address = args->data;
|
|
+ }
|
|
+ }
|
|
+ if (args->data == NULL) {
|
|
+ ret = -ENOMEM;
|
|
+ } else {
|
|
+ ret = copy_to_user(&outargs->data, &args->data,
|
|
+ sizeof(args->data));
|
|
+ if (ret) {
|
|
+ DXG_ERR("failed to copy data");
|
|
+ ret = -EINVAL;
|
|
+ alloc->cpu_address_refcount--;
|
|
+ if (alloc->cpu_address_refcount == 0) {
|
|
+ dxg_unmap_iospace(alloc->cpu_address,
|
|
+ alloc->num_pages << PAGE_SHIFT);
|
|
+ alloc->cpu_address_mapped = false;
|
|
+ alloc->cpu_address = NULL;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
|
|
+
|
|
+cleanup:
|
|
+ free_message(&msg, process);
|
|
+ if (ret)
|
|
+ DXG_TRACE("err: %d", ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+int dxgvmb_send_unlock2(struct dxgprocess *process,
|
|
+ struct dxgadapter *adapter,
|
|
+ struct d3dkmt_unlock2 *args)
|
|
+{
|
|
+ int ret;
|
|
+ struct dxgkvmb_command_unlock2 *command;
|
|
+ struct dxgvmbusmsg msg = {.hdr = NULL};
|
|
+
|
|
+ ret = init_message(&msg, adapter, process, sizeof(*command));
|
|
+ if (ret)
|
|
+ goto cleanup;
|
|
+ command = (void *)msg.msg;
|
|
+
|
|
+ command_vgpu_to_host_init2(&command->hdr,
|
|
+ DXGK_VMBCOMMAND_UNLOCK2,
|
|
+ process->host_handle);
|
|
+ command->args = *args;
|
|
+
|
|
+ ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size);
|
|
+
|
|
+cleanup:
|
|
+ free_message(&msg, process);
|
|
+ if (ret)
|
|
+ DXG_TRACE("err: %d", ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
int dxgvmb_send_create_hwqueue(struct dxgprocess *process,
|
|
struct dxgadapter *adapter,
|
|
struct d3dkmt_createhwqueue *args,
|
|
diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h
|
|
index 111111111111..222222222222 100644
|
|
--- a/drivers/hv/dxgkrnl/dxgvmbus.h
|
|
+++ b/drivers/hv/dxgkrnl/dxgvmbus.h
|
|
@@ -570,6 +570,25 @@ struct dxgkvmb_command_waitforsyncobjectfromgpu {
|
|
/* struct d3dkmthandle ObjectHandles[object_count] */
|
|
};
|
|
|
|
+struct dxgkvmb_command_lock2 {
|
|
+ struct dxgkvmb_command_vgpu_to_host hdr;
|
|
+ struct d3dkmt_lock2 args;
|
|
+ bool use_legacy_lock;
|
|
+ u32 flags;
|
|
+ u32 priv_drv_data;
|
|
+};
|
|
+
|
|
+struct dxgkvmb_command_lock2_return {
|
|
+ struct ntstatus status;
|
|
+ void *cpu_visible_buffer_offset;
|
|
+};
|
|
+
|
|
+struct dxgkvmb_command_unlock2 {
|
|
+ struct dxgkvmb_command_vgpu_to_host hdr;
|
|
+ struct d3dkmt_unlock2 args;
|
|
+ bool use_legacy_unlock;
|
|
+};
|
|
+
|
|
/* Returns the same structure */
|
|
struct dxgkvmb_command_createhwqueue {
|
|
struct dxgkvmb_command_vgpu_to_host hdr;
|
|
diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c
|
|
index 111111111111..222222222222 100644
|
|
--- a/drivers/hv/dxgkrnl/ioctl.c
|
|
+++ b/drivers/hv/dxgkrnl/ioctl.c
|
|
@@ -3142,6 +3142,162 @@ dxgkio_wait_sync_object_gpu(struct dxgprocess *process, void *__user inargs)
|
|
return ret;
|
|
}
|
|
|
|
+static int
|
|
+dxgkio_lock2(struct dxgprocess *process, void *__user inargs)
|
|
+{
|
|
+ struct d3dkmt_lock2 args;
|
|
+ struct d3dkmt_lock2 *__user result = inargs;
|
|
+ int ret;
|
|
+ struct dxgadapter *adapter = NULL;
|
|
+ struct dxgdevice *device = NULL;
|
|
+ struct dxgallocation *alloc = NULL;
|
|
+
|
|
+ ret = copy_from_user(&args, inargs, sizeof(args));
|
|
+ if (ret) {
|
|
+ DXG_ERR("failed to copy input args");
|
|
+ ret = -EINVAL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ args.data = NULL;
|
|
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
|
|
+ alloc = hmgrtable_get_object_by_type(&process->handle_table,
|
|
+ HMGRENTRY_TYPE_DXGALLOCATION,
|
|
+ args.allocation);
|
|
+ if (alloc == NULL) {
|
|
+ ret = -EINVAL;
|
|
+ } else {
|
|
+ if (alloc->cpu_address) {
|
|
+ ret = copy_to_user(&result->data,
|
|
+ &alloc->cpu_address,
|
|
+ sizeof(args.data));
|
|
+ if (ret == 0) {
|
|
+ args.data = alloc->cpu_address;
|
|
+ if (alloc->cpu_address_mapped)
|
|
+ alloc->cpu_address_refcount++;
|
|
+ } else {
|
|
+ DXG_ERR("Failed to copy cpu address");
|
|
+ ret = -EINVAL;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
|
|
+ if (ret < 0)
|
|
+ goto cleanup;
|
|
+ if (args.data)
|
|
+ goto success;
|
|
+
|
|
+ /*
|
|
+ * The call acquires reference on the device. It is safe to access the
|
|
+ * adapter, because the device holds reference on it.
|
|
+ */
|
|
+ device = dxgprocess_device_by_handle(process, args.device);
|
|
+ if (device == NULL) {
|
|
+ ret = -EINVAL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+ adapter = device->adapter;
|
|
+ ret = dxgadapter_acquire_lock_shared(adapter);
|
|
+ if (ret < 0) {
|
|
+ adapter = NULL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ ret = dxgvmb_send_lock2(process, adapter, &args, result);
|
|
+
|
|
+cleanup:
|
|
+
|
|
+ if (adapter)
|
|
+ dxgadapter_release_lock_shared(adapter);
|
|
+
|
|
+ if (device)
|
|
+ kref_put(&device->device_kref, dxgdevice_release);
|
|
+
|
|
+success:
|
|
+ DXG_TRACE("ioctl:%s %s %d", errorstr(ret), __func__, ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int
|
|
+dxgkio_unlock2(struct dxgprocess *process, void *__user inargs)
|
|
+{
|
|
+ struct d3dkmt_unlock2 args;
|
|
+ int ret;
|
|
+ struct dxgadapter *adapter = NULL;
|
|
+ struct dxgdevice *device = NULL;
|
|
+ struct dxgallocation *alloc = NULL;
|
|
+ bool done = false;
|
|
+
|
|
+ ret = copy_from_user(&args, inargs, sizeof(args));
|
|
+ if (ret) {
|
|
+ DXG_ERR("failed to copy input args");
|
|
+ ret = -EINVAL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ hmgrtable_lock(&process->handle_table, DXGLOCK_EXCL);
|
|
+ alloc = hmgrtable_get_object_by_type(&process->handle_table,
|
|
+ HMGRENTRY_TYPE_DXGALLOCATION,
|
|
+ args.allocation);
|
|
+ if (alloc == NULL) {
|
|
+ ret = -EINVAL;
|
|
+ } else {
|
|
+ if (alloc->cpu_address == NULL) {
|
|
+ DXG_ERR("Allocation is not locked: %p", alloc);
|
|
+ ret = -EINVAL;
|
|
+ } else if (alloc->cpu_address_mapped) {
|
|
+ if (alloc->cpu_address_refcount > 0) {
|
|
+ alloc->cpu_address_refcount--;
|
|
+ if (alloc->cpu_address_refcount != 0) {
|
|
+ done = true;
|
|
+ } else {
|
|
+ dxg_unmap_iospace(alloc->cpu_address,
|
|
+ alloc->num_pages << PAGE_SHIFT);
|
|
+ alloc->cpu_address_mapped = false;
|
|
+ alloc->cpu_address = NULL;
|
|
+ }
|
|
+ } else {
|
|
+ DXG_ERR("Invalid cpu access refcount");
|
|
+ done = true;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL);
|
|
+ if (done)
|
|
+ goto success;
|
|
+ if (ret < 0)
|
|
+ goto cleanup;
|
|
+
|
|
+ /*
|
|
+ * The call acquires reference on the device. It is safe to access the
|
|
+ * adapter, because the device holds reference on it.
|
|
+ */
|
|
+ device = dxgprocess_device_by_handle(process, args.device);
|
|
+ if (device == NULL) {
|
|
+ ret = -EINVAL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+ adapter = device->adapter;
|
|
+ ret = dxgadapter_acquire_lock_shared(adapter);
|
|
+ if (ret < 0) {
|
|
+ adapter = NULL;
|
|
+ goto cleanup;
|
|
+ }
|
|
+
|
|
+ ret = dxgvmb_send_unlock2(process, adapter, &args);
|
|
+
|
|
+cleanup:
|
|
+ if (adapter)
|
|
+ dxgadapter_release_lock_shared(adapter);
|
|
+
|
|
+ if (device)
|
|
+ kref_put(&device->device_kref, dxgdevice_release);
|
|
+
|
|
+success:
|
|
+ DXG_TRACE("ioctl:%s %s %d", errorstr(ret), __func__, ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
static int
|
|
dxgkio_get_device_state(struct dxgprocess *process, void *__user inargs)
|
|
{
|
|
@@ -3909,7 +4065,7 @@ static struct ioctl_desc ioctls[] = {
|
|
/* 0x22 */ {},
|
|
/* 0x23 */ {},
|
|
/* 0x24 */ {},
|
|
-/* 0x25 */ {},
|
|
+/* 0x25 */ {dxgkio_lock2, LX_DXLOCK2},
|
|
/* 0x26 */ {},
|
|
/* 0x27 */ {},
|
|
/* 0x28 */ {},
|
|
@@ -3932,7 +4088,7 @@ static struct ioctl_desc ioctls[] = {
|
|
LX_DXSUBMITSIGNALSYNCOBJECTSTOHWQUEUE},
|
|
/* 0x36 */ {dxgkio_submit_wait_to_hwqueue,
|
|
LX_DXSUBMITWAITFORSYNCOBJECTSTOHWQUEUE},
|
|
-/* 0x37 */ {},
|
|
+/* 0x37 */ {dxgkio_unlock2, LX_DXUNLOCK2},
|
|
/* 0x38 */ {},
|
|
/* 0x39 */ {},
|
|
/* 0x3a */ {dxgkio_wait_sync_object_cpu,
|
|
diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h
|
|
index 111111111111..222222222222 100644
|
|
--- a/include/uapi/misc/d3dkmthk.h
|
|
+++ b/include/uapi/misc/d3dkmthk.h
|
|
@@ -668,6 +668,32 @@ struct d3dkmt_submitcommandtohwqueue {
|
|
#endif
|
|
};
|
|
|
|
+struct d3dddicb_lock2flags {
|
|
+ union {
|
|
+ struct {
|
|
+ __u32 reserved:32;
|
|
+ };
|
|
+ __u32 value;
|
|
+ };
|
|
+};
|
|
+
|
|
+struct d3dkmt_lock2 {
|
|
+ struct d3dkmthandle device;
|
|
+ struct d3dkmthandle allocation;
|
|
+ struct d3dddicb_lock2flags flags;
|
|
+ __u32 reserved;
|
|
+#ifdef __KERNEL__
|
|
+ void *data;
|
|
+#else
|
|
+ __u64 data;
|
|
+#endif
|
|
+};
|
|
+
|
|
+struct d3dkmt_unlock2 {
|
|
+ struct d3dkmthandle device;
|
|
+ struct d3dkmthandle allocation;
|
|
+};
|
|
+
|
|
enum d3dkmt_standardallocationtype {
|
|
_D3DKMT_STANDARDALLOCATIONTYPE_EXISTINGHEAP = 1,
|
|
_D3DKMT_STANDARDALLOCATIONTYPE_CROSSADAPTER = 2,
|
|
@@ -1083,6 +1109,8 @@ struct d3dkmt_shareobjectwithhost {
|
|
_IOWR(0x47, 0x19, struct d3dkmt_destroydevice)
|
|
#define LX_DXDESTROYSYNCHRONIZATIONOBJECT \
|
|
_IOWR(0x47, 0x1d, struct d3dkmt_destroysynchronizationobject)
|
|
+#define LX_DXLOCK2 \
|
|
+ _IOWR(0x47, 0x25, struct d3dkmt_lock2)
|
|
#define LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMCPU \
|
|
_IOWR(0x47, 0x31, struct d3dkmt_signalsynchronizationobjectfromcpu)
|
|
#define LX_DXSIGNALSYNCHRONIZATIONOBJECTFROMGPU \
|
|
@@ -1095,6 +1123,8 @@ struct d3dkmt_shareobjectwithhost {
|
|
_IOWR(0x47, 0x35, struct d3dkmt_submitsignalsyncobjectstohwqueue)
|
|
#define LX_DXSUBMITWAITFORSYNCOBJECTSTOHWQUEUE \
|
|
_IOWR(0x47, 0x36, struct d3dkmt_submitwaitforsyncobjectstohwqueue)
|
|
+#define LX_DXUNLOCK2 \
|
|
+ _IOWR(0x47, 0x37, struct d3dkmt_unlock2)
|
|
#define LX_DXWAITFORSYNCHRONIZATIONOBJECTFROMCPU \
|
|
_IOWR(0x47, 0x3a, struct d3dkmt_waitforsynchronizationobjectfromcpu)
|
|
#define LX_DXWAITFORSYNCHRONIZATIONOBJECTFROMGPU \
|
|
--
|
|
Armbian
|
|
|