Merge: IDXD driver update for 9.1.0
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/692
Bugzilla: https://bugzilla.redhat.com/1971962
Bugzilla: https://bugzilla.redhat.com/1973884
Bugzilla: https://bugzilla.redhat.com/2004573
Bugzilla: https://bugzilla.redhat.com/2040041
Bugzilla: https://bugzilla.redhat.com/2040044
Bugzilla: https://bugzilla.redhat.com/2040046
Bugzilla: https://bugzilla.redhat.com/2040048
Bugzilla: https://bugzilla.redhat.com/2040052
Bugzilla: https://bugzilla.redhat.com/2040496
Bugzilla: https://bugzilla.redhat.com/2046470
Bugzilla: https://bugzilla.redhat.com/2072168

Testing: Ran dsa_user_test_runner.sh on a Sapphire Rapids system. Intel is also testing.

Conflicts: Noted in the individual commits. As with the RHEL8 MR, there are a couple of
conflicts caused by having to deal with cleanups that were done in the upstream merge
commits, plus one RH_KABI workaround to task_struct. The end result was compared with
upstream; the only difference is due to a patch changing a callback function to void not
being backported, since the general kernel patch for that hasn't been backported.

This patchset updates the idxd driver to 5.18 and also pulls in upstream fixes to
re-enable ENQCMD feature support.

Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>
Approved-by: Prarit Bhargava <prarit@redhat.com>
Approved-by: Phil Auld <pauld@redhat.com>
Approved-by: Rafael Aquini <aquini@redhat.com>
Approved-by: David Arcari <darcari@redhat.com>
Approved-by: Myron Stowe <mstowe@redhat.com>
Approved-by: Dean Nelson <dnelson@redhat.com>
Approved-by: Chris von Recklinghausen <crecklin@redhat.com>
Signed-off-by: Patrick Talbert <ptalbert@redhat.com>
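For reference only (not part of the series), the ENQCMD support this re-enables is what lets an
unprivileged thread submit descriptors to a shared DSA work queue. A minimal userspace sketch is
below; the helper names, the MEMMOVE opcode choice, and the assumption that the WQ portal has
already been mmap()ed from the wq character device are illustrative, not something this MR adds
(real clients normally go through accel-config/libaccel-config):

/* Illustrative sketch: submit one descriptor to an mmap()ed shared-WQ portal with ENQCMD. */
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <linux/idxd.h>		/* struct dsa_hw_desc, DSA_OPCODE_*, IDXD_OP_FLAG_* */

/* ENQCMD rax, [rdx]; ZF=1 (retry != 0) means the shared WQ was full and the submit must be retried */
static inline unsigned char enqcmd(volatile void *portal, struct dsa_hw_desc *desc)
{
	unsigned char retry;

	asm volatile(".byte 0xf2, 0x0f, 0x38, 0xf8, 0x02\t\n"
		     "setz %0\t\n"
		     : "=r" (retry) : "a" (portal), "d" (desc));
	return retry;
}

static int submit_memmove(volatile void *portal, uint64_t src, uint64_t dst, uint32_t len,
			  struct dsa_completion_record *comp)
{
	struct dsa_hw_desc desc;

	memset(&desc, 0, sizeof(desc));
	desc.opcode = DSA_OPCODE_MEMMOVE;
	desc.flags = IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CRAV;	/* request a completion record */
	desc.completion_addr = (uint64_t)(uintptr_t)comp;
	desc.src_addr = src;
	desc.dst_addr = dst;
	desc.xfer_size = len;

	return enqcmd(portal, &desc) ? -EAGAIN : 0;
}

The first such submission from a thread takes the #GP fixup described in the x86 SVA documentation
changes below, which loads IA32_PASID and re-executes the ENQCMD transparently.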
commit e03a17d432
@@ -41,14 +41,14 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The maximum number of groups can be created under this device.

What: /sys/bus/dsa/devices/dsa<m>/max_tokens
Date: Oct 25, 2019
KernelVersion: 5.6.0
What: /sys/bus/dsa/devices/dsa<m>/max_read_buffers
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: The total number of bandwidth tokens supported by this device.
The bandwidth tokens represent resources within the DSA
Description: The total number of read buffers supported by this device.
The read buffers represent resources within the DSA
implementation, and these resources are allocated by engines to
support operations.
support operations. See DSA spec v1.2 9.2.4 Total Read Buffers.

What: /sys/bus/dsa/devices/dsa<m>/max_transfer_size
Date: Oct 25, 2019
@@ -115,19 +115,21 @@ KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: To indicate if this device is configurable or not.

What: /sys/bus/dsa/devices/dsa<m>/token_limit
Date: Oct 25, 2019
KernelVersion: 5.6.0
What: /sys/bus/dsa/devices/dsa<m>/read_buffer_limit
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: The maximum number of bandwidth tokens that may be in use at
Description: The maximum number of read buffers that may be in use at
one time by operations that access low bandwidth memory in the
device.
device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit.

What: /sys/bus/dsa/devices/dsa<m>/cmd_status
Date: Aug 28, 2020
KernelVersion: 5.10.0
Contact: dmaengine@vger.kernel.org
Description: The last executed device administrative command's status/error.
Also last configuration error overloaded.
Writing to it will clear the status.

What: /sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
Date: Oct 27, 2020
@@ -218,8 +220,38 @@ Contact: dmaengine@vger.kernel.org
Description: Show the current number of entries in this WQ if WQ Occupancy
Support bit WQ capabilities is 1.

What: /sys/bus/dsa/devices/wq<m>.<n>/enqcmds_retries
Date Oct 29, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicate the number of retires for an enqcmds submission on a sharedwq.
A max value to set attribute is capped at 64.

What: /sys/bus/dsa/devices/engine<m>.<n>/group_id
Date: Oct 25, 2019
KernelVersion: 5.6.0
Contact: dmaengine@vger.kernel.org
Description: The group that this engine belongs to.

What: /sys/bus/dsa/devices/group<m>.<n>/use_read_buffer_limit
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Enable the use of global read buffer limit for the group. See DSA
spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.

What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicates max number of read buffers that may be in use at one time
by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
Buffers Allowed.

What: /sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
Date: Dec 10, 2021
KernelVersion: 5.17.0
Contact: dmaengine@vger.kernel.org
Description: Indicates the number of Read Buffers reserved for the use of
engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
Reserved.
@@ -1744,6 +1744,11 @@
support for the idxd driver. By default it is set to
true (1).

idxd.tc_override= [HW]
Format: <bool>
Allow override of default traffic class configuration
for the device. By default it is set to false (0).

ieee754= [MIPS] Select IEEE Std 754 conformance mode
Format: { strict | legacy | 2008 | relaxed }
Default: strict
@@ -104,18 +104,47 @@ The MSR must be configured on each logical CPU before any application
thread can interact with a device. Threads that belong to the same
process share the same page tables, thus the same MSR value.

PASID is cleared when a process is created. The PASID allocation and MSR
programming may occur long after a process and its threads have been created.
One thread must call iommu_sva_bind_device() to allocate the PASID for the
process. If a thread uses ENQCMD without the MSR first being populated, a #GP
will be raised. The kernel will update the PASID MSR with the PASID for all
threads in the process. A single process PASID can be used simultaneously
with multiple devices since they all share the same address space.
PASID Life Cycle Management
===========================

One thread can call iommu_sva_unbind_device() to free the allocated PASID.
The kernel will clear the PASID MSR for all threads belonging to the process.
PASID is initialized as INVALID_IOASID (-1) when a process is created.

New threads inherit the MSR value from the parent.
Only processes that access SVA-capable devices need to have a PASID
allocated. This allocation happens when a process opens/binds an SVA-capable
device but finds no PASID for this process. Subsequent binds of the same, or
other devices will share the same PASID.

Although the PASID is allocated to the process by opening a device,
it is not active in any of the threads of that process. It's loaded to the
IA32_PASID MSR lazily when a thread tries to submit a work descriptor
to a device using the ENQCMD.

That first access will trigger a #GP fault because the IA32_PASID MSR
has not been initialized with the PASID value assigned to the process
when the device was opened. The Linux #GP handler notes that a PASID has
been allocated for the process, and so initializes the IA32_PASID MSR
and returns so that the ENQCMD instruction is re-executed.

On fork(2) or exec(2) the PASID is removed from the process as it no
longer has the same address space that it had when the device was opened.

On clone(2) the new task shares the same address space, so will be
able to use the PASID allocated to the process. The IA32_PASID is not
preemptively initialized as the PASID value might not be allocated yet or
the kernel does not know whether this thread is going to access the device
and the cleared IA32_PASID MSR reduces context switch overhead by xstate
init optimization. Since #GP faults have to be handled on any threads that
were created before the PASID was assigned to the mm of the process, newly
created threads might as well be treated in a consistent way.

Due to complexity of freeing the PASID and clearing all IA32_PASID MSRs in
all threads in unbind, free the PASID lazily only on mm exit.

If a process does a close(2) of the device file descriptor and munmap(2)
of the device MMIO portal, then the driver will unbind the device. The
PASID is still marked VALID in the PASID_MSR for any threads in the
process that accessed the device. But this is harmless as without the
MMIO portal they cannot submit new work to the device.

Relationships
=============
@@ -56,8 +56,11 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif

/* Force disable because it's broken beyond repair */
#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
#endif

#ifdef CONFIG_X86_SGX
# define DISABLE_SGX 0
@@ -605,6 +605,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags)
	fpu_inherit_perms(dst_fpu);
	fpregs_unlock();

	/*
	 * Children never inherit PASID state.
	 * Force it to have its init value:
	 */
	if (use_xsave())
		dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID;

	trace_x86_fpu_copy_src(src_fpu);
	trace_x86_fpu_copy_dst(dst_fpu);
@ -81,10 +81,10 @@ static unsigned int xstate_offsets[XFEATURE_MAX] __ro_after_init =
|
|||
{ [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_sizes[XFEATURE_MAX] __ro_after_init =
|
||||
{ [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_comp_offsets[XFEATURE_MAX] __ro_after_init =
|
||||
{ [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] __ro_after_init =
|
||||
{ [ 0 ... XFEATURE_MAX - 1] = -1};
|
||||
static unsigned int xstate_flags[XFEATURE_MAX] __ro_after_init;
|
||||
|
||||
#define XSTATE_FLAG_SUPERVISOR BIT(0)
|
||||
#define XSTATE_FLAG_ALIGNED64 BIT(1)
|
||||
|
||||
/*
|
||||
* Return whether the system supports a given xfeature.
|
||||
|
@ -124,17 +124,41 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(cpu_has_xfeatures);
|
||||
|
||||
static bool xfeature_is_aligned64(int xfeature_nr)
|
||||
{
|
||||
return xstate_flags[xfeature_nr] & XSTATE_FLAG_ALIGNED64;
|
||||
}
|
||||
|
||||
static bool xfeature_is_supervisor(int xfeature_nr)
|
||||
{
|
||||
/*
|
||||
* Extended State Enumeration Sub-leaves (EAX = 0DH, ECX = n, n > 1)
|
||||
* returns ECX[0] set to (1) for a supervisor state, and cleared (0)
|
||||
* for a user state.
|
||||
*/
|
||||
u32 eax, ebx, ecx, edx;
|
||||
return xstate_flags[xfeature_nr] & XSTATE_FLAG_SUPERVISOR;
|
||||
}
|
||||
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
return ecx & 1;
|
||||
static unsigned int xfeature_get_offset(u64 xcomp_bv, int xfeature)
|
||||
{
|
||||
unsigned int offs, i;
|
||||
|
||||
/*
|
||||
* Non-compacted format and legacy features use the cached fixed
|
||||
* offsets.
|
||||
*/
|
||||
if (!cpu_feature_enabled(X86_FEATURE_XSAVES) || xfeature <= XFEATURE_SSE)
|
||||
return xstate_offsets[xfeature];
|
||||
|
||||
/*
|
||||
* Compacted format offsets depend on the actual content of the
|
||||
* compacted xsave area which is determined by the xcomp_bv header
|
||||
* field.
|
||||
*/
|
||||
offs = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
for_each_extended_xfeature(i, xcomp_bv) {
|
||||
if (xfeature_is_aligned64(i))
|
||||
offs = ALIGN(offs, 64);
|
||||
if (i == xfeature)
|
||||
break;
|
||||
offs += xstate_sizes[i];
|
||||
}
|
||||
return offs;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -182,7 +206,7 @@ static bool xfeature_enabled(enum xfeature xfeature)
|
|||
* Record the offsets and sizes of various xstates contained
|
||||
* in the XSAVE state memory layout.
|
||||
*/
|
||||
static void __init setup_xstate_features(void)
|
||||
static void __init setup_xstate_cache(void)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx, i;
|
||||
/* start at the beginning of the "extended state" */
|
||||
|
@ -205,6 +229,7 @@ static void __init setup_xstate_features(void)
|
|||
cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
xstate_sizes[i] = eax;
|
||||
xstate_flags[i] = ecx;
|
||||
|
||||
/*
|
||||
* If an xfeature is supervisor state, the offset in EBX is
|
||||
|
@ -263,94 +288,6 @@ static void __init print_xstate_features(void)
|
|||
WARN_ON(nr >= XFEATURE_MAX); \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* We could cache this like xstate_size[], but we only use
|
||||
* it here, so it would be a waste of space.
|
||||
*/
|
||||
static int xfeature_is_aligned(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
CHECK_XFEATURE(xfeature_nr);
|
||||
|
||||
if (!xfeature_enabled(xfeature_nr)) {
|
||||
WARN_ONCE(1, "Checking alignment of disabled xfeature %d\n",
|
||||
xfeature_nr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
/*
|
||||
* The value returned by ECX[1] indicates the alignment
|
||||
* of state component 'i' when the compacted format
|
||||
* of the extended region of an XSAVE area is used:
|
||||
*/
|
||||
return !!(ecx & 2);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function sets up offsets and sizes of all extended states in
|
||||
* xsave area. This supports both standard format and compacted format
|
||||
* of the xsave area.
|
||||
*/
|
||||
static void __init setup_xstate_comp_offsets(void)
|
||||
{
|
||||
unsigned int next_offset;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The FP xstates and SSE xstates are legacy states. They are always
|
||||
* in the fixed offsets in the xsave area in either compacted form
|
||||
* or standard form.
|
||||
*/
|
||||
xstate_comp_offsets[XFEATURE_FP] = 0;
|
||||
xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
|
||||
xmm_space);
|
||||
|
||||
if (!cpu_feature_enabled(X86_FEATURE_XSAVES)) {
|
||||
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features)
|
||||
xstate_comp_offsets[i] = xstate_offsets[i];
|
||||
return;
|
||||
}
|
||||
|
||||
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
|
||||
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
|
||||
if (xfeature_is_aligned(i))
|
||||
next_offset = ALIGN(next_offset, 64);
|
||||
|
||||
xstate_comp_offsets[i] = next_offset;
|
||||
next_offset += xstate_sizes[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup offsets of a supervisor-state-only XSAVES buffer:
|
||||
*
|
||||
* The offsets stored in xstate_comp_offsets[] only work for one specific
|
||||
* value of the Requested Feature BitMap (RFBM). In cases where a different
|
||||
* RFBM value is used, a different set of offsets is required. This set of
|
||||
* offsets is for when RFBM=xfeatures_mask_supervisor().
|
||||
*/
|
||||
static void __init setup_supervisor_only_offsets(void)
|
||||
{
|
||||
unsigned int next_offset;
|
||||
int i;
|
||||
|
||||
next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
|
||||
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
|
||||
if (!xfeature_is_supervisor(i))
|
||||
continue;
|
||||
|
||||
if (xfeature_is_aligned(i))
|
||||
next_offset = ALIGN(next_offset, 64);
|
||||
|
||||
xstate_supervisor_only_offsets[i] = next_offset;
|
||||
next_offset += xstate_sizes[i];
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out xstate component offsets and sizes
|
||||
*/
|
||||
|
@ -360,7 +297,8 @@ static void __init print_xstate_offset_size(void)
|
|||
|
||||
for_each_extended_xfeature(i, fpu_kernel_cfg.max_features) {
|
||||
pr_info("x86/fpu: xstate_offset[%d]: %4d, xstate_sizes[%d]: %4d\n",
|
||||
i, xstate_comp_offsets[i], i, xstate_sizes[i]);
|
||||
i, xfeature_get_offset(fpu_kernel_cfg.max_features, i),
|
||||
i, xstate_sizes[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -419,7 +357,6 @@ static void __init setup_init_fpu_buf(void)
|
|||
if (!boot_cpu_has(X86_FEATURE_XSAVE))
|
||||
return;
|
||||
|
||||
setup_xstate_features();
|
||||
print_xstate_features();
|
||||
|
||||
xstate_init_xcomp_bv(&init_fpstate.regs.xsave, fpu_kernel_cfg.max_features);
|
||||
|
@ -448,25 +385,6 @@ static void __init setup_init_fpu_buf(void)
|
|||
fxsave(&init_fpstate.regs.fxsave);
|
||||
}
|
||||
|
||||
static int xfeature_uncompacted_offset(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/*
|
||||
* Only XSAVES supports supervisor states and it uses compacted
|
||||
* format. Checking a supervisor state's uncompacted offset is
|
||||
* an error.
|
||||
*/
|
||||
if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) {
|
||||
WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr);
|
||||
return -1;
|
||||
}
|
||||
|
||||
CHECK_XFEATURE(xfeature_nr);
|
||||
cpuid_count(XSTATE_CPUID, xfeature_nr, &eax, &ebx, &ecx, &edx);
|
||||
return ebx;
|
||||
}
|
||||
|
||||
int xfeature_size(int xfeature_nr)
|
||||
{
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
@ -644,29 +562,15 @@ static bool __init check_xstate_against_struct(int nr)
|
|||
|
||||
static unsigned int xstate_calculate_size(u64 xfeatures, bool compacted)
|
||||
{
|
||||
unsigned int size = FXSAVE_SIZE + XSAVE_HDR_SIZE;
|
||||
int i;
|
||||
unsigned int topmost = fls64(xfeatures) - 1;
|
||||
unsigned int offset = xstate_offsets[topmost];
|
||||
|
||||
for_each_extended_xfeature(i, xfeatures) {
|
||||
/* Align from the end of the previous feature */
|
||||
if (xfeature_is_aligned(i))
|
||||
size = ALIGN(size, 64);
|
||||
/*
|
||||
* In compacted format the enabled features are packed,
|
||||
* i.e. disabled features do not occupy space.
|
||||
*
|
||||
* In non-compacted format the offsets are fixed and
|
||||
* disabled states still occupy space in the memory buffer.
|
||||
*/
|
||||
if (!compacted)
|
||||
size = xfeature_uncompacted_offset(i);
|
||||
/*
|
||||
* Add the feature size even for non-compacted format
|
||||
* to make the end result correct
|
||||
*/
|
||||
size += xfeature_size(i);
|
||||
}
|
||||
return size;
|
||||
if (topmost <= XFEATURE_SSE)
|
||||
return sizeof(struct xregs_state);
|
||||
|
||||
if (compacted)
|
||||
offset = xfeature_get_offset(xfeatures, topmost);
|
||||
return offset + xstate_sizes[topmost];
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -935,6 +839,10 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
|
|||
|
||||
/* Enable xstate instructions to be able to continue with initialization: */
|
||||
fpu__init_cpu_xstate();
|
||||
|
||||
/* Cache size, offset and flags for initialization */
|
||||
setup_xstate_cache();
|
||||
|
||||
err = init_xstate_size();
|
||||
if (err)
|
||||
goto out_disable;
|
||||
|
@ -950,8 +858,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
|
|||
fpu_user_cfg.max_features);
|
||||
|
||||
setup_init_fpu_buf();
|
||||
setup_xstate_comp_offsets();
|
||||
setup_supervisor_only_offsets();
|
||||
|
||||
/*
|
||||
* Paranoia check whether something in the setup modified the
|
||||
|
@ -1006,13 +912,19 @@ void fpu__resume_cpu(void)
|
|||
*/
|
||||
static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
|
||||
{
|
||||
if (!xfeature_enabled(xfeature_nr)) {
|
||||
WARN_ON_FPU(1);
|
||||
u64 xcomp_bv = xsave->header.xcomp_bv;
|
||||
|
||||
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
|
||||
return NULL;
|
||||
|
||||
if (cpu_feature_enabled(X86_FEATURE_XSAVES)) {
|
||||
if (WARN_ON_ONCE(!(xcomp_bv & BIT_ULL(xfeature_nr))))
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return (void *)xsave + xstate_comp_offsets[xfeature_nr];
|
||||
return (void *)xsave + xfeature_get_offset(xcomp_bv, xfeature_nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Given the xsave area and a state inside, this function returns the
|
||||
* address of the state.
|
||||
|
@ -1043,8 +955,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
|
|||
* We should not ever be requesting features that we
|
||||
* have not enabled.
|
||||
*/
|
||||
WARN_ONCE(!(fpu_kernel_cfg.max_features & BIT_ULL(xfeature_nr)),
|
||||
"get of unsupported state");
|
||||
if (WARN_ON_ONCE(!xfeature_enabled(xfeature_nr)))
|
||||
return NULL;
|
||||
|
||||
/*
|
||||
* This assumes the last 'xsave*' instruction to
|
||||
* have requested that 'xfeature_nr' be saved.
|
||||
|
@ -1625,6 +1538,9 @@ static int __xstate_request_perm(u64 permitted, u64 requested, bool guest)
|
|||
|
||||
/* Calculate the resulting kernel state size */
|
||||
mask = permitted | requested;
|
||||
/* Take supervisor states into account on the host */
|
||||
if (!guest)
|
||||
mask |= xfeatures_mask_supervisor();
|
||||
ksize = xstate_calculate_size(mask, compacted);
|
||||
|
||||
/* Calculate the resulting user state size */
|
||||
|
|
|
@ -39,6 +39,7 @@
|
|||
#include <linux/io.h>
|
||||
#include <linux/hardirq.h>
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/ioasid.h>
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
#include <asm/processor.h>
|
||||
|
@ -528,6 +529,57 @@ static enum kernel_gp_hint get_kernel_gp_address(struct pt_regs *regs,
|
|||
|
||||
#define GPFSTR "general protection fault"
|
||||
|
||||
/*
|
||||
* The unprivileged ENQCMD instruction generates #GPs if the
|
||||
* IA32_PASID MSR has not been populated. If possible, populate
|
||||
* the MSR from a PASID previously allocated to the mm.
|
||||
*/
|
||||
static bool try_fixup_enqcmd_gp(void)
|
||||
{
|
||||
#ifdef CONFIG_IOMMU_SVA
|
||||
u32 pasid;
|
||||
|
||||
/*
|
||||
* MSR_IA32_PASID is managed using XSAVE. Directly
|
||||
* writing to the MSR is only possible when fpregs
|
||||
* are valid and the fpstate is not. This is
|
||||
* guaranteed when handling a userspace exception
|
||||
* in *before* interrupts are re-enabled.
|
||||
*/
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/*
|
||||
* Hardware without ENQCMD will not generate
|
||||
* #GPs that can be fixed up here.
|
||||
*/
|
||||
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
|
||||
return false;
|
||||
|
||||
pasid = current->mm->pasid;
|
||||
|
||||
/*
|
||||
* If the mm has not been allocated a
|
||||
* PASID, the #GP can not be fixed up.
|
||||
*/
|
||||
if (!pasid_valid(pasid))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Did this thread already have its PASID activated?
|
||||
* If so, the #GP must be from something else.
|
||||
*/
|
||||
if (current->pasid_activated)
|
||||
return false;
|
||||
|
||||
wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID);
|
||||
current->pasid_activated = 1;
|
||||
|
||||
return true;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
|
||||
{
|
||||
char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR;
|
||||
|
@ -536,6 +588,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
|
|||
unsigned long gp_addr;
|
||||
int ret;
|
||||
|
||||
if (user_mode(regs) && try_fixup_enqcmd_gp())
|
||||
return;
|
||||
|
||||
cond_local_irq_enable(regs);
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_UMIP)) {
|
||||
|
|
|
@ -277,10 +277,15 @@ config INTEL_IDMA64
|
|||
Enable DMA support for Intel Low Power Subsystem such as found on
|
||||
Intel Skylake PCH.
|
||||
|
||||
config INTEL_IDXD_BUS
|
||||
tristate
|
||||
default INTEL_IDXD
|
||||
|
||||
config INTEL_IDXD
|
||||
tristate "Intel Data Accelerators support"
|
||||
depends on PCI && X86_64
|
||||
depends on PCI_MSI
|
||||
depends on PCI_PASID
|
||||
depends on SBITMAP
|
||||
select DMA_ENGINE
|
||||
help
|
||||
|
@ -291,6 +296,23 @@ config INTEL_IDXD
|
|||
|
||||
If unsure, say N.
|
||||
|
||||
config INTEL_IDXD_COMPAT
|
||||
bool "Legacy behavior for idxd driver"
|
||||
depends on PCI && X86_64
|
||||
select INTEL_IDXD_BUS
|
||||
help
|
||||
Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior.
|
||||
The old behavior performed driver bind/unbind for device and wq
|
||||
devices all under the dsa driver. The compat driver will emulate
|
||||
the legacy behavior in order to allow existing support apps (i.e.
|
||||
accel-config) to continue function. It is expected that accel-config
|
||||
v3.2 and earlier will need the compat mode. A distro with later
|
||||
accel-config version can disable this compat config.
|
||||
|
||||
Say Y if you have old applications that require such behavior.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
# Config symbol that collects all the dependencies that's necessary to
|
||||
# support shared virtual memory for the devices supported by idxd.
|
||||
config INTEL_IDXD_SVM
|
||||
|
|
|
@ -42,7 +42,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
|
|||
obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
|
||||
obj-$(CONFIG_INTEL_IDMA64) += idma64.o
|
||||
obj-$(CONFIG_INTEL_IOATDMA) += ioat/
|
||||
obj-$(CONFIG_INTEL_IDXD) += idxd/
|
||||
obj-y += idxd/
|
||||
obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
|
||||
obj-$(CONFIG_K3_DMA) += k3dma.o
|
||||
obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
|
||||
|
|
|
@ -1,4 +1,12 @@
|
|||
ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD
|
||||
|
||||
obj-$(CONFIG_INTEL_IDXD) += idxd.o
|
||||
idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
|
||||
|
||||
idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
|
||||
|
||||
obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o
|
||||
idxd_bus-y := bus.o
|
||||
|
||||
obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o
|
||||
idxd_compat-y := compat.o
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include "idxd.h"
|
||||
|
||||
|
||||
int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner,
|
||||
const char *mod_name)
|
||||
{
|
||||
struct device_driver *drv = &idxd_drv->drv;
|
||||
|
||||
if (!idxd_drv->type) {
|
||||
pr_debug("driver type not set (%ps)\n", __builtin_return_address(0));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
drv->name = idxd_drv->name;
|
||||
drv->bus = &dsa_bus_type;
|
||||
drv->owner = owner;
|
||||
drv->mod_name = mod_name;
|
||||
|
||||
return driver_register(drv);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(__idxd_driver_register);
|
||||
|
||||
void idxd_driver_unregister(struct idxd_device_driver *idxd_drv)
|
||||
{
|
||||
driver_unregister(&idxd_drv->drv);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(idxd_driver_unregister);
|
||||
|
||||
static int idxd_config_bus_match(struct device *dev,
|
||||
struct device_driver *drv)
|
||||
{
|
||||
struct idxd_device_driver *idxd_drv =
|
||||
container_of(drv, struct idxd_device_driver, drv);
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
int i = 0;
|
||||
|
||||
while (idxd_drv->type[i] != IDXD_DEV_NONE) {
|
||||
if (idxd_dev->type == idxd_drv->type[i])
|
||||
return 1;
|
||||
i++;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int idxd_config_bus_probe(struct device *dev)
|
||||
{
|
||||
struct idxd_device_driver *idxd_drv =
|
||||
container_of(dev->driver, struct idxd_device_driver, drv);
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return idxd_drv->probe(idxd_dev);
|
||||
}
|
||||
|
||||
static int idxd_config_bus_remove(struct device *dev)
|
||||
{
|
||||
struct idxd_device_driver *idxd_drv =
|
||||
container_of(dev->driver, struct idxd_device_driver, drv);
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
idxd_drv->remove(idxd_dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct bus_type dsa_bus_type = {
|
||||
.name = "dsa",
|
||||
.match = idxd_config_bus_match,
|
||||
.probe = idxd_config_bus_probe,
|
||||
.remove = idxd_config_bus_remove,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(dsa_bus_type);
|
||||
|
||||
static int __init dsa_bus_init(void)
|
||||
{
|
||||
return bus_register(&dsa_bus_type);
|
||||
}
|
||||
module_init(dsa_bus_init);
|
||||
|
||||
static void __exit dsa_bus_exit(void)
|
||||
{
|
||||
bus_unregister(&dsa_bus_type);
|
||||
}
|
||||
module_exit(dsa_bus_exit);
|
||||
|
||||
MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver");
|
||||
MODULE_LICENSE("GPL v2");
|
|
@ -41,7 +41,7 @@ struct idxd_user_context {
|
|||
|
||||
static void idxd_cdev_dev_release(struct device *dev)
|
||||
{
|
||||
struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
|
||||
struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
|
||||
struct idxd_cdev_context *cdev_ctx;
|
||||
struct idxd_wq *wq = idxd_cdev->wq;
|
||||
|
||||
|
@ -218,14 +218,13 @@ static __poll_t idxd_cdev_poll(struct file *filp,
|
|||
struct idxd_user_context *ctx = filp->private_data;
|
||||
struct idxd_wq *wq = ctx->wq;
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
unsigned long flags;
|
||||
__poll_t out = 0;
|
||||
|
||||
poll_wait(filp, &wq->err_queue, wait);
|
||||
spin_lock_irqsave(&idxd->dev_lock, flags);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
if (idxd->sw_err.valid)
|
||||
out = EPOLLIN | EPOLLRDNORM;
|
||||
spin_unlock_irqrestore(&idxd->dev_lock, flags);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -256,9 +255,10 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
|
|||
if (!idxd_cdev)
|
||||
return -ENOMEM;
|
||||
|
||||
idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
|
||||
idxd_cdev->wq = wq;
|
||||
cdev = &idxd_cdev->cdev;
|
||||
dev = &idxd_cdev->dev;
|
||||
dev = cdev_dev(idxd_cdev);
|
||||
cdev_ctx = &ictx[wq->idxd->data->type];
|
||||
minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
|
||||
if (minor < 0) {
|
||||
|
@ -268,7 +268,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
|
|||
idxd_cdev->minor = minor;
|
||||
|
||||
device_initialize(dev);
|
||||
dev->parent = &wq->conf_dev;
|
||||
dev->parent = wq_confdev(wq);
|
||||
dev->bus = &dsa_bus_type;
|
||||
dev->type = &idxd_cdev_device_type;
|
||||
dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
|
||||
|
@ -299,10 +299,67 @@ void idxd_wq_del_cdev(struct idxd_wq *wq)
|
|||
|
||||
idxd_cdev = wq->idxd_cdev;
|
||||
wq->idxd_cdev = NULL;
|
||||
cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
|
||||
put_device(&idxd_cdev->dev);
|
||||
cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
|
||||
put_device(cdev_dev(idxd_cdev));
|
||||
}
|
||||
|
||||
static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
int rc;
|
||||
|
||||
if (idxd->state != IDXD_DEV_ENABLED)
|
||||
return -ENXIO;
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
wq->type = IDXD_WQT_USER;
|
||||
rc = __drv_enable_wq(wq);
|
||||
if (rc < 0)
|
||||
goto err;
|
||||
|
||||
rc = idxd_wq_add_cdev(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
|
||||
goto err_cdev;
|
||||
}
|
||||
|
||||
idxd->cmd_status = 0;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
return 0;
|
||||
|
||||
err_cdev:
|
||||
__drv_disable_wq(wq);
|
||||
err:
|
||||
wq->type = IDXD_WQT_NONE;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
idxd_wq_del_cdev(wq);
|
||||
__drv_disable_wq(wq);
|
||||
wq->type = IDXD_WQT_NONE;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
}
|
||||
|
||||
static enum idxd_dev_type dev_types[] = {
|
||||
IDXD_DEV_WQ,
|
||||
IDXD_DEV_NONE,
|
||||
};
|
||||
|
||||
struct idxd_device_driver idxd_user_drv = {
|
||||
.probe = idxd_user_drv_probe,
|
||||
.remove = idxd_user_drv_remove,
|
||||
.name = "user",
|
||||
.type = dev_types,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(idxd_user_drv);
|
||||
|
||||
int idxd_cdev_register(void)
|
||||
{
|
||||
int rc, i;
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
|
||||
#include <linux/init.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/device/bus.h>
|
||||
#include "idxd.h"
|
||||
|
||||
extern int device_driver_attach(struct device_driver *drv, struct device *dev);
|
||||
extern void device_driver_detach(struct device *dev);
|
||||
|
||||
#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \
|
||||
struct driver_attribute driver_attr_##_name = \
|
||||
__ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
|
||||
|
||||
static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
|
||||
{
|
||||
struct bus_type *bus = drv->bus;
|
||||
struct device *dev;
|
||||
int rc = -ENODEV;
|
||||
|
||||
dev = bus_find_device_by_name(bus, NULL, buf);
|
||||
if (dev && dev->driver) {
|
||||
device_driver_detach(dev);
|
||||
rc = count;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store);
|
||||
|
||||
static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
|
||||
{
|
||||
struct bus_type *bus = drv->bus;
|
||||
struct device *dev;
|
||||
struct device_driver *alt_drv = NULL;
|
||||
int rc = -ENODEV;
|
||||
struct idxd_dev *idxd_dev;
|
||||
|
||||
dev = bus_find_device_by_name(bus, NULL, buf);
|
||||
if (!dev || dev->driver || drv != &dsa_drv.drv)
|
||||
return -ENODEV;
|
||||
|
||||
idxd_dev = confdev_to_idxd_dev(dev);
|
||||
if (is_idxd_dev(idxd_dev)) {
|
||||
alt_drv = driver_find("idxd", bus);
|
||||
} else if (is_idxd_wq_dev(idxd_dev)) {
|
||||
struct idxd_wq *wq = confdev_to_wq(dev);
|
||||
|
||||
if (is_idxd_wq_kernel(wq))
|
||||
alt_drv = driver_find("dmaengine", bus);
|
||||
else if (is_idxd_wq_user(wq))
|
||||
alt_drv = driver_find("user", bus);
|
||||
}
|
||||
if (!alt_drv)
|
||||
return -ENODEV;
|
||||
|
||||
rc = device_driver_attach(alt_drv, dev);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
return count;
|
||||
}
|
||||
static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store);
|
||||
|
||||
static struct attribute *dsa_drv_compat_attrs[] = {
|
||||
&driver_attr_bind.attr,
|
||||
&driver_attr_unbind.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group dsa_drv_compat_attr_group = {
|
||||
.attrs = dsa_drv_compat_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *dsa_drv_compat_groups[] = {
|
||||
&dsa_drv_compat_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
}
|
||||
|
||||
static enum idxd_dev_type dev_types[] = {
|
||||
IDXD_DEV_NONE,
|
||||
};
|
||||
|
||||
struct idxd_device_driver dsa_drv = {
|
||||
.name = "dsa",
|
||||
.probe = idxd_dsa_drv_probe,
|
||||
.remove = idxd_dsa_drv_remove,
|
||||
.type = dev_types,
|
||||
.drv = {
|
||||
.suppress_bind_attrs = true,
|
||||
.groups = dsa_drv_compat_groups,
|
||||
},
|
||||
};
|
||||
|
||||
module_idxd_driver(dsa_drv);
|
||||
MODULE_IMPORT_NS(IDXD);
|
|
@ -15,32 +15,10 @@
|
|||
|
||||
static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
|
||||
u32 *status);
|
||||
static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
|
||||
static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
|
||||
|
||||
/* Interrupt control bits */
|
||||
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
|
||||
{
|
||||
struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
|
||||
|
||||
pci_msi_mask_irq(data);
|
||||
}
|
||||
|
||||
void idxd_mask_msix_vectors(struct idxd_device *idxd)
|
||||
{
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
int msixcnt = pci_msix_vec_count(pdev);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < msixcnt; i++)
|
||||
idxd_mask_msix_vector(idxd, i);
|
||||
}
|
||||
|
||||
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
|
||||
{
|
||||
struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
|
||||
|
||||
pci_msi_unmask_irq(data);
|
||||
}
|
||||
|
||||
void idxd_unmask_error_interrupts(struct idxd_device *idxd)
|
||||
{
|
||||
union genctrl_reg genctrl;
|
||||
|
@ -133,34 +111,24 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
|
|||
struct idxd_device *idxd = wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
int rc, num_descs, i;
|
||||
int align;
|
||||
u64 tmp;
|
||||
|
||||
if (wq->type != IDXD_WQT_KERNEL)
|
||||
return 0;
|
||||
|
||||
wq->num_descs = wq->size;
|
||||
num_descs = wq->size;
|
||||
num_descs = wq_dedicated(wq) ? wq->size : wq->threshold;
|
||||
wq->num_descs = num_descs;
|
||||
|
||||
rc = alloc_hw_descs(wq, num_descs);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
align = idxd->data->align;
|
||||
wq->compls_size = num_descs * idxd->data->compl_size + align;
|
||||
wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size,
|
||||
&wq->compls_addr_raw, GFP_KERNEL);
|
||||
if (!wq->compls_raw) {
|
||||
wq->compls_size = num_descs * idxd->data->compl_size;
|
||||
wq->compls = dma_alloc_coherent(dev, wq->compls_size, &wq->compls_addr, GFP_KERNEL);
|
||||
if (!wq->compls) {
|
||||
rc = -ENOMEM;
|
||||
goto fail_alloc_compls;
|
||||
}
|
||||
|
||||
/* Adjust alignment */
|
||||
wq->compls_addr = (wq->compls_addr_raw + (align - 1)) & ~(align - 1);
|
||||
tmp = (u64)wq->compls_raw;
|
||||
tmp = (tmp + (align - 1)) & ~(align - 1);
|
||||
wq->compls = (struct dsa_completion_record *)tmp;
|
||||
|
||||
rc = alloc_descs(wq, num_descs);
|
||||
if (rc < 0)
|
||||
goto fail_alloc_descs;
|
||||
|
@ -189,8 +157,7 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
|
|||
fail_sbitmap_init:
|
||||
free_descs(wq);
|
||||
fail_alloc_descs:
|
||||
dma_free_coherent(dev, wq->compls_size, wq->compls_raw,
|
||||
wq->compls_addr_raw);
|
||||
dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
|
||||
fail_alloc_compls:
|
||||
free_hw_descs(wq);
|
||||
return rc;
|
||||
|
@ -205,8 +172,7 @@ void idxd_wq_free_resources(struct idxd_wq *wq)
|
|||
|
||||
free_hw_descs(wq);
|
||||
free_descs(wq);
|
||||
dma_free_coherent(dev, wq->compls_size, wq->compls_raw,
|
||||
wq->compls_addr_raw);
|
||||
dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
|
||||
sbitmap_queue_free(&wq->sbq);
|
||||
}
|
||||
|
||||
|
@ -234,7 +200,7 @@ int idxd_wq_enable(struct idxd_wq *wq)
|
|||
return 0;
|
||||
}
|
||||
|
||||
int idxd_wq_disable(struct idxd_wq *wq)
|
||||
int idxd_wq_disable(struct idxd_wq *wq, bool reset_config)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
|
@ -255,6 +221,8 @@ int idxd_wq_disable(struct idxd_wq *wq)
|
|||
return -ENXIO;
|
||||
}
|
||||
|
||||
if (reset_config)
|
||||
idxd_wq_disable_cleanup(wq);
|
||||
wq->state = IDXD_WQ_DISABLED;
|
||||
dev_dbg(dev, "WQ %d disabled\n", wq->id);
|
||||
return 0;
|
||||
|
@ -289,6 +257,7 @@ void idxd_wq_reset(struct idxd_wq *wq)
|
|||
|
||||
operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
|
||||
idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
|
||||
idxd_wq_disable_cleanup(wq);
|
||||
wq->state = IDXD_WQ_DISABLED;
|
||||
}
|
||||
|
||||
|
@ -315,6 +284,7 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
|
|||
|
||||
devm_iounmap(dev, wq->portal);
|
||||
wq->portal = NULL;
|
||||
wq->portal_offset = 0;
|
||||
}
|
||||
|
||||
void idxd_wqs_unmap_portal(struct idxd_device *idxd)
|
||||
|
@ -335,19 +305,18 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
|
|||
int rc;
|
||||
union wqcfg wqcfg;
|
||||
unsigned int offset;
|
||||
unsigned long flags;
|
||||
|
||||
rc = idxd_wq_disable(wq);
|
||||
rc = idxd_wq_disable(wq, false);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
|
||||
spin_lock_irqsave(&idxd->dev_lock, flags);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
|
||||
wqcfg.pasid_en = 1;
|
||||
wqcfg.pasid = pasid;
|
||||
iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
|
||||
spin_unlock_irqrestore(&idxd->dev_lock, flags);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
|
||||
rc = idxd_wq_enable(wq);
|
||||
if (rc < 0)
|
||||
|
@ -362,19 +331,18 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
|
|||
int rc;
|
||||
union wqcfg wqcfg;
|
||||
unsigned int offset;
|
||||
unsigned long flags;
|
||||
|
||||
rc = idxd_wq_disable(wq);
|
||||
rc = idxd_wq_disable(wq, false);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
|
||||
spin_lock_irqsave(&idxd->dev_lock, flags);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
|
||||
wqcfg.pasid_en = 0;
|
||||
wqcfg.pasid = 0;
|
||||
iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
|
||||
spin_unlock_irqrestore(&idxd->dev_lock, flags);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
|
||||
rc = idxd_wq_enable(wq);
|
||||
if (rc < 0)
|
||||
|
@ -383,20 +351,31 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void idxd_wq_disable_cleanup(struct idxd_wq *wq)
|
||||
static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
|
||||
lockdep_assert_held(&idxd->dev_lock);
|
||||
lockdep_assert_held(&wq->wq_lock);
|
||||
memset(wq->wqcfg, 0, idxd->wqcfg_size);
|
||||
wq->type = IDXD_WQT_NONE;
|
||||
wq->size = 0;
|
||||
wq->group = NULL;
|
||||
wq->threshold = 0;
|
||||
wq->priority = 0;
|
||||
wq->ats_dis = 0;
|
||||
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
|
||||
clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
|
||||
clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
|
||||
memset(wq->name, 0, WQ_NAME_SIZE);
|
||||
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
|
||||
wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
|
||||
}
|
||||
|
||||
static void idxd_wq_device_reset_cleanup(struct idxd_wq *wq)
|
||||
{
|
||||
lockdep_assert_held(&wq->wq_lock);
|
||||
|
||||
idxd_wq_disable_cleanup(wq);
|
||||
wq->size = 0;
|
||||
wq->group = NULL;
|
||||
}
|
||||
|
||||
static void idxd_wq_ref_release(struct percpu_ref *ref)
|
||||
|
@ -411,18 +390,29 @@ int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
|
|||
int rc;
|
||||
|
||||
memset(&wq->wq_active, 0, sizeof(wq->wq_active));
|
||||
rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
|
||||
rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release,
|
||||
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
reinit_completion(&wq->wq_dead);
|
||||
reinit_completion(&wq->wq_resurrect);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void __idxd_wq_quiesce(struct idxd_wq *wq)
|
||||
{
|
||||
lockdep_assert_held(&wq->wq_lock);
|
||||
reinit_completion(&wq->wq_resurrect);
|
||||
percpu_ref_kill(&wq->wq_active);
|
||||
complete_all(&wq->wq_resurrect);
|
||||
wait_for_completion(&wq->wq_dead);
|
||||
}
|
||||
|
||||
void idxd_wq_quiesce(struct idxd_wq *wq)
|
||||
{
|
||||
percpu_ref_kill(&wq->wq_active);
|
||||
wait_for_completion(&wq->wq_dead);
|
||||
percpu_ref_exit(&wq->wq_active);
|
||||
mutex_lock(&wq->wq_lock);
|
||||
__idxd_wq_quiesce(wq);
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
}
|
||||
|
||||
/* Device control bits */
|
||||
|
@ -455,7 +445,6 @@ int idxd_device_init_reset(struct idxd_device *idxd)
|
|||
{
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
union idxd_command_reg cmd;
|
||||
unsigned long flags;
|
||||
|
||||
if (idxd_device_is_halted(idxd)) {
|
||||
dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
|
||||
|
@ -465,13 +454,13 @@ int idxd_device_init_reset(struct idxd_device *idxd)
|
|||
memset(&cmd, 0, sizeof(cmd));
|
||||
cmd.cmd = IDXD_CMD_RESET_DEVICE;
|
||||
dev_dbg(dev, "%s: sending reset for init.\n", __func__);
|
||||
spin_lock_irqsave(&idxd->cmd_lock, flags);
|
||||
spin_lock(&idxd->cmd_lock);
|
||||
iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
|
||||
|
||||
while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
|
||||
IDXD_CMDSTS_ACTIVE)
|
||||
cpu_relax();
|
||||
spin_unlock_irqrestore(&idxd->cmd_lock, flags);
|
||||
spin_unlock(&idxd->cmd_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -480,7 +469,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
|
|||
{
|
||||
union idxd_command_reg cmd;
|
||||
DECLARE_COMPLETION_ONSTACK(done);
|
||||
unsigned long flags;
|
||||
u32 stat;
|
||||
|
||||
if (idxd_device_is_halted(idxd)) {
|
||||
dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
|
||||
|
@ -494,7 +483,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
|
|||
cmd.operand = operand;
|
||||
cmd.int_req = 1;
|
||||
|
||||
spin_lock_irqsave(&idxd->cmd_lock, flags);
|
||||
spin_lock(&idxd->cmd_lock);
|
||||
wait_event_lock_irq(idxd->cmd_waitq,
|
||||
!test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
|
||||
idxd->cmd_lock);
|
||||
|
@ -511,18 +500,18 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
|
|||
* After command submitted, release lock and go to sleep until
|
||||
* the command completes via interrupt.
|
||||
*/
|
||||
spin_unlock_irqrestore(&idxd->cmd_lock, flags);
|
||||
spin_unlock(&idxd->cmd_lock);
|
||||
wait_for_completion(&done);
|
||||
spin_lock_irqsave(&idxd->cmd_lock, flags);
|
||||
if (status) {
|
||||
*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
|
||||
idxd->cmd_status = *status & GENMASK(7, 0);
|
||||
}
|
||||
stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
|
||||
spin_lock(&idxd->cmd_lock);
|
||||
if (status)
|
||||
*status = stat;
|
||||
idxd->cmd_status = stat & GENMASK(7, 0);
|
||||
|
||||
__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
|
||||
/* Wake up other pending commands */
|
||||
wake_up(&idxd->cmd_waitq);
|
||||
spin_unlock_irqrestore(&idxd->cmd_lock, flags);
|
||||
spin_unlock(&idxd->cmd_lock);
|
||||
}
|
||||
|
||||
int idxd_device_enable(struct idxd_device *idxd)
|
||||
|
@ -548,27 +537,10 @@ int idxd_device_enable(struct idxd_device *idxd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
void idxd_device_wqs_clear_state(struct idxd_device *idxd)
|
||||
{
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&idxd->dev_lock);
|
||||
|
||||
for (i = 0; i < idxd->max_wqs; i++) {
|
||||
struct idxd_wq *wq = idxd->wqs[i];
|
||||
|
||||
if (wq->state == IDXD_WQ_ENABLED) {
|
||||
idxd_wq_disable_cleanup(wq);
|
||||
wq->state = IDXD_WQ_DISABLED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int idxd_device_disable(struct idxd_device *idxd)
|
||||
{
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
u32 status;
|
||||
unsigned long flags;
|
||||
|
||||
if (!idxd_is_enabled(idxd)) {
|
||||
dev_dbg(dev, "Device is not enabled\n");
|
||||
|
@ -584,22 +556,21 @@ int idxd_device_disable(struct idxd_device *idxd)
|
|||
return -ENXIO;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&idxd->dev_lock, flags);
|
||||
idxd_device_wqs_clear_state(idxd);
|
||||
idxd->state = IDXD_DEV_CONF_READY;
|
||||
spin_unlock_irqrestore(&idxd->dev_lock, flags);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
idxd_device_clear_state(idxd);
|
||||
idxd->state = IDXD_DEV_DISABLED;
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void idxd_device_reset(struct idxd_device *idxd)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL);
|
||||
spin_lock_irqsave(&idxd->dev_lock, flags);
|
||||
idxd_device_wqs_clear_state(idxd);
|
||||
idxd->state = IDXD_DEV_CONF_READY;
|
||||
spin_unlock_irqrestore(&idxd->dev_lock, flags);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
idxd_device_clear_state(idxd);
|
||||
idxd->state = IDXD_DEV_DISABLED;
|
||||
idxd_unmask_error_interrupts(idxd);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
}
|
||||
|
||||
void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
|
||||
|
@ -649,7 +620,6 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
|
|||
struct device *dev = &idxd->pdev->dev;
|
||||
u32 operand, status;
|
||||
union idxd_command_reg cmd;
|
||||
unsigned long flags;
|
||||
|
||||
if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -667,13 +637,13 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
|
|||
|
||||
dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
|
||||
|
||||
spin_lock_irqsave(&idxd->cmd_lock, flags);
|
||||
spin_lock(&idxd->cmd_lock);
|
||||
iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
|
||||
|
||||
while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
|
||||
cpu_relax();
|
||||
status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
|
||||
spin_unlock_irqrestore(&idxd->cmd_lock, flags);
|
||||
spin_unlock(&idxd->cmd_lock);
|
||||
|
||||
if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
|
||||
dev_dbg(dev, "release int handle failed: %#x\n", status);
|
||||
|
@ -685,34 +655,63 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
|
|||
}
|
||||
|
||||
/* Device configuration bits */
|
||||
void idxd_msix_perm_setup(struct idxd_device *idxd)
|
||||
static void idxd_engines_clear_state(struct idxd_device *idxd)
|
||||
{
|
||||
union msix_perm mperm;
|
||||
int i, msixcnt;
|
||||
struct idxd_engine *engine;
|
||||
int i;
|
||||
|
||||
msixcnt = pci_msix_vec_count(idxd->pdev);
|
||||
if (msixcnt < 0)
|
||||
return;
|
||||
|
||||
mperm.bits = 0;
|
||||
mperm.pasid = idxd->pasid;
|
||||
mperm.pasid_en = device_pasid_enabled(idxd);
|
||||
for (i = 1; i < msixcnt; i++)
|
||||
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
|
||||
lockdep_assert_held(&idxd->dev_lock);
|
||||
for (i = 0; i < idxd->max_engines; i++) {
|
||||
engine = idxd->engines[i];
|
||||
engine->group = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void idxd_msix_perm_clear(struct idxd_device *idxd)
|
||||
static void idxd_groups_clear_state(struct idxd_device *idxd)
|
||||
{
|
||||
union msix_perm mperm;
|
||||
int i, msixcnt;
|
||||
struct idxd_group *group;
|
||||
int i;
|
||||
|
||||
msixcnt = pci_msix_vec_count(idxd->pdev);
|
||||
if (msixcnt < 0)
|
||||
return;
|
||||
lockdep_assert_held(&idxd->dev_lock);
|
||||
for (i = 0; i < idxd->max_groups; i++) {
|
||||
group = idxd->groups[i];
|
||||
memset(&group->grpcfg, 0, sizeof(group->grpcfg));
|
||||
group->num_engines = 0;
|
||||
group->num_wqs = 0;
|
||||
group->use_rdbuf_limit = false;
|
||||
group->rdbufs_allowed = 0;
|
||||
group->rdbufs_reserved = 0;
|
||||
if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) {
|
||||
group->tc_a = 1;
|
||||
group->tc_b = 1;
|
||||
} else {
|
||||
group->tc_a = -1;
|
||||
group->tc_b = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mperm.bits = 0;
|
||||
for (i = 1; i < msixcnt; i++)
|
||||
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + i * 8);
|
||||
static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
|
||||
{
|
||||
int i;
|
||||
|
||||
lockdep_assert_held(&idxd->dev_lock);
|
||||
for (i = 0; i < idxd->max_wqs; i++) {
|
||||
struct idxd_wq *wq = idxd->wqs[i];
|
||||
|
||||
if (wq->state == IDXD_WQ_ENABLED) {
|
||||
idxd_wq_disable_cleanup(wq);
|
||||
idxd_wq_device_reset_cleanup(wq);
|
||||
wq->state = IDXD_WQ_DISABLED;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void idxd_device_clear_state(struct idxd_device *idxd)
|
||||
{
|
||||
idxd_groups_clear_state(idxd);
|
||||
idxd_engines_clear_state(idxd);
|
||||
idxd_device_wqs_clear_state(idxd);
|
||||
}
|
||||
|
||||
static void idxd_group_config_write(struct idxd_group *group)
|
||||
|
@ -754,10 +753,10 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
|
|||
int i;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
|
||||
/* Setup bandwidth token limit */
|
||||
if (idxd->token_limit) {
|
||||
/* Setup bandwidth rdbuf limit */
|
||||
if (idxd->hw.gen_cap.config_en && idxd->rdbuf_limit) {
|
||||
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
|
||||
reg.token_limit = idxd->token_limit;
|
||||
reg.rdbuf_limit = idxd->rdbuf_limit;
|
||||
iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
|
||||
}
|
||||
|
||||
|
@ -773,6 +772,15 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
|
|||
return 0;
|
||||
}
|
||||
|
||||
static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd)
|
||||
{
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
|
||||
if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int idxd_wq_config_write(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
|
@ -792,19 +800,16 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
|
|||
wq->wqcfg->bits[i] = ioread32(idxd->reg_base + wq_offset);
|
||||
}
|
||||
|
||||
if (wq->size == 0 && wq->type != IDXD_WQT_NONE)
|
||||
wq->size = WQ_DEFAULT_QUEUE_DEPTH;
|
||||
|
||||
/* byte 0-3 */
|
||||
wq->wqcfg->wq_size = wq->size;
|
||||
|
||||
if (wq->size == 0) {
|
||||
dev_warn(dev, "Incorrect work queue size: 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* bytes 4-7 */
|
||||
wq->wqcfg->wq_thresh = wq->threshold;
|
||||
|
||||
/* byte 8-11 */
|
||||
wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
|
||||
if (wq_dedicated(wq))
|
||||
wq->wqcfg->mode = 1;
|
||||
|
||||
|
@ -814,6 +819,25 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
|
|||
wq->wqcfg->pasid = idxd->pasid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Here the priv bit is set depending on the WQ type. priv = 1 if the
|
||||
* WQ type is kernel to indicate privileged access. This setting only
|
||||
* matters for dedicated WQ. According to the DSA spec:
|
||||
* If the WQ is in dedicated mode, WQ PASID Enable is 1, and the
|
||||
* Privileged Mode Enable field of the PCI Express PASID capability
|
||||
* is 0, this field must be 0.
|
||||
*
|
||||
* In the case of a dedicated kernel WQ that is not able to support
|
||||
* the PASID cap, then the configuration will be rejected.
|
||||
*/
|
||||
wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
|
||||
if (wq_dedicated(wq) && wq->wqcfg->pasid_en &&
|
||||
!idxd_device_pasid_priv_enabled(idxd) &&
|
||||
wq->type == IDXD_WQT_KERNEL) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV;
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
wq->wqcfg->priority = wq->priority;
|
||||
|
||||
if (idxd->hw.gen_cap.block_on_fault &&
|
||||
|
@ -870,13 +894,12 @@ static void idxd_group_flags_setup(struct idxd_device *idxd)
|
|||
group->tc_b = group->grpcfg.flags.tc_b = 1;
|
||||
else
|
||||
group->grpcfg.flags.tc_b = group->tc_b;
|
||||
group->grpcfg.flags.use_token_limit = group->use_token_limit;
|
||||
group->grpcfg.flags.tokens_reserved = group->tokens_reserved;
|
||||
if (group->tokens_allowed)
|
||||
group->grpcfg.flags.tokens_allowed =
|
||||
group->tokens_allowed;
|
||||
group->grpcfg.flags.use_rdbuf_limit = group->use_rdbuf_limit;
|
||||
group->grpcfg.flags.rdbufs_reserved = group->rdbufs_reserved;
|
||||
if (group->rdbufs_allowed)
|
||||
group->grpcfg.flags.rdbufs_allowed = group->rdbufs_allowed;
|
||||
else
|
||||
group->grpcfg.flags.tokens_allowed = idxd->max_tokens;
|
||||
group->grpcfg.flags.rdbufs_allowed = idxd->max_rdbufs;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -927,10 +950,9 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
|
|||
|
||||
if (!wq->group)
|
||||
continue;
|
||||
if (!wq->size)
|
||||
continue;
|
||||
|
||||
if (wq_shared(wq) && !device_swq_supported(idxd)) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
|
||||
dev_warn(dev, "No shared wq support but configured.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@ -939,8 +961,10 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
|
|||
configured++;
|
||||
}
|
||||
|
||||
if (configured == 0)
|
||||
if (configured == 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -983,8 +1007,6 @@ static int idxd_wq_load_config(struct idxd_wq *wq)
|
|||
|
||||
wq->size = wq->wqcfg->wq_size;
|
||||
wq->threshold = wq->wqcfg->wq_thresh;
|
||||
if (wq->wqcfg->priv)
|
||||
wq->type = IDXD_WQT_KERNEL;
|
||||
|
||||
/* The driver does not support shared WQ mode in read-only config yet */
|
||||
if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en)
|
||||
|
@ -1068,7 +1090,7 @@ int idxd_device_load_config(struct idxd_device *idxd)
|
|||
int i, rc;
|
||||
|
||||
reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
|
||||
idxd->token_limit = reg.token_limit;
|
||||
idxd->rdbuf_limit = reg.rdbuf_limit;
|
||||
|
||||
for (i = 0; i < idxd->max_groups; i++) {
|
||||
struct idxd_group *group = idxd->groups[i];
|
||||
|
@ -1086,3 +1108,303 @@ int idxd_device_load_config(struct idxd_device *idxd)
|
|||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void idxd_flush_pending_descs(struct idxd_irq_entry *ie)
|
||||
{
|
||||
struct idxd_desc *desc, *itr;
|
||||
struct llist_node *head;
|
||||
LIST_HEAD(flist);
|
||||
enum idxd_complete_type ctype;
|
||||
|
||||
spin_lock(&ie->list_lock);
|
||||
head = llist_del_all(&ie->pending_llist);
|
||||
if (head) {
|
||||
llist_for_each_entry_safe(desc, itr, head, llnode)
|
||||
list_add_tail(&desc->list, &ie->work_list);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(desc, itr, &ie->work_list, list)
|
||||
list_move_tail(&desc->list, &flist);
|
||||
spin_unlock(&ie->list_lock);
|
||||
|
||||
list_for_each_entry_safe(desc, itr, &flist, list) {
|
||||
list_del(&desc->list);
|
||||
ctype = desc->completion->status ? IDXD_COMPLETE_NORMAL : IDXD_COMPLETE_ABORT;
|
||||
idxd_dma_complete_txd(desc, ctype, true);
|
||||
}
|
||||
}
|
||||
|
||||
static void idxd_device_set_perm_entry(struct idxd_device *idxd,
|
||||
struct idxd_irq_entry *ie)
|
||||
{
|
||||
union msix_perm mperm;
|
||||
|
||||
if (ie->pasid == INVALID_IOASID)
|
||||
return;
|
||||
|
||||
mperm.bits = 0;
|
||||
mperm.pasid = ie->pasid;
|
||||
mperm.pasid_en = 1;
|
||||
iowrite32(mperm.bits, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
|
||||
}
|
||||
|
||||
static void idxd_device_clear_perm_entry(struct idxd_device *idxd,
|
||||
struct idxd_irq_entry *ie)
|
||||
{
|
||||
iowrite32(0, idxd->reg_base + idxd->msix_perm_offset + ie->id * 8);
|
||||
}
|
||||
|
||||
void idxd_wq_free_irq(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct idxd_irq_entry *ie = &wq->ie;
|
||||
|
||||
synchronize_irq(ie->vector);
|
||||
free_irq(ie->vector, ie);
|
||||
idxd_flush_pending_descs(ie);
|
||||
if (idxd->request_int_handles)
|
||||
idxd_device_release_int_handle(idxd, ie->int_handle, IDXD_IRQ_MSIX);
|
||||
idxd_device_clear_perm_entry(idxd, ie);
|
||||
ie->vector = -1;
|
||||
ie->int_handle = INVALID_INT_HANDLE;
|
||||
ie->pasid = INVALID_IOASID;
|
||||
}
|
||||
|
||||
int idxd_wq_request_irq(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
struct device *dev = &pdev->dev;
|
||||
struct idxd_irq_entry *ie;
|
||||
int rc;
|
||||
|
||||
ie = &wq->ie;
|
||||
ie->vector = pci_irq_vector(pdev, ie->id);
|
||||
ie->pasid = device_pasid_enabled(idxd) ? idxd->pasid : INVALID_IOASID;
|
||||
idxd_device_set_perm_entry(idxd, ie);
|
||||
|
||||
rc = request_threaded_irq(ie->vector, NULL, idxd_wq_thread, 0, "idxd-portal", ie);
|
||||
if (rc < 0) {
|
||||
dev_err(dev, "Failed to request irq %d.\n", ie->vector);
|
||||
goto err_irq;
|
||||
}
|
||||
|
||||
if (idxd->request_int_handles) {
|
||||
rc = idxd_device_request_int_handle(idxd, ie->id, &ie->int_handle,
|
||||
IDXD_IRQ_MSIX);
|
||||
if (rc < 0)
|
||||
goto err_int_handle;
|
||||
} else {
|
||||
ie->int_handle = ie->id;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_int_handle:
|
||||
ie->int_handle = INVALID_INT_HANDLE;
|
||||
free_irq(ie->vector, ie);
|
||||
err_irq:
|
||||
idxd_device_clear_perm_entry(idxd, ie);
|
||||
ie->pasid = INVALID_IOASID;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int __drv_enable_wq(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
int rc = -ENXIO;
|
||||
|
||||
lockdep_assert_held(&wq->wq_lock);
|
||||
|
||||
if (idxd->state != IDXD_DEV_ENABLED) {
|
||||
idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (wq->state != IDXD_WQ_DISABLED) {
|
||||
dev_dbg(dev, "wq %d already enabled.\n", wq->id);
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_ENABLED;
|
||||
rc = -EBUSY;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (!wq->group) {
|
||||
dev_dbg(dev, "wq %d not attached to group.\n", wq->id);
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (strlen(wq->name) == 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME;
|
||||
dev_dbg(dev, "wq %d name not set.\n", wq->id);
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* Shared WQ checks */
|
||||
if (wq_shared(wq)) {
|
||||
if (!device_swq_supported(idxd)) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM;
|
||||
dev_dbg(dev, "PASID not enabled and shared wq.\n");
|
||||
goto err;
|
||||
}
|
||||
/*
|
||||
* Shared wq with the threshold set to 0 means the user
|
||||
* did not set the threshold or transitioned from a
* dedicated wq but did not set a threshold. A value
|
||||
* of 0 would effectively disable the shared wq. The
|
||||
* driver does not allow a value of 0 to be set for
|
||||
* threshold via sysfs.
|
||||
*/
|
||||
if (wq->threshold == 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH;
|
||||
dev_dbg(dev, "Shared wq and threshold 0.\n");
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
rc = 0;
|
||||
spin_lock(&idxd->dev_lock);
|
||||
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
|
||||
rc = idxd_device_config(idxd);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
if (rc < 0) {
|
||||
dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = idxd_wq_enable(wq);
|
||||
if (rc < 0) {
|
||||
dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc);
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = idxd_wq_map_portal(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR;
|
||||
dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc);
|
||||
goto err_map_portal;
|
||||
}
|
||||
|
||||
wq->client_count = 0;
|
||||
return 0;
|
||||
|
||||
err_map_portal:
|
||||
rc = idxd_wq_disable(wq, false);
|
||||
if (rc < 0)
|
||||
dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
|
||||
err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
int drv_enable_wq(struct idxd_wq *wq)
|
||||
{
|
||||
int rc;
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
rc = __drv_enable_wq(wq);
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
void __drv_disable_wq(struct idxd_wq *wq)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
|
||||
lockdep_assert_held(&wq->wq_lock);
|
||||
|
||||
if (idxd_wq_refcount(wq))
|
||||
dev_warn(dev, "Clients has claim on wq %d: %d\n",
|
||||
wq->id, idxd_wq_refcount(wq));
|
||||
|
||||
idxd_wq_unmap_portal(wq);
|
||||
|
||||
idxd_wq_drain(wq);
|
||||
idxd_wq_reset(wq);
|
||||
|
||||
wq->client_count = 0;
|
||||
}
|
||||
|
||||
void drv_disable_wq(struct idxd_wq *wq)
|
||||
{
|
||||
mutex_lock(&wq->wq_lock);
|
||||
__drv_disable_wq(wq);
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
}
|
||||
|
||||
int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
|
||||
int rc = 0;
|
||||
|
||||
/*
|
||||
* Device should be in disabled state for the idxd_drv to load. If it's in
|
||||
* enabled state, then the device was altered outside of driver's control.
|
||||
* If the device is in the halted state, we don't want to proceed.
|
||||
*/
|
||||
if (idxd->state != IDXD_DEV_DISABLED) {
|
||||
idxd->cmd_status = IDXD_SCMD_DEV_ENABLED;
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
/* Device configuration */
|
||||
spin_lock(&idxd->dev_lock);
|
||||
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
|
||||
rc = idxd_device_config(idxd);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
if (rc < 0)
|
||||
return -ENXIO;
|
||||
|
||||
/* Start device */
|
||||
rc = idxd_device_enable(idxd);
|
||||
if (rc < 0)
|
||||
return rc;
|
||||
|
||||
/* Setup DMA device without channels */
|
||||
rc = idxd_register_dma_device(idxd);
|
||||
if (rc < 0) {
|
||||
idxd_device_disable(idxd);
|
||||
idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR;
|
||||
return rc;
|
||||
}
|
||||
|
||||
idxd->cmd_status = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct device *dev = &idxd_dev->conf_dev;
|
||||
struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < idxd->max_wqs; i++) {
|
||||
struct idxd_wq *wq = idxd->wqs[i];
|
||||
struct device *wq_dev = wq_confdev(wq);
|
||||
|
||||
if (wq->state == IDXD_WQ_DISABLED)
|
||||
continue;
|
||||
dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev));
|
||||
device_release_driver(wq_dev);
|
||||
}
|
||||
|
||||
idxd_unregister_dma_device(idxd);
|
||||
idxd_device_disable(idxd);
|
||||
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
|
||||
idxd_device_reset(idxd);
|
||||
}
|
||||
|
||||
static enum idxd_dev_type dev_types[] = {
|
||||
IDXD_DEV_DSA,
|
||||
IDXD_DEV_IAX,
|
||||
IDXD_DEV_NONE,
|
||||
};
|
||||
|
||||
struct idxd_device_driver idxd_drv = {
|
||||
.type = dev_types,
|
||||
.probe = idxd_device_drv_probe,
|
||||
.remove = idxd_device_drv_remove,
|
||||
.name = "idxd",
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(idxd_drv);
@ -21,20 +21,27 @@ static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
|
|||
}
|
||||
|
||||
void idxd_dma_complete_txd(struct idxd_desc *desc,
|
||||
enum idxd_complete_type comp_type)
|
||||
enum idxd_complete_type comp_type,
|
||||
bool free_desc)
|
||||
{
|
||||
struct idxd_device *idxd = desc->wq->idxd;
|
||||
struct dma_async_tx_descriptor *tx;
|
||||
struct dmaengine_result res;
|
||||
int complete = 1;
|
||||
|
||||
if (desc->completion->status == DSA_COMP_SUCCESS)
|
||||
if (desc->completion->status == DSA_COMP_SUCCESS) {
|
||||
res.result = DMA_TRANS_NOERROR;
|
||||
else if (desc->completion->status)
|
||||
} else if (desc->completion->status) {
|
||||
if (idxd->request_int_handles && comp_type != IDXD_COMPLETE_ABORT &&
|
||||
desc->completion->status == DSA_COMP_INT_HANDLE_INVAL &&
|
||||
idxd_queue_int_handle_resubmit(desc))
|
||||
return;
|
||||
res.result = DMA_TRANS_WRITE_FAILED;
|
||||
else if (comp_type == IDXD_COMPLETE_ABORT)
|
||||
} else if (comp_type == IDXD_COMPLETE_ABORT) {
|
||||
res.result = DMA_TRANS_ABORTED;
|
||||
else
|
||||
} else {
|
||||
complete = 0;
|
||||
}
|
||||
|
||||
tx = &desc->txd;
|
||||
if (complete && tx->cookie) {
|
||||
|
@ -44,6 +51,9 @@ void idxd_dma_complete_txd(struct idxd_desc *desc,
|
|||
tx->callback = NULL;
|
||||
tx->callback_result = NULL;
|
||||
}
|
||||
|
||||
if (free_desc)
|
||||
idxd_free_desc(desc->wq, desc);
|
||||
}
|
||||
|
||||
static void op_flag_setup(unsigned long flags, u32 *desc_flags)
|
||||
|
@ -69,7 +79,11 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
|
|||
hw->src_addr = addr_f1;
|
||||
hw->dst_addr = addr_f2;
|
||||
hw->xfer_size = len;
|
||||
hw->priv = !!(wq->type == IDXD_WQT_KERNEL);
|
||||
/*
|
||||
* For dedicated WQ, this field is ignored and HW will use the WQCFG.priv
|
||||
* field instead. This field should be set to 1 for kernel descriptors.
|
||||
*/
|
||||
hw->priv = 1;
|
||||
hw->completion_addr = compl;
|
||||
}
|
||||
|
||||
|
@ -245,7 +259,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq)
|
|||
|
||||
wq->idxd_chan = idxd_chan;
|
||||
idxd_chan->wq = wq;
|
||||
get_device(&wq->conf_dev);
|
||||
get_device(wq_confdev(wq));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -260,5 +274,100 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq)
|
|||
list_del(&chan->device_node);
|
||||
kfree(wq->idxd_chan);
|
||||
wq->idxd_chan = NULL;
|
||||
put_device(&wq->conf_dev);
|
||||
put_device(wq_confdev(wq));
|
||||
}
|
||||
|
||||
static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct device *dev = &idxd_dev->conf_dev;
|
||||
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
int rc;
|
||||
|
||||
if (idxd->state != IDXD_DEV_ENABLED)
|
||||
return -ENXIO;
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
wq->type = IDXD_WQT_KERNEL;
|
||||
|
||||
rc = idxd_wq_request_irq(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_IRQ_ERR;
|
||||
dev_dbg(dev, "WQ %d irq setup failed: %d\n", wq->id, rc);
|
||||
goto err_irq;
|
||||
}
|
||||
|
||||
rc = __drv_enable_wq(wq);
|
||||
if (rc < 0) {
|
||||
dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
|
||||
rc = -ENXIO;
|
||||
goto err;
|
||||
}
|
||||
|
||||
rc = idxd_wq_alloc_resources(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR;
|
||||
dev_dbg(dev, "WQ resource alloc failed\n");
|
||||
goto err_res_alloc;
|
||||
}
|
||||
|
||||
rc = idxd_wq_init_percpu_ref(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_PERCPU_ERR;
|
||||
dev_dbg(dev, "percpu_ref setup failed\n");
|
||||
goto err_ref;
|
||||
}
|
||||
|
||||
rc = idxd_register_dma_channel(wq);
|
||||
if (rc < 0) {
|
||||
idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR;
|
||||
dev_dbg(dev, "Failed to register dma channel\n");
|
||||
goto err_dma;
|
||||
}
|
||||
|
||||
idxd->cmd_status = 0;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
return 0;
|
||||
|
||||
err_dma:
|
||||
__idxd_wq_quiesce(wq);
|
||||
percpu_ref_exit(&wq->wq_active);
|
||||
err_ref:
|
||||
idxd_wq_free_resources(wq);
|
||||
err_res_alloc:
|
||||
__drv_disable_wq(wq);
|
||||
err:
|
||||
idxd_wq_free_irq(wq);
|
||||
err_irq:
|
||||
wq->type = IDXD_WQT_NONE;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
__idxd_wq_quiesce(wq);
|
||||
idxd_unregister_dma_channel(wq);
|
||||
idxd_wq_free_resources(wq);
|
||||
__drv_disable_wq(wq);
|
||||
percpu_ref_exit(&wq->wq_active);
|
||||
idxd_wq_free_irq(wq);
|
||||
wq->type = IDXD_WQT_NONE;
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
}
|
||||
|
||||
static enum idxd_dev_type dev_types[] = {
|
||||
IDXD_DEV_WQ,
|
||||
IDXD_DEV_NONE,
|
||||
};
|
||||
|
||||
struct idxd_device_driver idxd_dmaengine_drv = {
|
||||
.probe = idxd_dmaengine_drv_probe,
|
||||
.remove = idxd_dmaengine_drv_remove,
|
||||
.name = "dmaengine",
|
||||
.type = dev_types,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(idxd_dmaengine_drv);
|
@ -10,15 +10,34 @@
|
|||
#include <linux/cdev.h>
|
||||
#include <linux/idr.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/ioasid.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <uapi/linux/idxd.h>
|
||||
#include "registers.h"
|
||||
|
||||
#define IDXD_DRIVER_VERSION "1.00"
|
||||
|
||||
extern struct kmem_cache *idxd_desc_pool;
|
||||
extern bool tc_override;
|
||||
|
||||
struct idxd_device;
|
||||
struct idxd_wq;
|
||||
struct idxd_dev;
|
||||
|
||||
enum idxd_dev_type {
|
||||
IDXD_DEV_NONE = -1,
|
||||
IDXD_DEV_DSA = 0,
|
||||
IDXD_DEV_IAX,
|
||||
IDXD_DEV_WQ,
|
||||
IDXD_DEV_GROUP,
|
||||
IDXD_DEV_ENGINE,
|
||||
IDXD_DEV_CDEV,
|
||||
IDXD_DEV_MAX_TYPE,
|
||||
};
|
||||
|
||||
struct idxd_dev {
|
||||
struct device conf_dev;
|
||||
enum idxd_dev_type type;
|
||||
};
|
||||
|
||||
#define IDXD_REG_TIMEOUT 50
|
||||
#define IDXD_DRAIN_TIMEOUT 5000
|
||||
|
@ -33,12 +52,24 @@ enum idxd_type {
|
|||
#define IDXD_NAME_SIZE 128
|
||||
#define IDXD_PMU_EVENT_MAX 64
|
||||
|
||||
#define IDXD_ENQCMDS_RETRIES 32
|
||||
#define IDXD_ENQCMDS_MAX_RETRIES 64
|
||||
|
||||
struct idxd_device_driver {
|
||||
const char *name;
|
||||
enum idxd_dev_type *type;
|
||||
int (*probe)(struct idxd_dev *idxd_dev);
|
||||
void (*remove)(struct idxd_dev *idxd_dev);
|
||||
struct device_driver drv;
|
||||
};
|
||||
|
||||
extern struct idxd_device_driver dsa_drv;
|
||||
extern struct idxd_device_driver idxd_drv;
|
||||
extern struct idxd_device_driver idxd_dmaengine_drv;
|
||||
extern struct idxd_device_driver idxd_user_drv;
|
||||
|
||||
#define INVALID_INT_HANDLE -1
|
||||
struct idxd_irq_entry {
|
||||
struct idxd_device *idxd;
|
||||
int id;
|
||||
int vector;
|
||||
struct llist_head pending_llist;
|
||||
|
@ -48,18 +79,20 @@ struct idxd_irq_entry {
|
|||
* and irq thread processing error descriptor.
|
||||
*/
|
||||
spinlock_t list_lock;
|
||||
int int_handle;
|
||||
ioasid_t pasid;
|
||||
};
|
||||
|
||||
struct idxd_group {
|
||||
struct device conf_dev;
|
||||
struct idxd_dev idxd_dev;
|
||||
struct idxd_device *idxd;
|
||||
struct grpcfg grpcfg;
|
||||
int id;
|
||||
int num_engines;
|
||||
int num_wqs;
|
||||
bool use_token_limit;
|
||||
u8 tokens_allowed;
|
||||
u8 tokens_reserved;
|
||||
bool use_rdbuf_limit;
|
||||
u8 rdbufs_allowed;
|
||||
u8 rdbufs_reserved;
|
||||
int tc_a;
|
||||
int tc_b;
|
||||
};
|
||||
|
@ -110,7 +143,7 @@ enum idxd_wq_type {
|
|||
struct idxd_cdev {
|
||||
struct idxd_wq *wq;
|
||||
struct cdev cdev;
|
||||
struct device dev;
|
||||
struct idxd_dev idxd_dev;
|
||||
int minor;
|
||||
};
|
||||
|
||||
|
@ -118,6 +151,10 @@ struct idxd_cdev {
|
|||
#define WQ_NAME_SIZE 1024
|
||||
#define WQ_TYPE_SIZE 10
|
||||
|
||||
#define WQ_DEFAULT_QUEUE_DEPTH 16
|
||||
#define WQ_DEFAULT_MAX_XFER SZ_2M
|
||||
#define WQ_DEFAULT_MAX_BATCH 32
|
||||
|
||||
enum idxd_op_type {
|
||||
IDXD_OP_BLOCK = 0,
|
||||
IDXD_OP_NONBLOCK = 1,
|
||||
|
@ -136,13 +173,17 @@ struct idxd_dma_chan {
|
|||
|
||||
struct idxd_wq {
|
||||
void __iomem *portal;
|
||||
u32 portal_offset;
|
||||
unsigned int enqcmds_retries;
|
||||
struct percpu_ref wq_active;
|
||||
struct completion wq_dead;
|
||||
struct device conf_dev;
|
||||
struct completion wq_resurrect;
|
||||
struct idxd_dev idxd_dev;
|
||||
struct idxd_cdev *idxd_cdev;
|
||||
struct wait_queue_head err_queue;
|
||||
struct idxd_device *idxd;
|
||||
int id;
|
||||
struct idxd_irq_entry ie;
|
||||
enum idxd_wq_type type;
|
||||
struct idxd_group *group;
|
||||
int client_count;
|
||||
|
@ -153,16 +194,13 @@ struct idxd_wq {
|
|||
enum idxd_wq_state state;
|
||||
unsigned long flags;
|
||||
union wqcfg *wqcfg;
|
||||
u32 vec_ptr; /* interrupt steering */
|
||||
struct dsa_hw_desc **hw_descs;
|
||||
int num_descs;
|
||||
union {
|
||||
struct dsa_completion_record *compls;
|
||||
struct iax_completion_record *iax_compls;
|
||||
};
|
||||
void *compls_raw;
|
||||
dma_addr_t compls_addr;
|
||||
dma_addr_t compls_addr_raw;
|
||||
int compls_size;
|
||||
struct idxd_desc **descs;
|
||||
struct sbitmap_queue sbq;
|
||||
|
@ -174,7 +212,7 @@ struct idxd_wq {
|
|||
};
|
||||
|
||||
struct idxd_engine {
|
||||
struct device conf_dev;
|
||||
struct idxd_dev idxd_dev;
|
||||
int id;
|
||||
struct idxd_group *group;
|
||||
struct idxd_device *idxd;
|
||||
|
@ -194,7 +232,6 @@ struct idxd_hw {
|
|||
enum idxd_device_state {
|
||||
IDXD_DEV_HALTED = -1,
|
||||
IDXD_DEV_DISABLED = 0,
|
||||
IDXD_DEV_CONF_READY,
|
||||
IDXD_DEV_ENABLED,
|
||||
};
|
||||
|
||||
|
@ -218,7 +255,7 @@ struct idxd_driver_data {
|
|||
};
|
||||
|
||||
struct idxd_device {
|
||||
struct device conf_dev;
|
||||
struct idxd_dev idxd_dev;
|
||||
struct idxd_driver_data *data;
|
||||
struct list_head list;
|
||||
struct idxd_hw hw;
|
||||
|
@ -226,7 +263,8 @@ struct idxd_device {
|
|||
unsigned long flags;
|
||||
int id;
|
||||
int major;
|
||||
u8 cmd_status;
|
||||
u32 cmd_status;
|
||||
struct idxd_irq_entry ie; /* misc irq, msix 0 */
|
||||
|
||||
struct pci_dev *pdev;
|
||||
void __iomem *reg_base;
|
||||
|
@ -242,6 +280,8 @@ struct idxd_device {
|
|||
unsigned int pasid;
|
||||
|
||||
int num_groups;
|
||||
int irq_cnt;
|
||||
bool request_int_handles;
|
||||
|
||||
u32 msix_perm_offset;
|
||||
u32 wqcfg_offset;
|
||||
|
@ -252,24 +292,20 @@ struct idxd_device {
|
|||
u32 max_batch_size;
|
||||
int max_groups;
|
||||
int max_engines;
|
||||
int max_tokens;
|
||||
int max_rdbufs;
|
||||
int max_wqs;
|
||||
int max_wq_size;
|
||||
int token_limit;
|
||||
int nr_tokens; /* non-reserved tokens */
|
||||
int rdbuf_limit;
|
||||
int nr_rdbufs; /* non-reserved read buffers */
|
||||
unsigned int wqcfg_size;
|
||||
|
||||
union sw_err_reg sw_err;
|
||||
wait_queue_head_t cmd_waitq;
|
||||
int num_wq_irqs;
|
||||
struct idxd_irq_entry *irq_entries;
|
||||
|
||||
struct idxd_dma_dev *idxd_dma;
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
|
||||
int *int_handles;
|
||||
|
||||
struct idxd_pmu *idxd_pmu;
|
||||
};
|
||||
|
||||
|
@ -290,7 +326,6 @@ struct idxd_desc {
|
|||
struct list_head list;
|
||||
int id;
|
||||
int cpu;
|
||||
unsigned int vector;
|
||||
struct idxd_wq *wq;
|
||||
};
|
||||
|
||||
|
@ -302,11 +337,77 @@ enum idxd_completion_status {
|
|||
IDXD_COMP_DESC_ABORT = 0xff,
|
||||
};
|
||||
|
||||
#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
|
||||
#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
|
||||
#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev
|
||||
#define wq_confdev(wq) &wq->idxd_dev.conf_dev
|
||||
#define engine_confdev(engine) &engine->idxd_dev.conf_dev
|
||||
#define group_confdev(group) &group->idxd_dev.conf_dev
|
||||
#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev
|
||||
|
||||
#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev)
|
||||
#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev)
|
||||
#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev)
|
||||
|
||||
static inline struct idxd_device *confdev_to_idxd(struct device *dev)
|
||||
{
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return idxd_dev_to_idxd(idxd_dev);
|
||||
}
|
||||
|
||||
static inline struct idxd_wq *confdev_to_wq(struct device *dev)
|
||||
{
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return idxd_dev_to_wq(idxd_dev);
|
||||
}
|
||||
|
||||
static inline struct idxd_engine *confdev_to_engine(struct device *dev)
|
||||
{
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return container_of(idxd_dev, struct idxd_engine, idxd_dev);
|
||||
}
|
||||
|
||||
static inline struct idxd_group *confdev_to_group(struct device *dev)
|
||||
{
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return container_of(idxd_dev, struct idxd_group, idxd_dev);
|
||||
}
|
||||
|
||||
static inline struct idxd_cdev *dev_to_cdev(struct device *dev)
|
||||
{
|
||||
struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
|
||||
|
||||
return container_of(idxd_dev, struct idxd_cdev, idxd_dev);
|
||||
}
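
/*
 * Illustrative sketch (hypothetical attribute, not from this patch): with
 * conf_dev now embedded in struct idxd_dev, a sysfs callback recovers its
 * object through these helpers instead of the old container_of() macros.
 */
static ssize_t example_wq_id_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct idxd_wq *wq = confdev_to_wq(dev);	/* dev -> idxd_dev -> idxd_wq */

	return sysfs_emit(buf, "%d\n", wq->id);
}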
|
||||
|
||||
static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
|
||||
{
|
||||
if (type >= IDXD_DEV_MAX_TYPE) {
|
||||
idev->type = IDXD_DEV_NONE;
|
||||
return;
|
||||
}
|
||||
|
||||
idev->type = type;
|
||||
}
|
||||
|
||||
static inline struct idxd_irq_entry *idxd_get_ie(struct idxd_device *idxd, int idx)
|
||||
{
|
||||
return (idx == 0) ? &idxd->ie : &idxd->wqs[idx - 1]->ie;
|
||||
}
|
||||
|
||||
static inline struct idxd_wq *ie_to_wq(struct idxd_irq_entry *ie)
|
||||
{
|
||||
return container_of(ie, struct idxd_wq, ie);
|
||||
}
|
||||
|
||||
static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie)
|
||||
{
|
||||
return container_of(ie, struct idxd_device, ie);
|
||||
}
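
/*
 * Illustrative sketch (not the driver's actual handler): idxd_get_ie(idxd, 0)
 * is the misc entry and idxd_get_ie(idxd, n) is wqs[n - 1]->ie, so a per-WQ
 * interrupt thread can map its irq_entry back to both containers.
 */
static irqreturn_t example_wq_irq_thread(int irq, void *data)
{
	struct idxd_irq_entry *ie = data;	/* dev_id passed to request_threaded_irq() */
	struct idxd_wq *wq = ie_to_wq(ie);	/* valid only for per-WQ entries (ie->id != 0) */

	dev_dbg(&wq->idxd->pdev->dev, "irq %d fired for wq %d\n", irq, wq->id);
	return IRQ_HANDLED;
}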
|
||||
|
||||
extern struct bus_type dsa_bus_type;
|
||||
extern struct bus_type iax_bus_type;
|
||||
|
||||
extern bool support_enqcmd;
|
||||
extern struct ida idxd_ida;
|
||||
|
@ -316,24 +417,24 @@ extern struct device_type idxd_wq_device_type;
|
|||
extern struct device_type idxd_engine_device_type;
|
||||
extern struct device_type idxd_group_device_type;
|
||||
|
||||
static inline bool is_dsa_dev(struct device *dev)
|
||||
static inline bool is_dsa_dev(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
return dev->type == &dsa_device_type;
|
||||
return idxd_dev->type == IDXD_DEV_DSA;
|
||||
}
|
||||
|
||||
static inline bool is_iax_dev(struct device *dev)
|
||||
static inline bool is_iax_dev(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
return dev->type == &iax_device_type;
|
||||
return idxd_dev->type == IDXD_DEV_IAX;
|
||||
}
|
||||
|
||||
static inline bool is_idxd_dev(struct device *dev)
|
||||
static inline bool is_idxd_dev(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
return is_dsa_dev(dev) || is_iax_dev(dev);
|
||||
return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev);
|
||||
}
|
||||
|
||||
static inline bool is_idxd_wq_dev(struct device *dev)
|
||||
static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev)
|
||||
{
|
||||
return dev->type == &idxd_wq_device_type;
|
||||
return idxd_dev->type == IDXD_DEV_WQ;
|
||||
}
|
||||
|
||||
static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
|
||||
|
@ -343,11 +444,16 @@ static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
|
|||
return false;
|
||||
}
|
||||
|
||||
static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
|
||||
static inline bool is_idxd_wq_user(struct idxd_wq *wq)
|
||||
{
|
||||
return wq->type == IDXD_WQT_USER;
|
||||
}
|
||||
|
||||
static inline bool is_idxd_wq_kernel(struct idxd_wq *wq)
|
||||
{
|
||||
return wq->type == IDXD_WQT_KERNEL;
|
||||
}
|
||||
|
||||
static inline bool wq_dedicated(struct idxd_wq *wq)
|
||||
{
|
||||
return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
|
||||
|
@ -389,6 +495,24 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id,
|
|||
return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
|
||||
}
|
||||
|
||||
#define IDXD_PORTAL_MASK (PAGE_SIZE - 1)
|
||||
|
||||
/*
|
||||
* Even though this function can be accessed by multiple threads, it is safe to use.
|
||||
* At worst the address gets used more than once before it gets incremented. We don't
|
||||
* hit a threshold until iops reaches many millions per second. So the occasional
* reuse of the same address is tolerable compared to using an atomic variable. This is
|
||||
* safe on a system that has atomic load/store for 32bit integers. Given that this is an
|
||||
* Intel iEP device, that should not be a problem.
|
||||
*/
|
||||
static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq)
|
||||
{
|
||||
int ofs = wq->portal_offset;
|
||||
|
||||
wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK;
|
||||
return wq->portal + ofs;
|
||||
}
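
/*
 * Minimal sketch, assuming a dedicated WQ and the x86 iosubmit_cmds512()
 * helper (not part of this patch): each call above hands out the next
 * 64-byte slot (sizeof(struct dsa_raw_desc)) and wraps inside the 4KB
 * portal page via IDXD_PORTAL_MASK.
 */
static void example_submit_dedicated(struct idxd_wq *wq, const struct dsa_hw_desc *hw)
{
	void __iomem *portal = idxd_wq_portal_addr(wq);	/* next 64-byte slot */

	iosubmit_cmds512(portal, hw, 1);		/* MOVDIR64B the descriptor */
}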
|
||||
|
||||
static inline void idxd_wq_get(struct idxd_wq *wq)
|
||||
{
|
||||
wq->client_count++;
|
||||
|
@ -404,6 +528,16 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq)
|
|||
return wq->client_count;
|
||||
};
|
||||
|
||||
int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv,
|
||||
struct module *module, const char *mod_name);
|
||||
#define idxd_driver_register(driver) \
|
||||
__idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
|
||||
|
||||
void idxd_driver_unregister(struct idxd_device_driver *idxd_drv);
|
||||
|
||||
#define module_idxd_driver(__idxd_driver) \
|
||||
module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister)
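
/*
 * Hedged sketch of a hypothetical sub-driver using the new registration
 * helpers (all "example_*" names are made up; the real users are idxd_drv,
 * idxd_dmaengine_drv and idxd_user_drv):
 */
static int example_probe(struct idxd_dev *idxd_dev)
{
	return 0;			/* claim and configure the device */
}

static void example_remove(struct idxd_dev *idxd_dev)
{
}

static enum idxd_dev_type example_types[] = { IDXD_DEV_WQ, IDXD_DEV_NONE };

static struct idxd_device_driver example_idxd_drv = {
	.name	= "example",
	.type	= example_types,
	.probe	= example_probe,
	.remove	= example_remove,
};
module_idxd_driver(example_idxd_drv);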
|
||||
|
||||
int idxd_register_bus_type(void);
|
||||
void idxd_unregister_bus_type(void);
|
||||
int idxd_register_devices(struct idxd_device *idxd);
|
||||
|
@ -411,26 +545,29 @@ void idxd_unregister_devices(struct idxd_device *idxd);
|
|||
int idxd_register_driver(void);
|
||||
void idxd_unregister_driver(void);
|
||||
void idxd_wqs_quiesce(struct idxd_device *idxd);
|
||||
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
|
||||
|
||||
/* device interrupt control */
|
||||
void idxd_msix_perm_setup(struct idxd_device *idxd);
|
||||
void idxd_msix_perm_clear(struct idxd_device *idxd);
|
||||
irqreturn_t idxd_misc_thread(int vec, void *data);
|
||||
irqreturn_t idxd_wq_thread(int irq, void *data);
|
||||
void idxd_mask_error_interrupts(struct idxd_device *idxd);
|
||||
void idxd_unmask_error_interrupts(struct idxd_device *idxd);
|
||||
void idxd_mask_msix_vectors(struct idxd_device *idxd);
|
||||
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
|
||||
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
|
||||
|
||||
/* device control */
|
||||
int idxd_register_idxd_drv(void);
|
||||
void idxd_unregister_idxd_drv(void);
|
||||
int idxd_device_drv_probe(struct idxd_dev *idxd_dev);
|
||||
void idxd_device_drv_remove(struct idxd_dev *idxd_dev);
|
||||
int drv_enable_wq(struct idxd_wq *wq);
|
||||
int __drv_enable_wq(struct idxd_wq *wq);
|
||||
void drv_disable_wq(struct idxd_wq *wq);
|
||||
void __drv_disable_wq(struct idxd_wq *wq);
|
||||
int idxd_device_init_reset(struct idxd_device *idxd);
|
||||
int idxd_device_enable(struct idxd_device *idxd);
|
||||
int idxd_device_disable(struct idxd_device *idxd);
|
||||
void idxd_device_reset(struct idxd_device *idxd);
|
||||
void idxd_device_cleanup(struct idxd_device *idxd);
|
||||
void idxd_device_clear_state(struct idxd_device *idxd);
|
||||
int idxd_device_config(struct idxd_device *idxd);
|
||||
void idxd_device_wqs_clear_state(struct idxd_device *idxd);
|
||||
void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
|
||||
int idxd_device_load_config(struct idxd_device *idxd);
|
||||
int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
|
||||
|
@ -443,21 +580,24 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd);
|
|||
int idxd_wq_alloc_resources(struct idxd_wq *wq);
|
||||
void idxd_wq_free_resources(struct idxd_wq *wq);
|
||||
int idxd_wq_enable(struct idxd_wq *wq);
|
||||
int idxd_wq_disable(struct idxd_wq *wq);
|
||||
int idxd_wq_disable(struct idxd_wq *wq, bool reset_config);
|
||||
void idxd_wq_drain(struct idxd_wq *wq);
|
||||
void idxd_wq_reset(struct idxd_wq *wq);
|
||||
int idxd_wq_map_portal(struct idxd_wq *wq);
|
||||
void idxd_wq_unmap_portal(struct idxd_wq *wq);
|
||||
void idxd_wq_disable_cleanup(struct idxd_wq *wq);
|
||||
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
|
||||
int idxd_wq_disable_pasid(struct idxd_wq *wq);
|
||||
void __idxd_wq_quiesce(struct idxd_wq *wq);
|
||||
void idxd_wq_quiesce(struct idxd_wq *wq);
|
||||
int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
|
||||
void idxd_wq_free_irq(struct idxd_wq *wq);
|
||||
int idxd_wq_request_irq(struct idxd_wq *wq);
|
||||
|
||||
/* submission */
|
||||
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
|
||||
struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype);
|
||||
void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc);
|
||||
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc);
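
/*
 * Hedged sketch only, not the patch's implementation: idxd_enqcmds() amounts
 * to retrying the x86 enqcmds() primitive, which returns -EAGAIN while the
 * shared WQ cannot accept the descriptor, bounded by the per-WQ
 * enqcmds_retries budget added above.
 */
static int example_enqcmds_retry(struct idxd_wq *wq, void __iomem *portal, const void *desc)
{
	unsigned int retries = wq->enqcmds_retries;
	int rc;

	do {
		rc = enqcmds(portal, desc);	/* 0 on success, -EAGAIN when busy */
		if (rc == 0)
			break;
		cpu_relax();
	} while (retries--);

	return rc;
}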
|
||||
|
||||
/* dmaengine */
|
||||
int idxd_register_dma_device(struct idxd_device *idxd);
|
||||
|
@ -466,7 +606,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq);
|
|||
void idxd_unregister_dma_channel(struct idxd_wq *wq);
|
||||
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
|
||||
void idxd_dma_complete_txd(struct idxd_desc *desc,
|
||||
enum idxd_complete_type comp_type);
|
||||
enum idxd_complete_type comp_type, bool free_desc);
|
||||
|
||||
/* cdev */
|
||||
int idxd_cdev_register(void);
|
||||
|
@ -490,10 +630,4 @@ static inline void perfmon_init(void) {}
|
|||
static inline void perfmon_exit(void) {}
|
||||
#endif
|
||||
|
||||
static inline void complete_desc(struct idxd_desc *desc, enum idxd_complete_type reason)
|
||||
{
|
||||
idxd_dma_complete_txd(desc, reason);
|
||||
idxd_free_desc(desc->wq, desc);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -26,11 +26,16 @@
|
|||
MODULE_VERSION(IDXD_DRIVER_VERSION);
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Intel Corporation");
|
||||
MODULE_IMPORT_NS(IDXD);
|
||||
|
||||
static bool sva = true;
|
||||
module_param(sva, bool, 0644);
|
||||
MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
|
||||
|
||||
bool tc_override;
|
||||
module_param(tc_override, bool, 0644);
|
||||
MODULE_PARM_DESC(tc_override, "Override traffic class defaults");
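
/*
 * For reference (not part of this diff): both knobs are ordinary boolean
 * module parameters, so they can be set at load time, e.g.
 * "modprobe idxd sva=N tc_override=Y", or inspected later under
 * /sys/module/idxd/parameters/.
 */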
|
||||
|
||||
#define DRV_NAME "idxd"
|
||||
|
||||
bool support_enqcmd;
|
||||
|
@ -67,7 +72,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
|
|||
{
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
struct device *dev = &pdev->dev;
|
||||
struct idxd_irq_entry *irq_entry;
|
||||
struct idxd_irq_entry *ie;
|
||||
int i, msixcnt;
|
||||
int rc = 0;
|
||||
|
||||
|
@ -76,6 +81,7 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
|
|||
dev_err(dev, "Not MSI-X interrupt capable.\n");
|
||||
return -ENOSPC;
|
||||
}
|
||||
idxd->irq_cnt = msixcnt;
|
||||
|
||||
rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
|
||||
if (rc != msixcnt) {
|
||||
|
@ -84,87 +90,34 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
|
|||
}
|
||||
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
|
||||
|
||||
/*
|
||||
* We implement 1 completion list per MSI-X entry except for
|
||||
* entry 0, which is for errors and others.
|
||||
*/
|
||||
idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
|
||||
GFP_KERNEL, dev_to_node(dev));
|
||||
if (!idxd->irq_entries) {
|
||||
rc = -ENOMEM;
|
||||
goto err_irq_entries;
|
||||
}
|
||||
|
||||
for (i = 0; i < msixcnt; i++) {
|
||||
idxd->irq_entries[i].id = i;
|
||||
idxd->irq_entries[i].idxd = idxd;
|
||||
idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
|
||||
spin_lock_init(&idxd->irq_entries[i].list_lock);
|
||||
}
|
||||
|
||||
idxd_msix_perm_setup(idxd);
|
||||
|
||||
irq_entry = &idxd->irq_entries[0];
|
||||
rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
|
||||
0, "idxd-misc", irq_entry);
|
||||
ie = idxd_get_ie(idxd, 0);
|
||||
ie->vector = pci_irq_vector(pdev, 0);
|
||||
rc = request_threaded_irq(ie->vector, NULL, idxd_misc_thread, 0, "idxd-misc", ie);
|
||||
if (rc < 0) {
|
||||
dev_err(dev, "Failed to allocate misc interrupt.\n");
|
||||
goto err_misc_irq;
|
||||
}
|
||||
dev_dbg(dev, "Requested idxd-misc handler on msix vector %d\n", ie->vector);
|
||||
|
||||
dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
|
||||
for (i = 0; i < idxd->max_wqs; i++) {
|
||||
int msix_idx = i + 1;
|
||||
|
||||
/* first MSI-X entry is not for wq interrupts */
|
||||
idxd->num_wq_irqs = msixcnt - 1;
|
||||
ie = idxd_get_ie(idxd, msix_idx);
|
||||
ie->id = msix_idx;
|
||||
ie->int_handle = INVALID_INT_HANDLE;
|
||||
ie->pasid = INVALID_IOASID;
|
||||
|
||||
for (i = 1; i < msixcnt; i++) {
|
||||
irq_entry = &idxd->irq_entries[i];
|
||||
|
||||
init_llist_head(&idxd->irq_entries[i].pending_llist);
|
||||
INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
|
||||
rc = request_threaded_irq(irq_entry->vector, NULL,
|
||||
idxd_wq_thread, 0, "idxd-portal", irq_entry);
|
||||
if (rc < 0) {
|
||||
dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
|
||||
goto err_wq_irqs;
|
||||
}
|
||||
|
||||
dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
|
||||
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
|
||||
/*
|
||||
* The MSIX vector enumeration starts at 1 with vector 0 being the
|
||||
* misc interrupt that handles non I/O completion events. The
|
||||
* interrupt handles are for IMS enumeration on guest. The misc
|
||||
* interrupt vector does not require a handle and therefore we start
|
||||
* the int_handles at index 0. Since 'i' starts at 1, the first
|
||||
* int_handles index will be 0.
|
||||
*/
|
||||
rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
|
||||
IDXD_IRQ_MSIX);
|
||||
if (rc < 0) {
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
goto err_wq_irqs;
|
||||
}
|
||||
dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
|
||||
}
|
||||
spin_lock_init(&ie->list_lock);
|
||||
init_llist_head(&ie->pending_llist);
|
||||
INIT_LIST_HEAD(&ie->work_list);
|
||||
}
|
||||
|
||||
idxd_unmask_error_interrupts(idxd);
|
||||
return 0;
|
||||
|
||||
err_wq_irqs:
|
||||
while (--i >= 0) {
|
||||
irq_entry = &idxd->irq_entries[i];
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
if (i != 0)
|
||||
idxd_device_release_int_handle(idxd,
|
||||
idxd->int_handles[i], IDXD_IRQ_MSIX);
|
||||
}
|
||||
err_misc_irq:
|
||||
/* Disable error interrupt generation */
|
||||
idxd_mask_error_interrupts(idxd);
|
||||
idxd_msix_perm_clear(idxd);
|
||||
err_irq_entries:
|
||||
pci_free_irq_vectors(pdev);
|
||||
dev_err(dev, "No usable interrupts\n");
|
||||
return rc;
|
||||
|
@ -173,26 +126,16 @@ static int idxd_setup_interrupts(struct idxd_device *idxd)
|
|||
static void idxd_cleanup_interrupts(struct idxd_device *idxd)
|
||||
{
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
struct idxd_irq_entry *irq_entry;
|
||||
int i, msixcnt;
|
||||
struct idxd_irq_entry *ie;
|
||||
int msixcnt;
|
||||
|
||||
msixcnt = pci_msix_vec_count(pdev);
|
||||
if (msixcnt <= 0)
|
||||
return;
|
||||
|
||||
irq_entry = &idxd->irq_entries[0];
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
|
||||
for (i = 1; i < msixcnt; i++) {
|
||||
|
||||
irq_entry = &idxd->irq_entries[i];
|
||||
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE))
|
||||
idxd_device_release_int_handle(idxd, idxd->int_handles[i],
|
||||
IDXD_IRQ_MSIX);
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
}
|
||||
|
||||
ie = idxd_get_ie(idxd, 0);
|
||||
idxd_mask_error_interrupts(idxd);
|
||||
free_irq(ie->vector, ie);
|
||||
pci_free_irq_vectors(pdev);
|
||||
}
|
||||
|
||||
|
@ -200,6 +143,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
|
|||
{
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
struct idxd_wq *wq;
|
||||
struct device *conf_dev;
|
||||
int i, rc;
|
||||
|
||||
idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
|
||||
|
@ -214,26 +158,30 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
|
|||
goto err;
|
||||
}
|
||||
|
||||
idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
|
||||
conf_dev = wq_confdev(wq);
|
||||
wq->id = i;
|
||||
wq->idxd = idxd;
|
||||
device_initialize(&wq->conf_dev);
|
||||
wq->conf_dev.parent = &idxd->conf_dev;
|
||||
wq->conf_dev.bus = &dsa_bus_type;
|
||||
wq->conf_dev.type = &idxd_wq_device_type;
|
||||
rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
|
||||
device_initialize(wq_confdev(wq));
|
||||
conf_dev->parent = idxd_confdev(idxd);
|
||||
conf_dev->bus = &dsa_bus_type;
|
||||
conf_dev->type = &idxd_wq_device_type;
|
||||
rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
|
||||
if (rc < 0) {
|
||||
put_device(&wq->conf_dev);
|
||||
put_device(conf_dev);
|
||||
goto err;
|
||||
}
|
||||
|
||||
mutex_init(&wq->wq_lock);
|
||||
init_waitqueue_head(&wq->err_queue);
|
||||
init_completion(&wq->wq_dead);
|
||||
wq->max_xfer_bytes = idxd->max_xfer_bytes;
|
||||
wq->max_batch_size = idxd->max_batch_size;
|
||||
init_completion(&wq->wq_resurrect);
|
||||
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
|
||||
wq->max_batch_size = WQ_DEFAULT_MAX_BATCH;
|
||||
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
|
||||
wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
|
||||
if (!wq->wqcfg) {
|
||||
put_device(&wq->conf_dev);
|
||||
put_device(conf_dev);
|
||||
rc = -ENOMEM;
|
||||
goto err;
|
||||
}
|
||||
|
@ -243,8 +191,11 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
|
|||
return 0;
|
||||
|
||||
err:
|
||||
while (--i >= 0)
|
||||
put_device(&idxd->wqs[i]->conf_dev);
|
||||
while (--i >= 0) {
|
||||
wq = idxd->wqs[i];
|
||||
conf_dev = wq_confdev(wq);
|
||||
put_device(conf_dev);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -252,6 +203,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
|
|||
{
|
||||
struct idxd_engine *engine;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
struct device *conf_dev;
|
||||
int i, rc;
|
||||
|
||||
idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
|
||||
|
@ -266,15 +218,17 @@ static int idxd_setup_engines(struct idxd_device *idxd)
|
|||
goto err;
|
||||
}
|
||||
|
||||
idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
|
||||
conf_dev = engine_confdev(engine);
|
||||
engine->id = i;
|
||||
engine->idxd = idxd;
|
||||
device_initialize(&engine->conf_dev);
|
||||
engine->conf_dev.parent = &idxd->conf_dev;
|
||||
engine->conf_dev.bus = &dsa_bus_type;
|
||||
engine->conf_dev.type = &idxd_engine_device_type;
|
||||
rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
|
||||
device_initialize(conf_dev);
|
||||
conf_dev->parent = idxd_confdev(idxd);
|
||||
conf_dev->bus = &dsa_bus_type;
|
||||
conf_dev->type = &idxd_engine_device_type;
|
||||
rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
|
||||
if (rc < 0) {
|
||||
put_device(&engine->conf_dev);
|
||||
put_device(conf_dev);
|
||||
goto err;
|
||||
}
|
||||
|
||||
|
@ -284,14 +238,18 @@ static int idxd_setup_engines(struct idxd_device *idxd)
|
|||
return 0;
|
||||
|
||||
err:
|
||||
while (--i >= 0)
|
||||
put_device(&idxd->engines[i]->conf_dev);
|
||||
while (--i >= 0) {
|
||||
engine = idxd->engines[i];
|
||||
conf_dev = engine_confdev(engine);
|
||||
put_device(conf_dev);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int idxd_setup_groups(struct idxd_device *idxd)
|
||||
{
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
struct device *conf_dev;
|
||||
struct idxd_group *group;
|
||||
int i, rc;
|
||||
|
||||
|
@ -307,28 +265,37 @@ static int idxd_setup_groups(struct idxd_device *idxd)
|
|||
goto err;
|
||||
}
|
||||
|
||||
idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
|
||||
conf_dev = group_confdev(group);
|
||||
group->id = i;
|
||||
group->idxd = idxd;
|
||||
device_initialize(&group->conf_dev);
|
||||
group->conf_dev.parent = &idxd->conf_dev;
|
||||
group->conf_dev.bus = &dsa_bus_type;
|
||||
group->conf_dev.type = &idxd_group_device_type;
|
||||
rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
|
||||
device_initialize(conf_dev);
|
||||
conf_dev->parent = idxd_confdev(idxd);
|
||||
conf_dev->bus = &dsa_bus_type;
|
||||
conf_dev->type = &idxd_group_device_type;
|
||||
rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
|
||||
if (rc < 0) {
|
||||
put_device(&group->conf_dev);
|
||||
put_device(conf_dev);
|
||||
goto err;
|
||||
}
|
||||
|
||||
idxd->groups[i] = group;
|
||||
group->tc_a = -1;
|
||||
group->tc_b = -1;
|
||||
if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) {
|
||||
group->tc_a = 1;
|
||||
group->tc_b = 1;
|
||||
} else {
|
||||
group->tc_a = -1;
|
||||
group->tc_b = -1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
while (--i >= 0)
|
||||
put_device(&idxd->groups[i]->conf_dev);
|
||||
while (--i >= 0) {
|
||||
group = idxd->groups[i];
|
||||
put_device(group_confdev(group));
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -337,11 +304,11 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < idxd->max_groups; i++)
|
||||
put_device(&idxd->groups[i]->conf_dev);
|
||||
put_device(group_confdev(idxd->groups[i]));
|
||||
for (i = 0; i < idxd->max_engines; i++)
|
||||
put_device(&idxd->engines[i]->conf_dev);
|
||||
put_device(engine_confdev(idxd->engines[i]));
|
||||
for (i = 0; i < idxd->max_wqs; i++)
|
||||
put_device(&idxd->wqs[i]->conf_dev);
|
||||
put_device(wq_confdev(idxd->wqs[i]));
|
||||
destroy_workqueue(idxd->wq);
|
||||
}
|
||||
|
||||
|
@ -352,13 +319,6 @@ static int idxd_setup_internals(struct idxd_device *idxd)
|
|||
|
||||
init_waitqueue_head(&idxd->cmd_waitq);
|
||||
|
||||
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
|
||||
idxd->int_handles = kcalloc_node(idxd->max_wqs, sizeof(int), GFP_KERNEL,
|
||||
dev_to_node(dev));
|
||||
if (!idxd->int_handles)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
rc = idxd_setup_wqs(idxd);
|
||||
if (rc < 0)
|
||||
goto err_wqs;
|
||||
|
@ -381,15 +341,14 @@ static int idxd_setup_internals(struct idxd_device *idxd)
|
|||
|
||||
err_wkq_create:
|
||||
for (i = 0; i < idxd->max_groups; i++)
|
||||
put_device(&idxd->groups[i]->conf_dev);
|
||||
put_device(group_confdev(idxd->groups[i]));
|
||||
err_group:
|
||||
for (i = 0; i < idxd->max_engines; i++)
|
||||
put_device(&idxd->engines[i]->conf_dev);
|
||||
put_device(engine_confdev(idxd->engines[i]));
|
||||
err_engine:
|
||||
for (i = 0; i < idxd->max_wqs; i++)
|
||||
put_device(&idxd->wqs[i]->conf_dev);
|
||||
put_device(wq_confdev(idxd->wqs[i]));
|
||||
err_wqs:
|
||||
kfree(idxd->int_handles);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
@ -424,6 +383,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
|
|||
dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
|
||||
}
|
||||
|
||||
/* reading command capabilities */
|
||||
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE))
|
||||
idxd->request_int_handles = true;
|
||||
|
||||
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
|
||||
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
|
||||
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
|
||||
|
@ -437,9 +400,9 @@ static void idxd_read_caps(struct idxd_device *idxd)
|
|||
dev_dbg(dev, "group_cap: %#llx\n", idxd->hw.group_cap.bits);
|
||||
idxd->max_groups = idxd->hw.group_cap.num_groups;
|
||||
dev_dbg(dev, "max groups: %u\n", idxd->max_groups);
|
||||
idxd->max_tokens = idxd->hw.group_cap.total_tokens;
|
||||
dev_dbg(dev, "max tokens: %u\n", idxd->max_tokens);
|
||||
idxd->nr_tokens = idxd->max_tokens;
|
||||
idxd->max_rdbufs = idxd->hw.group_cap.total_rdbufs;
|
||||
dev_dbg(dev, "max read buffers: %u\n", idxd->max_rdbufs);
|
||||
idxd->nr_rdbufs = idxd->max_rdbufs;
|
||||
|
||||
/* read engine capabilities */
|
||||
idxd->hw.engine_cap.bits =
|
||||
|
@ -469,6 +432,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
|
|||
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
|
||||
{
|
||||
struct device *dev = &pdev->dev;
|
||||
struct device *conf_dev;
|
||||
struct idxd_device *idxd;
|
||||
int rc;
|
||||
|
||||
|
@ -476,19 +440,21 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
|
|||
if (!idxd)
|
||||
return NULL;
|
||||
|
||||
conf_dev = idxd_confdev(idxd);
|
||||
idxd->pdev = pdev;
|
||||
idxd->data = data;
|
||||
idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
|
||||
idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
|
||||
if (idxd->id < 0)
|
||||
return NULL;
|
||||
|
||||
device_initialize(&idxd->conf_dev);
|
||||
idxd->conf_dev.parent = dev;
|
||||
idxd->conf_dev.bus = &dsa_bus_type;
|
||||
idxd->conf_dev.type = idxd->data->dev_type;
|
||||
rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
|
||||
device_initialize(conf_dev);
|
||||
conf_dev->parent = dev;
|
||||
conf_dev->bus = &dsa_bus_type;
|
||||
conf_dev->type = idxd->data->dev_type;
|
||||
rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
|
||||
if (rc < 0) {
|
||||
put_device(&idxd->conf_dev);
|
||||
put_device(conf_dev);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -581,8 +547,6 @@ static int idxd_probe(struct idxd_device *idxd)
|
|||
if (rc)
|
||||
goto err_config;
|
||||
|
||||
dev_dbg(dev, "IDXD interrupt setup complete.\n");
|
||||
|
||||
idxd->major = idxd_cdev_get_major(idxd);
|
||||
|
||||
rc = perfmon_pmu_init(idxd);
|
||||
|
@ -639,15 +603,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
}
|
||||
|
||||
dev_dbg(dev, "Set DMA masks\n");
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (rc)
|
||||
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
|
||||
if (rc)
|
||||
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
|
||||
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
|
||||
if (rc)
|
||||
goto err;
|
||||
|
||||
|
@ -668,8 +624,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
goto err_dev_register;
|
||||
}
|
||||
|
||||
idxd->state = IDXD_DEV_CONF_READY;
|
||||
|
||||
dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
|
||||
idxd->hw.version);
|
||||
|
||||
|
@ -680,38 +634,12 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
err:
|
||||
pci_iounmap(pdev, idxd->reg_base);
|
||||
err_iomap:
|
||||
put_device(&idxd->conf_dev);
|
||||
put_device(idxd_confdev(idxd));
|
||||
err_idxd_alloc:
|
||||
pci_disable_device(pdev);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
|
||||
{
|
||||
struct idxd_desc *desc, *itr;
|
||||
struct llist_node *head;
|
||||
|
||||
head = llist_del_all(&ie->pending_llist);
|
||||
if (!head)
|
||||
return;
|
||||
|
||||
llist_for_each_entry_safe(desc, itr, head, llnode) {
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
|
||||
idxd_free_desc(desc->wq, desc);
|
||||
}
|
||||
}
|
||||
|
||||
static void idxd_flush_work_list(struct idxd_irq_entry *ie)
|
||||
{
|
||||
struct idxd_desc *desc, *iter;
|
||||
|
||||
list_for_each_entry_safe(desc, iter, &ie->work_list, list) {
|
||||
list_del(&desc->list);
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT);
|
||||
idxd_free_desc(desc->wq, desc);
|
||||
}
|
||||
}
|
||||
|
||||
void idxd_wqs_quiesce(struct idxd_device *idxd)
|
||||
{
|
||||
struct idxd_wq *wq;
|
||||
|
@ -724,47 +652,19 @@ void idxd_wqs_quiesce(struct idxd_device *idxd)
|
|||
}
|
||||
}
|
||||
|
||||
static void idxd_release_int_handles(struct idxd_device *idxd)
|
||||
{
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
int i, rc;
|
||||
|
||||
for (i = 0; i < idxd->num_wq_irqs; i++) {
|
||||
if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
|
||||
rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
|
||||
IDXD_IRQ_MSIX);
|
||||
if (rc < 0)
|
||||
dev_warn(dev, "irq handle %d release failed\n",
|
||||
idxd->int_handles[i]);
|
||||
else
|
||||
dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void idxd_shutdown(struct pci_dev *pdev)
|
||||
{
|
||||
struct idxd_device *idxd = pci_get_drvdata(pdev);
|
||||
int rc, i;
|
||||
struct idxd_irq_entry *irq_entry;
|
||||
int msixcnt = pci_msix_vec_count(pdev);
|
||||
int rc;
|
||||
|
||||
rc = idxd_device_disable(idxd);
|
||||
if (rc)
|
||||
dev_err(&pdev->dev, "Disabling device failed\n");
|
||||
|
||||
dev_dbg(&pdev->dev, "%s called\n", __func__);
|
||||
idxd_mask_msix_vectors(idxd);
|
||||
irq_entry = &idxd->ie;
|
||||
synchronize_irq(irq_entry->vector);
|
||||
idxd_mask_error_interrupts(idxd);
|
||||
|
||||
for (i = 0; i < msixcnt; i++) {
|
||||
irq_entry = &idxd->irq_entries[i];
|
||||
synchronize_irq(irq_entry->vector);
|
||||
if (i == 0)
|
||||
continue;
|
||||
idxd_flush_pending_llist(irq_entry);
|
||||
idxd_flush_work_list(irq_entry);
|
||||
}
|
||||
flush_workqueue(idxd->wq);
|
||||
}
|
||||
|
||||
|
@ -772,28 +672,30 @@ static void idxd_remove(struct pci_dev *pdev)
|
|||
{
|
||||
struct idxd_device *idxd = pci_get_drvdata(pdev);
|
||||
struct idxd_irq_entry *irq_entry;
|
||||
int msixcnt = pci_msix_vec_count(pdev);
|
||||
int i;
|
||||
|
||||
dev_dbg(&pdev->dev, "%s called\n", __func__);
|
||||
idxd_unregister_devices(idxd);
|
||||
/*
|
||||
* When ->release() is called for the idxd->conf_dev, it frees all the memory related
|
||||
* to the idxd context. The driver still needs those bits in order to do the rest of
|
||||
* the cleanup. However, we do need to unbind the idxd sub-driver. So take a ref
|
||||
* on the device here to hold off the freeing while allowing the idxd sub-driver
|
||||
* to unbind.
|
||||
*/
|
||||
get_device(idxd_confdev(idxd));
|
||||
device_unregister(idxd_confdev(idxd));
|
||||
idxd_shutdown(pdev);
|
||||
if (device_pasid_enabled(idxd))
|
||||
idxd_disable_system_pasid(idxd);
|
||||
idxd_unregister_devices(idxd);
|
||||
|
||||
for (i = 0; i < msixcnt; i++) {
|
||||
irq_entry = &idxd->irq_entries[i];
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
}
|
||||
idxd_msix_perm_clear(idxd);
|
||||
idxd_release_int_handles(idxd);
|
||||
irq_entry = idxd_get_ie(idxd, 0);
|
||||
free_irq(irq_entry->vector, irq_entry);
|
||||
pci_free_irq_vectors(pdev);
|
||||
pci_iounmap(pdev, idxd->reg_base);
|
||||
iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
|
||||
pci_disable_device(pdev);
|
||||
destroy_workqueue(idxd->wq);
|
||||
perfmon_pmu_remove(idxd);
|
||||
device_unregister(&idxd->conf_dev);
|
||||
put_device(idxd_confdev(idxd));
|
||||
}
|
||||
|
||||
static struct pci_driver idxd_pci_driver = {
|
||||
|
@ -824,14 +726,18 @@ static int __init idxd_init_module(void)
|
|||
|
||||
perfmon_init();
|
||||
|
||||
err = idxd_register_bus_type();
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
err = idxd_register_driver();
|
||||
err = idxd_driver_register(&idxd_drv);
|
||||
if (err < 0)
|
||||
goto err_idxd_driver_register;
|
||||
|
||||
err = idxd_driver_register(&idxd_dmaengine_drv);
|
||||
if (err < 0)
|
||||
goto err_idxd_dmaengine_driver_register;
|
||||
|
||||
err = idxd_driver_register(&idxd_user_drv);
|
||||
if (err < 0)
|
||||
goto err_idxd_user_driver_register;
|
||||
|
||||
err = idxd_cdev_register();
|
||||
if (err)
|
||||
goto err_cdev_register;
|
||||
|
@ -845,19 +751,23 @@ static int __init idxd_init_module(void)
|
|||
err_pci_register:
|
||||
idxd_cdev_remove();
|
||||
err_cdev_register:
|
||||
idxd_unregister_driver();
|
||||
idxd_driver_unregister(&idxd_user_drv);
|
||||
err_idxd_user_driver_register:
|
||||
idxd_driver_unregister(&idxd_dmaengine_drv);
|
||||
err_idxd_dmaengine_driver_register:
|
||||
idxd_driver_unregister(&idxd_drv);
|
||||
err_idxd_driver_register:
|
||||
idxd_unregister_bus_type();
|
||||
return err;
|
||||
}
|
||||
module_init(idxd_init_module);
|
||||
|
||||
static void __exit idxd_exit_module(void)
|
||||
{
|
||||
idxd_unregister_driver();
|
||||
idxd_driver_unregister(&idxd_user_drv);
|
||||
idxd_driver_unregister(&idxd_dmaengine_drv);
|
||||
idxd_driver_unregister(&idxd_drv);
|
||||
pci_unregister_driver(&idxd_pci_driver);
|
||||
idxd_cdev_remove();
|
||||
idxd_unregister_bus_type();
|
||||
perfmon_exit();
|
||||
}
|
||||
module_exit(idxd_exit_module);
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include <linux/pci.h>
|
||||
#include <linux/io-64-nonatomic-lo-hi.h>
|
||||
#include <linux/dmaengine.h>
|
||||
#include <linux/delay.h>
|
||||
#include <uapi/linux/idxd.h>
|
||||
#include "../dmaengine.h"
|
||||
#include "idxd.h"
|
||||
|
@ -22,12 +23,15 @@ struct idxd_fault {
|
|||
struct idxd_device *idxd;
|
||||
};
|
||||
|
||||
static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
|
||||
enum irq_work_type wtype,
|
||||
int *processed, u64 data);
|
||||
static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
|
||||
enum irq_work_type wtype,
|
||||
int *processed, u64 data);
|
||||
struct idxd_resubmit {
|
||||
struct work_struct work;
|
||||
struct idxd_desc *desc;
|
||||
};
|
||||
|
||||
struct idxd_int_handle_revoke {
|
||||
struct work_struct work;
|
||||
struct idxd_device *idxd;
|
||||
};
|
||||
|
||||
static void idxd_device_reinit(struct work_struct *work)
|
||||
{
|
||||
|
@ -51,7 +55,7 @@ static void idxd_device_reinit(struct work_struct *work)
|
|||
rc = idxd_wq_enable(wq);
|
||||
if (rc < 0) {
|
||||
dev_warn(dev, "Unable to re-enable wq %s\n",
|
||||
dev_name(&wq->conf_dev));
|
||||
dev_name(wq_confdev(wq)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -59,47 +63,163 @@ static void idxd_device_reinit(struct work_struct *work)
|
|||
return;
|
||||
|
||||
out:
|
||||
idxd_device_wqs_clear_state(idxd);
|
||||
idxd_device_clear_state(idxd);
|
||||
}
|
||||
|
||||
static void idxd_device_fault_work(struct work_struct *work)
|
||||
/*
|
||||
* The function sends a drain descriptor for the interrupt handle. The drain ensures
|
||||
* all descriptors with this interrupt handle are flushed and the interrupt
|
||||
* will allow the cleanup of the outstanding descriptors.
|
||||
*/
|
||||
static void idxd_int_handle_revoke_drain(struct idxd_irq_entry *ie)
|
||||
{
|
||||
struct idxd_fault *fault = container_of(work, struct idxd_fault, work);
|
||||
struct idxd_irq_entry *ie;
|
||||
int i;
|
||||
int processed;
|
||||
int irqcnt = fault->idxd->num_wq_irqs + 1;
|
||||
struct idxd_wq *wq = ie_to_wq(ie);
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
struct dsa_hw_desc desc = {};
|
||||
void __iomem *portal;
|
||||
int rc;
|
||||
|
||||
for (i = 1; i < irqcnt; i++) {
|
||||
ie = &fault->idxd->irq_entries[i];
|
||||
irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT,
|
||||
&processed, fault->addr);
|
||||
if (processed)
|
||||
break;
|
||||
/* Issue a simple drain operation with interrupt but no completion record */
|
||||
desc.flags = IDXD_OP_FLAG_RCI;
|
||||
desc.opcode = DSA_OPCODE_DRAIN;
|
||||
desc.priv = 1;
|
||||
|
||||
irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT,
|
||||
&processed, fault->addr);
|
||||
if (processed)
|
||||
break;
|
||||
if (ie->pasid != INVALID_IOASID)
|
||||
desc.pasid = ie->pasid;
|
||||
desc.int_handle = ie->int_handle;
|
||||
portal = idxd_wq_portal_addr(wq);
|
||||
|
||||
/*
|
||||
* The wmb() makes sure that the descriptor is all there before we
|
||||
* issue.
|
||||
*/
|
||||
wmb();
|
||||
if (wq_dedicated(wq)) {
|
||||
iosubmit_cmds512(portal, &desc, 1);
|
||||
} else {
|
||||
rc = idxd_enqcmds(wq, portal, &desc);
|
||||
/* This should not fail unless hardware failed. */
|
||||
if (rc < 0)
|
||||
dev_warn(dev, "Failed to submit drain desc on wq %d\n", wq->id);
|
||||
}
|
||||
}
|
||||
|
||||
static void idxd_abort_invalid_int_handle_descs(struct idxd_irq_entry *ie)
|
||||
{
|
||||
LIST_HEAD(flist);
|
||||
struct idxd_desc *d, *t;
|
||||
struct llist_node *head;
|
||||
|
||||
spin_lock(&ie->list_lock);
|
||||
head = llist_del_all(&ie->pending_llist);
|
||||
if (head) {
|
||||
llist_for_each_entry_safe(d, t, head, llnode)
|
||||
list_add_tail(&d->list, &ie->work_list);
|
||||
}
|
||||
|
||||
kfree(fault);
|
||||
list_for_each_entry_safe(d, t, &ie->work_list, list) {
|
||||
if (d->completion->status == DSA_COMP_INT_HANDLE_INVAL)
|
||||
list_move_tail(&d->list, &flist);
|
||||
}
|
||||
spin_unlock(&ie->list_lock);
|
||||
|
||||
list_for_each_entry_safe(d, t, &flist, list) {
|
||||
list_del(&d->list);
|
||||
idxd_dma_complete_txd(d, IDXD_COMPLETE_ABORT, true);
|
||||
}
|
||||
}
|
||||
|
||||
static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
|
||||
u64 fault_addr)
|
||||
static void idxd_int_handle_revoke(struct work_struct *work)
|
||||
{
|
||||
struct idxd_fault *fault;
|
||||
struct idxd_int_handle_revoke *revoke =
|
||||
container_of(work, struct idxd_int_handle_revoke, work);
|
||||
struct idxd_device *idxd = revoke->idxd;
|
||||
struct pci_dev *pdev = idxd->pdev;
|
||||
struct device *dev = &pdev->dev;
|
||||
int i, new_handle, rc;
|
||||
|
||||
fault = kmalloc(sizeof(*fault), GFP_ATOMIC);
|
||||
if (!fault)
|
||||
return -ENOMEM;
|
||||
if (!idxd->request_int_handles) {
|
||||
kfree(revoke);
|
||||
dev_warn(dev, "Unexpected int handle refresh interrupt.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
fault->addr = fault_addr;
|
||||
fault->idxd = idxd;
|
||||
INIT_WORK(&fault->work, idxd_device_fault_work);
|
||||
queue_work(idxd->wq, &fault->work);
|
||||
return 0;
|
||||
/*
|
||||
* The loop attempts to acquire new interrupt handle for all interrupt
|
||||
* vectors that supports a handle. If a new interrupt handle is acquired and the
|
||||
* wq is kernel type, the driver will kill the percpu_ref to pause all
|
||||
* ongoing descriptor submissions. The interrupt handle is then changed.
|
||||
* After change, the percpu_ref is revived and all the pending submissions
|
||||
* are woken to try again. A drain is sent to for the interrupt handle
|
||||
* at the end to make sure all invalid int handle descriptors are processed.
|
||||
*/
|
||||
for (i = 1; i < idxd->irq_cnt; i++) {
|
||||
struct idxd_irq_entry *ie = idxd_get_ie(idxd, i);
|
||||
struct idxd_wq *wq = ie_to_wq(ie);
|
||||
|
||||
if (ie->int_handle == INVALID_INT_HANDLE)
|
||||
continue;
|
||||
|
||||
rc = idxd_device_request_int_handle(idxd, i, &new_handle, IDXD_IRQ_MSIX);
|
||||
if (rc < 0) {
|
||||
dev_warn(dev, "get int handle %d failed: %d\n", i, rc);
|
||||
/*
|
||||
* Failed to acquire new interrupt handle. Kill the WQ
|
||||
* and release all the pending submitters. The submitters will
|
||||
* get error return code and handle appropriately.
|
||||
*/
|
||||
ie->int_handle = INVALID_INT_HANDLE;
|
||||
idxd_wq_quiesce(wq);
|
||||
idxd_abort_invalid_int_handle_descs(ie);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* No change in interrupt handle, nothing needs to be done */
|
||||
if (ie->int_handle == new_handle)
|
||||
continue;
|
||||
|
||||
if (wq->state != IDXD_WQ_ENABLED || wq->type != IDXD_WQT_KERNEL) {
|
||||
/*
|
||||
* All the MSIX interrupts are allocated at once during probe.
|
||||
* Therefore we need to update all interrupts even if the WQ
|
||||
* isn't supporting interrupt operations.
|
||||
*/
|
||||
ie->int_handle = new_handle;
|
||||
continue;
|
||||
}
|
||||
|
||||
mutex_lock(&wq->wq_lock);
|
||||
reinit_completion(&wq->wq_resurrect);
|
||||
|
||||
/* Kill percpu_ref to pause additional descriptor submissions */
|
||||
percpu_ref_kill(&wq->wq_active);
|
||||
|
||||
/* Wait for all submitters quiesce before we change interrupt handle */
|
||||
wait_for_completion(&wq->wq_dead);
|
||||
|
||||
ie->int_handle = new_handle;
|
||||
|
||||
/* Revive percpu ref and wake up all the waiting submitters */
|
||||
percpu_ref_reinit(&wq->wq_active);
|
||||
complete_all(&wq->wq_resurrect);
|
||||
mutex_unlock(&wq->wq_lock);
|
||||
|
||||
/*
|
||||
* The delay here is to wait for all possible MOVDIR64B that
|
||||
* are issued before percpu_ref_kill() has happened to have
|
||||
* reached the PCIe domain before the drain is issued. The driver
|
||||
* needs to ensure that the drain descriptor issued does not pass
|
||||
* all the other issued descriptors that contain the invalid
|
||||
* interrupt handle in order to ensure that the drain descriptor
|
||||
* interrupt will allow the cleanup of all the descriptors with
|
||||
* invalid interrupt handle.
|
||||
*/
|
||||
if (wq_dedicated(wq))
|
||||
udelay(100);
|
||||
idxd_int_handle_revoke_drain(ie);
|
||||
}
|
||||
kfree(revoke);
|
||||
}
|
||||
|
||||
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
||||
|
@ -110,8 +230,11 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
int i;
|
||||
bool err = false;
|
||||
|
||||
if (cause & IDXD_INTC_HALT_STATE)
|
||||
goto halt;
|
||||
|
||||
if (cause & IDXD_INTC_ERR) {
|
||||
spin_lock_bh(&idxd->dev_lock);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
for (i = 0; i < 4; i++)
|
||||
idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
|
||||
IDXD_SWERR_OFFSET + i * sizeof(u64));
|
||||
|
@ -136,7 +259,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
}
|
||||
}
|
||||
|
||||
spin_unlock_bh(&idxd->dev_lock);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
val |= IDXD_INTC_ERR;
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
|
@ -145,6 +268,23 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
err = true;
|
||||
}
|
||||
|
||||
if (cause & IDXD_INTC_INT_HANDLE_REVOKED) {
|
||||
struct idxd_int_handle_revoke *revoke;
|
||||
|
||||
val |= IDXD_INTC_INT_HANDLE_REVOKED;
|
||||
|
||||
revoke = kzalloc(sizeof(*revoke), GFP_ATOMIC);
|
||||
if (revoke) {
|
||||
revoke->idxd = idxd;
|
||||
INIT_WORK(&revoke->work, idxd_int_handle_revoke);
|
||||
queue_work(idxd->wq, &revoke->work);
|
||||
|
||||
} else {
|
||||
dev_err(dev, "Failed to allocate work for int handle revoke\n");
|
||||
idxd_wqs_quiesce(idxd);
|
||||
}
|
||||
}
|
||||
|
||||
if (cause & IDXD_INTC_CMD) {
|
||||
val |= IDXD_INTC_CMD;
|
||||
complete(idxd->cmd_done);
|
||||
|
@ -168,15 +308,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
if (!err)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* This case should rarely happen and typically is due to software
|
||||
* programming error by the driver.
|
||||
*/
|
||||
if (idxd->sw_err.valid &&
|
||||
idxd->sw_err.desc_valid &&
|
||||
idxd->sw_err.fault_addr)
|
||||
idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr);
|
||||
|
||||
halt:
|
||||
gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
|
||||
if (gensts.state == IDXD_DEVICE_STATE_HALT) {
|
||||
idxd->state = IDXD_DEV_HALTED;
|
||||
|
@ -189,15 +321,16 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
INIT_WORK(&idxd->work, idxd_device_reinit);
|
||||
queue_work(idxd->wq, &idxd->work);
|
||||
} else {
|
||||
spin_lock_bh(&idxd->dev_lock);
|
||||
idxd->state = IDXD_DEV_HALTED;
|
||||
idxd_wqs_quiesce(idxd);
|
||||
idxd_wqs_unmap_portal(idxd);
|
||||
idxd_device_wqs_clear_state(idxd);
|
||||
spin_lock(&idxd->dev_lock);
|
||||
idxd_device_clear_state(idxd);
|
||||
dev_err(&idxd->pdev->dev,
|
||||
"idxd halted, need %s.\n",
|
||||
gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
|
||||
"FLR" : "system reset");
|
||||
spin_unlock_bh(&idxd->dev_lock);
|
||||
spin_unlock(&idxd->dev_lock);
|
||||
return -ENXIO;
|
||||
}
|
||||
}
|
||||
|
@ -208,7 +341,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
|
|||
irqreturn_t idxd_misc_thread(int vec, void *data)
|
||||
{
|
||||
struct idxd_irq_entry *irq_entry = data;
|
||||
struct idxd_device *idxd = irq_entry->idxd;
|
||||
struct idxd_device *idxd = ie_to_idxd(irq_entry);
|
||||
int rc;
|
||||
u32 cause;
|
||||
|
||||
|
@ -228,127 +361,123 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
|
|||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr)
|
||||
static void idxd_int_handle_resubmit_work(struct work_struct *work)
|
||||
{
|
||||
/*
|
||||
* Completion address can be bad as well. Check fault address match for descriptor
|
||||
* and completion address.
|
||||
*/
|
||||
if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) {
|
||||
struct idxd_device *idxd = desc->wq->idxd;
|
||||
struct device *dev = &idxd->pdev->dev;
|
||||
struct idxd_resubmit *irw = container_of(work, struct idxd_resubmit, work);
|
||||
struct idxd_desc *desc = irw->desc;
|
||||
struct idxd_wq *wq = desc->wq;
|
||||
int rc;
|
||||
|
||||
dev_warn(dev, "desc with fault address: %#llx\n", fault_addr);
|
||||
return true;
|
||||
desc->completion->status = 0;
|
||||
rc = idxd_submit_desc(wq, desc);
|
||||
if (rc < 0) {
|
||||
dev_dbg(&wq->idxd->pdev->dev, "Failed to resubmit desc %d to wq %d.\n",
|
||||
desc->id, wq->id);
|
||||
/*
|
||||
* If the error is not -EAGAIN, it means the submission failed due to wq
|
||||
* has been killed instead of ENQCMDS failure. Here the driver needs to
|
||||
* notify the submitter of the failure by reporting abort status.
|
||||
*
|
||||
* -EAGAIN comes from ENQCMDS failure. idxd_submit_desc() will handle the
|
||||
* abort.
|
||||
*/
|
||||
if (rc != -EAGAIN) {
|
||||
desc->completion->status = IDXD_COMP_DESC_ABORT;
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, false);
|
||||
}
|
||||
idxd_free_desc(wq, desc);
|
||||
}
|
||||
|
||||
return false;
|
||||
kfree(irw);
|
||||
}
|
||||
|
||||
static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
|
||||
enum irq_work_type wtype,
|
||||
int *processed, u64 data)
|
||||
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc)
|
||||
{
|
||||
struct idxd_wq *wq = desc->wq;
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct idxd_resubmit *irw;
|
||||
|
||||
irw = kzalloc(sizeof(*irw), GFP_KERNEL);
|
||||
if (!irw)
|
||||
return false;
|
||||
|
||||
irw->desc = desc;
|
||||
INIT_WORK(&irw->work, idxd_int_handle_resubmit_work);
|
||||
queue_work(idxd->wq, &irw->work);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
|
||||
{
|
||||
struct idxd_desc *desc, *t;
|
||||
struct llist_node *head;
|
||||
int queued = 0;
|
||||
unsigned long flags;
|
||||
enum idxd_complete_type reason;
|
||||
|
||||
*processed = 0;
|
||||
head = llist_del_all(&irq_entry->pending_llist);
|
||||
if (!head)
|
||||
goto out;
|
||||
|
||||
if (wtype == IRQ_WORK_NORMAL)
|
||||
reason = IDXD_COMPLETE_NORMAL;
|
||||
else
|
||||
reason = IDXD_COMPLETE_DEV_FAIL;
|
||||
return;
|
||||
|
||||
llist_for_each_entry_safe(desc, t, head, llnode) {
|
||||
u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
|
||||
|
||||
if (status) {
|
||||
if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
|
||||
complete_desc(desc, IDXD_COMPLETE_ABORT);
|
||||
(*processed)++;
|
||||
/*
|
||||
* Check against the original status as ABORT is software defined
|
||||
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
|
||||
*/
|
||||
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(status != DSA_COMP_SUCCESS))
|
||||
match_fault(desc, data);
|
||||
complete_desc(desc, reason);
|
||||
(*processed)++;
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
|
||||
} else {
|
||||
spin_lock_irqsave(&irq_entry->list_lock, flags);
|
||||
spin_lock(&irq_entry->list_lock);
|
||||
list_add_tail(&desc->list,
|
||||
&irq_entry->work_list);
|
||||
spin_unlock_irqrestore(&irq_entry->list_lock, flags);
|
||||
queued++;
|
||||
spin_unlock(&irq_entry->list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
return queued;
|
||||
}
|
||||
|
||||
static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
|
||||
enum irq_work_type wtype,
|
||||
int *processed, u64 data)
|
||||
static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
|
||||
{
|
||||
int queued = 0;
|
||||
unsigned long flags;
|
||||
LIST_HEAD(flist);
|
||||
struct idxd_desc *desc, *n;
|
||||
enum idxd_complete_type reason;
|
||||
|
||||
*processed = 0;
|
||||
if (wtype == IRQ_WORK_NORMAL)
|
||||
reason = IDXD_COMPLETE_NORMAL;
|
||||
else
|
||||
reason = IDXD_COMPLETE_DEV_FAIL;
|
||||
|
||||
/*
|
||||
* This lock protects list corruption from access of list outside of the irq handler
|
||||
* thread.
|
||||
*/
|
||||
spin_lock_irqsave(&irq_entry->list_lock, flags);
|
||||
spin_lock(&irq_entry->list_lock);
|
||||
if (list_empty(&irq_entry->work_list)) {
|
||||
spin_unlock_irqrestore(&irq_entry->list_lock, flags);
|
||||
return 0;
|
||||
spin_unlock(&irq_entry->list_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
|
||||
if (desc->completion->status) {
|
||||
list_del(&desc->list);
|
||||
(*processed)++;
|
||||
list_add_tail(&desc->list, &flist);
|
||||
} else {
|
||||
queued++;
|
||||
list_move_tail(&desc->list, &flist);
|
||||
}
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&irq_entry->list_lock, flags);
|
||||
spin_unlock(&irq_entry->list_lock);
|
||||
|
||||
list_for_each_entry(desc, &flist, list) {
|
||||
u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
|
||||
|
||||
if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
|
||||
complete_desc(desc, IDXD_COMPLETE_ABORT);
|
||||
/*
|
||||
* Check against the original status as ABORT is software defined
|
||||
* and 0xff, which DSA_COMP_STATUS_MASK can mask out.
|
||||
*/
|
||||
if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_ABORT, true);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (unlikely(status != DSA_COMP_SUCCESS))
|
||||
match_fault(desc, data);
|
||||
complete_desc(desc, reason);
|
||||
idxd_dma_complete_txd(desc, IDXD_COMPLETE_NORMAL, true);
|
||||
}
|
||||
|
||||
return queued;
|
||||
}
|
||||
|
||||
static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
|
||||
irqreturn_t idxd_wq_thread(int irq, void *data)
|
||||
{
|
||||
int rc, processed, total = 0;
|
||||
struct idxd_irq_entry *irq_entry = data;
|
||||
|
||||
/*
|
||||
* There are two lists we are processing. The pending_llist is where
|
||||
|
@ -367,31 +496,9 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
|
|||
* and process the completed entries.
|
||||
* 4. If the entry is still waiting on hardware, list_add_tail() to
|
||||
* the work_list.
|
||||
* 5. Repeat until no more descriptors.
|
||||
*/
|
||||
do {
|
||||
rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL,
|
||||
&processed, 0);
|
||||
total += processed;
|
||||
if (rc != 0)
|
||||
continue;
|
||||
|
||||
rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL,
|
||||
&processed, 0);
|
||||
total += processed;
|
||||
} while (rc != 0);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
irqreturn_t idxd_wq_thread(int irq, void *data)
|
||||
{
|
||||
struct idxd_irq_entry *irq_entry = data;
|
||||
int processed;
|
||||
|
||||
processed = idxd_desc_process(irq_entry);
|
||||
if (processed == 0)
|
||||
return IRQ_NONE;
|
||||
irq_process_work_list(irq_entry);
|
||||
irq_process_pending_llist(irq_entry);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
|
|
@@ -7,6 +7,9 @@
#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25
#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe

#define DEVICE_VERSION_1 0x100
#define DEVICE_VERSION_2 0x200

#define IDXD_MMIO_BAR 0
#define IDXD_WQ_BAR 2
#define IDXD_PORTAL_SIZE PAGE_SIZE

@@ -33,8 +36,7 @@ union gen_cap_reg {
u64 max_batch_shift:4;
u64 max_ims_mult:6;
u64 config_en:1;
u64 max_descs_per_engine:8;
u64 rsvd3:24;
u64 rsvd3:32;
};
u64 bits;
} __packed;

@@ -62,9 +64,9 @@ union wq_cap_reg {
union group_cap_reg {
struct {
u64 num_groups:8;
u64 total_tokens:8;
u64 token_en:1;
u64 token_limit:1;
u64 total_rdbufs:8; /* formerly total_tokens */
u64 rdbuf_ctrl:1; /* formerly token_en */
u64 rdbuf_limit:1; /* formerly token_limit */
u64 rsvd:46;
};
u64 bits;

@@ -108,7 +110,7 @@ union offsets_reg {
#define IDXD_GENCFG_OFFSET 0x80
union gencfg_reg {
struct {
u32 token_limit:8;
u32 rdbuf_limit:8;
u32 rsvd:4;
u32 user_int_en:1;
u32 rsvd2:19;

@@ -155,6 +157,8 @@ enum idxd_device_reset_type {
#define IDXD_INTC_CMD 0x02
#define IDXD_INTC_OCCUPY 0x04
#define IDXD_INTC_PERFMON_OVFL 0x08
#define IDXD_INTC_HALT_STATE 0x10
#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000

#define IDXD_CMD_OFFSET 0xa0
union idxd_command_reg {

@@ -284,10 +288,10 @@ union group_flags {
u32 tc_a:3;
u32 tc_b:3;
u32 rsvd:1;
u32 use_token_limit:1;
u32 tokens_reserved:8;
u32 use_rdbuf_limit:1;
u32 rdbufs_reserved:8;
u32 rsvd2:4;
u32 tokens_allowed:8;
u32 rdbufs_allowed:8;
u32 rsvd3:4;
};
u32 bits;
@ -21,23 +21,6 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
|
|||
if (device_pasid_enabled(idxd))
|
||||
desc->hw->pasid = idxd->pasid;
|
||||
|
||||
/*
|
||||
* Descriptor completion vectors are 1...N for MSIX. We will round
|
||||
* robin through the N vectors.
|
||||
*/
|
||||
wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
|
||||
if (!idxd->int_handles) {
|
||||
desc->hw->int_handle = wq->vec_ptr;
|
||||
} else {
|
||||
/*
|
||||
* int_handles are only for descriptor completion. However for device
|
||||
* MSIX enumeration, vec 0 is used for misc interrupts. Therefore even
|
||||
* though we are rotating through 1...N for descriptor interrupts, we
|
||||
* need to acqurie the int_handles from 0..N-1.
|
||||
*/
|
||||
desc->hw->int_handle = idxd->int_handles[desc->vector - 1];
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
|
@ -67,7 +50,7 @@ struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
|
|||
if (signal_pending_state(TASK_INTERRUPTIBLE, current))
|
||||
break;
|
||||
idx = sbitmap_queue_get(sbq, &cpu);
|
||||
if (idx > 0)
|
||||
if (idx >= 0)
|
||||
break;
|
||||
schedule();
|
||||
}
|
||||
|
@ -114,14 +97,14 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
|
|||
{
|
||||
struct idxd_desc *d, *t, *found = NULL;
|
||||
struct llist_node *head;
|
||||
unsigned long flags;
|
||||
LIST_HEAD(flist);
|
||||
|
||||
desc->completion->status = IDXD_COMP_DESC_ABORT;
|
||||
/*
|
||||
* Grab the list lock so it will block the irq thread handler. This allows the
|
||||
* abort code to locate the descriptor need to be aborted.
|
||||
*/
|
||||
spin_lock_irqsave(&ie->list_lock, flags);
|
||||
spin_lock(&ie->list_lock);
|
||||
head = llist_del_all(&ie->pending_llist);
|
||||
if (head) {
|
||||
llist_for_each_entry_safe(d, t, head, llnode) {
|
||||
|
@ -129,32 +112,74 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
|
|||
found = desc;
|
||||
continue;
|
||||
}
|
||||
list_add_tail(&desc->list, &ie->work_list);
|
||||
|
||||
if (d->completion->status)
|
||||
list_add_tail(&d->list, &flist);
|
||||
else
|
||||
list_add_tail(&d->list, &ie->work_list);
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
found = list_abort_desc(wq, ie, desc);
|
||||
spin_unlock_irqrestore(&ie->list_lock, flags);
|
||||
spin_unlock(&ie->list_lock);
|
||||
|
||||
if (found)
|
||||
complete_desc(found, IDXD_COMPLETE_ABORT);
|
||||
idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, false);
|
||||
|
||||
/*
|
||||
* completing the descriptor will return desc to allocator and
|
||||
* the desc can be acquired by a different process and the
|
||||
* desc->list can be modified. Delete desc from list so the
|
||||
* list trasversing does not get corrupted by the other process.
|
||||
*/
|
||||
list_for_each_entry_safe(d, t, &flist, list) {
|
||||
list_del_init(&d->list);
|
||||
idxd_dma_complete_txd(found, IDXD_COMPLETE_ABORT, true);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* ENQCMDS typically fail when the WQ is inactive or busy. On host submission, the driver
|
||||
* has better control of number of descriptors being submitted to a shared wq by limiting
|
||||
* the number of driver allocated descriptors to the wq size. However, when the swq is
|
||||
* exported to a guest kernel, it may be shared with multiple guest kernels. This means
|
||||
* the likelihood of getting busy returned on the swq when submitting goes significantly up.
|
||||
* Having a tunable retry mechanism allows the driver to keep trying for a bit before giving
|
||||
* up. The sysfs knob can be tuned by the system administrator.
|
||||
*/
|
||||
int idxd_enqcmds(struct idxd_wq *wq, void __iomem *portal, const void *desc)
|
||||
{
|
||||
int rc, retries = 0;
|
||||
|
||||
do {
|
||||
rc = enqcmds(portal, desc);
|
||||
if (rc == 0)
|
||||
break;
|
||||
cpu_relax();
|
||||
} while (retries++ < wq->enqcmds_retries);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
|
||||
{
|
||||
struct idxd_device *idxd = wq->idxd;
|
||||
struct idxd_irq_entry *ie = NULL;
|
||||
u32 desc_flags = desc->hw->flags;
|
||||
void __iomem *portal;
|
||||
int rc;
|
||||
|
||||
if (idxd->state != IDXD_DEV_ENABLED)
|
||||
return -EIO;
|
||||
|
||||
if (!percpu_ref_tryget_live(&wq->wq_active))
|
||||
return -ENXIO;
|
||||
if (!percpu_ref_tryget_live(&wq->wq_active)) {
|
||||
wait_for_completion(&wq->wq_resurrect);
|
||||
if (!percpu_ref_tryget_live(&wq->wq_active))
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
portal = wq->portal;
|
||||
portal = idxd_wq_portal_addr(wq);
|
||||
|
||||
/*
|
||||
* The wmb() flushes writes to coherent DMA data before
|
||||
|
@ -167,22 +192,19 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
|
|||
* Pending the descriptor to the lockless list for the irq_entry
|
||||
* that we designated the descriptor to.
|
||||
*/
|
||||
if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
|
||||
ie = &idxd->irq_entries[desc->vector];
|
||||
if (desc_flags & IDXD_OP_FLAG_RCI) {
|
||||
ie = &wq->ie;
|
||||
desc->hw->int_handle = ie->int_handle;
|
||||
llist_add(&desc->llnode, &ie->pending_llist);
|
||||
}
|
||||
|
||||
if (wq_dedicated(wq)) {
|
||||
iosubmit_cmds512(portal, desc->hw, 1);
|
||||
} else {
|
||||
/*
|
||||
* It's not likely that we would receive queue full rejection
|
||||
* since the descriptor allocation gates at wq size. If we
|
||||
* receive a -EAGAIN, that means something went wrong such as the
|
||||
* device is not accepting descriptor at all.
|
||||
*/
|
||||
rc = enqcmds(portal, desc->hw);
|
||||
rc = idxd_enqcmds(wq, portal, desc->hw);
|
||||
if (rc < 0) {
|
||||
percpu_ref_put(&wq->wq_active);
|
||||
/* abort operation frees the descriptor */
|
||||
if (ie)
|
||||
llist_abort_desc(wq, ie, desc);
|
||||
return rc;
|
File diff suppressed because it is too large
@@ -144,8 +144,8 @@ config IOMMU_DMA
select IRQ_MSI_IOMMU
select NEED_SG_DMA_LENGTH

# Shared Virtual Addressing library
config IOMMU_SVA_LIB
# Shared Virtual Addressing
config IOMMU_SVA
bool
select IOASID

@@ -357,7 +357,7 @@ config ARM_SMMU_V3
config ARM_SMMU_V3_SVA
bool "Shared Virtual Addressing support for the ARM SMMUv3"
depends on ARM_SMMU_V3
select IOMMU_SVA_LIB
select IOMMU_SVA
select MMU_NOTIFIER
help
Support for sharing process address spaces with devices using the
@@ -27,5 +27,5 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
obj-$(CONFIG_IOMMU_SVA_LIB) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
@@ -340,14 +340,12 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
ret = PTR_ERR(bond->smmu_mn);
goto err_free_pasid;
goto err_free_bond;
}

list_add(&bond->list, &master->bonds);
return &bond->sva;

err_free_pasid:
iommu_sva_free_pasid(mm);
err_free_bond:
kfree(bond);
return ERR_PTR(ret);

@@ -377,7 +375,6 @@ void arm_smmu_sva_unbind(struct iommu_sva *handle)
if (refcount_dec_and_test(&bond->refs)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
iommu_sva_free_pasid(bond->mm);
kfree(bond);
}
mutex_unlock(&sva_lock);
@@ -48,7 +48,7 @@ config INTEL_IOMMU_SVM
select PCI_PRI
select MMU_NOTIFIER
select IOASID
select IOMMU_SVA_LIB
select IOMMU_SVA
help
Shared Virtual Memory (SVM) provides a facility for devices
to access DMA resources through process address space by
@@ -4692,7 +4692,7 @@ attach_failed:
link_failed:
spin_unlock_irqrestore(&device_domain_lock, flags);
if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
ioasid_put(domain->default_pasid);
ioasid_free(domain->default_pasid);

return ret;
}

@@ -4722,7 +4722,7 @@ static void aux_domain_remove_dev(struct dmar_domain *domain,
spin_unlock_irqrestore(&device_domain_lock, flags);

if (list_empty(&domain->subdevices) && domain->default_pasid > 0)
ioasid_put(domain->default_pasid);
ioasid_free(domain->default_pasid);
}

static int prepare_domain_attach_device(struct iommu_domain *domain,
@@ -514,11 +514,6 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

static void intel_svm_free_pasid(struct mm_struct *mm)
{
iommu_sva_free_pasid(mm);
}

static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
struct device *dev,
struct mm_struct *mm,

@@ -662,8 +657,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
kfree(svm);
}
}
/* Drop a PASID reference and free it if no reference. */
intel_svm_free_pasid(mm);
}
out:
return ret;

@@ -1047,8 +1040,6 @@ struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void
}

sva = intel_svm_bind_mm(iommu, dev, mm, flags);
if (IS_ERR_OR_NULL(sva))
intel_svm_free_pasid(mm);
mutex_unlock(&pasid_mutex);

return sva;
@@ -2,7 +2,7 @@
/*
* I/O Address Space ID allocator. There is one global IOASID space, split into
* subsets. Users create a subset with DECLARE_IOASID_SET, then allocate and
* free IOASIDs with ioasid_alloc and ioasid_put.
* free IOASIDs with ioasid_alloc() and ioasid_free().
*/
#include <linux/ioasid.h>
#include <linux/module.h>

@@ -15,7 +15,6 @@ struct ioasid_data {
struct ioasid_set *set;
void *private;
struct rcu_head rcu;
refcount_t refs;
};

/*

@@ -315,7 +314,6 @@ ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,

data->set = set;
data->private = private;
refcount_set(&data->refs, 1);

/*
* Custom allocator needs allocator data to perform platform specific

@@ -348,34 +346,11 @@ exit_free:
EXPORT_SYMBOL_GPL(ioasid_alloc);

/**
* ioasid_get - obtain a reference to the IOASID
*/
void ioasid_get(ioasid_t ioasid)
{
struct ioasid_data *ioasid_data;

spin_lock(&ioasid_allocator_lock);
ioasid_data = xa_load(&active_allocator->xa, ioasid);
if (ioasid_data)
refcount_inc(&ioasid_data->refs);
else
WARN_ON(1);
spin_unlock(&ioasid_allocator_lock);
}
EXPORT_SYMBOL_GPL(ioasid_get);

/**
* ioasid_put - Release a reference to an ioasid
* ioasid_free - Free an ioasid
* @ioasid: the ID to remove
*
* Put a reference to the IOASID, free it when the number of references drops to
* zero.
*
* Return: %true if the IOASID was freed, %false otherwise.
*/
bool ioasid_put(ioasid_t ioasid)
void ioasid_free(ioasid_t ioasid)
{
bool free = false;
struct ioasid_data *ioasid_data;

spin_lock(&ioasid_allocator_lock);

@@ -385,10 +360,6 @@ bool ioasid_put(ioasid_t ioasid)
goto exit_unlock;
}

free = refcount_dec_and_test(&ioasid_data->refs);
if (!free)
goto exit_unlock;

active_allocator->ops->free(ioasid, active_allocator->ops->pdata);
/* Custom allocator needs additional steps to free the xa element */
if (active_allocator->flags & IOASID_ALLOCATOR_CUSTOM) {

@@ -398,9 +369,8 @@ bool ioasid_put(ioasid_t ioasid)

exit_unlock:
spin_unlock(&ioasid_allocator_lock);
return free;
}
EXPORT_SYMBOL_GPL(ioasid_put);
EXPORT_SYMBOL_GPL(ioasid_free);

/**
* ioasid_find - Find IOASID data
@@ -18,8 +18,7 @@ static DECLARE_IOASID_SET(iommu_sva_pasid);
*
* Try to allocate a PASID for this mm, or take a reference to the existing one
* provided it fits within the [@min, @max] range. On success the PASID is
* available in mm->pasid, and must be released with iommu_sva_free_pasid().
* @min must be greater than 0, because 0 indicates an unused mm->pasid.
* available in mm->pasid and will be available for the lifetime of the mm.
*
* Returns 0 on success and < 0 on error.
*/

@@ -33,38 +32,24 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
return -EINVAL;

mutex_lock(&iommu_sva_lock);
if (mm->pasid) {
if (mm->pasid >= min && mm->pasid <= max)
ioasid_get(mm->pasid);
else
/* Is a PASID already associated with this mm? */
if (pasid_valid(mm->pasid)) {
if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW;
} else {
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (pasid == INVALID_IOASID)
ret = -ENOMEM;
else
mm->pasid = pasid;
goto out;
}

pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (!pasid_valid(pasid))
ret = -ENOMEM;
else
mm_pasid_set(mm, pasid);
out:
mutex_unlock(&iommu_sva_lock);
return ret;
}
EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid);

/**
* iommu_sva_free_pasid - Release the mm's PASID
* @mm: the mm
*
* Drop one reference to a PASID allocated with iommu_sva_alloc_pasid()
*/
void iommu_sva_free_pasid(struct mm_struct *mm)
{
mutex_lock(&iommu_sva_lock);
if (ioasid_put(mm->pasid))
mm->pasid = 0;
mutex_unlock(&iommu_sva_lock);
}
EXPORT_SYMBOL_GPL(iommu_sva_free_pasid);

/* ioasid_find getter() requires a void * argument */
static bool __mmget_not_zero(void *mm)
{
@@ -9,7 +9,6 @@
#include <linux/mm_types.h>

int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max);
void iommu_sva_free_pasid(struct mm_struct *mm);
struct mm_struct *iommu_sva_find(ioasid_t pasid);

/* I/O Page fault */

@@ -17,7 +16,7 @@ struct device;
struct iommu_fault;
struct iopf_queue;

#ifdef CONFIG_IOMMU_SVA_LIB
#ifdef CONFIG_IOMMU_SVA
int iommu_queue_iopf(struct iommu_fault *fault, void *cookie);

int iopf_queue_add_device(struct iopf_queue *queue, struct device *dev);

@@ -28,7 +27,7 @@ struct iopf_queue *iopf_queue_alloc(const char *name);
void iopf_queue_free(struct iopf_queue *queue);
int iopf_queue_discard_partial(struct iopf_queue *queue);

#else /* CONFIG_IOMMU_SVA_LIB */
#else /* CONFIG_IOMMU_SVA */
static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
{
return -ENODEV;

@@ -64,5 +63,5 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
{
return -ENODEV;
}
#endif /* CONFIG_IOMMU_SVA_LIB */
#endif /* CONFIG_IOMMU_SVA */
#endif /* _IOMMU_SVA_LIB_H */
@@ -34,13 +34,16 @@ struct ioasid_allocator_ops {
#if IS_ENABLED(CONFIG_IOASID)
ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, ioasid_t max,
void *private);
void ioasid_get(ioasid_t ioasid);
bool ioasid_put(ioasid_t ioasid);
void ioasid_free(ioasid_t ioasid);
void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *));
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data);
static inline bool pasid_valid(ioasid_t ioasid)
{
return ioasid != INVALID_IOASID;
}

#else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,

@@ -49,14 +52,7 @@ static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
return INVALID_IOASID;
}

static inline void ioasid_get(ioasid_t ioasid)
{
}

static inline bool ioasid_put(ioasid_t ioasid)
{
return false;
}
static inline void ioasid_free(ioasid_t ioasid) { }

static inline void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
bool (*getter)(void *))

@@ -78,5 +74,10 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP;
}

static inline bool pasid_valid(ioasid_t ioasid)
{
return false;
}

#endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */
@@ -661,7 +661,7 @@ struct mm_struct {
#endif
struct work_struct async_put_work;

#ifdef CONFIG_IOMMU_SUPPORT
#ifdef CONFIG_IOMMU_SVA
u32 pasid;
#endif
} __randomize_layout;
@@ -955,6 +955,9 @@ struct task_struct {
/* Recursion prevention for eventfd_signal() */
unsigned in_eventfd_signal:1;
#endif
#ifdef CONFIG_IOMMU_SVA
RH_KABI_FILL_HOLE(unsigned pasid_activated:1)
#endif

unsigned long atomic_flags; /* Flags requiring atomic access. */
@@ -8,6 +8,7 @@
#include <linux/mm_types.h>
#include <linux/gfp.h>
#include <linux/sync_core.h>
#include <linux/ioasid.h>

/*
* Routines for handling mm_structs

@@ -407,4 +408,29 @@ static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
}
#endif

#ifdef CONFIG_IOMMU_SVA
static inline void mm_pasid_init(struct mm_struct *mm)
{
mm->pasid = INVALID_IOASID;
}

/* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{
mm->pasid = pasid;
}

static inline void mm_pasid_drop(struct mm_struct *mm)
{
if (pasid_valid(mm->pasid)) {
ioasid_free(mm->pasid);
mm->pasid = INVALID_IOASID;
}
}
#else
static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif

#endif /* _LINUX_SCHED_MM_H */
@@ -9,6 +9,31 @@
#include <stdint.h>
#endif

/* Driver command error status */
enum idxd_scmd_stat {
IDXD_SCMD_DEV_ENABLED = 0x80000010,
IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020,
IDXD_SCMD_WQ_ENABLED = 0x80000021,
IDXD_SCMD_DEV_DMA_ERR = 0x80020000,
IDXD_SCMD_WQ_NO_GRP = 0x80030000,
IDXD_SCMD_WQ_NO_NAME = 0x80040000,
IDXD_SCMD_WQ_NO_SVM = 0x80050000,
IDXD_SCMD_WQ_NO_THRESH = 0x80060000,
IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000,
IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000,
IDXD_SCMD_PERCPU_ERR = 0x80090000,
IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000,
IDXD_SCMD_CDEV_ERR = 0x800b0000,
IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000,
IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
IDXD_SCMD_WQ_IRQ_ERR = 0x80100000,
};

#define IDXD_SCMD_SOFTERR_MASK 0x80000000
#define IDXD_SCMD_SOFTERR_SHIFT 16

/* Descriptor flags */
#define IDXD_OP_FLAG_FENCE 0x0001
#define IDXD_OP_FLAG_BOF 0x0002
@@ -96,6 +96,7 @@
#include <linux/scs.h>
#include <linux/io_uring.h>
#include <linux/bpf.h>
#include <linux/sched/mm.h>

#include <asm/pgalloc.h>
#include <linux/uaccess.h>

@@ -966,6 +967,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
tsk->use_memdelay = 0;
#endif

#ifdef CONFIG_IOMMU_SVA
tsk->pasid_activated = 0;
#endif

#ifdef CONFIG_MEMCG
tsk->active_memcg = NULL;
#endif

@@ -1018,13 +1023,6 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
#endif
}

static void mm_init_pasid(struct mm_struct *mm)
{
#ifdef CONFIG_IOMMU_SUPPORT
mm->pasid = INIT_PASID;
#endif
}

static void mm_init_uprobes_state(struct mm_struct *mm)
{
#ifdef CONFIG_UPROBES

@@ -1054,7 +1052,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
mm_init_cpumask(mm);
mm_init_aio(mm);
mm_init_owner(mm, p);
mm_init_pasid(mm);
mm_pasid_init(mm);
RCU_INIT_POINTER(mm->exe_file, NULL);
mmu_notifier_subscriptions_init(mm);
init_tlb_flush_pending(mm);

@@ -1121,6 +1119,7 @@ static inline void __mmput(struct mm_struct *mm)
}
if (mm->binfmt)
module_put(mm->binfmt->module);
mm_pasid_drop(mm);
mmdrop(mm);
}
@@ -10,6 +10,7 @@

#include <linux/atomic.h>
#include <linux/user_namespace.h>
#include <linux/ioasid.h>
#include <asm/mmu.h>

#ifndef INIT_MM_CONTEXT

@@ -38,6 +39,9 @@ struct mm_struct init_mm = {
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
.cpu_bitmap = CPU_BITS_NONE,
#ifdef CONFIG_IOMMU_SVA
.pasid = INVALID_IOASID,
#endif
INIT_MM_CONTEXT(init_mm)
};
@@ -0,0 +1 @@
# CONFIG_INTEL_IDXD_COMPAT is not set
@@ -110,7 +110,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
{
struct insn insn;
int x86_64, ret;
unsigned char op1, op2,
unsigned char op1, op2, op3,
rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;

@@ -137,6 +137,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,

op1 = insn.opcode.bytes[0];
op2 = insn.opcode.bytes[1];
op3 = insn.opcode.bytes[2];

if (insn.rex_prefix.nbytes) {
rex = insn.rex_prefix.bytes[0];

@@ -489,6 +490,14 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* nopl/nopw */
*type = INSN_NOP;

} else if (op2 == 0x38 && op3 == 0xf8) {
if (insn.prefixes.nbytes == 1 &&
insn.prefixes.bytes[0] == 0xf2) {
/* ENQCMD cannot be used in the kernel. */
WARN("ENQCMD instruction at %s:%lx", sec->name,
offset);
}

} else if (op2 == 0xa0 || op2 == 0xa8) {

/* push fs/gs */