Merge: [RHEL9.6] Recent upstream fixes for the IOMMU and DMAEngine subsystems

MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/6107

# Merge Request Required Information

JIRA: https://issues.redhat.com/browse/RHEL-73035
JIRA: https://issues.redhat.com/browse/RHEL-73037
Upstream Status: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
CVE: CVE-2024-53232
CVE: CVE-2024-56568
CVE: CVE-2024-56624
CVE: CVE-2024-56668
CVE: CVE-2024-56669

Omitted-fix: 97cb1fa0272646c2a033b05338bb8e0260879968
  This commit depends on other changes and is marked as a fix only because it removes code that becomes
  unnecessary once those changes are in place. Those other changes still need to be evaluated; if they are
  taken, they will go in through a separate MR.

## Summary of Changes

Recent fixes that have landed upstream and touch drivers we support in the IOMMU and DMAEngine subsystems, including:

* Intel: fixes for dumping I/O page table info when a DMAR fault occurs.
* AMD: locking fix for v2 page table invalidation.
* Intel: fix a missed qi_batch allocation for nested domains in the QI batching code, which could lead to a NULL pointer dereference.
* arm-smmu: defer client probing so that of_dma_configure is not called prior to driver_bound, which resulted in a NULL pointer dereference.
* s390: implement a blocking domain.
* dmaengine tegra: return the correct status when a channel is paused (a generic sketch of the pattern follows this list).
* dmaengine dw: select only supported masters for ACPI devices.
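
As background for the Tegra status bullet, the general dmaengine pattern is that a driver supporting pause has to track the paused state itself, because the cookie bookkeeping only distinguishes completed from in-flight work. Below is a minimal sketch of that pattern using hypothetical names (`example_chan`, `example_tx_status`); it is not the tegra186-gpc-dma code, which appears in the diff further down.

```c
#include <linux/dmaengine.h>
#include "dmaengine.h"  /* driver-internal helper dma_cookie_status() lives here */

/* Hypothetical channel wrapper; a real driver embeds this state in its own struct. */
struct example_chan {
        struct dma_chan chan;
        enum dma_status status; /* updated by the pause/resume/terminate paths */
};

static enum dma_status example_tx_status(struct dma_chan *c, dma_cookie_t cookie,
                                         struct dma_tx_state *txstate)
{
        struct example_chan *ec = container_of(c, struct example_chan, chan);
        enum dma_status ret;

        /* Cookie bookkeeping alone cannot report a pause. */
        ret = dma_cookie_status(c, cookie, txstate);
        if (ret == DMA_COMPLETE)
                return ret;

        /* Without this, callers keep seeing DMA_IN_PROGRESS while paused. */
        if (ec->status == DMA_PAUSED)
                ret = DMA_PAUSED;

        return ret;
}
```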

## Approved Development Ticket(s)
All submissions to CentOS Stream must reference a ticket in [Red Hat Jira](https://issues.redhat.com/).

<details><summary>Click for formatting instructions</summary>
Please follow the CentOS Stream [contribution documentation](https://docs.centos.org/en-US/stream-contrib/quickstart/) for how to file this ticket and have it approved.

List tickets each on their own line of this description using the format "Resolves: RHEL-1234", "Related: RHEL-2345" or "Reverts: RHEL-3456", as appropriate.
</details>

Signed-off-by: Jerry Snitselaar <jsnitsel@redhat.com>

Approved-by: Donald Dutile <ddutile@redhat.com>
Approved-by: Brian Masney <bmasney@redhat.com>
Approved-by: Eder Zulian <ezulian@redhat.com>
Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com>

Merged-by: Patrick Talbert <ptalbert@redhat.com>
Commit 52fab82b8b (Patrick Talbert, 2025-01-31 10:07:31 -05:00)
19 changed files with 236 additions and 94 deletions

View File

@@ -96,7 +96,6 @@ struct zpci_bar_struct {
         u8 size; /* order 2 exponent */
 };
-struct s390_domain;
 struct kvm_zdev;
 #define ZPCI_FUNCTIONS_PER_BUS 256
@@ -185,9 +184,10 @@ struct zpci_dev {
         struct dentry *debugfs_dev;
         /* IOMMU and passthrough */
-        struct s390_domain *s390_domain; /* s390 IOMMU domain data */
+        struct iommu_domain *s390_domain; /* attached IOMMU domain */
         struct kvm_zdev *kzdev;
         struct mutex kzdev_lock;
+        spinlock_t dom_lock; /* protect s390_domain change */
 };
 static inline bool zdev_enabled(struct zpci_dev *zdev)

View File

@@ -160,6 +160,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
         u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
         struct zpci_iommu_ctrs *ctrs;
         struct zpci_fib fib = {0};
+        unsigned long flags;
         u8 cc, status;
         if (zdev->fmb || sizeof(*zdev->fmb) < zdev->fmb_length)
@@ -171,6 +172,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
         WARN_ON((u64) zdev->fmb & 0xf);
         /* reset software counters */
+        spin_lock_irqsave(&zdev->dom_lock, flags);
         ctrs = zpci_get_iommu_ctrs(zdev);
         if (ctrs) {
                 atomic64_set(&ctrs->mapped_pages, 0);
@@ -179,6 +181,7 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
                 atomic64_set(&ctrs->sync_map_rpcits, 0);
                 atomic64_set(&ctrs->sync_rpcits, 0);
         }
+        spin_unlock_irqrestore(&zdev->dom_lock, flags);
         fib.fmb_addr = virt_to_phys(zdev->fmb);

View File

@@ -71,17 +71,23 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
 static void pci_sw_counter_show(struct seq_file *m)
 {
-        struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+        struct zpci_dev *zdev = m->private;
+        struct zpci_iommu_ctrs *ctrs;
         atomic64_t *counter;
+        unsigned long flags;
         int i;
+        spin_lock_irqsave(&zdev->dom_lock, flags);
+        ctrs = zpci_get_iommu_ctrs(m->private);
         if (!ctrs)
-                return;
+                goto unlock;
         counter = &ctrs->mapped_pages;
         for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
                 seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
                            atomic64_read(counter));
+unlock:
+        spin_unlock_irqrestore(&zdev->dom_lock, flags);
 }
 static int pci_perf_show(struct seq_file *m, void *v)

View File

@@ -70,10 +70,14 @@ static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
         si = (const struct acpi_csrt_shared_info *)&grp[1];
-        /* Match device by MMIO and IRQ */
+        /* Match device by MMIO */
         if (si->mmio_base_low != lower_32_bits(mem) ||
-            si->mmio_base_high != upper_32_bits(mem) ||
-            si->gsi_interrupt != irq)
+            si->mmio_base_high != upper_32_bits(mem))
+                return 0;
+        /* Match device by Linux vIRQ */
+        ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity);
+        if (ret != irq)
                 return 0;
         dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",

View File

@@ -8,13 +8,15 @@
 static bool dw_dma_acpi_filter(struct dma_chan *chan, void *param)
 {
+        struct dw_dma *dw = to_dw_dma(chan->device);
+        struct dw_dma_chip_pdata *data = dev_get_drvdata(dw->dma.dev);
         struct acpi_dma_spec *dma_spec = param;
         struct dw_dma_slave slave = {
                 .dma_dev = dma_spec->dev,
                 .src_id = dma_spec->slave_id,
                 .dst_id = dma_spec->slave_id,
-                .m_master = 0,
-                .p_master = 1,
+                .m_master = data->m_master,
+                .p_master = data->p_master,
         };
         return dw_dma_filter(chan, &slave);

View File

@@ -51,11 +51,15 @@ struct dw_dma_chip_pdata {
         int (*probe)(struct dw_dma_chip *chip);
         int (*remove)(struct dw_dma_chip *chip);
         struct dw_dma_chip *chip;
+        u8 m_master;
+        u8 p_master;
 };
 static __maybe_unused const struct dw_dma_chip_pdata dw_dma_chip_pdata = {
         .probe = dw_dma_probe,
         .remove = dw_dma_remove,
+        .m_master = 0,
+        .p_master = 1,
 };
 static const struct dw_dma_platform_data idma32_pdata = {
@@ -72,6 +76,8 @@ static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
         .pdata = &idma32_pdata,
         .probe = idma32_dma_probe,
         .remove = idma32_dma_remove,
+        .m_master = 0,
+        .p_master = 0,
 };
 static const struct dw_dma_platform_data xbar_pdata = {
@@ -88,6 +94,8 @@ static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
         .pdata = &xbar_pdata,
         .probe = idma32_dma_probe,
         .remove = idma32_dma_remove,
+        .m_master = 0,
+        .p_master = 0,
 };
 #endif /* _DMA_DW_INTERNAL_H */

View File

@@ -56,10 +56,10 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
         if (ret)
                 return ret;
-        dw_dma_acpi_controller_register(chip->dw);
         pci_set_drvdata(pdev, data);
+        dw_dma_acpi_controller_register(chip->dw);
         return 0;
 }

View File

@@ -601,22 +601,25 @@ static int rz_dmac_config(struct dma_chan *chan,
         struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
         u32 val;
-        channel->src_per_address = config->src_addr;
         channel->dst_per_address = config->dst_addr;
-        val = rz_dmac_ds_to_val_mapping(config->dst_addr_width);
-        if (val == CHCFG_DS_INVALID)
-                return -EINVAL;
         channel->chcfg &= ~CHCFG_FILL_DDS_MASK;
-        channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+        if (channel->dst_per_address) {
+                val = rz_dmac_ds_to_val_mapping(config->dst_addr_width);
+                if (val == CHCFG_DS_INVALID)
+                        return -EINVAL;
+                channel->chcfg |= FIELD_PREP(CHCFG_FILL_DDS_MASK, val);
+        }
-        val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
-        if (val == CHCFG_DS_INVALID)
-                return -EINVAL;
+        channel->src_per_address = config->src_addr;
         channel->chcfg &= ~CHCFG_FILL_SDS_MASK;
-        channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+        if (channel->src_per_address) {
+                val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+                if (val == CHCFG_DS_INVALID)
+                        return -EINVAL;
+                channel->chcfg |= FIELD_PREP(CHCFG_FILL_SDS_MASK, val);
+        }
         return 0;
 }

View File

@@ -231,6 +231,7 @@ struct tegra_dma_channel {
         bool config_init;
         char name[30];
         enum dma_transfer_direction sid_dir;
+        enum dma_status status;
         int id;
         int irq;
         int slave_id;
@@ -393,6 +394,8 @@ static int tegra_dma_pause(struct tegra_dma_channel *tdc)
                 tegra_dma_dump_chan_regs(tdc);
         }
+        tdc->status = DMA_PAUSED;
         return ret;
 }
@@ -419,6 +422,8 @@ static void tegra_dma_resume(struct tegra_dma_channel *tdc)
         val = tdc_read(tdc, TEGRA_GPCDMA_CHAN_CSRE);
         val &= ~TEGRA_GPCDMA_CHAN_CSRE_PAUSE;
         tdc_write(tdc, TEGRA_GPCDMA_CHAN_CSRE, val);
+        tdc->status = DMA_IN_PROGRESS;
 }
 static int tegra_dma_device_resume(struct dma_chan *dc)
@@ -544,6 +549,7 @@ static void tegra_dma_xfer_complete(struct tegra_dma_channel *tdc)
         tegra_dma_sid_free(tdc);
         tdc->dma_desc = NULL;
+        tdc->status = DMA_COMPLETE;
 }
 static void tegra_dma_chan_decode_error(struct tegra_dma_channel *tdc,
@@ -716,6 +722,7 @@ static int tegra_dma_terminate_all(struct dma_chan *dc)
                 tdc->dma_desc = NULL;
         }
+        tdc->status = DMA_COMPLETE;
         tegra_dma_sid_free(tdc);
         vchan_get_all_descriptors(&tdc->vc, &head);
         spin_unlock_irqrestore(&tdc->vc.lock, flags);
@@ -769,6 +776,9 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
         if (ret == DMA_COMPLETE)
                 return ret;
+        if (tdc->status == DMA_PAUSED)
+                ret = DMA_PAUSED;
         spin_lock_irqsave(&tdc->vc.lock, flags);
         vd = vchan_find_desc(&tdc->vc, cookie);
         if (vd) {

View File

@@ -3186,27 +3186,40 @@ static int udma_configure_statictr(struct udma_chan *uc, struct udma_desc *d,
         d->static_tr.elcnt = elcnt;
-        /*
-         * PDMA must to close the packet when the channel is in packet mode.
-         * For TR mode when the channel is not cyclic we also need PDMA to close
-         * the packet otherwise the transfer will stall because PDMA holds on
-         * the data it has received from the peripheral.
-         */
         if (uc->config.pkt_mode || !uc->cyclic) {
+                /*
+                 * PDMA must close the packet when the channel is in packet mode.
+                 * For TR mode when the channel is not cyclic we also need PDMA
+                 * to close the packet otherwise the transfer will stall because
+                 * PDMA holds on the data it has received from the peripheral.
+                 */
                 unsigned int div = dev_width * elcnt;
                 if (uc->cyclic)
                         d->static_tr.bstcnt = d->residue / d->sglen / div;
                 else
                         d->static_tr.bstcnt = d->residue / div;
+        } else if (uc->ud->match_data->type == DMA_TYPE_BCDMA &&
+                   uc->config.dir == DMA_DEV_TO_MEM &&
+                   uc->cyclic) {
+                /*
+                 * For cyclic mode with BCDMA we have to set EOP in each TR to
+                 * prevent short packet errors seen on channel teardown. So the
+                 * PDMA must close the packet after every TR transfer by setting
+                 * burst count equal to the number of bytes transferred.
+                 */
+                struct cppi5_tr_type1_t *tr_req = d->hwdesc[0].tr_req_base;
-                if (uc->config.dir == DMA_DEV_TO_MEM &&
-                    d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask)
-                        return -EINVAL;
+                d->static_tr.bstcnt =
+                        (tr_req->icnt0 * tr_req->icnt1) / dev_width;
         } else {
                 d->static_tr.bstcnt = 0;
         }
+        if (uc->config.dir == DMA_DEV_TO_MEM &&
+            d->static_tr.bstcnt > uc->ud->match_data->statictr_z_mask)
+                return -EINVAL;
         return 0;
 }
@@ -3451,8 +3464,9 @@ udma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
         /* static TR for remote PDMA */
         if (udma_configure_statictr(uc, d, dev_width, burst)) {
                 dev_err(uc->ud->dev,
-                        "%s: StaticTR Z is limited to maximum 4095 (%u)\n",
-                        __func__, d->static_tr.bstcnt);
+                        "%s: StaticTR Z is limited to maximum %u (%u)\n",
+                        __func__, uc->ud->match_data->statictr_z_mask,
+                        d->static_tr.bstcnt);
                 udma_free_hwdesc(uc, d);
                 kfree(d);
@@ -3477,6 +3491,7 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
         u16 tr0_cnt0, tr0_cnt1, tr1_cnt0;
         unsigned int i;
         int num_tr;
+        u32 period_csf = 0;
         num_tr = udma_get_tr_counters(period_len, __ffs(buf_addr), &tr0_cnt0,
                                       &tr0_cnt1, &tr1_cnt0);
@@ -3499,6 +3514,20 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
                 period_addr = buf_addr |
                               ((u64)uc->config.asel << K3_ADDRESS_ASEL_SHIFT);
+        /*
+         * For BCDMA <-> PDMA transfers, the EOP flag needs to be set on the
+         * last TR of a descriptor, to mark the packet as complete.
+         * This is required for getting the teardown completion message in case
+         * of TX, and to avoid short-packet error in case of RX.
+         *
+         * As we are in cyclic mode, we do not know which period might be the
+         * last one, so set the flag for each period.
+         */
+        if (uc->config.ep_type == PSIL_EP_PDMA_XY &&
+            uc->ud->match_data->type == DMA_TYPE_BCDMA) {
+                period_csf = CPPI5_TR_CSF_EOP;
+        }
         for (i = 0; i < periods; i++) {
                 int tr_idx = i * num_tr;
@@ -3526,8 +3555,10 @@ udma_prep_dma_cyclic_tr(struct udma_chan *uc, dma_addr_t buf_addr,
                 }
                 if (!(flags & DMA_PREP_INTERRUPT))
-                        cppi5_tr_csf_set(&tr_req[tr_idx].flags,
-                                         CPPI5_TR_CSF_SUPR_EVT);
+                        period_csf |= CPPI5_TR_CSF_SUPR_EVT;
+                if (period_csf)
+                        cppi5_tr_csf_set(&tr_req[tr_idx].flags, period_csf);
                 period_addr += period_len;
         }
@@ -3656,8 +3687,9 @@ udma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
         /* static TR for remote PDMA */
         if (udma_configure_statictr(uc, d, dev_width, burst)) {
                 dev_err(uc->ud->dev,
-                        "%s: StaticTR Z is limited to maximum 4095 (%u)\n",
-                        __func__, d->static_tr.bstcnt);
+                        "%s: StaticTR Z is limited to maximum %u (%u)\n",
+                        __func__, uc->ud->match_data->statictr_z_mask,
+                        d->static_tr.bstcnt);
                 udma_free_hwdesc(uc, d);
                 kfree(d);

View File

@@ -268,8 +268,11 @@ static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 out:
         if (updated) {
                 struct protection_domain *pdom = io_pgtable_ops_to_domain(ops);
+                unsigned long flags;
+                spin_lock_irqsave(&pdom->lock, flags);
                 amd_iommu_domain_flush_pages(pdom, o_iova, size);
+                spin_unlock_irqrestore(&pdom->lock, flags);
         }
         if (mapped)

View File

@@ -801,7 +801,9 @@ out_fallback:
         return 0;
 }
-struct dentry *cmdqv_debugfs_dir;
+#ifdef CONFIG_IOMMU_DEBUGFS
+static struct dentry *cmdqv_debugfs_dir;
+#endif
 static struct arm_smmu_device *
 __tegra241_cmdqv_probe(struct arm_smmu_device *smmu, struct resource *res,

View File

@@ -1437,6 +1437,17 @@ static struct iommu_device *arm_smmu_probe_device(struct device *dev)
                 goto out_free;
         } else {
                 smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
+                /*
+                 * Defer probe if the relevant SMMU instance hasn't finished
+                 * probing yet. This is a fragile hack and we'd ideally
+                 * avoid this race in the core code. Until that's ironed
+                 * out, however, this is the most pragmatic option on the
+                 * table.
+                 */
+                if (!smmu)
+                        return ERR_PTR(dev_err_probe(dev, -EPROBE_DEFER,
+                                                     "smmu dev has not bound yet\n"));
         }
         ret = -EINVAL;

View File

@@ -105,12 +105,35 @@ static void cache_tag_unassign(struct dmar_domain *domain, u16 did,
         spin_unlock_irqrestore(&domain->cache_lock, flags);
 }
+/* domain->qi_batch will be freed in iommu_free_domain() path. */
+static int domain_qi_batch_alloc(struct dmar_domain *domain)
+{
+        unsigned long flags;
+        int ret = 0;
+        spin_lock_irqsave(&domain->cache_lock, flags);
+        if (domain->qi_batch)
+                goto out_unlock;
+        domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_ATOMIC);
+        if (!domain->qi_batch)
+                ret = -ENOMEM;
+out_unlock:
+        spin_unlock_irqrestore(&domain->cache_lock, flags);
+        return ret;
+}
 static int __cache_tag_assign_domain(struct dmar_domain *domain, u16 did,
                                      struct device *dev, ioasid_t pasid)
 {
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         int ret;
+        ret = domain_qi_batch_alloc(domain);
+        if (ret)
+                return ret;
         ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_IOTLB);
         if (ret || !info->ats_enabled)
                 return ret;
@@ -139,6 +162,10 @@ static int __cache_tag_assign_parent_domain(struct dmar_domain *domain, u16 did,
         struct device_domain_info *info = dev_iommu_priv_get(dev);
         int ret;
+        ret = domain_qi_batch_alloc(domain);
+        if (ret)
+                return ret;
         ret = cache_tag_assign(domain, did, dev, pasid, CACHE_TAG_NESTING_IOTLB);
         if (ret || !info->ats_enabled)
                 return ret;
@@ -190,13 +217,6 @@ int cache_tag_assign_domain(struct dmar_domain *domain,
         u16 did = domain_get_id_for_dev(domain, dev);
         int ret;
-        /* domain->qi_bach will be freed in iommu_free_domain() path. */
-        if (!domain->qi_batch) {
-                domain->qi_batch = kzalloc(sizeof(*domain->qi_batch), GFP_KERNEL);
-                if (!domain->qi_batch)
-                        return -ENOMEM;
-        }
         ret = __cache_tag_assign_domain(domain, did, dev, pasid);
         if (ret || domain->domain.type != IOMMU_DOMAIN_NESTED)
                 return ret;

View File

@@ -707,14 +707,15 @@ static void pgtable_walk(struct intel_iommu *iommu, unsigned long pfn,
         while (1) {
                 offset = pfn_level_offset(pfn, level);
                 pte = &parent[offset];
-                if (!pte || (dma_pte_superpage(pte) || !dma_pte_present(pte))) {
-                        pr_info("PTE not present at level %d\n", level);
-                        break;
-                }
                 pr_info("pte level: %d, pte value: 0x%016llx\n", level, pte->val);
-                if (level == 1)
+                if (!dma_pte_present(pte)) {
+                        pr_info("page table not present at level %d\n", level - 1);
+                        break;
+                }
+                if (level == 1 || dma_pte_superpage(pte))
                         break;
                 parent = phys_to_virt(dma_pte_addr(pte));
@@ -737,11 +738,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
         pr_info("Dump %s table entries for IOVA 0x%llx\n", iommu->name, addr);
         /* root entry dump */
-        rt_entry = &iommu->root_entry[bus];
-        if (!rt_entry) {
-                pr_info("root table entry is not present\n");
+        if (!iommu->root_entry) {
+                pr_info("root table is not present\n");
                 return;
         }
+        rt_entry = &iommu->root_entry[bus];
         if (sm_supported(iommu))
                 pr_info("scalable mode root entry: hi 0x%016llx, low 0x%016llx\n",
@@ -752,7 +753,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
         /* context entry dump */
         ctx_entry = iommu_context_addr(iommu, bus, devfn, 0);
         if (!ctx_entry) {
-                pr_info("context table entry is not present\n");
+                pr_info("context table is not present\n");
                 return;
         }
@@ -761,17 +762,23 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
         /* legacy mode does not require PASID entries */
         if (!sm_supported(iommu)) {
+                if (!context_present(ctx_entry)) {
+                        pr_info("legacy mode page table is not present\n");
+                        return;
+                }
                 level = agaw_to_level(ctx_entry->hi & 7);
                 pgtable = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
                 goto pgtable_walk;
         }
-        /* get the pointer to pasid directory entry */
-        dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
-        if (!dir) {
-                pr_info("pasid directory entry is not present\n");
+        if (!context_present(ctx_entry)) {
+                pr_info("pasid directory table is not present\n");
                 return;
         }
+        /* get the pointer to pasid directory entry */
+        dir = phys_to_virt(ctx_entry->lo & VTD_PAGE_MASK);
         /* For request-without-pasid, get the pasid from context entry */
         if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID)
                 pasid = IOMMU_NO_PASID;
@@ -783,7 +790,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
         /* get the pointer to the pasid table entry */
         entries = get_pasid_table_from_pde(pde);
         if (!entries) {
-                pr_info("pasid table entry is not present\n");
+                pr_info("pasid table is not present\n");
                 return;
         }
         index = pasid & PASID_PTE_MASK;
@@ -791,6 +798,11 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id,
         for (i = 0; i < ARRAY_SIZE(pte->val); i++)
                 pr_info("pasid table entry[%d]: 0x%016llx\n", i, pte->val[i]);
+        if (!pasid_pte_is_present(pte)) {
+                pr_info("scalable mode page table is not present\n");
+                return;
+        }
         if (pasid_pte_get_pgtt(pte) == PASID_ENTRY_PGTT_FL_ONLY) {
                 level = pte->val[2] & BIT_ULL(2) ? 5 : 4;
                 pgtable = phys_to_virt(pte->val[2] & VTD_PAGE_MASK);
@@ -3360,6 +3372,9 @@ void device_block_translation(struct device *dev)
         struct intel_iommu *iommu = info->iommu;
         unsigned long flags;
+        if (info->domain)
+                cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
         iommu_disable_pci_caps(info);
         if (!dev_is_real_dma_subdevice(dev)) {
                 if (sm_supported(iommu))
@@ -3376,7 +3391,6 @@ void device_block_translation(struct device *dev)
                 list_del(&info->link);
                 spin_unlock_irqrestore(&info->domain->lock, flags);
-                cache_tag_unassign_domain(info->domain, dev, IOMMU_NO_PASID);
                 domain_detach_iommu(info->domain, iommu);
                 info->domain = NULL;
         }

View File

@@ -416,8 +416,6 @@ out_put_fdno:
         put_unused_fd(fdno);
 out_fput:
         fput(filep);
-        refcount_dec(&fault->obj.users);
-        iommufd_ctx_put(fault->ictx);
 out_abort:
         iommufd_object_abort_and_destroy(ucmd->ictx, &fault->obj);

View File

@@ -33,6 +33,8 @@ struct s390_domain {
         struct rcu_head rcu;
 };
+static struct iommu_domain blocking_domain;
 static inline unsigned int calc_rtx(dma_addr_t ptr)
 {
         return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -369,20 +371,36 @@ static void s390_domain_free(struct iommu_domain *domain)
         call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
 }
-static void s390_iommu_detach_device(struct iommu_domain *domain,
-                                     struct device *dev)
+static void zdev_s390_domain_update(struct zpci_dev *zdev,
+                                    struct iommu_domain *domain)
 {
-        struct s390_domain *s390_domain = to_s390_domain(domain);
-        struct zpci_dev *zdev = to_zpci_dev(dev);
         unsigned long flags;
+        spin_lock_irqsave(&zdev->dom_lock, flags);
+        zdev->s390_domain = domain;
+        spin_unlock_irqrestore(&zdev->dom_lock, flags);
+}
+static int blocking_domain_attach_device(struct iommu_domain *domain,
+                                         struct device *dev)
+{
+        struct zpci_dev *zdev = to_zpci_dev(dev);
+        struct s390_domain *s390_domain;
+        unsigned long flags;
+        if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
+                return 0;
+        s390_domain = to_s390_domain(zdev->s390_domain);
         spin_lock_irqsave(&s390_domain->list_lock, flags);
         list_del_rcu(&zdev->iommu_list);
         spin_unlock_irqrestore(&s390_domain->list_lock, flags);
         zpci_unregister_ioat(zdev, 0);
-        zdev->s390_domain = NULL;
         zdev->dma_table = NULL;
+        zdev_s390_domain_update(zdev, domain);
+        return 0;
 }
 static int s390_iommu_attach_device(struct iommu_domain *domain,
@@ -401,20 +419,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
                     domain->geometry.aperture_end < zdev->start_dma))
                 return -EINVAL;
-        if (zdev->s390_domain)
-                s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+        blocking_domain_attach_device(&blocking_domain, dev);
+        /* If we fail now DMA remains blocked via blocking domain */
         cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
                                 virt_to_phys(s390_domain->dma_table), &status);
-        /*
-         * If the device is undergoing error recovery the reset code
-         * will re-establish the new domain.
-         */
         if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
                 return -EIO;
         zdev->dma_table = s390_domain->dma_table;
-        zdev->s390_domain = s390_domain;
+        zdev_s390_domain_update(zdev, domain);
         spin_lock_irqsave(&s390_domain->list_lock, flags);
         list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
@@ -466,21 +479,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
         if (zdev->tlb_refresh)
                 dev->iommu->shadow_on_flush = 1;
+        /* Start with DMA blocked */
+        spin_lock_init(&zdev->dom_lock);
+        zdev_s390_domain_update(zdev, &blocking_domain);
         return &zdev->iommu_dev;
 }
-static void s390_iommu_release_device(struct device *dev)
-{
-        struct zpci_dev *zdev = to_zpci_dev(dev);
-        /*
-         * release_device is expected to detach any domain currently attached
-         * to the device, but keep it attached to other devices in the group.
-         */
-        if (zdev)
-                s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
-}
 static int zpci_refresh_all(struct zpci_dev *zdev)
 {
         return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
@@ -697,9 +702,15 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
 struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
 {
-        if (!zdev || !zdev->s390_domain)
+        struct s390_domain *s390_domain;
+        lockdep_assert_held(&zdev->dom_lock);
+        if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
                 return NULL;
-        return &zdev->s390_domain->ctrs;
+        s390_domain = to_s390_domain(zdev->s390_domain);
+        return &s390_domain->ctrs;
 }
 int zpci_init_iommu(struct zpci_dev *zdev)
@@ -776,11 +787,19 @@ static int __init s390_iommu_init(void)
 }
 subsys_initcall(s390_iommu_init);
+static struct iommu_domain blocking_domain = {
+        .type = IOMMU_DOMAIN_BLOCKED,
+        .ops = &(const struct iommu_domain_ops) {
+                .attach_dev = blocking_domain_attach_device,
+        }
+};
 static const struct iommu_ops s390_iommu_ops = {
+        .blocked_domain = &blocking_domain,
+        .release_domain = &blocking_domain,
         .capable = s390_iommu_capable,
         .domain_alloc_paging = s390_domain_alloc_paging,
         .probe_device = s390_iommu_probe_device,
-        .release_device = s390_iommu_release_device,
         .device_group = generic_device_group,
         .pgsize_bitmap = SZ_4K,
         .get_resv_regions = s390_iommu_get_resv_regions,

View File

@@ -242,7 +242,7 @@ static inline bool dev_is_dma_coherent(struct device *dev)
 {
         return true;
 }
-#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */
+#endif
 static inline void dma_reset_need_sync(struct device *dev)
 {

View File

@@ -84,7 +84,7 @@ enum dma_transfer_direction {
         DMA_TRANS_NONE,
 };
-/**
+/*
  * Interleaved Transfer Request
  * ----------------------------
  * A chunk is collection of contiguous bytes to be transferred.
@@ -223,7 +223,7 @@ enum sum_check_bits {
 };
 /**
- * enum pq_check_flags - result of async_{xor,pq}_zero_sum operations
+ * enum sum_check_flags - result of async_{xor,pq}_zero_sum operations
  * @SUM_CHECK_P_RESULT - 1 if xor zero sum error, 0 otherwise
  * @SUM_CHECK_Q_RESULT - 1 if reed-solomon zero sum error, 0 otherwise
  */
@@ -286,7 +286,7 @@ typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t;
  *    pointer to the engine's metadata area
  * 4. Read out the metadata from the pointer
  *
- * Note: the two mode is not compatible and clients must use one mode for a
+ * Warning: the two modes are not compatible and clients must use one mode for a
  * descriptor.
  */
 enum dma_desc_metadata_mode {
@@ -594,9 +594,13 @@ struct dma_descriptor_metadata_ops {
  * @phys: physical address of the descriptor
  * @chan: target channel for this operation
  * @tx_submit: accept the descriptor, assign ordered cookie and mark the
+ * @desc_free: driver's callback function to free a resusable descriptor
+ *      after completion
  *      descriptor pending. To be pushed on .issue_pending() call
  * @callback: routine to call after this operation is complete
+ * @callback_result: error result from a DMA transaction
 * @callback_param: general parameter to pass to the callback routine
+ * @unmap: hook for generic DMA unmap data
  * @desc_metadata_mode: core managed metadata mode to protect mixed use of
  *      DESC_METADATA_CLIENT or DESC_METADATA_ENGINE. Otherwise
  *      DESC_METADATA_NONE
@@ -827,6 +831,9 @@ struct dma_filter {
  * @device_prep_dma_memset: prepares a memset operation
  * @device_prep_dma_memset_sg: prepares a memset operation over a scatter list
  * @device_prep_dma_interrupt: prepares an end of chain interrupt operation
+ * @device_prep_peripheral_dma_vec: prepares a scatter-gather DMA transfer,
+ *      where the address and size of each segment is located in one entry of
+ *      the dma_vec array.
  * @device_prep_slave_sg: prepares a slave dma operation
  * @device_prep_dma_cyclic: prepare a cyclic dma operation suitable for audio.
  *      The function takes a buffer of size buf_len. The callback function will