powerpc fixes for 7.0 #3

- Fix KUAP warning in VMX usercopy path
  - Fix lockdep warning during PCI enumeration
  - Fix to move CMA reservations to arch_mm_preinit
  - Fix to check current->mm is alive before getting user callchain
 
 Thanks to: Aboorva Devarajan, Christophe Leroy (CS GROUP), Dan Horák, Nicolin
 Chen, Nilay Shroff, Qiao Zhao, Ritesh Harjani (IBM), Saket Kumar Bhaskar,
 Sayali Patil, Shrikanth Hegde, Venkat Rao Bagalkote, Viktor Malik,
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEqX2DNAOgU8sBX3pRpnEsdPSHZJQFAmm2KVsACgkQpnEsdPSH
 ZJT6VBAAlmediTysFMpS6qnxrhJ/ZbERskIvfGkcW4i8lPr5yluPjQhj75Q9RYIy
 eRFS5eYssQVXbhS1/YWfsQKcG2tH7ucl0ocYfl8xvGGCgpSEu+wYTwECj2OVSF7T
 BiQ6VsHcOLJJ1SxCoS17n+sl8WGuIGikWKYM2ECeNx7iysrFczcj4RQ9Z4aYWT91
 xmgDyQwrNmxSy85OXq5ITLcY5IcVLtwnpjyTp4z94fP2Ho/R/muL9i3Sven7Iiqm
 a5I5XDozMFxtFtOxYlh7y8cKisDEYqinqoA/9P59kEtZ5XML8yp/s7rJ7Gjl/AmF
 O3fEAbtevTz2XvpVpx6XiRAXDtdRyR+YFUZMTABawDFlHZffD7m4eg/9A4JvDJ/8
 LxklCGLECZes+dEULGG/kXoOD7e2jJKDBsGYjgGWXU5+ZI8qjhfSWdiXAcl1DEHd
 gYZ2N6eYNWP/m2wqs5FUiabdB0yPdcpI7ukxmECpQDdS4TCA4sU3DI0FRyGktABV
 nNaYBZezZhlCWzNo/NBxFAvj6OHmo8WYHX1G6piE6nJKYyPlbjLyV5/tvkW9oxlM
 HlejFBKF4Us9ZotNgWxQdJzZCJ3qWmuxDgukzShX4mDbGdK8+4Vv9Qjk1SwsCypS
 HQ/ff0SNcHVdDJkw41jOJxoTv/2+vEB+1FmytmZ7s/fxUs/qW04=
 =OrN9
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Madhavan Srinivasan:

 - Fix KUAP warning in VMX usercopy path

 - Fix lockdep warning during PCI enumeration

 - Fix to move CMA reservations to arch_mm_preinit

 - Fix to check current->mm is alive before getting user callchain

Thanks to Aboorva Devarajan, Christophe Leroy (CS GROUP), Dan Horák,
Nicolin Chen, Nilay Shroff, Qiao Zhao, Ritesh Harjani (IBM), Saket Kumar
Bhaskar, Sayali Patil, Shrikanth Hegde, Venkat Rao Bagalkote, and Viktor
Malik.

* tag 'powerpc-7.0-3' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  powerpc/iommu: fix lockdep warning during PCI enumeration
  powerpc/selftests/copyloops: extend selftest to exercise __copy_tofrom_user_power7_vmx
  powerpc: fix KUAP warning in VMX usercopy path
  powerpc, perf: Check that current->mm is alive before getting user callchain
  powerpc/mem: Move CMA reservations to arch_mm_preinit
This commit is contained in:
Linus Torvalds 2026-03-15 11:36:11 -07:00
commit 4f3df2e5ea
14 changed files with 109 additions and 79 deletions

View File

@ -15,6 +15,9 @@
#define TASK_SIZE_MAX TASK_SIZE_USER64
#endif
/* Threshold above which VMX copy path is used */
#define VMX_COPY_THRESHOLD 3328
#include <asm-generic/access_ok.h>
/*
@ -326,40 +329,62 @@ do { \
extern unsigned long __copy_tofrom_user(void __user *to,
const void __user *from, unsigned long size);
#ifdef __powerpc64__
unsigned long __copy_tofrom_user_base(void __user *to,
const void __user *from, unsigned long size);
unsigned long __copy_tofrom_user_power7_vmx(void __user *to,
const void __user *from, unsigned long size);
/*
 * Decide whether a user copy of @n bytes should take the VMX-accelerated
 * path: requires CONFIG_ALTIVEC, a CPU advertising CPU_FTR_VMX_COPY, and a
 * copy large enough (> VMX_COPY_THRESHOLD bytes) to amortise the cost of
 * saving/restoring vector state.
 */
static __always_inline bool will_use_vmx(unsigned long n)
{
	if (!IS_ENABLED(CONFIG_ALTIVEC))
		return false;
	if (!cpu_has_feature(CPU_FTR_VMX_COPY))
		return false;
	return n > VMX_COPY_THRESHOLD;
}
/*
 * Common helper for all raw user copies.
 *
 * @to:   destination (user or __force-cast kernel pointer)
 * @from: source (user or __force-cast kernel pointer)
 * @n:    number of bytes to copy
 * @dir:  KUAP access direction (KUAP_READ, KUAP_WRITE or KUAP_READ_WRITE)
 *
 * Returns the number of bytes NOT copied (0 on full success).
 *
 * Large copies (see will_use_vmx()) use the VMX routine; note the KUAP
 * window is opened only AFTER enter_vmx_usercopy() succeeds and is closed
 * BEFORE exit_vmx_usercopy() — keeping user access disallowed across the
 * VMX enter/exit calls is the fix for the KUAP warning this patch targets.
 */
static __always_inline unsigned long
raw_copy_tofrom_user(void __user *to, const void __user *from,
unsigned long n, unsigned long dir)
{
unsigned long ret;
if (will_use_vmx(n) && enter_vmx_usercopy()) {
allow_user_access(to, dir);
ret = __copy_tofrom_user_power7_vmx(to, from, n);
prevent_user_access(dir);
exit_vmx_usercopy();
/*
 * Non-zero means the VMX copy faulted partway; retry the whole
 * copy with the base (non-VMX) routine, re-opening a KUAP
 * window just for that attempt.
 */
if (unlikely(ret)) {
allow_user_access(to, dir);
ret = __copy_tofrom_user_base(to, from, n);
prevent_user_access(dir);
}
return ret;
}
/* Small copy, or VMX unavailable: plain path under a KUAP window. */
allow_user_access(to, dir);
ret = __copy_tofrom_user(to, from, n);
prevent_user_access(dir);
return ret;
}
#ifdef CONFIG_PPC64
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
unsigned long ret;
barrier_nospec();
allow_user_access(to, KUAP_READ_WRITE);
ret = __copy_tofrom_user(to, from, n);
prevent_user_access(KUAP_READ_WRITE);
return ret;
return raw_copy_tofrom_user(to, from, n, KUAP_READ_WRITE);
}
#endif /* __powerpc64__ */
#endif /* CONFIG_PPC64 */
static inline unsigned long raw_copy_from_user(void *to,
const void __user *from, unsigned long n)
static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n)
{
unsigned long ret;
allow_user_access(NULL, KUAP_READ);
ret = __copy_tofrom_user((__force void __user *)to, from, n);
prevent_user_access(KUAP_READ);
return ret;
return raw_copy_tofrom_user((__force void __user *)to, from, n, KUAP_READ);
}
static inline unsigned long
raw_copy_to_user(void __user *to, const void *from, unsigned long n)
{
unsigned long ret;
allow_user_access(to, KUAP_WRITE);
ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
prevent_user_access(KUAP_WRITE);
return ret;
return raw_copy_tofrom_user(to, (__force const void __user *)from, n, KUAP_WRITE);
}
unsigned long __arch_clear_user(void __user *addr, unsigned long size);

View File

@ -1159,7 +1159,7 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
struct device *dev,
struct iommu_domain *old)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_domain *domain = iommu_driver_get_domain_for_dev(dev);
struct iommu_table_group *table_group;
struct iommu_group *grp;

View File

@ -35,7 +35,6 @@
#include <linux/of_irq.h>
#include <linux/hugetlb.h>
#include <linux/pgtable.h>
#include <asm/kexec.h>
#include <asm/io.h>
#include <asm/paca.h>
#include <asm/processor.h>
@ -995,15 +994,6 @@ void __init setup_arch(char **cmdline_p)
initmem_init();
/*
* Reserve large chunks of memory for use by CMA for kdump, fadump, KVM and
* hugetlb. These must be called after initmem_init(), so that
* pageblock_order is initialised.
*/
fadump_cma_init();
kdump_cma_reserve();
kvm_cma_reserve();
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
if (ppc_md.setup_arch)

View File

@ -562,3 +562,4 @@ exc; std r10,32(3)
li r5,4096
b .Ldst_aligned
EXPORT_SYMBOL(__copy_tofrom_user)
EXPORT_SYMBOL(__copy_tofrom_user_base)

View File

@ -5,13 +5,9 @@
*
* Author: Anton Blanchard <anton@au.ibm.com>
*/
#include <linux/export.h>
#include <asm/ppc_asm.h>
#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE 0
#endif
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB) lvsl VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC
@ -47,10 +43,14 @@
ld r15,STK_REG(R15)(r1)
ld r14,STK_REG(R14)(r1)
.Ldo_err3:
bl CFUNC(exit_vmx_usercopy)
ld r6,STK_REG(R31)(r1) /* original destination pointer */
ld r5,STK_REG(R29)(r1) /* original number of bytes */
subf r7,r6,r3 /* #bytes copied */
subf r3,r7,r5 /* #bytes not copied in r3 */
ld r0,STACKFRAMESIZE+16(r1)
mtlr r0
b .Lexit
addi r1,r1,STACKFRAMESIZE
blr
#endif /* CONFIG_ALTIVEC */
.Ldo_err2:
@ -74,7 +74,6 @@
_GLOBAL(__copy_tofrom_user_power7)
cmpldi r5,16
cmpldi cr1,r5,3328
std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
@ -82,12 +81,6 @@ _GLOBAL(__copy_tofrom_user_power7)
blt .Lshort_copy
#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
bgt cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
.Lnonvmx_copy:
/* Get the source 8B aligned */
@ -263,23 +256,14 @@ err1; stb r0,0(r3)
15: li r3,0
blr
.Lunwind_stack_nonvmx_copy:
addi r1,r1,STACKFRAMESIZE
b .Lnonvmx_copy
.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
_GLOBAL(__copy_tofrom_user_power7_vmx)
mflr r0
std r0,16(r1)
stdu r1,-STACKFRAMESIZE(r1)
bl CFUNC(enter_vmx_usercopy)
cmpwi cr1,r3,0
ld r0,STACKFRAMESIZE+16(r1)
ld r3,STK_REG(R31)(r1)
ld r4,STK_REG(R30)(r1)
ld r5,STK_REG(R29)(r1)
mtlr r0
std r3,STK_REG(R31)(r1)
std r5,STK_REG(R29)(r1)
/*
* We prefetch both the source and destination using enhanced touch
* instructions. We use a stream ID of 0 for the load side and
@ -300,8 +284,6 @@ err1; stb r0,0(r3)
DCBT_SETUP_STREAMS(r6, r7, r9, r10, r8)
beq cr1,.Lunwind_stack_nonvmx_copy
/*
* If source and destination are not relatively aligned we use a
* slower permute loop.
@ -478,7 +460,8 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
b CFUNC(exit_vmx_usercopy) /* tail call optimise */
li r3,0
blr
.Lvmx_unaligned_copy:
/* Get the destination 16B aligned */
@ -681,5 +664,7 @@ err3; lbz r0,0(r4)
err3; stb r0,0(r3)
15: addi r1,r1,STACKFRAMESIZE
b CFUNC(exit_vmx_usercopy) /* tail call optimise */
li r3,0
blr
EXPORT_SYMBOL(__copy_tofrom_user_power7_vmx)
#endif /* CONFIG_ALTIVEC */

View File

@ -27,6 +27,7 @@ int enter_vmx_usercopy(void)
return 1;
}
EXPORT_SYMBOL(enter_vmx_usercopy);
/*
* This function must return 0 because we tail call optimise when calling
@ -49,6 +50,7 @@ int exit_vmx_usercopy(void)
set_dec(1);
return 0;
}
EXPORT_SYMBOL(exit_vmx_usercopy);
int enter_vmx_ops(void)
{

View File

@ -30,6 +30,10 @@
#include <asm/setup.h>
#include <asm/fixmap.h>
#include <asm/fadump.h>
#include <asm/kexec.h>
#include <asm/kvm_ppc.h>
#include <mm/mmu_decl.h>
unsigned long long memory_limit __initdata;
@ -268,6 +272,16 @@ void __init paging_init(void)
void __init arch_mm_preinit(void)
{
/*
* Reserve large chunks of memory for use by CMA for kdump, fadump, KVM
* and hugetlb. These must be called after pageblock_order is
* initialised.
*/
fadump_cma_init();
kdump_cma_reserve();
kvm_cma_reserve();
/*
* book3s is limited to 16 page sizes due to encoding this in
* a 4-bit field for slices.

View File

@ -103,6 +103,11 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
perf_callchain_store(entry, perf_arch_instruction_pointer(regs));
if (!current->mm)
return;
if (!is_32bit_task())
perf_callchain_user_64(entry, regs);
else

View File

@ -142,7 +142,6 @@ void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
next_ip = perf_arch_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned int __user *) (unsigned long) sp;

View File

@ -77,7 +77,6 @@ void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
next_ip = perf_arch_instruction_pointer(regs);
lr = regs->link;
sp = regs->gpr[1];
perf_callchain_store(entry, next_ip);
while (entry->nr < entry->max_stack) {
fp = (unsigned long __user *) sp;

View File

@ -2,8 +2,8 @@
copyuser_64_t0
copyuser_64_t1
copyuser_64_t2
copyuser_p7_t0
copyuser_p7_t1
copyuser_p7
copyuser_p7_vmx
memcpy_64_t0
memcpy_64_t1
memcpy_64_t2

View File

@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
copyuser_p7 copyuser_p7_vmx \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \
@ -28,10 +28,15 @@ $(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
-o $@ $^
$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES)
$(OUTPUT)/copyuser_p7: copyuser_power7.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
-D COPY_LOOP=test___copy_tofrom_user_power7 \
-D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
-o $@ $^
$(OUTPUT)/copyuser_p7_vmx: copyuser_power7.S $(EXTRA_SOURCES) ../utils.c
$(CC) $(CPPFLAGS) $(CFLAGS) \
-D COPY_LOOP=test___copy_tofrom_user_power7_vmx \
-D VMX_TEST \
-o $@ $^
# Strictly speaking, we only need the memcpy_64 test cases for big-endian

View File

@ -1,13 +1,5 @@
#include <asm/ppc_asm.h>
FUNC_START(enter_vmx_usercopy)
li r3,1
blr
FUNC_START(exit_vmx_usercopy)
li r3,0
blr
FUNC_START(enter_vmx_ops)
li r3,1
blr

View File

@ -12,6 +12,10 @@
#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
#define POISON 0xa5
#ifdef VMX_TEST
#define VMX_COPY_THRESHOLD 3328
#endif
unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
static void do_one(char *src, char *dst, unsigned long src_off,
@ -81,8 +85,12 @@ int test_copy_loop(void)
/* Fill with sequential bytes */
for (i = 0; i < BUFLEN; i++)
fill[i] = i & 0xff;
#ifdef VMX_TEST
/* Force sizes above kernel VMX threshold (3328) */
for (len = VMX_COPY_THRESHOLD + 1; len < MAX_LEN; len++) {
#else
for (len = 1; len < MAX_LEN; len++) {
#endif
for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
do_one(src, dst, src_off, dst_off, len,
@ -96,5 +104,10 @@ int test_copy_loop(void)
/*
 * Selftest entry point: run test_copy_loop() under the powerpc selftest
 * harness, named after the copy routine under test.
 */
int main(void)
{
#ifdef VMX_TEST
/* The VMX copy variant needs Altivec; skip (not fail) on CPUs without it. */
SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC), "ALTIVEC not supported");
#endif
return test_harness(test_copy_loop, str(COPY_LOOP));
}