Centos-kernel-stream-9/kernel/kexec.c

310 lines
7.6 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
2015-09-09 22:38:55 +00:00
* kexec.c - kexec_load system call
* Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com>
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
#include <linux/security.h>
#include <linux/kexec.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/syscalls.h>
#include <linux/vmalloc.h>
2015-09-09 22:38:55 +00:00
#include <linux/slab.h>
#include "kexec_internal.h"
static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
struct kexec_segment *segments,
unsigned long flags)
{
int ret;
struct kimage *image;
bool kexec_on_panic = flags & KEXEC_ON_CRASH;
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#ifdef CONFIG_CRASH_DUMP
if (kexec_on_panic) {
/* Verify we have a valid entry point */
if ((entry < phys_to_boot_phys(crashk_res.start)) ||
(entry > phys_to_boot_phys(crashk_res.end)))
return -EADDRNOTAVAIL;
}
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#endif
/* Allocate and initialize a controlling structure */
image = do_kimage_alloc_init();
if (!image)
return -ENOMEM;
image->start = entry;
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
image->nr_segments = nr_segments;
memcpy(image->segment, segments, nr_segments * sizeof(*segments));
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#ifdef CONFIG_CRASH_DUMP
if (kexec_on_panic) {
/* Enable special crash kernel control page alloc policy. */
image->control_page = crashk_res.start;
image->type = KEXEC_TYPE_CRASH;
}
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#endif
ret = sanity_check_segment_list(image);
if (ret)
goto out_free_image;
/*
* Find a location for the control code buffer, and add it
* the vector of segments so that it's pages will also be
* counted as destination pages.
*/
ret = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
pr_err("Could not allocate control_code_buffer\n");
goto out_free_image;
}
if (!kexec_on_panic) {
image->swap_page = kimage_alloc_control_pages(image, 0);
if (!image->swap_page) {
pr_err("Could not allocate swap buffer\n");
goto out_free_control_pages;
}
kexec jump This patch provides an enhancement to kexec/kdump. It implements the following features: - Backup/restore memory used by the original kernel before/after kexec. - Save/restore CPU state before/after kexec. The features of this patch can be used as a general method to call program in physical mode (paging turning off). This can be used to call BIOS code under Linux. kexec-tools needs to be patched to support kexec jump. The patches and the precompiled kexec can be download from the following URL: source: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-src_git_kh10.tar.bz2 patches: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec-tools-patches_git_kh10.tar.bz2 binary: http://khibernation.sourceforge.net/download/release_v10/kexec-tools/kexec_git_kh10 Usage example of calling some physical mode code and return: 1. Compile and install patched kernel with following options selected: CONFIG_X86_32=y CONFIG_KEXEC=y CONFIG_PM=y CONFIG_KEXEC_JUMP=y 2. Build patched kexec-tool or download the pre-built one. 3. Build some physical mode executable named such as "phy_mode" 4. Boot kernel compiled in step 1. 5. Load physical mode executable with /sbin/kexec. The shell command line can be as follow: /sbin/kexec --load-preserve-context --args-none phy_mode 6. Call physical mode executable with following shell command line: /sbin/kexec -e Implementation point: To support jumping without reserving memory. One shadow backup page (source page) is allocated for each page used by kexeced code image (destination page). When do kexec_load, the image of kexeced code is loaded into source pages, and before executing, the destination pages and the source pages are swapped, so the contents of destination pages are backupped. Before jumping to the kexeced code image and after jumping back to the original kernel, the destination pages and the source pages are swapped too. C ABI (calling convention) is used as communication protocol between kernel and called code. A flag named KEXEC_PRESERVE_CONTEXT for sys_kexec_load is added to indicate that the loaded kernel image is used for jumping back. Now, only the i386 architecture is supported. Signed-off-by: Huang Ying <ying.huang@intel.com> Acked-by: Vivek Goyal <vgoyal@redhat.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Pavel Machek <pavel@ucw.cz> Cc: Nigel Cunningham <nigel@nigel.suspend2.net> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-26 02:45:07 +00:00
}
*rimage = image;
return 0;
out_free_control_pages:
kimage_free_page_list(&image->control_pages);
out_free_image:
kfree(image);
return ret;
}
static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
struct kexec_segment *segments, unsigned long flags)
{
struct kimage **dest_image, *image;
unsigned long i;
int ret;
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
/*
* Because we write directly to the reserved memory region when loading
panic, kexec: make __crash_kexec() NMI safe Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2166717 Upstream-status: https://github.com/torvalds/linux.git commit 05c6257433b7212f07a7e53479a8ab038fc1666a Author: Valentin Schneider <vschneid@redhat.com> Date: Thu Jun 30 23:32:58 2022 +0100 panic, kexec: make __crash_kexec() NMI safe Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Valentin Schneider <vschneid@redhat.com>
2023-02-02 18:05:46 +00:00
* crash kernels we need a serialization here to prevent multiple crash
* kernels from attempting to load simultaneously.
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
*/
panic, kexec: make __crash_kexec() NMI safe Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2166717 Upstream-status: https://github.com/torvalds/linux.git commit 05c6257433b7212f07a7e53479a8ab038fc1666a Author: Valentin Schneider <vschneid@redhat.com> Date: Thu Jun 30 23:32:58 2022 +0100 panic, kexec: make __crash_kexec() NMI safe Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Valentin Schneider <vschneid@redhat.com>
2023-02-02 18:05:46 +00:00
if (!kexec_trylock())
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
return -EBUSY;
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#ifdef CONFIG_CRASH_DUMP
if (flags & KEXEC_ON_CRASH) {
dest_image = &kexec_crash_image;
if (kexec_crash_image)
arch_kexec_unprotect_crashkres();
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
} else
#endif
dest_image = &kexec_image;
if (nr_segments == 0) {
/* Uninstall image */
kimage_free(xchg(dest_image, NULL));
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
ret = 0;
goto out_unlock;
}
if (flags & KEXEC_ON_CRASH) {
/*
* Loading another kernel to switch to if this one
* crashes. Free any current crash dump kernel before
* we corrupt it.
*/
kimage_free(xchg(&kexec_crash_image, NULL));
}
ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
if (ret)
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
goto out_unlock;
if (flags & KEXEC_PRESERVE_CONTEXT)
image->preserve_context = 1;
crash: hotplug support for kexec_load() JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because commit cbc2fe9d9cb2 ("kexec_file: add kexec_file flag to control debug printing") has been back ported, and in 1st hunk of kernel/ksysfs.c because of fuzz. Meanwhile move definition of kexec_dprintk down for easier latter back porting. commit a72bbec70da285a7e09e53fb13c2da7da2032da9 Author: Eric DeVolder <eric.devolder@oracle.com> Date: Mon Aug 14 17:44:44 2023 -0400 crash: hotplug support for kexec_load() The hotplug support for kexec_load() requires changes to the userspace kexec-tools and a little extra help from the kernel. Given a kdump capture kernel loaded via kexec_load(), and a subsequent hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites it to reflect the hotplug change. That is the desired outcome, however, at kernel panic time, the purgatory integrity check fails (because the elfcorehdr changed), and the capture kernel does not boot and no vmcore is generated. Therefore, the userspace kexec-tools/kexec must indicate to the kernel that the elfcorehdr can be modified (because the kexec excluded the elfcorehdr from the digest, and sized the elfcorehdr memory buffer appropriately). To facilitate hotplug support with kexec_load(): - a new kexec flag KEXEC_UPATE_ELFCOREHDR indicates that it is safe for the kernel to modify the kexec_load()'d elfcorehdr - the /sys/kernel/crash_elfcorehdr_size node communicates the preferred size of the elfcorehdr memory buffer - The sysfs crash_hotplug nodes (ie. /sys/devices/system/[cpu|memory]/crash_hotplug) dynamically take into account kexec_file_load() vs kexec_load() and KEXEC_UPDATE_ELFCOREHDR. This is critical so that the udev rule processing of crash_hotplug is all that is needed to determine if the userspace unload-then-load of the kdump image is to be skipped, or not. The proposed udev rule change looks like: # The kernel updates the crash elfcorehdr for CPU and memory changes SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" The table below indicates the behavior of kexec_load()'d kdump image updates (with the new udev crash_hotplug rule in place): Kernel |Kexec -------+-----+---- Old |Old |New | a | a -------+-----+---- New | a | b -------+-----+---- where kexec 'old' and 'new' delineate kexec-tools has the needed modifications for the crash hotplug feature, and kernel 'old' and 'new' delineate the kernel supports this crash hotplug feature. Behavior 'a' indicates the unload-then-reload of the entire kdump image. For the kexec 'old' column, the unload-then-reload occurs due to the missing flag KEXEC_UPDATE_ELFCOREHDR. An 'old' kernel (with 'new' kexec) does not present the crash_hotplug sysfs node, which leads to the unload-then-reload of the kdump image. Behavior 'b' indicates the desired optimized behavior of the kernel directly modifying the elfcorehdr and avoiding the unload-then-reload of the kdump image. If the udev rule is not updated with crash_hotplug node check, then no matter any combination of kernel or kexec is new or old, the kdump image continues to be unload-then-reload on hotplug changes. To fully support crash hotplug feature, there needs to be a rollout of kernel, kexec-tools and udev rule changes. However, the order of the rollout of these pieces does not matter; kexec_load()'d kdump images still function for hotplug as-is. Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com Signed-off-by: Eric DeVolder <eric.devolder@oracle.com> Suggested-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Baoquan He <bhe@redhat.com> Cc: Akhil Raj <lf32.dev@gmail.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Borislav Petkov (AMD) <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Young <dyoung@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Mimi Zohar <zohar@linux.ibm.com> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Sean Christopherson <seanjc@google.com> Cc: Sourabh Jain <sourabhjain@linux.ibm.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Weißschuh <linux@weissschuh.net> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-29 11:13:19 +00:00
#ifdef CONFIG_CRASH_HOTPLUG
crash: add a new kexec flag for hotplug support JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git commit 79365026f86948b52c3cb7bf099dded92c559b4c Author: Sourabh Jain <sourabhjain@linux.ibm.com> Date: Tue Mar 26 11:24:09 2024 +0530 crash: add a new kexec flag for hotplug support Commit a72bbec70da2 ("crash: hotplug support for kexec_load()") introduced a new kexec flag, `KEXEC_UPDATE_ELFCOREHDR`. Kexec tool uses this flag to indicate to the kernel that it is safe to modify the elfcorehdr of the kdump image loaded using the kexec_load system call. However, it is possible that architectures may need to update kexec segments other then elfcorehdr. For example, FDT (Flatten Device Tree) on PowerPC. Introducing a new kexec flag for every new kexec segment may not be a good solution. Hence, a generic kexec flag bit, `KEXEC_CRASH_HOTPLUG_SUPPORT`, is introduced to share the CPU/Memory hotplug support intent between the kexec tool and the kernel for the kexec_load system call. Now we have two kexec flags that enables crash hotplug support for kexec_load system call. First is KEXEC_UPDATE_ELFCOREHDR (only used in x86), and second is KEXEC_CRASH_HOTPLUG_SUPPORT (for all architectures). To simplify the process of finding and reporting the crash hotplug support the following changes are introduced. 1. Define arch specific function to process the kexec flags and determine crash hotplug support 2. Rename the @update_elfcorehdr member of struct kimage to @hotplug_support and populate it for both kexec_load and kexec_file_load syscalls, because architecture can update more than one kexec segment 3. Let generic function crash_check_hotplug_support report hotplug support for loaded kdump image based on value of @hotplug_support To bring the x86 crash hotplug support in line with the above points, the following changes have been made: - Introduce the arch_crash_hotplug_support function to process kexec flags and determine crash hotplug support - Remove the arch_crash_hotplug_[cpu|memory]_support functions Signed-off-by: Sourabh Jain <sourabhjain@linux.ibm.com> Acked-by: Baoquan He <bhe@redhat.com> Acked-by: Hari Bathini <hbathini@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://msgid.link/20240326055413.186534-3-sourabhjain@linux.ibm.com Signed-off-by: Baoquan He <bhe@redhat.com>
2024-11-04 02:51:12 +00:00
if ((flags & KEXEC_ON_CRASH) && arch_crash_hotplug_support(image, flags))
image->hotplug_support = 1;
crash: hotplug support for kexec_load() JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because commit cbc2fe9d9cb2 ("kexec_file: add kexec_file flag to control debug printing") has been back ported, and in 1st hunk of kernel/ksysfs.c because of fuzz. Meanwhile move definition of kexec_dprintk down for easier latter back porting. commit a72bbec70da285a7e09e53fb13c2da7da2032da9 Author: Eric DeVolder <eric.devolder@oracle.com> Date: Mon Aug 14 17:44:44 2023 -0400 crash: hotplug support for kexec_load() The hotplug support for kexec_load() requires changes to the userspace kexec-tools and a little extra help from the kernel. Given a kdump capture kernel loaded via kexec_load(), and a subsequent hotplug event, the crash hotplug handler finds the elfcorehdr and rewrites it to reflect the hotplug change. That is the desired outcome, however, at kernel panic time, the purgatory integrity check fails (because the elfcorehdr changed), and the capture kernel does not boot and no vmcore is generated. Therefore, the userspace kexec-tools/kexec must indicate to the kernel that the elfcorehdr can be modified (because the kexec excluded the elfcorehdr from the digest, and sized the elfcorehdr memory buffer appropriately). To facilitate hotplug support with kexec_load(): - a new kexec flag KEXEC_UPATE_ELFCOREHDR indicates that it is safe for the kernel to modify the kexec_load()'d elfcorehdr - the /sys/kernel/crash_elfcorehdr_size node communicates the preferred size of the elfcorehdr memory buffer - The sysfs crash_hotplug nodes (ie. /sys/devices/system/[cpu|memory]/crash_hotplug) dynamically take into account kexec_file_load() vs kexec_load() and KEXEC_UPDATE_ELFCOREHDR. This is critical so that the udev rule processing of crash_hotplug is all that is needed to determine if the userspace unload-then-load of the kdump image is to be skipped, or not. The proposed udev rule change looks like: # The kernel updates the crash elfcorehdr for CPU and memory changes SUBSYSTEM=="cpu", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" SUBSYSTEM=="memory", ATTRS{crash_hotplug}=="1", GOTO="kdump_reload_end" The table below indicates the behavior of kexec_load()'d kdump image updates (with the new udev crash_hotplug rule in place): Kernel |Kexec -------+-----+---- Old |Old |New | a | a -------+-----+---- New | a | b -------+-----+---- where kexec 'old' and 'new' delineate kexec-tools has the needed modifications for the crash hotplug feature, and kernel 'old' and 'new' delineate the kernel supports this crash hotplug feature. Behavior 'a' indicates the unload-then-reload of the entire kdump image. For the kexec 'old' column, the unload-then-reload occurs due to the missing flag KEXEC_UPDATE_ELFCOREHDR. An 'old' kernel (with 'new' kexec) does not present the crash_hotplug sysfs node, which leads to the unload-then-reload of the kdump image. Behavior 'b' indicates the desired optimized behavior of the kernel directly modifying the elfcorehdr and avoiding the unload-then-reload of the kdump image. If the udev rule is not updated with crash_hotplug node check, then no matter any combination of kernel or kexec is new or old, the kdump image continues to be unload-then-reload on hotplug changes. To fully support crash hotplug feature, there needs to be a rollout of kernel, kexec-tools and udev rule changes. However, the order of the rollout of these pieces does not matter; kexec_load()'d kdump images still function for hotplug as-is. Link: https://lkml.kernel.org/r/20230814214446.6659-7-eric.devolder@oracle.com Signed-off-by: Eric DeVolder <eric.devolder@oracle.com> Suggested-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Hari Bathini <hbathini@linux.ibm.com> Acked-by: Baoquan He <bhe@redhat.com> Cc: Akhil Raj <lf32.dev@gmail.com> Cc: Bjorn Helgaas <bhelgaas@google.com> Cc: Borislav Petkov (AMD) <bp@alien8.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Dave Young <dyoung@redhat.com> Cc: David Hildenbrand <david@redhat.com> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Mimi Zohar <zohar@linux.ibm.com> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Oscar Salvador <osalvador@suse.de> Cc: "Rafael J. Wysocki" <rafael@kernel.org> Cc: Sean Christopherson <seanjc@google.com> Cc: Sourabh Jain <sourabhjain@linux.ibm.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Thomas Weißschuh <linux@weissschuh.net> Cc: Valentin Schneider <vschneid@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-29 11:13:19 +00:00
#endif
ret = machine_kexec_prepare(image);
if (ret)
goto out;
kdump: protect vmcoreinfo data under the crash memory Currently vmcoreinfo data is updated at boot time subsys_initcall(), it has the risk of being modified by some wrong code during system is running. As a result, vmcore dumped may contain the wrong vmcoreinfo. Later on, when using "crash", "makedumpfile", etc utility to parse this vmcore, we probably will get "Segmentation fault" or other unexpected errors. E.g. 1) wrong code overwrites vmcoreinfo_data; 2) further crashes the system; 3) trigger kdump, then we obviously will fail to recognize the crash context correctly due to the corrupted vmcoreinfo. Now except for vmcoreinfo, all the crash data is well protected(including the cpu note which is fully updated in the crash path, thus its correctness is guaranteed). Given that vmcoreinfo data is a large chunk prepared for kdump, we better protect it as well. To solve this, we relocate and copy vmcoreinfo_data to the crash memory when kdump is loading via kexec syscalls. Because the whole crash memory will be protected by existing arch_kexec_protect_crashkres() mechanism, we naturally protect vmcoreinfo_data from write(even read) access under kernel direct mapping after kdump is loaded. Since kdump is usually loaded at the very early stage after boot, we can trust the correctness of the vmcoreinfo data copied. On the other hand, we still need to operate the vmcoreinfo safe copy when crash happens to generate vmcoreinfo_note again, we rely on vmap() to map out a new kernel virtual address and update to use this new one instead in the following crash_save_vmcoreinfo(). BTW, we do not touch vmcoreinfo_note, because it will be fully updated using the protected vmcoreinfo_data after crash which is surely correct just like the cpu crash note. Link: http://lkml.kernel.org/r/1493281021-20737-3-git-send-email-xlpang@redhat.com Signed-off-by: Xunlei Pang <xlpang@redhat.com> Tested-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Dave Young <dyoung@redhat.com> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.vnet.ibm.com> Cc: Juergen Gross <jgross@suse.com> Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2017-07-12 21:33:21 +00:00
/*
* Some architecture(like S390) may touch the crash memory before
* machine_kexec_prepare(), we must copy vmcoreinfo data after it.
*/
ret = kimage_crash_copy_vmcoreinfo(image);
if (ret)
goto out;
for (i = 0; i < nr_segments; i++) {
ret = kimage_load_segment(image, &image->segment[i]);
if (ret)
goto out;
}
kimage_terminate(image);
ret = machine_kexec_post_load(image);
if (ret)
goto out;
/* Install the new kernel and uninstall the old */
image = xchg(dest_image, image);
out:
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#ifdef CONFIG_CRASH_DUMP
if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
arch_kexec_protect_crashkres();
crash: split crash dumping code out from kexec_core.c JIRA: https://issues.redhat.com/browse/RHEL-58641 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git Conflicts: There's conflict in last hunk of include/linux/kexec.h because of the fuzz caused by earlier back ported commits related to commit f4af41bf177a ("kexec: fix the unexpected kexec_dprintk() macro"). commit 02aff8480533817a29e820729360866441d7403d Author: Baoquan He <bhe@redhat.com> Date: Wed Jan 24 13:12:44 2024 +0800 crash: split crash dumping code out from kexec_core.c Currently, KEXEC_CORE select CRASH_CORE automatically because crash codes need be built in to avoid compiling error when building kexec code even though the crash dumping functionality is not enabled. E.g -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_KEXEC=y CONFIG_KEXEC_FILE=y --------------------- After splitting out crashkernel reservation code and vmcoreinfo exporting code, there's only crash related code left in kernel/crash_core.c. Now move crash related codes from kexec_core.c to crash_core.c and only build it in when CONFIG_CRASH_DUMP=y. And also wrap up crash codes inside CONFIG_CRASH_DUMP ifdeffery scope, or replace inappropriate CONFIG_KEXEC_CORE ifdef with CONFIG_CRASH_DUMP ifdef in generic kernel files. With these changes, crash_core codes are abstracted from kexec codes and can be disabled at all if only kexec reboot feature is wanted. Link: https://lkml.kernel.org/r/20240124051254.67105-5-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Hari Bathini <hbathini@linux.ibm.com> Cc: Pingfan Liu <piliu@redhat.com> Cc: Klara Modin <klarasmodin@gmail.com> Cc: Michael Kelley <mhklinux@outlook.com> Cc: Nathan Chancellor <nathan@kernel.org> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Yang Li <yang.lee@linux.alibaba.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-10-31 05:40:53 +00:00
#endif
kimage_free(image);
kexec: move locking into do_kexec_load Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 4b692e861619353ce069e547a67c8d0e32d9ef3d Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:10 2021 -0700 kexec: move locking into do_kexec_load Patch series "compat: remove compat_alloc_user_space", v5. Going through compat_alloc_user_space() to convert indirect system call arguments tends to add complexity compared to handling the native and compat logic in the same code. This patch (of 6): The locking is the same between the native and compat version of sys_kexec_load(), so it can be done in the common implementation to reduce duplication. Link: https://lkml.kernel.org/r/20210727144859.4150043-1-arnd@kernel.org Link: https://lkml.kernel.org/r/20210727144859.4150043-2-arnd@kernel.org Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will@kernel.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Helge Deller <deller@gmx.de> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: "David S. Miller" <davem@davemloft.net> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: Borislav Petkov <bp@alien8.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Feng Tang <feng.tang@intel.com> Cc: Christoph Hellwig <hch@lst.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:41 +00:00
out_unlock:
panic, kexec: make __crash_kexec() NMI safe Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2166717 Upstream-status: https://github.com/torvalds/linux.git commit 05c6257433b7212f07a7e53479a8ab038fc1666a Author: Valentin Schneider <vschneid@redhat.com> Date: Thu Jun 30 23:32:58 2022 +0100 panic, kexec: make __crash_kexec() NMI safe Attempting to get a crash dump out of a debug PREEMPT_RT kernel via an NMI panic() doesn't work. The cause of that lies in the PREEMPT_RT definition of mutex_trylock(): if (IS_ENABLED(CONFIG_DEBUG_RT_MUTEXES) && WARN_ON_ONCE(!in_task())) return 0; This prevents an nmi_panic() from executing the main body of __crash_kexec() which does the actual kexec into the kdump kernel. The warning and return are explained by: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") [...] The reasons for this are: 1) There is a potential deadlock in the slowpath 2) Another cpu which blocks on the rtmutex will boost the task which allegedly locked the rtmutex, but that cannot work because the hard/softirq context borrows the task context. Furthermore, grabbing the lock isn't NMI safe, so do away with kexec_mutex and replace it with an atomic variable. This is somewhat overzealous as *some* callsites could keep using a mutex (e.g. the sysfs-facing ones like crash_shrink_memory()), but this has the benefit of involving a single unified lock and preventing any future NMI-related surprises. Tested by triggering NMI panics via: $ echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi $ echo 1 > /proc/sys/kernel/unknown_nmi_panic $ echo 1 > /proc/sys/kernel/panic $ ipmitool power diag Link: https://lkml.kernel.org/r/20220630223258.4144112-3-vschneid@redhat.com Fixes: 6ce47fd961fa ("rtmutex: Warn if trylock is called from hard/softirq context") Signed-off-by: Valentin Schneider <vschneid@redhat.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Baoquan He <bhe@redhat.com> Cc: "Eric W . Biederman" <ebiederm@xmission.com> Cc: Juri Lelli <jlelli@redhat.com> Cc: Luis Claudio R. Goncalves <lgoncalv@redhat.com> Cc: Miaohe Lin <linmiaohe@huawei.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Valentin Schneider <vschneid@redhat.com>
2023-02-02 18:05:46 +00:00
kexec_unlock();
return ret;
}
/*
* Exec Kernel system call: for obvious reasons only root may call it.
*
* This call breaks up into three pieces.
* - A generic part which loads the new kernel from the current
* address space, and very carefully places the data in the
* allocated pages.
*
* - A generic part that interacts with the kernel and tells all of
* the devices to shut down. Preventing on-going dmas, and placing
* the devices in a consistent state so a later kernel can
* reinitialize them.
*
* - A machine specific part that includes the syscall number
* and then copies the image to it's final destination. And
* jumps into the image at entry.
*
* kexec does not sync, or unmount filesystems so if you need
* that to happen you need to do that yourself.
*/
static inline int kexec_load_check(unsigned long nr_segments,
unsigned long flags)
{
kexec: introduce sysctl parameters kexec_load_limit_* JIRA: https://issues.redhat.com/browse/RHEL-32199 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git commit a42aaad2e47b23d63037bfc0130e33fc0f74cd71 Author: Ricardo Ribalda <ribalda@chromium.org> Date: Wed Jan 4 15:38:48 2023 +0100 kexec: introduce sysctl parameters kexec_load_limit_* kexec allows replacing the current kernel with a different one. This is usually a source of concerns for sysadmins that want to harden a system. Linux already provides a way to disable loading new kexec kernel via kexec_load_disabled, but that control is very coard, it is all or nothing and does not make distinction between a panic kexec and a normal kexec. This patch introduces new sysctl parameters, with finer tuning to specify how many times a kexec kernel can be loaded. The sysadmin can set different limits for kexec panic and kexec reboot kernels. The value can be modified at runtime via sysctl, but only with a stricter value. With these new parameters on place, a system with loadpin and verity enabled, using the following kernel parameters: sysctl.kexec_load_limit_reboot=0 sysct.kexec_load_limit_panic=1 can have a good warranty that if initrd tries to load a panic kernel, a malitious user will have small chances to replace that kernel with a different one, even if they can trigger timeouts on the disk where the panic kernel lives. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-3-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda <ribalda@chromium.org> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Acked-by: Baoquan He <bhe@redhat.com> Cc: Bagas Sanjaya <bagasdotme@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck Cc: Joel Fernandes (Google) <joel@joelfernandes.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Philipp Rudo <prudo@redhat.com> Cc: Ross Zwisler <zwisler@kernel.org> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-04-15 03:16:00 +00:00
int image_type = (flags & KEXEC_ON_CRASH) ?
KEXEC_TYPE_CRASH : KEXEC_TYPE_DEFAULT;
int result;
/* We only trust the superuser with rebooting the system. */
kexec: introduce sysctl parameters kexec_load_limit_* JIRA: https://issues.redhat.com/browse/RHEL-32199 Upstream Status: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git commit a42aaad2e47b23d63037bfc0130e33fc0f74cd71 Author: Ricardo Ribalda <ribalda@chromium.org> Date: Wed Jan 4 15:38:48 2023 +0100 kexec: introduce sysctl parameters kexec_load_limit_* kexec allows replacing the current kernel with a different one. This is usually a source of concerns for sysadmins that want to harden a system. Linux already provides a way to disable loading new kexec kernel via kexec_load_disabled, but that control is very coard, it is all or nothing and does not make distinction between a panic kexec and a normal kexec. This patch introduces new sysctl parameters, with finer tuning to specify how many times a kexec kernel can be loaded. The sysadmin can set different limits for kexec panic and kexec reboot kernels. The value can be modified at runtime via sysctl, but only with a stricter value. With these new parameters on place, a system with loadpin and verity enabled, using the following kernel parameters: sysctl.kexec_load_limit_reboot=0 sysct.kexec_load_limit_panic=1 can have a good warranty that if initrd tries to load a panic kernel, a malitious user will have small chances to replace that kernel with a different one, even if they can trigger timeouts on the disk where the panic kernel lives. Link: https://lkml.kernel.org/r/20221114-disable-kexec-reset-v6-3-6a8531a09b9a@chromium.org Signed-off-by: Ricardo Ribalda <ribalda@chromium.org> Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org> Acked-by: Baoquan He <bhe@redhat.com> Cc: Bagas Sanjaya <bagasdotme@gmail.com> Cc: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Guilherme G. Piccoli <gpiccoli@igalia.com> # Steam Deck Cc: Joel Fernandes (Google) <joel@joelfernandes.org> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Philipp Rudo <prudo@redhat.com> Cc: Ross Zwisler <zwisler@kernel.org> Cc: Sergey Senozhatsky <senozhatsky@chromium.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Baoquan He <bhe@redhat.com>
2024-04-15 03:16:00 +00:00
if (!kexec_load_permitted(image_type))
return -EPERM;
/* Permit LSMs and IMA to fail the kexec */
LSM: Introduce kernel_post_load_data() hook There are a few places in the kernel where LSMs would like to have visibility into the contents of a kernel buffer that has been loaded or read. While security_kernel_post_read_file() (which includes the buffer) exists as a pairing for security_kernel_read_file(), no such hook exists to pair with security_kernel_load_data(). Earlier proposals for just using security_kernel_post_read_file() with a NULL file argument were rejected (i.e. "file" should always be valid for the security_..._file hooks, but it appears at least one case was left in the kernel during earlier refactoring. (This will be fixed in a subsequent patch.) Since not all cases of security_kernel_load_data() can have a single contiguous buffer made available to the LSM hook (e.g. kexec image segments are separately loaded), there needs to be a way for the LSM to reason about its expectations of the hook coverage. In order to handle this, add a "contents" argument to the "kernel_load_data" hook that indicates if the newly added "kernel_post_load_data" hook will be called with the full contents once loaded. That way, LSMs requiring full contents can choose to unilaterally reject "kernel_load_data" with contents=false (which is effectively the existing hook coverage), but when contents=true they can allow it and later evaluate the "kernel_post_load_data" hook once the buffer is loaded. With this change, LSMs can gain coverage over non-file-backed data loads (e.g. init_module(2) and firmware userspace helper), which will happen in subsequent patches. Additionally prepare IMA to start processing these cases. Signed-off-by: Kees Cook <keescook@chromium.org> Reviewed-by: KP Singh <kpsingh@google.com> Link: https://lore.kernel.org/r/20201002173828.2099543-9-keescook@chromium.org Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2020-10-02 17:38:20 +00:00
result = security_kernel_load_data(LOADING_KEXEC_IMAGE, false);
if (result < 0)
return result;
/*
* kexec can be used to circumvent module loading restrictions, so
* prevent loading in that case
*/
result = security_locked_down(LOCKDOWN_KEXEC);
if (result)
return result;
/*
* Verify we have a legal set of flags
* This leaves us room for future extensions.
*/
if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
return -EINVAL;
/* Put an artificial cap on the number
* of segments passed to kexec_load.
*/
if (nr_segments > KEXEC_SEGMENT_MAX)
return -EINVAL;
return 0;
}
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
struct kexec_segment __user *, segments, unsigned long, flags)
{
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
struct kexec_segment *ksegments;
unsigned long result;
result = kexec_load_check(nr_segments, flags);
if (result)
return result;
/* Verify we are on the appropriate architecture */
if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
return -EINVAL;
ksegments = memdup_array_user(segments, nr_segments, sizeof(ksegments[0]));
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
if (IS_ERR(ksegments))
return PTR_ERR(ksegments);
result = do_kexec_load(entry, nr_segments, ksegments, flags);
kfree(ksegments);
return result;
}
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
compat_ulong_t, nr_segments,
struct compat_kexec_segment __user *, segments,
compat_ulong_t, flags)
{
struct compat_kexec_segment in;
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
struct kexec_segment *ksegments;
unsigned long i, result;
result = kexec_load_check(nr_segments, flags);
if (result)
return result;
/* Don't allow clients that don't understand the native
* architecture to do anything.
*/
if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
return -EINVAL;
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
ksegments = kmalloc_array(nr_segments, sizeof(ksegments[0]),
GFP_KERNEL);
if (!ksegments)
return -ENOMEM;
for (i = 0; i < nr_segments; i++) {
result = copy_from_user(&in, &segments[i], sizeof(in));
if (result)
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
goto fail;
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
ksegments[i].buf = compat_ptr(in.buf);
ksegments[i].bufsz = in.bufsz;
ksegments[i].mem = in.mem;
ksegments[i].memsz = in.memsz;
}
result = do_kexec_load(entry, nr_segments, ksegments, flags);
kexec: avoid compat_alloc_user_space Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2023396 This patch is a backport of the following upstream commit: commit 5d700a0fd71ded7096b97f01d276efc1a6579613 Author: Arnd Bergmann <arnd@arndb.de> Date: Wed Sep 8 15:18:13 2021 -0700 kexec: avoid compat_alloc_user_space kimage_alloc_init() expects a __user pointer, so compat_sys_kexec_load() uses compat_alloc_user_space() to convert the layout and put it back onto the user space caller stack. Moving the user space access into the syscall handler directly actually makes the code simpler, as the conversion for compat mode can now be done on kernel memory. Link: https://lkml.kernel.org/r/20210727144859.4150043-3-arnd@kernel.org Link: https://lore.kernel.org/lkml/YPbtsU4GX6PL7%2F42@infradead.org/ Link: https://lore.kernel.org/lkml/m1y2cbzmnw.fsf@fess.ebiederm.org/ Signed-off-by: Arnd Bergmann <arnd@arndb.de> Co-developed-by: Eric Biederman <ebiederm@xmission.com> Co-developed-by: Christoph Hellwig <hch@infradead.org> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christian Borntraeger <borntraeger@de.ibm.com> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Feng Tang <feng.tang@intel.com> Cc: Heiko Carstens <hca@linux.ibm.com> Cc: Helge Deller <deller@gmx.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <James.Bottomley@HansenPartnership.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vasily Gorbik <gor@linux.ibm.com> Cc: Will Deacon <will@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Rafael Aquini <aquini@redhat.com>
2021-11-29 16:38:42 +00:00
fail:
kfree(ksegments);
return result;
}
#endif