mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-26 18:43:33 -05:00
76f4e2da45
Currently machine_kexec() doesn't reset to EL2 in the case of a crashdump kernel. This leaves potentially dodgy state active at EL2, and means that if the crashdump kernel attempts to online secondary CPUs, these will be booted as mismatched ELs. Let's reset to EL2, as we do in all other cases, and simplify things. If EL2 state is corrupt, things are already sufficiently bad that kdump is unlikely to work, and it's best-effort regardless. Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: James Morse <james.morse@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com>
363 lines
9.4 KiB
C
363 lines
9.4 KiB
C
/*
|
|
* kexec for arm64
|
|
*
|
|
* Copyright (C) Linaro.
|
|
* Copyright (C) Huawei Futurewei Technologies.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/interrupt.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/page-flags.h>
|
|
#include <linux/smp.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/cpu_ops.h>
|
|
#include <asm/daifflags.h>
|
|
#include <asm/memory.h>
|
|
#include <asm/mmu.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/page.h>
|
|
|
|
#include "cpu-reset.h"
|
|
|
|
/* Global variables for the arm64_relocate_new_kernel routine. */
|
|
extern const unsigned char arm64_relocate_new_kernel[];
|
|
extern const unsigned long arm64_relocate_new_kernel_size;
|
|
|
|
/**
|
|
* kexec_image_info - For debugging output.
|
|
*/
|
|
#define kexec_image_info(_i) _kexec_image_info(__func__, __LINE__, _i)
|
|
static void _kexec_image_info(const char *func, int line,
|
|
const struct kimage *kimage)
|
|
{
|
|
unsigned long i;
|
|
|
|
pr_debug("%s:%d:\n", func, line);
|
|
pr_debug(" kexec kimage info:\n");
|
|
pr_debug(" type: %d\n", kimage->type);
|
|
pr_debug(" start: %lx\n", kimage->start);
|
|
pr_debug(" head: %lx\n", kimage->head);
|
|
pr_debug(" nr_segments: %lu\n", kimage->nr_segments);
|
|
|
|
for (i = 0; i < kimage->nr_segments; i++) {
|
|
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
|
i,
|
|
kimage->segment[i].mem,
|
|
kimage->segment[i].mem + kimage->segment[i].memsz,
|
|
kimage->segment[i].memsz,
|
|
kimage->segment[i].memsz / PAGE_SIZE);
|
|
}
|
|
}
|
|
|
|
void machine_kexec_cleanup(struct kimage *kimage)
|
|
{
|
|
/* Empty routine needed to avoid build errors. */
|
|
}
|
|
|
|
/**
|
|
* machine_kexec_prepare - Prepare for a kexec reboot.
|
|
*
|
|
* Called from the core kexec code when a kernel image is loaded.
|
|
* Forbid loading a kexec kernel if we have no way of hotplugging cpus or cpus
|
|
* are stuck in the kernel. This avoids a panic once we hit machine_kexec().
|
|
*/
|
|
int machine_kexec_prepare(struct kimage *kimage)
|
|
{
|
|
kexec_image_info(kimage);
|
|
|
|
if (kimage->type != KEXEC_TYPE_CRASH && cpus_are_stuck_in_kernel()) {
|
|
pr_err("Can't kexec: CPUs are stuck in the kernel.\n");
|
|
return -EBUSY;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
* kexec_list_flush - Helper to flush the kimage list and source pages to PoC.
|
|
*/
|
|
static void kexec_list_flush(struct kimage *kimage)
|
|
{
|
|
kimage_entry_t *entry;
|
|
|
|
for (entry = &kimage->head; ; entry++) {
|
|
unsigned int flag;
|
|
void *addr;
|
|
|
|
/* flush the list entries. */
|
|
__flush_dcache_area(entry, sizeof(kimage_entry_t));
|
|
|
|
flag = *entry & IND_FLAGS;
|
|
if (flag == IND_DONE)
|
|
break;
|
|
|
|
addr = phys_to_virt(*entry & PAGE_MASK);
|
|
|
|
switch (flag) {
|
|
case IND_INDIRECTION:
|
|
/* Set entry point just before the new list page. */
|
|
entry = (kimage_entry_t *)addr - 1;
|
|
break;
|
|
case IND_SOURCE:
|
|
/* flush the source pages. */
|
|
__flush_dcache_area(addr, PAGE_SIZE);
|
|
break;
|
|
case IND_DESTINATION:
|
|
break;
|
|
default:
|
|
BUG();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* kexec_segment_flush - Helper to flush the kimage segments to PoC.
|
|
*/
|
|
static void kexec_segment_flush(const struct kimage *kimage)
|
|
{
|
|
unsigned long i;
|
|
|
|
pr_debug("%s:\n", __func__);
|
|
|
|
for (i = 0; i < kimage->nr_segments; i++) {
|
|
pr_debug(" segment[%lu]: %016lx - %016lx, 0x%lx bytes, %lu pages\n",
|
|
i,
|
|
kimage->segment[i].mem,
|
|
kimage->segment[i].mem + kimage->segment[i].memsz,
|
|
kimage->segment[i].memsz,
|
|
kimage->segment[i].memsz / PAGE_SIZE);
|
|
|
|
__flush_dcache_area(phys_to_virt(kimage->segment[i].mem),
|
|
kimage->segment[i].memsz);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* machine_kexec - Do the kexec reboot.
|
|
*
|
|
* Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
|
|
*/
|
|
void machine_kexec(struct kimage *kimage)
|
|
{
|
|
phys_addr_t reboot_code_buffer_phys;
|
|
void *reboot_code_buffer;
|
|
bool in_kexec_crash = (kimage == kexec_crash_image);
|
|
bool stuck_cpus = cpus_are_stuck_in_kernel();
|
|
|
|
/*
|
|
* New cpus may have become stuck_in_kernel after we loaded the image.
|
|
*/
|
|
BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
|
|
WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
|
|
"Some CPUs may be stale, kdump will be unreliable.\n");
|
|
|
|
reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
|
|
reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);
|
|
|
|
kexec_image_info(kimage);
|
|
|
|
pr_debug("%s:%d: control_code_page: %p\n", __func__, __LINE__,
|
|
kimage->control_code_page);
|
|
pr_debug("%s:%d: reboot_code_buffer_phys: %pa\n", __func__, __LINE__,
|
|
&reboot_code_buffer_phys);
|
|
pr_debug("%s:%d: reboot_code_buffer: %p\n", __func__, __LINE__,
|
|
reboot_code_buffer);
|
|
pr_debug("%s:%d: relocate_new_kernel: %p\n", __func__, __LINE__,
|
|
arm64_relocate_new_kernel);
|
|
pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
|
|
__func__, __LINE__, arm64_relocate_new_kernel_size,
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
/*
|
|
* Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
|
|
* after the kernel is shut down.
|
|
*/
|
|
memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
/* Flush the reboot_code_buffer in preparation for its execution. */
|
|
__flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
|
|
flush_icache_range((uintptr_t)reboot_code_buffer,
|
|
arm64_relocate_new_kernel_size);
|
|
|
|
/* Flush the kimage list and its buffers. */
|
|
kexec_list_flush(kimage);
|
|
|
|
/* Flush the new image if already in place. */
|
|
if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
|
|
kexec_segment_flush(kimage);
|
|
|
|
pr_info("Bye!\n");
|
|
|
|
local_daif_mask();
|
|
|
|
/*
|
|
* cpu_soft_restart will shutdown the MMU, disable data caches, then
|
|
* transfer control to the reboot_code_buffer which contains a copy of
|
|
* the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
|
|
* uses physical addressing to relocate the new image to its final
|
|
* position and transfers control to the image entry point when the
|
|
* relocation is complete.
|
|
*/
|
|
|
|
cpu_soft_restart(reboot_code_buffer_phys, kimage->head, kimage->start, 0);
|
|
|
|
BUG(); /* Should never get here. */
|
|
}
|
|
|
|
static void machine_kexec_mask_interrupts(void)
|
|
{
|
|
unsigned int i;
|
|
struct irq_desc *desc;
|
|
|
|
for_each_irq_desc(i, desc) {
|
|
struct irq_chip *chip;
|
|
int ret;
|
|
|
|
chip = irq_desc_get_chip(desc);
|
|
if (!chip)
|
|
continue;
|
|
|
|
/*
|
|
* First try to remove the active state. If this
|
|
* fails, try to EOI the interrupt.
|
|
*/
|
|
ret = irq_set_irqchip_state(i, IRQCHIP_STATE_ACTIVE, false);
|
|
|
|
if (ret && irqd_irq_inprogress(&desc->irq_data) &&
|
|
chip->irq_eoi)
|
|
chip->irq_eoi(&desc->irq_data);
|
|
|
|
if (chip->irq_mask)
|
|
chip->irq_mask(&desc->irq_data);
|
|
|
|
if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
|
|
chip->irq_disable(&desc->irq_data);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* machine_crash_shutdown - shutdown non-crashing cpus and save registers
|
|
*/
|
|
void machine_crash_shutdown(struct pt_regs *regs)
|
|
{
|
|
local_irq_disable();
|
|
|
|
/* shutdown non-crashing cpus */
|
|
crash_smp_send_stop();
|
|
|
|
/* for crashing cpu */
|
|
crash_save_cpu(regs, smp_processor_id());
|
|
machine_kexec_mask_interrupts();
|
|
|
|
pr_info("Starting crashdump kernel...\n");
|
|
}
|
|
|
|
void arch_kexec_protect_crashkres(void)
|
|
{
|
|
int i;
|
|
|
|
kexec_segment_flush(kexec_crash_image);
|
|
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
set_memory_valid(
|
|
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
|
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 0);
|
|
}
|
|
|
|
void arch_kexec_unprotect_crashkres(void)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
set_memory_valid(
|
|
__phys_to_virt(kexec_crash_image->segment[i].mem),
|
|
kexec_crash_image->segment[i].memsz >> PAGE_SHIFT, 1);
|
|
}
|
|
|
|
#ifdef CONFIG_HIBERNATION
|
|
/*
|
|
* To preserve the crash dump kernel image, the relevant memory segments
|
|
* should be mapped again around the hibernation.
|
|
*/
|
|
void crash_prepare_suspend(void)
|
|
{
|
|
if (kexec_crash_image)
|
|
arch_kexec_unprotect_crashkres();
|
|
}
|
|
|
|
void crash_post_resume(void)
|
|
{
|
|
if (kexec_crash_image)
|
|
arch_kexec_protect_crashkres();
|
|
}
|
|
|
|
/*
|
|
* crash_is_nosave
|
|
*
|
|
* Return true only if a page is part of reserved memory for crash dump kernel,
|
|
* but does not hold any data of loaded kernel image.
|
|
*
|
|
* Note that all the pages in crash dump kernel memory have been initially
|
|
* marked as Reserved in kexec_reserve_crashkres_pages().
|
|
*
|
|
* In hibernation, the pages which are Reserved and yet "nosave" are excluded
|
|
* from the hibernation iamge. crash_is_nosave() does thich check for crash
|
|
* dump kernel and will reduce the total size of hibernation image.
|
|
*/
|
|
|
|
bool crash_is_nosave(unsigned long pfn)
|
|
{
|
|
int i;
|
|
phys_addr_t addr;
|
|
|
|
if (!crashk_res.end)
|
|
return false;
|
|
|
|
/* in reserved memory? */
|
|
addr = __pfn_to_phys(pfn);
|
|
if ((addr < crashk_res.start) || (crashk_res.end < addr))
|
|
return false;
|
|
|
|
if (!kexec_crash_image)
|
|
return true;
|
|
|
|
/* not part of loaded kernel image? */
|
|
for (i = 0; i < kexec_crash_image->nr_segments; i++)
|
|
if (addr >= kexec_crash_image->segment[i].mem &&
|
|
addr < (kexec_crash_image->segment[i].mem +
|
|
kexec_crash_image->segment[i].memsz))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
|
|
{
|
|
unsigned long addr;
|
|
struct page *page;
|
|
|
|
for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
|
page = phys_to_page(addr);
|
|
ClearPageReserved(page);
|
|
free_reserved_page(page);
|
|
}
|
|
}
|
|
#endif /* CONFIG_HIBERNATION */
|
|
|
|
void arch_crash_save_vmcoreinfo(void)
|
|
{
|
|
VMCOREINFO_NUMBER(VA_BITS);
|
|
/* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */
|
|
vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n",
|
|
kimage_voffset);
|
|
vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n",
|
|
PHYS_OFFSET);
|
|
}
|