summaryrefslogtreecommitdiff
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/.gitignore1
-rw-r--r--arch/powerpc/kernel/Makefile23
-rw-r--r--arch/powerpc/kernel/asm-offsets.c1
-rw-r--r--arch/powerpc/kernel/cpu_setup_power.S6
-rw-r--r--arch/powerpc/kernel/cputable.c6
-rw-r--r--arch/powerpc/kernel/dbell.c6
-rw-r--r--arch/powerpc/kernel/dma-iommu.c13
-rw-r--r--arch/powerpc/kernel/dt_cpu_ftrs.c35
-rw-r--r--arch/powerpc/kernel/eeh.c281
-rw-r--r--arch/powerpc/kernel/eeh_cache.c37
-rw-r--r--arch/powerpc/kernel/eeh_dev.c2
-rw-r--r--arch/powerpc/kernel/eeh_driver.c280
-rw-r--r--arch/powerpc/kernel/eeh_event.c34
-rw-r--r--arch/powerpc/kernel/eeh_pe.c145
-rw-r--r--arch/powerpc/kernel/entry_32.S40
-rw-r--r--arch/powerpc/kernel/entry_64.S21
-rw-r--r--arch/powerpc/kernel/exceptions-64e.S22
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S1634
-rw-r--r--arch/powerpc/kernel/fadump.c1340
-rw-r--r--arch/powerpc/kernel/head_32.S55
-rw-r--r--arch/powerpc/kernel/head_32.h21
-rw-r--r--arch/powerpc/kernel/head_64.S8
-rw-r--r--arch/powerpc/kernel/head_8xx.S28
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c77
-rw-r--r--arch/powerpc/kernel/io-workarounds.c13
-rw-r--r--arch/powerpc/kernel/iommu.c97
-rw-r--r--arch/powerpc/kernel/kexec_elf_64.c545
-rw-r--r--arch/powerpc/kernel/kvm.c58
-rw-r--r--arch/powerpc/kernel/kvm_emul.S16
-rw-r--r--arch/powerpc/kernel/machine_kexec_64.c9
-rw-r--r--arch/powerpc/kernel/mce.c71
-rw-r--r--arch/powerpc/kernel/mce_power.c50
-rw-r--r--arch/powerpc/kernel/misc_32.S36
-rw-r--r--arch/powerpc/kernel/note.S40
-rw-r--r--arch/powerpc/kernel/paca.c52
-rw-r--r--arch/powerpc/kernel/pci-common.c4
-rw-r--r--arch/powerpc/kernel/pci-hotplug.c7
-rw-r--r--arch/powerpc/kernel/pci_32.c4
-rw-r--r--arch/powerpc/kernel/pci_64.c12
-rw-r--r--arch/powerpc/kernel/pci_dn.c21
-rw-r--r--arch/powerpc/kernel/pci_of_scan.c66
-rw-r--r--arch/powerpc/kernel/process.c28
-rw-r--r--arch/powerpc/kernel/prom.c8
-rw-r--r--arch/powerpc/kernel/prom_init.c98
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh2
-rw-r--r--arch/powerpc/kernel/rtas.c15
-rw-r--r--arch/powerpc/kernel/security.c19
-rw-r--r--arch/powerpc/kernel/setup-common.c14
-rw-r--r--arch/powerpc/kernel/setup_32.c2
-rw-r--r--arch/powerpc/kernel/stacktrace.c2
-rw-r--r--arch/powerpc/kernel/sysfs.c20
-rw-r--r--arch/powerpc/kernel/trace/ftrace.c5
-rw-r--r--arch/powerpc/kernel/trace/ftrace_32.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_mprofile.S1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_64_pg.S1
-rw-r--r--arch/powerpc/kernel/traps.c1
-rw-r--r--arch/powerpc/kernel/ucall.S14
-rw-r--r--arch/powerpc/kernel/vdso.c22
-rw-r--r--arch/powerpc/kernel/vdso32/datapage.S2
-rw-r--r--arch/powerpc/kernel/vdso32/vdso32.lds.S4
60 files changed, 2764 insertions, 2712 deletions
diff --git a/arch/powerpc/kernel/.gitignore b/arch/powerpc/kernel/.gitignore
index c5f676c3c224..67ebd3003c05 100644
--- a/arch/powerpc/kernel/.gitignore
+++ b/arch/powerpc/kernel/.gitignore
@@ -1 +1,2 @@
+prom_init_check
vmlinux.lds
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 56dfa7a2a6f2..a7ca8fe62368 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -52,7 +52,7 @@ obj-y := cputable.o ptrace.o syscalls.o \
of_platform.o prom_parse.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
- paca.o nvram_64.o firmware.o
+ paca.o nvram_64.o firmware.o note.o
obj-$(CONFIG_VDSO32) += vdso32/
obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
@@ -78,7 +78,9 @@ obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
-obj-$(CONFIG_FA_DUMP) += fadump.o
+ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
+obj-y += fadump.o
+endif
ifdef CONFIG_PPC32
obj-$(CONFIG_E500) += idle_e500.o
endif
@@ -155,6 +157,9 @@ endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
+ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
+obj-y += ucall.o
+endif
# Disable GCOV, KCOV & sanitizers in odd or sensitive code
GCOV_PROFILE_prom_init.o := n
@@ -184,15 +189,13 @@ extra-$(CONFIG_ALTIVEC) += vector.o
extra-$(CONFIG_PPC64) += entry_64.o
extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init.o
-ifdef CONFIG_PPC_OF_BOOT_TRAMPOLINE
-$(obj)/built-in.a: prom_init_check
+extra-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += prom_init_check
-quiet_cmd_prom_init_check = CALL $<
- cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o"
+quiet_cmd_prom_init_check = PROMCHK $@
+ cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" $(obj)/prom_init.o; touch $@
-PHONY += prom_init_check
-prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o
- $(call cmd,prom_init_check)
-endif
+$(obj)/prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o FORCE
+ $(call if_changed,prom_init_check)
+targets += prom_init_check
clean-files := vmlinux.lds
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ccb6b3a7fbd..484f54dab247 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -506,6 +506,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+ OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 3239a9fe6c1c..a460298c7ddb 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -23,6 +23,7 @@ _GLOBAL(__setup_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -37,6 +38,7 @@ _GLOBAL(__restore_cpu_power7)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
li r4,(LPCR_LPES1 >> LPCR_LPES_SH)
@@ -54,6 +56,7 @@ _GLOBAL(__setup_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -76,6 +79,7 @@ _GLOBAL(__restore_cpu_power8)
beqlr
li r0,0
mtspr SPRN_LPID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
ori r3, r3, LPCR_PECEDH
@@ -98,6 +102,7 @@ _GLOBAL(__setup_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
@@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9)
mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mtspr SPRN_PID,r0
+ LOAD_REG_IMMEDIATE(r0, PCR_MASK)
mtspr SPRN_PCR,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC)
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index bfe5f4a2886b..e745abc5457a 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -569,7 +569,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_PPC32
-#ifdef CONFIG_PPC_BOOK3S_32
+#ifdef CONFIG_PPC_BOOK3S_601
{ /* 601 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00010000,
@@ -583,6 +583,8 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc601",
},
+#endif /* CONFIG_PPC_BOOK3S_601 */
+#ifdef CONFIG_PPC_BOOK3S_6xx
{ /* 603 */
.pvr_mask = 0xffff0000,
.pvr_value = 0x00030000,
@@ -1212,7 +1214,7 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check = machine_check_generic,
.platform = "ppc603",
},
-#endif /* CONFIG_PPC_BOOK3S_32 */
+#endif /* CONFIG_PPC_BOOK3S_6xx */
#ifdef CONFIG_PPC_8xx
{ /* 8xx */
.pvr_mask = 0xffff0000,
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index 804b1a6196fa..f17ff1200eaa 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -33,7 +33,7 @@ void doorbell_global_ipi(int cpu)
{
u32 tag = get_hard_smp_processor_id(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -48,7 +48,7 @@ void doorbell_core_ipi(int cpu)
{
u32 tag = cpu_thread_in_core(cpu);
- kvmppc_set_host_ipi(cpu, 1);
+ kvmppc_set_host_ipi(cpu);
/* Order previous accesses vs. msgsnd, which is treated as a store */
ppc_msgsnd_sync();
ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
@@ -84,7 +84,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
- kvmppc_set_host_ipi(smp_processor_id(), 0);
+ kvmppc_clear_host_ipi(smp_processor_id());
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux_relaxed(); /* already performed the barrier */
diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c
index a0879674a9c8..e486d1d78de2 100644
--- a/arch/powerpc/kernel/dma-iommu.c
+++ b/arch/powerpc/kernel/dma-iommu.c
@@ -122,18 +122,17 @@ int dma_iommu_dma_supported(struct device *dev, u64 mask)
{
struct iommu_table *tbl = get_iommu_table_base(dev);
- if (!tbl) {
- dev_info(dev, "Warning: IOMMU dma not supported: mask 0x%08llx"
- ", table unavailable\n", mask);
- return 0;
- }
-
if (dev_is_pci(dev) && dma_iommu_bypass_supported(dev, mask)) {
dev->archdata.iommu_bypass = true;
dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
return 1;
}
+ if (!tbl) {
+ dev_err(dev, "Warning: IOMMU dma not supported: mask 0x%08llx, table unavailable\n", mask);
+ return 0;
+ }
+
if (tbl->it_offset > (mask >> tbl->it_page_shift)) {
dev_info(dev, "Warning: IOMMU offset too big for device mask\n");
dev_info(dev, "mask: 0x%08llx, table offset: 0x%08lx\n",
@@ -208,4 +207,6 @@ const struct dma_map_ops dma_iommu_ops = {
.sync_single_for_device = dma_iommu_sync_for_device,
.sync_sg_for_cpu = dma_iommu_sync_sg_for_cpu,
.sync_sg_for_device = dma_iommu_sync_sg_for_device,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
};
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index bd95318d2202..180b3a5d1001 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -101,7 +101,7 @@ static void __restore_cpu_cpufeatures(void)
if (hv_mode) {
mtspr(SPRN_LPID, 0);
mtspr(SPRN_HFSCR, system_registers.hfscr);
- mtspr(SPRN_PCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
}
mtspr(SPRN_FSCR, system_registers.fscr);
@@ -144,6 +144,7 @@ static void __init cpufeatures_setup_cpu(void)
mtspr(SPRN_HFSCR, 0);
}
mtspr(SPRN_FSCR, 0);
+ mtspr(SPRN_PCR, PCR_MASK);
/*
* LPCR does not get cleared, to match behaviour with secondaries
@@ -691,9 +692,37 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
return true;
}
+/*
+ * Handle POWER9 broadcast tlbie invalidation issue using
+ * cpu feature flag.
+ */
+static __init void update_tlbie_feature_flag(unsigned long pvr)
+{
+ if (PVR_VER(pvr) == PVR_POWER9) {
+ /*
+ * Set the tlbie feature flag for anything below
+ * Nimbus DD 2.3 and Cumulus DD 1.3
+ */
+ if ((pvr & 0xe000) == 0) {
+ /* Nimbus */
+ if ((pvr & 0xfff) < 0x203)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else if ((pvr & 0xc000) == 0) {
+ /* Cumulus */
+ if ((pvr & 0xfff) < 0x103)
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ } else {
+ WARN_ONCE(1, "Unknown PVR");
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_STQ_BUG;
+ }
+
+ cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_ERAT_BUG;
+ }
+}
+
static __init void cpufeatures_cpu_quirks(void)
{
- int version = mfspr(SPRN_PVR);
+ unsigned long version = mfspr(SPRN_PVR);
/*
* Not all quirks can be derived from the cpufeatures device tree.
@@ -712,10 +741,10 @@ static __init void cpufeatures_cpu_quirks(void)
if ((version & 0xffff0000) == 0x004e0000) {
cur_cpu_spec->cpu_features &= ~(CPU_FTR_DAWR);
- cur_cpu_spec->cpu_features |= CPU_FTR_P9_TLBIE_BUG;
cur_cpu_spec->cpu_features |= CPU_FTR_P9_TIDR;
}
+ update_tlbie_feature_flag(version);
/*
* PKEY was not in the initial base or feature node
* specification, but it should become optional in the next
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index c0e4b73191f3..bc8a551013be 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -150,6 +150,16 @@ static int __init eeh_setup(char *str)
}
__setup("eeh=", eeh_setup);
+void eeh_show_enabled(void)
+{
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ pr_info("EEH: Recovery disabled by kernel parameter.\n");
+ else if (eeh_has_flag(EEH_ENABLED))
+ pr_info("EEH: Capable adapter found: recovery enabled.\n");
+ else
+ pr_info("EEH: No capable adapters found: recovery disabled.\n");
+}
+
/*
* This routine captures assorted PCI configuration space data
* for the indicated PCI device, and puts them into a buffer
@@ -410,11 +420,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
- pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pr_debug("EEH: PHB#%x failure detected, location: %s\n",
phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
- dump_stack();
eeh_send_failure_event(phb_pe);
-
return 1;
out:
eeh_serialize_unlock(flags);
@@ -441,7 +449,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
unsigned long flags;
struct device_node *dn;
struct pci_dev *dev;
- struct eeh_pe *pe, *parent_pe, *phb_pe;
+ struct eeh_pe *pe, *parent_pe;
int rc = 0;
const char *location = NULL;
@@ -460,8 +468,7 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
/* Access to IO BARs might get this far and still not want checking. */
if (!pe) {
eeh_stats.ignored_check++;
- pr_debug("EEH: Ignored check for %s\n",
- eeh_pci_name(dev));
+ eeh_edev_dbg(edev, "Ignored check\n");
return 0;
}
@@ -501,12 +508,11 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
if (dn)
location = of_get_property(dn, "ibm,loc-code",
NULL);
- printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
- "location=%s driver=%s pci addr=%s\n",
+ eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
pe->check_count,
location ? location : "unknown",
- eeh_driver_name(dev), eeh_pci_name(dev));
- printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ eeh_driver_name(dev));
+ eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
eeh_driver_name(dev));
dump_stack();
}
@@ -573,13 +579,8 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
* a stack trace will help the device-driver authors figure
* out what happened. So print that out.
*/
- phb_pe = eeh_phb_pe_get(pe->phb);
- pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
- pe->phb->global_number, pe->addr);
- pr_err("EEH: PE location: %s, PHB location: %s\n",
- eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
- dump_stack();
-
+ pr_debug("EEH: %s: Frozen PHB#%x-PE#%x detected\n",
+ __func__, pe->phb->global_number, pe->addr);
eeh_send_failure_event(pe);
return 1;
@@ -697,7 +698,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
return rc;
}
-static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
+static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
void *userdata)
{
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
@@ -708,7 +709,7 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* state for the specified device
*/
if (!pdev || pdev == dev)
- return NULL;
+ return;
/* Ensure we have D0 power state */
pci_set_power_state(pdev, PCI_D0);
@@ -721,18 +722,16 @@ static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
* interrupt from the device
*/
pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
-
- return NULL;
}
-static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
+static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
struct pci_dev *dev = userdata;
if (!pdev)
- return NULL;
+ return;
/* Apply customization from firmware */
if (pdn && eeh_ops->restore_config)
@@ -741,8 +740,6 @@ static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
/* The caller should restore state for the specified device */
if (pdev != dev)
pci_restore_state(pdev);
-
- return NULL;
}
int eeh_restore_vf_config(struct pci_dn *pdn)
@@ -868,7 +865,7 @@ int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state stat
* the indicated device and its children so that the bunch of the
* devices could be reset properly.
*/
-static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
+static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
struct pci_dev *dev;
unsigned int *freset = (unsigned int *)flag;
@@ -876,8 +873,6 @@ static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
dev = eeh_dev_to_pci_dev(edev);
if (dev)
*freset |= dev->needs_freset;
-
- return NULL;
}
static void eeh_pe_refreeze_passed(struct eeh_pe *root)
@@ -1063,23 +1058,6 @@ static struct notifier_block eeh_reboot_nb = {
.notifier_call = eeh_reboot_notifier,
};
-void eeh_probe_devices(void)
-{
- struct pci_controller *hose, *tmp;
- struct pci_dn *pdn;
-
- /* Enable EEH for all adapters */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
- pdn = hose->pci_data;
- traverse_pci_dn(pdn, eeh_ops->probe, NULL);
- }
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_info("EEH: No capable adapters found\n");
-
-}
-
/**
* eeh_init - EEH initialization
*
@@ -1120,6 +1098,8 @@ static int eeh_init(void)
list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
eeh_dev_phb_init_dynamic(hose);
+ eeh_addr_cache_init();
+
/* Initialize EEH event */
return eeh_event_init();
}
@@ -1190,15 +1170,14 @@ void eeh_add_device_late(struct pci_dev *dev)
struct pci_dn *pdn;
struct eeh_dev *edev;
- if (!dev || !eeh_enabled())
+ if (!dev)
return;
- pr_debug("EEH: Adding device %s\n", pci_name(dev));
-
pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
edev = pdn_to_eeh_dev(pdn);
+ eeh_edev_dbg(edev, "Adding device\n");
if (edev->pdev == dev) {
- pr_debug("EEH: Already referenced !\n");
+ eeh_edev_dbg(edev, "Device already referenced!\n");
return;
}
@@ -1246,6 +1225,8 @@ void eeh_add_device_tree_late(struct pci_bus *bus)
{
struct pci_dev *dev;
+ if (eeh_has_flag(EEH_FORCE_DISABLED))
+ return;
list_for_each_entry(dev, &bus->devices, bus_list) {
eeh_add_device_late(dev);
if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
@@ -1299,10 +1280,10 @@ void eeh_remove_device(struct pci_dev *dev)
edev = pci_dev_to_eeh_dev(dev);
/* Unregister the device with the EEH/PCI address search system */
- pr_debug("EEH: Removing device %s\n", pci_name(dev));
+ dev_dbg(&dev->dev, "EEH: Removing device\n");
if (!edev || !edev->pdev || !edev->pe) {
- pr_debug("EEH: Not referenced !\n");
+ dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
return;
}
@@ -1890,6 +1871,198 @@ static const struct file_operations eeh_force_recover_fops = {
.llseek = no_llseek,
.write = eeh_force_recover_write,
};
+
+static ssize_t eeh_debugfs_dev_usage(struct file *filp,
+ char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ static const char usage[] = "input format: <domain>:<bus>:<dev>.<fn>\n";
+
+ return simple_read_from_buffer(user_buf, count, ppos,
+ usage, sizeof(usage) - 1);
+}
+
+static ssize_t eeh_dev_check_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ struct eeh_dev *edev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ edev = pci_dev_to_eeh_dev(pdev);
+ if (!edev) {
+ pci_err(pdev, "No eeh_dev for this device!\n");
+ pci_dev_put(pdev);
+ return -ENODEV;
+ }
+
+ ret = eeh_dev_check_failure(edev);
+ pci_info(pdev, "eeh_dev_check_failure(%04x:%02x:%02x.%01x) = %d\n",
+ domain, bus, dev, fn, ret);
+
+ pci_dev_put(pdev);
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_check_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_check_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
+static int eeh_debugfs_break_device(struct pci_dev *pdev)
+{
+ struct resource *bar = NULL;
+ void __iomem *mapped;
+ u16 old, bit;
+ int i, pos;
+
+ /* Do we have an MMIO BAR to disable? */
+ for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ struct resource *r = &pdev->resource[i];
+
+ if (!r->flags || !r->start)
+ continue;
+ if (r->flags & IORESOURCE_IO)
+ continue;
+ if (r->flags & IORESOURCE_UNSET)
+ continue;
+
+ bar = r;
+ break;
+ }
+
+ if (!bar) {
+ pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
+ return -ENXIO;
+ }
+
+ pci_err(pdev, "Going to break: %pR\n", bar);
+
+ if (pdev->is_virtfn) {
+#ifndef CONFIG_PCI_IOV
+ return -ENXIO;
+#else
+ /*
+ * VFs don't have a per-function COMMAND register, so the best
+ * we can do is clear the Memory Space Enable bit in the PF's
+ * SRIOV control reg.
+ *
+ * Unfortunately, this requires that we have a PF (i.e doesn't
+ * work for a passed-through VF) and it has the potential side
+ * effect of also causing an EEH on every other VF under the
+ * PF. Oh well.
+ */
+ pdev = pdev->physfn;
+ if (!pdev)
+ return -ENXIO; /* passed through VFs have no PF */
+
+ pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
+ pos += PCI_SRIOV_CTRL;
+ bit = PCI_SRIOV_CTRL_MSE;
+#endif /* !CONFIG_PCI_IOV */
+ } else {
+ bit = PCI_COMMAND_MEMORY;
+ pos = PCI_COMMAND;
+ }
+
+ /*
+ * Process here is:
+ *
+ * 1. Disable Memory space.
+ *
+ * 2. Perform an MMIO to the device. This should result in an error
+ * (CA / UR) being raised by the device which results in an EEH
+ * PE freeze. Using the in_8() accessor skips the eeh detection hook
+ * so the freeze hook so the EEH Detection machinery won't be
+ * triggered here. This is to match the usual behaviour of EEH
+ * where the HW will asyncronously freeze a PE and it's up to
+ * the kernel to notice and deal with it.
+ *
+ * 3. Turn Memory space back on. This is more important for VFs
+ * since recovery will probably fail if we don't. For normal
+ * the COMMAND register is reset as a part of re-initialising
+ * the device.
+ *
+ * Breaking stuff is the point so who cares if it's racy ;)
+ */
+ pci_read_config_word(pdev, pos, &old);
+
+ mapped = ioremap(bar->start, PAGE_SIZE);
+ if (!mapped) {
+ pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
+ return -ENXIO;
+ }
+
+ pci_write_config_word(pdev, pos, old & ~bit);
+ in_8(mapped);
+ pci_write_config_word(pdev, pos, old);
+
+ iounmap(mapped);
+
+ return 0;
+}
+
+static ssize_t eeh_dev_break_write(struct file *filp,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ uint32_t domain, bus, dev, fn;
+ struct pci_dev *pdev;
+ char buf[20];
+ int ret;
+
+ memset(buf, 0, sizeof(buf));
+ ret = simple_write_to_buffer(buf, sizeof(buf)-1, ppos, user_buf, count);
+ if (!ret)
+ return -EFAULT;
+
+ ret = sscanf(buf, "%x:%x:%x.%x", &domain, &bus, &dev, &fn);
+ if (ret != 4) {
+ pr_err("%s: expected 4 args, got %d\n", __func__, ret);
+ return -EINVAL;
+ }
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, (dev << 3) | fn);
+ if (!pdev)
+ return -ENODEV;
+
+ ret = eeh_debugfs_break_device(pdev);
+ pci_dev_put(pdev);
+
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static const struct file_operations eeh_dev_break_fops = {
+ .open = simple_open,
+ .llseek = no_llseek,
+ .write = eeh_dev_break_write,
+ .read = eeh_debugfs_dev_usage,
+};
+
#endif
static int __init eeh_init_proc(void)
@@ -1905,6 +2078,12 @@ static int __init eeh_init_proc(void)
debugfs_create_bool("eeh_disable_recovery", 0600,
powerpc_debugfs_root,
&eeh_debugfs_no_recover);
+ debugfs_create_file_unsafe("eeh_dev_check", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_check_fops);
+ debugfs_create_file_unsafe("eeh_dev_break", 0600,
+ powerpc_debugfs_root, NULL,
+ &eeh_dev_break_fops);
debugfs_create_file_unsafe("eeh_force_recover", 0600,
powerpc_debugfs_root, NULL,
&eeh_force_recover_fops);
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 05ffd32b3416..cf11277ebd02 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -148,8 +148,8 @@ eeh_addr_cache_insert(struct pci_dev *dev, resource_size_t alo,
piar->pcidev = dev;
piar->flags = flags;
- pr_debug("PIAR: insert range=[%pap:%pap] dev=%s\n",
- &alo, &ahi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: insert range=[%pap:%pap]\n",
+ &alo, &ahi);
rb_link_node(&piar->rb_node, parent, p);
rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
@@ -229,8 +229,8 @@ restart:
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
if (piar->pcidev == dev) {
- pr_debug("PIAR: remove range=[%pap:%pap] dev=%s\n",
- &piar->addr_lo, &piar->addr_hi, pci_name(dev));
+ eeh_edev_dbg(piar->edev, "PIAR: remove range=[%pap:%pap]\n",
+ &piar->addr_lo, &piar->addr_hi);
rb_erase(n, &pci_io_addr_cache_root.rb_root);
kfree(piar);
goto restart;
@@ -258,37 +258,14 @@ void eeh_addr_cache_rmv_dev(struct pci_dev *dev)
}
/**
- * eeh_addr_cache_build - Build a cache of I/O addresses
+ * eeh_addr_cache_init - Initialize a cache of I/O addresses
*
- * Build a cache of pci i/o addresses. This cache will be used to
+ * Initialize a cache of pci i/o addresses. This cache will be used to
* find the pci device that corresponds to a given address.
- * This routine scans all pci busses to build the cache.
- * Must be run late in boot process, after the pci controllers
- * have been scanned for devices (after all device resources are known).
*/
-void eeh_addr_cache_build(void)
+void eeh_addr_cache_init(void)
{
- struct pci_dn *pdn;
- struct eeh_dev *edev;
- struct pci_dev *dev = NULL;
-
spin_lock_init(&pci_io_addr_cache_root.piar_lock);
-
- for_each_pci_dev(dev) {
- pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
- if (!pdn)
- continue;
-
- edev = pdn_to_eeh_dev(pdn);
- if (!edev)
- continue;
-
- dev->dev.archdata.edev = edev;
- edev->pdev = dev;
-
- eeh_addr_cache_insert_dev(dev);
- eeh_sysfs_add_device(dev);
- }
}
static int eeh_addr_cache_show(struct seq_file *s, void *v)
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index c4317c452d98..7370185c7a05 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -47,6 +47,8 @@ struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
+ edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+ edev->controller = pdn->phb;
return edev;
}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89623962c727..d9279d0ee9f5 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -27,6 +27,7 @@
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/pci_hotplug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/ppc-pci.h>
@@ -81,23 +82,6 @@ static const char *pci_ers_result_name(enum pci_ers_result result)
}
};
-static __printf(2, 3) void eeh_edev_info(const struct eeh_dev *edev,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- printk(KERN_INFO "EEH: PE#%x (PCI %s): %pV\n", edev->pe_config_addr,
- edev->pdev ? dev_name(&edev->pdev->dev) : "none", &vaf);
-
- va_end(args);
-}
-
static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
enum pci_ers_result new)
{
@@ -113,8 +97,16 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
static bool eeh_edev_actionable(struct eeh_dev *edev)
{
- return (edev->pdev && !eeh_dev_removed(edev) &&
- !eeh_pe_passed(edev->pe));
+ if (!edev->pdev)
+ return false;
+ if (edev->pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+ if (eeh_dev_removed(edev))
+ return false;
+ if (eeh_pe_passed(edev->pe))
+ return false;
+
+ return true;
}
/**
@@ -214,12 +206,12 @@ static void eeh_enable_irq(struct eeh_dev *edev)
}
}
-static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* We cannot access the config space on some adapters.
@@ -229,14 +221,13 @@ static void *eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
* device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
- return NULL;
+ return;
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_save_state(pdev);
- return NULL;
}
static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
@@ -274,20 +265,27 @@ static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
}
typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
+ struct pci_dev *,
struct pci_driver *);
static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
enum pci_ers_result *result)
{
+ struct pci_dev *pdev;
struct pci_driver *driver;
enum pci_ers_result new_result;
- if (!edev->pdev) {
+ pci_lock_rescan_remove();
+ pdev = edev->pdev;
+ if (pdev)
+ get_device(&pdev->dev);
+ pci_unlock_rescan_remove();
+ if (!pdev) {
eeh_edev_info(edev, "no device");
return;
}
- device_lock(&edev->pdev->dev);
+ device_lock(&pdev->dev);
if (eeh_edev_actionable(edev)) {
- driver = eeh_pcid_get(edev->pdev);
+ driver = eeh_pcid_get(pdev);
if (!driver)
eeh_edev_info(edev, "no driver");
@@ -296,7 +294,7 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
else if (edev->mode & EEH_DEV_NO_HANDLER)
eeh_edev_info(edev, "driver bound too late");
else {
- new_result = fn(edev, driver);
+ new_result = fn(edev, pdev, driver);
eeh_edev_info(edev, "%s driver reports: '%s'",
driver->name,
pci_ers_result_name(new_result));
@@ -305,12 +303,15 @@ static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
new_result);
}
if (driver)
- eeh_pcid_put(edev->pdev);
+ eeh_pcid_put(pdev);
} else {
- eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!edev->pdev,
+ eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
!eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
}
- device_unlock(&edev->pdev->dev);
+ device_unlock(&pdev->dev);
+ if (edev->pdev != pdev)
+ eeh_edev_warn(edev, "Device changed during processing!\n");
+ put_device(&pdev->dev);
}
static void eeh_pe_report(const char *name, struct eeh_pe *root,
@@ -337,20 +338,20 @@ static void eeh_pe_report(const char *name, struct eeh_pe *root,
* Report an EEH error to each device driver.
*/
static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
- struct pci_dev *dev = edev->pdev;
if (!driver->err_handler->error_detected)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
driver->name);
- rc = driver->err_handler->error_detected(dev, pci_channel_io_frozen);
+ rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
edev->in_error = true;
- pci_uevent_ers(dev, PCI_ERS_RESULT_NONE);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
return rc;
}
@@ -363,12 +364,13 @@ static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
* are now enabled.
*/
static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->mmio_enabled)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
- return driver->err_handler->mmio_enabled(edev->pdev);
+ return driver->err_handler->mmio_enabled(pdev);
}
/**
@@ -382,20 +384,21 @@ static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
* driver can work again while the device is recovered.
*/
static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->slot_reset || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
- return driver->err_handler->slot_reset(edev->pdev);
+ return driver->err_handler->slot_reset(pdev);
}
-static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
+static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
{
struct pci_dev *pdev;
if (!edev)
- return NULL;
+ return;
/*
* The content in the config space isn't saved because
@@ -407,15 +410,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
- return NULL;
+ return;
}
pdev = eeh_dev_to_pci_dev(edev);
if (!pdev)
- return NULL;
+ return;
pci_restore_state(pdev);
- return NULL;
}
/**
@@ -428,13 +430,14 @@ static void *eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
* to make the recovered device work again.
*/
static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
if (!driver->err_handler->resume || !edev->in_error)
return PCI_ERS_RESULT_NONE;
eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
- driver->err_handler->resume(edev->pdev);
+ driver->err_handler->resume(pdev);
pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
#ifdef CONFIG_PCI_IOV
@@ -453,6 +456,7 @@ static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
* dead, and that no further recovery attempts will be made on it.
*/
static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
+ struct pci_dev *pdev,
struct pci_driver *driver)
{
enum pci_ers_result rc;
@@ -462,10 +466,10 @@ static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
driver->name);
- rc = driver->err_handler->error_detected(edev->pdev,
+ rc = driver->err_handler->error_detected(pdev,
pci_channel_io_perm_failure);
- pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_DISCONNECT);
+ pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
return rc;
}
@@ -473,12 +477,9 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
if (!(edev->physfn)) {
- pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
- __func__, pdn->phb->global_number, pdn->busno,
- PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+ eeh_edev_warn(edev, "Not for VF\n");
return NULL;
}
@@ -492,12 +493,12 @@ static void *eeh_add_virt_device(struct eeh_dev *edev)
}
#ifdef CONFIG_PCI_IOV
- pci_iov_add_virtfn(edev->physfn, pdn->vf_index);
+ pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
#endif
return NULL;
}
-static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
+static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
{
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
@@ -512,7 +513,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
*/
if (!eeh_edev_actionable(edev) ||
(dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
- return NULL;
+ return;
if (rmv_data) {
driver = eeh_pcid_get(dev);
@@ -521,7 +522,7 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
driver->err_handler->error_detected &&
driver->err_handler->slot_reset) {
eeh_pcid_put(dev);
- return NULL;
+ return;
}
eeh_pcid_put(dev);
}
@@ -554,8 +555,6 @@ static void *eeh_rmv_device(struct eeh_dev *edev, void *userdata)
pci_stop_and_remove_bus_device(dev);
pci_unlock_rescan_remove();
}
-
- return NULL;
}
static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
@@ -744,6 +743,99 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
*/
#define MAX_WAIT_FOR_RECOVERY 300
+
+/* Walks the PE tree after processing an event to remove any stale PEs.
+ *
+ * NB: This needs to be recursive to ensure the leaf PEs get removed
+ * before their parents do. Although this is possible to do recursively
+ * we don't since this is easier to read and we need to garantee
+ * the leaf nodes will be handled first.
+ */
+static void eeh_pe_cleanup(struct eeh_pe *pe)
+{
+ struct eeh_pe *child_pe, *tmp;
+
+ list_for_each_entry_safe(child_pe, tmp, &pe->child_list, child)
+ eeh_pe_cleanup(child_pe);
+
+ if (pe->state & EEH_PE_KEEP)
+ return;
+
+ if (!(pe->state & EEH_PE_INVALID))
+ return;
+
+ if (list_empty(&pe->edevs) && list_empty(&pe->child_list)) {
+ list_del(&pe->child);
+ kfree(pe);
+ }
+}
+
+/**
+ * eeh_check_slot_presence - Check if a device is still present in a slot
+ * @pdev: pci_dev to check
+ *
+ * This function may return a false positive if we can't determine the slot's
+ * presence state. This might happen for for PCIe slots if the PE containing
+ * the upstream bridge is also frozen, or the bridge is part of the same PE
+ * as the device.
+ *
+ * This shouldn't happen often, but you might see it if you hotplug a PCIe
+ * switch.
+ */
+static bool eeh_slot_presence_check(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+ u8 state;
+ int rc;
+
+ if (!pdev)
+ return false;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return false;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return true;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->get_adapter_status)
+ return true;
+
+ /* set the attention indicator while we've got the slot ops */
+ if (ops->set_attention_status)
+ ops->set_attention_status(slot->hotplug, 1);
+
+ rc = ops->get_adapter_status(slot->hotplug, &state);
+ if (rc)
+ return true;
+
+ return !!state;
+}
+
+static void eeh_clear_slot_attention(struct pci_dev *pdev)
+{
+ const struct hotplug_slot_ops *ops;
+ struct pci_slot *slot;
+
+ if (!pdev)
+ return;
+
+ if (pdev->error_state == pci_channel_io_perm_failure)
+ return;
+
+ slot = pdev->slot;
+ if (!slot || !slot->hotplug)
+ return;
+
+ ops = slot->hotplug->ops;
+ if (!ops || !ops->set_attention_status)
+ return;
+
+ ops->set_attention_status(slot->hotplug, 0);
+}
+
/**
* eeh_handle_normal_event - Handle EEH events on a specific PE
* @pe: EEH PE - which should not be used after we return, as it may
@@ -774,6 +866,7 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
struct eeh_rmv_data rmv_data =
{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
+ int devices = 0;
bus = eeh_pe_bus_get(pe);
if (!bus) {
@@ -782,7 +875,59 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
- eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ /*
+ * When devices are hot-removed we might get an EEH due to
+ * a driver attempting to touch the MMIO space of a removed
+ * device. In this case we don't have a device to recover
+ * so suppress the event if we can't find any present devices.
+ *
+ * The hotplug driver should take care of tearing down the
+ * device itself.
+ */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ if (eeh_slot_presence_check(edev->pdev))
+ devices++;
+
+ if (!devices) {
+ pr_debug("EEH: Frozen PHB#%x-PE#%x is empty!\n",
+ pe->phb->global_number, pe->addr);
+ goto out; /* nothing to recover */
+ }
+
+ /* Log the event */
+ if (pe->type & EEH_PE_PHB) {
+ pr_err("EEH: PHB#%x failure detected, location: %s\n",
+ pe->phb->global_number, eeh_pe_loc_get(pe));
+ } else {
+ struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+ pr_err("EEH: PE location: %s, PHB location: %s\n",
+ eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
+ }
+
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Print the saved stack trace now that we've verified there's
+ * something to recover.
+ */
+ if (pe->trace_entries) {
+ void **ptrs = (void **) pe->stack_trace;
+ int i;
+
+ pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+ pe->phb->global_number, pe->addr);
+
+ /* FIXME: Use the same format as dump_stack() */
+ pr_err("EEH: Call Trace:\n");
+ for (i = 0; i < pe->trace_entries; i++)
+ pr_err("EEH: [%pK] %pS\n", ptrs[i], ptrs[i]);
+
+ pe->trace_entries = 0;
+ }
+#endif /* CONFIG_STACKTRACE */
eeh_pe_update_time_stamp(pe);
pe->freeze_count++;
@@ -793,6 +938,10 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
result = PCI_ERS_RESULT_DISCONNECT;
}
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* to accomplish the reset. Each child gets a report of the
@@ -969,6 +1118,19 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
return;
}
}
+
+out:
+ /*
+ * Clean up any PEs without devices. While marked as EEH_PE_RECOVERYING
+ * we don't want to modify the PE tree structure so we do it here.
+ */
+ eeh_pe_cleanup(pe);
+
+ /* clear the slot attention LED for all recovered devices */
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp)
+ eeh_clear_slot_attention(edev->pdev);
+
eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
@@ -981,7 +1143,8 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
*/
void eeh_handle_special_event(void)
{
- struct eeh_pe *pe, *phb_pe;
+ struct eeh_pe *pe, *phb_pe, *tmp_pe;
+ struct eeh_dev *edev, *tmp_edev;
struct pci_bus *bus;
struct pci_controller *hose;
unsigned long flags;
@@ -1040,6 +1203,7 @@ void eeh_handle_special_event(void)
*/
if (rc == EEH_NEXT_ERR_FROZEN_PE ||
rc == EEH_NEXT_ERR_FENCED_PHB) {
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
eeh_handle_normal_event(pe);
} else {
pci_lock_rescan_remove();
@@ -1050,6 +1214,10 @@ void eeh_handle_special_event(void)
(phb_pe->state & EEH_PE_RECOVERING))
continue;
+ eeh_for_each_pe(pe, tmp_pe)
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+
/* Notify all devices to be down */
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
eeh_set_channel_state(pe, pci_channel_io_perm_failure);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 64cfbe41174b..a7a8dc182efb 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -40,7 +40,6 @@ static int eeh_event_handler(void * dummy)
{
unsigned long flags;
struct eeh_event *event;
- struct eeh_pe *pe;
while (!kthread_should_stop()) {
if (wait_for_completion_interruptible(&eeh_eventlist_event))
@@ -59,19 +58,10 @@ static int eeh_event_handler(void * dummy)
continue;
/* We might have event without binding PE */
- pe = event->pe;
- if (pe) {
- if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%x\n",
- pe->phb->global_number);
- else
- pr_info("EEH: Detected PCI bus error on "
- "PHB#%x-PE#%x\n",
- pe->phb->global_number, pe->addr);
- eeh_handle_normal_event(pe);
- } else {
+ if (event->pe)
+ eeh_handle_normal_event(event->pe);
+ else
eeh_handle_special_event();
- }
kfree(event);
}
@@ -121,6 +111,24 @@ int __eeh_send_failure_event(struct eeh_pe *pe)
}
event->pe = pe;
+ /*
+ * Mark the PE as recovering before inserting it in the queue.
+ * This prevents the PE from being free()ed by a hotplug driver
+ * while the PE is sitting in the event queue.
+ */
+ if (pe) {
+#ifdef CONFIG_STACKTRACE
+ /*
+ * Save the current stack trace so we can dump it from the
+ * event handler thread.
+ */
+ pe->trace_entries = stack_trace_save(pe->stack_trace,
+ ARRAY_SIZE(pe->stack_trace), 0);
+#endif /* CONFIG_STACKTRACE */
+
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ }
+
/* We may or may not be called in an interrupt context */
spin_lock_irqsave(&eeh_eventlist_lock, flags);
list_add(&event->list, &eeh_eventlist);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 854cef7b18f4..177852e39a25 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -231,29 +231,22 @@ void *eeh_pe_traverse(struct eeh_pe *root,
* The function is used to traverse the devices of the specified
* PE and its child PEs.
*/
-void *eeh_pe_dev_traverse(struct eeh_pe *root,
+void eeh_pe_dev_traverse(struct eeh_pe *root,
eeh_edev_traverse_func fn, void *flag)
{
struct eeh_pe *pe;
struct eeh_dev *edev, *tmp;
- void *ret;
if (!root) {
pr_warn("%s: Invalid PE %p\n",
__func__, root);
- return NULL;
+ return;
}
/* Traverse root PE */
- eeh_for_each_pe(root, pe) {
- eeh_pe_for_each_dev(pe, edev, tmp) {
- ret = fn(edev, flag);
- if (ret)
- return ret;
- }
- }
-
- return NULL;
+ eeh_for_each_pe(root, pe)
+ eeh_pe_for_each_dev(pe, edev, tmp)
+ fn(edev, flag);
}
/**
@@ -379,8 +372,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
- __func__, config_addr, pdn->phb->global_number);
+ eeh_edev_err(edev, "PE#0 is invalid for this PHB!\n");
return -EINVAL;
}
@@ -391,42 +383,34 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
* components.
*/
pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
- if (pe && !(pe->type & EEH_PE_INVALID)) {
- /* Mark the PE as type of PCI bus */
- pe->type = EEH_PE_BUS;
- edev->pe = pe;
-
- /* Put the edev to PE */
- list_add_tail(&edev->entry, &pe->edevs);
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr);
- return 0;
- } else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->entry, &pe->edevs);
- edev->pe = pe;
- /*
- * We're running to here because of PCI hotplug caused by
- * EEH recovery. We need clear EEH_PE_INVALID until the top.
- */
- parent = pe;
- while (parent) {
- if (!(parent->type & EEH_PE_INVALID))
- break;
- parent->type &= ~EEH_PE_INVALID;
- parent = parent->parent;
- }
+ if (pe) {
+ if (pe->type & EEH_PE_INVALID) {
+ list_add_tail(&edev->entry, &pe->edevs);
+ edev->pe = pe;
+ /*
+ * We're running to here because of PCI hotplug caused by
+ * EEH recovery. We need clear EEH_PE_INVALID until the top.
+ */
+ parent = pe;
+ while (parent) {
+ if (!(parent->type & EEH_PE_INVALID))
+ break;
+ parent->type &= ~EEH_PE_INVALID;
+ parent = parent->parent;
+ }
+
+ eeh_edev_dbg(edev,
+ "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
+ } else {
+ /* Mark the PE as type of PCI bus */
+ pe->type = EEH_PE_BUS;
+ edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to Device "
- "PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ /* Put the edev to PE */
+ list_add_tail(&edev->entry, &pe->edevs);
+ eeh_edev_dbg(edev, "Added to bus PE\n");
+ }
return 0;
}
@@ -468,13 +452,8 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
list_add_tail(&pe->child, &parent->child_list);
list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
- pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
- "Device PE#%x, Parent PE#%x\n",
- pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn),
- pe->addr, pe->parent->addr);
+ eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
+ pe->parent->addr);
return 0;
}
@@ -491,16 +470,12 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
{
struct eeh_pe *pe, *parent, *child;
+ bool keep, recover;
int cnt;
- struct pci_dn *pdn = eeh_dev_to_pdn(edev);
pe = eeh_dev_to_pe(edev);
if (!pe) {
- pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "No PE found for device.\n");
return -EEXIST;
}
@@ -516,10 +491,21 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
*/
while (1) {
parent = pe->parent;
+
+ /* PHB PEs should never be removed */
if (pe->type & EEH_PE_PHB)
break;
- if (!(pe->state & EEH_PE_KEEP)) {
+ /*
+ * XXX: KEEP is set while resetting a PE. I don't think it's
+ * ever set without RECOVERING also being set. I could
+ * be wrong though so catch that with a WARN.
+ */
+ keep = !!(pe->state & EEH_PE_KEEP);
+ recover = !!(pe->state & EEH_PE_RECOVERING);
+ WARN_ON(keep && !recover);
+
+ if (!keep && !recover) {
if (list_empty(&pe->edevs) &&
list_empty(&pe->child_list)) {
list_del(&pe->child);
@@ -528,6 +514,15 @@ int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
break;
}
} else {
+ /*
+ * Mark the PE as invalid. At the end of the recovery
+ * process any invalid PEs will be garbage collected.
+ *
+ * We need to delay the free()ing of them since we can
+ * remove edev's while traversing the PE tree which
+ * might trigger the removal of a PE and we can't
+ * deal with that (yet).
+ */
if (list_empty(&pe->edevs)) {
cnt = 0;
list_for_each_entry(child, &pe->child_list, child) {
@@ -623,13 +618,11 @@ void eeh_pe_mark_isolated(struct eeh_pe *root)
}
EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
-static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
+static void __eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
int mode = *((int *)flag);
edev->mode |= mode;
-
- return NULL;
}
/**
@@ -717,17 +710,13 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (!(edev->mode & (EEH_DEV_ROOT_PORT | EEH_DEV_DS_PORT)))
return;
- pr_debug("%s: Check PCIe link for %04x:%02x:%02x.%01x ...\n",
- __func__, pdn->phb->global_number,
- pdn->busno,
- PCI_SLOT(pdn->devfn),
- PCI_FUNC(pdn->devfn));
+ eeh_edev_dbg(edev, "Checking PCIe link...\n");
/* Check slot status */
cap = edev->pcie_cap;
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
if (!(val & PCI_EXP_SLTSTA_PDS)) {
- pr_debug(" No card in the slot (0x%04x) !\n", val);
+ eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
return;
}
@@ -736,7 +725,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
if (val & PCI_EXP_SLTCAP_PCP) {
eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
if (val & PCI_EXP_SLTCTL_PCC) {
- pr_debug(" In power-off state, power it on ...\n");
+ eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
@@ -752,7 +741,7 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
/* Check link */
eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
- pr_debug(" No link reporting capability (0x%08x) \n", val);
+ eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
msleep(1000);
return;
}
@@ -769,10 +758,10 @@ static void eeh_bridge_check_link(struct eeh_dev *edev)
}
if (val & PCI_EXP_LNKSTA_DLLLA)
- pr_debug(" Link up (%s)\n",
+ eeh_edev_dbg(edev, "Link up (%s)\n",
(val & PCI_EXP_LNKSTA_CLS_2_5GB) ? "2.5GB" : "5GB");
else
- pr_debug(" Link not ready (0x%04x)\n", val);
+ eeh_edev_dbg(edev, "Link not ready (0x%04x)\n", val);
}
#define BYTE_SWAP(OFF) (8*((OFF)/4)+3-(OFF))
@@ -852,7 +841,7 @@ static void eeh_restore_device_bars(struct eeh_dev *edev)
* the expansion ROM base address, the latency timer, and etc.
* from the saved values in the device node.
*/
-static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
+static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
{
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
@@ -864,8 +853,6 @@ static void *eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
if (eeh_ops->restore_config && pdn)
eeh_ops->restore_config(pdn);
-
- return NULL;
}
/**
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 54fab22c9a43..d60908ea37fb 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -230,7 +230,7 @@ transfer_to_handler_cont:
*/
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
- LOAD_MSR_KERNEL(r0, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
mtspr SPRN_SRR1,r0
SYNC
@@ -304,7 +304,7 @@ stack_ovf:
addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
lis r9,StackOverflow@ha
addi r9,r9,StackOverflow@l
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
#endif
@@ -324,7 +324,7 @@ trace_syscall_entry_irq_off:
bl trace_hardirqs_on
/* Now enable for real */
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
mtmsr r10
REST_GPR(0, r1)
@@ -394,7 +394,7 @@ ret_from_syscall:
#endif
mr r6,r3
/* disable interrupts so current_thread_info()->flags can't change */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL) /* doesn't include MSR_EE */
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL) /* doesn't include MSR_EE */
/* Note: We don't bother telling lockdep about it */
SYNC
MTMSRD(r10)
@@ -777,11 +777,19 @@ fast_exception_return:
1: lis r3,exc_exit_restart_end@ha
addi r3,r3,exc_exit_restart_end@l
cmplw r12,r3
+#if CONFIG_PPC_BOOK3S_601
+ bge 2b
+#else
bge 3f
+#endif
lis r4,exc_exit_restart@ha
addi r4,r4,exc_exit_restart@l
cmplw r12,r4
+#if CONFIG_PPC_BOOK3S_601
+ blt 2b
+#else
blt 3f
+#endif
lis r3,fee_restarts@ha
tophys(r3,r3)
lwz r5,fee_restarts@l(r3)
@@ -800,9 +808,6 @@ fee_restarts:
/* aargh, we don't know which trap this is */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
3:
-BEGIN_FTR_SECTION
- b 2b
-END_FTR_SECTION_IFSET(CPU_FTR_601)
li r10,-1
stw r10,_TRAP(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -824,7 +829,7 @@ ret_from_except:
* can't change between when we test it and when we return
* from the interrupt. */
/* Note: We don't bother telling lockdep about it */
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC /* Some chip revs have problems here... */
MTMSRD(r10) /* disable interrupts */
@@ -991,7 +996,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
* can restart the exception exit path at the label
* exc_exit_restart below. -- paulus
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL & ~MSR_RI)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL & ~MSR_RI)
SYNC
MTMSRD(r10) /* clear the RI bit */
.globl exc_exit_restart
@@ -1066,7 +1071,7 @@ exc_exit_restart_end:
REST_NVGPRS(r1); \
lwz r3,_MSR(r1); \
andi. r3,r3,MSR_PR; \
- LOAD_MSR_KERNEL(r10,MSR_KERNEL); \
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL); \
bne user_exc_return; \
lwz r0,GPR0(r1); \
lwz r2,GPR2(r1); \
@@ -1236,7 +1241,7 @@ recheck:
* neither. Those disable/enable cycles used to peek at
* TI_FLAGS aren't advertised.
*/
- LOAD_MSR_KERNEL(r10,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10,MSR_KERNEL)
SYNC
MTMSRD(r10) /* disable interrupts */
lwz r9,TI_FLAGS(r2)
@@ -1270,11 +1275,19 @@ nonrecoverable:
lis r10,exc_exit_restart_end@ha
addi r10,r10,exc_exit_restart_end@l
cmplw r12,r10
+#ifdef CONFIG_PPC_BOOK3S_601
+ bgelr
+#else
bge 3f
+#endif
lis r11,exc_exit_restart@ha
addi r11,r11,exc_exit_restart@l
cmplw r12,r11
+#ifdef CONFIG_PPC_BOOK3S_601
+ bltlr
+#else
blt 3f
+#endif
lis r10,ee_restarts@ha
lwz r12,ee_restarts@l(r10)
addi r12,r12,1
@@ -1283,9 +1296,6 @@ nonrecoverable:
blr
3: /* OK, we can't recover, kill this process */
/* but the 601 doesn't implement the RI bit, so assume it's OK */
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFSET(CPU_FTR_601)
lwz r3,_TRAP(r1)
andi. r0,r3,1
beq 5f
@@ -1329,7 +1339,7 @@ _GLOBAL(enter_rtas)
lwz r4,RTASBASE(r4)
mfmsr r9
stw r9,8(r1)
- LOAD_MSR_KERNEL(r0,MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r0,MSR_KERNEL)
SYNC /* disable interrupts so SRR0/1 */
MTMSRD(r0) /* don't get trashed */
li r9,MSR_KERNEL & ~(MSR_IR|MSR_DR)
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 0a0b5310f54a..6467bdab8d40 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -69,24 +69,20 @@ BEGIN_FTR_SECTION
bne .Ltabort_syscall
END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
- andi. r10,r12,MSR_PR
mr r10,r1
- addi r1,r1,-INT_FRAME_SIZE
- beq- 1f
ld r1,PACAKSAVE(r13)
-1: std r10,0(r1)
+ std r10,0(r1)
std r11,_NIP(r1)
std r12,_MSR(r1)
std r0,GPR0(r1)
std r10,GPR1(r1)
- beq 2f /* if from kernel mode */
#ifdef CONFIG_PPC_FSL_BOOK3E
START_BTB_FLUSH_SECTION
BTB_FLUSH(r10)
END_BTB_FLUSH_SECTION
#endif
ACCOUNT_CPU_USER_ENTRY(r13, r10, r11)
-2: std r2,GPR2(r1)
+ std r2,GPR2(r1)
std r3,GPR3(r1)
mfcr r2
std r4,GPR4(r1)
@@ -122,14 +118,13 @@ END_BTB_FLUSH_SECTION
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
- beq 33f
- /* if from user, see if there are any DTL entries to process */
+ /* see if there are any DTL entries to process */
ld r10,PACALPPACAPTR(r13) /* get ptr to VPA */
ld r11,PACA_DTL_RIDX(r13) /* get log read index */
addi r10,r10,LPPACA_DTLIDX
LDX_BE r10,0,r10 /* get log write index */
- cmpd cr1,r11,r10
- beq+ cr1,33f
+ cmpd r11,r10
+ beq+ 33f
bl accumulate_stolen_time
REST_GPR(0,r1)
REST_4GPRS(3,r1)
@@ -203,6 +198,7 @@ system_call: /* label this so stack traces look sane */
mtctr r12
bctrl /* Call handler */
+ /* syscall_exit can exit to kernel mode, via ret_from_kernel_thread */
.Lsyscall_exit:
std r3,RESULT(r1)
@@ -216,11 +212,6 @@ system_call: /* label this so stack traces look sane */
ld r12, PACA_THREAD_INFO(r13)
ld r8,_MSR(r1)
-#ifdef CONFIG_PPC_BOOK3S
- /* No MSR:RI on BookE */
- andi. r10,r8,MSR_RI
- beq- .Lunrecov_restore
-#endif
/*
* This is a few instructions into the actual syscall exit path (which actually
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 1cfb3da4a84a..829950b96d29 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -750,12 +750,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, __end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -820,12 +822,14 @@ kernel_dbg_exc:
ld r15,PACATOC(r13)
ld r14,interrupt_base_book3e@got(r15)
ld r15,__end_interrupts@got(r15)
-#else
- LOAD_REG_IMMEDIATE(r14,interrupt_base_book3e)
- LOAD_REG_IMMEDIATE(r15,__end_interrupts)
-#endif
cmpld cr0,r10,r14
cmpld cr1,r10,r15
+#else
+ LOAD_REG_IMMEDIATE_SYM(r14, r15, interrupt_base_book3e)
+ cmpld cr0, r10, r14
+ LOAD_REG_IMMEDIATE_SYM(r14, r15,__end_interrupts)
+ cmpld cr1, r10, r14
+#endif
blt+ cr0,1f
bge+ cr1,1f
@@ -1449,7 +1453,7 @@ a2_tlbinit_code_start:
a2_tlbinit_after_linear_map:
/* Now we branch the new virtual address mapped by this entry */
- LOAD_REG_IMMEDIATE(r3,1f)
+ LOAD_REG_IMMEDIATE_SYM(r3, r5, 1f)
mtctr r3
bctr
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6ba3cc2ef8ab..d0018dd17e0a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -44,6 +44,58 @@
#endif
/*
+ * Following are fixed section helper macros.
+ *
+ * EXC_REAL_BEGIN/END - real, unrelocated exception vectors
+ * EXC_VIRT_BEGIN/END - virt (AIL), unrelocated exception vectors
+ * TRAMP_REAL_BEGIN - real, unrelocated helpers (virt may call these)
+ * TRAMP_VIRT_BEGIN - virt, unreloc helpers (in practice, real can use)
+ * TRAMP_KVM_BEGIN - KVM handlers, these are put into real, unrelocated
+ * EXC_COMMON - After switching to virtual, relocated mode.
+ */
+
+#define EXC_REAL_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_REAL_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##name, start, size)
+
+#define EXC_VIRT_BEGIN(name, start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_VIRT_END(name, start, size) \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##name, start, size)
+
+#define EXC_COMMON_BEGIN(name) \
+ USE_TEXT_SECTION(); \
+ .balign IFETCH_ALIGN_BYTES; \
+ .global name; \
+ _ASM_NOKPROBE_SYMBOL(name); \
+ DEFINE_FIXED_SYMBOL(name); \
+name:
+
+#define TRAMP_REAL_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(real_trampolines, name)
+
+#define TRAMP_VIRT_BEGIN(name) \
+ FIXED_SECTION_ENTRY_BEGIN(virt_trampolines, name)
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+#define TRAMP_KVM_BEGIN(name) \
+ TRAMP_VIRT_BEGIN(name)
+#else
+#define TRAMP_KVM_BEGIN(name)
+#endif
+
+#define EXC_REAL_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(real_vectors, exc_real_##start##_##unused, start, size)
+
+#define EXC_VIRT_NONE(start, size) \
+ FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \
+ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size)
+
+/*
* We're short on space and time in the exception prolog, so we can't
* use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
* Instead we get the base of the kernel from paca->kernelbase and or in the low
@@ -68,6 +120,7 @@
addis reg,reg,(ABS_ADDR(label))@h
/* Exception register prefixes */
+#define EXC_HV_OR_STD 2 /* depends on HVMODE */
#define EXC_HV 1
#define EXC_STD 0
@@ -127,126 +180,6 @@ BEGIN_FTR_SECTION_NESTED(943) \
std ra,offset(r13); \
END_FTR_SECTION_NESTED(ftr,ftr,943)
-.macro EXCEPTION_PROLOG_0 area
- SET_SCRATCH0(r13) /* save r13 */
- GET_PACA(r13)
- std r9,\area\()+EX_R9(r13) /* save r9 */
- OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
- HMT_MEDIUM
- std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
- OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-.endm
-
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask
- OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
- OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
- INTERRUPT_TO_KERNEL
- SAVE_CTR(r10, \area\())
- mfcr r9
- .if \kvm
- KVMTEST \hsrr \vec
- .endif
- .if \bitmask
- lbz r10,PACAIRQSOFTMASK(r13)
- andi. r10,r10,\bitmask
- /* Associate vector numbers with bits in paca->irq_happened */
- .if \vec == 0x500 || \vec == 0xea0
- li r10,PACA_IRQ_EE
- .elseif \vec == 0x900
- li r10,PACA_IRQ_DEC
- .elseif \vec == 0xa00 || \vec == 0xe80
- li r10,PACA_IRQ_DBELL
- .elseif \vec == 0xe60
- li r10,PACA_IRQ_HMI
- .elseif \vec == 0xf00
- li r10,PACA_IRQ_PMI
- .else
- .abort "Bad maskable vector"
- .endif
-
- .if \hsrr
- bne masked_Hinterrupt
- .else
- bne masked_interrupt
- .endif
- .endif
-
- std r11,\area\()+EX_R11(r13)
- std r12,\area\()+EX_R12(r13)
-
- /*
- * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
- * because a d-side MCE will clobber those registers so is
- * not recoverable if they are live.
- */
- GET_SCRATCH0(r10)
- std r10,\area\()+EX_R13(r13)
- .if \dar
- mfspr r10,SPRN_DAR
- std r10,\area\()+EX_DAR(r13)
- .endif
- .if \dsisr
- mfspr r10,SPRN_DSISR
- stw r10,\area\()+EX_DSISR(r13)
- .endif
-.endm
-
-.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
- ld r10,PACAKMSR(r13) /* get MSR value for kernel */
- .if ! \set_ri
- xori r10,r10,MSR_RI /* Clear MSR_RI */
- .endif
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- mtspr SPRN_HSRR1,r10
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- mtspr SPRN_SRR1,r10
- .endif
- LOAD_HANDLER(r10, \label\())
- .if \hsrr
- mtspr SPRN_HSRR0,r10
- HRFI_TO_KERNEL
- .else
- mtspr SPRN_SRR0,r10
- RFI_TO_KERNEL
- .endif
- b . /* prevent speculative execution */
-.endm
-
-.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
-#ifdef CONFIG_RELOCATABLE
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- .endif
- LOAD_HANDLER(r12, \label\())
- mtctr r12
- .if \hsrr
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r12,SPRN_SRR1 /* and HSRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- bctr
-#else
- .if \hsrr
- mfspr r11,SPRN_HSRR0 /* save HSRR0 */
- mfspr r12,SPRN_HSRR1 /* and HSRR1 */
- .else
- mfspr r11,SPRN_SRR0 /* save SRR0 */
- mfspr r12,SPRN_SRR1 /* and SRR1 */
- .endif
- li r10,MSR_RI
- mtmsrd r10,1 /* Set RI (EE=0) */
- b \label
-#endif
-.endm
-
/*
* Branch to label using its 0xC000 address. This results in instruction
* address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -260,6 +193,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr reg; \
bctr
+.macro INT_KVM_HANDLER name, vec, hsrr, area, skip
+ TRAMP_KVM_BEGIN(\name\()_kvm)
+ KVM_HANDLER \vec, \hsrr, \area, \skip
+.endm
+
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/*
@@ -272,17 +210,13 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi r10,0
- .if \hsrr
- bne do_kvm_H\n
- .else
- bne do_kvm_\n
- .endif
+ bne \name\()_kvm
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+.macro KVM_HANDLER vec, hsrr, area, skip
.if \skip
cmpwi r10,KVM_GUEST_MODE_SKIP
beq 89f
@@ -301,10 +235,16 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r12,HSTATE_SCRATCH0(r13)
sldi r12,r9,32
/* HSRR variants have the 0x2 bit added to their trap number */
- .if \hsrr
- ori r12,r12,(\n + 0x2)
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ ori r12,r12,(\vec + 0x2)
+ FTR_SECTION_ELSE
+ ori r12,r12,(\vec)
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ ori r12,r12,(\vec + 0x2)
.else
- ori r12,r12,(\n)
+ ori r12,r12,(\vec)
.endif
#ifdef CONFIG_RELOCATABLE
@@ -329,7 +269,13 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
89: mtocrf 0x80,r9
ld r9,\area+EX_R9(r13)
ld r10,\area+EX_R10(r13)
- .if \hsrr
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ b kvmppc_skip_Hinterrupt
+ FTR_SECTION_ELSE
+ b kvmppc_skip_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
b kvmppc_skip_Hinterrupt
.else
b kvmppc_skip_interrupt
@@ -338,88 +284,328 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
.endm
#else
-.macro KVMTEST hsrr, n
+.macro KVMTEST name, hsrr, n
+.endm
+.macro KVM_HANDLER name, vec, hsrr, area, skip
.endm
-.macro KVM_HANDLER area, hsrr, n, skip
+#endif
+
+.macro INT_SAVE_SRR_AND_JUMP label, hsrr, set_ri
+ ld r10,PACAKMSR(r13) /* get MSR value for kernel */
+ .if ! \set_ri
+ xori r10,r10,MSR_RI /* Clear MSR_RI */
+ .endif
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ mtspr SPRN_HSRR1,r10
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ mtspr SPRN_SRR1,r10
+ .endif
+ LOAD_HANDLER(r10, \label\())
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ FTR_SECTION_ELSE
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mtspr SPRN_HSRR0,r10
+ HRFI_TO_KERNEL
+ .else
+ mtspr SPRN_SRR0,r10
+ RFI_TO_KERNEL
+ .endif
+ b . /* prevent speculative execution */
.endm
+
+/* INT_SAVE_SRR_AND_JUMP works for real or virt, this is faster but virt only */
+.macro INT_VIRT_SAVE_SRR_AND_JUMP label, hsrr
+#ifdef CONFIG_RELOCATABLE
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ .endif
+ LOAD_HANDLER(r12, \label\())
+ mtctr r12
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r12,SPRN_SRR1 /* and HSRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ bctr
+#else
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ FTR_SECTION_ELSE
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ mfspr r11,SPRN_HSRR0 /* save HSRR0 */
+ mfspr r12,SPRN_HSRR1 /* and HSRR1 */
+ .else
+ mfspr r11,SPRN_SRR0 /* save SRR0 */
+ mfspr r12,SPRN_SRR1 /* and SRR1 */
+ .endif
+ li r10,MSR_RI
+ mtmsrd r10,1 /* Set RI (EE=0) */
+ b \label
#endif
+.endm
-#define EXCEPTION_PROLOG_COMMON_1() \
- std r9,_CCR(r1); /* save CR in stackframe */ \
- std r11,_NIP(r1); /* save SRR0 in stackframe */ \
- std r12,_MSR(r1); /* save SRR1 in stackframe */ \
- std r10,0(r1); /* make stack chain pointer */ \
- std r0,GPR0(r1); /* save r0 in stackframe */ \
- std r10,GPR1(r1); /* save r1 in stackframe */ \
-
-/* Save original regs values from save area to stack frame. */
-#define EXCEPTION_PROLOG_COMMON_2(area) \
- ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \
- ld r10,area+EX_R10(r13); \
- std r9,GPR9(r1); \
- std r10,GPR10(r1); \
- ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \
- ld r10,area+EX_R12(r13); \
- ld r11,area+EX_R13(r13); \
- std r9,GPR11(r1); \
- std r10,GPR12(r1); \
- std r11,GPR13(r1); \
-BEGIN_FTR_SECTION_NESTED(66); \
- ld r10,area+EX_CFAR(r13); \
- std r10,ORIG_GPR3(r1); \
-END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
- GET_CTR(r10, area); \
- std r10,_CTR(r1);
-
-#define EXCEPTION_PROLOG_COMMON_3(trap) \
- std r2,GPR2(r1); /* save r2 in stackframe */ \
- SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \
- SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \
- mflr r9; /* Get LR, later save to stack */ \
- ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \
- std r9,_LINK(r1); \
- lbz r10,PACAIRQSOFTMASK(r13); \
- mfspr r11,SPRN_XER; /* save XER in stackframe */ \
- std r10,SOFTE(r1); \
- std r11,_XER(r1); \
- li r9,(trap)+1; \
- std r9,_TRAP(r1); /* set trap number */ \
- li r10,0; \
- ld r11,exception_marker@toc(r2); \
- std r10,RESULT(r1); /* clear regs->result */ \
- std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */
+/*
+ * This is the BOOK3S interrupt entry code macro.
+ *
+ * This can result in one of several things happening:
+ * - Branch to the _common handler, relocated, in virtual mode.
+ * These are normal interrupts (synchronous and asynchronous) handled by
+ * the kernel.
+ * - Branch to KVM, relocated but real mode interrupts remain in real mode.
+ * These occur when HSTATE_IN_GUEST is set. The interrupt may be caused by
+ * / intended for host or guest kernel, but KVM must always be involved
+ * because the machine state is set for guest execution.
+ * - Branch to the masked handler, unrelocated.
+ * These occur when maskable asynchronous interrupts are taken with the
+ * irq_soft_mask set.
+ * - Branch to an "early" handler in real mode but relocated.
+ * This is done if early=1. MCE and HMI use these to handle errors in real
+ * mode.
+ * - Fall through and continue executing in real, unrelocated mode.
+ * This is done if early=2.
+ */
+.macro INT_HANDLER name, vec, ool=0, early=0, virt=0, hsrr=0, area=PACA_EXGEN, ri=1, dar=0, dsisr=0, bitmask=0, kvm=0
+ SET_SCRATCH0(r13) /* save r13 */
+ GET_PACA(r13)
+ std r9,\area\()+EX_R9(r13) /* save r9 */
+ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+ HMT_MEDIUM
+ std r10,\area\()+EX_R10(r13) /* save r10 - r12 */
+ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+ .if \ool
+ .if !\virt
+ b tramp_real_\name
+ .pushsection .text
+ TRAMP_REAL_BEGIN(tramp_real_\name)
+ .else
+ b tramp_virt_\name
+ .pushsection .text
+ TRAMP_VIRT_BEGIN(tramp_virt_\name)
+ .endif
+ .endif
+
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+ OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+ INTERRUPT_TO_KERNEL
+ SAVE_CTR(r10, \area\())
+ mfcr r9
+ .if \kvm
+ KVMTEST \name \hsrr \vec
+ .endif
+ .if \bitmask
+ lbz r10,PACAIRQSOFTMASK(r13)
+ andi. r10,r10,\bitmask
+ /* Associate vector numbers with bits in paca->irq_happened */
+ .if \vec == 0x500 || \vec == 0xea0
+ li r10,PACA_IRQ_EE
+ .elseif \vec == 0x900
+ li r10,PACA_IRQ_DEC
+ .elseif \vec == 0xa00 || \vec == 0xe80
+ li r10,PACA_IRQ_DBELL
+ .elseif \vec == 0xe60
+ li r10,PACA_IRQ_HMI
+ .elseif \vec == 0xf00
+ li r10,PACA_IRQ_PMI
+ .else
+ .abort "Bad maskable vector"
+ .endif
+
+ .if \hsrr == EXC_HV_OR_STD
+ BEGIN_FTR_SECTION
+ bne masked_Hinterrupt
+ FTR_SECTION_ELSE
+ bne masked_interrupt
+ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ .elseif \hsrr
+ bne masked_Hinterrupt
+ .else
+ bne masked_interrupt
+ .endif
+ .endif
+
+ std r11,\area\()+EX_R11(r13)
+ std r12,\area\()+EX_R12(r13)
+
+ /*
+ * DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+ * because a d-side MCE will clobber those registers so is
+ * not recoverable if they are live.
+ */
+ GET_SCRATCH0(r10)
+ std r10,\area\()+EX_R13(r13)
+ .if \dar
+ .if \hsrr
+ mfspr r10,SPRN_HDAR
+ .else
+ mfspr r10,SPRN_DAR
+ .endif
+ std r10,\area\()+EX_DAR(r13)
+ .endif
+ .if \dsisr
+ .if \hsrr
+ mfspr r10,SPRN_HDSISR
+ .else
+ mfspr r10,SPRN_DSISR
+ .endif
+ stw r10,\area\()+EX_DSISR(r13)
+ .endif
+
+ .if \early == 2
+ /* nothing more */
+ .elseif \early
+ mfctr r10 /* save ctr, even for !RELOCATABLE */
+ BRANCH_TO_C000(r11, \name\()_early_common)
+ .elseif !\virt
+ INT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr, \ri
+ .else
+ INT_VIRT_SAVE_SRR_AND_JUMP \name\()_common, \hsrr
+ .endif
+ .if \ool
+ .popsection
+ .endif
+.endm
/*
* On entry r13 points to the paca, r9-r13 are saved in the paca,
* r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
* SRR1, and relocation is on.
+ *
+ * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
+ * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
*/
-#define EXCEPTION_COMMON(area, trap) \
- andi. r10,r12,MSR_PR; /* See if coming from user */ \
- mr r10,r1; /* Save r1 */ \
- subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \
- beq- 1f; \
- ld r1,PACAKSAVE(r13); /* kernel stack to use */ \
-1: tdgei r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace */ \
- EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \
-3: EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1, cr0; \
- beq 4f; /* if from kernel mode */ \
- ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
- SAVE_PPR(area, r9); \
-4: EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap); \
+.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
+ .if \stack
+ andi. r10,r12,MSR_PR /* See if coming from user */
+ mr r10,r1 /* Save r1 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc frame on kernel stack */
+ beq- 100f
+ ld r1,PACAKSAVE(r13) /* kernel stack to use */
+100: tdgei r1,-INT_FRAME_SIZE /* trap if r1 is in userspace */
+ EMIT_BUG_ENTRY 100b,__FILE__,__LINE__,0
+ .endif
+
+ std r9,_CCR(r1) /* save CR in stackframe */
+ std r11,_NIP(r1) /* save SRR0 in stackframe */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+
+ .if \stack
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1, cr0
+ .endif
+ beq 101f /* if from kernel mode */
+ ACCOUNT_CPU_USER_ENTRY(r13, r9, r10)
+ SAVE_PPR(\area, r9)
+101:
+ .else
+ .if \kaup
+ kuap_save_amr_and_lock r9, r10, cr1
+ .endif
+ .endif
+
+ /* Save original regs values from save area to stack frame. */
+ ld r9,\area+EX_R9(r13) /* move r9, r10 to stackframe */
+ ld r10,\area+EX_R10(r13)
+ std r9,GPR9(r1)
+ std r10,GPR10(r1)
+ ld r9,\area+EX_R11(r13) /* move r11 - r13 to stackframe */
+ ld r10,\area+EX_R12(r13)
+ ld r11,\area+EX_R13(r13)
+ std r9,GPR11(r1)
+ std r10,GPR12(r1)
+ std r11,GPR13(r1)
+ .if \dar
+ .if \dar == 2
+ ld r10,_NIP(r1)
+ .else
+ ld r10,\area+EX_DAR(r13)
+ .endif
+ std r10,_DAR(r1)
+ .endif
+ .if \dsisr
+ .if \dsisr == 2
+ ld r10,_MSR(r1)
+ lis r11,DSISR_SRR1_MATCH_64S@h
+ and r10,r10,r11
+ .else
+ lwz r10,\area+EX_DSISR(r13)
+ .endif
+ std r10,_DSISR(r1)
+ .endif
+BEGIN_FTR_SECTION_NESTED(66)
+ ld r10,\area+EX_CFAR(r13)
+ std r10,ORIG_GPR3(r1)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66)
+ GET_CTR(r10, \area)
+ std r10,_CTR(r1)
+ std r2,GPR2(r1) /* save r2 in stackframe */
+ SAVE_4GPRS(3, r1) /* save r3 - r6 in stackframe */
+ SAVE_2GPRS(7, r1) /* save r7, r8 in stackframe */
+ mflr r9 /* Get LR, later save to stack */
+ ld r2,PACATOC(r13) /* get kernel TOC into r2 */
+ std r9,_LINK(r1)
+ lbz r10,PACAIRQSOFTMASK(r13)
+ mfspr r11,SPRN_XER /* save XER in stackframe */
+ std r10,SOFTE(r1)
+ std r11,_XER(r1)
+ li r9,(\vec)+1
+ std r9,_TRAP(r1) /* set trap number */
+ li r10,0
+ ld r11,exception_marker@toc(r2)
+ std r10,RESULT(r1) /* clear regs->result */
+ std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame */
+
+ .if \stack
ACCOUNT_STOLEN_TIME
+ .endif
-/*
- * Exception where stack is already set in r1, r1 is saved in r10.
- * PPR save and CPU accounting is not done (for some reason).
- */
-#define EXCEPTION_COMMON_STACK(area, trap) \
- EXCEPTION_PROLOG_COMMON_1(); \
- kuap_save_amr_and_lock r9, r10, cr1; \
- EXCEPTION_PROLOG_COMMON_2(area); \
- EXCEPTION_PROLOG_COMMON_3(trap)
+ .if \reconcile
+ RECONCILE_IRQ_STATE(r10, r11)
+ .endif
+.endm
/*
* Restore all registers including H/SRR0/1 saved in a stack frame of a
@@ -428,6 +614,9 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \
.macro EXCEPTION_RESTORE_REGS hsrr
/* Move original SRR0 and SRR1 into the respective regs */
ld r9,_MSR(r1)
+ .if \hsrr == EXC_HV_OR_STD
+ .error "EXC_HV_OR_STD Not implemented for EXCEPTION_RESTORE_REGS"
+ .endif
.if \hsrr
mtspr SPRN_HSRR1,r9
.else
@@ -481,219 +670,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
#define FINISH_NAP
#endif
-/*
- * Following are the BOOK3S exception handler helper macros.
- * Handlers come in a number of types, and each type has a number of varieties.
- *
- * EXC_REAL_* - real, unrelocated exception vectors
- * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
- * TRAMP_REAL_* - real, unrelocated helpers (virt can call these)
- * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM - KVM handlers that get put into real, unrelocated
- * EXC_COMMON - virt, relocated common handlers
- *
- * The EXC handlers are given a name, and branch to name_common, or the
- * appropriate KVM or masking function. Vector handler verieties are as
- * follows:
- *
- * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
- *
- * EXC_{REAL|VIRT} - standard exception
- *
- * EXC_{REAL|VIRT}_suffix
- * where _suffix is:
- * - _MASKABLE - maskable exception
- * - _OOL - out of line with trampoline to common handler
- * - _HV - HV exception
- *
- * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
- *
- * KVM handlers come in the following verieties:
- * TRAMP_KVM
- * TRAMP_KVM_SKIP
- * TRAMP_KVM_HV
- * TRAMP_KVM_HV_SKIP
- *
- * COMMON handlers come in the following verieties:
- * EXC_COMMON_BEGIN/END - used to open-code the handler
- * EXC_COMMON
- * EXC_COMMON_ASYNC
- *
- * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
- * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
- */
-
-#define __EXC_REAL(name, start, size, area) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_REAL(name, start, size) \
- __EXC_REAL(name, start, size, PACA_EXGEN)
-
-#define __EXC_VIRT(name, start, size, realvec, area) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 area ; \
- EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_VIRT(name, start, size, realvec) \
- __EXC_VIRT(name, start, size, realvec, PACA_EXGEN)
-
-#define EXC_REAL_MASKABLE(name, start, size, bitmask) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ; \
- EXC_VIRT_END(name, start, size)
-
-#define EXC_REAL_HV(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \
- EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_HV(name, start, size, realvec) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN; \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \
- EXC_VIRT_END(name, start, size)
-
-#define __EXC_REAL_OOL(name, start, size) \
- EXC_REAL_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_real_##name ; \
- EXC_REAL_END(name, start, size)
-
-#define __TRAMP_REAL_OOL(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL(name, start, size) \
- __EXC_REAL_OOL(name, start, size); \
- __TRAMP_REAL_OOL(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_REAL_OOL_MASKABLE(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE(name, start, bitmask)
-
-#define __EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_HV(name, vec) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_HV(name, start, size) \
- __EXC_REAL_OOL_HV(name, start, size); \
- __TRAMP_REAL_OOL_HV(name, start)
-
-#define __EXC_REAL_OOL_MASKABLE_HV(name, start, size) \
- __EXC_REAL_OOL(name, start, size)
-
-#define __TRAMP_REAL_OOL_MASKABLE_HV(name, vec, bitmask) \
- TRAMP_REAL_BEGIN(tramp_real_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1
-
-#define EXC_REAL_OOL_MASKABLE_HV(name, start, size, bitmask) \
- __EXC_REAL_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_REAL_OOL_MASKABLE_HV(name, start, bitmask)
-
-#define __EXC_VIRT_OOL(name, start, size) \
- EXC_VIRT_BEGIN(name, start, size); \
- EXCEPTION_PROLOG_0 PACA_EXGEN ; \
- b tramp_virt_##name; \
- EXC_VIRT_END(name, start, size)
-
-#define __TRAMP_VIRT_OOL(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD
-
-#define EXC_VIRT_OOL(name, start, size, realvec) \
- __EXC_VIRT_OOL(name, start, size); \
- __TRAMP_VIRT_OOL(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1
-
-#define EXC_VIRT_OOL_MASKABLE(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE(name, realvec, bitmask)
-
-#define __EXC_VIRT_OOL_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_HV(name, realvec) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_HV(name, start, size, realvec) \
- __EXC_VIRT_OOL_HV(name, start, size); \
- __TRAMP_VIRT_OOL_HV(name, realvec)
-
-#define __EXC_VIRT_OOL_MASKABLE_HV(name, start, size) \
- __EXC_VIRT_OOL(name, start, size)
-
-#define __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask) \
- TRAMP_VIRT_BEGIN(tramp_virt_##name); \
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, bitmask ; \
- EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV
-
-#define EXC_VIRT_OOL_MASKABLE_HV(name, start, size, realvec, bitmask) \
- __EXC_VIRT_OOL_MASKABLE_HV(name, start, size); \
- __TRAMP_VIRT_OOL_MASKABLE_HV(name, realvec, bitmask)
-
-#define TRAMP_KVM(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 0
-
-#define TRAMP_KVM_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_##n); \
- KVM_HANDLER area, EXC_STD, n, 1
-
-#define TRAMP_KVM_HV(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 0
-
-#define TRAMP_KVM_HV_SKIP(area, n) \
- TRAMP_KVM_BEGIN(do_kvm_H##n); \
- KVM_HANDLER area, EXC_HV, n, 1
-
#define EXC_COMMON(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
bl save_nvgprs; \
- RECONCILE_IRQ_STATE(r10, r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
b ret_from_except
@@ -704,9 +684,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
*/
#define EXC_COMMON_ASYNC(name, realvec, hdlr) \
EXC_COMMON_BEGIN(name); \
- EXCEPTION_COMMON(PACA_EXGEN, realvec); \
+ INT_COMMON realvec, PACA_EXGEN, 1, 1, 1, 0, 0 ; \
FINISH_NAP; \
- RECONCILE_IRQ_STATE(r10, r11); \
RUNLATCH_ON; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
bl hdlr; \
@@ -836,9 +815,7 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
#endif
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0, kvm=1
/*
* MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
* being used, so a nested NMI exception would corrupt it.
@@ -850,9 +827,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
* be dangerous anyway.
*/
EXC_REAL_END(system_reset, 0x100, 0x100)
-
EXC_VIRT_NONE(0x4100, 0x100)
-TRAMP_KVM(PACA_EXNMI, 0x100)
+INT_KVM_HANDLER system_reset 0x100, EXC_STD, PACA_EXNMI, 0
#ifdef CONFIG_PPC_P7_NAP
TRAMP_REAL_BEGIN(system_reset_idle_wake)
@@ -868,9 +844,7 @@ TRAMP_REAL_BEGIN(system_reset_idle_wake)
*/
TRAMP_REAL_BEGIN(system_reset_fwnmi)
/* See comment at system_reset exception, don't turn on RI */
- EXCEPTION_PROLOG_0 PACA_EXNMI
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 0, 0x100, 0, 0, 0
- EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
+ INT_HANDLER system_reset, 0x100, area=PACA_EXNMI, ri=0
#endif /* CONFIG_PPC_PSERIES */
@@ -890,7 +864,7 @@ EXC_COMMON_BEGIN(system_reset_common)
mr r10,r1
ld r1,PACA_NMI_EMERG_SP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXNMI, 0x100)
+ INT_COMMON 0x100, PACA_EXNMI, 0, 1, 0, 0, 0
bl save_nvgprs
/*
* Set IRQS_ALL_DISABLED unconditionally so arch_irqs_disabled does
@@ -933,26 +907,39 @@ EXC_COMMON_BEGIN(system_reset_common)
EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
- /* This is moved out of line as it can be patched by FW, but
- * some code path might still want to branch into the original
- * vector
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+ /*
+ * MSR_RI is not enabled, because PACA_EXMC is being used, so a
+ * nested machine check corrupts it. machine_check_common enables
+ * MSR_RI.
*/
- EXCEPTION_PROLOG_0 PACA_EXMC
-BEGIN_FTR_SECTION
- b machine_check_common_early
-FTR_SECTION_ELSE
- b machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_common_early)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, early=1, area=PACA_EXMC, dar=1, dsisr=1
+#endif
+
+INT_KVM_HANDLER machine_check 0x200, EXC_STD, PACA_EXMC, 1
+
+#define MACHINE_CHECK_HANDLER_WINDUP \
+ /* Clear MSR_RI before setting SRR0 and SRR1. */\
+ li r9,0; \
+ mtmsrd r9,1; /* Clear MSR_RI */ \
+ /* Decrement paca->in_mce now RI is clear. */ \
+ lhz r12,PACA_IN_MCE(r13); \
+ subi r12,r12,1; \
+ sth r12,PACA_IN_MCE(r13); \
+ EXCEPTION_RESTORE_REGS EXC_STD
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+ mtctr r10 /* Restore ctr */
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+
/*
- * Register contents:
- * R13 = PACA
- * R9 = CR
- * Original R9 to R13 is saved on PACA_EXMC
- *
* Switch to mc_emergency stack and handle re-entrancy (we limit
* the nested MCE upto level 4 to avoid stack overflow).
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -973,103 +960,127 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
* the machine check is handled then the idle wakeup code is called
* to restore state.
*/
- mr r11,r1 /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi r10,0 /* Are we in nested machine check */
- bne 0f /* Yes, we are. */
- /* First machine check entry */
- ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
-0: subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */
addi r10,r10,1 /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
- /* Limit nested MCE to level 4 to avoid stack overflow */
- cmpwi r10,MAX_MCE_DEPTH
- bgt 2f /* Check if we hit limit of 4 */
- std r11,GPR1(r1) /* Save r1 on the stack. */
- std r11,0(r1) /* make stack chain pointer */
- mfspr r11,SPRN_SRR0 /* Save SRR0 */
- std r11,_NIP(r1)
- mfspr r11,SPRN_SRR1 /* Save SRR1 */
- std r11,_MSR(r1)
- mfspr r11,SPRN_DAR /* Save DAR */
- std r11,_DAR(r1)
- mfspr r11,SPRN_DSISR /* Save DSISR */
- std r11,_DSISR(r1)
- std r9,_CCR(r1) /* Save CR in stackframe */
+
+ mr r10,r1 /* Save r1 */
+ bne 1f
+ /* First machine check entry */
+ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+ bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+
/* We don't touch AMR here, we never go to virtual mode */
- /* Save r9 through r13 from EXMC save area to stack frame. */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
- mfmsr r11 /* get MSR value */
+ INT_COMMON 0x200, PACA_EXMC, 0, 0, 0, 1, 1
+
BEGIN_FTR_SECTION
- ori r11,r11,MSR_ME /* turn on ME bit */
+ bl enable_machine_check
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ori r11,r11,MSR_RI /* turn on RI bit */
- LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r11
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-2:
- /* Stack overflow. Stay on emergency stack and panic.
- * Keep the ME bit off while panic-ing, so that if we hit
- * another machine check we checkstop.
- */
- addi r1,r1,INT_FRAME_SIZE /* go back to previous stack frame */
- ld r11,PACAKMSR(r13)
- LOAD_HANDLER(r12, unrecover_mce)
- li r10,MSR_ME
- andc r11,r11,r10 /* Turn off MSR_ME */
- b 1b
- b . /* prevent speculative execution */
+ li r10,MSR_RI
+ mtmsrd r10,1
+
+ bl save_nvgprs
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_early
+ std r3,RESULT(r1) /* Save result */
+ ld r12,_MSR(r1)
-TRAMP_REAL_BEGIN(machine_check_pSeries)
- .globl machine_check_fwnmi
-machine_check_fwnmi:
- EXCEPTION_PROLOG_0 PACA_EXMC
+#ifdef CONFIG_PPC_P7_NAP
+ /*
+ * Check if thread was in power saving mode. We come here when any
+ * of the following is true:
+ * a. thread wasn't in power saving mode
+ * b. thread was in power saving mode with no state loss,
+ * supervisor state loss or hypervisor state loss.
+ *
+ * Go back to nap/sleep/winkle mode again if (b) is true.
+ */
BEGIN_FTR_SECTION
- b machine_check_common_early
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-machine_check_pSeries_0:
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+ rlwinm. r11,r12,47-31,30,31
+ bne machine_check_idle_common
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
- * MSR_RI is not enabled, because PACA_EXMC is being used, so a
- * nested machine check corrupts it. machine_check_common enables
- * MSR_RI.
+ * Check if we are coming from guest. If yes, then run the normal
+ * exception handler which will take the
+ * machine_check_kvm->kvmppc_interrupt branch to deliver the MC event
+ * to guest.
*/
- EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
+ lbz r11,HSTATE_IN_GUEST(r13)
+ cmpwi r11,0 /* Check if coming from guest */
+ bne mce_deliver /* continue if we are. */
+#endif
-TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+ /*
+ * Check if we are coming from userspace. If yes, then run the normal
+ * exception handler which will deliver the MC event to this kernel.
+ */
+ andi. r11,r12,MSR_PR /* See if coming from user. */
+ bne mce_deliver /* continue in V mode if we are. */
+
+ /*
+ * At this point we are coming from kernel context.
+ * Queue up the MCE event and return from the interrupt.
+ * But before that, check if this is an un-recoverable exception.
+ * If yes, then stay on emergency stack and panic.
+ */
+ andi. r11,r12,MSR_RI
+ beq unrecoverable_mce
+
+ /*
+ * Check if we have successfully handled/recovered from error, if not
+ * then stay on emergency stack and panic.
+ */
+ ld r3,RESULT(r1) /* Load result */
+ cmpdi r3,0 /* see if we handled MCE successfully */
+ beq unrecoverable_mce /* if !handled then panic */
+
+ /*
+ * Return from MC interrupt.
+ * Queue up the MCE event so that we can log it later, while
+ * returning from kernel or opal call.
+ */
+ bl machine_check_queue_event
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+
+mce_deliver:
+ /*
+ * This is a host user or guest MCE. Restore all registers, then
+ * run the "late" handler. For host user, this will run the
+ * machine_check_exception handler in virtual mode like a normal
+ * interrupt handler. For guest, this will trigger the KVM test
+ * and branch to the KVM interrupt similarly to other interrupts.
+ */
+BEGIN_FTR_SECTION
+ ld r10,ORIG_GPR3(r1)
+ mtspr SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+ MACHINE_CHECK_HANDLER_WINDUP
+ /* See comment at machine_check exception, don't turn on RI */
+ INT_HANDLER machine_check, 0x200, area=PACA_EXMC, ri=0, dar=1, dsisr=1, kvm=1
EXC_COMMON_BEGIN(machine_check_common)
/*
* Machine check is different because we use a different
* save area: PACA_EXMC instead of PACA_EXGEN.
*/
- EXCEPTION_COMMON(PACA_EXMC, 0x200)
+ INT_COMMON 0x200, PACA_EXMC, 1, 1, 1, 1, 1
FINISH_NAP
- RECONCILE_IRQ_STATE(r10, r11)
- ld r3,PACA_EXMC+EX_DAR(r13)
- lwz r4,PACA_EXMC+EX_DSISR(r13)
/* Enable MSR_RI when finished with PACA_EXMC */
li r10,MSR_RI
mtmsrd r10,1
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
b ret_from_except
-#define MACHINE_CHECK_HANDLER_WINDUP \
- /* Clear MSR_RI before setting SRR0 and SRR1. */\
- li r9,0; \
- mtmsrd r9,1; /* Clear MSR_RI */ \
- /* Decrement paca->in_mce now RI is clear. */ \
- lhz r12,PACA_IN_MCE(r13); \
- subi r12,r12,1; \
- sth r12,PACA_IN_MCE(r13); \
- EXCEPTION_RESTORE_REGS EXC_STD
-
#ifdef CONFIG_PPC_P7_NAP
/*
* This is an idle wakeup. Low level machine check has already been
@@ -1101,72 +1112,8 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
bltlr cr1 /* no state loss, return to idle caller */
b idle_return_gpr_loss
#endif
- /*
- * Handle machine check early in real mode. We come here with
- * ME=1, MMU (IR=0 and DR=0) off and using MC emergency stack.
- */
-EXC_COMMON_BEGIN(machine_check_handle_early)
- std r0,GPR0(r1) /* Save r0 */
- EXCEPTION_PROLOG_COMMON_3(0x200)
- bl save_nvgprs
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_early
- std r3,RESULT(r1) /* Save result */
- ld r12,_MSR(r1)
-BEGIN_FTR_SECTION
- b 4f
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
-#ifdef CONFIG_PPC_P7_NAP
- /*
- * Check if thread was in power saving mode. We come here when any
- * of the following is true:
- * a. thread wasn't in power saving mode
- * b. thread was in power saving mode with no state loss,
- * supervisor state loss or hypervisor state loss.
- *
- * Go back to nap/sleep/winkle mode again if (b) is true.
- */
-BEGIN_FTR_SECTION
- rlwinm. r11,r12,47-31,30,31
- bne machine_check_idle_common
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif
-
- /*
- * Check if we are coming from hypervisor userspace. If yes then we
- * continue in host kernel in V mode to deliver the MC event.
- */
- rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
- beq 5f
-4: andi. r11,r12,MSR_PR /* See if coming from user. */
- bne 9f /* continue in V mode if we are. */
-
-5:
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-BEGIN_FTR_SECTION
- /*
- * We are coming from kernel context. Check if we are coming from
- * guest. if yes, then we can continue. We will fall through
- * do_kvm_200->kvmppc_interrupt to deliver the MC event to guest.
- */
- lbz r11,HSTATE_IN_GUEST(r13)
- cmpwi r11,0 /* Check if coming from guest */
- bne 9f /* continue if we are. */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-#endif
- /*
- * At this point we are not sure about what context we come from.
- * Queue up the MCE event and return from the interrupt.
- * But before that, check if this is an un-recoverable exception.
- * If yes, then stay on emergency stack and panic.
- */
- andi. r11,r12,MSR_RI
- bne 2f
-1: mfspr r11,SPRN_SRR0
- LOAD_HANDLER(r10,unrecover_mce)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
+EXC_COMMON_BEGIN(unrecoverable_mce)
/*
* We are going down. But there are chances that we might get hit by
* another MCE during panic path and we may run into unstable state
@@ -1174,84 +1121,36 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
* when another MCE is hit during panic path, system will checkstop
* and hypervisor will get restarted cleanly by SP.
*/
- li r3,MSR_ME
- andc r10,r10,r3 /* Turn off MSR_ME */
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-2:
- /*
- * Check if we have successfully handled/recovered from error, if not
- * then stay on emergency stack and panic.
- */
- ld r3,RESULT(r1) /* Load result */
- cmpdi r3,0 /* see if we handled MCE successfully */
-
- beq 1b /* if !handled then panic */
BEGIN_FTR_SECTION
- /*
- * Return from MC interrupt.
- * Queue up the MCE event so that we can log it later, while
- * returning from kernel or opal call.
- */
- bl machine_check_queue_event
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_USER_OR_KERNEL
-FTR_SECTION_ELSE
- /*
- * pSeries: Return from MC interrupt. Before that stay on emergency
- * stack and call machine_check_exception to log the MCE event.
- */
- LOAD_HANDLER(r10,mce_return)
- mtspr SPRN_SRR0,r10
+ li r10,0 /* clear MSR_RI */
+ mtmsrd r10,1
+ bl disable_machine_check
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-9:
- /* Deliver the machine check to host kernel in V mode. */
- MACHINE_CHECK_HANDLER_WINDUP
- EXCEPTION_PROLOG_0 PACA_EXMC
- b machine_check_pSeries_0
+ li r3,MSR_ME
+ andc r10,r10,r3
+ mtmsrd r10
-EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
addi r3,r1,STACK_FRAME_OVERHEAD
bl machine_check_exception
+
/*
- * We will not reach here. Even if we did, there is no way out. Call
- * unrecoverable_exception and die.
+ * We will not reach here. Even if we did, there is no way out.
+ * Call unrecoverable_exception and die.
*/
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(mce_return)
- /* Invoke machine_check_exception to print MCE event and return. */
addi r3,r1,STACK_FRAME_OVERHEAD
- bl machine_check_exception
- MACHINE_CHECK_HANDLER_WINDUP
- RFI_TO_KERNEL
+ bl unrecoverable_exception
b .
+
EXC_REAL_BEGIN(data_access, 0x300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b tramp_real_data_access
+ INT_HANDLER data_access, 0x300, ool=1, dar=1, dsisr=1, kvm=1
EXC_REAL_END(data_access, 0x300, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 1, 1, 0
-EXCEPTION_PROLOG_2_VIRT data_access_common, EXC_STD
+ INT_HANDLER data_access, 0x300, virt=1, dar=1, dsisr=1
EXC_VIRT_END(data_access, 0x4300, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x300)
-
+INT_KVM_HANDLER data_access, 0x300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON_BEGIN(data_access_common)
/*
* Here r13 points to the paca, r9 contains the saved CR,
@@ -1259,15 +1158,12 @@ EXC_COMMON_BEGIN(data_access_common)
* r9 - r13 are saved in paca->exgen.
* EX_DAR and EX_DSISR have saved DAR/DSISR
*/
- EXCEPTION_COMMON(PACA_EXGEN, 0x300)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- li r5,0x300
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x300, PACA_EXGEN, 1, 1, 1, 1, 1
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x300
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
@@ -1275,26 +1171,15 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- b tramp_real_data_access_slb
+ INT_HANDLER data_access_slb, 0x380, ool=1, area=PACA_EXSLB, dar=1, kvm=1
EXC_REAL_END(data_access_slb, 0x380, 0x80)
-
-TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 1, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_REAL data_access_slb_common, EXC_STD, 1
-
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- EXCEPTION_PROLOG_0 PACA_EXSLB
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSLB, 0, 0x380, 1, 0, 0
- EXCEPTION_PROLOG_2_VIRT data_access_slb_common, EXC_STD
+ INT_HANDLER data_access_slb, 0x380, virt=1, area=PACA_EXSLB, dar=1
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
-
-TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
-
+INT_KVM_HANDLER data_access_slb, 0x380, EXC_STD, PACA_EXSLB, 1
EXC_COMMON_BEGIN(data_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x380)
- ld r4,PACA_EXSLB+EX_DAR(r13)
- std r4,_DAR(r1)
+ INT_COMMON 0x380, PACA_EXSLB, 1, 1, 0, 1, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1317,33 +1202,36 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL(instruction_access, 0x400, 0x80)
-EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
-TRAMP_KVM(PACA_EXGEN, 0x400)
-
+EXC_REAL_BEGIN(instruction_access, 0x400, 0x80)
+ INT_HANDLER instruction_access, 0x400, kvm=1
+EXC_REAL_END(instruction_access, 0x400, 0x80)
+EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
+ INT_HANDLER instruction_access, 0x400, virt=1
+EXC_VIRT_END(instruction_access, 0x4400, 0x80)
+INT_KVM_HANDLER instruction_access, 0x400, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(instruction_access_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x400)
- RECONCILE_IRQ_STATE(r10, r11)
- ld r12,_MSR(r1)
- ld r3,_NIP(r1)
- andis. r4,r12,DSISR_SRR1_MATCH_64S@h
- li r5,0x400
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x400, PACA_EXGEN, 1, 1, 1, 2, 2
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
BEGIN_MMU_FTR_SECTION
+ ld r6,_MSR(r1)
+ li r3,0x400
b do_hash_page /* Try to handle as hpte fault */
MMU_FTR_SECTION_ELSE
b handle_page_fault
ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
-__EXC_REAL(instruction_access_slb, 0x480, 0x80, PACA_EXSLB)
-__EXC_VIRT(instruction_access_slb, 0x4480, 0x80, 0x480, PACA_EXSLB)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
+EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, area=PACA_EXSLB, kvm=1
+EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
+EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
+ INT_HANDLER instruction_access_slb, 0x480, virt=1, area=PACA_EXSLB
+EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
+INT_KVM_HANDLER instruction_access_slb, 0x480, EXC_STD, PACA_EXSLB, 0
EXC_COMMON_BEGIN(instruction_access_slb_common)
- EXCEPTION_COMMON(PACA_EXSLB, 0x480)
- ld r4,_NIP(r1)
+ INT_COMMON 0x480, PACA_EXSLB, 1, 1, 0, 2, 0
+ ld r4,_DAR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
/* HPT case, do SLB fault */
@@ -1359,69 +1247,44 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
std r3,RESULT(r1)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
- ld r4,_NIP(r1)
+ ld r4,_DAR(r1)
ld r5,RESULT(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_bad_slb_fault
b ret_from_except
-
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+ INT_HANDLER hardware_interrupt, 0x500, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
-
EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
-BEGIN_FTR_SECTION
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV
-FTR_SECTION_ELSE
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+ INT_HANDLER hardware_interrupt, 0x500, virt=1, hsrr=EXC_HV_OR_STD, bitmask=IRQS_DISABLED, kvm=1
EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x500)
-TRAMP_KVM_HV(PACA_EXGEN, 0x500)
+INT_KVM_HANDLER hardware_interrupt, 0x500, EXC_HV_OR_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(hardware_interrupt_common, 0x500, do_IRQ)
EXC_REAL_BEGIN(alignment, 0x600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_REAL alignment_common, EXC_STD, 1
+ INT_HANDLER alignment, 0x600, dar=1, dsisr=1, kvm=1
EXC_REAL_END(alignment, 0x600, 0x100)
-
EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x600, 1, 1, 0
- EXCEPTION_PROLOG_2_VIRT alignment_common, EXC_STD
+ INT_HANDLER alignment, 0x600, virt=1, dar=1, dsisr=1
EXC_VIRT_END(alignment, 0x4600, 0x100)
-
-TRAMP_KVM(PACA_EXGEN, 0x600)
+INT_KVM_HANDLER alignment, 0x600, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(alignment_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x600)
- ld r3,PACA_EXGEN+EX_DAR(r13)
- lwz r4,PACA_EXGEN+EX_DSISR(r13)
- std r3,_DAR(r1)
- std r4,_DSISR(r1)
+ INT_COMMON 0x600, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl alignment_exception
b ret_from_except
-EXC_REAL(program_check, 0x700, 0x100)
-EXC_VIRT(program_check, 0x4700, 0x100, 0x700)
-TRAMP_KVM(PACA_EXGEN, 0x700)
+EXC_REAL_BEGIN(program_check, 0x700, 0x100)
+ INT_HANDLER program_check, 0x700, kvm=1
+EXC_REAL_END(program_check, 0x700, 0x100)
+EXC_VIRT_BEGIN(program_check, 0x4700, 0x100)
+ INT_HANDLER program_check, 0x700, virt=1
+EXC_VIRT_END(program_check, 0x4700, 0x100)
+INT_KVM_HANDLER program_check, 0x700, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(program_check_common)
/*
* It's possible to receive a TM Bad Thing type program check with
@@ -1447,27 +1310,33 @@ EXC_COMMON_BEGIN(program_check_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- b 3f /* Jump into the macro !! */
+ INT_COMMON 0x700, PACA_EXGEN, 0, 1, 1, 0, 0
+ b 3f
2:
- EXCEPTION_COMMON(PACA_EXGEN, 0x700)
+ INT_COMMON 0x700, PACA_EXGEN, 1, 1, 1, 0, 0
+3:
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl program_check_exception
b ret_from_except
-EXC_REAL(fp_unavailable, 0x800, 0x100)
-EXC_VIRT(fp_unavailable, 0x4800, 0x100, 0x800)
-TRAMP_KVM(PACA_EXGEN, 0x800)
+EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, kvm=1
+EXC_REAL_END(fp_unavailable, 0x800, 0x100)
+EXC_VIRT_BEGIN(fp_unavailable, 0x4800, 0x100)
+ INT_HANDLER fp_unavailable, 0x800, virt=1
+EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
+INT_KVM_HANDLER fp_unavailable, 0x800, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(fp_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0x800)
+ INT_COMMON 0x800, PACA_EXGEN, 1, 1, 0, 0, 0
bne 1f /* if from user, just load it up */
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl kernel_fp_unavailable_exception
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
1:
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
BEGIN_FTR_SECTION
@@ -1490,21 +1359,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
#endif
-EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0x900)
+EXC_REAL_BEGIN(decrementer, 0x900, 0x80)
+ INT_HANDLER decrementer, 0x900, ool=1, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(decrementer, 0x900, 0x80)
+EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
+ INT_HANDLER decrementer, 0x900, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(decrementer, 0x4900, 0x80)
+INT_KVM_HANDLER decrementer, 0x900, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt)
-EXC_REAL_HV(hdecrementer, 0x980, 0x80)
-EXC_VIRT_HV(hdecrementer, 0x4980, 0x80, 0x980)
-TRAMP_KVM_HV(PACA_EXGEN, 0x980)
+EXC_REAL_BEGIN(hdecrementer, 0x980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(hdecrementer, 0x980, 0x80)
+EXC_VIRT_BEGIN(hdecrementer, 0x4980, 0x80)
+ INT_HANDLER hdecrementer, 0x980, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(hdecrementer, 0x4980, 0x80)
+INT_KVM_HANDLER hdecrementer, 0x980, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(hdecrementer_common, 0x980, hdec_interrupt)
-EXC_REAL_MASKABLE(doorbell_super, 0xa00, 0x100, IRQS_DISABLED)
-EXC_VIRT_MASKABLE(doorbell_super, 0x4a00, 0x100, 0xa00, IRQS_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xa00)
+EXC_REAL_BEGIN(doorbell_super, 0xa00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(doorbell_super, 0xa00, 0x100)
+EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
+ INT_HANDLER doorbell_super, 0xa00, virt=1, bitmask=IRQS_DISABLED
+EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
+INT_KVM_HANDLER doorbell_super, 0xa00, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, doorbell_exception)
#else
@@ -1512,17 +1393,13 @@ EXC_COMMON_ASYNC(doorbell_super_common, 0xa00, unknown_exception)
#endif
-EXC_REAL(trap_0b, 0xb00, 0x100)
-EXC_VIRT(trap_0b, 0x4b00, 0x100, 0xb00)
-TRAMP_KVM(PACA_EXGEN, 0xb00)
-EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
+EXC_REAL_NONE(0xb00, 0x100)
+EXC_VIRT_NONE(0x4b00, 0x100)
/*
* system call / hypercall (0xc00, 0x4c00)
*
* The system call exception is invoked with "sc 0" and does not alter HV bit.
- * There is support for kernel code to invoke system calls but there are no
- * in-tree users.
*
* The hypercall is invoked with "sc 1" and sets HV=1.
*
@@ -1567,7 +1444,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
GET_PACA(r13)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
- KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */
+ KVMTEST system_call EXC_STD 0xc00 /* uses r10, branch to system_call_kvm */
mfctr r9
#else
mr r9,r13
@@ -1621,7 +1498,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
EXC_REAL_BEGIN(system_call, 0xc00, 0x100)
SYSTEM_CALL 0
EXC_REAL_END(system_call, 0xc00, 0x100)
-
EXC_VIRT_BEGIN(system_call, 0x4c00, 0x100)
SYSTEM_CALL 1
EXC_VIRT_END(system_call, 0x4c00, 0x100)
@@ -1634,7 +1510,7 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100)
* ctr = orig r13
* orig r10 saved in PACA
*/
-TRAMP_KVM_BEGIN(do_kvm_0xc00)
+TRAMP_KVM_BEGIN(system_call_kvm)
/*
* Save the PPR (on systems that support it) before changing to
* HMT_MEDIUM. That allows the KVM code to save that value into the
@@ -1647,32 +1523,33 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcr r9
- KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0
+ KVM_HANDLER 0xc00, EXC_STD, PACA_EXGEN, 0
#endif
-EXC_REAL(single_step, 0xd00, 0x100)
-EXC_VIRT(single_step, 0x4d00, 0x100, 0xd00)
-TRAMP_KVM(PACA_EXGEN, 0xd00)
+EXC_REAL_BEGIN(single_step, 0xd00, 0x100)
+ INT_HANDLER single_step, 0xd00, kvm=1
+EXC_REAL_END(single_step, 0xd00, 0x100)
+EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
+ INT_HANDLER single_step, 0xd00, virt=1
+EXC_VIRT_END(single_step, 0x4d00, 0x100)
+INT_KVM_HANDLER single_step, 0xd00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(single_step_common, 0xd00, single_step_exception)
-EXC_REAL_OOL_HV(h_data_storage, 0xe00, 0x20)
-EXC_VIRT_OOL_HV(h_data_storage, 0x4e00, 0x20, 0xe00)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0xe00)
+
+EXC_REAL_BEGIN(h_data_storage, 0xe00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_REAL_END(h_data_storage, 0xe00, 0x20)
+EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
+ INT_HANDLER h_data_storage, 0xe00, ool=1, virt=1, hsrr=EXC_HV, dar=1, dsisr=1, kvm=1
+EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
+INT_KVM_HANDLER h_data_storage, 0xe00, EXC_HV, PACA_EXGEN, 1
EXC_COMMON_BEGIN(h_data_storage_common)
- mfspr r10,SPRN_HDAR
- std r10,PACA_EXGEN+EX_DAR(r13)
- mfspr r10,SPRN_HDSISR
- stw r10,PACA_EXGEN+EX_DSISR(r13)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe00)
+ INT_COMMON 0xe00, PACA_EXGEN, 1, 1, 1, 1, 1
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
BEGIN_MMU_FTR_SECTION
- ld r4,PACA_EXGEN+EX_DAR(r13)
- lwz r5,PACA_EXGEN+EX_DSISR(r13)
- std r4,_DAR(r1)
- std r5,_DSISR(r1)
+ ld r4,_DAR(r1)
li r5,SIGSEGV
bl bad_page_fault
MMU_FTR_SECTION_ELSE
@@ -1681,15 +1558,23 @@ ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX)
b ret_from_except
-EXC_REAL_OOL_HV(h_instr_storage, 0xe20, 0x20)
-EXC_VIRT_OOL_HV(h_instr_storage, 0x4e20, 0x20, 0xe20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe20)
+EXC_REAL_BEGIN(h_instr_storage, 0xe20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_instr_storage, 0xe20, 0x20)
+EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
+ INT_HANDLER h_instr_storage, 0xe20, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
+INT_KVM_HANDLER h_instr_storage, 0xe20, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_instr_storage_common, 0xe20, unknown_exception)
-EXC_REAL_OOL_HV(emulation_assist, 0xe40, 0x20)
-EXC_VIRT_OOL_HV(emulation_assist, 0x4e40, 0x20, 0xe40)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe40)
+EXC_REAL_BEGIN(emulation_assist, 0xe40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(emulation_assist, 0xe40, 0x20)
+EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
+ INT_HANDLER emulation_assist, 0xe40, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
+INT_KVM_HANDLER emulation_assist, 0xe40, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
@@ -1699,16 +1584,10 @@ EXC_COMMON(emulation_assist_common, 0xe40, emulation_assist_interrupt)
* mode.
*/
EXC_REAL_BEGIN(hmi_exception, 0xe60, 0x20)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- b hmi_exception_early
+ INT_HANDLER hmi_exception, 0xe60, ool=1, early=1, hsrr=EXC_HV, ri=0, kvm=1
EXC_REAL_END(hmi_exception, 0xe60, 0x20)
EXC_VIRT_NONE(0x4e60, 0x20)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe60)
-TRAMP_REAL_BEGIN(hmi_exception_early)
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, 0
- mfctr r10 /* save ctr, even for !RELOCATABLE */
- BRANCH_TO_C000(r11, hmi_exception_early_common)
-
+INT_KVM_HANDLER hmi_exception, 0xe60, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_BEGIN(hmi_exception_early_common)
mtctr r10 /* Restore ctr */
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
@@ -1716,10 +1595,10 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
mr r10,r1 /* Save r1 */
ld r1,PACAEMERGSP(r13) /* Use emergency stack for realmode */
subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- EXCEPTION_PROLOG_COMMON_1()
+
/* We don't touch AMR here, we never go to virtual mode */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 0, 0, 0, 0, 0
+
addi r3,r1,STACK_FRAME_OVERHEAD
bl hmi_exception_realmode
cmpdi cr0,r3,0
@@ -1734,23 +1613,25 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
* firmware.
*/
EXCEPTION_RESTORE_REGS EXC_HV
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0xe60, 0, 0, IRQS_DISABLED
- EXCEPTION_PROLOG_2_REAL hmi_exception_common, EXC_HV, 1
+ INT_HANDLER hmi_exception, 0xe60, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
EXC_COMMON_BEGIN(hmi_exception_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xe60)
+ INT_COMMON 0xe60, PACA_EXGEN, 1, 1, 1, 0, 0
FINISH_NAP
- bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
RUNLATCH_ON
+ bl save_nvgprs
addi r3,r1,STACK_FRAME_OVERHEAD
bl handle_hmi_exception
b ret_from_except
-EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xe80)
+
+EXC_REAL_BEGIN(h_doorbell, 0xe80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_doorbell, 0xe80, 0x20)
+EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
+ INT_HANDLER h_doorbell, 0xe80, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
+INT_KVM_HANDLER h_doorbell, 0xe80, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DOORBELL
EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, doorbell_exception)
#else
@@ -1758,9 +1639,13 @@ EXC_COMMON_ASYNC(h_doorbell_common, 0xe80, unknown_exception)
#endif
-EXC_REAL_OOL_MASKABLE_HV(h_virt_irq, 0xea0, 0x20, IRQS_DISABLED)
-EXC_VIRT_OOL_MASKABLE_HV(h_virt_irq, 0x4ea0, 0x20, 0xea0, IRQS_DISABLED)
-TRAMP_KVM_HV(PACA_EXGEN, 0xea0)
+EXC_REAL_BEGIN(h_virt_irq, 0xea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_REAL_END(h_virt_irq, 0xea0, 0x20)
+EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
+ INT_HANDLER h_virt_irq, 0xea0, ool=1, virt=1, hsrr=EXC_HV, bitmask=IRQS_DISABLED, kvm=1
+EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
+INT_KVM_HANDLER h_virt_irq, 0xea0, EXC_HV, PACA_EXGEN, 0
EXC_COMMON_ASYNC(h_virt_irq_common, 0xea0, do_IRQ)
@@ -1770,17 +1655,25 @@ EXC_REAL_NONE(0xee0, 0x20)
EXC_VIRT_NONE(0x4ee0, 0x20)
-EXC_REAL_OOL_MASKABLE(performance_monitor, 0xf00, 0x20, IRQS_PMI_DISABLED)
-EXC_VIRT_OOL_MASKABLE(performance_monitor, 0x4f00, 0x20, 0xf00, IRQS_PMI_DISABLED)
-TRAMP_KVM(PACA_EXGEN, 0xf00)
+EXC_REAL_BEGIN(performance_monitor, 0xf00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, bitmask=IRQS_PMI_DISABLED, kvm=1
+EXC_REAL_END(performance_monitor, 0xf00, 0x20)
+EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
+ INT_HANDLER performance_monitor, 0xf00, ool=1, virt=1, bitmask=IRQS_PMI_DISABLED
+EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
+INT_KVM_HANDLER performance_monitor, 0xf00, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_ASYNC(performance_monitor_common, 0xf00, performance_monitor_exception)
-EXC_REAL_OOL(altivec_unavailable, 0xf20, 0x20)
-EXC_VIRT_OOL(altivec_unavailable, 0x4f20, 0x20, 0xf20)
-TRAMP_KVM(PACA_EXGEN, 0xf20)
+EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, kvm=1
+EXC_REAL_END(altivec_unavailable, 0xf20, 0x20)
+EXC_VIRT_BEGIN(altivec_unavailable, 0x4f20, 0x20)
+ INT_HANDLER altivec_unavailable, 0xf20, ool=1, virt=1
+EXC_VIRT_END(altivec_unavailable, 0x4f20, 0x20)
+INT_KVM_HANDLER altivec_unavailable, 0xf20, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(altivec_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf20)
+ INT_COMMON 0xf20, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_ALTIVEC
BEGIN_FTR_SECTION
beq 1f
@@ -1813,11 +1706,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
b ret_from_except
-EXC_REAL_OOL(vsx_unavailable, 0xf40, 0x20)
-EXC_VIRT_OOL(vsx_unavailable, 0x4f40, 0x20, 0xf40)
-TRAMP_KVM(PACA_EXGEN, 0xf40)
+EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, kvm=1
+EXC_REAL_END(vsx_unavailable, 0xf40, 0x20)
+EXC_VIRT_BEGIN(vsx_unavailable, 0x4f40, 0x20)
+ INT_HANDLER vsx_unavailable, 0xf40, ool=1, virt=1
+EXC_VIRT_END(vsx_unavailable, 0x4f40, 0x20)
+INT_KVM_HANDLER vsx_unavailable, 0xf40, EXC_STD, PACA_EXGEN, 0
EXC_COMMON_BEGIN(vsx_unavailable_common)
- EXCEPTION_COMMON(PACA_EXGEN, 0xf40)
+ INT_COMMON 0xf40, PACA_EXGEN, 1, 1, 0, 0, 0
#ifdef CONFIG_VSX
BEGIN_FTR_SECTION
beq 1f
@@ -1849,15 +1746,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
b ret_from_except
-EXC_REAL_OOL(facility_unavailable, 0xf60, 0x20)
-EXC_VIRT_OOL(facility_unavailable, 0x4f60, 0x20, 0xf60)
-TRAMP_KVM(PACA_EXGEN, 0xf60)
+EXC_REAL_BEGIN(facility_unavailable, 0xf60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, kvm=1
+EXC_REAL_END(facility_unavailable, 0xf60, 0x20)
+EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
+ INT_HANDLER facility_unavailable, 0xf60, ool=1, virt=1
+EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
+INT_KVM_HANDLER facility_unavailable, 0xf60, EXC_STD, PACA_EXGEN, 0
EXC_COMMON(facility_unavailable_common, 0xf60, facility_unavailable_exception)
-EXC_REAL_OOL_HV(h_facility_unavailable, 0xf80, 0x20)
-EXC_VIRT_OOL_HV(h_facility_unavailable, 0x4f80, 0x20, 0xf80)
-TRAMP_KVM_HV(PACA_EXGEN, 0xf80)
+EXC_REAL_BEGIN(h_facility_unavailable, 0xf80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(h_facility_unavailable, 0xf80, 0x20)
+EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
+ INT_HANDLER h_facility_unavailable, 0xf80, ool=1, virt=1, hsrr=EXC_HV, kvm=1
+EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
+INT_KVM_HANDLER h_facility_unavailable, 0xf80, EXC_HV, PACA_EXGEN, 0
EXC_COMMON(h_facility_unavailable_common, 0xf80, facility_unavailable_exception)
@@ -1874,9 +1779,11 @@ EXC_REAL_NONE(0x1100, 0x100)
EXC_VIRT_NONE(0x5100, 0x100)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_system_error, 0x1200, 0x100)
+EXC_REAL_BEGIN(cbe_system_error, 0x1200, 0x100)
+ INT_HANDLER cbe_system_error, 0x1200, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
EXC_VIRT_NONE(0x5200, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1200)
+INT_KVM_HANDLER cbe_system_error, 0x1200, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_system_error_common, 0x1200, cbe_system_error_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1200, 0x100)
@@ -1884,37 +1791,43 @@ EXC_VIRT_NONE(0x5200, 0x100)
#endif
-EXC_REAL(instruction_breakpoint, 0x1300, 0x100)
-EXC_VIRT(instruction_breakpoint, 0x5300, 0x100, 0x1300)
-TRAMP_KVM_SKIP(PACA_EXGEN, 0x1300)
+EXC_REAL_BEGIN(instruction_breakpoint, 0x1300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, kvm=1
+EXC_REAL_END(instruction_breakpoint, 0x1300, 0x100)
+EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
+ INT_HANDLER instruction_breakpoint, 0x1300, virt=1
+EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
+INT_KVM_HANDLER instruction_breakpoint, 0x1300, EXC_STD, PACA_EXGEN, 1
EXC_COMMON(instruction_breakpoint_common, 0x1300, instruction_breakpoint_exception)
+
EXC_REAL_NONE(0x1400, 0x100)
EXC_VIRT_NONE(0x5400, 0x100)
EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
- EXCEPTION_PROLOG_0 PACA_EXGEN
- EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0
-
+ INT_HANDLER denorm_exception_hv, 0x1500, early=2, hsrr=EXC_HV
#ifdef CONFIG_PPC_DENORMALISATION
mfspr r10,SPRN_HSRR1
andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
bne+ denorm_assist
#endif
-
- KVMTEST EXC_HV 0x1500
- EXCEPTION_PROLOG_2_REAL denorm_common, EXC_HV, 1
+ KVMTEST denorm_exception_hv, EXC_HV 0x1500
+ INT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV, 1
EXC_REAL_END(denorm_exception_hv, 0x1500, 0x100)
#ifdef CONFIG_PPC_DENORMALISATION
EXC_VIRT_BEGIN(denorm_exception, 0x5500, 0x100)
- b exc_real_0x1500_denorm_exception_hv
+ INT_HANDLER denorm_exception, 0x1500, 0, 2, 1, EXC_HV, PACA_EXGEN, 1, 0, 0, 0, 0
+ mfspr r10,SPRN_HSRR1
+ andis. r10,r10,(HSRR1_DENORM)@h /* denorm? */
+ bne+ denorm_assist
+ INT_VIRT_SAVE_SRR_AND_JUMP denorm_common, EXC_HV
EXC_VIRT_END(denorm_exception, 0x5500, 0x100)
#else
EXC_VIRT_NONE(0x5500, 0x100)
#endif
-TRAMP_KVM_HV(PACA_EXGEN, 0x1500)
+INT_KVM_HANDLER denorm_exception_hv, 0x1500, EXC_HV, PACA_EXGEN, 0
#ifdef CONFIG_PPC_DENORMALISATION
TRAMP_REAL_BEGIN(denorm_assist)
@@ -1989,9 +1902,11 @@ EXC_COMMON(denorm_common, 0x1500, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_maintenance, 0x1600, 0x100)
+EXC_REAL_BEGIN(cbe_maintenance, 0x1600, 0x100)
+ INT_HANDLER cbe_maintenance, 0x1600, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
EXC_VIRT_NONE(0x5600, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1600)
+INT_KVM_HANDLER cbe_maintenance, 0x1600, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_maintenance_common, 0x1600, cbe_maintenance_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1600, 0x100)
@@ -1999,9 +1914,13 @@ EXC_VIRT_NONE(0x5600, 0x100)
#endif
-EXC_REAL(altivec_assist, 0x1700, 0x100)
-EXC_VIRT(altivec_assist, 0x5700, 0x100, 0x1700)
-TRAMP_KVM(PACA_EXGEN, 0x1700)
+EXC_REAL_BEGIN(altivec_assist, 0x1700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, kvm=1
+EXC_REAL_END(altivec_assist, 0x1700, 0x100)
+EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
+ INT_HANDLER altivec_assist, 0x1700, virt=1
+EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
+INT_KVM_HANDLER altivec_assist, 0x1700, EXC_STD, PACA_EXGEN, 0
#ifdef CONFIG_ALTIVEC
EXC_COMMON(altivec_assist_common, 0x1700, altivec_assist_exception)
#else
@@ -2010,15 +1929,18 @@ EXC_COMMON(altivec_assist_common, 0x1700, unknown_exception)
#ifdef CONFIG_CBE_RAS
-EXC_REAL_HV(cbe_thermal, 0x1800, 0x100)
+EXC_REAL_BEGIN(cbe_thermal, 0x1800, 0x100)
+ INT_HANDLER cbe_thermal, 0x1800, ool=1, hsrr=EXC_HV, kvm=1
+EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
-TRAMP_KVM_HV_SKIP(PACA_EXGEN, 0x1800)
+INT_KVM_HANDLER cbe_thermal, 0x1800, EXC_HV, PACA_EXGEN, 1
EXC_COMMON(cbe_thermal_common, 0x1800, cbe_thermal_exception)
#else /* CONFIG_CBE_RAS */
EXC_REAL_NONE(0x1800, 0x100)
EXC_VIRT_NONE(0x5800, 0x100)
#endif
+
#ifdef CONFIG_PPC_WATCHDOG
#define MASKED_DEC_HANDLER_LABEL 3f
@@ -2028,7 +1950,7 @@ EXC_VIRT_NONE(0x5800, 0x100)
std r12,PACA_EXGEN+EX_R12(r13); \
GET_SCRATCH0(r10); \
std r10,PACA_EXGEN+EX_R13(r13); \
- EXCEPTION_PROLOG_2_REAL soft_nmi_common, _H, 1
+ INT_SAVE_SRR_AND_JUMP soft_nmi_common, _H, 1
/*
* Branch to soft_nmi_interrupt using the emergency stack. The emergency
@@ -2043,9 +1965,8 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr r10,r1
ld r1,PACAEMERGSP(r13)
subi r1,r1,INT_FRAME_SIZE
- EXCEPTION_COMMON_STACK(PACA_EXGEN, 0x900)
+ INT_COMMON 0x900, PACA_EXGEN, 0, 1, 1, 0, 0
bl save_nvgprs
- RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
bl soft_nmi_interrupt
b ret_from_except
@@ -2302,6 +2223,35 @@ CLOSE_FIXED_SECTION(virt_trampolines);
USE_TEXT_SECTION()
+/* MSR[RI] should be clear because this uses SRR[01] */
+enable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ ori r3,r3,MSR_ME
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
+/* MSR[RI] should be clear because this uses SRR[01] */
+disable_machine_check:
+ mflr r0
+ bcl 20,31,$+4
+0: mflr r3
+ addi r3,r3,(1f - 0b)
+ mtspr SPRN_SRR0,r3
+ mfmsr r3
+ li r4,MSR_ME
+ andc r3,r3,r4
+ mtspr SPRN_SRR1,r3
+ RFI_TO_KERNEL
+1: mtlr r0
+ blr
+
/*
* Hash table stuff
*/
@@ -2310,7 +2260,7 @@ do_hash_page:
#ifdef CONFIG_PPC_BOOK3S_64
lis r0,(DSISR_BAD_FAULT_64S | DSISR_DABRMATCH | DSISR_KEYFAULT)@h
ori r0,r0,DSISR_BAD_FAULT_64S@l
- and. r0,r4,r0 /* weird error? */
+ and. r0,r5,r0 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
ld r11, PACA_THREAD_INFO(r13)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
@@ -2318,15 +2268,13 @@ do_hash_page:
bne 77f /* then don't call hash_page now */
/*
- * r3 contains the faulting address
- * r4 msr
- * r5 contains the trap number
- * r6 contains dsisr
+ * r3 contains the trap number
+ * r4 contains the faulting address
+ * r5 contains dsisr
+ * r6 msr
*
* at return r3 = 0 for success, 1 for page fault, negative for error
*/
- mr r4,r12
- ld r6,_DSISR(r1)
bl __hash_page /* build HPTE if possible */
cmpdi r3,0 /* see if __hash_page succeeded */
@@ -2336,16 +2284,15 @@ do_hash_page:
/* Error */
blt- 13f
- /* Reload DSISR into r4 for the DABR check below */
- ld r4,_DSISR(r1)
+ /* Reload DAR/DSISR into r4/r5 for the DABR check below */
+ ld r4,_DAR(r1)
+ ld r5,_DSISR(r1)
#endif /* CONFIG_PPC_BOOK3S_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
-11: andis. r0,r4,DSISR_DABRMATCH@h
+11: andis. r0,r5,DSISR_DABRMATCH@h
bne- handle_dabr_fault
- ld r4,_DAR(r1)
- ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault
cmpdi r3,0
@@ -2353,7 +2300,7 @@ handle_page_fault:
bl save_nvgprs
mr r5,r3
addi r3,r1,STACK_FRAME_OVERHEAD
- lwz r4,_DAR(r1)
+ ld r4,_DAR(r1)
bl bad_page_fault
b ret_from_except
@@ -2392,7 +2339,6 @@ handle_dabr_fault:
* the access, or panic if there isn't a handler.
*/
77: bl save_nvgprs
- mr r4,r3
addi r3,r1,STACK_FRAME_OVERHEAD
li r5,SIGSEGV
bl bad_page_fault
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 4eab97292cc2..ed59855430b9 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -28,24 +28,22 @@
#include <asm/debugfs.h>
#include <asm/page.h>
#include <asm/prom.h>
-#include <asm/rtas.h>
#include <asm/fadump.h>
+#include <asm/fadump-internal.h>
#include <asm/setup.h>
static struct fw_dump fw_dump;
-static struct fadump_mem_struct fdm;
-static const struct fadump_mem_struct *fdm_active;
-#ifdef CONFIG_CMA
-static struct cma *fadump_cma;
-#endif
+static void __init fadump_reserve_crash_area(u64 base);
+
+#ifndef CONFIG_PRESERVE_FA_DUMP
static DEFINE_MUTEX(fadump_mutex);
-struct fad_crash_memory_ranges *crash_memory_ranges;
-int crash_memory_ranges_size;
-int crash_mem_ranges;
-int max_crash_mem_ranges;
+struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0 };
+struct fadump_mrange_info reserved_mrange_info = { "reserved", NULL, 0, 0, 0 };
#ifdef CONFIG_CMA
+static struct cma *fadump_cma;
+
/*
* fadump_cma_init() - Initialize CMA area from a fadump reserved memory
*
@@ -107,84 +105,45 @@ static int __init fadump_cma_init(void) { return 1; }
#endif /* CONFIG_CMA */
/* Scan the Firmware Assisted dump configuration details. */
-int __init early_init_dt_scan_fw_dump(unsigned long node,
- const char *uname, int depth, void *data)
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
{
- const __be32 *sections;
- int i, num_sections;
- int size;
- const __be32 *token;
-
- if (depth != 1 || strcmp(uname, "rtas") != 0)
+ if (depth != 1)
return 0;
- /*
- * Check if Firmware Assisted dump is supported. if yes, check
- * if dump has been initiated on last reboot.
- */
- token = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump", NULL);
- if (!token)
+ if (strcmp(uname, "rtas") == 0) {
+ rtas_fadump_dt_scan(&fw_dump, node);
return 1;
+ }
- fw_dump.fadump_supported = 1;
- fw_dump.ibm_configure_kernel_dump = be32_to_cpu(*token);
-
- /*
- * The 'ibm,kernel-dump' rtas node is present only if there is
- * dump data waiting for us.
- */
- fdm_active = of_get_flat_dt_prop(node, "ibm,kernel-dump", NULL);
- if (fdm_active)
- fw_dump.dump_active = 1;
-
- /* Get the sizes required to store dump data for the firmware provided
- * dump sections.
- * For each dump section type supported, a 32bit cell which defines
- * the ID of a supported section followed by two 32 bit cells which
- * gives teh size of the section in bytes.
- */
- sections = of_get_flat_dt_prop(node, "ibm,configure-kernel-dump-sizes",
- &size);
-
- if (!sections)
+ if (strcmp(uname, "ibm,opal") == 0) {
+ opal_fadump_dt_scan(&fw_dump, node);
return 1;
-
- num_sections = size / (3 * sizeof(u32));
-
- for (i = 0; i < num_sections; i++, sections += 3) {
- u32 type = (u32)of_read_number(sections, 1);
-
- switch (type) {
- case FADUMP_CPU_STATE_DATA:
- fw_dump.cpu_state_data_size =
- of_read_ulong(&sections[1], 2);
- break;
- case FADUMP_HPTE_REGION:
- fw_dump.hpte_region_size =
- of_read_ulong(&sections[1], 2);
- break;
- }
}
- return 1;
+ return 0;
}
/*
* If fadump is registered, check if the memory provided
* falls within boot memory area and reserved memory area.
*/
-int is_fadump_memory_area(u64 addr, ulong size)
+int is_fadump_memory_area(u64 addr, unsigned long size)
{
- u64 d_start = fw_dump.reserve_dump_area_start;
- u64 d_end = d_start + fw_dump.reserve_dump_area_size;
+ u64 d_start, d_end;
if (!fw_dump.dump_registered)
return 0;
+ if (!size)
+ return 0;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
if (((addr + size) > d_start) && (addr <= d_end))
return 1;
- return (addr + size) > RMA_START && addr <= fw_dump.boot_memory_size;
+ return (addr <= fw_dump.boot_mem_top);
}
int should_fadump_crash(void)
@@ -200,31 +159,29 @@ int is_fadump_active(void)
}
/*
- * Returns 1, if there are no holes in boot memory area,
- * 0 otherwise.
+ * Returns true, if there are no holes in memory area between d_start to d_end,
+ * false otherwise.
*/
-static int is_boot_memory_area_contiguous(void)
+static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(RMA_START);
- unsigned long end_pfn = PHYS_PFN(RMA_START + fw_dump.boot_memory_size);
- unsigned int ret = 0;
+ bool ret = false;
+ u64 start, end;
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
- if (tstart < tend) {
- /* Memory hole from start_pfn to tstart */
- if (tstart > start_pfn)
+ start = max_t(u64, d_start, reg->base);
+ end = min_t(u64, d_end, (reg->base + reg->size));
+ if (d_start < end) {
+ /* Memory hole from d_start to start */
+ if (start > d_start)
break;
- if (tend == end_pfn) {
- ret = 1;
+ if (end == d_end) {
+ ret = true;
break;
}
- start_pfn = tend + 1;
+ d_start = end + 1;
}
}
@@ -232,37 +189,45 @@ static int is_boot_memory_area_contiguous(void)
}
/*
- * Returns true, if there are no holes in reserved memory area,
+ * Returns true, if there are no holes in boot memory area,
* false otherwise.
*/
-static bool is_reserved_memory_area_contiguous(void)
+bool is_fadump_boot_mem_contiguous(void)
{
- struct memblock_region *reg;
- unsigned long start, end;
- unsigned long d_start = fw_dump.reserve_dump_area_start;
- unsigned long d_end = d_start + fw_dump.reserve_dump_area_size;
-
- for_each_memblock(memory, reg) {
- start = max(d_start, (unsigned long)reg->base);
- end = min(d_end, (unsigned long)(reg->base + reg->size));
- if (d_start < end) {
- /* Memory hole from d_start to start */
- if (start > d_start)
- break;
+ unsigned long d_start, d_end;
+ bool ret = false;
+ int i;
- if (end == d_end)
- return true;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ d_start = fw_dump.boot_mem_addr[i];
+ d_end = d_start + fw_dump.boot_mem_sz[i];
- d_start = end + 1;
- }
+ ret = is_fadump_mem_area_contiguous(d_start, d_end);
+ if (!ret)
+ break;
}
- return false;
+ return ret;
+}
+
+/*
+ * Returns true, if there are no holes in reserved memory area,
+ * false otherwise.
+ */
+bool is_fadump_reserved_mem_contiguous(void)
+{
+ u64 d_start, d_end;
+
+ d_start = fw_dump.reserve_dump_area_start;
+ d_end = d_start + fw_dump.reserve_dump_area_size;
+ return is_fadump_mem_area_contiguous(d_start, d_end);
}
/* Print firmware assisted dump configurations for debugging purpose. */
static void fadump_show_config(void)
{
+ int i;
+
pr_debug("Support for firmware-assisted dump (fadump): %s\n",
(fw_dump.fadump_supported ? "present" : "no support"));
@@ -276,62 +241,13 @@ static void fadump_show_config(void)
pr_debug("Dump section sizes:\n");
pr_debug(" CPU state data size: %lx\n", fw_dump.cpu_state_data_size);
pr_debug(" HPTE region size : %lx\n", fw_dump.hpte_region_size);
- pr_debug("Boot memory size : %lx\n", fw_dump.boot_memory_size);
-}
-
-static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
- unsigned long addr)
-{
- if (!fdm)
- return 0;
-
- memset(fdm, 0, sizeof(struct fadump_mem_struct));
- addr = addr & PAGE_MASK;
-
- fdm->header.dump_format_version = cpu_to_be32(0x00000001);
- fdm->header.dump_num_sections = cpu_to_be16(3);
- fdm->header.dump_status_flag = 0;
- fdm->header.offset_first_dump_section =
- cpu_to_be32((u32)offsetof(struct fadump_mem_struct, cpu_state_data));
-
- /*
- * Fields for disk dump option.
- * We are not using disk dump option, hence set these fields to 0.
- */
- fdm->header.dd_block_size = 0;
- fdm->header.dd_block_offset = 0;
- fdm->header.dd_num_blocks = 0;
- fdm->header.dd_offset_disk_path = 0;
-
- /* set 0 to disable an automatic dump-reboot. */
- fdm->header.max_time_auto = 0;
-
- /* Kernel dump sections */
- /* cpu state data section. */
- fdm->cpu_state_data.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->cpu_state_data.source_data_type = cpu_to_be16(FADUMP_CPU_STATE_DATA);
- fdm->cpu_state_data.source_address = 0;
- fdm->cpu_state_data.source_len = cpu_to_be64(fw_dump.cpu_state_data_size);
- fdm->cpu_state_data.destination_address = cpu_to_be64(addr);
- addr += fw_dump.cpu_state_data_size;
-
- /* hpte region section */
- fdm->hpte_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->hpte_region.source_data_type = cpu_to_be16(FADUMP_HPTE_REGION);
- fdm->hpte_region.source_address = 0;
- fdm->hpte_region.source_len = cpu_to_be64(fw_dump.hpte_region_size);
- fdm->hpte_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.hpte_region_size;
-
- /* RMA region section */
- fdm->rmr_region.request_flag = cpu_to_be32(FADUMP_REQUEST_FLAG);
- fdm->rmr_region.source_data_type = cpu_to_be16(FADUMP_REAL_MODE_REGION);
- fdm->rmr_region.source_address = cpu_to_be64(RMA_START);
- fdm->rmr_region.source_len = cpu_to_be64(fw_dump.boot_memory_size);
- fdm->rmr_region.destination_address = cpu_to_be64(addr);
- addr += fw_dump.boot_memory_size;
-
- return addr;
+ pr_debug(" Boot memory size : %lx\n", fw_dump.boot_memory_size);
+ pr_debug(" Boot memory top : %llx\n", fw_dump.boot_mem_top);
+ pr_debug("Boot memory regions cnt: %llx\n", fw_dump.boot_mem_regs_cnt);
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ pr_debug("[%03d] base = %llx, size = %llx\n", i,
+ fw_dump.boot_mem_addr[i], fw_dump.boot_mem_sz[i]);
+ }
}
/**
@@ -349,10 +265,10 @@ static unsigned long init_fadump_mem_struct(struct fadump_mem_struct *fdm,
* that is required for a kernel to boot successfully.
*
*/
-static inline unsigned long fadump_calculate_reserve_size(void)
+static inline u64 fadump_calculate_reserve_size(void)
{
+ u64 base, size, bootmem_min;
int ret;
- unsigned long long base, size;
if (fw_dump.reserve_bootvar)
pr_warn("'fadump_reserve_mem=' parameter is deprecated in favor of 'crashkernel=' parameter.\n");
@@ -402,7 +318,8 @@ static inline unsigned long fadump_calculate_reserve_size(void)
if (memory_limit && size > memory_limit)
size = memory_limit;
- return (size > MIN_BOOT_MEM ? size : MIN_BOOT_MEM);
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ return (size > bootmem_min ? size : bootmem_min);
}
/*
@@ -423,57 +340,136 @@ static unsigned long get_fadump_area_size(void)
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
size = PAGE_ALIGN(size);
+
+ /* This is to hold kernel metadata on platforms that support it */
+ size += (fw_dump.ops->fadump_get_metadata_size ?
+ fw_dump.ops->fadump_get_metadata_size() : 0);
return size;
}
-static void __init fadump_reserve_crash_area(unsigned long base,
- unsigned long size)
+static int __init add_boot_mem_region(unsigned long rstart,
+ unsigned long rsize)
+{
+ int i = fw_dump.boot_mem_regs_cnt++;
+
+ if (fw_dump.boot_mem_regs_cnt > FADUMP_MAX_MEM_REGS) {
+ fw_dump.boot_mem_regs_cnt = FADUMP_MAX_MEM_REGS;
+ return 0;
+ }
+
+ pr_debug("Added boot memory range[%d] [%#016lx-%#016lx)\n",
+ i, rstart, (rstart + rsize));
+ fw_dump.boot_mem_addr[i] = rstart;
+ fw_dump.boot_mem_sz[i] = rsize;
+ return 1;
+}
+
+/*
+ * Firmware usually has a hard limit on the data it can copy per region.
+ * Honour that by splitting a memory range into multiple regions.
+ */
+static int __init add_boot_mem_regions(unsigned long mstart,
+ unsigned long msize)
{
+ unsigned long rstart, rsize, max_size;
+ int ret = 1;
+
+ rstart = mstart;
+ max_size = fw_dump.max_copy_size ? fw_dump.max_copy_size : msize;
+ while (msize) {
+ if (msize > max_size)
+ rsize = max_size;
+ else
+ rsize = msize;
+
+ ret = add_boot_mem_region(rstart, rsize);
+ if (!ret)
+ break;
+
+ msize -= rsize;
+ rstart += rsize;
+ }
+
+ return ret;
+}
+
+static int __init fadump_get_boot_mem_regions(void)
+{
+ unsigned long base, size, cur_size, hole_size, last_end;
+ unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg;
- unsigned long mstart, mend, msize;
+ int ret = 1;
+
+ fw_dump.boot_mem_regs_cnt = 0;
+ last_end = 0;
+ hole_size = 0;
+ cur_size = 0;
for_each_memblock(memory, reg) {
- mstart = max_t(unsigned long, base, reg->base);
- mend = reg->base + reg->size;
- mend = min(base + size, mend);
-
- if (mstart < mend) {
- msize = mend - mstart;
- memblock_reserve(mstart, msize);
- pr_info("Reserved %ldMB of memory at %#016lx for saving crash dump\n",
- (msize >> 20), mstart);
+ base = reg->base;
+ size = reg->size;
+ hole_size += (base - last_end);
+
+ if ((cur_size + size) >= mem_size) {
+ size = (mem_size - cur_size);
+ ret = add_boot_mem_regions(base, size);
+ break;
}
+
+ mem_size -= size;
+ cur_size += size;
+ ret = add_boot_mem_regions(base, size);
+ if (!ret)
+ break;
+
+ last_end = base + size;
}
+ fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
+
+ return ret;
}
int __init fadump_reserve_mem(void)
{
- unsigned long base, size, memory_boundary;
+ u64 base, size, mem_boundary, bootmem_min, align = PAGE_SIZE;
+ bool is_memblock_bottom_up = memblock_bottom_up();
+ int ret = 1;
if (!fw_dump.fadump_enabled)
return 0;
if (!fw_dump.fadump_supported) {
- printk(KERN_INFO "Firmware-assisted dump is not supported on"
- " this hardware\n");
- fw_dump.fadump_enabled = 0;
- return 0;
+ pr_info("Firmware-Assisted Dump is not supported on this hardware\n");
+ goto error_out;
}
+
/*
* Initialize boot memory size
* If dump is active then we have already calculated the size during
* first kernel.
*/
- if (fdm_active)
- fw_dump.boot_memory_size = be64_to_cpu(fdm_active->rmr_region.source_len);
- else {
- fw_dump.boot_memory_size = fadump_calculate_reserve_size();
+ if (!fw_dump.dump_active) {
+ fw_dump.boot_memory_size =
+ PAGE_ALIGN(fadump_calculate_reserve_size());
#ifdef CONFIG_CMA
- if (!fw_dump.nocma)
+ if (!fw_dump.nocma) {
+ align = FADUMP_CMA_ALIGNMENT;
fw_dump.boot_memory_size =
- ALIGN(fw_dump.boot_memory_size,
- FADUMP_CMA_ALIGNMENT);
+ ALIGN(fw_dump.boot_memory_size, align);
+ }
#endif
+
+ bootmem_min = fw_dump.ops->fadump_get_bootmem_min();
+ if (fw_dump.boot_memory_size < bootmem_min) {
+ pr_err("Can't enable fadump with boot memory size (0x%lx) less than 0x%llx\n",
+ fw_dump.boot_memory_size, bootmem_min);
+ goto error_out;
+ }
+
+ if (!fadump_get_boot_mem_regions()) {
+ pr_err("Too many holes in boot memory area to enable fadump\n");
+ goto error_out;
+ }
}
/*
@@ -493,10 +489,13 @@ int __init fadump_reserve_mem(void)
" dump, now %#016llx\n", memory_limit);
}
if (memory_limit)
- memory_boundary = memory_limit;
+ mem_boundary = memory_limit;
else
- memory_boundary = memblock_end_of_DRAM();
+ mem_boundary = memblock_end_of_DRAM();
+ base = fw_dump.boot_mem_top;
+ size = get_fadump_area_size();
+ fw_dump.reserve_dump_area_size = size;
if (fw_dump.dump_active) {
pr_info("Firmware-assisted dump is active.\n");
@@ -510,58 +509,55 @@ int __init fadump_reserve_mem(void)
#endif
/*
* If last boot has crashed then reserve all the memory
- * above boot_memory_size so that we don't touch it until
+ * above boot memory size so that we don't touch it until
* dump is written to disk by userspace tool. This memory
- * will be released for general use once the dump is saved.
+ * can be released for general use by invalidating fadump.
*/
- base = fw_dump.boot_memory_size;
- size = memory_boundary - base;
- fadump_reserve_crash_area(base, size);
-
- fw_dump.fadumphdr_addr =
- be64_to_cpu(fdm_active->rmr_region.destination_address) +
- be64_to_cpu(fdm_active->rmr_region.source_len);
- pr_debug("fadumphdr_addr = %pa\n", &fw_dump.fadumphdr_addr);
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- } else {
- size = get_fadump_area_size();
+ fadump_reserve_crash_area(base);
+ pr_debug("fadumphdr_addr = %#016lx\n", fw_dump.fadumphdr_addr);
+ pr_debug("Reserve dump area start address: 0x%lx\n",
+ fw_dump.reserve_dump_area_start);
+ } else {
/*
* Reserve memory at an offset closer to bottom of the RAM to
- * minimize the impact of memory hot-remove operation. We can't
- * use memblock_find_in_range() here since it doesn't allocate
- * from bottom to top.
+ * minimize the impact of memory hot-remove operation.
*/
- for (base = fw_dump.boot_memory_size;
- base <= (memory_boundary - size);
- base += size) {
- if (memblock_is_region_memory(base, size) &&
- !memblock_is_region_reserved(base, size))
- break;
+ memblock_set_bottom_up(true);
+ base = memblock_find_in_range(base, mem_boundary, size, align);
+
+ /* Restore the previous allocation mode */
+ memblock_set_bottom_up(is_memblock_bottom_up);
+
+ if (!base) {
+ pr_err("Failed to find memory chunk for reservation!\n");
+ goto error_out;
}
- if ((base > (memory_boundary - size)) ||
- memblock_reserve(base, size)) {
- pr_err("Failed to reserve memory\n");
- return 0;
+ fw_dump.reserve_dump_area_start = base;
+
+ /*
+ * Calculate the kernel metadata address and register it with
+ * f/w if the platform supports.
+ */
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ goto error_out;
+
+ if (memblock_reserve(base, size)) {
+ pr_err("Failed to reserve memory!\n");
+ goto error_out;
}
- pr_info("Reserved %ldMB of memory at %ldMB for firmware-"
- "assisted dump (System RAM: %ldMB)\n",
- (unsigned long)(size >> 20),
- (unsigned long)(base >> 20),
- (unsigned long)(memblock_phys_mem_size() >> 20));
+ pr_info("Reserved %lldMB of memory at %#016llx (System RAM: %lldMB)\n",
+ (size >> 20), base, (memblock_phys_mem_size() >> 20));
- fw_dump.reserve_dump_area_start = base;
- fw_dump.reserve_dump_area_size = size;
- return fadump_cma_init();
+ ret = fadump_cma_init();
}
- return 1;
-}
-unsigned long __init arch_reserved_kernel_pages(void)
-{
- return memblock_reserved_size() / PAGE_SIZE;
+ return ret;
+error_out:
+ fw_dump.fadump_enabled = 0;
+ return 0;
}
/* Look for fadump= cmdline option. */
@@ -596,61 +592,6 @@ static int __init early_fadump_reserve_mem(char *p)
}
early_param("fadump_reserve_mem", early_fadump_reserve_mem);
-static int register_fw_dump(struct fadump_mem_struct *fdm)
-{
- int rc, err;
- unsigned int wait_time;
-
- pr_debug("Registering for firmware-assisted kernel dump...\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_REGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
-
- } while (wait_time);
-
- err = -EIO;
- switch (rc) {
- default:
- pr_err("Failed to register. Unknown Error(%d).\n", rc);
- break;
- case -1:
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Hardware Error(%d).\n", rc);
- break;
- case -3:
- if (!is_boot_memory_area_contiguous())
- pr_err("Can't have holes in boot memory area while registering fadump\n");
- else if (!is_reserved_memory_area_contiguous())
- pr_err("Can't have holes in reserved memory area while"
- " registering fadump\n");
-
- printk(KERN_ERR "Failed to register firmware-assisted kernel"
- " dump. Parameter Error(%d).\n", rc);
- err = -EINVAL;
- break;
- case -9:
- printk(KERN_ERR "firmware-assisted kernel dump is already "
- " registered.");
- fw_dump.dump_registered = 1;
- err = -EEXIST;
- break;
- case 0:
- printk(KERN_INFO "firmware-assisted kernel dump registration"
- " is successful\n");
- fw_dump.dump_registered = 1;
- err = 0;
- break;
- }
- return err;
-}
-
void crash_fadump(struct pt_regs *regs, const char *str)
{
struct fadump_crash_info_header *fdh = NULL;
@@ -693,71 +634,10 @@ void crash_fadump(struct pt_regs *regs, const char *str)
fdh->online_mask = *cpu_online_mask;
- /* Call ibm,os-term rtas call to trigger firmware assisted dump */
- rtas_os_term((char *)str);
-}
-
-#define GPR_MASK 0xffffff0000000000
-static inline int fadump_gpr_index(u64 id)
-{
- int i = -1;
- char str[3];
-
- if ((id & GPR_MASK) == REG_ID("GPR")) {
- /* get the digits at the end */
- id &= ~GPR_MASK;
- id >>= 24;
- str[2] = '\0';
- str[1] = id & 0xff;
- str[0] = (id >> 8) & 0xff;
- sscanf(str, "%d", &i);
- if (i > 31)
- i = -1;
- }
- return i;
-}
-
-static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
- u64 reg_val)
-{
- int i;
-
- i = fadump_gpr_index(reg_id);
- if (i >= 0)
- regs->gpr[i] = (unsigned long)reg_val;
- else if (reg_id == REG_ID("NIA"))
- regs->nip = (unsigned long)reg_val;
- else if (reg_id == REG_ID("MSR"))
- regs->msr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CTR"))
- regs->ctr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("LR"))
- regs->link = (unsigned long)reg_val;
- else if (reg_id == REG_ID("XER"))
- regs->xer = (unsigned long)reg_val;
- else if (reg_id == REG_ID("CR"))
- regs->ccr = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DAR"))
- regs->dar = (unsigned long)reg_val;
- else if (reg_id == REG_ID("DSISR"))
- regs->dsisr = (unsigned long)reg_val;
-}
-
-static struct fadump_reg_entry*
-fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
-{
- memset(regs, 0, sizeof(struct pt_regs));
-
- while (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUEND")) {
- fadump_set_regval(regs, be64_to_cpu(reg_entry->reg_id),
- be64_to_cpu(reg_entry->reg_value));
- reg_entry++;
- }
- reg_entry++;
- return reg_entry;
+ fw_dump.ops->fadump_trigger(fdh, str);
}
-static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
+u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
struct elf_prstatus prstatus;
@@ -772,7 +652,7 @@ static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
return buf;
}
-static void fadump_update_elfcore_header(char *bufp)
+void fadump_update_elfcore_header(char *bufp)
{
struct elfhdr *elf;
struct elf_phdr *phdr;
@@ -784,7 +664,7 @@ static void fadump_update_elfcore_header(char *bufp)
phdr = (struct elf_phdr *)bufp;
if (phdr->p_type == PT_NOTE) {
- phdr->p_paddr = fw_dump.cpu_notes_buf;
+ phdr->p_paddr = __pa(fw_dump.cpu_notes_buf_vaddr);
phdr->p_offset = phdr->p_paddr;
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
@@ -792,228 +672,100 @@ static void fadump_update_elfcore_header(char *bufp)
return;
}
-static void *fadump_cpu_notes_buf_alloc(unsigned long size)
+static void *fadump_alloc_buffer(unsigned long size)
{
- void *vaddr;
+ unsigned long count, i;
struct page *page;
- unsigned long order, count, i;
+ void *vaddr;
- order = get_order(size);
- vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+ vaddr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (!vaddr)
return NULL;
- count = 1 << order;
+ count = PAGE_ALIGN(size) / PAGE_SIZE;
page = virt_to_page(vaddr);
for (i = 0; i < count; i++)
- SetPageReserved(page + i);
+ mark_page_reserved(page + i);
return vaddr;
}
-static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
+static void fadump_free_buffer(unsigned long vaddr, unsigned long size)
{
- struct page *page;
- unsigned long order, count, i;
-
- order = get_order(size);
- count = 1 << order;
- page = virt_to_page(vaddr);
- for (i = 0; i < count; i++)
- ClearPageReserved(page + i);
- __free_pages(page, order);
+ free_reserved_area((void *)vaddr, (void *)(vaddr + size), -1, NULL);
}
-/*
- * Read CPU state dump data and convert it into ELF notes.
- * The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
- * used to access the data to allow for additional fields to be added without
- * affecting compatibility. Each list of registers for a CPU starts with
- * "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
- * 8 Byte ASCII identifier and 8 Byte register value. The register entry
- * with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
- * of register value. For more details refer to PAPR document.
- *
- * Only for the crashing cpu we ignore the CPU dump data and get exact
- * state from fadump crash info structure populated by first kernel at the
- * time of crash.
- */
-static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
+s32 fadump_setup_cpu_notes_buf(u32 num_cpus)
{
- struct fadump_reg_save_area_header *reg_header;
- struct fadump_reg_entry *reg_entry;
- struct fadump_crash_info_header *fdh = NULL;
- void *vaddr;
- unsigned long addr;
- u32 num_cpus, *note_buf;
- struct pt_regs regs;
- int i, rc = 0, cpu = 0;
-
- if (!fdm->cpu_state_data.bytes_dumped)
- return -EINVAL;
-
- addr = be64_to_cpu(fdm->cpu_state_data.destination_address);
- vaddr = __va(addr);
-
- reg_header = vaddr;
- if (be64_to_cpu(reg_header->magic_number) != REGSAVE_AREA_MAGIC) {
- printk(KERN_ERR "Unable to read register save area.\n");
- return -ENOENT;
- }
- pr_debug("--------CPU State Data------------\n");
- pr_debug("Magic Number: %llx\n", be64_to_cpu(reg_header->magic_number));
- pr_debug("NumCpuOffset: %x\n", be32_to_cpu(reg_header->num_cpu_offset));
-
- vaddr += be32_to_cpu(reg_header->num_cpu_offset);
- num_cpus = be32_to_cpu(*((__be32 *)(vaddr)));
- pr_debug("NumCpus : %u\n", num_cpus);
- vaddr += sizeof(u32);
- reg_entry = (struct fadump_reg_entry *)vaddr;
-
/* Allocate buffer to hold cpu crash notes. */
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
- note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
- if (!note_buf) {
- printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
- "cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr =
+ (unsigned long)fadump_alloc_buffer(fw_dump.cpu_notes_buf_size);
+ if (!fw_dump.cpu_notes_buf_vaddr) {
+ pr_err("Failed to allocate %ld bytes for CPU notes buffer\n",
+ fw_dump.cpu_notes_buf_size);
return -ENOMEM;
}
- fw_dump.cpu_notes_buf = __pa(note_buf);
-
- pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
- (num_cpus * sizeof(note_buf_t)), note_buf);
- if (fw_dump.fadumphdr_addr)
- fdh = __va(fw_dump.fadumphdr_addr);
-
- for (i = 0; i < num_cpus; i++) {
- if (be64_to_cpu(reg_entry->reg_id) != REG_ID("CPUSTRT")) {
- printk(KERN_ERR "Unable to read CPU state data\n");
- rc = -ENOENT;
- goto error_out;
- }
- /* Lower 4 bytes of reg_value contains logical cpu id */
- cpu = be64_to_cpu(reg_entry->reg_value) & FADUMP_CPU_ID_MASK;
- if (fdh && !cpumask_test_cpu(cpu, &fdh->online_mask)) {
- SKIP_TO_NEXT_CPU(reg_entry);
- continue;
- }
- pr_debug("Reading register data for cpu %d...\n", cpu);
- if (fdh && fdh->crashing_cpu == cpu) {
- regs = fdh->regs;
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- SKIP_TO_NEXT_CPU(reg_entry);
- } else {
- reg_entry++;
- reg_entry = fadump_read_registers(reg_entry, &regs);
- note_buf = fadump_regs_to_elf_notes(note_buf, &regs);
- }
- }
- final_note(note_buf);
-
- if (fdh) {
- pr_debug("Updating elfcore header (%llx) with cpu notes\n",
- fdh->elfcorehdr_addr);
- fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
- }
+ pr_debug("Allocated buffer for cpu notes of size %ld at 0x%lx\n",
+ fw_dump.cpu_notes_buf_size,
+ fw_dump.cpu_notes_buf_vaddr);
return 0;
-
-error_out:
- fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- return rc;
-
}
-/*
- * Validate and process the dump data stored by firmware before exporting
- * it through '/proc/vmcore'.
- */
-static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
+void fadump_free_cpu_notes_buf(void)
{
- struct fadump_crash_info_header *fdh;
- int rc = 0;
-
- if (!fdm_active || !fw_dump.fadumphdr_addr)
- return -EINVAL;
-
- /* Check if the dump data is valid. */
- if ((be16_to_cpu(fdm_active->header.dump_status_flag) == FADUMP_ERROR_FLAG) ||
- (fdm_active->cpu_state_data.error_flags != 0) ||
- (fdm_active->rmr_region.error_flags != 0)) {
- printk(KERN_ERR "Dump taken by platform is not valid\n");
- return -EINVAL;
- }
- if ((fdm_active->rmr_region.bytes_dumped !=
- fdm_active->rmr_region.source_len) ||
- !fdm_active->cpu_state_data.bytes_dumped) {
- printk(KERN_ERR "Dump taken by platform is incomplete\n");
- return -EINVAL;
- }
-
- /* Validate the fadump crash info header */
- fdh = __va(fw_dump.fadumphdr_addr);
- if (fdh->magic_number != FADUMP_CRASH_INFO_MAGIC) {
- printk(KERN_ERR "Crash info header is not valid.\n");
- return -EINVAL;
- }
-
- rc = fadump_build_cpu_notes(fdm_active);
- if (rc)
- return rc;
-
- /*
- * We are done validating dump info and elfcore header is now ready
- * to be exported. set elfcorehdr_addr so that vmcore module will
- * export the elfcore header through '/proc/vmcore'.
- */
- elfcorehdr_addr = fdh->elfcorehdr_addr;
+ if (!fw_dump.cpu_notes_buf_vaddr)
+ return;
- return 0;
+ fadump_free_buffer(fw_dump.cpu_notes_buf_vaddr,
+ fw_dump.cpu_notes_buf_size);
+ fw_dump.cpu_notes_buf_vaddr = 0;
+ fw_dump.cpu_notes_buf_size = 0;
}
-static void free_crash_memory_ranges(void)
+static void fadump_free_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- kfree(crash_memory_ranges);
- crash_memory_ranges = NULL;
- crash_memory_ranges_size = 0;
- max_crash_mem_ranges = 0;
+ kfree(mrange_info->mem_ranges);
+ mrange_info->mem_ranges = NULL;
+ mrange_info->mem_ranges_sz = 0;
+ mrange_info->max_mem_ranges = 0;
}
/*
- * Allocate or reallocate crash memory ranges array in incremental units
+ * Allocate or reallocate mem_ranges array in incremental units
* of PAGE_SIZE.
*/
-static int allocate_crash_memory_ranges(void)
+static int fadump_alloc_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- struct fad_crash_memory_ranges *new_array;
+ struct fadump_memory_range *new_array;
u64 new_size;
- new_size = crash_memory_ranges_size + PAGE_SIZE;
- pr_debug("Allocating %llu bytes of memory for crash memory ranges\n",
- new_size);
+ new_size = mrange_info->mem_ranges_sz + PAGE_SIZE;
+ pr_debug("Allocating %llu bytes of memory for %s memory ranges\n",
+ new_size, mrange_info->name);
- new_array = krealloc(crash_memory_ranges, new_size, GFP_KERNEL);
+ new_array = krealloc(mrange_info->mem_ranges, new_size, GFP_KERNEL);
if (new_array == NULL) {
- pr_err("Insufficient memory for setting up crash memory ranges\n");
- free_crash_memory_ranges();
+ pr_err("Insufficient memory for setting up %s memory ranges\n",
+ mrange_info->name);
+ fadump_free_mem_ranges(mrange_info);
return -ENOMEM;
}
- crash_memory_ranges = new_array;
- crash_memory_ranges_size = new_size;
- max_crash_mem_ranges = (new_size /
- sizeof(struct fad_crash_memory_ranges));
+ mrange_info->mem_ranges = new_array;
+ mrange_info->mem_ranges_sz = new_size;
+ mrange_info->max_mem_ranges = (new_size /
+ sizeof(struct fadump_memory_range));
return 0;
}
-static inline int fadump_add_crash_memory(unsigned long long base,
- unsigned long long end)
+static inline int fadump_add_mem_range(struct fadump_mrange_info *mrange_info,
+ u64 base, u64 end)
{
- u64 start, size;
+ struct fadump_memory_range *mem_ranges = mrange_info->mem_ranges;
bool is_adjacent = false;
+ u64 start, size;
if (base == end)
return 0;
@@ -1022,38 +774,41 @@ static inline int fadump_add_crash_memory(unsigned long long base,
* Fold adjacent memory ranges to bring down the memory ranges/
* PT_LOAD segments count.
*/
- if (crash_mem_ranges) {
- start = crash_memory_ranges[crash_mem_ranges - 1].base;
- size = crash_memory_ranges[crash_mem_ranges - 1].size;
+ if (mrange_info->mem_range_cnt) {
+ start = mem_ranges[mrange_info->mem_range_cnt - 1].base;
+ size = mem_ranges[mrange_info->mem_range_cnt - 1].size;
if ((start + size) == base)
is_adjacent = true;
}
if (!is_adjacent) {
/* resize the array on reaching the limit */
- if (crash_mem_ranges == max_crash_mem_ranges) {
+ if (mrange_info->mem_range_cnt == mrange_info->max_mem_ranges) {
int ret;
- ret = allocate_crash_memory_ranges();
+ ret = fadump_alloc_mem_ranges(mrange_info);
if (ret)
return ret;
+
+ /* Update to the new resized array */
+ mem_ranges = mrange_info->mem_ranges;
}
start = base;
- crash_memory_ranges[crash_mem_ranges].base = start;
- crash_mem_ranges++;
+ mem_ranges[mrange_info->mem_range_cnt].base = start;
+ mrange_info->mem_range_cnt++;
}
- crash_memory_ranges[crash_mem_ranges - 1].size = (end - start);
- pr_debug("crash_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
- (crash_mem_ranges - 1), start, end - 1, (end - start));
+ mem_ranges[mrange_info->mem_range_cnt - 1].size = (end - start);
+ pr_debug("%s_memory_range[%d] [%#016llx-%#016llx], %#llx bytes\n",
+ mrange_info->name, (mrange_info->mem_range_cnt - 1),
+ start, end - 1, (end - start));
return 0;
}
-static int fadump_exclude_reserved_area(unsigned long long start,
- unsigned long long end)
+static int fadump_exclude_reserved_area(u64 start, u64 end)
{
- unsigned long long ra_start, ra_end;
+ u64 ra_start, ra_end;
int ret = 0;
ra_start = fw_dump.reserve_dump_area_start;
@@ -1061,18 +816,22 @@ static int fadump_exclude_reserved_area(unsigned long long start,
if ((ra_start < end) && (ra_end > start)) {
if ((start < ra_start) && (end > ra_end)) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
if (ret)
return ret;
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
} else if (start < ra_start) {
- ret = fadump_add_crash_memory(start, ra_start);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ start, ra_start);
} else if (ra_end < end) {
- ret = fadump_add_crash_memory(ra_end, end);
+ ret = fadump_add_mem_range(&crash_mrange_info,
+ ra_end, end);
}
} else
- ret = fadump_add_crash_memory(start, end);
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
return ret;
}
@@ -1117,36 +876,36 @@ static int fadump_init_elfcore_header(char *bufp)
static int fadump_setup_crash_memory_ranges(void)
{
struct memblock_region *reg;
- unsigned long long start, end;
- int ret;
+ u64 start, end;
+ int i, ret;
pr_debug("Setup crash memory ranges.\n");
- crash_mem_ranges = 0;
+ crash_mrange_info.mem_range_cnt = 0;
/*
- * add the first memory chunk (RMA_START through boot_memory_size) as
- * a separate memory chunk. The reason is, at the time crash firmware
- * will move the content of this memory chunk to different location
- * specified during fadump registration. We need to create a separate
- * program header for this chunk with the correct offset.
+ * Boot memory region(s) registered with firmware are moved to
+ * different location at the time of crash. Create separate program
+ * header(s) for this memory chunk(s) with the correct offset.
*/
- ret = fadump_add_crash_memory(RMA_START, fw_dump.boot_memory_size);
- if (ret)
- return ret;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ start = fw_dump.boot_mem_addr[i];
+ end = start + fw_dump.boot_mem_sz[i];
+ ret = fadump_add_mem_range(&crash_mrange_info, start, end);
+ if (ret)
+ return ret;
+ }
for_each_memblock(memory, reg) {
- start = (unsigned long long)reg->base;
- end = start + (unsigned long long)reg->size;
+ start = (u64)reg->base;
+ end = start + (u64)reg->size;
/*
- * skip the first memory chunk that is already added (RMA_START
- * through boot_memory_size). This logic needs a relook if and
- * when RMA_START changes to a non-zero value.
+ * skip the memory chunk that is already added
+ * (0 through boot_memory_top).
*/
- BUILD_BUG_ON(RMA_START != 0);
- if (start < fw_dump.boot_memory_size) {
- if (end > fw_dump.boot_memory_size)
- start = fw_dump.boot_memory_size;
+ if (start < fw_dump.boot_mem_top) {
+ if (end > fw_dump.boot_mem_top)
+ start = fw_dump.boot_mem_top;
else
continue;
}
@@ -1167,17 +926,35 @@ static int fadump_setup_crash_memory_ranges(void)
*/
static inline unsigned long fadump_relocate(unsigned long paddr)
{
- if (paddr > RMA_START && paddr < fw_dump.boot_memory_size)
- return be64_to_cpu(fdm.rmr_region.destination_address) + paddr;
- else
- return paddr;
+ unsigned long raddr, rstart, rend, rlast, hole_size;
+ int i;
+
+ hole_size = 0;
+ rlast = 0;
+ raddr = paddr;
+ for (i = 0; i < fw_dump.boot_mem_regs_cnt; i++) {
+ rstart = fw_dump.boot_mem_addr[i];
+ rend = rstart + fw_dump.boot_mem_sz[i];
+ hole_size += (rstart - rlast);
+
+ if (paddr >= rstart && paddr < rend) {
+ raddr += fw_dump.boot_mem_dest_addr - hole_size;
+ break;
+ }
+
+ rlast = rend;
+ }
+
+ pr_debug("vmcoreinfo: paddr = 0x%lx, raddr = 0x%lx\n", paddr, raddr);
+ return raddr;
}
static int fadump_create_elfcore_headers(char *bufp)
{
- struct elfhdr *elf;
+ unsigned long long raddr, offset;
struct elf_phdr *phdr;
- int i;
+ struct elfhdr *elf;
+ int i, j;
fadump_init_elfcore_header(bufp);
elf = (struct elfhdr *)bufp;
@@ -1220,12 +997,14 @@ static int fadump_create_elfcore_headers(char *bufp)
(elf->e_phnum)++;
/* setup PT_LOAD sections. */
-
- for (i = 0; i < crash_mem_ranges; i++) {
- unsigned long long mbase, msize;
- mbase = crash_memory_ranges[i].base;
- msize = crash_memory_ranges[i].size;
-
+ j = 0;
+ offset = 0;
+ raddr = fw_dump.boot_mem_addr[0];
+ for (i = 0; i < crash_mrange_info.mem_range_cnt; i++) {
+ u64 mbase, msize;
+
+ mbase = crash_mrange_info.mem_ranges[i].base;
+ msize = crash_mrange_info.mem_ranges[i].size;
if (!msize)
continue;
@@ -1235,13 +1014,17 @@ static int fadump_create_elfcore_headers(char *bufp)
phdr->p_flags = PF_R|PF_W|PF_X;
phdr->p_offset = mbase;
- if (mbase == RMA_START) {
+ if (mbase == raddr) {
/*
- * The entire RMA region will be moved by firmware
- * to the specified destination_address. Hence set
- * the correct offset.
+ * The entire real memory region will be moved by
+ * firmware to the specified destination_address.
+ * Hence set the correct offset.
*/
- phdr->p_offset = be64_to_cpu(fdm.rmr_region.destination_address);
+ phdr->p_offset = fw_dump.boot_mem_dest_addr + offset;
+ if (j < (fw_dump.boot_mem_regs_cnt - 1)) {
+ offset += fw_dump.boot_mem_sz[j];
+ raddr = fw_dump.boot_mem_addr[++j];
+ }
}
phdr->p_paddr = mbase;
@@ -1263,7 +1046,6 @@ static unsigned long init_fadump_header(unsigned long addr)
if (!addr)
return 0;
- fw_dump.fadumphdr_addr = addr;
fdh = __va(addr);
addr += sizeof(struct fadump_crash_info_header);
@@ -1271,7 +1053,7 @@ static unsigned long init_fadump_header(unsigned long addr)
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
fdh->elfcorehdr_addr = addr;
/* We will set the crashing cpu id in crash_fadump() during crash. */
- fdh->crashing_cpu = CPU_UNKNOWN;
+ fdh->crashing_cpu = FADUMP_CPU_UNKNOWN;
return addr;
}
@@ -1293,7 +1075,8 @@ static int register_fadump(void)
if (ret)
return ret;
- addr = be64_to_cpu(fdm.rmr_region.destination_address) + be64_to_cpu(fdm.rmr_region.source_len);
+ addr = fw_dump.fadumphdr_addr;
+
/* Initialize fadump crash info header. */
addr = init_fadump_header(addr);
vaddr = __va(addr);
@@ -1302,74 +1085,27 @@ static int register_fadump(void)
fadump_create_elfcore_headers(vaddr);
/* register the future kernel dump with firmware. */
- return register_fw_dump(&fdm);
-}
-
-static int fadump_unregister_dump(struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Un-register firmware-assisted dump\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_UNREGISTER, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- printk(KERN_ERR "Failed to un-register firmware-assisted dump."
- " unexpected error(%d).\n", rc);
- return rc;
- }
- fw_dump.dump_registered = 0;
- return 0;
-}
-
-static int fadump_invalidate_dump(const struct fadump_mem_struct *fdm)
-{
- int rc = 0;
- unsigned int wait_time;
-
- pr_debug("Invalidating firmware-assisted dump registration\n");
-
- /* TODO: Add upper time limit for the delay */
- do {
- rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL,
- FADUMP_INVALIDATE, fdm,
- sizeof(struct fadump_mem_struct));
-
- wait_time = rtas_busy_delay_time(rc);
- if (wait_time)
- mdelay(wait_time);
- } while (wait_time);
-
- if (rc) {
- pr_err("Failed to invalidate firmware-assisted dump registration. Unexpected error (%d).\n", rc);
- return rc;
- }
- fw_dump.dump_active = 0;
- fdm_active = NULL;
- return 0;
+ pr_debug("Registering for firmware-assisted kernel dump...\n");
+ return fw_dump.ops->fadump_register(&fw_dump);
}
void fadump_cleanup(void)
{
+ if (!fw_dump.fadump_supported)
+ return;
+
/* Invalidate the registration only if dump is active. */
if (fw_dump.dump_active) {
- /* pass the same memory dump structure provided by platform */
- fadump_invalidate_dump(fdm_active);
+ pr_debug("Invalidating firmware-assisted dump registration\n");
+ fw_dump.ops->fadump_invalidate(&fw_dump);
} else if (fw_dump.dump_registered) {
/* Un-register Firmware-assisted dump if it was registered. */
- fadump_unregister_dump(&fdm);
- free_crash_memory_ranges();
+ fw_dump.ops->fadump_unregister(&fw_dump);
+ fadump_free_mem_ranges(&crash_mrange_info);
}
+
+ if (fw_dump.ops->fadump_cleanup)
+ fw_dump.ops->fadump_cleanup(&fw_dump);
}
static void fadump_free_reserved_memory(unsigned long start_pfn,
@@ -1394,90 +1130,197 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
/*
* Skip memory holes and free memory that was actually reserved.
*/
-static void fadump_release_reserved_area(unsigned long start, unsigned long end)
+static void fadump_release_reserved_area(u64 start, u64 end)
{
+ u64 tstart, tend, spfn, epfn;
struct memblock_region *reg;
- unsigned long tstart, tend;
- unsigned long start_pfn = PHYS_PFN(start);
- unsigned long end_pfn = PHYS_PFN(end);
+ spfn = PHYS_PFN(start);
+ epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
- tstart = max(start_pfn, memblock_region_memory_base_pfn(reg));
- tend = min(end_pfn, memblock_region_memory_end_pfn(reg));
+ tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
+ tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
- if (tend == end_pfn)
+ if (tend == epfn)
break;
- start_pfn = tend + 1;
+ spfn = tend;
}
}
}
/*
- * Release the memory that was reserved in early boot to preserve the memory
- * contents. The released memory will be available for general use.
+ * Sort the mem ranges in-place and merge adjacent ranges
+ * to minimize the memory ranges count.
*/
-static void fadump_release_memory(unsigned long begin, unsigned long end)
+static void sort_and_merge_mem_ranges(struct fadump_mrange_info *mrange_info)
{
- unsigned long ra_start, ra_end;
+ struct fadump_memory_range *mem_ranges;
+ struct fadump_memory_range tmp_range;
+ u64 base, size;
+ int i, j, idx;
+
+ if (!reserved_mrange_info.mem_range_cnt)
+ return;
+
+ /* Sort the memory ranges */
+ mem_ranges = mrange_info->mem_ranges;
+ for (i = 0; i < mrange_info->mem_range_cnt; i++) {
+ idx = i;
+ for (j = (i + 1); j < mrange_info->mem_range_cnt; j++) {
+ if (mem_ranges[idx].base > mem_ranges[j].base)
+ idx = j;
+ }
+ if (idx != i) {
+ tmp_range = mem_ranges[idx];
+ mem_ranges[idx] = mem_ranges[i];
+ mem_ranges[i] = tmp_range;
+ }
+ }
+
+ /* Merge adjacent reserved ranges */
+ idx = 0;
+ for (i = 1; i < mrange_info->mem_range_cnt; i++) {
+ base = mem_ranges[i-1].base;
+ size = mem_ranges[i-1].size;
+ if (mem_ranges[i].base == (base + size))
+ mem_ranges[idx].size += mem_ranges[i].size;
+ else {
+ idx++;
+ if (i == idx)
+ continue;
+
+ mem_ranges[idx] = mem_ranges[i];
+ }
+ }
+ mrange_info->mem_range_cnt = idx + 1;
+}
+
+/*
+ * Scan reserved-ranges to consider them while reserving/releasing
+ * memory for FADump.
+ */
+static inline int fadump_scan_reserved_mem_ranges(void)
+{
+ struct device_node *root;
+ const __be32 *prop;
+ int len, ret = -1;
+ unsigned long i;
+
+ root = of_find_node_by_path("/");
+ if (!root)
+ return ret;
+
+ prop = of_get_property(root, "reserved-ranges", &len);
+ if (!prop)
+ return ret;
+
+ /*
+ * Each reserved range is an (address,size) pair, 2 cells each,
+ * totalling 4 cells per range.
+ */
+ for (i = 0; i < len / (sizeof(*prop) * 4); i++) {
+ u64 base, size;
+
+ base = of_read_number(prop + (i * 4) + 0, 2);
+ size = of_read_number(prop + (i * 4) + 2, 2);
+
+ if (size) {
+ ret = fadump_add_mem_range(&reserved_mrange_info,
+ base, base + size);
+ if (ret < 0) {
+ pr_warn("some reserved ranges are ignored!\n");
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+/*
+ * Release the memory that was reserved during early boot to preserve the
+ * crash'ed kernel's memory contents except reserved dump area (permanent
+ * reservation) and reserved ranges used by F/W. The released memory will
+ * be available for general use.
+ */
+static void fadump_release_memory(u64 begin, u64 end)
+{
+ u64 ra_start, ra_end, tstart;
+ int i, ret;
+
+ fadump_scan_reserved_mem_ranges();
ra_start = fw_dump.reserve_dump_area_start;
ra_end = ra_start + fw_dump.reserve_dump_area_size;
/*
- * exclude the dump reserve area. Will reuse it for next
- * fadump registration.
+ * Add reserved dump area to reserved ranges list
+ * and exclude all these ranges while releasing memory.
*/
- if (begin < ra_end && end > ra_start) {
- if (begin < ra_start)
- fadump_release_reserved_area(begin, ra_start);
- if (end > ra_end)
- fadump_release_reserved_area(ra_end, end);
- } else
- fadump_release_reserved_area(begin, end);
+ ret = fadump_add_mem_range(&reserved_mrange_info, ra_start, ra_end);
+ if (ret != 0) {
+ /*
+ * Not enough memory to setup reserved ranges but the system is
+ * running shortage of memory. So, release all the memory except
+ * Reserved dump area (reused for next fadump registration).
+ */
+ if (begin < ra_end && end > ra_start) {
+ if (begin < ra_start)
+ fadump_release_reserved_area(begin, ra_start);
+ if (end > ra_end)
+ fadump_release_reserved_area(ra_end, end);
+ } else
+ fadump_release_reserved_area(begin, end);
+
+ return;
+ }
+
+ /* Get the reserved ranges list in order first. */
+ sort_and_merge_mem_ranges(&reserved_mrange_info);
+
+ /* Exclude reserved ranges and release remaining memory */
+ tstart = begin;
+ for (i = 0; i < reserved_mrange_info.mem_range_cnt; i++) {
+ ra_start = reserved_mrange_info.mem_ranges[i].base;
+ ra_end = ra_start + reserved_mrange_info.mem_ranges[i].size;
+
+ if (tstart >= ra_end)
+ continue;
+
+ if (tstart < ra_start)
+ fadump_release_reserved_area(tstart, ra_start);
+ tstart = ra_end;
+ }
+
+ if (tstart < end)
+ fadump_release_reserved_area(tstart, end);
}
static void fadump_invalidate_release_mem(void)
{
- unsigned long reserved_area_start, reserved_area_end;
- unsigned long destination_address;
-
mutex_lock(&fadump_mutex);
if (!fw_dump.dump_active) {
mutex_unlock(&fadump_mutex);
return;
}
- destination_address = be64_to_cpu(fdm_active->cpu_state_data.destination_address);
fadump_cleanup();
mutex_unlock(&fadump_mutex);
+ fadump_release_memory(fw_dump.boot_mem_top, memblock_end_of_DRAM());
+ fadump_free_cpu_notes_buf();
+
/*
- * Save the current reserved memory bounds we will require them
- * later for releasing the memory for general use.
- */
- reserved_area_start = fw_dump.reserve_dump_area_start;
- reserved_area_end = reserved_area_start +
- fw_dump.reserve_dump_area_size;
- /*
- * Setup reserve_dump_area_start and its size so that we can
- * reuse this reserved memory for Re-registration.
+ * Setup kernel metadata and initialize the kernel dump
+ * memory structure for FADump re-registration.
*/
- fw_dump.reserve_dump_area_start = destination_address;
- fw_dump.reserve_dump_area_size = get_fadump_area_size();
-
- fadump_release_memory(reserved_area_start, reserved_area_end);
- if (fw_dump.cpu_notes_buf) {
- fadump_cpu_notes_buf_free(
- (unsigned long)__va(fw_dump.cpu_notes_buf),
- fw_dump.cpu_notes_buf_size);
- fw_dump.cpu_notes_buf = 0;
- fw_dump.cpu_notes_buf_size = 0;
- }
- /* Initialize the kernel dump memory structure for FAD registration. */
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ if (fw_dump.ops->fadump_setup_metadata &&
+ (fw_dump.ops->fadump_setup_metadata(&fw_dump) < 0))
+ pr_warn("Failed to setup kernel metadata!\n");
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
}
static ssize_t fadump_release_memory_store(struct kobject *kobj,
@@ -1528,7 +1371,7 @@ static ssize_t fadump_register_store(struct kobject *kobj,
int ret = 0;
int input = -1;
- if (!fw_dump.fadump_enabled || fdm_active)
+ if (!fw_dump.fadump_enabled || fw_dump.dump_active)
return -EPERM;
if (kstrtoint(buf, 0, &input))
@@ -1541,13 +1384,15 @@ static ssize_t fadump_register_store(struct kobject *kobj,
if (fw_dump.dump_registered == 0) {
goto unlock_out;
}
+
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ pr_debug("Un-register firmware-assisted dump\n");
+ fw_dump.ops->fadump_unregister(&fw_dump);
break;
case 1:
if (fw_dump.dump_registered == 1) {
/* Un-register Firmware-assisted dump */
- fadump_unregister_dump(&fdm);
+ fw_dump.ops->fadump_unregister(&fw_dump);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
@@ -1564,62 +1409,12 @@ unlock_out:
static int fadump_region_show(struct seq_file *m, void *private)
{
- const struct fadump_mem_struct *fdm_ptr;
-
if (!fw_dump.fadump_enabled)
return 0;
mutex_lock(&fadump_mutex);
- if (fdm_active)
- fdm_ptr = fdm_active;
- else {
- mutex_unlock(&fadump_mutex);
- fdm_ptr = &fdm;
- }
-
- seq_printf(m,
- "CPU : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address),
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) +
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.source_len),
- be64_to_cpu(fdm_ptr->cpu_state_data.bytes_dumped));
- seq_printf(m,
- "HPTE: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->hpte_region.destination_address),
- be64_to_cpu(fdm_ptr->hpte_region.destination_address) +
- be64_to_cpu(fdm_ptr->hpte_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->hpte_region.source_len),
- be64_to_cpu(fdm_ptr->hpte_region.bytes_dumped));
- seq_printf(m,
- "DUMP: [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- be64_to_cpu(fdm_ptr->rmr_region.destination_address),
- be64_to_cpu(fdm_ptr->rmr_region.destination_address) +
- be64_to_cpu(fdm_ptr->rmr_region.source_len) - 1,
- be64_to_cpu(fdm_ptr->rmr_region.source_len),
- be64_to_cpu(fdm_ptr->rmr_region.bytes_dumped));
-
- if (!fdm_active ||
- (fw_dump.reserve_dump_area_start ==
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address)))
- goto out;
-
- /* Dump is active. Show reserved memory region. */
- seq_printf(m,
- " : [%#016llx-%#016llx] %#llx bytes, "
- "Dumped: %#llx\n",
- (unsigned long long)fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) - 1,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start,
- be64_to_cpu(fdm_ptr->cpu_state_data.destination_address) -
- fw_dump.reserve_dump_area_start);
-out:
- if (fdm_active)
- mutex_unlock(&fadump_mutex);
+ fw_dump.ops->fadump_region_show(&fw_dump, m);
+ mutex_unlock(&fadump_mutex);
return 0;
}
@@ -1690,14 +1485,77 @@ int __init setup_fadump(void)
* if dump process fails then invalidate the registration
* and release memory before proceeding for re-registration.
*/
- if (process_fadump(fdm_active) < 0)
+ if (fw_dump.ops->fadump_process(&fw_dump) < 0)
fadump_invalidate_release_mem();
}
/* Initialize the kernel dump memory structure for FAD registration. */
else if (fw_dump.reserve_dump_area_size)
- init_fadump_mem_struct(&fdm, fw_dump.reserve_dump_area_start);
+ fw_dump.ops->fadump_init_mem_struct(&fw_dump);
+
fadump_init_files();
return 1;
}
subsys_initcall(setup_fadump);
+#else /* !CONFIG_PRESERVE_FA_DUMP */
+
+/* Scan the Firmware Assisted dump configuration details. */
+int __init early_init_dt_scan_fw_dump(unsigned long node, const char *uname,
+ int depth, void *data)
+{
+ if ((depth != 1) || (strcmp(uname, "ibm,opal") != 0))
+ return 0;
+
+ opal_fadump_dt_scan(&fw_dump, node);
+ return 1;
+}
+
+/*
+ * When dump is active but PRESERVE_FA_DUMP is enabled on the kernel,
+ * preserve crash data. The subsequent memory preserving kernel boot
+ * is likely to process this crash data.
+ */
+int __init fadump_reserve_mem(void)
+{
+ if (fw_dump.dump_active) {
+ /*
+ * If last boot has crashed then reserve all the memory
+ * above boot memory to preserve crash data.
+ */
+ pr_info("Preserving crash data for processing in next boot.\n");
+ fadump_reserve_crash_area(fw_dump.boot_mem_top);
+ } else
+ pr_debug("FADump-aware kernel..\n");
+
+ return 1;
+}
+#endif /* CONFIG_PRESERVE_FA_DUMP */
+
+/* Preserve everything above the base address */
+static void __init fadump_reserve_crash_area(u64 base)
+{
+ struct memblock_region *reg;
+ u64 mstart, msize;
+
+ for_each_memblock(memory, reg) {
+ mstart = reg->base;
+ msize = reg->size;
+
+ if ((mstart + msize) < base)
+ continue;
+
+ if (mstart < base) {
+ msize -= (base - mstart);
+ mstart = base;
+ }
+
+ pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data",
+ (msize >> 20), mstart);
+ memblock_reserve(mstart, msize);
+ }
+}
+
+unsigned long __init arch_reserved_kernel_pages(void)
+{
+ return memblock_reserved_size() / PAGE_SIZE;
+}
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index f255e22184b4..4a24f8f026c7 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -34,7 +34,16 @@
#include "head_32.h"
-/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
+/* 601 only have IBAT */
+#ifdef CONFIG_PPC_BOOK3S_601
+#define LOAD_BAT(n, reg, RA, RB) \
+ li RA,0; \
+ mtspr SPRN_IBAT##n##U,RA; \
+ lwz RA,(n*16)+0(reg); \
+ lwz RB,(n*16)+4(reg); \
+ mtspr SPRN_IBAT##n##U,RA; \
+ mtspr SPRN_IBAT##n##L,RB
+#else
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
li RA,0; \
@@ -44,12 +53,11 @@
lwz RB,(n*16)+4(reg); \
mtspr SPRN_IBAT##n##U,RA; \
mtspr SPRN_IBAT##n##L,RB; \
- beq 1f; \
lwz RA,(n*16)+8(reg); \
lwz RB,(n*16)+12(reg); \
mtspr SPRN_DBAT##n##U,RA; \
- mtspr SPRN_DBAT##n##L,RB; \
-1:
+ mtspr SPRN_DBAT##n##L,RB
+#endif
__HEAD
.stabs "arch/powerpc/kernel/",N_SO,0,0,0f
@@ -557,9 +565,9 @@ DataStoreTLBMiss:
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
#ifdef CONFIG_SWAP
- li r1, _PAGE_RW | _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
#else
- li r1, _PAGE_RW | _PAGE_PRESENT
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
#endif
bge- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -820,9 +828,6 @@ load_up_mmu:
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
- mfpvr r3
- srwi r3,r3,16
- cmpwi r3,1
lis r3,BATS@ha
addi r3,r3,BATS@l
tophys(r3,r3)
@@ -897,9 +902,11 @@ start_here:
bl machine_init
bl __save_cpu_setup
bl MMU_init
+#ifdef CONFIG_KASAN
BEGIN_MMU_FTR_SECTION
bl MMU_init_hw_patch
END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
+#endif
/*
* Go back to running unmapped so we can load up new values
@@ -996,11 +1003,8 @@ EXPORT_SYMBOL(switch_mmu_context)
*/
clear_bats:
li r10,0
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi r9, 1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT0U,r10
mtspr SPRN_DBAT0L,r10
mtspr SPRN_DBAT1U,r10
@@ -1009,7 +1013,7 @@ clear_bats:
mtspr SPRN_DBAT2L,r10
mtspr SPRN_DBAT3U,r10
mtspr SPRN_DBAT3L,r10
-1:
+#endif
mtspr SPRN_IBAT0U,r10
mtspr SPRN_IBAT0L,r10
mtspr SPRN_IBAT1U,r10
@@ -1104,10 +1108,7 @@ mmu_off:
*/
initial_bats:
lis r11,PAGE_OFFSET@h
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- bne 4f
+#ifdef CONFIG_PPC_BOOK3S_601
ori r11,r11,4 /* set up BAT registers for 601 */
li r8,0x7f /* valid, block length = 8MB */
mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */
@@ -1120,10 +1121,8 @@ initial_bats:
addis r8,r8,0x800000@h
mtspr SPRN_IBAT2U,r11
mtspr SPRN_IBAT2L,r8
- isync
- blr
-
-4: tophys(r8,r11)
+#else
+ tophys(r8,r11)
#ifdef CONFIG_SMP
ori r8,r8,0x12 /* R/W access, M=1 */
#else
@@ -1135,10 +1134,10 @@ initial_bats:
mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */
mtspr SPRN_IBAT0L,r8
mtspr SPRN_IBAT0U,r11
+#endif
isync
blr
-
#ifdef CONFIG_BOOTX_TEXT
setup_disp_bat:
/*
@@ -1153,15 +1152,13 @@ setup_disp_bat:
beqlr
lwz r11,0(r8)
lwz r8,4(r8)
- mfspr r9,SPRN_PVR
- rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */
- cmpwi 0,r9,1
- beq 1f
+#ifndef CONFIG_PPC_BOOK3S_601
mtspr SPRN_DBAT3L,r8
mtspr SPRN_DBAT3U,r11
- blr
-1: mtspr SPRN_IBAT3L,r8
+#else
+ mtspr SPRN_IBAT3L,r8
mtspr SPRN_IBAT3U,r11
+#endif
blr
#endif /* CONFIG_BOOTX_TEXT */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 4a692553651f..8abc7783dbe5 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -5,19 +5,6 @@
#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
/*
- * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it include MSR_CE.
- */
-.macro __LOAD_MSR_KERNEL r, x
-.if \x >= 0x8000
- lis \r, (\x)@h
- ori \r, \r, (\x)@l
-.else
- li \r, (\x)
-.endif
-.endm
-#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
-
-/*
* Exception entry code. This code runs with address translation
* turned off, i.e. using physical addresses.
* We assume sprg3 has the physical address of the current
@@ -92,7 +79,7 @@
#ifdef CONFIG_40x
rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
MTMSRD(r10) /* (except for mach check in rtas) */
#endif
lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
@@ -140,10 +127,10 @@
* otherwise we might risk taking an interrupt before we tell lockdep
* they are enabled.
*/
- LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL)
rlwimi r10, r9, 0, MSR_EE
#else
- LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ LOAD_REG_IMMEDIATE(r10, MSR_KERNEL | MSR_EE)
#endif
#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
mtspr SPRN_NRI, r0
@@ -187,7 +174,7 @@ label:
#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
- LOAD_MSR_KERNEL(r10, msr); \
+ LOAD_REG_IMMEDIATE(r10, msr); \
bl tfer; \
.long hdlr; \
.long ret
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 91d297e696dd..ad79fddb974d 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -182,7 +182,8 @@ __secondary_hold:
isync
bctr
#else
- BUG_OPCODE
+0: trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
#endif
CLOSE_FIXED_SECTION(first_256B)
@@ -635,7 +636,7 @@ __after_prom_start:
sub r5,r5,r11
#else
/* just copy interrupts */
- LOAD_REG_IMMEDIATE(r5, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
+ LOAD_REG_IMMEDIATE_SYM(r5, r11, FIXED_SYMBOL_ABS_ADDR(__end_interrupts))
#endif
b 5f
3:
@@ -998,7 +999,8 @@ start_here_common:
bl start_kernel
/* Not reached */
- BUG_OPCODE
+ trap
+ EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
/*
* We put a few things here that have to be page-aligned.
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 5ab9178c2347..19f583e18402 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -15,6 +15,7 @@
*/
#include <linux/init.h>
+#include <linux/magic.h>
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -574,8 +575,6 @@ InstructionBreakpoint:
* by decoding the registers used by the dcbx instruction and adding them.
* DAR is set to the calculated address.
*/
- /* define if you don't want to use self modifying code */
-#define NO_SELF_MODIFYING_CODE
FixupDAR:/* Entry point for dcbx workaround. */
mtspr SPRN_M_TW, r10
/* fetch instruction from memory. */
@@ -639,27 +638,6 @@ FixupDAR:/* Entry point for dcbx workaround. */
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
142: /* continue, it was a dcbx, dcbi instruction. */
-#ifndef NO_SELF_MODIFYING_CODE
- andis. r10,r11,0x1f /* test if reg RA is r0 */
- li r10,modified_instr@l
- dcbtst r0,r10 /* touch for store */
- rlwinm r11,r11,0,0,20 /* Zero lower 10 bits */
- oris r11,r11,640 /* Transform instr. to a "add r10,RA,RB" */
- ori r11,r11,532
- stw r11,0(r10) /* store add/and instruction */
- dcbf 0,r10 /* flush new instr. to memory. */
- icbi 0,r10 /* invalidate instr. cache line */
- mfspr r11, SPRN_SPRG_SCRATCH1 /* restore r11 */
- mfspr r10, SPRN_SPRG_SCRATCH0 /* restore r10 */
- isync /* Wait until new instr is loaded from memory */
-modified_instr:
- .space 4 /* this is where the add instr. is stored */
- bne+ 143f
- subf r10,r0,r10 /* r10=r10-r0, only if reg RA is r0 */
-143: mtdar r10 /* store faulting EA in DAR */
- mfspr r10,SPRN_M_TW
- b DARFixed /* Go back to normal TLB handling */
-#else
mfctr r10
mtdar r10 /* save ctr reg in DAR */
rlwinm r10, r11, 24, 24, 28 /* offset into jump table for reg RB */
@@ -723,7 +701,6 @@ modified_instr:
add r10, r10, r11 /* add it */
mfctr r11 /* restore r11 */
b 151b
-#endif
/*
* This is where the main kernel code starts.
@@ -741,6 +718,9 @@ start_here:
/* stack */
lis r1,init_thread_union@ha
addi r1,r1,init_thread_union@l
+ lis r0, STACK_END_MAGIC@h
+ ori r0, r0, STACK_END_MAGIC@l
+ stw r0, 0(r1)
li r0,0
stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index c8d1fa2e9d53..1007ec36b4cb 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -195,18 +195,63 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
tsk->thread.last_hit_ubp = NULL;
}
+static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
+{
+ int ret, type;
+ struct instruction_op op;
+
+ ret = analyse_instr(&op, regs, instr);
+ type = GETTYPE(op.type);
+ return (!ret && (type == LARX || type == STCX));
+}
+
/*
* Handle debug exception notifications.
*/
+static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
+ unsigned long addr)
+{
+ unsigned int instr = 0;
+
+ if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
+ goto fail;
+
+ if (is_larx_stcx_instr(regs, instr)) {
+ printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
+ " Breakpoint at 0x%lx will be disabled.\n", addr);
+ goto disable;
+ }
+
+ /* Do not emulate user-space instructions, instead single-step them */
+ if (user_mode(regs)) {
+ current->thread.last_hit_ubp = bp;
+ regs->msr |= MSR_SE;
+ return false;
+ }
+
+ if (!emulate_step(regs, instr))
+ goto fail;
+
+ return true;
+
+fail:
+ /*
+ * We've failed in reliably handling the hw-breakpoint. Unregister
+ * it and throw a warning message to let the user know about it.
+ */
+ WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+ "0x%lx will be disabled.", addr);
+
+disable:
+ perf_event_disable_inatomic(bp);
+ return false;
+}
+
int hw_breakpoint_handler(struct die_args *args)
{
int rc = NOTIFY_STOP;
struct perf_event *bp;
struct pt_regs *regs = args->regs;
-#ifndef CONFIG_PPC_8xx
- int stepped = 1;
- unsigned int instr;
-#endif
struct arch_hw_breakpoint *info;
unsigned long dar = regs->dar;
@@ -251,32 +296,10 @@ int hw_breakpoint_handler(struct die_args *args)
(dar - bp->attr.bp_addr < bp->attr.bp_len)))
info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-#ifndef CONFIG_PPC_8xx
- /* Do not emulate user-space instructions, instead single-step them */
- if (user_mode(regs)) {
- current->thread.last_hit_ubp = bp;
- regs->msr |= MSR_SE;
+ if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
goto out;
- }
-
- stepped = 0;
- instr = 0;
- if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
- stepped = emulate_step(regs, instr);
/*
- * emulate_step() could not execute it. We've failed in reliably
- * handling the hw-breakpoint. Unregister it and throw a warning
- * message to let the user know about it.
- */
- if (!stepped) {
- WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
- "0x%lx will be disabled.", info->address);
- perf_event_disable_inatomic(bp);
- goto out;
- }
-#endif
- /*
* As a policy, the callback is invoked in a 'trigger-after-execute'
* fashion
*/
diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c
index fbd2d0007c52..0276bc8c8969 100644
--- a/arch/powerpc/kernel/io-workarounds.c
+++ b/arch/powerpc/kernel/io-workarounds.c
@@ -149,8 +149,8 @@ static const struct ppc_pci_io iowa_pci_io = {
};
#ifdef CONFIG_PPC_INDIRECT_MMIO
-static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- pgprot_t prot, void *caller)
+void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
void __iomem *res = __ioremap_caller(addr, size, prot, caller);
@@ -163,20 +163,17 @@ static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
}
return res;
}
-#else /* CONFIG_PPC_INDIRECT_MMIO */
-#define iowa_ioremap NULL
#endif /* !CONFIG_PPC_INDIRECT_MMIO */
+bool io_workaround_inited;
+
/* Enable IO workaround */
static void io_workaround_init(void)
{
- static int io_workaround_inited;
-
if (io_workaround_inited)
return;
ppc_pci_io = iowa_pci_io;
- ppc_md.ioremap = iowa_ioremap;
- io_workaround_inited = 1;
+ io_workaround_inited = true;
}
/* Register new bus to support workaround */
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 0a67ce9f827e..9704f3f76e63 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -633,11 +633,54 @@ static void iommu_table_clear(struct iommu_table *tbl)
#endif
}
+static void iommu_table_reserve_pages(struct iommu_table *tbl,
+ unsigned long res_start, unsigned long res_end)
+{
+ int i;
+
+ WARN_ON_ONCE(res_end < res_start);
+ /*
+ * Reserve page 0 so it will not be used for any mappings.
+ * This avoids buggy drivers that consider page 0 to be invalid
+ * to crash the machine or even lose data.
+ */
+ if (tbl->it_offset == 0)
+ set_bit(0, tbl->it_map);
+
+ tbl->it_reserved_start = res_start;
+ tbl->it_reserved_end = res_end;
+
+ /* Check if res_start..res_end isn't empty and overlaps the table */
+ if (res_start && res_end &&
+ (tbl->it_offset + tbl->it_size < res_start ||
+ res_end < tbl->it_offset))
+ return;
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ set_bit(i - tbl->it_offset, tbl->it_map);
+}
+
+static void iommu_table_release_pages(struct iommu_table *tbl)
+{
+ int i;
+
+ /*
+ * In case we have reserved the first bit, we should not emit
+ * the warning below.
+ */
+ if (tbl->it_offset == 0)
+ clear_bit(0, tbl->it_map);
+
+ for (i = tbl->it_reserved_start; i < tbl->it_reserved_end; ++i)
+ clear_bit(i - tbl->it_offset, tbl->it_map);
+}
+
/*
* Build a iommu_table structure. This contains a bit map which
* is used to manage allocation of the tce space.
*/
-struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
+struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid,
+ unsigned long res_start, unsigned long res_end)
{
unsigned long sz;
static int welcomed = 0;
@@ -656,13 +699,7 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
tbl->it_map = page_address(page);
memset(tbl->it_map, 0, sz);
- /*
- * Reserve page 0 so it will not be used for any mappings.
- * This avoids buggy drivers that consider page 0 to be invalid
- * to crash the machine or even lose data.
- */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, res_start, res_end);
/* We only split the IOMMU table if we have 1GB or more of space */
if ((tbl->it_size << tbl->it_page_shift) >= (1UL * 1024 * 1024 * 1024))
@@ -714,12 +751,7 @@ static void iommu_table_free(struct kref *kref)
return;
}
- /*
- * In case we have reserved the first bit, we should not emit
- * the warning below.
- */
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
@@ -981,29 +1013,32 @@ int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
}
EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
-long iommu_tce_xchg(struct mm_struct *mm, struct iommu_table *tbl,
+extern long iommu_tce_xchg_no_kill(struct mm_struct *mm,
+ struct iommu_table *tbl,
unsigned long entry, unsigned long *hpa,
enum dma_data_direction *direction)
{
long ret;
unsigned long size = 0;
- ret = tbl->it_ops->exchange(tbl, entry, hpa, direction);
-
+ ret = tbl->it_ops->xchg_no_kill(tbl, entry, hpa, direction, false);
if (!ret && ((*direction == DMA_FROM_DEVICE) ||
(*direction == DMA_BIDIRECTIONAL)) &&
!mm_iommu_is_devmem(mm, *hpa, tbl->it_page_shift,
&size))
SetPageDirty(pfn_to_page(*hpa >> PAGE_SHIFT));
- /* if (unlikely(ret))
- pr_err("iommu_tce: %s failed on hwaddr=%lx ioba=%lx kva=%lx ret=%d\n",
- __func__, hwaddr, entry << tbl->it_page_shift,
- hwaddr, ret); */
-
return ret;
}
-EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_no_kill);
+
+void iommu_tce_kill(struct iommu_table *tbl,
+ unsigned long entry, unsigned long pages)
+{
+ if (tbl->it_ops->tce_kill)
+ tbl->it_ops->tce_kill(tbl, entry, pages, false);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_kill);
int iommu_take_ownership(struct iommu_table *tbl)
{
@@ -1017,22 +1052,21 @@ int iommu_take_ownership(struct iommu_table *tbl)
* requires exchange() callback defined so if it is not
* implemented, we disallow taking ownership over the table.
*/
- if (!tbl->it_ops->exchange)
+ if (!tbl->it_ops->xchg_no_kill)
return -EINVAL;
spin_lock_irqsave(&tbl->large_pool.lock, flags);
for (i = 0; i < tbl->nr_pools; i++)
spin_lock(&tbl->pools[i].lock);
- if (tbl->it_offset == 0)
- clear_bit(0, tbl->it_map);
+ iommu_table_release_pages(tbl);
if (!bitmap_empty(tbl->it_map, tbl->it_size)) {
pr_err("iommu_tce: it_map is not empty");
ret = -EBUSY;
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ /* Undo iommu_table_release_pages, i.e. restore bit#0, etc */
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
} else {
memset(tbl->it_map, 0xff, sz);
}
@@ -1055,9 +1089,8 @@ void iommu_release_ownership(struct iommu_table *tbl)
memset(tbl->it_map, 0, sz);
- /* Restore bit#0 set by iommu_init_table() */
- if (tbl->it_offset == 0)
- set_bit(0, tbl->it_map);
+ iommu_table_reserve_pages(tbl, tbl->it_reserved_start,
+ tbl->it_reserved_end);
for (i = 0; i < tbl->nr_pools; i++)
spin_unlock(&tbl->pools[i].lock);
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
index 83cf7b852876..3072fd6dbe94 100644
--- a/arch/powerpc/kernel/kexec_elf_64.c
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -23,541 +23,6 @@
#include <linux/slab.h>
#include <linux/types.h>
-#define PURGATORY_STACK_SIZE (16 * 1024)
-
-#define elf_addr_to_cpu elf64_to_cpu
-
-#ifndef Elf_Rel
-#define Elf_Rel Elf64_Rel
-#endif /* Elf_Rel */
-
-struct elf_info {
- /*
- * Where the ELF binary contents are kept.
- * Memory managed by the user of the struct.
- */
- const char *buffer;
-
- const struct elfhdr *ehdr;
- const struct elf_phdr *proghdrs;
- struct elf_shdr *sechdrs;
-};
-
-static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
-{
- return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
-}
-
-static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le64_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be64_to_cpu(value);
-
- return value;
-}
-
-static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le16_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be16_to_cpu(value);
-
- return value;
-}
-
-static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
-{
- if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
- value = le32_to_cpu(value);
- else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
- value = be32_to_cpu(value);
-
- return value;
-}
-
-/**
- * elf_is_ehdr_sane - check that it is safe to use the ELF header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
-{
- if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
- pr_debug("Bad program header size.\n");
- return false;
- } else if (ehdr->e_shnum > 0 &&
- ehdr->e_shentsize != sizeof(struct elf_shdr)) {
- pr_debug("Bad section header size.\n");
- return false;
- } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
- ehdr->e_version != EV_CURRENT) {
- pr_debug("Unknown ELF version.\n");
- return false;
- }
-
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- size_t phdr_size;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- /* Sanity check the program header table location. */
- if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
- pr_debug("Program headers at invalid location.\n");
- return false;
- } else if (ehdr->e_phoff + phdr_size > buf_len) {
- pr_debug("Program headers truncated.\n");
- return false;
- }
- }
-
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- size_t shdr_size;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
-
- /* Sanity check the section header table location. */
- if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
- pr_debug("Section headers at invalid location.\n");
- return false;
- } else if (ehdr->e_shoff + shdr_size > buf_len) {
- pr_debug("Section headers truncated.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
-{
- struct elfhdr *buf_ehdr;
-
- if (len < sizeof(*buf_ehdr)) {
- pr_debug("Buffer is too small to hold ELF header.\n");
- return -ENOEXEC;
- }
-
- memset(ehdr, 0, sizeof(*ehdr));
- memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
- if (!elf_is_elf_file(ehdr)) {
- pr_debug("No ELF header magic.\n");
- return -ENOEXEC;
- }
-
- if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
- pr_debug("Not a supported ELF class.\n");
- return -ENOEXEC;
- } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
- ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
- pr_debug("Not a supported ELF data format.\n");
- return -ENOEXEC;
- }
-
- buf_ehdr = (struct elfhdr *) buf;
- if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
- pr_debug("Bad ELF header size.\n");
- return -ENOEXEC;
- }
-
- ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
- ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
- ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
- ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
- ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
- ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
- ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
- ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
- ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
- ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
- ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
- ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
-
- return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_is_phdr_sane - check that it is safe to use the program header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
-{
-
- if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
- pr_debug("ELF segment location wraps around.\n");
- return false;
- } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
- pr_debug("ELF segment not in file.\n");
- return false;
- } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
- pr_debug("ELF segment address wraps around.\n");
- return false;
- }
-
- return true;
-}
-
-static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- /* Override the const in proghdrs, we are the ones doing the loading. */
- struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
- const char *pbuf;
- struct elf_phdr *buf_phdr;
-
- pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
- buf_phdr = (struct elf_phdr *) pbuf;
-
- phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
- phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
- phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
- phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
- phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
- phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
- phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
-
- return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_phdrs - read the program headers from the buffer
- *
- * This function assumes that the program header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_phdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t phdr_size, i;
- const struct elfhdr *ehdr = elf_info->ehdr;
-
- /*
- * e_phnum is at most 65535 so calculating the size of the
- * program header cannot overflow.
- */
- phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
-
- elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
- if (!elf_info->proghdrs)
- return -ENOMEM;
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- int ret;
-
- ret = elf_read_phdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->proghdrs);
- elf_info->proghdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_is_shdr_sane - check that it is safe to use the section header
- * @buf_len: size of the buffer in which the ELF file is loaded.
- */
-static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
-{
- bool size_ok;
-
- /* SHT_NULL headers have undefined values, so we can't check them. */
- if (shdr->sh_type == SHT_NULL)
- return true;
-
- /* Now verify sh_entsize */
- switch (shdr->sh_type) {
- case SHT_SYMTAB:
- size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
- break;
- case SHT_RELA:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
- break;
- case SHT_DYNAMIC:
- size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
- break;
- case SHT_REL:
- size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
- break;
- case SHT_NOTE:
- case SHT_PROGBITS:
- case SHT_HASH:
- case SHT_NOBITS:
- default:
- /*
- * This is a section whose entsize requirements
- * I don't care about. If I don't know about
- * the section I can't care about it's entsize
- * requirements.
- */
- size_ok = true;
- break;
- }
-
- if (!size_ok) {
- pr_debug("ELF section with wrong entry size.\n");
- return false;
- } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
- pr_debug("ELF section address wraps around.\n");
- return false;
- }
-
- if (shdr->sh_type != SHT_NOBITS) {
- if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
- pr_debug("ELF section location wraps around.\n");
- return false;
- } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
- pr_debug("ELF section not in file.\n");
- return false;
- }
- }
-
- return true;
-}
-
-static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
- int idx)
-{
- struct elf_shdr *shdr = &elf_info->sechdrs[idx];
- const struct elfhdr *ehdr = elf_info->ehdr;
- const char *sbuf;
- struct elf_shdr *buf_shdr;
-
- sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
- buf_shdr = (struct elf_shdr *) sbuf;
-
- shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
- shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
- shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
- shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
- shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
- shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
-
- /*
- * The following fields have a type equivalent to Elf_Addr
- * both in 32 bit and 64 bit ELF.
- */
- shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
- shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
- shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
- shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
-
- return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_read_shdrs - read the section headers from the buffer
- *
- * This function assumes that the section header table was checked for sanity.
- * Use elf_is_ehdr_sane() if it wasn't.
- */
-static int elf_read_shdrs(const char *buf, size_t len,
- struct elf_info *elf_info)
-{
- size_t shdr_size, i;
-
- /*
- * e_shnum is at most 65536 so calculating
- * the size of the section header cannot overflow.
- */
- shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
-
- elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
- if (!elf_info->sechdrs)
- return -ENOMEM;
-
- for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
- int ret;
-
- ret = elf_read_shdr(buf, len, elf_info, i);
- if (ret) {
- kfree(elf_info->sechdrs);
- elf_info->sechdrs = NULL;
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_read_from_buffer - read ELF file and sets up ELF header and ELF info
- * @buf: Buffer to read ELF file from.
- * @len: Size of @buf.
- * @ehdr: Pointer to existing struct which will be populated.
- * @elf_info: Pointer to existing struct which will be populated.
- *
- * This function allows reading ELF files with different byte order than
- * the kernel, byte-swapping the fields as needed.
- *
- * Return:
- * On success returns 0, and the caller should call elf_free_info(elf_info) to
- * free the memory allocated for the section and program headers.
- */
-int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int ret;
-
- ret = elf_read_ehdr(buf, len, ehdr);
- if (ret)
- return ret;
-
- elf_info->buffer = buf;
- elf_info->ehdr = ehdr;
- if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
- ret = elf_read_phdrs(buf, len, elf_info);
- if (ret)
- return ret;
- }
- if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
- ret = elf_read_shdrs(buf, len, elf_info);
- if (ret) {
- kfree(elf_info->proghdrs);
- return ret;
- }
- }
-
- return 0;
-}
-
-/**
- * elf_free_info - free memory allocated by elf_read_from_buffer
- */
-void elf_free_info(struct elf_info *elf_info)
-{
- kfree(elf_info->proghdrs);
- kfree(elf_info->sechdrs);
- memset(elf_info, 0, sizeof(*elf_info));
-}
-/**
- * build_elf_exec_info - read ELF executable and check that we can use it
- */
-static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
- struct elf_info *elf_info)
-{
- int i;
- int ret;
-
- ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
- if (ret)
- return ret;
-
- /* Big endian vmlinux has type ET_DYN. */
- if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
- pr_err("Not an ELF executable.\n");
- goto error;
- } else if (!elf_info->proghdrs) {
- pr_err("No ELF program header.\n");
- goto error;
- }
-
- for (i = 0; i < ehdr->e_phnum; i++) {
- /*
- * Kexec does not support loading interpreters.
- * In addition this check keeps us from attempting
- * to kexec ordinay executables.
- */
- if (elf_info->proghdrs[i].p_type == PT_INTERP) {
- pr_err("Requires an ELF interpreter.\n");
- goto error;
- }
- }
-
- return 0;
-error:
- elf_free_info(elf_info);
- return -ENOEXEC;
-}
-
-static int elf64_probe(const char *buf, unsigned long len)
-{
- struct elfhdr ehdr;
- struct elf_info elf_info;
- int ret;
-
- ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
- if (ret)
- return ret;
-
- elf_free_info(&elf_info);
-
- return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
-}
-
-/**
- * elf_exec_load - load ELF executable image
- * @lowest_load_addr: On return, will be the address where the first PT_LOAD
- * section will be loaded in memory.
- *
- * Return:
- * 0 on success, negative value on failure.
- */
-static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
- struct elf_info *elf_info,
- unsigned long *lowest_load_addr)
-{
- unsigned long base = 0, lowest_addr = UINT_MAX;
- int ret;
- size_t i;
- struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
- .top_down = false };
-
- /* Read in the PT_LOAD segments. */
- for (i = 0; i < ehdr->e_phnum; i++) {
- unsigned long load_addr;
- size_t size;
- const struct elf_phdr *phdr;
-
- phdr = &elf_info->proghdrs[i];
- if (phdr->p_type != PT_LOAD)
- continue;
-
- size = phdr->p_filesz;
- if (size > phdr->p_memsz)
- size = phdr->p_memsz;
-
- kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
- kbuf.bufsz = size;
- kbuf.memsz = phdr->p_memsz;
- kbuf.buf_align = phdr->p_align;
- kbuf.buf_min = phdr->p_paddr + base;
- kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- goto out;
- load_addr = kbuf.mem;
-
- if (load_addr < lowest_addr)
- lowest_addr = load_addr;
- }
-
- /* Update entry point to reflect new load address. */
- ehdr->e_entry += base;
-
- *lowest_load_addr = lowest_addr;
- ret = 0;
- out:
- return ret;
-}
-
static void *elf64_load(struct kimage *image, char *kernel_buf,
unsigned long kernel_len, char *initrd,
unsigned long initrd_len, char *cmdline,
@@ -570,18 +35,18 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
void *fdt;
const void *slave_code;
struct elfhdr ehdr;
- struct elf_info elf_info;
+ struct kexec_elf_info elf_info;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size };
struct kexec_buf pbuf = { .image = image, .buf_min = 0,
.buf_max = ppc64_rma_size, .top_down = true,
.mem = KEXEC_BUF_MEM_UNKNOWN };
- ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
if (ret)
goto out;
- ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
if (ret)
goto out;
@@ -648,13 +113,13 @@ static void *elf64_load(struct kimage *image, char *kernel_buf,
pr_err("Error setting up the purgatory.\n");
out:
- elf_free_info(&elf_info);
+ kexec_free_elf_info(&elf_info);
/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
return ret ? ERR_PTR(ret) : fdt;
}
const struct kexec_file_ops kexec_elf64_ops = {
- .probe = elf64_probe,
+ .probe = kexec_elf_probe,
.load = elf64_load,
};
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index b7b3a5e4e224..617eba82531c 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -64,16 +64,17 @@
#define KVM_INST_MTSRIN 0x7c0001e4
static bool kvm_patching_worked = true;
-char kvm_tmp[1024 * 1024];
+extern char kvm_tmp[];
+extern char kvm_tmp_end[];
static int kvm_tmp_index;
-static inline void kvm_patch_ins(u32 *inst, u32 new_inst)
+static void __init kvm_patch_ins(u32 *inst, u32 new_inst)
{
*inst = new_inst;
flush_icache_range((ulong)inst, (ulong)inst + 4);
}
-static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -82,7 +83,7 @@ static void kvm_patch_ins_ll(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_LD | rt | (addr & 0x0000fffc));
@@ -91,12 +92,12 @@ static void kvm_patch_ins_ld(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_lwz(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_LWZ | rt | (addr & 0x0000ffff));
}
-static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
{
#ifdef CONFIG_64BIT
kvm_patch_ins(inst, KVM_INST_STD | rt | (addr & 0x0000fffc));
@@ -105,17 +106,17 @@ static void kvm_patch_ins_std(u32 *inst, long addr, u32 rt)
#endif
}
-static void kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
+static void __init kvm_patch_ins_stw(u32 *inst, long addr, u32 rt)
{
kvm_patch_ins(inst, KVM_INST_STW | rt | (addr & 0x0000fffc));
}
-static void kvm_patch_ins_nop(u32 *inst)
+static void __init kvm_patch_ins_nop(u32 *inst)
{
kvm_patch_ins(inst, KVM_INST_NOP);
}
-static void kvm_patch_ins_b(u32 *inst, int addr)
+static void __init kvm_patch_ins_b(u32 *inst, int addr)
{
#if defined(CONFIG_RELOCATABLE) && defined(CONFIG_PPC_BOOK3S)
/* On relocatable kernels interrupts handlers and our code
@@ -128,11 +129,11 @@ static void kvm_patch_ins_b(u32 *inst, int addr)
kvm_patch_ins(inst, KVM_INST_B | (addr & KVM_INST_B_MASK));
}
-static u32 *kvm_alloc(int len)
+static u32 * __init kvm_alloc(int len)
{
u32 *p;
- if ((kvm_tmp_index + len) > ARRAY_SIZE(kvm_tmp)) {
+ if ((kvm_tmp_index + len) > (kvm_tmp_end - kvm_tmp)) {
printk(KERN_ERR "KVM: No more space (%d + %d)\n",
kvm_tmp_index, len);
kvm_patching_worked = false;
@@ -151,7 +152,7 @@ extern u32 kvm_emulate_mtmsrd_orig_ins_offs;
extern u32 kvm_emulate_mtmsrd_len;
extern u32 kvm_emulate_mtmsrd[];
-static void kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsrd(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -204,7 +205,7 @@ extern u32 kvm_emulate_mtmsr_orig_ins_offs;
extern u32 kvm_emulate_mtmsr_len;
extern u32 kvm_emulate_mtmsr[];
-static void kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
+static void __init kvm_patch_ins_mtmsr(u32 *inst, u32 rt)
{
u32 *p;
int distance_start;
@@ -265,7 +266,7 @@ extern u32 kvm_emulate_wrtee_orig_ins_offs;
extern u32 kvm_emulate_wrtee_len;
extern u32 kvm_emulate_wrtee[];
-static void kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
+static void __init kvm_patch_ins_wrtee(u32 *inst, u32 rt, int imm_one)
{
u32 *p;
int distance_start;
@@ -322,7 +323,7 @@ extern u32 kvm_emulate_wrteei_0_branch_offs;
extern u32 kvm_emulate_wrteei_0_len;
extern u32 kvm_emulate_wrteei_0[];
-static void kvm_patch_ins_wrteei_0(u32 *inst)
+static void __init kvm_patch_ins_wrteei_0(u32 *inst)
{
u32 *p;
int distance_start;
@@ -363,7 +364,7 @@ extern u32 kvm_emulate_mtsrin_orig_ins_offs;
extern u32 kvm_emulate_mtsrin_len;
extern u32 kvm_emulate_mtsrin[];
-static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
+static void __init kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
{
u32 *p;
int distance_start;
@@ -399,7 +400,7 @@ static void kvm_patch_ins_mtsrin(u32 *inst, u32 rt, u32 rb)
#endif
-static void kvm_map_magic_page(void *data)
+static void __init kvm_map_magic_page(void *data)
{
u32 *features = data;
@@ -414,7 +415,7 @@ static void kvm_map_magic_page(void *data)
*features = out[0];
}
-static void kvm_check_ins(u32 *inst, u32 features)
+static void __init kvm_check_ins(u32 *inst, u32 features)
{
u32 _inst = *inst;
u32 inst_no_rt = _inst & ~KVM_MASK_RT;
@@ -658,7 +659,7 @@ static void kvm_check_ins(u32 *inst, u32 features)
extern u32 kvm_template_start[];
extern u32 kvm_template_end[];
-static void kvm_use_magic_page(void)
+static void __init kvm_use_magic_page(void)
{
u32 *p;
u32 *start, *end;
@@ -699,25 +700,13 @@ static void kvm_use_magic_page(void)
kvm_patching_worked ? "worked" : "failed");
}
-static __init void kvm_free_tmp(void)
-{
- /*
- * Inform kmemleak about the hole in the .bss section since the
- * corresponding pages will be unmapped with DEBUG_PAGEALLOC=y.
- */
- kmemleak_free_part(&kvm_tmp[kvm_tmp_index],
- ARRAY_SIZE(kvm_tmp) - kvm_tmp_index);
- free_reserved_area(&kvm_tmp[kvm_tmp_index],
- &kvm_tmp[ARRAY_SIZE(kvm_tmp)], -1, NULL);
-}
-
static int __init kvm_guest_init(void)
{
if (!kvm_para_available())
- goto free_tmp;
+ return 0;
if (!epapr_paravirt_enabled)
- goto free_tmp;
+ return 0;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))
kvm_use_magic_page();
@@ -727,9 +716,6 @@ static int __init kvm_guest_init(void)
powersave_nap = 1;
#endif
-free_tmp:
- kvm_free_tmp();
-
return 0;
}
diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S
index eb2568f583ae..7af6f8b50c5d 100644
--- a/arch/powerpc/kernel/kvm_emul.S
+++ b/arch/powerpc/kernel/kvm_emul.S
@@ -192,6 +192,8 @@ kvm_emulate_mtmsr_orig_ins_offs:
kvm_emulate_mtmsr_len:
.long (kvm_emulate_mtmsr_end - kvm_emulate_mtmsr) / 4
+#ifdef CONFIG_BOOKE
+
/* also used for wrteei 1 */
.global kvm_emulate_wrtee
kvm_emulate_wrtee:
@@ -285,6 +287,10 @@ kvm_emulate_wrteei_0_branch_offs:
kvm_emulate_wrteei_0_len:
.long (kvm_emulate_wrteei_0_end - kvm_emulate_wrteei_0) / 4
+#endif /* CONFIG_BOOKE */
+
+#ifdef CONFIG_PPC_BOOK3S_32
+
.global kvm_emulate_mtsrin
kvm_emulate_mtsrin:
@@ -334,5 +340,15 @@ kvm_emulate_mtsrin_orig_ins_offs:
kvm_emulate_mtsrin_len:
.long (kvm_emulate_mtsrin_end - kvm_emulate_mtsrin) / 4
+#endif /* CONFIG_PPC_BOOK3S_32 */
+
+ .balign 4
+ .global kvm_tmp
+kvm_tmp:
+ .space (64 * 1024)
+
+.global kvm_tmp_end
+kvm_tmp_end:
+
.global kvm_template_end
kvm_template_end:
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index 18481b0e2788..04a7cba58eff 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -29,6 +29,8 @@
#include <asm/smp.h>
#include <asm/hw_breakpoint.h>
#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
int default_machine_kexec_prepare(struct kimage *image)
{
@@ -327,6 +329,13 @@ void default_machine_kexec(struct kimage *image)
#ifdef CONFIG_PPC_PSERIES
kexec_paca.lppaca_ptr = NULL;
#endif
+
+ if (is_secure_guest() && !(image->preserve_context ||
+ image->type == KEXEC_TYPE_CRASH)) {
+ uv_unshare_all_pages();
+ printk("kexec: Unshared all shared pages.\n");
+ }
+
paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
setup_paca(&kexec_paca);
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b18df633eae9..34c1001e9e8b 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -33,13 +33,18 @@ static DEFINE_PER_CPU(struct machine_check_event[MAX_MC_EVT],
mce_ue_event_queue);
static void machine_check_process_queued_event(struct irq_work *work);
-void machine_check_ue_event(struct machine_check_event *evt);
+static void machine_check_ue_irq_work(struct irq_work *work);
+static void machine_check_ue_event(struct machine_check_event *evt);
static void machine_process_ue_event(struct work_struct *work);
static struct irq_work mce_event_process_work = {
.func = machine_check_process_queued_event,
};
+static struct irq_work mce_ue_event_irq_work = {
+ .func = machine_check_ue_irq_work,
+};
+
DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
static void mce_set_error_info(struct machine_check_event *mce,
@@ -144,6 +149,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
if (phys_addr != ULONG_MAX) {
mce->u.ue_error.physical_address_provided = true;
mce->u.ue_error.physical_address = phys_addr;
+ mce->u.ue_error.ignore_event = mce_err->ignore_event;
machine_check_ue_event(mce);
}
}
@@ -199,11 +205,15 @@ void release_mce_event(void)
get_mce_event(NULL, true);
}
+static void machine_check_ue_irq_work(struct irq_work *work)
+{
+ schedule_work(&mce_ue_event_work);
+}
/*
* Queue up the MCE event which then can be handled later.
*/
-void machine_check_ue_event(struct machine_check_event *evt)
+static void machine_check_ue_event(struct machine_check_event *evt)
{
int index;
@@ -216,7 +226,7 @@ void machine_check_ue_event(struct machine_check_event *evt)
memcpy(this_cpu_ptr(&mce_ue_event_queue[index]), evt, sizeof(*evt));
/* Queue work to process this event later. */
- schedule_work(&mce_ue_event_work);
+ irq_work_queue(&mce_ue_event_irq_work);
}
/*
@@ -257,8 +267,17 @@ static void machine_process_ue_event(struct work_struct *work)
/*
* This should probably queued elsewhere, but
* oh! well
+ *
+ * Don't report this machine check because the caller has a
+ * asked us to ignore the event, it has a fixup handler which
+ * will do the appropriate error handling and reporting.
*/
if (evt->error_type == MCE_ERROR_TYPE_UE) {
+ if (evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_ue_count);
+ continue;
+ }
+
if (evt->u.ue_error.physical_address_provided) {
unsigned long pfn;
@@ -292,6 +311,12 @@ static void machine_check_process_queued_event(struct irq_work *work)
while (__this_cpu_read(mce_queue_count) > 0) {
index = __this_cpu_read(mce_queue_count) - 1;
evt = this_cpu_ptr(&mce_event_queue[index]);
+
+ if (evt->error_type == MCE_ERROR_TYPE_UE &&
+ evt->u.ue_error.ignore_event) {
+ __this_cpu_dec(mce_queue_count);
+ continue;
+ }
machine_check_print_event_info(evt, false, false);
__this_cpu_dec(mce_queue_count);
}
@@ -300,7 +325,7 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype, *err_type;
+ const char *level, *sevstr, *subtype, *err_type, *initiator;
uint64_t ea = 0, pa = 0;
int n = 0;
char dar_str[50];
@@ -385,6 +410,28 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
+ switch(evt->initiator) {
+ case MCE_INITIATOR_CPU:
+ initiator = "CPU";
+ break;
+ case MCE_INITIATOR_PCI:
+ initiator = "PCI";
+ break;
+ case MCE_INITIATOR_ISA:
+ initiator = "ISA";
+ break;
+ case MCE_INITIATOR_MEMORY:
+ initiator = "Memory";
+ break;
+ case MCE_INITIATOR_POWERMGM:
+ initiator = "Power Management";
+ break;
+ case MCE_INITIATOR_UNKNOWN:
+ default:
+ initiator = "Unknown";
+ break;
+ }
+
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
err_type = "UE";
@@ -451,6 +498,14 @@ void machine_check_print_event_info(struct machine_check_event *evt,
if (evt->u.link_error.effective_address_provided)
ea = evt->u.link_error.effective_address;
break;
+ case MCE_ERROR_TYPE_DCACHE:
+ err_type = "D-Cache";
+ subtype = "Unknown";
+ break;
+ case MCE_ERROR_TYPE_ICACHE:
+ err_type = "I-Cache";
+ subtype = "Unknown";
+ break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
err_type = "Unknown";
@@ -483,9 +538,17 @@ void machine_check_print_event_info(struct machine_check_event *evt,
level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
}
+ printk("%sMCE: CPU%d: Initiator %s\n", level, evt->cpu, initiator);
+
subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
mc_error_class[evt->error_class] : "Unknown";
printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (evt->error_type == MCE_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index a814d2dfb5b0..1cbf7f1a4e3d 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/extable.h>
#include <asm/mmu.h>
#include <asm/mce.h>
#include <asm/machdep.h>
@@ -18,6 +19,7 @@
#include <asm/pte-walk.h>
#include <asm/sstep.h>
#include <asm/exception-64s.h>
+#include <asm/extable.h>
/*
* Convert an address related to an mm to a PFN. NOTE: we are in real
@@ -26,7 +28,8 @@
unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
- unsigned long flags;
+ unsigned int shift;
+ unsigned long pfn, flags;
struct mm_struct *mm;
if (user_mode(regs))
@@ -35,14 +38,23 @@ unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
mm = &init_mm;
local_irq_save(flags);
- if (mm == current->mm)
- ptep = find_current_mm_pte(mm->pgd, addr, NULL, NULL);
- else
- ptep = find_init_mm_pte(addr, NULL);
+ ptep = __find_linux_pte(mm->pgd, addr, NULL, &shift);
+
+ if (!ptep || pte_special(*ptep)) {
+ pfn = ULONG_MAX;
+ goto out;
+ }
+
+ if (shift <= PAGE_SHIFT)
+ pfn = pte_pfn(*ptep);
+ else {
+ unsigned long rpnmask = (1ul << shift) - PAGE_SIZE;
+ pfn = pte_pfn(__pte(pte_val(*ptep) | (addr & rpnmask)));
+ }
+
+out:
local_irq_restore(flags);
- if (!ptep || pte_special(*ptep))
- return ULONG_MAX;
- return pte_pfn(*ptep);
+ return pfn;
}
/* flush SLBs and reload */
@@ -344,7 +356,7 @@ static const struct mce_derror_table mce_p9_derror_table[] = {
MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0, false, 0, 0, 0, 0, 0 } };
-static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
+static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
{
/*
@@ -397,6 +409,8 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
handled = mce_flush(MCE_FLUSH_SLB);
break;
case MCE_ERROR_TYPE_ERAT:
@@ -482,6 +496,8 @@ static int mce_handle_derror(struct pt_regs *regs,
/* attempt to correct the error */
switch (table[i].error_type) {
case MCE_ERROR_TYPE_SLB:
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
if (mce_flush(MCE_FLUSH_SLB))
handled = 1;
break;
@@ -541,7 +557,8 @@ static int mce_handle_derror(struct pt_regs *regs,
* kernel/exception-64s.h
*/
if (get_paca()->in_mce < MAX_MCE_DEPTH)
- mce_find_instr_ea_and_pfn(regs, addr, phys_addr);
+ mce_find_instr_ea_and_phys(regs, addr,
+ phys_addr);
}
found = 1;
}
@@ -558,9 +575,18 @@ static int mce_handle_derror(struct pt_regs *regs,
return 0;
}
-static long mce_handle_ue_error(struct pt_regs *regs)
+static long mce_handle_ue_error(struct pt_regs *regs,
+ struct mce_error_info *mce_err)
{
long handled = 0;
+ const struct exception_table_entry *entry;
+
+ entry = search_kernel_exception_table(regs->nip);
+ if (entry) {
+ mce_err->ignore_event = true;
+ regs->nip = extable_fixup(entry);
+ return 1;
+ }
/*
* On specific SCOM read via MMIO we may get a machine check
@@ -593,7 +619,7 @@ static long mce_handle_error(struct pt_regs *regs,
&phys_addr);
if (!handled && mce_err.error_type == MCE_ERROR_TYPE_UE)
- handled = mce_handle_ue_error(regs);
+ handled = mce_handle_ue_error(regs, &mce_err);
save_mce_event(regs, handled, &mce_err, regs->nip, addr, phys_addr);
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index fe4bd321730e..82df4b09e79f 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -292,22 +292,20 @@ _GLOBAL(flush_instruction_cache)
iccci 0,r3
#endif
#elif defined(CONFIG_FSL_BOOKE)
-BEGIN_FTR_SECTION
+#ifdef CONFIG_E200
mfspr r3,SPRN_L1CSR0
ori r3,r3,L1CSR0_CFI|L1CSR0_CLFC
/* msync; isync recommended here */
mtspr SPRN_L1CSR0,r3
isync
blr
-END_FTR_SECTION_IFSET(CPU_FTR_UNIFIED_ID_CACHE)
+#endif
mfspr r3,SPRN_L1CSR1
ori r3,r3,L1CSR1_ICFI|L1CSR1_ICLFR
mtspr SPRN_L1CSR1,r3
+#elif defined(CONFIG_PPC_BOOK3S_601)
+ blr /* for 601, do nothing */
#else
- mfspr r3,SPRN_PVR
- rlwinm r3,r3,16,16,31
- cmpwi 0,r3,1
- beqlr /* for 601, do nothing */
/* 603/604 processor - use invalidate-all bit in HID0 */
mfspr r3,SPRN_HID0
ori r3,r3,HID0_ICFI
@@ -326,10 +324,10 @@ EXPORT_SYMBOL(flush_instruction_cache)
* flush_icache_range(unsigned long start, unsigned long stop)
*/
_GLOBAL(flush_icache_range)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
rlwinm r3,r3,0,0,31 - L1_CACHE_SHIFT
subf r4,r3,r4
addi r4,r4,L1_CACHE_BYTES - 1
@@ -355,6 +353,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
sync /* additional sync needed on g4 */
isync
blr
+#endif
_ASM_NOKPROBE_SYMBOL(flush_icache_range)
EXPORT_SYMBOL(flush_icache_range)
@@ -362,15 +361,15 @@ EXPORT_SYMBOL(flush_icache_range)
* Flush a particular page from the data cache to RAM.
* Note: this is necessary because the instruction cache does *not*
* snoop from the data cache.
- * This is a no-op on the 601 which has a unified cache.
+ * This is a no-op on the 601 and e200 which have a unified cache.
*
* void __flush_dcache_icache(void *page)
*/
_GLOBAL(__flush_dcache_icache)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+#else
rlwinm r3,r3,0,0,31-PAGE_SHIFT /* Get page base address */
li r4,PAGE_SIZE/L1_CACHE_BYTES /* Number of lines in a page */
mtctr r4
@@ -398,6 +397,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
sync
isync
blr
+#endif
#ifndef CONFIG_BOOKE
/*
@@ -409,10 +409,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
* void __flush_dcache_icache_phys(unsigned long physaddr)
*/
_GLOBAL(__flush_dcache_icache_phys)
-BEGIN_FTR_SECTION
+#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
PURGE_PREFETCHED_INS
- blr /* for 601, do nothing */
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
+ blr /* for 601 and e200, do nothing */
+#else
mfmsr r10
rlwinm r0,r10,0,28,26 /* clear DR */
mtmsr r0
@@ -433,6 +433,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
mtmsr r10 /* restore DR */
isync
blr
+#endif
#endif /* CONFIG_BOOKE */
/*
@@ -452,7 +453,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
stwu r9,16(r3)
_GLOBAL(copy_page)
+ rlwinm r5, r3, 0, L1_CACHE_BYTES - 1
addi r3,r3,-4
+
+0: twnei r5, 0 /* WARN if r3 is not cache aligned */
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+
addi r4,r4,-4
li r5,4
diff --git a/arch/powerpc/kernel/note.S b/arch/powerpc/kernel/note.S
new file mode 100644
index 000000000000..bcdad15395dd
--- /dev/null
+++ b/arch/powerpc/kernel/note.S
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PowerPC ELF notes.
+ *
+ * Copyright 2019, IBM Corporation
+ */
+
+#include <linux/elfnote.h>
+#include <asm/elfnote.h>
+
+/*
+ * Ultravisor-capable bit (PowerNV only).
+ *
+ * Bit 0 indicates that the powerpc kernel binary knows how to run in an
+ * ultravisor-enabled system.
+ *
+ * In an ultravisor-enabled system, some machine resources are now controlled
+ * by the ultravisor. If the kernel is not ultravisor-capable, but it ends up
+ * being run on a machine with ultravisor, the kernel will probably crash
+ * trying to access ultravisor resources. For instance, it may crash in early
+ * boot trying to set the partition table entry 0.
+ *
+ * In an ultravisor-enabled system, a bootloader could warn the user or prevent
+ * the kernel from being run if the PowerPC ultravisor capability doesn't exist
+ * or the Ultravisor-capable bit is not set.
+ */
+#ifdef CONFIG_PPC_POWERNV
+#define PPCCAP_ULTRAVISOR_BIT (1 << 0)
+#else
+#define PPCCAP_ULTRAVISOR_BIT 0
+#endif
+
+/*
+ * Add the PowerPC Capabilities in the binary ELF note. It is a bitmap that
+ * can be used to advertise kernel capabilities to userland.
+ */
+#define PPC_CAPABILITIES_BITMAP (PPCCAP_ULTRAVISOR_BIT)
+
+ELFNOTE(PowerPC, PPC_ELFNOTE_CAPABILITIES,
+ .long PPC_CAPABILITIES_BITMAP)
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e3ad8aa4730d..949eceb254d8 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -14,6 +14,8 @@
#include <asm/sections.h>
#include <asm/pgtable.h>
#include <asm/kexec.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
#include "setup.h"
@@ -52,6 +54,43 @@ static void *__init alloc_paca_data(unsigned long size, unsigned long align,
#ifdef CONFIG_PPC_PSERIES
+#define LPPACA_SIZE 0x400
+
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
+ unsigned long limit, int cpu)
+{
+ size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
+ static unsigned long shared_lppaca_size;
+ static void *shared_lppaca;
+ void *ptr;
+
+ if (!shared_lppaca) {
+ memblock_set_bottom_up(true);
+
+ shared_lppaca =
+ memblock_alloc_try_nid(shared_lppaca_total_size,
+ PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
+ limit, NUMA_NO_NODE);
+ if (!shared_lppaca)
+ panic("cannot allocate shared data");
+
+ memblock_set_bottom_up(false);
+ uv_share_page(PHYS_PFN(__pa(shared_lppaca)),
+ shared_lppaca_total_size >> PAGE_SHIFT);
+ }
+
+ ptr = shared_lppaca + shared_lppaca_size;
+ shared_lppaca_size += size;
+
+ /*
+ * This is very early in boot, so no harm done if the kernel crashes at
+ * this point.
+ */
+ BUG_ON(shared_lppaca_size >= shared_lppaca_total_size);
+
+ return ptr;
+}
+
/*
* See asm/lppaca.h for more detail.
*
@@ -65,7 +104,7 @@ static inline void init_lppaca(struct lppaca *lppaca)
*lppaca = (struct lppaca) {
.desc = cpu_to_be32(0xd397d781), /* "LpPa" */
- .size = cpu_to_be16(0x400),
+ .size = cpu_to_be16(LPPACA_SIZE),
.fpregs_in_use = 1,
.slb_count = cpu_to_be16(64),
.vmxregs_in_use = 0,
@@ -75,19 +114,22 @@ static inline void init_lppaca(struct lppaca *lppaca)
static struct lppaca * __init new_lppaca(int cpu, unsigned long limit)
{
struct lppaca *lp;
- size_t size = 0x400;
- BUILD_BUG_ON(size < sizeof(struct lppaca));
+ BUILD_BUG_ON(sizeof(struct lppaca) > LPPACA_SIZE);
if (early_cpu_has_feature(CPU_FTR_HVMODE))
return NULL;
- lp = alloc_paca_data(size, 0x400, limit, cpu);
+ if (is_secure_guest())
+ lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+ else
+ lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
+
init_lppaca(lp);
return lp;
}
-#endif /* CONFIG_PPC_BOOK3S */
+#endif /* CONFIG_PPC_PSERIES */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c
index f627e15bb43c..1c448cf25506 100644
--- a/arch/powerpc/kernel/pci-common.c
+++ b/arch/powerpc/kernel/pci-common.c
@@ -1379,10 +1379,6 @@ void __init pcibios_resource_survey(void)
pr_debug("PCI: Assigning unassigned resources...\n");
pci_assign_unassigned_resources();
}
-
- /* Call machine dependent fixup */
- if (ppc_md.pcibios_fixup)
- ppc_md.pcibios_fixup();
}
/* This is used by the PCI hotplug driver to allocate resource
diff --git a/arch/powerpc/kernel/pci-hotplug.c b/arch/powerpc/kernel/pci-hotplug.c
index 0b0cf8168b47..fc62c4bc47b1 100644
--- a/arch/powerpc/kernel/pci-hotplug.c
+++ b/arch/powerpc/kernel/pci-hotplug.c
@@ -55,11 +55,18 @@ EXPORT_SYMBOL_GPL(pci_find_bus_by_node);
void pcibios_release_device(struct pci_dev *dev)
{
struct pci_controller *phb = pci_bus_to_host(dev->bus);
+ struct pci_dn *pdn = pci_get_pdn(dev);
eeh_remove_device(dev);
if (phb->controller_ops.release_device)
phb->controller_ops.release_device(dev);
+
+ /* free()ing the pci_dn has been deferred to us, do it now */
+ if (pdn && (pdn->flags & PCI_DN_FLAG_DEAD)) {
+ pci_dbg(dev, "freeing dead pdn\n");
+ kfree(pdn);
+ }
}
/**
diff --git a/arch/powerpc/kernel/pci_32.c b/arch/powerpc/kernel/pci_32.c
index 50942a1d1a5f..b49e1060a3bf 100644
--- a/arch/powerpc/kernel/pci_32.c
+++ b/arch/powerpc/kernel/pci_32.c
@@ -263,6 +263,10 @@ static int __init pcibios_init(void)
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
/* Call machine dependent post-init code */
if (ppc_md.pcibios_after_init)
ppc_md.pcibios_after_init();
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index b7030b1189d0..f83d1f69b1dd 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -54,14 +54,20 @@ static int __init pcibios_init(void)
pci_add_flags(PCI_ENABLE_PROC_DOMAINS | PCI_COMPAT_DOMAIN_0);
/* Scan all of the recorded PCI controllers. */
- list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
pcibios_scan_phb(hose);
- pci_bus_add_devices(hose->bus);
- }
/* Call common code to handle resource allocation */
pcibios_resource_survey();
+ /* Add devices. */
+ list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
+ pci_bus_add_devices(hose->bus);
+
+ /* Call machine dependent fixup */
+ if (ppc_md.pcibios_fixup)
+ ppc_md.pcibios_fixup();
+
printk(KERN_DEBUG "PCI: Probing PCI hardware done\n");
return 0;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index c4c8c237a106..9524009ca1ae 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -323,6 +323,7 @@ void pci_remove_device_node_info(struct device_node *dn)
{
struct pci_dn *pdn = dn ? PCI_DN(dn) : NULL;
struct device_node *parent;
+ struct pci_dev *pdev;
#ifdef CONFIG_EEH
struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
@@ -336,12 +337,28 @@ void pci_remove_device_node_info(struct device_node *dn)
WARN_ON(!list_empty(&pdn->child_list));
list_del(&pdn->list);
+ /* Drop the parent pci_dn's ref to our backing dt node */
parent = of_get_parent(dn);
if (parent)
of_node_put(parent);
- dn->data = NULL;
- kfree(pdn);
+ /*
+ * At this point we *might* still have a pci_dev that was
+ * instantiated from this pci_dn. So defer free()ing it until
+ * the pci_dev's release function is called.
+ */
+ pdev = pci_get_domain_bus_and_slot(pdn->phb->global_number,
+ pdn->busno, pdn->devfn);
+ if (pdev) {
+ /* NB: pdev has a ref to dn */
+ pci_dbg(pdev, "marked pdn (from %pOF) as dead\n", dn);
+ pdn->flags |= PCI_DN_FLAG_DEAD;
+ } else {
+ dn->data = NULL;
+ kfree(pdn);
+ }
+
+ pci_dev_put(pdev);
}
EXPORT_SYMBOL_GPL(pci_remove_device_node_info);
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 409c6c1beabf..f91d7e94872e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -34,31 +34,75 @@ static u32 get_int_prop(struct device_node *np, const char *name, u32 def)
* pci_parse_of_flags - Parse the flags cell of a device tree PCI address
* @addr0: value of 1st cell of a device tree PCI address.
* @bridge: Set this flag if the address is from a bridge 'ranges' property
+ *
+ * PCI Bus Binding to IEEE Std 1275-1994
+ *
+ * Bit# 33222222 22221111 11111100 00000000
+ * 10987654 32109876 54321098 76543210
+ * phys.hi cell: npt000ss bbbbbbbb dddddfff rrrrrrrr
+ * phys.mid cell: hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh
+ * phys.lo cell: llllllll llllllll llllllll llllllll
+ *
+ * where:
+ * n is 0 if the address is relocatable, 1 otherwise
+ * p is 1 if the addressable region is "prefetchable", 0 otherwise
+ * t is 1 if the address is aliased (for non-relocatable I/O),
+ * below 1 MB (for Memory),or below 64 KB (for relocatable I/O).
+ * ss is the space code, denoting the address space:
+ * 00 denotes Configuration Space
+ * 01 denotes I/O Space
+ * 10 denotes 32-bit-address Memory Space
+ * 11 denotes 64-bit-address Memory Space
+ * bbbbbbbb is the 8-bit Bus Number
+ * ddddd is the 5-bit Device Number
+ * fff is the 3-bit Function Number
+ * rrrrrrrr is the 8-bit Register Number
*/
+#define OF_PCI_ADDR0_SPACE(ss) (((ss)&3)<<24)
+#define OF_PCI_ADDR0_SPACE_CFG OF_PCI_ADDR0_SPACE(0)
+#define OF_PCI_ADDR0_SPACE_IO OF_PCI_ADDR0_SPACE(1)
+#define OF_PCI_ADDR0_SPACE_MMIO32 OF_PCI_ADDR0_SPACE(2)
+#define OF_PCI_ADDR0_SPACE_MMIO64 OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_SPACE_MASK OF_PCI_ADDR0_SPACE(3)
+#define OF_PCI_ADDR0_RELOC (1UL<<31)
+#define OF_PCI_ADDR0_PREFETCH (1UL<<30)
+#define OF_PCI_ADDR0_ALIAS (1UL<<29)
+#define OF_PCI_ADDR0_BUS 0x00FF0000UL
+#define OF_PCI_ADDR0_DEV 0x0000F800UL
+#define OF_PCI_ADDR0_FN 0x00000700UL
+#define OF_PCI_ADDR0_BARREG 0x000000FFUL
+
unsigned int pci_parse_of_flags(u32 addr0, int bridge)
{
- unsigned int flags = 0;
+ unsigned int flags = 0, as = addr0 & OF_PCI_ADDR0_SPACE_MASK;
- if (addr0 & 0x02000000) {
+ if (as == OF_PCI_ADDR0_SPACE_MMIO32 || as == OF_PCI_ADDR0_SPACE_MMIO64) {
flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY;
- flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64;
- if (flags & PCI_BASE_ADDRESS_MEM_TYPE_64)
- flags |= IORESOURCE_MEM_64;
- flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M;
- if (addr0 & 0x40000000)
- flags |= IORESOURCE_PREFETCH
- | PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+ if (as == OF_PCI_ADDR0_SPACE_MMIO64)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_64 | IORESOURCE_MEM_64;
+
+ if (addr0 & OF_PCI_ADDR0_ALIAS)
+ flags |= PCI_BASE_ADDRESS_MEM_TYPE_1M;
+
+ if (addr0 & OF_PCI_ADDR0_PREFETCH)
+ flags |= IORESOURCE_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_PREFETCH;
+
/* Note: We don't know whether the ROM has been left enabled
* by the firmware or not. We mark it as disabled (ie, we do
* not set the IORESOURCE_ROM_ENABLE flag) for now rather than
* do a config space read, it will be force-enabled if needed
*/
- if (!bridge && (addr0 & 0xff) == 0x30)
+ if (!bridge && (addr0 & OF_PCI_ADDR0_BARREG) == PCI_ROM_ADDRESS)
flags |= IORESOURCE_READONLY;
- } else if (addr0 & 0x01000000)
+
+ } else if (as == OF_PCI_ADDR0_SPACE_IO)
flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO;
+
if (flags)
flags |= IORESOURCE_SIZEALIGN;
+
return flags;
}
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7a84c9f1778e..639ceae7da9d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1587,8 +1587,9 @@ static void setup_ksp_vsid(struct task_struct *p, unsigned long sp)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p)
+int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
+ unsigned long kthread_arg, struct task_struct *p,
+ unsigned long tls)
{
struct pt_regs *childregs, *kregs;
extern void ret_from_fork(void);
@@ -1629,10 +1630,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_PPC64
if (!is_32bit_task())
- childregs->gpr[13] = childregs->gpr[6];
+ childregs->gpr[13] = tls;
else
#endif
- childregs->gpr[2] = childregs->gpr[6];
+ childregs->gpr[2] = tls;
}
f = ret_from_fork;
@@ -2033,10 +2034,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
int count = 0;
int firstframe = 1;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- struct ftrace_ret_stack *ret_stack;
- extern void return_to_handler(void);
- unsigned long rth = (unsigned long)return_to_handler;
- int curr_frame = 0;
+ unsigned long ret_addr;
+ int ftrace_idx = 0;
#endif
if (tsk == NULL)
@@ -2065,15 +2064,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
if (!firstframe || ip != lr) {
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- if ((ip == rth) && curr_frame >= 0) {
- ret_stack = ftrace_graph_get_ret_stack(current,
- curr_frame++);
- if (ret_stack)
- pr_cont(" (%pS)",
- (void *)ret_stack->ret);
- else
- curr_frame = -1;
- }
+ ret_addr = ftrace_graph_ret_addr(current,
+ &ftrace_idx, ip, stack);
+ if (ret_addr != ip)
+ pr_cont(" (%pS)", (void *)ret_addr);
#endif
if (firstframe)
pr_cont(" (unreliable)");
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 7159e791a70d..6620f37abe73 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -55,6 +55,7 @@
#include <asm/firmware.h>
#include <asm/dt_cpu_ftrs.h>
#include <asm/drmem.h>
+#include <asm/ultravisor.h>
#include <mm/mmu_decl.h>
@@ -702,9 +703,12 @@ void __init early_init_devtree(void *params)
#ifdef CONFIG_PPC_POWERNV
/* Some machines might need OPAL info for debugging, grab it now. */
of_scan_flat_dt(early_init_dt_scan_opal, NULL);
+
+ /* Scan tree for ultravisor feature */
+ of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
#endif
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/* scan tree to see if dump is active during last boot */
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
#endif
@@ -731,7 +735,7 @@ void __init early_init_devtree(void *params)
if (PHYSICAL_START > MEMORY_START)
memblock_reserve(MEMORY_START, 0x8000);
reserve_kdump_trampoline();
-#ifdef CONFIG_FA_DUMP
+#if defined(CONFIG_FA_DUMP) || defined(CONFIG_PRESERVE_FA_DUMP)
/*
* If we fail to reserve memory for firmware-assisted dump then
* fallback to kexec based kdump.
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 514707ef6779..a4e7762dd286 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -40,6 +40,7 @@
#include <asm/sections.h>
#include <asm/machdep.h>
#include <asm/asm-prototypes.h>
+#include <asm/ultravisor-api.h>
#include <linux/linux_logo.h>
@@ -94,7 +95,7 @@ static int of_workarounds __prombss;
#define PROM_BUG() do { \
prom_printf("kernel BUG at %s line 0x%x!\n", \
__FILE__, __LINE__); \
- __asm__ __volatile__(".long " BUG_ILLEGAL_INSTR); \
+ __builtin_trap(); \
} while (0)
#ifdef DEBUG_PROM
@@ -171,6 +172,10 @@ static bool __prombss prom_radix_disable;
static bool __prombss prom_xive_disable;
#endif
+#ifdef CONFIG_PPC_SVM
+static bool __prombss prom_svm_enable;
+#endif
+
struct platform_support {
bool hash_mmu;
bool radix_mmu;
@@ -812,6 +817,17 @@ static void __init early_cmdline_parse(void)
prom_debug("XIVE disabled from cmdline\n");
}
#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_SVM
+ opt = prom_strstr(prom_cmd_line, "svm=");
+ if (opt) {
+ bool val;
+
+ opt += sizeof("svm=") - 1;
+ if (!prom_strtobool(opt, &val))
+ prom_svm_enable = val;
+ }
+#endif /* CONFIG_PPC_SVM */
}
#ifdef CONFIG_PPC_PSERIES
@@ -1712,6 +1728,43 @@ static void __init prom_close_stdin(void)
}
}
+#ifdef CONFIG_PPC_SVM
+static int prom_rtas_hcall(uint64_t args)
+{
+ register uint64_t arg1 asm("r3") = H_RTAS;
+ register uint64_t arg2 asm("r4") = args;
+
+ asm volatile("sc 1\n" : "=r" (arg1) :
+ "r" (arg1),
+ "r" (arg2) :);
+ return arg1;
+}
+
+static struct rtas_args __prombss os_term_args;
+
+static void __init prom_rtas_os_term(char *str)
+{
+ phandle rtas_node;
+ __be32 val;
+ u32 token;
+
+ prom_debug("%s: start...\n", __func__);
+ rtas_node = call_prom("finddevice", 1, 1, ADDR("/rtas"));
+ prom_debug("rtas_node: %x\n", rtas_node);
+ if (!PHANDLE_VALID(rtas_node))
+ return;
+
+ val = 0;
+ prom_getprop(rtas_node, "ibm,os-term", &val, sizeof(val));
+ token = be32_to_cpu(val);
+ prom_debug("ibm,os-term: %x\n", token);
+ if (token == 0)
+ prom_panic("Could not get token for ibm,os-term\n");
+ os_term_args.token = cpu_to_be32(token);
+ prom_rtas_hcall((uint64_t)&os_term_args);
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* Allocate room for and instantiate RTAS
*/
@@ -3168,6 +3221,46 @@ static void unreloc_toc(void)
#endif
#endif
+#ifdef CONFIG_PPC_SVM
+/*
+ * Perform the Enter Secure Mode ultracall.
+ */
+static int enter_secure_mode(unsigned long kbase, unsigned long fdt)
+{
+ register unsigned long r3 asm("r3") = UV_ESM;
+ register unsigned long r4 asm("r4") = kbase;
+ register unsigned long r5 asm("r5") = fdt;
+
+ asm volatile("sc 2" : "+r"(r3) : "r"(r4), "r"(r5));
+
+ return r3;
+}
+
+/*
+ * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
+ */
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+ int ret;
+
+ if (!prom_svm_enable)
+ return;
+
+ /* Switch to secure mode. */
+ prom_printf("Switching to secure mode.\n");
+
+ ret = enter_secure_mode(kbase, fdt);
+ if (ret != U_SUCCESS) {
+ prom_printf("Returned %d from switching to secure mode.\n", ret);
+ prom_rtas_os_term("Switch to secure mode failed.\n");
+ }
+}
+#else
+static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
/*
* We enter here early on, when the Open Firmware prom is still
* handling exceptions and the MMU hash table for us.
@@ -3366,6 +3459,9 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
unreloc_toc();
#endif
+ /* Move to secure memory if we're supposed to be secure guests. */
+ setup_secure_guest(kbase, hdr);
+
__start(hdr, kbase, 0, 0, 0, 0, 0);
return 0;
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 160bef0d553d..78bab17b1396 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -33,7 +33,7 @@ OBJ="$2"
ERROR=0
-function check_section()
+check_section()
{
file=$1
section=$2
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 5faf0a64c92b..c5fa251b8950 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -16,6 +16,7 @@
#include <linux/capability.h>
#include <linux/delay.h>
#include <linux/cpu.h>
+#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
@@ -871,15 +872,17 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
return 0;
for_each_cpu(cpu, cpus) {
+ struct device *dev = get_cpu_device(cpu);
+
switch (state) {
case DOWN:
- cpuret = cpu_down(cpu);
+ cpuret = device_offline(dev);
break;
case UP:
- cpuret = cpu_up(cpu);
+ cpuret = device_online(dev);
break;
}
- if (cpuret) {
+ if (cpuret < 0) {
pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
__func__,
((state == UP) ? "up" : "down"),
@@ -896,6 +899,7 @@ static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
cpumask_clear_cpu(cpu, cpus);
}
}
+ cond_resched();
}
return ret;
@@ -922,13 +926,11 @@ int rtas_online_cpus_mask(cpumask_var_t cpus)
return ret;
}
-EXPORT_SYMBOL(rtas_online_cpus_mask);
int rtas_offline_cpus_mask(cpumask_var_t cpus)
{
return rtas_cpu_state_change_mask(DOWN, cpus);
}
-EXPORT_SYMBOL(rtas_offline_cpus_mask);
int rtas_ibm_suspend_me(u64 handle)
{
@@ -968,6 +970,8 @@ int rtas_ibm_suspend_me(u64 handle)
data.token = rtas_token("ibm,suspend-me");
data.complete = &done;
+ lock_device_hotplug();
+
/* All present CPUs must be online */
cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
cpuret = rtas_online_cpus_mask(offline_mask);
@@ -1006,6 +1010,7 @@ out_hotplug_enable:
__func__);
out:
+ unlock_device_hotplug();
free_cpumask_var(offline_mask);
return atomic_read(&data.error);
}
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e1c9cf079503..7cfcb294b11c 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -28,7 +28,7 @@ static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NO
bool barrier_nospec_enabled;
static bool no_nospec;
static bool btb_flush_enabled;
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static bool no_spectrev2;
#endif
@@ -114,7 +114,7 @@ static __init int security_feature_debugfs_init(void)
device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
-#ifdef CONFIG_PPC_FSL_BOOK3E
+#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_BOOK3S_64)
static int __init handle_nospectre_v2(char *p)
{
no_spectrev2 = true;
@@ -122,6 +122,9 @@ static int __init handle_nospectre_v2(char *p)
return 0;
}
early_param("nospectre_v2", handle_nospectre_v2);
+#endif /* CONFIG_PPC_FSL_BOOK3E || CONFIG_PPC_BOOK3S_64 */
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
void setup_spectre_v2(void)
{
if (no_spectrev2 || cpu_mitigations_off())
@@ -399,7 +402,17 @@ static void toggle_count_cache_flush(bool enable)
void setup_count_cache_flush(void)
{
- toggle_count_cache_flush(true);
+ bool enable = true;
+
+ if (no_spectrev2 || cpu_mitigations_off()) {
+ if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
+ security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
+ pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
+
+ enable = false;
+ }
+
+ toggle_count_cache_flush(enable);
}
#ifdef CONFIG_DEBUG_FS
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1f8db666468d..25aaa3903000 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -778,12 +778,6 @@ void ppc_printk_progress(char *s, unsigned short hex)
pr_info("%s\n", s);
}
-void arch_setup_pdev_archdata(struct platform_device *pdev)
-{
- pdev->archdata.dma_mask = DMA_BIT_MASK(32);
- pdev->dev.dma_mask = &pdev->archdata.dma_mask;
-}
-
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
@@ -806,9 +800,15 @@ static __init void print_system_info(void)
pr_info("mmu_features = 0x%08x\n", cur_cpu_spec->mmu_features);
#ifdef CONFIG_PPC64
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
+#ifdef CONFIG_PPC_BOOK3S
+ pr_info("vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+#endif
#endif
- print_system_hash_info();
+ if (!early_radix_enabled())
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 94517e4a2723..a7541edf0cdb 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -206,6 +206,6 @@ __init void initialize_cache_info(void)
dcache_bsize = cur_cpu_spec->dcache_bsize;
icache_bsize = cur_cpu_spec->icache_bsize;
ucache_bsize = 0;
- if (cpu_has_feature(CPU_FTR_UNIFIED_ID_CACHE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_601) || IS_ENABLED(CONFIG_E200))
ucache_bsize = icache_bsize = dcache_bsize;
}
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1e2276963f6d..e2a46cfed5fd 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk,
* FIXME: IMHO these tests do not belong in
* arch-dependent code, they are generic.
*/
- ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL);
+ ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack);
#ifdef CONFIG_KPROBES
/*
* Mark stacktraces with kretprobed functions on them
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index e2147d7c9e72..80a676da11cb 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -19,6 +19,7 @@
#include <asm/smp.h>
#include <asm/pmc.h>
#include <asm/firmware.h>
+#include <asm/svm.h>
#include "cacheinfo.h"
#include "setup.h"
@@ -715,6 +716,23 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
+#ifdef CONFIG_PPC_SVM
+static ssize_t show_svm(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%u\n", is_secure_guest());
+}
+static DEVICE_ATTR(svm, 0444, show_svm, NULL);
+
+static void create_svm_file(void)
+{
+ device_create_file(cpu_subsys.dev_root, &dev_attr_svm);
+}
+#else
+static void create_svm_file(void)
+{
+}
+#endif /* CONFIG_PPC_SVM */
+
static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1058,6 +1076,8 @@ static int __init topology_init(void)
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
+ create_svm_file();
+
return 0;
}
subsys_initcall(topology_init);
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index be1ca98fce5c..7ea0ca044b65 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void)
* Hook the return address and push it in the stack of return addrs
* in current thread info. Return the address we want to divert to.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
+ unsigned long sp)
{
unsigned long return_hooker;
@@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
return_hooker = ppc_function_entry(return_to_handler);
- if (!function_graph_enter(parent, ip, 0, NULL))
+ if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
parent = return_hooker;
out:
return parent;
diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S
index 183f608efb81..e023ae59c429 100644
--- a/arch/powerpc/kernel/trace/ftrace_32.S
+++ b/arch/powerpc/kernel/trace/ftrace_32.S
@@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 48
/* load r4 with local address */
lwz r4, 44(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
index 74acbf16a666..f9fd5f743eba 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S
@@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller)
std r2, 24(r1)
ld r2, PACATOC(r13) /* get kernel TOC in r2 */
+ addi r5, r1, 112
mfctr r4 /* ftrace_caller has moved local addr here */
std r4, 40(r1)
mflr r3 /* ftrace_caller has restored LR from stack */
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S
index e41a7d13c99c..6708e24db0ab 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.S
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S
@@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
_GLOBAL(ftrace_graph_caller)
+ addi r5, r1, 112
/* load r4 with local address */
ld r4, 128(r1)
subi r4, r4, MCOUNT_INSN_SIZE
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 11caa0291254..82f43535e686 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -472,6 +472,7 @@ void system_reset_exception(struct pt_regs *regs)
if (debugger(regs))
goto out;
+ kmsg_dump(KMSG_DUMP_OOPS);
/*
* A system reset is a request to dump, so we always send
* it through the crashdump code (if fadump or kdump are
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index 000000000000..07296bc39166
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include <asm/ppc_asm.h>
+#include <asm/export.h>
+
+_GLOBAL(ucall_norets)
+EXPORT_SYMBOL_GPL(ucall_norets)
+ sc 2 /* Invoke the ultravisor */
+ blr /* Return r3 = status */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index d60598113a9f..eae9ddaecbcf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -94,28 +94,6 @@ static struct vdso_patch_def vdso_patches[] = {
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
-#ifdef CONFIG_PPC32
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_gettimeofday", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_gettime", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_clock_getres", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_get_tbfreq", NULL
- },
- {
- CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
- "__kernel_time", NULL
- },
-#endif
};
/*
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 6984125b9fc0..6c7401bd284e 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -70,6 +70,7 @@ V_FUNCTION_END(__kernel_get_syscall_map)
*
* returns the timebase frequency in HZ
*/
+#ifndef CONFIG_PPC_BOOK3S_601
V_FUNCTION_BEGIN(__kernel_get_tbfreq)
.cfi_startproc
mflr r12
@@ -82,3 +83,4 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq)
blr
.cfi_endproc
V_FUNCTION_END(__kernel_get_tbfreq)
+#endif
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 099a6db14e67..00c025ba4a92 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -144,10 +144,13 @@ VERSION
__kernel_datapage_offset;
__kernel_get_syscall_map;
+#ifndef CONFIG_PPC_BOOK3S_601
__kernel_gettimeofday;
__kernel_clock_gettime;
__kernel_clock_getres;
+ __kernel_time;
__kernel_get_tbfreq;
+#endif
__kernel_sync_dicache;
__kernel_sync_dicache_p5;
__kernel_sigtramp32;
@@ -155,7 +158,6 @@ VERSION
#ifdef CONFIG_PPC64
__kernel_getcpu;
#endif
- __kernel_time;
local: *;
};