From b62b6cf170237677ae96001940c54d5c09d0015a Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Tue, 3 Dec 2019 17:52:03 +0100
Subject: s390/spinlock: remove confusing comment in arch_spin_lock_wait

arch_spin_lock_wait does not take steal time into consideration.

Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/spinlock.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ce1e4bbe53aa..9b2dab5a69f9 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
 
 void arch_spin_lock_wait(arch_spinlock_t *lp)
 {
-	/* Use classic spinlocks + niai if the steal time is >= 10% */
 	if (test_cpu_flag(CIF_DEDICATED_CPU))
 		arch_spin_lock_queued(lp);
 	else
-- 
cgit v1.2.3


From 7e914fd17e9a750d8896e5645c84e75e52d77a4b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 2 Dec 2019 09:02:15 +0000
Subject: s390/test_unwind: fix spelling mistake "reqister" -> "register"

There is a spelling mistake in a pr_info message. Fix it.

Link: https://lkml.kernel.org/r/20191202090215.28766-1-colin.king@canonical.com
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/lib/test_unwind.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index bda7ac0ddd29..32b7a30b2485 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -238,7 +238,7 @@ static int test_unwind_irq(struct unwindme *u)
 {
 	preempt_disable();
 	if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
-		pr_info("Couldn't reqister external interrupt handler");
+		pr_info("Couldn't register external interrupt handler");
 		return -1;
 	}
 	u->task = current;
-- 
cgit v1.2.3


From 39d4a501a9ef55c57b51e3ef07fc2aeed7f30b3b Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Thu, 28 Nov 2019 10:26:41 +0100
Subject: s390/cpum_sf: Adjust sampling interval to avoid hitting sample limits

Function perf_event_ever_overflow() and perf_event_account_interrupt()
are called every time samples are processed by the interrupt handler.
However function perf_event_account_interrupt() has checks to avoid being
flooded with interrupts (more then 1000 samples are received per
task_tick).  Samples are then dropped and a PERF_RECORD_THROTTLED is
added to the perf data. The perf subsystem limit calculation is:

    maximum sample frequency := 100000 --> 1 samples per 10 us
    task_tick = 10ms = 10000us --> 1000 samples per task_tick

The work flow is

measurement_alert() uses SDBT head and each SBDT points to 511
 SDB pages, each with 126 sample entries. After processing 8 SBDs
 and for each valid sample calling:

     perf_event_overflow()
       perf_event_account_interrupts()

there is a considerable amount of samples being dropped, especially when
the sample frequency is very high and near the 100000 limit.

To avoid the high amount of samples being dropped near the end of a
task_tick time frame, increment the sampling interval in case of
dropped events. The CPU Measurement sampling facility on the s390
supports only intervals, specifiing how many CPU cycles have to be
executed before a sample is generated. Increase the interval when the
samples being generated hit the task_tick limit.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c07fdcd73726..3f6b02ee73ec 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1315,6 +1315,22 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 	if (sampl_overflow)
 		OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
 						 sampl_overflow, 1 + num_sdb);
+
+	/* Perf_event_overflow() and perf_event_account_interrupt() limit
+	 * the interrupt rate to an upper limit. Roughly 1000 samples per
+	 * task tick.
+	 * Hitting this limit results in a large number
+	 * of throttled REF_REPORT_THROTTLE entries and the samples
+	 * are dropped.
+	 * Slightly increase the interval to avoid hitting this limit.
+	 */
+	if (event_overflow) {
+		SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+		debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+				    __func__,
+				    DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+	}
+
 	if (sampl_overflow || event_overflow)
 		debug_sprintf_event(sfdbg, 4, "%s: "
 				    "overflows: sample %llu event %llu"
-- 
cgit v1.2.3


From 0539ad0b22877225095d8adef0c376f52cc23834 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Fri, 29 Nov 2019 15:24:25 +0100
Subject: s390/cpum_sf: Avoid SBD overflow condition in irq handler

The s390 CPU Measurement sampling facility has an overflow condition
which fires when all entries in a SBD are used.
The measurement alert interrupt is triggered and reads out all samples
in this SDB. It then tests the successor SDB, if this SBD is not full,
the interrupt handler does not read any samples at all from this SDB
The design waits for the hardware to fill this SBD and then trigger
another meassurement alert interrupt.

This scheme works nicely until
an perf_event_overflow() function call discards the sample due to
a too high sampling rate.
The interrupt handler has logic to read out a partially filled SDB
when the perf event overflow condition in linux common code is met.
This causes the CPUM sampling measurement hardware and the PMU
device driver to operate on the same SBD's trailer entry.
This should not happen.

This can be seen here using this trace:
   cpumsf_pmu_add: tear:0xb5286000
   hw_perf_event_update: sdbt 0xb5286000 full 1 over 0 flush_all:0
   hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0
        above shows 1. interrupt
   hw_perf_event_update: sdbt 0xb5286008 full 1 over 0 flush_all:0
   hw_perf_event_update: sdbt 0xb5286008 full 0 over 0 flush_all:0
        above shows 2. interrupt
	... this goes on fine until...
   hw_perf_event_update: sdbt 0xb5286068 full 1 over 0 flush_all:0
   perf_push_sample1: overflow
      one or more samples read from the IRQ handler are rejected by
      perf_event_overflow() and the IRQ handler advances to the next SDB
      and modifies the trailer entry of a partially filled SDB.
   hw_perf_event_update: sdbt 0xb5286070 full 0 over 0 flush_all:1
      timestamp: 14:32:52.519953

Next time the IRQ handler is called for this SDB the trailer entry shows
an overflow count of 19 missed entries.
   hw_perf_event_update: sdbt 0xb5286070 full 1 over 19 flush_all:1
      timestamp: 14:32:52.970058

Remove access to a follow on SDB when event overflow happened.

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/kernel/perf_cpum_sf.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 3f6b02ee73ec..77d93c534284 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1303,12 +1303,6 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
 		 */
 		if (flush_all && done)
 			break;
-
-		/* If an event overflow happened, discard samples by
-		 * processing any remaining sample-data-blocks.
-		 */
-		if (event_overflow)
-			flush_all = 1;
 	}
 
 	/* Account sample overflows in the event hardware structure */
-- 
cgit v1.2.3


From 157309a97015d8b32d65847d8a6d493d1a958e93 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Fri, 6 Dec 2019 08:27:51 +0100
Subject: s390/uv: use EOPNOTSUPP instead of ENOTSUPP

ENOTSUP is just an internal kernel error and should never reach
userspace. The return value of the share function is not exported to
userspace, but to avoid giving bad examples let us use EOPNOTSUPP:

Suggested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Acked-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/uv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index ef3c00b049ab..4093a2856929 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd)
 	};
 
 	if (!is_prot_virt_guest())
-		return -ENOTSUPP;
+		return -EOPNOTSUPP;
 	/*
 	 * Sharing is page wise, if we encounter addresses that are
 	 * not page aligned, we assume something went wrong. If
-- 
cgit v1.2.3


From 1b68ac8678a8e9993deebd55014cbe803e78ca02 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Fri, 29 Nov 2019 12:59:59 +0100
Subject: s390: remove last diag 0x44 caller

diag 0x44 is a voluntary undirected yield of a virtual CPU. This has
caused a lot of performance issues in the past.

There is only one caller left, and that one is only executed if diag
0x9c (directed yield) is not present. Given that all hypervisors
implement diag 0x9c anyway, remove the last diag 0x44 to avoid that
more callers will be added.

Worst case that could happen now, if diag 0x9c is not present, is that
a virtual CPU would loop a bit instead of giving its time slice up.

diag 0x44 statistics in debugfs are kept and will always be zero, so
that user space can tell that there are no calls.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/include/asm/setup.h |  2 --
 arch/s390/kernel/early.c      | 16 ----------------
 arch/s390/kernel/smp.c        | 13 +++++--------
 3 files changed, 5 insertions(+), 26 deletions(-)

diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 6dc6c4fbc8e2..69289e99cabd 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -27,7 +27,6 @@
 #define MACHINE_FLAG_DIAG9C	BIT(3)
 #define MACHINE_FLAG_ESOP	BIT(4)
 #define MACHINE_FLAG_IDTE	BIT(5)
-#define MACHINE_FLAG_DIAG44	BIT(6)
 #define MACHINE_FLAG_EDAT1	BIT(7)
 #define MACHINE_FLAG_EDAT2	BIT(8)
 #define MACHINE_FLAG_TOPOLOGY	BIT(10)
@@ -94,7 +93,6 @@ extern unsigned long __swsusp_reset_dma;
 #define MACHINE_HAS_DIAG9C	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
 #define MACHINE_HAS_ESOP	(S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
 #define MACHINE_HAS_IDTE	(S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_DIAG44	(S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
 #define MACHINE_HAS_EDAT1	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
 #define MACHINE_HAS_EDAT2	(S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
 #define MACHINE_HAS_TOPOLOGY	(S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index db32a55daaec..cd241ee66eff 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -204,21 +204,6 @@ static __init void detect_diag9c(void)
 		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
 }
 
-static __init void detect_diag44(void)
-{
-	int rc;
-
-	diag_stat_inc(DIAG_STAT_X044);
-	asm volatile(
-		"	diag	0,0,0x44\n"
-		"0:	la	%0,0\n"
-		"1:\n"
-		EX_TABLE(0b,1b)
-		: "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
-	if (!rc)
-		S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
 static __init void detect_machine_facilities(void)
 {
 	if (test_facility(8)) {
@@ -331,7 +316,6 @@ void __init startup_init(void)
 	setup_arch_string();
 	setup_boot_command_line();
 	detect_diag9c();
-	detect_diag44();
 	detect_machine_facilities();
 	save_vector_registers();
 	setup_topology();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2794cad9312e..a08bd2522dd9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -413,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
 
 void smp_yield_cpu(int cpu)
 {
-	if (MACHINE_HAS_DIAG9C) {
-		diag_stat_inc_norecursion(DIAG_STAT_X09C);
-		asm volatile("diag %0,0,0x9c"
-			     : : "d" (pcpu_devices[cpu].address));
-	} else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
-		diag_stat_inc_norecursion(DIAG_STAT_X044);
-		asm volatile("diag 0,0,0x44");
-	}
+	if (!MACHINE_HAS_DIAG9C)
+		return;
+	diag_stat_inc_norecursion(DIAG_STAT_X09C);
+	asm volatile("diag %0,0,0x9c"
+		     : : "d" (pcpu_devices[cpu].address));
 }
 
 /*
-- 
cgit v1.2.3


From 3e39ce266a273e5751e5792aee248cb8d9630c8b Mon Sep 17 00:00:00 2001
From: Vasily Gorbik <gor@linux.ibm.com>
Date: Fri, 2 Aug 2019 12:42:59 +0200
Subject: s390/kasan: add KASAN_VMALLOC support

Add KASAN_VMALLOC support which now enables vmalloc memory area access
checks as well as enables usage of VMAP_STACK under kasan.

KASAN_VMALLOC changes the way vmalloc and modules areas shadow memory
is handled. With this new approach only top level page tables are
pre-populated and lower levels are filled dynamically upon memory
allocation.

Acked-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/Kconfig         |  1 +
 arch/s390/mm/kasan_init.c | 68 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d4051e88e625..bc88841d335d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -124,6 +124,7 @@ config S390
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE
 	select HAVE_ARCH_KASAN
+	select HAVE_ARCH_KASAN_VMALLOC
 	select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
 	select HAVE_ARCH_SECCOMP_FILTER
 	select HAVE_ARCH_SOFT_DIRTY
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 460f25572940..06345616a646 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void)
 enum populate_mode {
 	POPULATE_ONE2ONE,
 	POPULATE_MAP,
-	POPULATE_ZERO_SHADOW
+	POPULATE_ZERO_SHADOW,
+	POPULATE_SHALLOW
 };
 static void __init kasan_early_vmemmap_populate(unsigned long address,
 						unsigned long end,
@@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 			pgd_populate(&init_mm, pg_dir, p4_dir);
 		}
 
+		if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + P4D_SIZE) & P4D_MASK;
+			continue;
+		}
+
 		p4_dir = p4d_offset(pg_dir, address);
 		if (p4d_none(*p4_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 			p4d_populate(&init_mm, p4_dir, pu_dir);
 		}
 
+		if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+		    mode == POPULATE_SHALLOW) {
+			address = (address + PUD_SIZE) & PUD_MASK;
+			continue;
+		}
+
 		pu_dir = pud_offset(p4_dir, address);
 		if (pud_none(*pu_dir)) {
 			if (mode == POPULATE_ZERO_SHADOW &&
@@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
 				page = kasan_early_shadow_page;
 				pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
 				break;
+			case POPULATE_SHALLOW:
+				/* should never happen */
+				break;
 			}
 		}
 		address += PAGE_SIZE;
@@ -313,22 +329,50 @@ void __init kasan_early_init(void)
 	init_mm.pgd = early_pg_dir;
 	/*
 	 * Current memory layout:
-	 * +- 0 -------------+	 +- shadow start -+
-	 * | 1:1 ram mapping |	/| 1/8 ram	  |
-	 * +- end of ram ----+ / +----------------+
-	 * | ... gap ...     |/  |	kasan	  |
-	 * +- shadow start --+	 |	zero	  |
-	 * | 1/8 addr space  |	 |	page	  |
-	 * +- shadow end    -+	 |	mapping	  |
-	 * | ... gap ...     |\  |    (untracked) |
-	 * +- modules vaddr -+ \ +----------------+
-	 * | 2Gb	     |	\|	unmapped  | allocated per module
-	 * +-----------------+	 +- shadow end ---+
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |		    |
+	 * |		     |/    |	kasan	    |
+	 * +- shadow start --+	   |	zero	    |
+	 * | 1/8 addr space  |	   |	page	    |
+	 * +- shadow end    -+	   |	mapping	    |
+	 * | ... gap ...     |\    |  (untracked)   |
+	 * +- vmalloc area  -+ \   |		    |
+	 * | vmalloc_size    |	\  |		    |
+	 * +- modules vaddr -+	 \ +----------------+
+	 * | 2Gb	     |	  \|	  unmapped  | allocated per module
+	 * +-----------------+	   +- shadow end ---+
+	 *
+	 * Current memory layout (KASAN_VMALLOC):
+	 * +- 0 -------------+	   +- shadow start -+
+	 * | 1:1 ram mapping |	  /| 1/8 ram	    |
+	 * |		     |	 / |		    |
+	 * +- end of ram ----+	/  +----------------+
+	 * | ... gap ...     | /   |	kasan	    |
+	 * |		     |/    |	zero	    |
+	 * +- shadow start --+	   |	page	    |
+	 * | 1/8 addr space  |	   |	mapping     |
+	 * +- shadow end    -+	   |  (untracked)   |
+	 * | ... gap ...     |\    |		    |
+	 * +- vmalloc area  -+ \   +- vmalloc area -+
+	 * | vmalloc_size    |	\  |shallow populate|
+	 * +- modules vaddr -+	 \ +- modules area -+
+	 * | 2Gb	     |	  \|shallow populate|
+	 * +-----------------+	   +- shadow end ---+
 	 */
 	/* populate kasan shadow (for identity mapping and zero page mapping) */
 	kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
 	if (IS_ENABLED(CONFIG_MODULES))
 		untracked_mem_end = vmax - MODULES_LEN;
+	if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		untracked_mem_end = vmax - vmalloc_size - MODULES_LEN;
+		/* shallowly populate kasan shadow for vmalloc and modules */
+		kasan_early_vmemmap_populate(__sha(untracked_mem_end),
+					     __sha(vmax), POPULATE_SHALLOW);
+	}
+	/* populate kasan shadow for untracked memory */
 	kasan_early_vmemmap_populate(__sha(max_physmem_end),
 				     __sha(untracked_mem_end),
 				     POPULATE_ZERO_SHADOW);
-- 
cgit v1.2.3