diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 4c95c365058a..4d8134176354 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -101,6 +101,8 @@ static inline void fpstate_init_fxstate(struct fxregs_state *fx)
 	fx->mxcsr = MXCSR_DEFAULT;
 }
 extern void fpstate_sanitize_xstate(struct fpu *fpu);
+extern void switch_to_usercopy_page_tables(struct task_struct *next_tsk);
+extern void switch_away_from_usercopy_page_tables(struct task_struct *next_tsk);
 
 #define user_insn(insn, output, input...)				\
 ({									\
@@ -108,6 +110,7 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 									\
 	might_fault();							\
 									\
+	start_usercopy_page_tables();					\
 	asm volatile(ASM_STAC "\n"					\
 		     "1:" #insn "\n\t"					\
 		     "2: " ASM_CLAC "\n"				\
@@ -118,6 +121,7 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
 		     _ASM_EXTABLE(1b, 3b)				\
 		     : [err] "=r" (err), output				\
 		     : "0"(0), input);					\
+	end_usercopy_page_tables();					\
 	err;								\
 })
 
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h
index 13c83fe97988..5775720a26bc 100644
--- a/arch/x86/include/asm/futex.h
+++ b/arch/x86/include/asm/futex.h
@@ -13,6 +13,7 @@
 #include <asm/smap.h>
 
 #define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
+	start_usercopy_page_tables();			\
 	asm volatile("\t" ASM_STAC "\n"				\
 		     "1:\t" insn "\n"				\
 		     "2:\t" ASM_CLAC "\n"			\
@@ -22,9 +23,11 @@
 		     "\t.previous\n"				\
 		     _ASM_EXTABLE_UA(1b, 3b)			\
 		     : "=r" (oldval), "=r" (ret), "+m" (*uaddr)	\
-		     : "i" (-EFAULT), "0" (oparg), "1" (0))
+		     : "i" (-EFAULT), "0" (oparg), "1" (0));	\
+	end_usercopy_page_tables();
 
 #define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
+	start_usercopy_page_tables();			\
 	asm volatile("\t" ASM_STAC "\n"				\
 		     "1:\tmovl	%2, %0\n"			\
 		     "\tmovl\t%0, %3\n"				\
@@ -40,7 +43,8 @@
 		     _ASM_EXTABLE_UA(2b, 4b)			\
 		     : "=&amp;a" (oldval), "=&amp;r" (ret),		\
 		       "+m" (*uaddr), "=&amp;r" (tem)		\
-		     : "r" (oparg), "i" (-EFAULT), "1" (0))
+		     : "r" (oparg), "i" (-EFAULT), "1" (0));	\
+	end_usercopy_page_tables();
 
 static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
 		u32 __user *uaddr)
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index a281e61ec60c..07f659cb8a15 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -36,7 +36,7 @@ extern gfp_t __userpte_alloc_gfp;
  * both 8k in size and 8k-aligned.  That lets us just flip bit 12
  * in a pointer to swap between the two 4k halves.
  */
-#define PGD_ALLOCATION_ORDER 1
+#define PGD_ALLOCATION_ORDER 2
 #else
 #define PGD_ALLOCATION_ORDER 0
 #endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 5e0509b41986..346d35d12faf 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1268,6 +1268,21 @@ static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
 	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
 }
 
+static inline pgd_t *kernel_to_usercopy_pgdp(pgd_t *pgdp)
+{
+	/*
+	 * When "SMAP-emulation" is enabled, there is a third
+	 * PGD: the "usercopy PGD".  All three copies are
+	 * laid out within an order-2 (16k) page like this:
+	 *
+	 * page[0]: Kernel page tables
+	 * page[1]: User page tables
+	 * page[2]: Usercopy page tables
+	 * page[3]: Currently unused
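+	 *
+	 * PTI_PGTABLE_SWITCH_BIT is PAGE_SHIFT, so setting the bit
+	 * just above it (bit 13) in a pointer into page[0] moves the
+	 * pointer up by 8k, into page[2].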
+	 */
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT+1);
+}
+
 static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
 {
 	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 27c47d183f4b..5159ae3558c2 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -41,14 +41,19 @@
 
 #ifdef CONFIG_X86_SMAP
 
+extern void start_usercopy_page_tables(void);
+extern void end_usercopy_page_tables(void);
+
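+/*
+ * Tie the usercopy page-table switch to the SMAP window: the usercopy
+ * tables should only be live for the STAC..CLAC region, so they are
+ * installed just before STAC opens user access and the regular kernel
+ * tables are restored just before CLAC closes it.
+ */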
 static __always_inline void clac(void)
 {
+	end_usercopy_page_tables();
 	/* Note: a barrier is implicit in alternative() */
 	alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
 }
 
 static __always_inline void stac(void)
 {
+	start_usercopy_page_tables();
 	/* Note: a barrier is implicit in alternative() */
 	alternative("", __ASM_STAC, X86_FEATURE_SMAP);
 }
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 5cd1caa8bc65..c2193c1b11dd 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -34,6 +34,7 @@ copy_user_generic(void *to, const void *from, unsigned len)
 	 * Otherwise, if CPU has rep_good feature, use copy_user_generic_string.
 	 * Otherwise, use copy_user_generic_unrolled.
 	 */
+	start_usercopy_page_tables();
 	alternative_call_2(copy_user_generic_unrolled,
 			 copy_user_generic_string,
 			 X86_FEATURE_REP_GOOD,
@@ -43,6 +44,7 @@ copy_user_generic(void *to, const void *from, unsigned len)
 				     "=d" (len)),
 			 "1" (to), "2" (from), "3" (len)
 			 : "memory", "rcx", "r8", "r9", "r10", "r11");
+	end_usercopy_page_tables();
 	return ret;
 }
 
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index d50c7b747d8b..6b07404c9d93 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -223,12 +223,14 @@ static __always_inline void __xen_stac(void)
 	 * Suppress objtool seeing the STAC/CLAC and getting confused about it
 	 * calling random code with AC=1.
 	 */
+	start_usercopy_page_tables();
 	asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
 		     ASM_STAC ::: "memory", "flags");
 }
 
 static __always_inline void __xen_clac(void)
 {
+	end_usercopy_page_tables();
 	asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
 		     ASM_CLAC ::: "memory", "flags");
 }
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 390596b761e3..68886337813e 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -741,6 +741,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 * The lock is not really needed, but this allows to avoid open-coding.
 	 */
 	ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+	printk("%s()::%d ptep: %016lx\n", __func__, __LINE__, (unsigned long)ptep);
 
 	/*
 	 * This must not fail; preallocated in poking_init().
@@ -753,6 +754,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	if (cross_page_boundary) {
 		pte = mk_pte(pages[1], pgprot);
 		set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+		printk("%s()::%d ptep+1: %016lx\n", __func__, __LINE__, (unsigned long)(ptep+1));
 	}
 
 	/*
@@ -761,6 +763,8 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	prev = use_temporary_mm(poking_mm);
 
+	printk("%s()::%d poking_addr: %016lx\n", __func__, __LINE__, (unsigned long)poking_addr);
+
 	kasan_disable_current();
 	memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
 	kasan_enable_current();
@@ -771,6 +775,7 @@ static void *__text_poke(void *addr, const void *opcode, size_t len)
 	 */
 	barrier();
 
+	printk("%s()::%d ptep: %016lx\n", __func__, __LINE__, (unsigned long)ptep);
 	pte_clear(poking_mm, poking_addr, ptep);
 	if (cross_page_boundary)
 		pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
@@ -902,6 +907,8 @@ void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
 {
 	unsigned char int3 = 0xcc;
 
+	printk("%s(%016lx)\n", __func__, (unsigned long)addr);
+
 	bp_int3_handler = handler;
 	bp_int3_addr = (u8 *)addr + sizeof(int3);
 	bp_patching_in_progress = true;
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index bcd206c8ac90..7cdb43199f54 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -413,6 +413,7 @@ NEXT_PAGE(level2_ident_pgt)
 NEXT_PGD_PAGE(init_top_pgt)
 	.fill	512,8,0
 	.fill	PTI_USER_PGD_FILL,8,0
+GLOBAL(end_init_top_pgt)
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index b2463fcb20a8..14628f7b25bd 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -162,13 +162,14 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
 
 static void sanity_check_ldt_mapping(struct mm_struct *mm)
 {
-	pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
-	pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
+	pgd_t *k_pgd  = pgd_offset(mm, LDT_BASE_ADDR);
+	pgd_t *u_pgd  = kernel_to_user_pgdp(k_pgd);
+	pgd_t *uc_pgd = kernel_to_usercopy_pgdp(k_pgd);
 	bool had_kernel, had_user;
 	pmd_t *k_pmd, *u_pmd;
 
-	k_pmd      = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
-	u_pmd      = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
+	k_pmd      = pgd_to_pmd_walk(k_pgd,  LDT_BASE_ADDR);
+	u_pmd      = pgd_to_pmd_walk(uc_pgd, LDT_BASE_ADDR);
 	had_kernel = (k_pmd->pmd != 0);
 	had_user   = (u_pmd->pmd != 0);
 
@@ -179,17 +180,19 @@ static void sanity_check_ldt_mapping(struct mm_struct *mm)
 
 static void map_ldt_struct_to_user(struct mm_struct *mm)
 {
-	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
+	pgd_t *pgdp = pgd_offset(mm, LDT_BASE_ADDR);
+	pgd_t pgd = *kernel_to_usercopy_pgdp(pgdp);
 
 	if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
-		set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+		set_pgd(kernel_to_user_pgdp(pgdp), pgd);
 }
 
 static void sanity_check_ldt_mapping(struct mm_struct *mm)
 {
-	pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
-	bool had_kernel = (pgd->pgd != 0);
-	bool had_user   = (kernel_to_user_pgdp(pgd)->pgd != 0);
+	pgd_t *pgdp = pgd_offset(mm, LDT_BASE_ADDR);
+	pgd_t pgd = *kernel_to_usercopy_pgdp(pgdp);
+	bool had_kernel = (pgd.pgd != 0);
+	bool had_user   = (kernel_to_user_pgdp(pgdp)->pgd != 0);
 
 	do_sanity_check(mm, had_kernel, had_user);
 }
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6a38717d179c..fa67f663a171 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -174,6 +174,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
 
 	down_write(&mm->mmap_sem);
 	pgd = pgd_offset(mm, 0xA0000);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (pgd_none_or_clear_bad(pgd))
 		goto out;
 	p4d = p4d_offset(pgd, 0xA0000);
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 378a1f70ae7d..c52f5bb8a1b3 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -54,6 +54,7 @@
  * eax uncopied bytes or 0 if successful.
  */
 ENTRY(copy_user_generic_unrolled)
+	//call start_usercopy_page_tables
 	ASM_STAC
 	cmpl $8,%edx
 	jb 20f		/* less then 8 bytes, go to byte copy loop */
@@ -104,6 +105,7 @@ ENTRY(copy_user_generic_unrolled)
 	jnz 21b
 23:	xor %eax,%eax
 	ASM_CLAC
+	//call end_usercopy_page_tables
 	ret
 
 	.section .fixup,"ax"
@@ -158,6 +160,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
  * eax uncopied bytes or 0 if successful.
  */
 ENTRY(copy_user_generic_string)
+	//call start_usercopy_page_tables
 	ASM_STAC
 	cmpl $8,%edx
 	jb 2f		/* less than 8 bytes, go to byte copy loop */
@@ -172,6 +175,7 @@ ENTRY(copy_user_generic_string)
 	movsb
 	xorl %eax,%eax
 	ASM_CLAC
+	//call end_usercopy_page_tables
 	ret
 
 	.section .fixup,"ax"
@@ -198,6 +202,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
  * eax uncopied bytes or 0 if successful.
  */
 ENTRY(copy_user_enhanced_fast_string)
+	//call start_usercopy_page_tables
 	ASM_STAC
 	cmpl $64,%edx
 	jb .L_copy_short_string	/* less then 64 bytes, avoid the costly 'rep' */
@@ -206,6 +211,7 @@ ENTRY(copy_user_enhanced_fast_string)
 	movsb
 	xorl %eax,%eax
 	ASM_CLAC
+	//call end_usercopy_page_tables
 	ret
 
 	.section .fixup,"ax"
@@ -236,6 +242,7 @@ copy_user_handle_tail:
 1:	rep movsb
 2:	mov %ecx,%eax
 	ASM_CLAC
+	//call end_usercopy_page_tables
 	ret
 
 	_ASM_EXTABLE_UA(1b, 2b)
@@ -251,6 +258,7 @@ ENDPROC(copy_user_handle_tail)
  *  - Require 4-byte alignment when size is 4 bytes.
  */
 ENTRY(__copy_user_nocache)
+	//call start_usercopy_page_tables
 	ASM_STAC
 
 	/* If size is less than 8 bytes, go to 4-byte copy */
@@ -346,6 +354,7 @@ ENTRY(__copy_user_nocache)
 .L_finish_copy:
 	xorl %eax,%eax
 	ASM_CLAC
+	//call end_usercopy_page_tables
 	sfence
 	ret
 
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index d2e5c9c39601..2f19580774cd 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -38,17 +38,20 @@
 .text
 ENTRY(__put_user_1)
 	ENTER
+	call start_usercopy_page_tables
 	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
 	jae bad_put_user
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
 	xor %eax,%eax
+	call end_usercopy_page_tables
 	EXIT
 ENDPROC(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)
 
 ENTRY(__put_user_2)
 	ENTER
+	call start_usercopy_page_tables
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
@@ -56,12 +59,14 @@ ENTRY(__put_user_2)
 	ASM_STAC
 2:	movw %ax,(%_ASM_CX)
 	xor %eax,%eax
+	call end_usercopy_page_tables
 	EXIT
 ENDPROC(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)
 
 ENTRY(__put_user_4)
 	ENTER
+	call start_usercopy_page_tables
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
@@ -69,12 +74,14 @@ ENTRY(__put_user_4)
 	ASM_STAC
 3:	movl %eax,(%_ASM_CX)
 	xor %eax,%eax
+	call end_usercopy_page_tables
 	EXIT
 ENDPROC(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)
 
 ENTRY(__put_user_8)
 	ENTER
+	call start_usercopy_page_tables
 	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
 	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
@@ -85,12 +92,14 @@ ENTRY(__put_user_8)
 5:	movl %edx,4(%_ASM_CX)
 #endif
 	xor %eax,%eax
+	call end_usercopy_page_tables
 	EXIT
 ENDPROC(__put_user_8)
 EXPORT_SYMBOL(__put_user_8)
 
 bad_put_user:
 	movl $-EFAULT,%eax
+	call end_usercopy_page_tables
 	EXIT
 END(bad_put_user)
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1f67b1e15bf6..45aabc9a2396 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -140,6 +140,14 @@ static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
+
+		/*
+		 * The usercopy pgd needs a full copy of the kernel
+		 * page tables.
+		 */
+		clone_pgd_range(kernel_to_usercopy_pgdp(pgd) + KERNEL_PGD_BOUNDARY,
+				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+				KERNEL_PGD_PTRS);
 	}
 
 	/* list required to sync kernel mapping updates */
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index b196524759ec..fe663b58a2f9 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -122,8 +122,22 @@ void __init pti_check_boottime_disable(void)
 	setup_force_cpu_cap(X86_FEATURE_PTI);
 }
 
+static bool ptr_in_page(void *_ptr, void *page)
+{
+	char *ptr = _ptr;
+
+	if (ptr <= (char *)page)
+		return false;
+
+	if (ptr - PAGE_SIZE >= (char *)page)
+		return false;
+
+	return true;
+}
+
 pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 {
+	unsigned long pgd_ptr_val = (unsigned long)pgdp;
 	/*
 	 * Changes to the high (kernel) portion of the kernelmode page
 	 * tables are not automatically propagated to the usermode tables.
@@ -136,12 +150,28 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 	if (!pgdp_maps_userspace(pgdp))
 		return pgd;
 
+	if ((pgd_ptr_val >> PAGE_SHIFT) & 0x3) {
+		//printk("bad kernel PGD pointer.  Should have been 16k-aligned\n");
+		printk("pgd_ptr_val: %016lx\n", pgd_ptr_val);
+		//printk("     masked: %016lx\n", pgd_ptr_val & ~0xfff);
+		//printk("   low2bits: %016lx\n", (pgd_ptr_val >> PAGE_SHIFT) & 0x3);
+		//dump_stack();
+
+		pgdp = ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT+1);
+		printk("        new: %016lx\n", (unsigned long)pgdp);
+	}
+
 	/*
 	 * The user page tables get the full PGD, accessible from
 	 * userspace:
 	 */
 	kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
 
+	/*
+	 * Also propagate the full PGD into the usercopy version:
+	 */
+	kernel_to_usercopy_pgdp(pgdp)->pgd = pgd.pgd;
+
 	/*
 	 * If this is normal user memory, make it NX in the kernel
 	 * pagetables so that, if we somehow screw up and return to
@@ -156,9 +186,28 @@ pgd_t __pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
 	 *  - we're clearing the PGD (i.e. the new pgd is not present).
 	 */
 	if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
-	    (__supported_pte_mask & _PAGE_NX))
+	    (__supported_pte_mask & _PAGE_NX)) {
+		extern struct mm_struct *poking_mm;
+
 		pgd.pgd |= _PAGE_NX;
 
+		if (ptr_in_page(pgdp, &init_top_pgt)) {
+			printk_ratelimited("low pgdp: %016lx\n", (unsigned long)pgdp);
+			dump_stack();
+		} else if (ptr_in_page(pgdp, poking_mm->pgd)) {
+			printk_ratelimited("poking pgdp: %016lx\n", (unsigned long)pgdp);
+			dump_stack();
+		} else {
+			/*
+			 * Make the kernel *NOT* map userspace by default.  This
+			 * should be fun:
+			 */
+			printk("NOT zapping user pgd @ %016lx\n", (unsigned long)pgdp);
+			//dump_stack();
+			//pgd.pgd = 0;
+		}
+	}
+
 	/* return the copy of the PGD we want the kernel to use: */
 	return pgd;
 }
@@ -675,3 +724,96 @@ void pti_finalize(void)
 
 	debug_checkwx_user();
 }
+
+asmlinkage __visible
+void switch_to_usercopy_page_tables(struct task_struct *next_tsk)
+{
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	/*
+	 * Choose an ASID that is unused for anything else.  The
+	 * bottom 3 bits are used for the ASID cache and the top
+	 * bit (X86_CR3_PTI_PCID_USER_BIT) is used to select between
+	 * user and kernel.  The bit below there should leave
+	 * plenty of room:
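+	 * (X86_CR3_PTI_PCID_USER_BIT is CR3 bit 11, so the mask below
+	 * sets bit 10.)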
+	 */
+	u32 usercopy_asid_mask = 1 << (X86_CR3_PTI_PCID_USER_BIT - 1);
+	/* can probably be a build_cr3_noflush() */
+	unsigned long cr3;
+	pgd_t *usercopy_pgd;
+
+	if (!next_tsk || !next_tsk->mm)
+		return;
+
+	////printk("%s() real switch\n", __func__);
+	usercopy_pgd = kernel_to_usercopy_pgdp(next_tsk->mm->pgd);
+	cr3 = build_cr3(usercopy_pgd, loaded_mm_asid);
+	/*
+	 * build_cr3() should be doing this, but just hack around it
+	 * for now:
+	 */
+	cr3 |= usercopy_asid_mask;
+
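+	/*
+	 * As noted above, build_cr3() does not set the NOFLUSH bit, so
+	 * this CR3 write also flushes the TLB entries for the target
+	 * PCID on every entry into the usercopy page tables.
+	 */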
+	write_cr3(cr3);
+}
+
+asmlinkage __visible
+void switch_away_from_usercopy_page_tables(struct task_struct *next_tsk)
+{
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long cr3;
+	pgd_t *kernel_pgd;
+
+	if (!next_tsk || !next_tsk->mm)
+		return;
+	//printk("%s() real switch\n", __func__);
+	kernel_pgd = next_tsk->mm->pgd;
+	cr3 = build_cr3(kernel_pgd, loaded_mm_asid);
+
+	write_cr3(cr3);
+}
+
+/*
+ * When executing in the kernel, normal interrupt entry does not write
+ * CR3, so the usercopy CR3 value should persist into the interrupt
+ * handler.  Not great, but also not fatal.
+ *
+ * NMIs, on the other hand, unconditionally switch CR3, but they also
+ * restore the value they saw on entry so they should not clobber
+ * these changes.
+ */
+asmlinkage __visible
+void start_usercopy_page_tables(void)
+{
+	WARN_ON(current->in_usercopy < 0);
+	//printk("%s()\n", __func__);
+	//dump_stack();
+	/*
+	 * Fun fact: lots of code uses the usercopy routines,
+	 * including from interrupt and exception handlers and early
+	 * boot.  So, this needs to be a real counter, not just an
+	 * in/out flag.
+	 */
+	current->in_usercopy++;
+	/*
+	 * At this point, if preempted, the context-switch code
+	 * may switch us to the usercopy page tables.
+	 */
+	if (current->in_usercopy == 1)
+		switch_to_usercopy_page_tables(current);
+}
+
+asmlinkage
+__visible
+void end_usercopy_page_tables(void)
+{
+	//printk("%s()\n", __func__);
+	current->in_usercopy--;
+	/*
+	 * At this point, if preempted, the context-switch code
+	 * may switch us away from the usercopy page tables.
+	 */
+	if (!current->in_usercopy)
+		switch_away_from_usercopy_page_tables(current);
+	WARN_ON(current->in_usercopy < 0);
+}
+
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 91f6db92554c..d26e86983659 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -110,7 +110,8 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
 	*need_flush = true;
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush,
+			    struct task_struct *next_tsk)
 {
 	unsigned long new_mm_cr3;
 
@@ -121,12 +122,18 @@ static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
 		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
 	}
 
-	/*
-	 * Caution: many callers of this function expect
-	 * that load_cr3() is serializing and orders TLB
-	 * fills with respect to the mm_cpumask writes.
-	 */
-	write_cr3(new_mm_cr3);
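+	/*
+	 * If the incoming task was preempted in the middle of a user
+	 * copy (its in_usercopy count is still elevated), point CR3 at
+	 * its usercopy page tables instead of the regular kernel PGD.
+	 */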
+	if (next_tsk && next_tsk->in_usercopy) {
+		void switch_to_usercopy_page_tables(struct task_struct *next_tsk);
+		printk("%s() in_usercopy==1\n", __func__);
+		switch_to_usercopy_page_tables(next_tsk);
+	} else {
+		/*
+		 * Caution: many callers of this function expect
+		 * that load_cr3() is serializing and orders TLB
+		 * fills with respect to the mm_cpumask writes.
+		 */
+		write_cr3(new_mm_cr3);
+	}
 }
 
 void leave_mm(int cpu)
@@ -414,7 +421,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		load_new_mm_cr3(next->pgd, new_asid, true, tsk);
 
 		/*
 		 * NB: This gets called via leave_mm() in the idle path
@@ -427,7 +434,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		load_new_mm_cr3(next->pgd, new_asid, false, tsk);
 
 		/* See above wrt _rcuidle. */
 		trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 11837410690f..bba413c8f175 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1200,6 +1200,7 @@ struct task_struct {
 	unsigned long			prev_lowest_stack;
 #endif
 
+	int in_usercopy;
 	/*
 	 * New fields for task_struct should be added above here, so that
 	 * they are included in the randomized portion of task_struct.
diff --git a/mm/gup.c b/mm/gup.c
index 1c922e169bbf..f2e6f1604ecd 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -316,6 +316,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	pte_unmap_unlock(ptep, ptl);
 	if (!pte_none(pte))
 		return NULL;
+	printk("no_page_table() @ %d\n", __LINE__);
 	return no_page_table(vma, flags);
 }
 
@@ -479,11 +480,15 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 	struct page *page;
 
 	p4d = p4d_offset(pgdp, address);
-	if (p4d_none(*p4d))
+	if (p4d_none(*p4d)) {
+		printk("no_page_table() @ %d\n", __LINE__);
 		return no_page_table(vma, flags);
+	}
 	BUILD_BUG_ON(p4d_huge(*p4d));
-	if (unlikely(p4d_bad(*p4d)))
+	if (unlikely(p4d_bad(*p4d))) {
+		printk("no_page_table() @ %d\n", __LINE__);
 		return no_page_table(vma, flags);
+	}
 
 	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
 		page = follow_huge_pd(vma, address,
@@ -491,6 +496,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 				      P4D_SHIFT);
 		if (page)
 			return page;
+		printk("no_page_table() @ %d\n", __LINE__);
 		return no_page_table(vma, flags);
 	}
 	return follow_pud_mask(vma, address, p4d, flags, ctx);
@@ -533,14 +539,18 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
 	}
 
 	pgd = pgd_offset(mm, address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 
-	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
+	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) {
+		printk("no_page_table() @ %d\n", __LINE__);
 		return no_page_table(vma, flags);
+	}
 
 	if (pgd_huge(*pgd)) {
 		page = follow_huge_pgd(mm, address, pgd, flags);
 		if (page)
 			return page;
+		printk("no_page_table() @ %d\n", __LINE__);
 		return no_page_table(vma, flags);
 	}
 	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
@@ -2101,6 +2111,7 @@ static void gup_pgd_range(unsigned long addr, unsigned long end,
 	pgd_t *pgdp;
 
 	pgdp = pgd_offset(current->mm, addr);
+	pgdp = kernel_to_usercopy_pgdp(pgdp);
 	do {
 		pgd_t pgd = READ_ONCE(*pgdp);
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3b156759a963..57710469e590 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2356,6 +2356,7 @@ void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
 	pmd_t *pmd;
 
 	pgd = pgd_offset(vma->vm_mm, address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (!pgd_present(*pgd))
 		return;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5cfcf2787566..640b7e8beee4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4804,9 +4804,14 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
  */
 int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
 {
-	pgd_t *pgd = pgd_offset(mm, *addr);
-	p4d_t *p4d = p4d_offset(pgd, *addr);
-	pud_t *pud = pud_offset(p4d, *addr);
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+
+	pgd = pgd_offset(mm, *addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
+	p4d = p4d_offset(pgd, *addr);
+	pud = pud_offset(p4d, *addr);
 
 	BUG_ON(page_count(virt_to_page(ptep)) == 0);
 	if (page_count(virt_to_page(ptep)) == 1)
@@ -4847,6 +4852,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm,
 	pte_t *pte = NULL;
 
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	p4d = p4d_alloc(mm, pgd, addr);
 	if (!p4d)
 		return NULL;
@@ -4885,6 +4891,7 @@ pte_t *huge_pte_offset(struct mm_struct *mm,
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (!pgd_present(*pgd))
 		return NULL;
 	p4d = p4d_offset(pgd, addr);
diff --git a/mm/memory.c b/mm/memory.c
index c178741d276a..19c211b337da 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -361,6 +361,7 @@ void free_pgd_range(struct mmu_gather *tlb,
 	 */
 	tlb_change_page_size(tlb, PAGE_SIZE);
 	pgd = pgd_offset(tlb->mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -1020,7 +1021,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 
 	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
+	dst_pgd = kernel_to_usercopy_pgdp(dst_pgd);
 	src_pgd = pgd_offset(src_mm, addr);
+	src_pgd = kernel_to_usercopy_pgdp(src_pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(src_pgd))
@@ -1261,6 +1264,7 @@ void unmap_page_range(struct mmu_gather *tlb,
 	BUG_ON(addr >= end);
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
@@ -1420,6 +1424,7 @@ void zap_vma_ptes(struct vm_area_struct *vma, unsigned long address,
 }
 EXPORT_SYMBOL_GPL(zap_vma_ptes);
 
+extern struct mm_struct *poking_mm;
 pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			spinlock_t **ptl)
 {
@@ -1429,6 +1434,9 @@ pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
+	if (mm && (mm != poking_mm)) {
+		pgd = kernel_to_usercopy_pgdp(pgd);
+	}
 	p4d = p4d_alloc(mm, pgd, addr);
 	if (!p4d)
 		return NULL;
@@ -1969,6 +1977,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	flush_cache_range(vma, addr, end);
 	do {
 		next = pgd_addr_end(addr, end);
@@ -2146,6 +2155,7 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
 		return -EINVAL;
 
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
@@ -3973,6 +3983,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf)
  * The mmap_sem may have been released depending on flags and our
  * return value.  See filemap_fault() and __lock_page_or_retry().
  */
+static int dump_every = 0;
 static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 		unsigned long address, unsigned int flags)
 {
@@ -3986,10 +3997,24 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
 	unsigned int dirty = flags & FAULT_FLAG_WRITE;
 	struct mm_struct *mm = vma->vm_mm;
 	pgd_t *pgd;
+	pgd_t *opgd;
 	p4d_t *p4d;
 	vm_fault_t ret;
 
 	pgd = pgd_offset(mm, address);
+	opgd = pgd;
+	pgd = kernel_to_usercopy_pgdp(pgd);
+	printk("%s(%016lx) opgd@%016lx: %016lx pgd@%016lx: %016lx in_usercopy: %d %016lx\n", __func__, address,
+			(unsigned long)opgd,
+			*(unsigned long *)opgd,
+			(unsigned long)pgd,
+			*(unsigned long *)pgd,
+			current->in_usercopy,
+			__native_read_cr3());
+
+	if (++dump_every % 100 == 0)
+		dump_stack();
+
 	p4d = p4d_alloc(mm, pgd, address);
 	if (!p4d)
 		return VM_FAULT_OOM;
@@ -4207,6 +4232,7 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address,
 	pte_t *ptep;
 
 	pgd = pgd_offset(mm, address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		goto out;
 
diff --git a/mm/migrate.c b/mm/migrate.c
index e00caf1794ae..18a60460e378 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2903,6 +2903,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate,
 		goto abort;
 
 	pgdp = pgd_offset(mm, addr);
+	pgdp = kernel_to_usercopy_pgdp(pgdp);
 	p4dp = p4d_alloc(mm, pgdp, addr);
 	if (!p4dp)
 		goto abort;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 4f209a7ffa50..cebbf680db6f 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -301,6 +301,7 @@ static unsigned long change_protection_range(struct vm_area_struct *vma,
 
 	BUG_ON(addr >= end);
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	flush_cache_range(vma, addr, end);
 	inc_tlb_flush_pending(mm);
 	do {
diff --git a/mm/mremap.c b/mm/mremap.c
index fc241d23cd97..9e2804364247 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -38,6 +38,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (pgd_none_or_clear_bad(pgd))
 		return NULL;
 
@@ -65,6 +66,7 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 	pmd_t *pmd;
 
 	pgd = pgd_offset(mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	p4d = p4d_alloc(mm, pgd, addr);
 	if (!p4d)
 		return NULL;
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 11df03e71288..19fd77268823 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -166,6 +166,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
 	}
 restart:
 	pgd = pgd_offset(mm, pvmw->address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (!pgd_present(*pgd))
 		return false;
 	p4d = p4d_offset(pgd, pvmw->address);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index c3084ff2569d..3ccc75e8107e 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -148,6 +148,7 @@ static int walk_pgd_range(unsigned long addr, unsigned long end,
 	int err = 0;
 
 	pgd = pgd_offset(walk->mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd)) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 741a1fdcd07f..1b7800d69aa8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -716,6 +716,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address)
 	pmd_t pmde;
 
 	pgd = pgd_offset(mm, address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	if (!pgd_present(*pgd))
 		goto out;
 
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 596ac98051c5..c0531bcc6adb 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1956,6 +1956,7 @@ static int unuse_vma(struct vm_area_struct *vma, unsigned int type,
 	end = vma->vm_end;
 
 	pgd = pgd_offset(vma->vm_mm, addr);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	do {
 		next = pgd_addr_end(addr, end);
 		if (pgd_none_or_clear_bad(pgd))
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index c7ae74ce5ff3..ddd7242c3e6b 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -151,6 +151,7 @@ static pmd_t *mm_alloc_pmd(struct mm_struct *mm, unsigned long address)
 	pud_t *pud;
 
 	pgd = pgd_offset(mm, address);
+	pgd = kernel_to_usercopy_pgdp(pgd);
 	p4d = p4d_alloc(mm, pgd, address);
 	if (!p4d)
 		return NULL;